Kouhei Sutou
null+****@clear*****
Fri Jan 6 16:57:37 JST 2017
Kouhei Sutou 2017-01-06 16:57:37 +0900 (Fri, 06 Jan 2017)

  New Revision: b9d600189174ae9706924f4805d8dd3ae54b8eb8
  https://github.com/groonga/groonga/commit/b9d600189174ae9706924f4805d8dd3ae54b8eb8

  Message:
    TokenRegexp: fix a bug that "\Ax\z" returns all one character data

  Added files:
    test/command/suite/tokenizers/regexp/get/begin_end/one.expected
    test/command/suite/tokenizers/regexp/get/begin_end/one.test
  Modified files:
    lib/tokenizers.c

  Modified: lib/tokenizers.c (+2 -1)
===================================================================
--- lib/tokenizers.c    2017-01-05 12:49:57 +0900 (87938e6)
+++ lib/tokenizers.c    2017-01-06 16:57:37 +0900 (6bd0a1b)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2015 Brazil
+  Copyright(C) 2009-2017 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -603,6 +603,7 @@ regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
     if (is_begin &&
         char_len == GRN_TOKENIZER_BEGIN_MARK_UTF8_LEN &&
         memcmp(current, GRN_TOKENIZER_BEGIN_MARK_UTF8, char_len) == 0) {
+      tokenizer->is_start_token = GRN_TRUE;
       n_characters++;
       GRN_TEXT_PUT(ctx, buffer, current, char_len);
       current += char_len;

  Added: test/command/suite/tokenizers/regexp/get/begin_end/one.expected (+52 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/regexp/get/begin_end/one.expected    2017-01-06 16:57:37 +0900 (6fa5b6e)
@@ -0,0 +1,52 @@
+table_create Lexicon TABLE_PAT_KEY ShortText --default_tokenizer TokenRegexp
+[[0,0.0,0.0],true]
+table_tokenize Lexicon "x" --mode ADD
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "",
+      "position": 0,
+      "force_prefix": false
+    },
+    {
+      "value": "x",
+      "position": 1,
+      "force_prefix": false
+    },
+    {
+      "value": "",
+      "position": 2,
+      "force_prefix": false
+    }
+  ]
+]
+table_tokenize Lexicon "x" --mode GET
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "",
+      "position": 0,
+      "force_prefix": false
+    },
+    {
+      "value": "x",
+      "position": 1,
+      "force_prefix": false
+    },
+    {
+      "value": "",
+      "position": 2,
+      "force_prefix": false
+    }
+  ]
+]

  Added: test/command/suite/tokenizers/regexp/get/begin_end/one.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/regexp/get/begin_end/one.test    2017-01-06 16:57:37 +0900 (28e1e0a)
@@ -0,0 +1,5 @@
+table_create Lexicon TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenRegexp
+table_tokenize Lexicon "x" --mode ADD
+
+table_tokenize Lexicon "x" --mode GET
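Context on the fix: TokenRegexp surrounds tokenized text with special begin and
end mark tokens (the tokens at positions 0 and 2 in the expected output above)
so that anchored patterns like \A and \z can be resolved through the index. The
one-line change flags the token carrying the begin mark as a start token, so an
anchored pattern such as "\Ax\z" only matches values that are exactly "x"
instead of every one-character value.

The following is a minimal sketch of how the fixed behavior could be exercised
end to end. The Memos/Lexicon schema, the memos_content index column, and the
sample records are hypothetical, not part of this commit; the quadruple
backslashes assume the usual groonga command-file convention where both the
command parser and the script-syntax string literal strip one level of
escaping, so Onigmo receives \A and \z.

    # Hypothetical reproduction script (not part of this commit).
    table_create Memos TABLE_NO_KEY
    column_create Memos content COLUMN_SCALAR ShortText

    table_create Lexicon TABLE_PAT_KEY ShortText \
      --default_tokenizer TokenRegexp
    column_create Lexicon memos_content COLUMN_INDEX|WITH_POSITION \
      Memos content

    load --table Memos
    [
    {"content": "x"},
    {"content": "y"}
    ]

    # Before this fix, the anchored pattern matched every one-character
    # record ("y" included); with is_start_token set for the begin-mark
    # token, only {"content": "x"} is returned.
    select Memos --filter 'content @~ "\\\\Ax\\\\z"'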