Kouhei Sutou
null+****@clear*****
Sat Feb 28 20:02:09 JST 2015
Kouhei Sutou 2015-02-28 20:02:09 +0900 (Sat, 28 Feb 2015) New Revision: be2529402589dd6b9165e163b834a538a68d22a1 https://github.com/groonga/groonga/commit/be2529402589dd6b9165e163b834a538a68d22a1 Message: mecab: don't report "empty token" multiple times on full wide space only case Added files: test/command/suite/tokenizers/mecab/full_width_space/only.expected test/command/suite/tokenizers/mecab/full_width_space/only.test Modified files: plugins/tokenizers/mecab.c Modified: plugins/tokenizers/mecab.c (+9 -1) =================================================================== --- plugins/tokenizers/mecab.c 2015-02-28 19:57:06 +0900 (c412714) +++ plugins/tokenizers/mecab.c 2015-02-28 20:02:09 +0900 (71cc950) @@ -224,11 +224,19 @@ mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) for (r = p; r < e; r += cl) { int space_len; + + space_len = grn_isspace(r, encoding); + if (space_len > 0 && r == p) { + cl = space_len; + p = r + cl; + continue; + } + if (!(cl = grn_charlen_(ctx, r, e, encoding))) { tokenizer->next = e; break; } - space_len = grn_isspace(r, encoding); + if (space_len > 0) { const char *q = r + space_len; while ((space_len = grn_isspace(q, encoding))) { Added: test/command/suite/tokenizers/mecab/full_width_space/only.expected (+3 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/mecab/full_width_space/only.expected 2015-02-28 20:02:09 +0900 (90db4fa) @@ -0,0 +1,3 @@ +tokenize TokenMecab '　' +[[0,0.0,0.0],[]] +#|w| [token_next] ignore an empty token: <TokenMecab>: <　> Added: test/command/suite/tokenizers/mecab/full_width_space/only.test (+1 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/mecab/full_width_space/only.test 2015-02-28 20:02:09 +0900 (ea4385a) @@ -0,0 +1 @@ +tokenize TokenMecab '　' -------------- next part -------------- HTMLの添付ファイルを保管しました...Download