Kouhei Sutou
null+****@clear*****
Mon Sep 10 16:51:44 JST 2018
Kouhei Sutou 2018-09-10 16:51:44 +0900 (Mon, 10 Sep 2018) Revision: 1ed550f7f7b3603645bc311dd2bfb38143c00365 https://github.com/groonga/groonga/commit/1ed550f7f7b3603645bc311dd2bfb38143c00365 Message: TokenMecab: add no reading case test Modified files: plugins/tokenizers/mecab.c test/command/suite/tokenizers/mecab/options/include_reading.expected test/command/suite/tokenizers/mecab/options/include_reading.test Modified: plugins/tokenizers/mecab.c (+8 -5) =================================================================== --- plugins/tokenizers/mecab.c 2018-09-10 16:37:06 +0900 (105033a99) +++ plugins/tokenizers/mecab.c 2018-09-10 16:51:44 +0900 (0b51370e8) @@ -642,7 +642,10 @@ mecab_init(grn_ctx *ctx, grn_tokenizer_query *query) GRN_PLUGIN_FREE(ctx, tokenizer); return NULL; } - { + if (mecab_tokenizer_options_need_default_output(tokenizer->options)) { + tokenizer->next = GRN_TEXT_VALUE(&(tokenizer->buf)); + tokenizer->end = tokenizer->next + GRN_TEXT_LEN(&(tokenizer->buf)); + } else { char *buf, *p; unsigned int bufsize; @@ -670,11 +673,11 @@ mecab_next_default_format_skip_eos(grn_ctx *ctx, } if (strncmp(tokenizer->next, "EOS", 3) == 0) { - const char *current = tokenizer->next; - if (current + 1 < tokenizer->end && current[0] == '\r') { + const char *current = tokenizer->next + 3; + if (current < tokenizer->end && current[0] == '\r') { current++; } - if (current + 1 < tokenizer->end && current[0] == '\n') { + if (current < tokenizer->end && current[0] == '\n') { current++; tokenizer->next = current; } @@ -702,7 +705,7 @@ mecab_next_default_format_add_feature(grn_ctx *ctx, size_t feature_length; grn_obj value; - if (i + 1 > n_locations) { + if (i + 2 > n_locations) { return; } Modified: test/command/suite/tokenizers/mecab/options/include_reading.expected (+16 -1) =================================================================== --- test/command/suite/tokenizers/mecab/options/include_reading.expected 2018-09-10 16:37:06 +0900 (de2d54cf2) +++ 
test/command/suite/tokenizers/mecab/options/include_reading.expected 2018-09-10 16:51:44 +0900 (4653e9054) @@ -1,4 +1,4 @@ -tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼きにく' +tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼きにくとyakiniku' [ [ 0, @@ -29,6 +29,21 @@ tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼きにく' "metadata": { "reading": "ヤキニク" } + }, + { + "value": "と", + "position": 3, + "force_prefix": false, + "metadata": { + "reading": "ト" + } + }, + { + "value": "yakiniku", + "position": 4, + "force_prefix": false, + "metadata": { + } } ] ] Modified: test/command/suite/tokenizers/mecab/options/include_reading.test (+1 -1) =================================================================== --- test/command/suite/tokenizers/mecab/options/include_reading.test 2018-09-10 16:37:06 +0900 (0d637833f) +++ test/command/suite/tokenizers/mecab/options/include_reading.test 2018-09-10 16:51:44 +0900 (3064d672e) @@ -1,5 +1,5 @@ #@on-error omit tokenize \ 'TokenMecab("include_reading", true)' \ - '焼き肉と焼きにく' + '焼き肉と焼きにくとyakiniku' #@on-error default -------------- next part -------------- An HTML attachment was scrubbed... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180910/6b933ca7/attachment-0001.htm