Kouhei Sutou
null+****@clear*****
Mon Sep 10 16:37:06 JST 2018
Kouhei Sutou 2018-09-10 16:37:06 +0900 (Mon, 10 Sep 2018) Revision: e262c284afdbd6a0e92ba8e9116d4216040c6910 https://github.com/groonga/groonga/commit/e262c284afdbd6a0e92ba8e9116d4216040c6910 Message: TokenMecab: add include_reading option It adds reading to tokens. Added files: test/command/suite/tokenizers/mecab/options/include_reading.expected test/command/suite/tokenizers/mecab/options/include_reading.test Modified files: plugins/tokenizers/mecab.c Modified: plugins/tokenizers/mecab.c (+20 -0) =================================================================== --- plugins/tokenizers/mecab.c 2018-09-10 16:17:57 +0900 (8a56d8872) +++ plugins/tokenizers/mecab.c 2018-09-10 16:37:06 +0900 (105033a99) @@ -50,6 +50,7 @@ typedef struct { grn_bool chunked_tokenize; int32_t chunk_size_threshold; grn_bool include_class; + grn_bool include_reading; } grn_mecab_tokenizer_options; typedef struct { @@ -142,6 +143,7 @@ mecab_tokenizer_options_init(grn_mecab_tokenizer_options *options) options->chunked_tokenize = grn_mecab_chunked_tokenize_enabled; options->chunk_size_threshold = grn_mecab_chunk_size_threshold; options->include_class = GRN_FALSE; + options->include_reading = GRN_FALSE; } static grn_bool @@ -155,6 +157,10 @@ mecab_tokenizer_options_need_default_output(grn_mecab_tokenizer_options *options return GRN_TRUE; } + if (options->include_reading) { + return GRN_TRUE; + } + return GRN_FALSE; } @@ -200,6 +206,12 @@ mecab_tokenizer_options_open(grn_ctx *ctx, raw_options, i, options->include_class); + } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "include_reading")) { + options->include_reading = + grn_vector_get_element_bool(ctx, + raw_options, + i, + options->include_reading); } } GRN_OPTION_VALUES_EACH_END(); @@ -802,6 +814,14 @@ mecab_next_default_format(grn_ctx *ctx, mecab_next_default_format_add_feature(ctx, &data, "subclass1", 2); mecab_next_default_format_add_feature(ctx, &data, "subclass2", 3); } + if (tokenizer->options->include_reading) { + add_feature_data 
data; + data.token = token; + data.features = &features; + data.ignore_empty_value = GRN_TRUE; + data.ignore_asterisk_value = GRN_FALSE; + mecab_next_default_format_add_feature(ctx, &data, "reading", 7); + } GRN_OBJ_FIN(ctx, &features); } Added: test/command/suite/tokenizers/mecab/options/include_reading.expected (+34 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/mecab/options/include_reading.expected 2018-09-10 16:37:06 +0900 (de2d54cf2) @@ -0,0 +1,34 @@ +tokenize 'TokenMecab("include_reading", true)' '焼き肉と焼きにく' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "焼き肉", + "position": 0, + "force_prefix": false, + "metadata": { + "reading": "ヤキニク" + } + }, + { + "value": "と", + "position": 1, + "force_prefix": false, + "metadata": { + "reading": "ト" + } + }, + { + "value": "焼きにく", + "position": 2, + "force_prefix": false, + "metadata": { + "reading": "ヤキニク" + } + } + ] +] Added: test/command/suite/tokenizers/mecab/options/include_reading.test (+5 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/mecab/options/include_reading.test 2018-09-10 16:37:06 +0900 (0d637833f) @@ -0,0 +1,5 @@ +#@on-error omit +tokenize \ + 'TokenMecab("include_reading", true)' \ + '焼き肉と焼きにく' +#@on-error default -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180910/b2122980/attachment-0001.htm