naoa
null+****@clear*****
Thu May 21 21:04:13 JST 2015
naoa 2015-05-21 21:04:13 +0900 (Thu, 21 May 2015) New Revision: 9e75d2d48c30c9e906f66c5e3762bd4a84559707 https://github.com/groonga/groonga/commit/9e75d2d48c30c9e906f66c5e3762bd4a84559707 Merged 6c4e87d: Merge pull request #339 from naoa/add-force_prefix-to-tokenize Message: tokenize: Add force_prefix Added files: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.test test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.test test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.test test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.test Modified files: lib/proc.c Modified: lib/proc.c (+6 -1) =================================================================== --- lib/proc.c 2015-05-22 10:02:12 +0900 (00be429) +++ lib/proc.c 2015-05-21 21:04:13 +0900 (f4a58dc) @@ -3983,6 +3983,7 @@ parse_tokenize_flags(grn_ctx *ctx, grn_obj *flag_names) typedef struct { grn_id id; int32_t position; + grn_bool force_prefix; } tokenize_token; static void @@ -3999,7 +4000,7 @@ output_tokens(grn_ctx *ctx, grn_obj *tokens, grn_obj *lexicon) token = ((tokenize_token *)(GRN_BULK_HEAD(tokens))) + i; - GRN_OUTPUT_MAP_OPEN("TOKEN", 2); + GRN_OUTPUT_MAP_OPEN("TOKEN", 3); GRN_OUTPUT_CSTR("value"); value_size = grn_table_get_key(ctx, lexicon, token->id, @@ -4009,6 +4010,9 @@ output_tokens(grn_ctx *ctx, grn_obj *tokens, grn_obj *lexicon) GRN_OUTPUT_CSTR("position"); GRN_OUTPUT_INT32(token->position); + GRN_OUTPUT_CSTR("force_prefix"); + GRN_OUTPUT_BOOL(token->force_prefix); + GRN_OUTPUT_MAP_CLOSE(); } GRN_OUTPUT_ARRAY_CLOSE(); @@ -4118,6 +4122,7 @@ tokenize(grn_ctx *ctx, grn_obj *lexicon, 
grn_obj *string, grn_tokenize_mode mode current_token = ((tokenize_token *)(GRN_BULK_CURR(tokens))) - 1; current_token->id = token_id; current_token->position = token_cursor->pos; + current_token->force_prefix = token_cursor->force_prefix; } grn_token_cursor_close(ctx, token_cursor); } Added: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected (+20 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.expected 2015-05-21 21:04:13 +0900 (aa9a223) @@ -0,0 +1,20 @@ +tokenize TokenBigram "ABCだよ" NormalizerAuto --mode GET +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "abc", + "position": 0, + "force_prefix": false + }, + { + "value": "だよ", + "position": 1, + "force_prefix": false + } + ] +] Added: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.test (+1 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/matured.test 2015-05-21 21:04:13 +0900 (01f8ec1) @@ -0,0 +1 @@ +tokenize TokenBigram "ABCだよ" NormalizerAuto --mode GET Added: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected (+20 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.expected 2015-05-21 21:04:13 +0900 (23f8a25) @@ -0,0 +1,20 @@ +tokenize TokenBigram "ABCだ" NormalizerAuto --mode GET +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "value": "abc", + "position": 0, + "force_prefix": false + }, + { + "value": "だ", + "position": 1, + "force_prefix": true + } + ] +] Added: test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.test (+1 -0) 100644 =================================================================== --- /dev/null +++ 
test/command/suite/tokenizers/bigram/force_prefix/multiple_tokens/unmatured.test 2015-05-21 21:04:13 +0900 (a924758) @@ -0,0 +1 @@ +tokenize TokenBigram "ABCだ" NormalizerAuto --mode GET Added: test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected (+2 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.expected 2015-05-21 21:04:13 +0900 (a311466) @@ -0,0 +1,2 @@ +tokenize TokenBigram "だよ" NormalizerAuto --mode GET +[[0,0.0,0.0],[{"value":"だよ","position":0,"force_prefix":false}]] Added: test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.test (+1 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/single_token/matured.test 2015-05-21 21:04:13 +0900 (2e4f195) @@ -0,0 +1 @@ +tokenize TokenBigram "だよ" NormalizerAuto --mode GET Added: test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected (+2 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.expected 2015-05-21 21:04:13 +0900 (4867dda) @@ -0,0 +1,2 @@ +tokenize TokenBigram "だ" NormalizerAuto --mode GET +[[0,0.0,0.0],[{"value":"だ","position":0,"force_prefix":true}]] Added: test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.test (+1 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/tokenizers/bigram/force_prefix/single_token/unmatured.test 2015-05-21 21:04:13 +0900 (2f82f49) @@ -0,0 +1 @@ +tokenize TokenBigram "だ" NormalizerAuto --mode GET -------------- next part -------------- An HTML attachment was scrubbed... [original footer text was mis-encoded; presumably the standard Mailman scrubbed-attachment notice with a download URL] Download