naoa
null+****@clear*****
Thu Aug 14 16:35:36 JST 2014
naoa	2014-08-14 16:35:36 +0900 (Thu, 14 Aug 2014)

  New Revision: 4a6f981ceb908a571c07ff4d9c7f85208774f8f9
  https://github.com/groonga/groonga/commit/4a6f981ceb908a571c07ff4d9c7f85208774f8f9

  Merged 46e2243: Merge pull request #191 from naoa/tokenize-support-mode

  Message:
    tokenize: support mode

  Added files:
    test/command/suite/tokenize/add_mode.expected
    test/command/suite/tokenize/add_mode.test
    test/command/suite/tokenize/get_mode.expected
    test/command/suite/tokenize/get_mode.test
    test/command/suite/tokenize/invalid/mode/unknown_mode.expected
    test/command/suite/tokenize/invalid/mode/unknown_mode.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+29 -9)
===================================================================
--- lib/proc.c    2014-08-13 12:17:09 +0900 (ad5b20b)
+++ lib/proc.c    2014-08-14 16:35:36 +0900 (d7d97fe)
@@ -3274,14 +3274,14 @@ create_lexicon_for_tokenize(grn_ctx *ctx,
 }
 
 static void
-tokenize(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string, unsigned int flags,
-         grn_obj *tokens)
+tokenize(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string, grn_token_mode mode,
+         unsigned int flags, grn_obj *tokens)
 {
   grn_token *token;
   token = grn_token_open(ctx, (grn_obj *)lexicon,
                          GRN_TEXT_VALUE(string), GRN_TEXT_LEN(string),
-                         GRN_TOKEN_ADD, flags);
+                         mode, flags);
   if (!token) {
     return;
   }
@@ -3307,11 +3307,13 @@ proc_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   grn_obj *string;
   grn_obj *normalizer_name;
   grn_obj *flag_names;
+  grn_obj *mode_name;
 
   tokenizer_name = VAR(0);
   string = VAR(1);
   normalizer_name = VAR(2);
   flag_names = VAR(3);
+  mode_name = VAR(4);
 
   if (GRN_TEXT_LEN(tokenizer_name) == 0) {
     ERR(GRN_INVALID_ARGUMENT, "[tokenize] tokenizer name is missing");
@@ -3328,7 +3330,6 @@ proc_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   {
     unsigned int flags;
     grn_hash *lexicon;
-    grn_obj tokens;
 
     flags = parse_tokenize_flags(ctx, flag_names);
     if (ctx->rc != GRN_SUCCESS) {
@@ -3342,10 +3343,28 @@ proc_tokenize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
       return NULL;
     }
 
-    GRN_VALUE_FIX_SIZE_INIT(&tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
-    tokenize(ctx, lexicon, string, flags, &tokens);
-    output_tokens(ctx, &tokens, lexicon);
-    GRN_OBJ_FIN(ctx, &tokens);
+    if (GRN_TEXT_LEN(mode_name) == 0 ||
+        !memcmp(GRN_TEXT_VALUE(mode_name), "ADD", 3)) {
+      grn_obj add_tokens;
+      GRN_VALUE_FIX_SIZE_INIT(&add_tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
+      tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &add_tokens);
+      output_tokens(ctx, &add_tokens, lexicon);
+      GRN_OBJ_FIN(ctx, &add_tokens);
+    } else if (!memcmp(GRN_TEXT_VALUE(mode_name), "GET", 3)) {
+      grn_obj add_tokens;
+      grn_obj get_tokens;
+      GRN_VALUE_FIX_SIZE_INIT(&add_tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
+      GRN_VALUE_FIX_SIZE_INIT(&get_tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
+      tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &add_tokens);
+      tokenize(ctx, lexicon, string, GRN_TOKEN_GET, flags, &get_tokens);
+      output_tokens(ctx, &get_tokens, lexicon);
+      GRN_OBJ_FIN(ctx, &add_tokens);
+      GRN_OBJ_FIN(ctx, &get_tokens);
+    } else {
+      ERR(GRN_INVALID_ARGUMENT, "[tokenize] invalid mode: <%.*s>",
+          (int)GRN_TEXT_LEN(mode_name), GRN_TEXT_VALUE(mode_name));
+      output_tokens(ctx, NULL, NULL);
+    }
 
     grn_hash_close(ctx, lexicon);
   }
@@ -5128,7 +5147,8 @@ grn_db_init_builtin_query(grn_ctx *ctx)
   DEF_VAR(vars[1], "string");
   DEF_VAR(vars[2], "normalizer");
   DEF_VAR(vars[3], "flags");
-  DEF_COMMAND("tokenize", proc_tokenize, 4, vars);
+  DEF_VAR(vars[4], "mode");
+  DEF_COMMAND("tokenize", proc_tokenize, 5, vars);
   DEF_COMMAND("tokenizer_list", proc_tokenizer_list, 0, vars);

  Added: test/command/suite/tokenize/add_mode.expected (+30 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/add_mode.expected    2014-08-14 16:35:36 +0900 (3fdbed8)
@@ -0,0 +1,30 @@
+tokenize TokenBigram "あいabアイ" NormalizerAuto NONE ADD
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "あい",
+      "position": 0
+    },
+    {
+      "value": "い",
+      "position": 1
+    },
+    {
+      "value": "ab",
+      "position": 2
+    },
+    {
+      "value": "アイ",
+      "position": 3
+    },
+    {
+      "value": "イ",
+      "position": 4
+    }
+  ]
+]

  Added: test/command/suite/tokenize/add_mode.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/add_mode.test    2014-08-14 16:35:36 +0900 (3e5089b)
@@ -0,0 +1 @@
+tokenize TokenBigram "あいabアイ" NormalizerAuto NONE ADD

  Added: test/command/suite/tokenize/get_mode.expected (+22 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/get_mode.expected    2014-08-14 16:35:36 +0900 (bc06ba9)
@@ -0,0 +1,22 @@
+tokenize TokenBigram "あいabアイ" NormalizerAuto NONE GET
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "あい",
+      "position": 0
+    },
+    {
+      "value": "ab",
+      "position": 2
+    },
+    {
+      "value": "アイ",
+      "position": 3
+    }
+  ]
+]

  Added: test/command/suite/tokenize/get_mode.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/get_mode.test    2014-08-14 16:35:36 +0900 (2b08416)
@@ -0,0 +1 @@
+tokenize TokenBigram "あいabアイ" NormalizerAuto NONE GET

  Added: test/command/suite/tokenize/invalid/mode/unknown_mode.expected (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/mode/unknown_mode.expected    2014-08-14 16:35:36 +0900 (37f8645)
@@ -0,0 +1,3 @@
+tokenize TokenBigram "あいabアイ" NormalizerAuto NONE UNKNOWN
+[[[-22,0.0,0.0],"[tokenize] invalid mode: <UNKNOWN>"],[]]
+#|e| [tokenize] invalid mode: <UNKNOWN>

  Added: test/command/suite/tokenize/invalid/mode/unknown_mode.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenize/invalid/mode/unknown_mode.test    2014-08-14 16:35:36 +0900 (b4418af)
@@ -0,0 +1 @@
+tokenize TokenBigram "あいabアイ" NormalizerAuto NONE UNKNOWN
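
A note on the GET branch above: GRN_TOKEN_GET only looks tokens up in the
lexicon, so proc_tokenize first runs a GRN_TOKEN_ADD pass over the same
string; the lexicon built by create_lexicon_for_tokenize() starts out empty,
and every GET lookup would otherwise miss. This is also why
add_mode.expected contains the overlapping tail tokens ("い", "イ") that
indexing registers, while get_mode.expected lists only the tokens a search
would use. A minimal sketch of that two-pass idea, reusing the static
tokenize() helper and types from the diff above (tokenize_via_get itself is
a hypothetical caller, not part of this commit):

  /* Hypothetical illustration, not part of the commit. */
  static void
  tokenize_via_get(grn_ctx *ctx, grn_hash *lexicon, grn_obj *string,
                   unsigned int flags, grn_obj *get_tokens)
  {
    grn_obj add_tokens;
    GRN_VALUE_FIX_SIZE_INIT(&add_tokens, GRN_OBJ_VECTOR, GRN_ID_NIL);
    /* Pass 1 (ADD): registers every generated token, including
       overlapping tail tokens, in the empty lexicon. */
    tokenize(ctx, lexicon, string, GRN_TOKEN_ADD, flags, &add_tokens);
    /* Pass 2 (GET): pure lookup against the now-populated lexicon;
       returns only the tokens a search-time tokenization yields. */
    tokenize(ctx, lexicon, string, GRN_TOKEN_GET, flags, get_tokens);
    GRN_OBJ_FIN(ctx, &add_tokens);
  }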