Kouhei Sutou
null+****@clear*****
Sat Oct 11 22:53:32 JST 2014
Kouhei Sutou 2014-10-11 22:53:32 +0900 (Sat, 11 Oct 2014) New Revision: 46beac9dbec71a7a6a44c3f4657e51b1dae8627a https://github.com/groonga/groonga/commit/46beac9dbec71a7a6a44c3f4657e51b1dae8627a Message: table_create: support multiple token filters Added files: test/command/suite/table_create/token_filters/empty_between_comma.expected test/command/suite/table_create/token_filters/empty_between_comma.test test/command/suite/table_create/token_filters/leading_comma.expected test/command/suite/table_create/token_filters/leading_comma.test test/command/suite/table_create/token_filters/leading_spaces.expected test/command/suite/table_create/token_filters/leading_spaces.test test/command/suite/table_create/token_filters/multiple.expected test/command/suite/table_create/token_filters/multiple.test test/command/suite/table_create/token_filters/spaces_around_comma.expected test/command/suite/table_create/token_filters/spaces_around_comma.test test/command/suite/table_create/token_filters/trailing_comma.expected test/command/suite/table_create/token_filters/trailing_comma.test Modified files: lib/proc.c Modified: lib/proc.c (+95 -6) =================================================================== --- lib/proc.c 2014-10-11 22:14:23 +0900 (656b9e9) +++ lib/proc.c 2014-10-11 22:53:32 +0900 (df4927f) @@ -1194,6 +1194,96 @@ grn_column_create_flags_to_text(grn_ctx *ctx, grn_obj *buf, grn_obj_flags flags) } } +static grn_bool +proc_table_create_set_token_filters_put(grn_ctx *ctx, + grn_obj *token_filters, + const char *token_filter_name, + int token_filter_name_length) +{ + grn_obj *token_filter; + + token_filter = grn_ctx_get(ctx, + token_filter_name, + token_filter_name_length); + if (token_filter) { + GRN_PTR_PUT(ctx, token_filters, token_filter); + return GRN_TRUE; + } else { + ERR(GRN_INVALID_ARGUMENT, + "[table][create][token-filter] nonexistent token filter: <%.*s>", + token_filter_name_length, token_filter_name); + return GRN_FALSE; + } +} + +static grn_bool 
+proc_table_create_set_token_filters_fill(grn_ctx *ctx, + grn_obj *token_filters, + grn_obj *token_filter_names) +{ + const char *start, *current, *end; + const char *name_start, *name_end; + const char *last_name_end; + + start = GRN_TEXT_VALUE(token_filter_names); + end = start + GRN_TEXT_LEN(token_filter_names); + current = start; + name_start = NULL; + name_end = NULL; + last_name_end = start; + while (current < end) { + switch (current[0]) { + case ' ' : + if (name_start && !name_end) { + name_end = current; + } + break; + case ',' : + if (!name_start) { + goto break_loop; + } + if (!name_end) { + name_end = current; + } + proc_table_create_set_token_filters_put(ctx, + token_filters, + name_start, + name_end - name_start); + last_name_end = name_end + 1; + name_start = NULL; + name_end = NULL; + break; + default : + if (!name_start) { + name_start = current; + } + break; + } + current++; + } + +break_loop: + if (!name_start) { + ERR(GRN_INVALID_ARGUMENT, + "[table][create][token-filter] empty token filter name: " + "<%.*s|%.*s|%.*s>", + (int)(last_name_end - start), start, + (int)(current - last_name_end), last_name_end, + (int)(end - current), current); + return GRN_FALSE; + } + + if (!name_end) { + name_end = current; + } + proc_table_create_set_token_filters_put(ctx, + token_filters, + name_start, + name_end - name_start); + + return GRN_TRUE; +} + static void proc_table_create_set_token_filters(grn_ctx *ctx, grn_obj *table, @@ -1206,12 +1296,11 @@ proc_table_create_set_token_filters(grn_ctx *ctx, } GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, 0); - GRN_PTR_PUT(ctx, - &token_filters, - grn_ctx_get(ctx, - GRN_TEXT_VALUE(token_filter_names), - GRN_TEXT_LEN(token_filter_names))); - grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters); + if (proc_table_create_set_token_filters_fill(ctx, + &token_filters, + token_filter_names)) { + grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters); + } grn_obj_unlink(ctx, &token_filters); } 
Added: test/command/suite/table_create/token_filters/empty_between_comma.expected (+18 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/empty_between_comma.expected 2014-10-11 22:53:32 +0900 (4cfbf35) @@ -0,0 +1,18 @@ +register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord, ,TokenFilterStopWord" +[ + [ + [ + -22, + 0.0, + 0.0 + ], + "[table][create][token-filter] empty token filter name: <TokenFilterStopWord,| |,TokenFilterStopWord>" + ], + false +] +#|e| [table][create][token-filter] empty token filter name: <TokenFilterStopWord,| |,TokenFilterStopWord> +dump +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto + Added: test/command/suite/table_create/token_filters/empty_between_comma.test (+8 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/empty_between_comma.test 2014-10-11 22:53:32 +0900 (492002b) @@ -0,0 +1,8 @@ +register token_filters/stop_word + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters "TokenFilterStopWord, ,TokenFilterStopWord" + +dump Added: test/command/suite/table_create/token_filters/leading_comma.expected (+18 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/leading_comma.expected 2014-10-11 22:53:32 +0900 (0f4c044) @@ -0,0 +1,18 @@ +register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters ",TokenFilterStopWord" +[ + [ + [ + -22, + 0.0, + 0.0 + ], + "[table][create][token-filter] empty 
token filter name: <||,TokenFilterStopWord>" + ], + false +] +#|e| [table][create][token-filter] empty token filter name: <||,TokenFilterStopWord> +dump +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto + Added: test/command/suite/table_create/token_filters/leading_comma.test (+8 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/leading_comma.test 2014-10-11 22:53:32 +0900 (f2b793d) @@ -0,0 +1,8 @@ +register token_filters/stop_word + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters ",TokenFilterStopWord" + +dump Added: test/command/suite/table_create/token_filters/leading_spaces.expected (+7 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/leading_spaces.expected 2014-10-11 22:53:32 +0900 (3fc3481) @@ -0,0 +1,7 @@ +register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters " TokenFilterStopWord" +[[0,0.0,0.0],true] +dump +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord + Added: test/command/suite/table_create/token_filters/leading_spaces.test (+8 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/leading_spaces.test 2014-10-11 22:53:32 +0900 (6915b88) @@ -0,0 +1,8 @@ +register token_filters/stop_word + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters " TokenFilterStopWord" + +dump Added: test/command/suite/table_create/token_filters/multiple.expected (+7 -0) 100644 
=================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/multiple.expected 2014-10-11 22:53:32 +0900 (bc3a2c4) @@ -0,0 +1,7 @@ +register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord +[[0,0.0,0.0],true] +dump +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord + Added: test/command/suite/table_create/token_filters/multiple.test (+8 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/multiple.test 2014-10-11 22:53:32 +0900 (82c8ed3) @@ -0,0 +1,8 @@ +register token_filters/stop_word + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters TokenFilterStopWord,TokenFilterStopWord + +dump Added: test/command/suite/table_create/token_filters/spaces_around_comma.expected (+7 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/spaces_around_comma.expected 2014-10-11 22:53:32 +0900 (e3a6a55) @@ -0,0 +1,7 @@ +register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord , TokenFilterStopWord" +[[0,0.0,0.0],true] +dump +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord + Added: test/command/suite/table_create/token_filters/spaces_around_comma.test (+8 -0) 100644 =================================================================== --- /dev/null +++ 
test/command/suite/table_create/token_filters/spaces_around_comma.test 2014-10-11 22:53:32 +0900 (4879a70) @@ -0,0 +1,8 @@ +register token_filters/stop_word + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters "TokenFilterStopWord , TokenFilterStopWord" + +dump Added: test/command/suite/table_create/token_filters/trailing_comma.expected (+18 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/trailing_comma.expected 2014-10-11 22:53:32 +0900 (c7b9bdf) @@ -0,0 +1,18 @@ +register token_filters/stop_word +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord," +[ + [ + [ + -22, + 0.0, + 0.0 + ], + "[table][create][token-filter] empty token filter name: <TokenFilterStopWord,||>" + ], + false +] +#|e| [table][create][token-filter] empty token filter name: <TokenFilterStopWord,||> +dump +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto + Added: test/command/suite/table_create/token_filters/trailing_comma.test (+8 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/table_create/token_filters/trailing_comma.test 2014-10-11 22:53:32 +0900 (dace8c1) @@ -0,0 +1,8 @@ +register token_filters/stop_word + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto \ + --token_filters "TokenFilterStopWord," + +dump -------------- next part -------------- An HTML attachment was scrubbed (original text was mis-encoded in the archive)... Download