Kouhei Sutou
null+****@clear*****
Thu Apr 5 18:18:29 JST 2018
Kouhei Sutou 2018-04-05 18:18:29 +0900 (Thu, 05 Apr 2018) New Revision: 53624c39fc4e1507243c5c4494b383c2cd0ebd19 https://github.com/groonga/groonga/commit/53624c39fc4e1507243c5c4494b383c2cd0ebd19 Message: dump: support tokenizer options Added files: test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.expected test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.test test/command/suite/dump/schema/table/hash/tokenizer_with_options.expected test/command/suite/dump/schema/table/hash/tokenizer_with_options.test test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.expected test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.test Modified files: include/groonga/table.h lib/proc/proc_dump.c lib/table.c Modified: include/groonga/table.h (+6 -0) =================================================================== --- include/groonga/table.h 2018-04-05 18:16:58 +0900 (dfaebdc88) +++ include/groonga/table.h 2018-04-05 18:18:29 +0900 (934ea5f59) @@ -246,6 +246,9 @@ typedef void *(*grn_tokenizer_open_options_func)(grn_ctx *ctx, grn_obj *values, void *user_data); +GRN_API grn_rc +grn_table_get_tokenizer_options(grn_ctx *ctx, grn_obj *table, grn_obj *options); + GRN_API void * grn_table_cache_tokenizer_options(grn_ctx *ctx, grn_obj *table, @@ -253,6 +256,9 @@ grn_table_cache_tokenizer_options(grn_ctx *ctx, grn_close_func close_options_func, void *user_data); +GRN_API grn_rc +grn_table_get_tokenizer_string(grn_ctx *ctx, grn_obj *table, grn_obj *output); + #ifdef __cplusplus } #endif Modified: lib/proc/proc_dump.c (+20 -2) =================================================================== --- lib/proc/proc_dump.c 2018-04-05 18:16:58 +0900 (9439e0cda) +++ lib/proc/proc_dump.c 2018-04-05 18:18:29 +0900 (16a1e76bb) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2009-2017 Brazil + Copyright(C) 2009-2018 Brazil This library is free software; you can redistribute it and/or 
modify it under the terms of the GNU Lesser General Public @@ -704,6 +704,20 @@ exit : } static void +dump_optionable_obj_string(grn_ctx *ctx, + grn_dumper *dumper, + grn_obj *string) +{ + const char *value = GRN_TEXT_VALUE(string); + size_t length = GRN_TEXT_LEN(string); + if (length > 0 && value[length - 1] == ')') { + grn_text_otoj(ctx, dumper->output, string, NULL); + } else { + GRN_TEXT_PUT(ctx, dumper->output, value, length); + } +} + +static void dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table) { grn_obj *domain = NULL; @@ -763,8 +777,12 @@ dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table) grn_obj_unlink(ctx, range); } if (default_tokenizer) { + grn_obj sub_output; GRN_TEXT_PUTS(ctx, dumper->output, " --default_tokenizer "); - dump_obj_name(ctx, dumper, default_tokenizer); + GRN_TEXT_INIT(&sub_output, 0); + grn_table_get_tokenizer_string(ctx, table, &sub_output); + dump_optionable_obj_string(ctx, dumper, &sub_output); + GRN_OBJ_FIN(ctx, &sub_output); } if (normalizer) { GRN_TEXT_PUTS(ctx, dumper->output, " --normalizer "); Modified: lib/table.c (+96 -2) =================================================================== --- lib/table.c 2018-04-05 18:16:58 +0900 (7e7c1813e) +++ lib/table.c 2018-04-05 18:18:29 +0900 (6201dbdf5) @@ -184,6 +184,29 @@ grn_table_tokenizer_set_options(grn_ctx *ctx, } } +grn_rc +grn_table_get_tokenizer_options(grn_ctx *ctx, + grn_obj *table, + grn_obj *options) +{ + GRN_API_ENTER; + + if (!grn_obj_is_lexicon(ctx, table)) { + ERR(GRN_INVALID_ARGUMENT, + "[table][tokenizer-options][get] table must be key table: %s", + table ? 
grn_obj_type_to_string(table->header.type) : "(null)"); + GRN_API_RETURN(ctx->rc); + } + + grn_obj_get_option_values(ctx, + table, + "tokenizer", + -1, + GRN_OPTION_REVISION_NONE, + options); + GRN_API_RETURN(ctx->rc); +} + void * grn_table_cache_tokenizer_options(grn_ctx *ctx, grn_obj *table, @@ -200,7 +223,7 @@ grn_table_cache_tokenizer_options(grn_ctx *ctx, if (!table) { ERR(GRN_INVALID_ARGUMENT, - "[table][tokenizer-options][set] table is NULL"); + "[table][tokenizer-options][cache] table is NULL"); GRN_API_RETURN(NULL); } @@ -216,7 +239,7 @@ grn_table_cache_tokenizer_options(grn_ctx *ctx, break; default : ERR(GRN_INVALID_ARGUMENT, - "[table][tokenizer-options][set] table must key table: %s", + "[table][tokenizer-options][cache] table must key table: %s", grn_obj_type_to_string(table->header.type)); GRN_API_RETURN(NULL); break; @@ -247,3 +270,74 @@ exit : GRN_API_RETURN(tokenizer->options); } +grn_rc +grn_table_get_tokenizer_string(grn_ctx *ctx, + grn_obj *table, + grn_obj *output) +{ + grn_obj *tokenizer; + char name[GRN_TABLE_MAX_KEY_SIZE]; + unsigned int name_size; + grn_obj options; + unsigned int n = 0; + + GRN_API_ENTER; + + if (!grn_obj_is_lexicon(ctx, table)) { + ERR(GRN_INVALID_ARGUMENT, + "[table][tokenizer-options][get] table must be key table: %s", + table ? 
grn_obj_type_to_string(table->header.type) : "(null)"); + GRN_API_RETURN(ctx->rc); + } + + grn_table_get_info(ctx, table, NULL, NULL, &tokenizer, NULL, NULL); + if (!tokenizer) { + GRN_API_RETURN(ctx->rc); + } + + name_size = grn_obj_name(ctx, tokenizer, name, GRN_TABLE_MAX_KEY_SIZE); + GRN_TEXT_PUT(ctx, output, name, name_size); + + GRN_VOID_INIT(&options); + grn_obj_get_option_values(ctx, + table, + "tokenizer", + -1, + GRN_OPTION_REVISION_NONE, + &options); + if (options.header.type != GRN_DB_VOID) { + n = grn_vector_size(ctx, &options); + } + if (n > 0) { + unsigned int i; + grn_obj option; + + GRN_VOID_INIT(&option); + GRN_TEXT_PUTS(ctx, output, "("); + for (i = 0; i < n; i++) { + const char *value; + unsigned int value_size; + grn_id domain; + + if (i > 0) { + GRN_TEXT_PUTS(ctx, output, ", "); + } + + value_size = grn_vector_get_element(ctx, + &options, + i, + &value, + NULL, + &domain); + grn_obj_reinit(ctx, &option, domain, 0); + grn_bulk_write(ctx, &option, value, value_size); + grn_text_otoj(ctx, output, &option, NULL); + } + GRN_TEXT_PUTS(ctx, output, ")"); + GRN_OBJ_FIN(ctx, &option); + } + GRN_OBJ_FIN(ctx, &options); + + GRN_API_RETURN(ctx->rc); +} + Added: test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.expected 2018-04-05 18:18:29 +0900 (b07eb6b32) @@ -0,0 +1,4 @@ +table_create Users TABLE_DAT_KEY ShortText --default_tokenizer 'TokenNgram("n", 4)' +[[0,0.0,0.0],true] +dump +table_create Users TABLE_DAT_KEY ShortText --default_tokenizer "TokenNgram(\"n\", 4)" Added: test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.test 
2018-04-05 18:18:29 +0900 (a4c360ff8) @@ -0,0 +1,4 @@ +table_create Users TABLE_DAT_KEY ShortText \ + --default_tokenizer 'TokenNgram("n", 4)' + +dump Added: test/command/suite/dump/schema/table/hash/tokenizer_with_options.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/table/hash/tokenizer_with_options.expected 2018-04-05 18:18:29 +0900 (8034ec3ca) @@ -0,0 +1,4 @@ +table_create Users TABLE_HASH_KEY ShortText --default_tokenizer 'TokenNgram("n", 4)' +[[0,0.0,0.0],true] +dump +table_create Users TABLE_HASH_KEY ShortText --default_tokenizer "TokenNgram(\"n\", 4)" Added: test/command/suite/dump/schema/table/hash/tokenizer_with_options.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/table/hash/tokenizer_with_options.test 2018-04-05 18:18:29 +0900 (10249ea5b) @@ -0,0 +1,4 @@ +table_create Users TABLE_HASH_KEY ShortText \ + --default_tokenizer 'TokenNgram("n", 4)' + +dump Added: test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.expected 2018-04-05 18:18:29 +0900 (314ecc527) @@ -0,0 +1,4 @@ +table_create Users TABLE_PAT_KEY ShortText --default_tokenizer 'TokenNgram("n", 4)' +[[0,0.0,0.0],true] +dump +table_create Users TABLE_PAT_KEY ShortText --default_tokenizer "TokenNgram(\"n\", 4)" Added: test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.test 2018-04-05 18:18:29 +0900 (bbc58f62f) @@ -0,0 +1,4 @@ +table_create Users TABLE_PAT_KEY ShortText \ + --default_tokenizer 'TokenNgram("n", 4)' + 
+dump -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180405/784c9048/attachment-0001.htm