Kouhei Sutou 2018-10-29 15:40:03 +0900 (Mon, 29 Oct 2018) Revision: f2345b93cb826cf78d496320d01127e7a4c7cdcd https://github.com/groonga/groonga/commit/f2345b93cb826cf78d496320d01127e7a4c7cdcd Message: Start supporting token filter options New API: * grn_table_set_token_filter_options() * grn_table_get_token_filter_options() * grn_table_cache_token_filter_options() * grn_table_get_token_filter_strings() TODO: * Support HASH_KEY and DAT_KEY * Support multiple token filters Modified files: include/groonga/table.h lib/db.c lib/grn_pat.h lib/pat.c lib/table.c Modified: include/groonga/table.h (+26 -0) =================================================================== --- include/groonga/table.h 2018-10-29 15:37:23 +0900 (3c89afa59) +++ include/groonga/table.h 2018-10-29 15:40:03 +0900 (edfdfb585) @@ -300,6 +300,32 @@ grn_table_get_normalizer_string(grn_ctx *ctx, grn_obj *table, grn_obj *output); + +GRN_API grn_rc +grn_table_set_token_filter_options(grn_ctx *ctx, + grn_obj *table, + unsigned int i, + grn_obj *options); + +GRN_API grn_rc +grn_table_get_token_filter_options(grn_ctx *ctx, + grn_obj *table, + unsigned int i, + grn_obj *options); + +GRN_API void * +grn_table_cache_token_filter_options(grn_ctx *ctx, + grn_obj *table, + unsigned int i, + grn_table_module_open_options_func open_options_func, + grn_close_func close_options_func, + void *user_data); + +GRN_API grn_rc +grn_table_get_token_filter_strings(grn_ctx *ctx, + grn_obj *table, + grn_obj *output); + #ifdef __cplusplus } #endif Modified: lib/db.c (+327 -113) =================================================================== --- lib/db.c 2018-10-29 15:37:23 +0900 (323f81479) +++ lib/db.c 2018-10-29 15:40:03 +0900 (bfc2dca31) @@ -2427,7 +2427,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags, if (encoding) { *encoding = ((grn_pat *)table)->encoding; } if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer.proc; } if (normalizer) { *normalizer = ((grn_pat *)table)->normalizer.proc; } - if (token_filters) { *token_filters = &(((grn_pat *)table)->token_filters); } + if (token_filters) { *token_filters = &(((grn_pat *)table)->token_filter_procs); } rc = GRN_SUCCESS; break; case GRN_TABLE_DAT_KEY : @@ -8368,39 +8368,23 @@ grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valueb } break; case GRN_INFO_TOKEN_FILTERS : - if (!valuebuf) { - if (!(valuebuf = grn_obj_open(ctx, GRN_PVECTOR, 0, 0))) { - ERR(GRN_NO_MEMORY_AVAILABLE, - "grn_obj_get_info: failed to allocate value buffer"); - goto exit; - } - } - { - grn_obj *token_filters = NULL; - switch (obj->header.type) { - case GRN_TABLE_HASH_KEY : - token_filters = &(((grn_hash *)obj)->token_filters); - break; - case GRN_TABLE_PAT_KEY : - token_filters = &(((grn_pat *)obj)->token_filters); - break; - case GRN_TABLE_DAT_KEY : - token_filters = &(((grn_dat *)obj)->token_filters); - break; - default : - ERR(GRN_INVALID_ARGUMENT, - /* TODO: Show type name instead of type ID */ - "[info][get][token-filters] target object must be one of " - "GRN_TABLE_HASH_KEY, GRN_TABLE_PAT_KEY and GRN_TABLE_DAT_KEY: %d", - obj->header.type); - break; - } - if (token_filters) { - grn_bulk_write(ctx, - valuebuf, - GRN_BULK_HEAD(token_filters), - GRN_BULK_VSIZE(token_filters)); - } + switch (obj->header.type) { + case GRN_TABLE_HASH_KEY : + valuebuf = &(((grn_hash *)obj)->token_filters); + break; + case GRN_TABLE_PAT_KEY : + valuebuf = &(((grn_pat *)obj)->token_filter_procs); + break; + case GRN_TABLE_DAT_KEY : + valuebuf = &(((grn_dat *)obj)->token_filters); + break; + default : + ERR(GRN_INVALID_ARGUMENT, + /* TODO: Show type name instead of type ID */ + "[info][get][token-filters] target object must be one of " + "GRN_TABLE_HASH_KEY, GRN_TABLE_PAT_KEY and GRN_TABLE_DAT_KEY: %d", + obj->header.type); + break; } break; default : @@ -8671,7 +8655,7 @@ grn_obj_spec_save(grn_ctx *ctx, grn_db_obj *obj) grn_vector_delimit(ctx, &v, 0, 0); break; case GRN_TABLE_PAT_KEY : - grn_token_filters_pack(ctx, &(((grn_pat *)obj)->token_filters), b); + grn_token_filters_pack(ctx, &(((grn_pat *)obj)->token_filter_procs), b); grn_vector_delimit(ctx, &v, 0, 0); break; case GRN_TABLE_DAT_KEY : @@ -9066,27 +9050,24 @@ grn_obj_set_info_require_key_table(grn_ctx *ctx, } static grn_rc -grn_obj_set_info_table_module(grn_ctx *ctx, - grn_obj *table, - grn_info_type type, - grn_obj *module, - const char *context_tag, - const char *module_name) +grn_obj_set_info_table_module_raw(grn_ctx *ctx, + grn_obj *table, + grn_info_type type, + grn_table_module *table_module, + grn_id *proc_id, + unsigned int i, + grn_obj *module, + const char *context_tag, + const char *module_name) { char name[GRN_TABLE_MAX_KEY_SIZE]; unsigned int name_size; grn_obj *proc = NULL; - grn_id proc_id = GRN_ID_NIL; grn_obj *expression = NULL; grn_obj options; GRN_TEXT_INIT(&options, GRN_OBJ_VECTOR); - if (grn_obj_set_info_require_key_table(ctx, table, context_tag) != - GRN_SUCCESS) { - goto exit; - } - name_size = grn_obj_name(ctx, table, name, sizeof(name)); if (name_size == 0) { grn_strcpy(name, sizeof(name), "(anonymous)"); @@ -9161,6 +9142,9 @@ grn_obj_set_info_table_module(grn_ctx *ctx, case GRN_INFO_NORMALIZER : is_valid_proc = grn_obj_is_normalizer_proc(ctx, proc); break; + case GRN_INFO_TOKEN_FILTERS : + is_valid_proc = grn_obj_is_token_filter_proc(ctx, proc); + break; default : break; } @@ -9182,25 +9166,66 @@ grn_obj_set_info_table_module(grn_ctx *ctx, } } + grn_table_module_set_proc(ctx, table_module, proc); if (proc) { - proc_id = grn_obj_id(ctx, proc); + *proc_id = grn_obj_id(ctx, proc); + } else { + *proc_id = GRN_ID_NIL; + } + switch (type) { + case GRN_INFO_DEFAULT_TOKENIZER : + grn_table_set_default_tokenizer_options(ctx, table, &options); + if (DB_OBJ(table)->header.type == GRN_TABLE_PAT_KEY) { + grn_pat_cache_enable(ctx, (grn_pat *)table, GRN_TABLE_PAT_KEY_CACHE_SIZE); + } + break; + case GRN_INFO_NORMALIZER : + grn_table_set_normalizer_options(ctx, table, &options); + break; + case GRN_INFO_TOKEN_FILTERS : + grn_table_set_token_filter_options(ctx, table, i, &options); + break; + default : + break; + } + +exit : + GRN_OBJ_FIN(ctx, &options); + + if (expression) { + grn_obj_close(ctx, expression); + } + + return ctx->rc; +} + +static grn_rc +grn_obj_set_info_table_module(grn_ctx *ctx, + grn_obj *table, + grn_info_type type, + grn_obj *module, + const char *context_tag, + const char *module_name) +{ + grn_table_module *table_module; + grn_id *proc_id; + + if (grn_obj_set_info_require_key_table(ctx, + table, + context_tag) != GRN_SUCCESS) { + return ctx->rc; } + switch (DB_OBJ(table)->header.type) { case GRN_TABLE_HASH_KEY : switch (type) { case GRN_INFO_DEFAULT_TOKENIZER : - grn_table_module_set_proc(ctx, - &(((grn_hash *)table)->tokenizer), - proc); - ((grn_hash *)table)->header.common->tokenizer = proc_id; - grn_table_set_default_tokenizer_options(ctx, table, &options); + table_module = &(((grn_hash *)table)->tokenizer); + proc_id = &(((grn_hash *)table)->header.common->tokenizer); break; case GRN_INFO_NORMALIZER : - grn_table_module_set_proc(ctx, - &(((grn_hash *)table)->normalizer), - proc); - ((grn_hash *)table)->header.common->normalizer = proc_id; - grn_table_set_normalizer_options(ctx, table, &options); + table_module = &(((grn_hash *)table)->normalizer); + proc_id = &(((grn_hash *)table)->header.common->normalizer); break; default : break; @@ -9209,21 +9234,12 @@ grn_obj_set_info_table_module(grn_ctx *ctx, case GRN_TABLE_PAT_KEY : switch (type) { case GRN_INFO_DEFAULT_TOKENIZER : - grn_table_module_set_proc(ctx, - &(((grn_pat *)table)->tokenizer), - proc); - ((grn_pat *)table)->header->tokenizer = proc_id; - grn_pat_cache_enable(ctx, - ((grn_pat *)table), - GRN_TABLE_PAT_KEY_CACHE_SIZE); - grn_table_set_default_tokenizer_options(ctx, table, &options); + table_module = &(((grn_pat *)table)->tokenizer); + proc_id = &(((grn_pat *)table)->header->tokenizer); break; case GRN_INFO_NORMALIZER : - grn_table_module_set_proc(ctx, - &(((grn_pat *)table)->normalizer), - proc); - ((grn_pat *)table)->header->normalizer = proc_id; - grn_table_set_normalizer_options(ctx, table, &options); + table_module = &(((grn_pat *)table)->normalizer); + proc_id = &(((grn_pat *)table)->header->normalizer); break; default : break; @@ -9232,18 +9248,12 @@ grn_obj_set_info_table_module(grn_ctx *ctx, case GRN_TABLE_DAT_KEY : switch (type) { case GRN_INFO_DEFAULT_TOKENIZER : - grn_table_module_set_proc(ctx, - &(((grn_dat *)table)->tokenizer), - proc); - ((grn_dat *)table)->header->tokenizer = proc_id; - grn_table_set_default_tokenizer_options(ctx, table, &options); + table_module = &(((grn_dat *)table)->tokenizer); + proc_id = &(((grn_dat *)table)->header->tokenizer); break; case GRN_INFO_NORMALIZER : - grn_table_module_set_proc(ctx, - &(((grn_dat *)table)->normalizer), - proc); - ((grn_dat *)table)->header->normalizer = proc_id; - grn_table_set_normalizer_options(ctx, table, &options); + table_module = &(((grn_dat *)table)->normalizer); + proc_id = &(((grn_dat *)table)->header->normalizer); break; default : break; @@ -9252,6 +9262,134 @@ grn_obj_set_info_table_module(grn_ctx *ctx, break; } + return grn_obj_set_info_table_module_raw(ctx, + table, + type, + table_module, + proc_id, + 0, + module, + context_tag, + module_name); +} + +static grn_rc +grn_obj_set_info_table_modules(grn_ctx *ctx, + grn_obj *table, + grn_info_type type, + grn_obj *table_modules, + grn_obj *procs, + grn_obj *modules, + const char *context_tag, + const char *module_name) +{ + char name[GRN_TABLE_MAX_KEY_SIZE]; + unsigned int name_size; + grn_obj *unused; + grn_obj *expression = NULL; + size_t i, n; + grn_obj options; + + GRN_TEXT_INIT(&options, GRN_OBJ_VECTOR); + + name_size = grn_obj_name(ctx, table, name, sizeof(name)); + if (name_size == 0) { + grn_strcpy(name, sizeof(name), "(anonymous)"); + name_size = strlen(name); + } + + if (GRN_TEXT_LEN(modules) == 0) { + goto exit; + } + + GRN_EXPR_CREATE_FOR_QUERY(ctx, table, expression, unused); + grn_expr_parse(ctx, + expression, + GRN_TEXT_VALUE(modules), + GRN_TEXT_LEN(modules), + NULL, + GRN_OP_MATCH, + GRN_OP_AND, + GRN_EXPR_SYNTAX_SCRIPT); + if (ctx->rc != GRN_SUCCESS) { + ERR(GRN_INVALID_ARGUMENT, + "%s[%.*s] failed to parse %s: <%.*s>: %s", + context_tag, + (int)name_size, + name, + module_name, + (int)GRN_TEXT_LEN(modules), + GRN_TEXT_VALUE(modules), + ctx->errbuf); + goto exit; + } + if (!grn_expr_is_simple_function_calls(ctx, expression)) { + ERR(GRN_INVALID_ARGUMENT, + "%s[%.*s] must be %s(option1, option2, ...), ... format: <%.*s>", + context_tag, + (int)name_size, + name, + module_name, + (int)GRN_TEXT_LEN(modules), + GRN_TEXT_VALUE(modules)); + goto exit; + } + + n = grn_expr_simple_function_calls_get_n_calls(ctx, expression); + for (i = 0; i < n; i++) { + grn_obj *proc; + grn_id proc_id; + grn_bool is_valid_proc = GRN_FALSE; + grn_table_module *raw_table_module; + + proc = grn_expr_simple_function_calls_get_function(ctx, expression, i); + GRN_BULK_REWIND(&options); + grn_expr_simple_function_calls_get_arguments(ctx, + expression, + i, + &options); + + switch (type) { + case GRN_INFO_TOKEN_FILTERS : + is_valid_proc = grn_obj_is_token_filter_proc(ctx, proc); + break; + default : + break; + } + + if (!is_valid_proc) { + char proc_name[GRN_TABLE_MAX_KEY_SIZE]; + unsigned int proc_name_size; + + proc_name_size = grn_obj_name(ctx, proc, proc_name, sizeof(proc_name)); + ERR(GRN_INVALID_ARGUMENT, + "%s[%.*s] invalid %s: <%.*s>", + context_tag, + (int)name_size, + name, + module_name, + (int)proc_name_size, + proc_name); + goto exit; + } + + proc_id = grn_obj_id(ctx, proc); + + grn_bulk_space(ctx, table_modules, sizeof(grn_table_module)); + raw_table_module = ((grn_table_module *)GRN_BULK_HEAD(table_modules)) + i; + grn_table_module_init(ctx, raw_table_module, proc_id); + grn_table_module_set_proc(ctx, raw_table_module, proc); + GRN_PTR_PUT(ctx, procs, proc); + + switch (type) { + case GRN_INFO_TOKEN_FILTERS : + grn_table_set_token_filter_options(ctx, table, i, &options); + break; + default : + break; + } + } + exit : GRN_OBJ_FIN(ctx, &options); @@ -9265,63 +9403,139 @@ exit : static grn_rc grn_obj_set_info_token_filters(grn_ctx *ctx, grn_obj *table, - grn_obj *token_filters) + grn_obj *new_token_filters) { - const char *tag = "[info][set][token-filters]"; - grn_obj *current_token_filters = NULL; - unsigned int i, n_current_token_filters, n_token_filters; - grn_obj token_filter_names; + const char *context_tag = "[info][set][token-filters]"; + const char *module_name = "token filters"; + grn_obj *token_filters = NULL; + grn_obj *token_filter_procs = NULL; - if (grn_obj_set_info_require_key_table(ctx, table, tag) != GRN_SUCCESS) { + if (grn_obj_set_info_require_key_table(ctx, + table, + context_tag) != GRN_SUCCESS) { return ctx->rc; } switch (table->header.type) { case GRN_TABLE_HASH_KEY : - current_token_filters = &(((grn_hash *)table)->token_filters); + token_filters = &(((grn_hash *)table)->token_filters); break; case GRN_TABLE_PAT_KEY : - current_token_filters = &(((grn_pat *)table)->token_filters); + token_filters = &(((grn_pat *)table)->token_filters); + token_filter_procs = &(((grn_pat *)table)->token_filter_procs); break; case GRN_TABLE_DAT_KEY : - current_token_filters = &(((grn_dat *)table)->token_filters); + token_filters = &(((grn_dat *)table)->token_filters); break; default : break; } - n_current_token_filters = - GRN_BULK_VSIZE(current_token_filters) / sizeof(grn_obj *); - n_token_filters = GRN_BULK_VSIZE(token_filters) / sizeof(grn_obj *); + if (token_filters) { + unsigned int i, n_token_filters; + grn_table_module *raw_token_filters; - GRN_TEXT_INIT(&token_filter_names, 0); - GRN_BULK_REWIND(current_token_filters); - for (i = 0; i < n_token_filters; i++) { - grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filters, i); - char token_filter_name[GRN_TABLE_MAX_KEY_SIZE]; - unsigned int token_filter_name_size; + n_token_filters = GRN_BULK_VSIZE(token_filter_procs) / sizeof(grn_obj *); + raw_token_filters = (grn_table_module *)GRN_BULK_HEAD(token_filters); + for (i = 0; i < n_token_filters; i++) { + grn_table_module *raw_token_filter = raw_token_filters + i; + grn_table_module_fin(ctx, raw_token_filter); + grn_table_set_token_filter_options(ctx, table, i, NULL); + } + GRN_BULK_REWIND(token_filters); + } + GRN_BULK_REWIND(token_filter_procs); - GRN_PTR_PUT(ctx, current_token_filters, token_filter); + if (grn_obj_is_text_family_bulk(ctx, new_token_filters)) { + grn_obj_set_info_table_modules(ctx, + table, + GRN_INFO_TOKEN_FILTERS, + token_filters, + token_filter_procs, + new_token_filters, + context_tag, + module_name); + if (ctx->rc != GRN_SUCCESS) { + return ctx->rc; + } + } else { + grn_bool is_token_filter_names; + unsigned int i, n_new_token_filters; - if (i > 0) { - GRN_TEXT_PUTC(ctx, &token_filter_names, ','); + is_token_filter_names = grn_obj_is_vector(ctx, new_token_filters); + if (is_token_filter_names) { + n_new_token_filters = grn_vector_size(ctx, new_token_filters); + } else { + n_new_token_filters = + GRN_BULK_VSIZE(new_token_filters) / sizeof(grn_obj *); + } + + for (i = 0; i < n_new_token_filters; i++) { + if (is_token_filter_names) { + grn_obj token_filter; + grn_id token_filter_id = GRN_ID_NIL; + const char *name; + unsigned int name_size; + grn_table_module *raw_token_filter; + + name_size = grn_vector_get_element(ctx, + new_token_filters, + i, + &name, + NULL, + NULL); + GRN_TEXT_INIT(&token_filter, GRN_OBJ_DO_SHALLOW_COPY); + GRN_TEXT_SET(ctx, &token_filter, name, name_size); + grn_bulk_space(ctx, token_filters, sizeof(grn_table_module)); + raw_token_filter = + ((grn_table_module *)GRN_BULK_HEAD(token_filters)) + i; + grn_table_module_init(ctx, raw_token_filter, GRN_ID_NIL); + grn_obj_set_info_table_module_raw(ctx, + table, + GRN_INFO_TOKEN_FILTERS, + raw_token_filter, + &token_filter_id, + i, + &token_filter, + context_tag, + module_name); + GRN_PTR_PUT(ctx, token_filter_procs, raw_token_filter->proc); + GRN_OBJ_FIN(ctx, &token_filter); + } else { + grn_obj *token_filter = GRN_PTR_VALUE_AT(new_token_filters, i); + GRN_PTR_PUT(ctx, token_filter_procs, token_filter); + } } - token_filter_name_size = grn_obj_name(ctx, - token_filter, - token_filter_name, - GRN_TABLE_MAX_KEY_SIZE); - GRN_TEXT_PUT(ctx, - &token_filter_names, - token_filter_name, - token_filter_name_size); } - if (n_token_filters > 0 || n_token_filters != n_current_token_filters) { - GRN_LOG(ctx, GRN_LOG_NOTICE, "DDL:%u:set_token_filters %.*s", + + { + grn_obj token_filter_names; + unsigned int i, n_new_token_filters; + + GRN_TEXT_INIT(&token_filter_names, 0); + n_new_token_filters = GRN_BULK_VSIZE(token_filter_procs) / sizeof(grn_obj *); + for (i = 0; i < n_new_token_filters; i++) { + grn_obj *token_filter = GRN_PTR_VALUE_AT(token_filter_procs, i); + char name[GRN_TABLE_MAX_KEY_SIZE]; + unsigned int name_size; + + if (i > 0) { + GRN_TEXT_PUTC(ctx, &token_filter_names, ','); + } + + name_size = grn_obj_name(ctx, + token_filter, + name, + GRN_TABLE_MAX_KEY_SIZE); + GRN_TEXT_PUT(ctx, &token_filter_names, name, name_size); + } + GRN_LOG(ctx, GRN_LOG_NOTICE, "DDL:%u:set_token_filters%s%.*s", DB_OBJ(table)->id, + n_new_token_filters == 0 ? "" : " ", (int)GRN_BULK_VSIZE(&token_filter_names), GRN_BULK_HEAD(&token_filter_names)); + GRN_OBJ_FIN(ctx, &token_filter_names); } - GRN_OBJ_FIN(ctx, &token_filter_names); grn_obj_spec_save(ctx, DB_OBJ(table)); return GRN_SUCCESS; Modified: lib/grn_pat.h (+3 -0) =================================================================== --- lib/grn_pat.h 2018-10-29 15:37:23 +0900 (461043427) +++ lib/grn_pat.h 2018-10-29 15:40:03 +0900 (d435fb003) @@ -1,6 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2009-2018 Brazil + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -40,6 +41,8 @@ struct _grn_pat { grn_table_module tokenizer; grn_table_module normalizer; grn_obj token_filters; + /* For backward compatibility */ + grn_obj token_filter_procs; grn_id *cache; uint32_t cache_size; grn_bool is_dirty; Modified: lib/pat.c (+24 -4) =================================================================== --- lib/pat.c 2018-10-29 15:37:23 +0900 (41d2138af) +++ lib/pat.c 2018-10-29 15:40:03 +0900 (d756ac0f9) @@ -1,6 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2009-2018 Brazil + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -509,7 +510,8 @@ _grn_pat_create(grn_ctx *ctx, grn_pat *pat, header->normalizer = GRN_ID_NIL; } header->truncated = GRN_FALSE; - GRN_PTR_INIT(&(pat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_TEXT_INIT(&(pat->token_filters), 0); + GRN_PTR_INIT(&(pat->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL); pat->io = io; pat->header = header; pat->key_size = key_size; @@ -625,7 +627,8 @@ grn_pat_open(grn_ctx *ctx, const char *path) header->normalizer = grn_obj_id(ctx, normalizer); } grn_table_module_init(ctx, &(pat->normalizer), header->normalizer); - GRN_PTR_INIT(&(pat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_TEXT_INIT(&(pat->token_filters), 0); + GRN_PTR_INIT(&(pat->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL); pat->obj.header.flags = header->flags; PAT_AT(pat, 0, node0); if (!node0) { @@ -659,6 +662,23 @@ grn_pat_error_if_truncated(grn_ctx *ctx, grn_pat *pat) return GRN_SUCCESS; } +static void +grn_pat_close_token_filters(grn_ctx *ctx, grn_pat *pat) +{ + grn_obj *token_filters = &(pat->token_filters); + grn_table_module *raw_token_filters = + (grn_table_module *)GRN_BULK_HEAD(token_filters); + size_t i, n; + + n = GRN_BULK_VSIZE(token_filters) / sizeof(grn_table_module); + for (i = 0; i < n; i++) { + grn_table_module *raw_token_filter = raw_token_filters + i; + grn_table_module_fin(ctx, raw_token_filter); + } + GRN_OBJ_FIN(ctx, token_filters); + GRN_OBJ_FIN(ctx, &(pat->token_filter_procs)); +} + grn_rc grn_pat_close(grn_ctx *ctx, grn_pat *pat) { @@ -677,7 +697,7 @@ grn_pat_close(grn_ctx *ctx, grn_pat *pat) } grn_table_module_fin(ctx, &(pat->tokenizer)); grn_table_module_fin(ctx, &(pat->normalizer)); - grn_pvector_fin(ctx, &pat->token_filters); + grn_pat_close_token_filters(ctx, pat); if (pat->cache) { grn_pat_cache_disable(ctx, pat); } GRN_FREE(pat); @@ -723,7 +743,7 @@ grn_pat_truncate(grn_ctx *ctx, grn_pat *pat) if ((rc = grn_io_close(ctx, pat->io))) { goto exit; } grn_table_module_fin(ctx, &(pat->tokenizer)); grn_table_module_fin(ctx, &(pat->normalizer)); - grn_pvector_fin(ctx, &pat->token_filters); + grn_pat_close_token_filters(ctx, pat); pat->io = NULL; if (path && (rc = grn_io_remove(ctx, path))) { goto exit; } if (!_grn_pat_create(ctx, pat, path, key_size, value_size, flags)) { Modified: lib/table.c (+157 -0) =================================================================== --- lib/table.c 2018-10-29 15:37:23 +0900 (16c000fc9) +++ lib/table.c 2018-10-29 15:40:03 +0900 (a53557336) @@ -1,6 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2017-2018 Brazil + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -23,8 +24,11 @@ #include "grn_hash.h" #include "grn_pat.h" +#include <stdio.h> + static const char *OPTION_NAME_DEFAULT_TOKENIZER = "default_tokenizer"; static const char *OPTION_NAME_NORMALIZER = "normalizer"; +static const char *OPTION_NAME_TOKEN_FILTER = "token_filter"; grn_rc grn_table_apply_expr(grn_ctx *ctx, @@ -272,6 +276,7 @@ typedef struct { const char *context_tag; const char *module_name; grn_info_type type; + unsigned int token_filter_index; grn_table_module_open_options_func open_options_func; grn_close_func close_options_func; void *user_data; @@ -328,6 +333,27 @@ grn_table_cache_module_options(grn_ctx *ctx, break; } break; + case GRN_INFO_TOKEN_FILTERS : + { + grn_obj *token_filters; + switch (table->header.type) { + case GRN_TABLE_HASH_KEY : + token_filters = &(((grn_hash *)table)->token_filters); + break; + case GRN_TABLE_PAT_KEY : + token_filters = &(((grn_pat *)table)->token_filters); + break; + case GRN_TABLE_DAT_KEY : + token_filters = &(((grn_dat *)table)->token_filters); + break; + default : + break; + } + module = + ((grn_table_module *)GRN_BULK_HEAD(token_filters)) + + data->token_filter_index; + } + break; default : break; } @@ -446,6 +472,56 @@ grn_table_get_module_string(grn_ctx *ctx, GRN_API_RETURN(ctx->rc); } +static grn_rc +grn_table_get_module_strings(grn_ctx *ctx, + grn_obj *table, + grn_obj *output, + grn_info_type type, + const char *module_name, + const char *context_tag) +{ + grn_obj *procs; + unsigned int i, n; + + GRN_API_ENTER; + + if (!grn_obj_is_lexicon(ctx, table)) { + ERR(GRN_INVALID_ARGUMENT, + "[table][%s][options][strings] table must be key table: %s", + context_tag, + table ? grn_obj_type_to_string(table->header.type) : "(null)"); + GRN_API_RETURN(ctx->rc); + } + + procs = grn_obj_get_info(ctx, table, type, NULL); + if (!procs) { + GRN_API_RETURN(ctx->rc); + } + + n = grn_vector_size(ctx, procs); + if (n == 0) { + GRN_API_RETURN(ctx->rc); + } + + for (i = 0; i < n; i++) { + char real_module_name[GRN_TABLE_MAX_KEY_SIZE]; + grn_obj *proc = GRN_PTR_VALUE_AT(procs, i); + + if (i > 0) { + GRN_TEXT_PUTS(ctx, output, ", "); + } + grn_snprintf(real_module_name, + GRN_TABLE_MAX_KEY_SIZE, + GRN_TABLE_MAX_KEY_SIZE, + "%s%u", + module_name, + i); + grn_table_get_module_string_raw(ctx, + table, + output, + proc, + real_module_name); + } GRN_API_RETURN(ctx->rc); } @@ -563,3 +639,84 @@ grn_table_get_normalizer_string(grn_ctx *ctx, OPTION_NAME_NORMALIZER, "normalizer"); } + +grn_rc +grn_table_set_token_filter_options(grn_ctx *ctx, + grn_obj *table, + unsigned int i, + grn_obj *options) +{ + char module_name[GRN_TABLE_MAX_KEY_SIZE]; + grn_snprintf(module_name, + GRN_TABLE_MAX_KEY_SIZE, + GRN_TABLE_MAX_KEY_SIZE, + "%s%u", + OPTION_NAME_TOKEN_FILTER, + i); + return grn_table_set_module_options(ctx, + table, + module_name, + options, + "token-filter"); +} + +grn_rc +grn_table_get_token_filter_options(grn_ctx *ctx, + grn_obj *table, + unsigned int i, + grn_obj *options) +{ + char module_name[GRN_TABLE_MAX_KEY_SIZE]; + grn_snprintf(module_name, + GRN_TABLE_MAX_KEY_SIZE, + GRN_TABLE_MAX_KEY_SIZE, + "%s%u", + OPTION_NAME_TOKEN_FILTER, + i); + return grn_table_get_module_options(ctx, + table, + module_name, + options, + "token-filter"); +} + +void * +grn_table_cache_token_filter_options(grn_ctx *ctx, + grn_obj *table, + unsigned int i, + grn_table_module_open_options_func open_options_func, + grn_close_func close_options_func, + void *user_data) +{ + grn_table_cache_data data; + char module_name[GRN_TABLE_MAX_KEY_SIZE]; + grn_snprintf(module_name, + GRN_TABLE_MAX_KEY_SIZE, + GRN_TABLE_MAX_KEY_SIZE, + "%s%u", + OPTION_NAME_TOKEN_FILTER, + i); + + memset(&data, 0, sizeof(data)); + data.context_tag = "token-filter"; + data.module_name = module_name; + data.type = GRN_INFO_TOKEN_FILTERS; + data.token_filter_index = i; + data.open_options_func = open_options_func; + data.close_options_func = close_options_func; + data.user_data = user_data; + return grn_table_cache_module_options(ctx, table, &data); +} + +grn_rc +grn_table_get_token_filter_strings(grn_ctx *ctx, + grn_obj *table, + grn_obj *output) +{ + return grn_table_get_module_strings(ctx, + table, + output, + GRN_INFO_TOKEN_FILTERS, + OPTION_NAME_TOKEN_FILTER, + "token-filter"); +} -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181029/081d5e8a/attachment-0001.html>