Kouhei Sutou
null+****@clear*****
Mon Jun 25 11:41:26 JST 2018
Kouhei Sutou 2018-06-25 11:41:26 +0900 (Mon, 25 Jun 2018) New Revision: 08e2456ba35407e3d5172f71a0200fac2a770142 https://github.com/groonga/groonga/commit/08e2456ba35407e3d5172f71a0200fac2a770142 Merged 8a52f23: Merge pull request #850 from komainu8/add_check_exist_with_section_flag Message: column_create: add more validations 1: A full text search index for a vector column must have the WITH_SECTION flag. 2: A full text search index for a vector column must not be a multi-column index. The following commands return an empty source record list from index_column_source_records(): plugin_register functions/index_column table_create Docs TABLE_HASH_KEY ShortText column_create Docs sentences1 COLUMN_VECTOR Text column_create Docs sentences2 COLUMN_VECTOR Text table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION|WITH_SECTION Docs sentences1,sentences2 load --table Docs [ {"_key": "x", "sentences1": ["-", "-", "-"], "sentences2": ["-", "-", "-"]} ] load --table Docs [ {"_key": "x", "sentences1": []} ] select Words \ --limit -1 \ --sort_keys _key \ --output_columns '_key, index_column_source_records("docs_sentences")' index_column_source_records() output: [ [ 0, 0.0, 0.0 ], [ [ [ 1 ], [ [ "_key", "ShortText" ], [ "index_column_source_records", null ] ], [ "-", [ ] ] ] ] ] The second load unexpectedly removes the posting lists for Docs.sentences2 as well. We can remove the 2nd validation once we support the update case. 
Added files: test/command/suite/column_create/index/source/multi_column/vector_full_text_search.expected test/command/suite/column_create/index/source/multi_column/vector_full_text_search.test test/command/suite/column_create/index/source/vector_column/full_text_search.expected test/command/suite/column_create/index/source/vector_column/full_text_search.test Removed files: test/command/suite/load/index/offline/vector/text_without_section.test test/command/suite/load/index/online/vector/text_without_section.expected Modified files: lib/db.c Renamed files: test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.expected (from test/command/suite/load/index/offline/vector/text_without_section.expected) test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.test (from test/command/suite/load/index/online/vector/text_without_section.test) Modified: lib/db.c (+44 -20) =================================================================== --- lib/db.c 2018-06-25 11:02:05 +0900 (f445d2425) +++ lib/db.c 2018-06-25 11:41:26 +0900 (b65947add) @@ -8804,6 +8804,7 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value) grn_obj *lexicon_domain = NULL; grn_bool lexicon_domain_is_table; grn_bool lexicon_have_tokenizer; + grn_bool is_full_text_search_index; grn_id *source_ids; int i, n_source_ids; @@ -8824,29 +8825,13 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value) grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL); lexicon_have_tokenizer = (tokenizer != NULL); } + is_full_text_search_index = + (grn_obj_is_index_column(ctx, obj) && + (obj->header.flags & GRN_OBJ_WITH_POSITION) && + lexicon_have_tokenizer); source_ids = (grn_id *)GRN_BULK_HEAD(value); n_source_ids = GRN_BULK_VSIZE(value) / sizeof(grn_id); - if (grn_obj_is_index_column(ctx, obj) && n_source_ids == 1) { - grn_obj *source; - - source = grn_ctx_at(ctx, source_ids[0]); - if 
(grn_obj_is_vector_column(ctx, source) && - (obj->header.flags & GRN_OBJ_WITH_POSITION) && - lexicon_have_tokenizer && - !(obj->header.flags & GRN_OBJ_WITH_SECTION)) { - char index_name[GRN_TABLE_MAX_KEY_SIZE]; - int index_name_size; - index_name_size = grn_obj_name(ctx, obj, - index_name, GRN_TABLE_MAX_KEY_SIZE); - ERR(GRN_INVALID_ARGUMENT, - "grn_obj_set_info(): GRN_INFO_SOURCE: " - "full text index for vector column " - "must be created with WITH_SECTION flag: <%.*s>", - index_name_size, index_name); - goto exit; - } - } if (n_source_ids > 1 && !(obj->header.flags & GRN_OBJ_WITH_SECTION)) { char index_name[GRN_TABLE_MAX_KEY_SIZE]; @@ -8860,6 +8845,45 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value) goto exit; } + if (is_full_text_search_index) { + grn_bool have_vector_source_column = GRN_FALSE; + + for (i = 0; i < n_source_ids; i++) { + grn_obj *source; + + source = grn_ctx_at(ctx, source_ids[i]); + if (!grn_obj_is_vector_column(ctx, source)) { + continue; + } + + have_vector_source_column = GRN_TRUE; + if (!(obj->header.flags & GRN_OBJ_WITH_SECTION)) { + char index_name[GRN_TABLE_MAX_KEY_SIZE]; + int index_name_size; + index_name_size = grn_obj_name(ctx, obj, + index_name, GRN_TABLE_MAX_KEY_SIZE); + ERR(GRN_INVALID_ARGUMENT, + "grn_obj_set_info(): GRN_INFO_SOURCE: " + "full text index for vector column " + "must be created with WITH_SECTION flag: <%.*s>", + index_name_size, index_name); + goto exit; + } + } + + if (have_vector_source_column && n_source_ids > 1) { + char index_name[GRN_TABLE_MAX_KEY_SIZE]; + int index_name_size; + index_name_size = grn_obj_name(ctx, obj, + index_name, GRN_TABLE_MAX_KEY_SIZE); + ERR(GRN_INVALID_ARGUMENT, + "grn_obj_set_info(): GRN_INFO_SOURCE: " + "multi column full text index with vector column isn't supported yet: " + "<%.*s>", + index_name_size, index_name); + goto exit; + } + } for (i = 0; i < n_source_ids; i++) { grn_id source_id = source_ids[i]; Added: 
test/command/suite/column_create/index/source/multi_column/vector_full_text_search.expected (+23 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/column_create/index/source/multi_column/vector_full_text_search.expected 2018-06-25 11:41:26 +0900 (1d1d2d1f3) @@ -0,0 +1,23 @@ +plugin_register functions/index_column +[[0,0.0,0.0],true] +table_create Docs TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Docs title COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +column_create Docs sentences COLUMN_VECTOR Text +[[0,0.0,0.0],true] +table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Words docs_content COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs title,sentences +[ + [ + [ + -22, + 0.0, + 0.0 + ], + "grn_obj_set_info(): GRN_INFO_SOURCE: multi column full text index with vector column isn't supported yet: <Words.docs_content>" + ], + false +] +#|e| grn_obj_set_info(): GRN_INFO_SOURCE: multi column full text index with vector column isn't supported yet: <Words.docs_content> Added: test/command/suite/column_create/index/source/multi_column/vector_full_text_search.test (+9 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/column_create/index/source/multi_column/vector_full_text_search.test 2018-06-25 11:41:26 +0900 (6f6e9dba7) @@ -0,0 +1,9 @@ +plugin_register functions/index_column + +table_create Docs TABLE_NO_KEY +column_create Docs title COLUMN_SCALAR ShortText +column_create Docs sentences COLUMN_VECTOR Text + +table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram +column_create Words docs_content \ + COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs title,sentences Added: test/command/suite/column_create/index/source/vector_column/full_text_search.expected (+10 -0) 100644 =================================================================== --- /dev/null +++ 
test/command/suite/column_create/index/source/vector_column/full_text_search.expected 2018-06-25 11:41:26 +0900 (19d8ff5b2) @@ -0,0 +1,10 @@ +plugin_register functions/index_column +[[0,0.0,0.0],true] +table_create Docs TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Docs sentences COLUMN_VECTOR Text +[[0,0.0,0.0],true] +table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram +[[0,0.0,0.0],true] +column_create Words docs_sentences COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs sentences +[[0,0.0,0.0],true] Added: test/command/suite/column_create/index/source/vector_column/full_text_search.test (+7 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/column_create/index/source/vector_column/full_text_search.test 2018-06-25 11:41:26 +0900 (6bd345ced) @@ -0,0 +1,7 @@ +plugin_register functions/index_column + +table_create Docs TABLE_NO_KEY +column_create Docs sentences COLUMN_VECTOR Text + +table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram +column_create Words docs_sentences COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs sentences Renamed: test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.expected (+0 -30) 61% =================================================================== --- test/command/suite/load/index/offline/vector/text_without_section.expected 2018-06-25 11:02:05 +0900 (08c94ac34) +++ test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.expected 2018-06-25 11:41:26 +0900 (a615fd069) @@ -4,11 +4,6 @@ table_create Docs TABLE_NO_KEY [[0,0.0,0.0],true] column_create Docs sentences COLUMN_VECTOR Text [[0,0.0,0.0],true] -load --table Docs -[ -{"sentences": ["-", "-", "-"]} -] -[[0,0.0,0.0],1] table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram [[0,0.0,0.0],true] column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences @@ -24,28 +19,3 @@ 
column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences false ] #|e| grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sentences> -select Words --limit -1 --sort_keys _key --output_columns '_key, index_column_source_records("docs_sentences")' -[ - [ - 0, - 0.0, - 0.0 - ], - [ - [ - [ - 0 - ], - [ - [ - "_key", - "ShortText" - ], - [ - "index_column_source_records", - null - ] - ] - ] - ] -] Renamed: test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.test (+0 -10) 60% =================================================================== --- test/command/suite/load/index/online/vector/text_without_section.test 2018-06-25 11:02:05 +0900 (8a3bfe770) +++ test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.test 2018-06-25 11:41:26 +0900 (aa3827af0) @@ -5,13 +5,3 @@ column_create Docs sentences COLUMN_VECTOR Text table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences - -load --table Docs -[ -{"sentences": ["-", "-", "-"]} -] - -select Words \ - --limit -1 \ - --sort_keys _key \ - --output_columns '_key, index_column_source_records("docs_sentences")' Deleted: test/command/suite/load/index/offline/vector/text_without_section.test (+0 -17) 100644 =================================================================== --- test/command/suite/load/index/offline/vector/text_without_section.test 2018-06-25 11:02:05 +0900 (72c341b56) +++ /dev/null @@ -1,17 +0,0 @@ -plugin_register functions/index_column - -table_create Docs TABLE_NO_KEY -column_create Docs sentences COLUMN_VECTOR Text - -load --table Docs -[ -{"sentences": ["-", "-", "-"]} -] - -table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram -column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences - -select Words \ - 
--limit -1 \ - --sort_keys _key \ - --output_columns '_key, index_column_source_records("docs_sentences")' Deleted: test/command/suite/load/index/online/vector/text_without_section.expected (+0 -51) 100644 =================================================================== --- test/command/suite/load/index/online/vector/text_without_section.expected 2018-06-25 11:02:05 +0900 (6fc38919f) +++ /dev/null @@ -1,51 +0,0 @@ -plugin_register functions/index_column -[[0,0.0,0.0],true] -table_create Docs TABLE_NO_KEY -[[0,0.0,0.0],true] -column_create Docs sentences COLUMN_VECTOR Text -[[0,0.0,0.0],true] -table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram -[[0,0.0,0.0],true] -column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences -[ - [ - [ - -22, - 0.0, - 0.0 - ], - "grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sent" - ], - false -] -#|e| grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sentences> -load --table Docs -[ -{"sentences": ["-", "-", "-"]} -] -[[0,0.0,0.0],1] -select Words --limit -1 --sort_keys _key --output_columns '_key, index_column_source_records("docs_sentences")' -[ - [ - 0, - 0.0, - 0.0 - ], - [ - [ - [ - 0 - ], - [ - [ - "_key", - "ShortText" - ], - [ - "index_column_source_records", - null - ] - ] - ] - ] -] -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180625/b69d7071/attachment-0001.htm