[Groonga-commit] groonga/groonga at 08e2456 [master] column_create: add more validations

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Jun 25 11:41:26 JST 2018


Kouhei Sutou	2018-06-25 11:41:26 +0900 (Mon, 25 Jun 2018)

  New Revision: 08e2456ba35407e3d5172f71a0200fac2a770142
  https://github.com/groonga/groonga/commit/08e2456ba35407e3d5172f71a0200fac2a770142

  Merged 8a52f23: Merge pull request #850 from komainu8/add_check_exist_with_section_flag

  Message:
    column_create: add more validations
    
    1: A full text search index for a vector column must have the
    WITH_SECTION flag.
    
    2: A full text search index for a vector column must not be a multi
    column index. With such an index, the following commands return no
    source records for the "-" token:
    
        plugin_register functions/index_column
    
        table_create Docs TABLE_HASH_KEY ShortText
        column_create Docs sentences1 COLUMN_VECTOR Text
        column_create Docs sentences2 COLUMN_VECTOR Text
    
        table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
        column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION|WITH_SECTION Docs sentences1,sentences2
    
        load --table Docs
        [
        {"_key": "x", "sentences1": ["-", "-", "-"], "sentences2": ["-", "-", "-"]}
        ]
    
        load --table Docs
        [
        {"_key": "x", "sentences1": []}
        ]
    
        select Words \
          --limit -1 \
          --sort_keys _key \
          --output_columns '_key, index_column_source_records("docs_sentences")'
    
    index_column_source_records() output:
    
        [
          [
            0,
            0.0,
            0.0
          ],
          [
            [
              [
                1
              ],
              [
                [
                  "_key",
                  "ShortText"
                ],
                [
                  "index_column_source_records",
                  null
                ]
              ],
              [
                "-",
                [
    
                ]
              ]
            ]
          ]
        ]
    
    The second load updates only Docs.sentences1, but it unexpectedly
    removes the posting lists for Docs.sentences2 as well. We can remove
    the 2nd validation once this update case is supported.

  Added files:
    test/command/suite/column_create/index/source/multi_column/vector_full_text_search.expected
    test/command/suite/column_create/index/source/multi_column/vector_full_text_search.test
    test/command/suite/column_create/index/source/vector_column/full_text_search.expected
    test/command/suite/column_create/index/source/vector_column/full_text_search.test
  Removed files:
    test/command/suite/load/index/offline/vector/text_without_section.test
    test/command/suite/load/index/online/vector/text_without_section.expected
  Modified files:
    lib/db.c
  Renamed files:
    test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.expected
      (from test/command/suite/load/index/offline/vector/text_without_section.expected)
    test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.test
      (from test/command/suite/load/index/online/vector/text_without_section.test)

  Modified: lib/db.c (+44 -20)
===================================================================
--- lib/db.c    2018-06-25 11:02:05 +0900 (f445d2425)
+++ lib/db.c    2018-06-25 11:41:26 +0900 (b65947add)
@@ -8804,6 +8804,7 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
   grn_obj *lexicon_domain = NULL;
   grn_bool lexicon_domain_is_table;
   grn_bool lexicon_have_tokenizer;
+  grn_bool is_full_text_search_index;
   grn_id *source_ids;
   int i, n_source_ids;
 
@@ -8824,29 +8825,13 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
     grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL, NULL);
     lexicon_have_tokenizer = (tokenizer != NULL);
   }
+  is_full_text_search_index =
+    (grn_obj_is_index_column(ctx, obj) &&
+     (obj->header.flags & GRN_OBJ_WITH_POSITION) &&
+     lexicon_have_tokenizer);
 
   source_ids = (grn_id *)GRN_BULK_HEAD(value);
   n_source_ids = GRN_BULK_VSIZE(value) / sizeof(grn_id);
-  if (grn_obj_is_index_column(ctx, obj) && n_source_ids == 1) {
-    grn_obj *source;
-
-    source = grn_ctx_at(ctx, source_ids[0]);
-    if (grn_obj_is_vector_column(ctx, source) &&
-        (obj->header.flags & GRN_OBJ_WITH_POSITION) &&
-        lexicon_have_tokenizer &&
-        !(obj->header.flags & GRN_OBJ_WITH_SECTION)) {
-      char index_name[GRN_TABLE_MAX_KEY_SIZE];
-      int index_name_size;
-      index_name_size = grn_obj_name(ctx, obj,
-                                     index_name, GRN_TABLE_MAX_KEY_SIZE);
-      ERR(GRN_INVALID_ARGUMENT,
-          "grn_obj_set_info(): GRN_INFO_SOURCE: "
-          "full text index for vector column "
-          "must be created with WITH_SECTION flag: <%.*s>",
-          index_name_size, index_name);
-      goto exit;
-    }
-  }
 
   if (n_source_ids > 1 && !(obj->header.flags & GRN_OBJ_WITH_SECTION)) {
     char index_name[GRN_TABLE_MAX_KEY_SIZE];
@@ -8860,6 +8845,45 @@ grn_obj_set_info_source_validate(grn_ctx *ctx, grn_obj *obj, grn_obj *value)
     goto exit;
   }
 
+  if (is_full_text_search_index) {
+    grn_bool have_vector_source_column = GRN_FALSE;
+
+    for (i = 0; i < n_source_ids; i++) {
+      grn_obj *source;
+
+      source = grn_ctx_at(ctx, source_ids[i]);
+      if (!grn_obj_is_vector_column(ctx, source)) {
+        continue;
+      }
+
+      have_vector_source_column = GRN_TRUE;
+      if (!(obj->header.flags & GRN_OBJ_WITH_SECTION)) {
+        char index_name[GRN_TABLE_MAX_KEY_SIZE];
+        int index_name_size;
+        index_name_size = grn_obj_name(ctx, obj,
+                                       index_name, GRN_TABLE_MAX_KEY_SIZE);
+        ERR(GRN_INVALID_ARGUMENT,
+            "grn_obj_set_info(): GRN_INFO_SOURCE: "
+            "full text index for vector column "
+            "must be created with WITH_SECTION flag: <%.*s>",
+            index_name_size, index_name);
+        goto exit;
+      }
+    }
+
+    if (have_vector_source_column && n_source_ids > 1) {
+      char index_name[GRN_TABLE_MAX_KEY_SIZE];
+      int index_name_size;
+      index_name_size = grn_obj_name(ctx, obj,
+                                     index_name, GRN_TABLE_MAX_KEY_SIZE);
+      ERR(GRN_INVALID_ARGUMENT,
+          "grn_obj_set_info(): GRN_INFO_SOURCE: "
+          "multi column full text index with vector column isn't supported yet: "
+          "<%.*s>",
+          index_name_size, index_name);
+      goto exit;
+    }
+  }
 
   for (i = 0; i < n_source_ids; i++) {
     grn_id source_id = source_ids[i];

  Added: test/command/suite/column_create/index/source/multi_column/vector_full_text_search.expected (+23 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/column_create/index/source/multi_column/vector_full_text_search.expected    2018-06-25 11:41:26 +0900 (1d1d2d1f3)
@@ -0,0 +1,23 @@
+plugin_register functions/index_column
+[[0,0.0,0.0],true]
+table_create Docs TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Docs title COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+column_create Docs sentences COLUMN_VECTOR Text
+[[0,0.0,0.0],true]
+table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+[[0,0.0,0.0],true]
+column_create Words docs_content   COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs title,sentences
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "grn_obj_set_info(): GRN_INFO_SOURCE: multi column full text index with vector column isn't supported yet: <Words.docs_content>"
+  ],
+  false
+]
+#|e| grn_obj_set_info(): GRN_INFO_SOURCE: multi column full text index with vector column isn't supported yet: <Words.docs_content>

  Added: test/command/suite/column_create/index/source/multi_column/vector_full_text_search.test (+9 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/column_create/index/source/multi_column/vector_full_text_search.test    2018-06-25 11:41:26 +0900 (6f6e9dba7)
@@ -0,0 +1,9 @@
+plugin_register functions/index_column
+
+table_create Docs TABLE_NO_KEY
+column_create Docs title COLUMN_SCALAR ShortText
+column_create Docs sentences COLUMN_VECTOR Text
+
+table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+column_create Words docs_content \
+  COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs title,sentences

  Added: test/command/suite/column_create/index/source/vector_column/full_text_search.expected (+10 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/column_create/index/source/vector_column/full_text_search.expected    2018-06-25 11:41:26 +0900 (19d8ff5b2)
@@ -0,0 +1,10 @@
+plugin_register functions/index_column
+[[0,0.0,0.0],true]
+table_create Docs TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Docs sentences COLUMN_VECTOR Text
+[[0,0.0,0.0],true]
+table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+[[0,0.0,0.0],true]
+column_create Words docs_sentences COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs sentences
+[[0,0.0,0.0],true]

  Added: test/command/suite/column_create/index/source/vector_column/full_text_search.test (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/column_create/index/source/vector_column/full_text_search.test    2018-06-25 11:41:26 +0900 (6bd345ced)
@@ -0,0 +1,7 @@
+plugin_register functions/index_column
+
+table_create Docs TABLE_NO_KEY
+column_create Docs sentences COLUMN_VECTOR Text
+
+table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+column_create Words docs_sentences COLUMN_INDEX|WITH_SECTION|WITH_POSITION Docs sentences

  Renamed: test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.expected (+0 -30) 61%
===================================================================
--- test/command/suite/load/index/offline/vector/text_without_section.expected    2018-06-25 11:02:05 +0900 (08c94ac34)
+++ test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.expected    2018-06-25 11:41:26 +0900 (a615fd069)
@@ -4,11 +4,6 @@ table_create Docs TABLE_NO_KEY
 [[0,0.0,0.0],true]
 column_create Docs sentences COLUMN_VECTOR Text
 [[0,0.0,0.0],true]
-load --table Docs
-[
-{"sentences": ["-", "-", "-"]}
-]
-[[0,0.0,0.0],1]
 table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
 [[0,0.0,0.0],true]
 column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
@@ -24,28 +19,3 @@ column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
   false
 ]
 #|e| grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sentences>
-select Words   --limit -1   --sort_keys _key   --output_columns '_key, index_column_source_records("docs_sentences")'
-[
-  [
-    0,
-    0.0,
-    0.0
-  ],
-  [
-    [
-      [
-        0
-      ],
-      [
-        [
-          "_key",
-          "ShortText"
-        ],
-        [
-          "index_column_source_records",
-          null
-        ]
-      ]
-    ]
-  ]
-]

  Renamed: test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.test (+0 -10) 60%
===================================================================
--- test/command/suite/load/index/online/vector/text_without_section.test    2018-06-25 11:02:05 +0900 (8a3bfe770)
+++ test/command/suite/column_create/index/source/vector_column/full_text_search_without_section.test    2018-06-25 11:41:26 +0900 (aa3827af0)
@@ -5,13 +5,3 @@ column_create Docs sentences COLUMN_VECTOR Text
 
 table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
 column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
-
-load --table Docs
-[
-{"sentences": ["-", "-", "-"]}
-]
-
-select Words \
-  --limit -1 \
-  --sort_keys _key \
-  --output_columns '_key, index_column_source_records("docs_sentences")'

  Deleted: test/command/suite/load/index/offline/vector/text_without_section.test (+0 -17) 100644
===================================================================
--- test/command/suite/load/index/offline/vector/text_without_section.test    2018-06-25 11:02:05 +0900 (72c341b56)
+++ /dev/null
@@ -1,17 +0,0 @@
-plugin_register functions/index_column
-
-table_create Docs TABLE_NO_KEY
-column_create Docs sentences COLUMN_VECTOR Text
-
-load --table Docs
-[
-{"sentences": ["-", "-", "-"]}
-]
-
-table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
-column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
-
-select Words \
-  --limit -1 \
-  --sort_keys _key \
-  --output_columns '_key, index_column_source_records("docs_sentences")'

  Deleted: test/command/suite/load/index/online/vector/text_without_section.expected (+0 -51) 100644
===================================================================
--- test/command/suite/load/index/online/vector/text_without_section.expected    2018-06-25 11:02:05 +0900 (6fc38919f)
+++ /dev/null
@@ -1,51 +0,0 @@
-plugin_register functions/index_column
-[[0,0.0,0.0],true]
-table_create Docs TABLE_NO_KEY
-[[0,0.0,0.0],true]
-column_create Docs sentences COLUMN_VECTOR Text
-[[0,0.0,0.0],true]
-table_create Words TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
-[[0,0.0,0.0],true]
-column_create Words docs_sentences COLUMN_INDEX|WITH_POSITION Docs sentences
-[
-  [
-    [
-      -22,
-      0.0,
-      0.0
-    ],
-    "grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sent"
-  ],
-  false
-]
-#|e| grn_obj_set_info(): GRN_INFO_SOURCE: full text index for vector column must be created with WITH_SECTION flag: <Words.docs_sentences>
-load --table Docs
-[
-{"sentences": ["-", "-", "-"]}
-]
-[[0,0.0,0.0],1]
-select Words   --limit -1   --sort_keys _key   --output_columns '_key, index_column_source_records("docs_sentences")'
-[
-  [
-    0,
-    0.0,
-    0.0
-  ],
-  [
-    [
-      [
-        0
-      ],
-      [
-        [
-          "_key",
-          "ShortText"
-        ],
-        [
-          "index_column_source_records",
-          null
-        ]
-      ]
-    ]
-  ]
-]
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180625/b69d7071/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index