[Groonga-commit] groonga/groonga at e6766d3 [master] index_column_diff: add support for reference vector

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Mar 22 18:51:15 JST 2019


Kouhei Sutou	2019-03-22 18:51:15 +0900 (Fri, 22 Mar 2019)

  Revision: e6766d31f6968224cf7ffc359eed91fa439bc0bb
  https://github.com/groonga/groonga/commit/e6766d31f6968224cf7ffc359eed91fa439bc0bb

  Message:
    index_column_diff: add support for reference vector

  Added files:
    test/command/suite/index_column_diff/reference_vector.expected
    test/command/suite/index_column_diff/reference_vector.test
  Modified files:
    lib/index_column.c

  Modified: lib/index_column.c (+99 -60)
===================================================================
--- lib/index_column.c    2019-03-22 16:57:21 +0900 (824ead211)
+++ lib/index_column.c    2019-03-22 18:51:15 +0900 (64e5a1fc0)
@@ -719,6 +719,85 @@ grn_index_column_diff_find_posting(grn_ctx *ctx,
 }
 
 static void
+grn_index_column_diff_process_token_id(grn_ctx *ctx,
+                                       grn_index_column_diff_data *data)
+{
+  {
+    int added = 0;
+    data->current.diff_id =
+      grn_table_add(ctx,
+                    data->diff,
+                    &(data->current.token_id), sizeof(grn_id),
+                    &added);
+    data->current.is_new_diff = (added != 0);
+  }
+
+  const int64_t nth_posting =
+    grn_index_column_diff_find_posting(ctx, data);
+  if (nth_posting >= 0) {
+    grn_index_column_diff_cache_remove_posting(ctx,
+                                               data,
+                                               (size_t)nth_posting);
+  } else {
+    grn_obj *postings = &(data->buffers.postings);
+    GRN_BULK_REWIND(postings);
+    const grn_posting *posting = &(data->current.posting);
+    GRN_UINT32_PUT(ctx, postings, posting->rid);
+    if (data->index.with_section) {
+      GRN_UINT32_PUT(ctx, postings, posting->sid);
+    }
+    if (data->index.with_position) {
+      GRN_UINT32_PUT(ctx, postings, posting->pos);
+    }
+    grn_obj_set_value(ctx,
+                      data->missings,
+                      data->current.diff_id,
+                      postings,
+                      GRN_OBJ_APPEND);
+  }
+}
+
+static void
+grn_index_column_diff_process_token(grn_ctx *ctx,
+                                    grn_index_column_diff_data *data,
+                                    const void *value_data,
+                                    size_t value_size)
+{
+  if (value_size == 0) {
+    return;
+  }
+
+  const unsigned int token_cursor_flags = 0;
+  grn_token_cursor *token_cursor =
+    grn_token_cursor_open(ctx,
+                          data->lexicon,
+                          value_data,
+                          value_size,
+                          GRN_TOKEN_ADD,
+                          token_cursor_flags);
+  if (!token_cursor) {
+    return;
+  }
+
+  const grn_bool with_position = data->index.with_position;
+  while (grn_token_cursor_get_status(ctx, token_cursor) ==
+         GRN_TOKEN_CURSOR_DOING) {
+    data->current.token_id = grn_token_cursor_next(ctx, token_cursor);
+    if (data->current.token_id == GRN_ID_NIL) {
+      continue;
+    }
+
+    if (with_position) {
+      grn_token *token = grn_token_cursor_get_token(ctx, token_cursor);
+      data->current.posting.pos = grn_token_get_position(ctx, token);
+    }
+
+    grn_index_column_diff_process_token_id(ctx, data);
+  }
+  grn_token_cursor_close(ctx, token_cursor);
+}
+
+static void
 grn_index_column_diff_compute(grn_ctx *ctx,
                               grn_index_column_diff_data *data)
 {
@@ -726,8 +805,6 @@ grn_index_column_diff_compute(grn_ctx *ctx,
   const size_t n_source_columns = GRN_PTR_VECTOR_SIZE(source_columns);
   grn_obj *value = &(data->buffers.value);
   grn_obj *postings = &(data->buffers.postings);
-  const grn_bool with_section = data->index.with_section;
-  const grn_bool with_position = data->index.with_position;
 
   grn_index_column_diff_init_progress(ctx, data);
 
@@ -746,68 +823,30 @@ grn_index_column_diff_compute(grn_ctx *ctx,
       GRN_BULK_REWIND(value);
       grn_obj_get_value(ctx, source, id, value);
 
-      if (GRN_BULK_VSIZE(value) == 0) {
-        continue;
-      }
-
-      const unsigned int token_cursor_flags = 0;
-      grn_token_cursor *token_cursor =
-        grn_token_cursor_open(ctx,
-                              data->lexicon,
-                              GRN_BULK_HEAD(value),
-                              GRN_BULK_VSIZE(value),
-                              GRN_TOKEN_ADD,
-                              token_cursor_flags);
-      if (!token_cursor) {
-        continue;
-      }
-
-      while (grn_token_cursor_get_status(ctx, token_cursor) ==
-             GRN_TOKEN_CURSOR_DOING) {
-        data->current.token_id = grn_token_cursor_next(ctx, token_cursor);
-        if (data->current.token_id == GRN_ID_NIL) {
-          continue;
-        }
-
-        if (with_position) {
-          grn_token *token = grn_token_cursor_get_token(ctx, token_cursor);
-          data->current.posting.pos = grn_token_get_position(ctx, token);
-        }
-
+      switch (value->header.type) {
+      case GRN_VECTOR :
+        break;
+      case GRN_UVECTOR :
         {
-          int added = 0;
-          data->current.diff_id =
-            grn_table_add(ctx,
-                          data->diff,
-                          &(data->current.token_id), sizeof(grn_id),
-                          &added);
-          data->current.is_new_diff = (added != 0);
-        }
-
-        const int64_t nth_posting =
-          grn_index_column_diff_find_posting(ctx, data);
-        if (nth_posting >= 0) {
-          grn_index_column_diff_cache_remove_posting(ctx,
-                                                     data,
-                                                     (size_t)nth_posting);
-        } else {
-          const grn_posting *posting = &(data->current.posting);
-          GRN_BULK_REWIND(postings);
-          GRN_UINT32_PUT(ctx, postings, posting->rid);
-          if (with_section) {
-            GRN_UINT32_PUT(ctx, postings, posting->sid);
-          }
-          if (with_position) {
-            GRN_UINT32_PUT(ctx, postings, posting->pos);
+          const size_t n_elements = grn_uvector_size(ctx, value);
+          for (size_t i = 0; i < n_elements; i++) {
+            const grn_id element =
+              grn_uvector_get_element(ctx, value, i, NULL);
+            data->current.token_id = element;
+            data->current.posting.pos = 0;
+            grn_index_column_diff_process_token_id(ctx, data);
           }
-          grn_obj_set_value(ctx,
-                            data->missings,
-                            data->current.diff_id,
-                            postings,
-                            GRN_OBJ_APPEND);
         }
+        break;
+      case GRN_BULK :
+        grn_index_column_diff_process_token(ctx,
+                                            data,
+                                            GRN_BULK_HEAD(value),
+                                            GRN_BULK_VSIZE(value));
+        break;
+      default :
+        break;
       }
-      grn_token_cursor_close(ctx, token_cursor);
     }
   } GRN_TABLE_EACH_END(ctx, cursor);
 

  Added: test/command/suite/index_column_diff/reference_vector.expected (+88 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/index_column_diff/reference_vector.expected    2019-03-22 18:51:15 +0900 (a2ade3d54)
@@ -0,0 +1,88 @@
+table_create Values TABLE_HASH_KEY ShortText
+[[0,0.0,0.0],true]
+table_create Data TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Data values COLUMN_VECTOR Values
+[[0,0.0,0.0],true]
+column_create Values data_index COLUMN_INDEX Data values
+[[0,0.0,0.0],true]
+load --table Data
+[
+{"values": ["Hello World", "Good-by World"]},
+{"values": ["Hello Groonga", "Good-by Groonga"]}
+]
+[[0,0.0,0.0],2]
+truncate Values.data_index
+[[0,0.0,0.0],true]
+load --table Data
+[
+{"values": ["Morning World", "Afternoon World"]},
+{"values": ["Morning Groonga", "Afternoon World"]}
+]
+[[0,0.0,0.0],2]
+index_column_diff Values data_index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "token": {
+        "id": 1,
+        "value": "Hello World"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 2,
+        "value": "Good-by World"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 3,
+        "value": "Hello Groonga"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 2
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 4,
+        "value": "Good-by Groonga"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 2
+        }
+      ]
+    }
+  ]
+]

  Added: test/command/suite/index_column_diff/reference_vector.test (+22 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/index_column_diff/reference_vector.test    2019-03-22 18:51:15 +0900 (7ced67ee9)
@@ -0,0 +1,22 @@
+table_create Values TABLE_HASH_KEY ShortText
+
+table_create Data TABLE_NO_KEY
+column_create Data values COLUMN_VECTOR Values
+
+column_create Values data_index COLUMN_INDEX Data values
+
+load --table Data
+[
+{"values": ["Hello World", "Good-by World"]},
+{"values": ["Hello Groonga", "Good-by Groonga"]}
+]
+
+truncate Values.data_index
+
+load --table Data
+[
+{"values": ["Morning World", "Afternoon World"]},
+{"values": ["Morning Groonga", "Afternoon World"]}
+]
+
+index_column_diff Values data_index
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190322/60518dba/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index