[Groonga-commit] groonga/groonga at bc2ab09 [master] Add index_column_diff command

Back to archive index
Kouhei Sutou null+****@clear*****
Mon Mar 18 18:51:07 JST 2019


Kouhei Sutou	2019-03-18 18:51:07 +0900 (Mon, 18 Mar 2019)

  Revision: bc2ab09bca5dd16f2293e9b7ae28af62a73e94d1
  https://github.com/groonga/groonga/commit/bc2ab09bca5dd16f2293e9b7ae28af62a73e94d1

  Message:
    Add index_column_diff command
    
    New C API:
    
      * grn_index_column_diff()

  Added files:
    include/groonga/index_column.h
    lib/proc/proc_index_column.c
    test/command/suite/index_column_diff/missing/with_section.expected
    test/command/suite/index_column_diff/missing/with_section.test
    test/command/suite/index_column_diff/missing/without_section.expected
    test/command/suite/index_column_diff/missing/without_section.test
  Modified files:
    include/groonga.h
    include/groonga/Makefile.am
    lib/grn_proc.h
    lib/index_column.c
    lib/proc.c
    lib/proc/sources.am

  Modified: include/groonga.h (+2 -1)
===================================================================
--- include/groonga.h    2019-03-18 18:50:39 +0900 (a398be7fc)
+++ include/groonga.h    2019-03-18 18:51:07 +0900 (a735c5def)
@@ -1,6 +1,6 @@
 /*
   Copyright(C) 2014-2018 Brazil
-  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+  Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -41,6 +41,7 @@
 #include "groonga/highlighter.h"
 #include "groonga/id.h"
 #include "groonga/ii.h"
+#include "groonga/index_column.h"
 #include "groonga/obj.h"
 #include "groonga/operator.h"
 #include "groonga/option.h"

  Modified: include/groonga/Makefile.am (+1 -0)
===================================================================
--- include/groonga/Makefile.am    2019-03-18 18:50:39 +0900 (bb9e16be6)
+++ include/groonga/Makefile.am    2019-03-18 18:51:07 +0900 (45860bc95)
@@ -20,6 +20,7 @@ groonga_include_HEADERS =			\
 	groonga.h				\
 	id.h					\
 	ii.h					\
+	index_column.h				\
 	msgpack.h				\
 	obj.h					\
 	operator.h				\

  Added: include/groonga/index_column.h (+31 -0) 100644
===================================================================
--- /dev/null
+++ include/groonga/index_column.h    2019-03-18 18:51:07 +0900 (4785d31bc)
@@ -0,0 +1,31 @@
+/*
+  Copyright(C) 2019 Brazil
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License as published by the Free Software Foundation; either
+  version 2.1 of the License, or (at your option) any later version.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#pragma once
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Compares the postings stored in index_column with the postings that
+ * are computed by tokenizing the values of its source columns again.
+ *
+ * On success, *diff is set to a newly created table whose keys are token
+ * IDs of the lexicon of index_column. The table has two UInt32 vector
+ * columns: "remains" and "missings". Each posting is stored in them as a
+ * record ID followed by a section ID (only for WITH_SECTION indexes) and
+ * a position (only for WITH_POSITION indexes). The built-in
+ * index_column_diff command closes the table with grn_obj_close() after
+ * it outputs the table.
+ *
+ * On failure, *diff is not assigned.
+ *
+ * Returns ctx->rc.
+ */
+GRN_API grn_rc grn_index_column_diff(grn_ctx *ctx,
+                                     grn_obj *index_column,
+                                     grn_obj **diff);
+
+#ifdef __cplusplus
+}
+#endif

  Modified: lib/grn_proc.h (+1 -0)
===================================================================
--- lib/grn_proc.h    2019-03-18 18:50:39 +0900 (3477d39b7)
+++ lib/grn_proc.h    2019-03-18 18:51:07 +0900 (276e44888)
@@ -54,6 +54,7 @@ void grn_proc_init_highlight(grn_ctx *ctx);
 void grn_proc_init_highlight_full(grn_ctx *ctx);
 void grn_proc_init_highlight_html(grn_ctx *ctx);
 void grn_proc_init_in_records(grn_ctx *ctx);
+void grn_proc_init_index_column_diff(grn_ctx *ctx);
 void grn_proc_init_lock_acquire(grn_ctx *ctx);
 void grn_proc_init_lock_clear(grn_ctx *ctx);
 void grn_proc_init_lock_release(grn_ctx *ctx);

  Modified: lib/index_column.c (+415 -1)
===================================================================
--- lib/index_column.c    2019-03-18 18:50:39 +0900 (f1dad536d)
+++ lib/index_column.c    2019-03-18 18:51:07 +0900 (82904f01a)
@@ -1,7 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2015 Brazil
-  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+  Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -234,3 +234,417 @@ grn_index_column_rebuild(grn_ctx *ctx, grn_obj *index_column)
 
   GRN_API_RETURN(ctx->rc);
 }
+
+static const char *remains_column_name = "remains";
+static const char *missings_column_name = "missings";
+
+/* Working state shared by the helpers of grn_index_column_diff(). */
+typedef struct {
+  /* Domain of the index column; source values are tokenized with it. */
+  grn_obj *lexicon;
+  /* The target index column casted to grn_ii. */
+  grn_ii *ii;
+  struct {
+    /* Whether GRN_OBJ_WITH_SECTION is set in the index column flags. */
+    grn_bool with_section;
+    /* Whether GRN_OBJ_WITH_POSITION is set in the index column flags. */
+    grn_bool with_position;
+    /* Result of grn_ii_get_n_elements(); passed to grn_ii_cursor_open(). */
+    uint32_t n_elements;
+  } index;
+  /*
+   * The number of uint32 values that represent one posting in the
+   * buffers and the result columns: 1 for the record ID, plus 1 with
+   * section, plus 1 with position.
+   */
+  size_t n_posting_elements;
+  /* Range of the index column; its records are scanned by ID. */
+  grn_obj *source_table;
+  /* Pointer vector of the source columns of the index column. */
+  grn_obj source_columns;
+  /* Result table: a hash table whose key type is the lexicon. */
+  grn_obj *tokens;
+  /* "remains" UInt32 vector column of the result table. */
+  grn_obj *remains;
+  /* "missings" UInt32 vector column of the result table. */
+  grn_obj *missings;
+  /* Reusable scratch buffers. */
+  struct {
+    /* Value of one source column of the current record. */
+    grn_obj value;
+    /* Postings of the current token. */
+    grn_obj postings;
+    /* Postings of the current token without the matched one. */
+    grn_obj new_postings;
+    /* One posting to be appended to the "missings" column. */
+    grn_obj missings;
+  } buffers;
+} grn_index_column_diff_data;
+
+/*
+ * Initializes the Groonga objects that are embedded in data.
+ *
+ * grn_index_column_diff() calls this before any validation so that
+ * grn_index_column_diff_data_fin() can be called on every exit path.
+ */
+static void
+grn_index_column_diff_data_init(grn_ctx *ctx,
+                                grn_index_column_diff_data *data)
+{
+  /* Scratch buffers: three UInt32 vectors and one untyped value. */
+  GRN_UINT32_INIT(&(data->buffers.missings), GRN_OBJ_VECTOR);
+  GRN_UINT32_INIT(&(data->buffers.new_postings), GRN_OBJ_VECTOR);
+  GRN_UINT32_INIT(&(data->buffers.postings), GRN_OBJ_VECTOR);
+  GRN_VOID_INIT(&(data->buffers.value));
+
+  /* Vector of pointers to the source column objects. */
+  GRN_PTR_INIT(&(data->source_columns), GRN_OBJ_VECTOR, GRN_ID_NIL);
+}
+
+/*
+ * Releases everything that grn_index_column_diff_data_init() set up.
+ *
+ * Source columns that are accessors are closed here. Other source
+ * columns are only dropped from the vector.
+ */
+static void
+grn_index_column_diff_data_fin(grn_ctx *ctx,
+                               grn_index_column_diff_data *data)
+{
+  grn_obj *sources = &(data->source_columns);
+  const size_t n_sources = GRN_PTR_VECTOR_SIZE(sources);
+  size_t nth;
+
+  for (nth = 0; nth < n_sources; nth++) {
+    grn_obj *source = GRN_PTR_VALUE_AT(sources, nth);
+    if (!grn_obj_is_accessor(ctx, source)) {
+      continue;
+    }
+    grn_obj_close(ctx, source);
+  }
+  GRN_OBJ_FIN(ctx, sources);
+
+  GRN_OBJ_FIN(ctx, &(data->buffers.value));
+  GRN_OBJ_FIN(ctx, &(data->buffers.postings));
+  GRN_OBJ_FIN(ctx, &(data->buffers.new_postings));
+  GRN_OBJ_FIN(ctx, &(data->buffers.missings));
+}
+
+/*
+ * Appends the not-yet-matched postings of token_id to
+ * data->buffers.postings and returns the ID of the record for token_id
+ * in data->tokens. GRN_ID_NIL is returned when the record can't be
+ * added.
+ *
+ * The caller must rewind data->buffers.postings beforehand because
+ * values are only appended to it.
+ *
+ * When token_id is seen for the first time, all postings of the token
+ * are read from the index and stored as the initial value of the
+ * "remains" column. Otherwise the current value of the "remains"
+ * column is loaded.
+ *
+ * data->tokens is a hash table keyed by token ID, so the ID of a record
+ * in it isn't the same as the token ID in general. The columns of
+ * data->tokens must be addressed by the record ID that
+ * grn_table_add() returns because table cursors on data->tokens report
+ * record IDs.
+ */
+static grn_id
+grn_index_column_diff_get_postings(grn_ctx *ctx,
+                                   grn_index_column_diff_data *data,
+                                   grn_id token_id)
+{
+  grn_obj *postings = &(data->buffers.postings);
+
+  int added = 0;
+  const grn_id diff_id =
+    grn_table_add(ctx, data->tokens, &token_id, sizeof(grn_id), &added);
+  if (diff_id == GRN_ID_NIL) {
+    return GRN_ID_NIL;
+  }
+  if (!added) {
+    grn_obj_get_value(ctx, data->remains, diff_id, postings);
+    return diff_id;
+  }
+
+  const unsigned int ii_cursor_flags = 0;
+  grn_ii_cursor *ii_cursor = grn_ii_cursor_open(ctx,
+                                                data->ii,
+                                                token_id,
+                                                GRN_ID_NIL,
+                                                GRN_ID_MAX,
+                                                data->index.n_elements,
+                                                ii_cursor_flags);
+  if (ii_cursor) {
+    const grn_bool with_section = data->index.with_section;
+    const grn_bool with_position = data->index.with_position;
+    if (with_position) {
+      /* One serialized posting per (record, section, position). */
+      while (grn_ii_cursor_next(ctx, ii_cursor)) {
+        grn_posting *posting;
+        while ((posting = grn_ii_cursor_next_pos(ctx, ii_cursor))) {
+          GRN_UINT32_PUT(ctx, postings, posting->rid);
+          if (with_section) {
+            GRN_UINT32_PUT(ctx, postings, posting->sid);
+          }
+          GRN_UINT32_PUT(ctx, postings, posting->pos);
+        }
+      }
+    } else {
+      /* One serialized posting per (record, section). */
+      grn_posting *posting;
+      while ((posting = grn_ii_cursor_next(ctx, ii_cursor))) {
+        GRN_UINT32_PUT(ctx, postings, posting->rid);
+        if (with_section) {
+          GRN_UINT32_PUT(ctx, postings, posting->sid);
+        }
+      }
+    }
+    grn_ii_cursor_close(ctx, ii_cursor);
+  }
+
+  grn_obj_set_value(ctx, data->remains, diff_id, postings, GRN_OBJ_SET);
+  return diff_id;
+}
+
+/*
+ * Compares the nth_posting-th posting in data->buffers.postings with
+ * current_posting.
+ *
+ * The record ID is compared first, then the section ID (only with
+ * section) and then the position (only with position).
+ *
+ * Returns a negative value, 0 or a positive value when the stored
+ * posting is less than, equal to or greater than current_posting.
+ */
+static int
+grn_index_column_diff_compare_posting(grn_ctx *ctx,
+                                      grn_index_column_diff_data *data,
+                                      size_t nth_posting,
+                                      grn_posting *current_posting)
+{
+  grn_obj *postings = &(data->buffers.postings);
+  const grn_bool with_section = data->index.with_section;
+  const grn_bool with_position = data->index.with_position;
+  const size_t n_posting_elements = data->n_posting_elements;
+
+  /* Index of the first uint32 value of the target posting. */
+  size_t i = nth_posting * n_posting_elements;
+
+  grn_posting posting = {0};
+  posting.rid = GRN_UINT32_VALUE_AT(postings, i);
+
+  if (posting.rid < current_posting->rid) {
+    return -1;
+  } else if (posting.rid > current_posting->rid) {
+    return 1;
+  }
+
+  if (with_section) {
+    i++;
+    posting.sid = GRN_UINT32_VALUE_AT(postings, i);
+    if (posting.sid < current_posting->sid) {
+      return -1;
+    } else if (posting.sid > current_posting->sid) {
+      return 1;
+    }
+  }
+
+  if (with_position) {
+    i++;
+    posting.pos = GRN_UINT32_VALUE_AT(postings, i);
+    if (posting.pos < current_posting->pos) {
+      return -1;
+    } else if (posting.pos > current_posting->pos) {
+      return 1;
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Finds current_posting in data->buffers.postings by binary search.
+ *
+ * Returns the 0-origin index of the found posting (not the index of
+ * its first uint32 value), or -1 when it isn't found.
+ */
+static int64_t
+grn_index_column_diff_find_posting(grn_ctx *ctx,
+                                   grn_index_column_diff_data *data,
+                                   grn_posting *current_posting)
+{
+  grn_obj *postings = &(data->buffers.postings);
+  const size_t n_posting_elements = data->n_posting_elements;
+  /*
+   * Convert to a signed type before subtracting 1. If the subtraction
+   * is done in size_t, an empty posting list wraps around and "max"
+   * becomes a large positive value on platforms where size_t is
+   * narrower than int64_t.
+   */
+  const int64_t n_postings =
+    (int64_t)(GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements);
+  int64_t min = 0;
+  int64_t max = n_postings - 1;
+  while (min <= max) {
+    int64_t middle = min + ((max - min) / 2);
+    int compared =
+      grn_index_column_diff_compare_posting(ctx, data, middle, current_posting);
+    if (compared == 0) {
+      return middle;
+    } else if (compared < 0) {
+      min = middle + 1;
+    } else {
+      max = middle - 1;
+    }
+  }
+  return -1;
+}
+
+/*
+ * Fills data->tokens with the differences.
+ *
+ * All records of data->source_table are scanned by ID and all source
+ * column values are tokenized again. For each token:
+ *
+ *   * When the expected posting exists in "remains", it's removed from
+ *     "remains".
+ *   * Otherwise the expected posting is appended to "missings".
+ *
+ * Finally, records of data->tokens whose "remains" and "missings" are
+ * both empty are deleted so that only tokens that have differences are
+ * left.
+ */
+static void
+grn_index_column_diff_compute(grn_ctx *ctx,
+                              grn_index_column_diff_data *data)
+{
+  grn_obj *source_columns = &(data->source_columns);
+  const size_t n_source_columns = GRN_PTR_VECTOR_SIZE(source_columns);
+  grn_obj *value = &(data->buffers.value);
+  grn_obj *postings = &(data->buffers.postings);
+  grn_obj *new_postings = &(data->buffers.new_postings);
+  grn_obj *missings = &(data->buffers.missings);
+  const grn_bool with_section = data->index.with_section;
+  const grn_bool with_position = data->index.with_position;
+  const size_t n_posting_elements = data->n_posting_elements;
+
+  GRN_TABLE_EACH_BEGIN_FLAGS(ctx,
+                             data->source_table,
+                             cursor,
+                             id,
+                             GRN_CURSOR_BY_ID) {
+    for (size_t i = 0; i < n_source_columns; i++) {
+      grn_posting current_posting = {0};
+      current_posting.rid = id;
+      /* Section IDs are 1-origin indexes of the source columns. */
+      current_posting.sid = i + 1;
+      grn_obj *source = GRN_PTR_VALUE_AT(source_columns, i);
+
+      GRN_BULK_REWIND(value);
+      grn_obj_get_value(ctx, source, id, value);
+
+      const unsigned int token_cursor_flags = 0;
+      grn_token_cursor *token_cursor =
+        grn_token_cursor_open(ctx,
+                              data->lexicon,
+                              GRN_BULK_HEAD(value),
+                              GRN_BULK_VSIZE(value),
+                              GRN_TOKEN_ADD,
+                              token_cursor_flags);
+      if (!token_cursor) {
+        continue;
+      }
+
+      while (grn_token_cursor_get_status(ctx, token_cursor) ==
+             GRN_TOKEN_CURSOR_DOING) {
+        const grn_id token_id = grn_token_cursor_next(ctx, token_cursor);
+        if (token_id == GRN_ID_NIL) {
+          continue;
+        }
+
+        grn_token *token = grn_token_cursor_get_token(ctx, token_cursor);
+        current_posting.pos = grn_token_get_position(ctx, token);
+
+        /*
+         * diff_id is the ID of the record for token_id in data->tokens.
+         * It must be used instead of token_id to access the "remains"
+         * and "missings" columns.
+         */
+        GRN_BULK_REWIND(postings);
+        const grn_id diff_id =
+          grn_index_column_diff_get_postings(ctx, data, token_id);
+        if (diff_id == GRN_ID_NIL) {
+          continue;
+        }
+
+        /*
+         * NOTE(review): when the index has no position, a token that
+         * appears twice or more in the same value is looked up again
+         * after its posting was already removed from "remains". It looks
+         * like the later occurrences are reported as missing. Confirm
+         * whether this is intended.
+         */
+        int64_t nth_posting =
+          grn_index_column_diff_find_posting(ctx, data, &current_posting);
+        if (nth_posting >= 0) {
+          /* Found: store "remains" without the found posting. */
+          GRN_BULK_REWIND(new_postings);
+          const size_t posting_size = sizeof(uint32_t) * n_posting_elements;
+          grn_bulk_write(ctx,
+                         new_postings,
+                         GRN_BULK_HEAD(postings),
+                         posting_size * nth_posting);
+          const size_t n_postings =
+            GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements;
+          grn_bulk_write(ctx,
+                         new_postings,
+                         GRN_BULK_HEAD(postings) +
+                         (posting_size * (nth_posting + 1)),
+                         posting_size * (n_postings - nth_posting - 1));
+          grn_obj_set_value(ctx,
+                            data->remains,
+                            diff_id,
+                            new_postings,
+                            GRN_OBJ_SET);
+        } else {
+          /* Not found: the index doesn't have the expected posting. */
+          GRN_BULK_REWIND(missings);
+          GRN_UINT32_PUT(ctx, missings, current_posting.rid);
+          if (with_section) {
+            GRN_UINT32_PUT(ctx, missings, current_posting.sid);
+          }
+          if (with_position) {
+            GRN_UINT32_PUT(ctx, missings, current_posting.pos);
+          }
+          grn_obj_set_value(ctx,
+                            data->missings,
+                            diff_id,
+                            missings,
+                            GRN_OBJ_APPEND);
+        }
+      }
+      grn_token_cursor_close(ctx, token_cursor);
+    }
+  } GRN_TABLE_EACH_END(ctx, cursor);
+
+  /* Drop tokens that have no difference. */
+  GRN_TABLE_EACH_BEGIN(ctx, data->tokens, cursor, id) {
+    GRN_BULK_REWIND(postings);
+    grn_obj_get_value(ctx, data->remains, id, postings);
+    if (GRN_UINT32_VECTOR_SIZE(postings) > 0) {
+      continue;
+    }
+    GRN_BULK_REWIND(missings);
+    grn_obj_get_value(ctx, data->missings, id, missings);
+    if (GRN_UINT32_VECTOR_SIZE(missings) > 0) {
+      continue;
+    }
+    grn_table_cursor_delete(ctx, cursor);
+  } GRN_TABLE_EACH_END(ctx, cursor);
+}
+
+/*
+ * Computes the differences between the postings in index_column and the
+ * postings that are computed from the current source column values.
+ *
+ * index_column must be an index column and diff must not be NULL.
+ *
+ * On success, *diff is set to a new hash table whose key type is the
+ * lexicon of index_column. It has the "remains" and "missings" UInt32
+ * vector columns. The table isn't closed here; the built-in
+ * index_column_diff command closes it with grn_obj_close().
+ *
+ * On failure, *diff isn't assigned and the partially built table is
+ * closed here.
+ *
+ * Returns ctx->rc.
+ */
+grn_rc
+grn_index_column_diff(grn_ctx *ctx,
+                      grn_obj *index_column,
+                      grn_obj **diff)
+{
+  grn_index_column_diff_data data = {0};
+
+  GRN_API_ENTER;
+
+  /* Initialize first so that the cleanup at "exit" is always valid. */
+  grn_index_column_diff_data_init(ctx, &data);
+
+  if (!index_column) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[index-column][diff] index column must not be NULL");
+    goto exit;
+  }
+  if (!grn_obj_is_index_column(ctx, index_column)) {
+    char name[GRN_TABLE_MAX_KEY_SIZE];
+    int name_size;
+    name_size = grn_obj_name(ctx, index_column, name, sizeof(name));
+    ERR(GRN_INVALID_ARGUMENT,
+        "[index-column][diff] invalid index column: <%.*s>: <%s>",
+        name_size, name,
+        grn_obj_type_to_string(index_column->header.type));
+    goto exit;
+  }
+  if (!diff) {
+    /* *diff is written on success, so it must be a valid location. */
+    ERR(GRN_INVALID_ARGUMENT,
+        "[index-column][diff] diff must not be NULL");
+    goto exit;
+  }
+  data.ii = (grn_ii *)index_column;
+  {
+    grn_column_flags flags = grn_column_get_flags(ctx, index_column);
+    data.index.with_section =
+      ((flags & GRN_OBJ_WITH_SECTION) == GRN_OBJ_WITH_SECTION);
+    data.index.with_position =
+      ((flags & GRN_OBJ_WITH_POSITION) == GRN_OBJ_WITH_POSITION);
+    data.index.n_elements = grn_ii_get_n_elements(ctx, data.ii);
+  }
+
+  /* A posting is serialized as: record ID[, section ID][, position]. */
+  data.n_posting_elements = 1;
+  if (data.index.with_section) {
+    data.n_posting_elements++;
+  }
+  if (data.index.with_position) {
+    data.n_posting_elements++;
+  }
+
+  data.source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column));
+  {
+    /* Resolve the source column IDs to objects. */
+    grn_obj source_columns;
+    GRN_RECORD_INIT(&source_columns, GRN_OBJ_VECTOR, GRN_ID_NIL);
+    grn_obj_get_info(ctx, index_column, GRN_INFO_SOURCE, &source_columns);
+    size_t n_columns = GRN_RECORD_VECTOR_SIZE(&source_columns);
+    for (size_t i = 0; i < n_columns; i++) {
+      grn_id source_id = GRN_RECORD_VALUE_AT(&source_columns, i);
+      grn_obj *source = grn_ctx_at(ctx, source_id);
+      GRN_PTR_PUT(ctx, &(data.source_columns), source);
+    }
+    GRN_OBJ_FIN(ctx, &source_columns);
+  }
+
+  data.lexicon = grn_ctx_at(ctx, index_column->header.domain);
+
+  data.tokens = grn_table_create(ctx,
+                                 NULL, 0,
+                                 NULL,
+                                 GRN_TABLE_HASH_KEY,
+                                 data.lexicon,
+                                 NULL);
+  if (!data.tokens) {
+    /*
+     * Copy the current error message because it's embedded into the
+     * new error message that is reported with ERR().
+     */
+    char message[GRN_CTX_MSGSIZE];
+    grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf);
+    char name[GRN_TABLE_MAX_KEY_SIZE];
+    int name_size = grn_obj_name(ctx, index_column, name, sizeof(name));
+    ERR(GRN_INVALID_ARGUMENT,
+        "[index-column][diff] failed to create token table: <%.*s>: %s",
+        name_size, name,
+        message);
+    goto exit;
+  }
+  data.remains = grn_column_create(ctx,
+                                   data.tokens,
+                                   remains_column_name,
+                                   strlen(remains_column_name),
+                                   NULL,
+                                   GRN_OBJ_COLUMN_VECTOR,
+                                   grn_ctx_at(ctx, GRN_DB_UINT32));
+  if (!data.remains) {
+    char message[GRN_CTX_MSGSIZE];
+    grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf);
+    char name[GRN_TABLE_MAX_KEY_SIZE];
+    int name_size = grn_obj_name(ctx, index_column, name, sizeof(name));
+    ERR(GRN_INVALID_ARGUMENT,
+        "[index-column][diff] failed to create remains column: <%.*s>: %s",
+        name_size, name,
+        message);
+    goto exit;
+  }
+  data.missings = grn_column_create(ctx,
+                                    data.tokens,
+                                    missings_column_name,
+                                    strlen(missings_column_name),
+                                    NULL,
+                                    GRN_OBJ_COLUMN_VECTOR,
+                                    grn_ctx_at(ctx, GRN_DB_UINT32));
+  if (!data.missings) {
+    char message[GRN_CTX_MSGSIZE];
+    grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf);
+    char name[GRN_TABLE_MAX_KEY_SIZE];
+    int name_size = grn_obj_name(ctx, index_column, name, sizeof(name));
+    ERR(GRN_INVALID_ARGUMENT,
+        "[index-column][diff] failed to create missings column: <%.*s>: %s",
+        name_size, name,
+        message);
+    goto exit;
+  }
+
+  grn_index_column_diff_compute(ctx, &data);
+  /* Hand the table over to the caller; don't close it at "exit". */
+  *diff = data.tokens;
+  data.tokens = NULL;
+
+exit :
+  if (data.tokens) {
+    grn_obj_close(ctx, data.tokens);
+  }
+
+  grn_index_column_diff_data_fin(ctx, &data);
+
+  GRN_API_RETURN(ctx->rc);
+}

  Modified: lib/proc.c (+2 -0)
===================================================================
--- lib/proc.c    2019-03-18 18:50:39 +0900 (7c618c708)
+++ lib/proc.c    2019-03-18 18:51:07 +0900 (ae85f4faf)
@@ -4352,4 +4352,6 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
   grn_proc_init_query_log_flags_remove(ctx);
 
   grn_proc_init_cast_loose(ctx);
+
+  grn_proc_init_index_column_diff(ctx);
 }

  Added: lib/proc/proc_index_column.c (+252 -0) 100644
===================================================================
--- /dev/null
+++ lib/proc/proc_index_column.c    2019-03-18 18:51:07 +0900 (91f937ec3)
@@ -0,0 +1,252 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+  Copyright(C) 2019 Kouhei Sutou <kou****@clear*****>
+
+  This library is free software; you can redistribute it and/or
+  modify it under the terms of the GNU Lesser General Public
+  License version 2.1 as published by the Free Software Foundation.
+
+  This library is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with this library; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "../grn_proc.h"
+
+#include "../grn_ctx.h"
+#include "../grn_db.h"
+#include "../grn_str.h"
+
+#include <groonga/plugin.h>
+
+static const char *remains_column_name = "remains";
+static const char *missings_column_name = "missings";
+
+/*
+ * Outputs postings as an array named name. Each element is a map that
+ * has "record_id", "section_id" (only with GRN_OBJ_WITH_SECTION) and
+ * "position" (only with GRN_OBJ_WITH_POSITION).
+ *
+ * postings is a UInt32 vector in which each posting is serialized as
+ * record ID[, section ID][, position].
+ */
+static void
+index_column_diff_output_postings(grn_ctx *ctx,
+                                  grn_column_flags index_column_flags,
+                                  grn_obj *postings,
+                                  const char *name)
+{
+  const grn_bool with_section =
+    ((index_column_flags & GRN_OBJ_WITH_SECTION) != 0);
+  const grn_bool with_position =
+    ((index_column_flags & GRN_OBJ_WITH_POSITION) != 0);
+  size_t n_elements = 1;
+  if (with_section) {
+    n_elements++;
+  }
+  if (with_position) {
+    n_elements++;
+  }
+  /*
+   * The array has one element per posting, not one element per uint32
+   * value. Output formats that write the number of elements in advance
+   * need the exact number of postings.
+   */
+  const size_t n_postings = GRN_UINT32_VECTOR_SIZE(postings) / n_elements;
+  grn_ctx_output_array_open(ctx, name, n_postings);
+  for (size_t i = 0; i < n_postings; i++) {
+    grn_ctx_output_map_open(ctx, "posting", n_elements);
+    {
+      /* Index of the first uint32 value of the i-th posting. */
+      size_t j = i * n_elements;
+      grn_ctx_output_cstr(ctx, "record_id");
+      grn_id record_id = GRN_UINT32_VALUE_AT(postings, j);
+      grn_ctx_output_uint32(ctx, record_id);
+      if (with_section) {
+        j++;
+        grn_ctx_output_cstr(ctx, "section_id");
+        grn_id section_id = GRN_UINT32_VALUE_AT(postings, j);
+        grn_ctx_output_uint32(ctx, section_id);
+      }
+      if (with_position) {
+        j++;
+        grn_ctx_output_cstr(ctx, "position");
+        uint32_t position = GRN_UINT32_VALUE_AT(postings, j);
+        grn_ctx_output_uint32(ctx, position);
+      }
+    }
+    grn_ctx_output_map_close(ctx);
+  }
+  grn_ctx_output_array_close(ctx);
+}
+
+/*
+ * Outputs the diff table as an array named "diffs". Each element is a
+ * map that has:
+ *
+ *   * "token": a map of "id" (the token ID that is the key of the diff
+ *     record) and "value" (the key of the token in lexicon)
+ *   * "remains": the postings in the "remains" column
+ *   * "missings": the postings in the "missings" column
+ *
+ * index_column_flags decides which elements each posting has.
+ */
+static void
+index_column_diff_output(grn_ctx *ctx,
+                         grn_obj *diff,
+                         grn_obj *lexicon,
+                         grn_column_flags index_column_flags)
+{
+  grn_obj *remains_column =
+    grn_obj_column(ctx,
+                   diff,
+                   remains_column_name,
+                   strlen(remains_column_name));
+  grn_obj *missings_column =
+    grn_obj_column(ctx,
+                   diff,
+                   missings_column_name,
+                   strlen(missings_column_name));
+  /* key only refers key_buffer; it doesn't own the data. */
+  grn_obj key;
+  GRN_OBJ_INIT(&key, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, lexicon->header.domain);
+  grn_obj remains;
+  GRN_UINT32_INIT(&remains, GRN_OBJ_VECTOR);
+  grn_obj missings;
+  GRN_UINT32_INIT(&missings, GRN_OBJ_VECTOR);
+  grn_ctx_output_array_open(ctx, "diffs", grn_table_size(ctx, diff));
+  {
+    GRN_TABLE_EACH_BEGIN(ctx, diff, cursor, id) {
+      grn_ctx_output_map_open(ctx, "diff", 3);
+      {
+        grn_ctx_output_cstr(ctx, "token");
+        grn_ctx_output_map_open(ctx, "token", 2);
+        {
+          grn_ctx_output_cstr(ctx, "id");
+          /* The key of a diff record is a token ID of lexicon. */
+          void *token_id_buffer;
+          grn_table_cursor_get_key(ctx, cursor, &token_id_buffer);
+          grn_id token_id = *((grn_id *)token_id_buffer);
+          grn_ctx_output_uint32(ctx, token_id);
+
+          grn_ctx_output_cstr(ctx, "value");
+          char key_buffer[GRN_TABLE_MAX_KEY_SIZE];
+          int key_size = grn_table_get_key(ctx,
+                                           lexicon,
+                                           token_id,
+                                           key_buffer,
+                                           sizeof(key_buffer));
+          GRN_TEXT_SET(ctx, &key, key_buffer, key_size);
+          grn_ctx_output_obj(ctx, &key, NULL);
+        }
+        grn_ctx_output_map_close(ctx);
+
+        grn_ctx_output_cstr(ctx, "remains");
+        GRN_BULK_REWIND(&remains);
+        grn_obj_get_value(ctx, remains_column, id, &remains);
+        index_column_diff_output_postings(ctx,
+                                          index_column_flags,
+                                          &remains,
+                                          "remains");
+
+        grn_ctx_output_cstr(ctx, "missings");
+        GRN_BULK_REWIND(&missings);
+        grn_obj_get_value(ctx, missings_column, id, &missings);
+        index_column_diff_output_postings(ctx,
+                                          index_column_flags,
+                                          &missings,
+                                          "missings");
+      }
+      grn_ctx_output_map_close(ctx);
+    } GRN_TABLE_EACH_END(ctx, cursor);
+  }
+  grn_ctx_output_array_close(ctx);
+  GRN_OBJ_FIN(ctx, &missings);
+  GRN_OBJ_FIN(ctx, &remains);
+  GRN_OBJ_FIN(ctx, &key);
+  /*
+   * Release the objects that grn_obj_column() returned if they are
+   * accessors. This is the same guard as the one for the column in
+   * command_index_column_diff().
+   */
+  if (grn_obj_is_accessor(ctx, missings_column)) {
+    grn_obj_close(ctx, missings_column);
+  }
+  if (grn_obj_is_accessor(ctx, remains_column)) {
+    grn_obj_close(ctx, remains_column);
+  }
+}
+
+/*
+ * Implements the index_column_diff command.
+ *
+ * The "table" variable is the name of a lexicon and the "name" variable
+ * is the name of an index column of the lexicon. The differences that
+ * grn_index_column_diff() computes are written to the output of ctx.
+ *
+ * Errors are reported with GRN_PLUGIN_ERROR(). This always returns NULL.
+ */
+static grn_obj *
+command_index_column_diff(grn_ctx *ctx,
+                          int n_args,
+                          grn_obj **args,
+                          grn_user_data *user_data)
+{
+  grn_raw_string table_name;
+  grn_raw_string column_name;
+  grn_obj *table = NULL;
+  grn_obj *column = NULL;
+  grn_obj *diff = NULL;
+
+  table_name.value =
+    grn_plugin_proc_get_var_string(ctx, user_data,
+                                   "table", -1,
+                                   &(table_name.length));
+  column_name.value =
+    grn_plugin_proc_get_var_string(ctx, user_data,
+                                   "name", -1,
+                                   &(column_name.length));
+
+  table = grn_ctx_get(ctx, table_name.value, table_name.length);
+  if (!table) {
+    GRN_PLUGIN_ERROR(ctx,
+                     GRN_INVALID_ARGUMENT,
+                     "[index-column][diff] table doesn't exist: <%.*s>",
+                     (int)(table_name.length),
+                     table_name.value);
+    goto exit;
+  }
+  if (!grn_obj_is_lexicon(ctx, table)) {
+    GRN_PLUGIN_ERROR(ctx,
+                     GRN_INVALID_ARGUMENT,
+                     "[index-column][diff] table must be lexicon: <%.*s>: %s",
+                     (int)(table_name.length),
+                     table_name.value,
+                     grn_obj_type_to_string(table->header.type));
+    goto exit;
+  }
+
+  column = grn_obj_column(ctx, table, column_name.value, column_name.length);
+  if (!column) {
+    GRN_PLUGIN_ERROR(ctx,
+                     GRN_INVALID_ARGUMENT,
+                     "[index-column][diff] column doesn't exist: <%.*s>: <%.*s>",
+                     (int)(table_name.length),
+                     table_name.value,
+                     (int)(column_name.length),
+                     column_name.value);
+    goto exit;
+  }
+  if (!grn_obj_is_index_column(ctx, column)) {
+    GRN_PLUGIN_ERROR(ctx,
+                     GRN_INVALID_ARGUMENT,
+                     "[index-column][diff] column must be index column: "
+                     "<%.*s>: <%.*s>: %s",
+                     (int)(table_name.length),
+                     table_name.value,
+                     (int)(column_name.length),
+                     column_name.value,
+                     grn_obj_type_to_string(column->header.type));
+    goto exit;
+  }
+
+  grn_index_column_diff(ctx, column, &diff);
+  if (ctx->rc != GRN_SUCCESS) {
+    /*
+     * Copy the current error message before it's embedded into the new
+     * error message. Passing ctx->errbuf directly as a format argument
+     * could make the source and the destination of the formatting
+     * overlap. grn_index_column_diff() copies the message in the same
+     * way before it calls ERR().
+     */
+    char message[GRN_CTX_MSGSIZE];
+    grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf);
+    GRN_PLUGIN_ERROR(ctx,
+                     ctx->rc,
+                     "[index-column][diff] failed to diff: "
+                     "<%.*s>: <%.*s>: %s",
+                     (int)(table_name.length),
+                     table_name.value,
+                     (int)(column_name.length),
+                     column_name.value,
+                     message);
+    goto exit;
+  }
+
+  index_column_diff_output(ctx,
+                           diff,
+                           table,
+                           grn_column_get_flags(ctx, column));
+
+exit :
+  /* column is closed only when grn_obj_column() returned an accessor. */
+  if (grn_obj_is_accessor(ctx, column)) {
+    grn_obj_close(ctx, column);
+  }
+
+  if (diff) {
+    grn_obj_close(ctx, diff);
+  }
+
+  return NULL;
+}
+
+/*
+ * Registers the index_column_diff command. It takes two variables in
+ * this order: "table" and "name".
+ */
+void
+grn_proc_init_index_column_diff(grn_ctx *ctx)
+{
+  grn_expr_var vars[2];
+  const unsigned int n_vars = sizeof(vars) / sizeof(vars[0]);
+
+  grn_plugin_expr_var_init(ctx, &(vars[0]), "table", -1);
+  grn_plugin_expr_var_init(ctx, &(vars[1]), "name", -1);
+
+  grn_plugin_command_create(ctx,
+                            "index_column_diff", -1,
+                            command_index_column_diff,
+                            n_vars,
+                            vars);
+}

  Modified: lib/proc/sources.am (+1 -0)
===================================================================
--- lib/proc/sources.am    2019-03-18 18:50:39 +0900 (aa4782dce)
+++ lib/proc/sources.am    2019-03-18 18:51:07 +0900 (23606348f)
@@ -6,6 +6,7 @@ libgrnproc_la_SOURCES =				\
 	proc_fuzzy_search.c			\
 	proc_highlight.c			\
 	proc_in_records.c			\
+	proc_index_column.c			\
 	proc_lexicon.c				\
 	proc_lock.c				\
 	proc_normalize.c			\

  Added: test/command/suite/index_column_diff/missing/with_section.expected (+164 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/index_column_diff/missing/with_section.expected    2019-03-18 18:51:07 +0900 (dace34582)
@@ -0,0 +1,164 @@
+table_create Data TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Data value1 COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+column_create Data value2 COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenNgram   --normalizer NormalizerNFKC100
+[[0,0.0,0.0],true]
+column_create Terms data_values_index   COLUMN_INDEX|WITH_POSITION|WITH_SECTION   Data value1,value2
+[[0,0.0,0.0],true]
+load --table Data
+[
+{"value1": "Hello World",
+ "value2": "Good-by World"},
+{"value1": "Hello Groonga",
+ "value2": "Good-by Groonga"}
+]
+[[0,0.0,0.0],2]
+truncate Terms.data_values_index
+[[0,0.0,0.0],true]
+load --table Data
+[
+{"value1": "Morning World",
+ "value2": "Good night World"},
+{"value1": "Morning Groonga",
+ "value2": "Good night Groonga"}
+]
+[[0,0.0,0.0],2]
+index_column_diff Terms data_values_index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "token": {
+        "id": 1,
+        "value": "hello"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "section_id": 1,
+          "position": 0
+        },
+        {
+          "record_id": 2,
+          "section_id": 1,
+          "position": 0
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 2,
+        "value": "world"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "section_id": 1,
+          "position": 1
+        },
+        {
+          "record_id": 1,
+          "section_id": 2,
+          "position": 3
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 3,
+        "value": "good"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "section_id": 2,
+          "position": 0
+        },
+        {
+          "record_id": 2,
+          "section_id": 2,
+          "position": 0
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 4,
+        "value": "-"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "section_id": 2,
+          "position": 1
+        },
+        {
+          "record_id": 2,
+          "section_id": 2,
+          "position": 1
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 5,
+        "value": "by"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "section_id": 2,
+          "position": 2
+        },
+        {
+          "record_id": 2,
+          "section_id": 2,
+          "position": 2
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 6,
+        "value": "groonga"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 2,
+          "section_id": 1,
+          "position": 1
+        },
+        {
+          "record_id": 2,
+          "section_id": 2,
+          "position": 3
+        }
+      ]
+    }
+  ]
+]

  Added: test/command/suite/index_column_diff/missing/with_section.test (+30 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/index_column_diff/missing/with_section.test    2019-03-18 18:51:07 +0900 (39d0ea54c)
@@ -0,0 +1,30 @@
+table_create Data TABLE_NO_KEY
+column_create Data value1 COLUMN_SCALAR ShortText
+column_create Data value2 COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenNgram \
+  --normalizer NormalizerNFKC100
+column_create Terms data_values_index \
+  COLUMN_INDEX|WITH_POSITION|WITH_SECTION \
+  Data value1,value2
+
+load --table Data
+[
+{"value1": "Hello World",
+ "value2": "Good-by World"},
+{"value1": "Hello Groonga",
+ "value2": "Good-by Groonga"}
+]
+
+truncate Terms.data_values_index
+
+load --table Data
+[
+{"value1": "Morning World",
+ "value2": "Good night World"},
+{"value1": "Morning Groonga",
+ "value2": "Good night Groonga"}
+]
+
+index_column_diff Terms data_values_index

  Added: test/command/suite/index_column_diff/missing/without_section.expected (+81 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/index_column_diff/missing/without_section.expected    2019-03-18 18:51:07 +0900 (47e54d6b4)
@@ -0,0 +1,81 @@
+table_create Data TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Data value COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenNgram   --normalizer NormalizerNFKC100
+[[0,0.0,0.0],true]
+column_create Terms data_value_index COLUMN_INDEX|WITH_POSITION Data value
+[[0,0.0,0.0],true]
+load --table Data
+[
+{"value": "Hello World"},
+{"value": "Hello Groonga"}
+]
+[[0,0.0,0.0],2]
+truncate Terms.data_value_index
+[[0,0.0,0.0],true]
+load --table Data
+[
+{"value": "Good-by World"},
+{"value": "Good-by Groonga"}
+]
+[[0,0.0,0.0],2]
+index_column_diff Terms data_value_index
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "token": {
+        "id": 1,
+        "value": "hello"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "position": 0
+        },
+        {
+          "record_id": 2,
+          "position": 0
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 2,
+        "value": "world"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 1,
+          "position": 1
+        }
+      ]
+    },
+    {
+      "token": {
+        "id": 3,
+        "value": "groonga"
+      },
+      "remains": [
+
+      ],
+      "missings": [
+        {
+          "record_id": 2,
+          "position": 1
+        }
+      ]
+    }
+  ]
+]

  Added: test/command/suite/index_column_diff/missing/without_section.test (+23 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/index_column_diff/missing/without_section.test    2019-03-18 18:51:07 +0900 (5429f610a)
@@ -0,0 +1,23 @@
+table_create Data TABLE_NO_KEY
+column_create Data value COLUMN_SCALAR ShortText
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenNgram \
+  --normalizer NormalizerNFKC100
+column_create Terms data_value_index COLUMN_INDEX|WITH_POSITION Data value
+
+load --table Data
+[
+{"value": "Hello World"},
+{"value": "Hello Groonga"}
+]
+
+truncate Terms.data_value_index
+
+load --table Data
+[
+{"value": "Good-by World"},
+{"value": "Good-by Groonga"}
+]
+
+index_column_diff Terms data_value_index
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190318/d1cc2a73/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index