Kouhei Sutou 2019-03-18 18:51:07 +0900 (Mon, 18 Mar 2019) Revision: bc2ab09bca5dd16f2293e9b7ae28af62a73e94d1 https://github.com/groonga/groonga/commit/bc2ab09bca5dd16f2293e9b7ae28af62a73e94d1 Message: Add index_column_diff command New C API: * grn_index_column_diff() Added files: include/groonga/index_column.h lib/proc/proc_index_column.c test/command/suite/index_column_diff/missing/with_section.expected test/command/suite/index_column_diff/missing/with_section.test test/command/suite/index_column_diff/missing/without_section.expected test/command/suite/index_column_diff/missing/without_section.test Modified files: include/groonga.h include/groonga/Makefile.am lib/grn_proc.h lib/index_column.c lib/proc.c lib/proc/sources.am Modified: include/groonga.h (+2 -1) =================================================================== --- include/groonga.h 2019-03-18 18:50:39 +0900 (a398be7fc) +++ include/groonga.h 2019-03-18 18:51:07 +0900 (a735c5def) @@ -1,6 +1,6 @@ /* Copyright(C) 2014-2018 Brazil - Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> + Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -41,6 +41,7 @@ #include "groonga/highlighter.h" #include "groonga/id.h" #include "groonga/ii.h" +#include "groonga/index_column.h" #include "groonga/obj.h" #include "groonga/operator.h" #include "groonga/option.h" Modified: include/groonga/Makefile.am (+1 -0) =================================================================== --- include/groonga/Makefile.am 2019-03-18 18:50:39 +0900 (bb9e16be6) +++ include/groonga/Makefile.am 2019-03-18 18:51:07 +0900 (45860bc95) @@ -20,6 +20,7 @@ groonga_include_HEADERS = \ groonga.h \ id.h \ ii.h \ + index_column.h \ msgpack.h \ obj.h \ operator.h \ Added: include/groonga/index_column.h (+31 -0) 100644 =================================================================== --- /dev/null +++ 
include/groonga/index_column.h 2019-03-18 18:51:07 +0900 (4785d31bc) @@ -0,0 +1,31 @@ +/* + Copyright(C) 2019 Brazil + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +GRN_API grn_rc grn_index_column_diff(grn_ctx *ctx, + grn_obj *index_column, + grn_obj **diff); + +#ifdef __cplusplus +} +#endif Modified: lib/grn_proc.h (+1 -0) =================================================================== --- lib/grn_proc.h 2019-03-18 18:50:39 +0900 (3477d39b7) +++ lib/grn_proc.h 2019-03-18 18:51:07 +0900 (276e44888) @@ -54,6 +54,7 @@ void grn_proc_init_highlight(grn_ctx *ctx); void grn_proc_init_highlight_full(grn_ctx *ctx); void grn_proc_init_highlight_html(grn_ctx *ctx); void grn_proc_init_in_records(grn_ctx *ctx); +void grn_proc_init_index_column_diff(grn_ctx *ctx); void grn_proc_init_lock_acquire(grn_ctx *ctx); void grn_proc_init_lock_clear(grn_ctx *ctx); void grn_proc_init_lock_release(grn_ctx *ctx); Modified: lib/index_column.c (+415 -1) =================================================================== --- lib/index_column.c 2019-03-18 18:50:39 +0900 (f1dad536d) +++ lib/index_column.c 2019-03-18 18:51:07 +0900 (82904f01a) @@ -1,7 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2009-2015 Brazil - Copyright(C) 2018 Kouhei Sutou 
<kou****@clear*****> + Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -234,3 +234,417 @@ grn_index_column_rebuild(grn_ctx *ctx, grn_obj *index_column) GRN_API_RETURN(ctx->rc); } + +static const char *remains_column_name = "remains"; +static const char *missings_column_name = "missings"; + +typedef struct { + grn_obj *lexicon; + grn_ii *ii; + struct { + grn_bool with_section; + grn_bool with_position; + uint32_t n_elements; + } index; + size_t n_posting_elements; + grn_obj *source_table; + grn_obj source_columns; + grn_obj *tokens; + grn_obj *remains; + grn_obj *missings; + struct { + grn_obj value; + grn_obj postings; + grn_obj new_postings; + grn_obj missings; + } buffers; +} grn_index_column_diff_data; + +static void +grn_index_column_diff_data_init(grn_ctx *ctx, + grn_index_column_diff_data *data) +{ + GRN_PTR_INIT(&(data->source_columns), GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_VOID_INIT(&(data->buffers.value)); + GRN_UINT32_INIT(&(data->buffers.postings), GRN_OBJ_VECTOR); + GRN_UINT32_INIT(&(data->buffers.new_postings), GRN_OBJ_VECTOR); + GRN_UINT32_INIT(&(data->buffers.missings), GRN_OBJ_VECTOR); +} + +static void +grn_index_column_diff_data_fin(grn_ctx *ctx, + grn_index_column_diff_data *data) +{ + { + size_t n_columns = GRN_PTR_VECTOR_SIZE(&(data->source_columns)); + for (size_t i = 0; i < n_columns; i++) { + grn_obj *column = GRN_PTR_VALUE_AT(&(data->source_columns), i); + if (grn_obj_is_accessor(ctx, column)) { + grn_obj_close(ctx, column); + } + } + GRN_OBJ_FIN(ctx, &(data->source_columns)); + } + + GRN_OBJ_FIN(ctx, &(data->buffers.value)); + GRN_OBJ_FIN(ctx, &(data->buffers.postings)); + GRN_OBJ_FIN(ctx, &(data->buffers.new_postings)); + GRN_OBJ_FIN(ctx, &(data->buffers.missings)); +} + +static void +grn_index_column_diff_get_postings(grn_ctx *ctx, + grn_index_column_diff_data *data, + grn_id token_id) +{ + grn_obj *postings = 
&(data->buffers.postings); + + int added = 0; + grn_table_add(ctx, data->tokens, &token_id, sizeof(grn_id), &added); + if (!added) { + grn_obj_get_value(ctx, data->remains, token_id, postings); + return; + } + + const unsigned int ii_cursor_flags = 0; + grn_ii_cursor *ii_cursor = grn_ii_cursor_open(ctx, + data->ii, + token_id, + GRN_ID_NIL, + GRN_ID_MAX, + data->index.n_elements, + ii_cursor_flags); + if (ii_cursor) { + const grn_bool with_section = data->index.with_section; + const grn_bool with_position = data->index.with_position; + if (with_position) { + while (grn_ii_cursor_next(ctx, ii_cursor)) { + grn_posting *posting; + while ((posting = grn_ii_cursor_next_pos(ctx, ii_cursor))) { + GRN_UINT32_PUT(ctx, postings, posting->rid); + if (with_section) { + GRN_UINT32_PUT(ctx, postings, posting->sid); + } + GRN_UINT32_PUT(ctx, postings, posting->pos); + } + } + } else { + grn_posting *posting; + while ((posting = grn_ii_cursor_next(ctx, ii_cursor))) { + GRN_UINT32_PUT(ctx, postings, posting->rid); + if (with_section) { + GRN_UINT32_PUT(ctx, postings, posting->sid); + } + } + } + grn_ii_cursor_close(ctx, ii_cursor); + } + + grn_obj_set_value(ctx, data->remains, token_id, postings, GRN_OBJ_SET); +} + +static int +grn_index_column_diff_compare_posting(grn_ctx *ctx, + grn_index_column_diff_data *data, + size_t nth_posting, + grn_posting *current_posting) +{ + grn_obj *postings = &(data->buffers.postings); + const grn_bool with_section = data->index.with_section; + const grn_bool with_position = data->index.with_position; + const size_t n_posting_elements = data->n_posting_elements; + + size_t i = nth_posting * n_posting_elements; + + grn_posting posting = {0}; + posting.rid = GRN_UINT32_VALUE_AT(postings, i); + + if (posting.rid < current_posting->rid) { + return -1; + } else if (posting.rid > current_posting->rid) { + return 1; + } + + if (with_section) { + i++; + posting.sid = GRN_UINT32_VALUE_AT(postings, i); + if (posting.sid < current_posting->sid) { + return -1; 
+ } else if (posting.sid > current_posting->sid) { + return 1; + } + } + + if (with_position) { + i++; + posting.pos = GRN_UINT32_VALUE_AT(postings, i); + if (posting.pos < current_posting->pos) { + return -1; + } else if (posting.pos > current_posting->pos) { + return 1; + } + } + + return 0; +} + +static int64_t +grn_index_column_diff_find_posting(grn_ctx *ctx, + grn_index_column_diff_data *data, + grn_posting *current_posting) +{ + grn_obj *postings = &(data->buffers.postings); + const size_t n_posting_elements = data->n_posting_elements; + int64_t min = 0; + int64_t max = (GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements) - 1; + while (min <= max) { + int64_t middle = min + ((max - min) / 2); + int compared = + grn_index_column_diff_compare_posting(ctx, data, middle, current_posting); + if (compared == 0) { + return middle; + } else if (compared < 0) { + min = middle + 1; + } else { + max = middle - 1; + } + } + return -1; +} + +static void +grn_index_column_diff_compute(grn_ctx *ctx, + grn_index_column_diff_data *data) +{ + grn_obj *source_columns = &(data->source_columns); + const size_t n_source_columns = GRN_PTR_VECTOR_SIZE(source_columns); + grn_obj *value = &(data->buffers.value); + grn_obj *postings = &(data->buffers.postings); + grn_obj *new_postings = &(data->buffers.new_postings); + grn_obj *missings = &(data->buffers.missings); + const grn_bool with_section = data->index.with_section; + const grn_bool with_position = data->index.with_position; + const size_t n_posting_elements = data->n_posting_elements; + + GRN_TABLE_EACH_BEGIN_FLAGS(ctx, + data->source_table, + cursor, + id, + GRN_CURSOR_BY_ID) { + for (size_t i = 0; i < n_source_columns; i++) { + grn_posting current_posting = {0}; + current_posting.rid = id; + current_posting.sid = i + 1; + grn_obj *source = GRN_PTR_VALUE_AT(source_columns, i); + + GRN_BULK_REWIND(value); + grn_obj_get_value(ctx, source, id, value); + + const unsigned int token_cursor_flags = 0; + grn_token_cursor 
*token_cursor = + grn_token_cursor_open(ctx, + data->lexicon, + GRN_BULK_HEAD(value), + GRN_BULK_VSIZE(value), + GRN_TOKEN_ADD, + token_cursor_flags); + if (!token_cursor) { + continue; + } + + while (grn_token_cursor_get_status(ctx, token_cursor) == + GRN_TOKEN_CURSOR_DOING) { + const grn_id token_id = grn_token_cursor_next(ctx, token_cursor); + if (token_id == GRN_ID_NIL) { + continue; + } + + grn_token *token = grn_token_cursor_get_token(ctx, token_cursor); + current_posting.pos = grn_token_get_position(ctx, token); + + GRN_BULK_REWIND(postings); + grn_index_column_diff_get_postings(ctx, data, token_id); + + int64_t nth_posting = + grn_index_column_diff_find_posting(ctx, data, &current_posting); + if (nth_posting >= 0) { + GRN_BULK_REWIND(new_postings); + const size_t posting_size = sizeof(uint32_t) * n_posting_elements; + grn_bulk_write(ctx, + new_postings, + GRN_BULK_HEAD(postings), + posting_size * nth_posting); + const size_t n_postings = + GRN_UINT32_VECTOR_SIZE(postings) / n_posting_elements; + grn_bulk_write(ctx, + new_postings, + GRN_BULK_HEAD(postings) + + (posting_size * (nth_posting + 1)), + posting_size * (n_postings - nth_posting - 1)); + grn_obj_set_value(ctx, + data->remains, + token_id, + new_postings, + GRN_OBJ_SET); + } else { + GRN_BULK_REWIND(missings); + GRN_UINT32_PUT(ctx, missings, current_posting.rid); + if (with_section) { + GRN_UINT32_PUT(ctx, missings, current_posting.sid); + } + if (with_position) { + GRN_UINT32_PUT(ctx, missings, current_posting.pos); + } + grn_obj_set_value(ctx, + data->missings, + token_id, + missings, + GRN_OBJ_APPEND); + } + } + grn_token_cursor_close(ctx, token_cursor); + } + } GRN_TABLE_EACH_END(ctx, cursor); + + GRN_TABLE_EACH_BEGIN(ctx, data->tokens, cursor, id) { + GRN_BULK_REWIND(postings); + grn_obj_get_value(ctx, data->remains, id, postings); + if (GRN_UINT32_VECTOR_SIZE(postings) > 0) { + continue; + } + GRN_BULK_REWIND(missings); + grn_obj_get_value(ctx, data->missings, id, missings); + if 
(GRN_UINT32_VECTOR_SIZE(missings) > 0) { + continue; + } + grn_table_cursor_delete(ctx, cursor); + } GRN_TABLE_EACH_END(ctx, cursor); +} + +grn_rc +grn_index_column_diff(grn_ctx *ctx, + grn_obj *index_column, + grn_obj **diff) +{ + grn_index_column_diff_data data = {0}; + + GRN_API_ENTER; + + grn_index_column_diff_data_init(ctx, &data); + + if (!index_column) { + ERR(GRN_INVALID_ARGUMENT, + "[index-column][diff] index column must not NULL"); + goto exit; + } + if (!grn_obj_is_index_column(ctx, index_column)) { + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size; + name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); + ERR(GRN_INVALID_ARGUMENT, + "[index-column][diff] invalid index column: <%.*s>: <%s>", + name_size, name, + grn_obj_type_to_string(index_column->header.type)); + goto exit; + } + data.ii = (grn_ii *)index_column; + { + grn_column_flags flags = grn_column_get_flags(ctx, index_column); + data.index.with_section = + ((flags & GRN_OBJ_WITH_SECTION) == GRN_OBJ_WITH_SECTION); + data.index.with_position = + ((flags & GRN_OBJ_WITH_POSITION) == GRN_OBJ_WITH_POSITION); + data.index.n_elements = grn_ii_get_n_elements(ctx, data.ii); + } + + data.n_posting_elements = 1; + if (data.index.with_section) { + data.n_posting_elements++; + } + if (data.index.with_position) { + data.n_posting_elements++; + } + + data.source_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index_column)); + { + grn_obj source_columns; + GRN_RECORD_INIT(&source_columns, GRN_OBJ_VECTOR, GRN_ID_NIL); + grn_obj_get_info(ctx, index_column, GRN_INFO_SOURCE, &source_columns); + size_t n_columns = GRN_RECORD_VECTOR_SIZE(&source_columns); + for (size_t i = 0; i < n_columns; i++) { + grn_id source_id = GRN_RECORD_VALUE_AT(&source_columns, i); + grn_obj *source = grn_ctx_at(ctx, source_id); + GRN_PTR_PUT(ctx, &(data.source_columns), source); + } + GRN_OBJ_FIN(ctx, &source_columns); + } + + data.lexicon = grn_ctx_at(ctx, index_column->header.domain); + + data.tokens = grn_table_create(ctx, 
+ NULL, 0, + NULL, + GRN_TABLE_HASH_KEY, + data.lexicon, + NULL); + if (!data.tokens) { + char message[GRN_CTX_MSGSIZE]; + grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf); + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); + ERR(GRN_INVALID_ARGUMENT, + "[index-column][diff] failed to create token table: <%.*s>: %s", + name_size, name, + message); + goto exit; + } + data.remains = grn_column_create(ctx, + data.tokens, + remains_column_name, + strlen(remains_column_name), + NULL, + GRN_OBJ_COLUMN_VECTOR, + grn_ctx_at(ctx, GRN_DB_UINT32)); + if (!data.remains) { + char message[GRN_CTX_MSGSIZE]; + grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf); + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); + ERR(GRN_INVALID_ARGUMENT, + "[index-column][diff] failed to create reamins column: <%.*s>: %s", + name_size, name, + message); + goto exit; + } + data.missings = grn_column_create(ctx, + data.tokens, + missings_column_name, + strlen(missings_column_name), + NULL, + GRN_OBJ_COLUMN_VECTOR, + grn_ctx_at(ctx, GRN_DB_UINT32)); + if (!data.missings) { + char message[GRN_CTX_MSGSIZE]; + grn_strcpy(message, GRN_CTX_MSGSIZE, ctx->errbuf); + char name[GRN_TABLE_MAX_KEY_SIZE]; + int name_size = grn_obj_name(ctx, index_column, name, sizeof(name)); + ERR(GRN_INVALID_ARGUMENT, + "[index-column][diff] failed to create missings column: <%.*s>: %s", + name_size, name, + message); + goto exit; + } + + grn_index_column_diff_compute(ctx, &data); + *diff = data.tokens; + data.tokens = NULL; + +exit : + if (data.tokens) { + grn_obj_close(ctx, data.tokens); + } + + grn_index_column_diff_data_fin(ctx, &data); + + GRN_API_RETURN(ctx->rc); +} Modified: lib/proc.c (+2 -0) =================================================================== --- lib/proc.c 2019-03-18 18:50:39 +0900 (7c618c708) +++ lib/proc.c 2019-03-18 18:51:07 +0900 (ae85f4faf) @@ -4352,4 +4352,6 @@ 
grn_db_init_builtin_commands(grn_ctx *ctx) grn_proc_init_query_log_flags_remove(ctx); grn_proc_init_cast_loose(ctx); + + grn_proc_init_index_column_diff(ctx); } Added: lib/proc/proc_index_column.c (+252 -0) 100644 =================================================================== --- /dev/null +++ lib/proc/proc_index_column.c 2019-03-18 18:51:07 +0900 (91f937ec3) @@ -0,0 +1,252 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2019 Kouhei Sutou <kou****@clear*****> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "../grn_proc.h" + +#include "../grn_ctx.h" +#include "../grn_db.h" +#include "../grn_str.h" + +#include <groonga/plugin.h> + +static const char *remains_column_name = "remains"; +static const char *missings_column_name = "missings"; + +static void +index_column_diff_output_postings(grn_ctx *ctx, + grn_column_flags index_column_flags, + grn_obj *postings, + const char *name) +{ + size_t i; + size_t n_elements = 1; + if (index_column_flags & GRN_OBJ_WITH_SECTION) { + n_elements++; + } + if (index_column_flags & GRN_OBJ_WITH_POSITION) { + n_elements++; + } + size_t n = GRN_UINT32_VECTOR_SIZE(postings); + grn_ctx_output_array_open(ctx, name, n); + for (i = 0; i < n; i += n_elements) { + grn_ctx_output_map_open(ctx, "posting", n_elements); + { + size_t j = i; + grn_ctx_output_cstr(ctx, "record_id"); + grn_id record_id = 
GRN_UINT32_VALUE_AT(postings, j); + grn_ctx_output_uint32(ctx, record_id); + if (index_column_flags & GRN_OBJ_WITH_SECTION) { + j++; + grn_ctx_output_cstr(ctx, "section_id"); + grn_id section_id = GRN_UINT32_VALUE_AT(postings, j); + grn_ctx_output_uint32(ctx, section_id); + } + if (index_column_flags & GRN_OBJ_WITH_POSITION) { + j++; + grn_ctx_output_cstr(ctx, "position"); + uint32_t position = GRN_UINT32_VALUE_AT(postings, j); + grn_ctx_output_uint32(ctx, position); + } + } + grn_ctx_output_map_close(ctx); + } + grn_ctx_output_array_close(ctx); +} + +static void +index_column_diff_output(grn_ctx *ctx, + grn_obj *diff, + grn_obj *lexicon, + grn_column_flags index_column_flags) +{ + grn_obj *remains_column = + grn_obj_column(ctx, + diff, + remains_column_name, + strlen(remains_column_name)); + grn_obj *missings_column = + grn_obj_column(ctx, + diff, + missings_column_name, + strlen(missings_column_name)); + grn_obj key; + GRN_OBJ_INIT(&key, GRN_BULK, GRN_OBJ_DO_SHALLOW_COPY, lexicon->header.domain); + grn_obj remains; + GRN_UINT32_INIT(&remains, GRN_OBJ_VECTOR); + grn_obj missings; + GRN_UINT32_INIT(&missings, GRN_OBJ_VECTOR); + grn_ctx_output_array_open(ctx, "diffs", grn_table_size(ctx, diff)); + { + GRN_TABLE_EACH_BEGIN(ctx, diff, cursor, id) { + grn_ctx_output_map_open(ctx, "diff", 3); + { + grn_ctx_output_cstr(ctx, "token"); + grn_ctx_output_map_open(ctx, "token", 2); + { + grn_ctx_output_cstr(ctx, "id"); + void *token_id_buffer; + grn_table_cursor_get_key(ctx, cursor, &token_id_buffer); + grn_id token_id = *((grn_id *)token_id_buffer); + grn_ctx_output_uint32(ctx, token_id); + + grn_ctx_output_cstr(ctx, "value"); + char key_buffer[GRN_TABLE_MAX_KEY_SIZE]; + int key_size = grn_table_get_key(ctx, + lexicon, + token_id, + key_buffer, + sizeof(key_buffer)); + GRN_TEXT_SET(ctx, &key, key_buffer, key_size); + grn_ctx_output_obj(ctx, &key, NULL); + } + grn_ctx_output_map_close(ctx); + + grn_ctx_output_cstr(ctx, "remains"); + GRN_BULK_REWIND(&remains); + 
grn_obj_get_value(ctx, remains_column, id, &remains); + index_column_diff_output_postings(ctx, + index_column_flags, + &remains, + "remains"); + + grn_ctx_output_cstr(ctx, "missings"); + GRN_BULK_REWIND(&missings); + grn_obj_get_value(ctx, missings_column, id, &missings); + index_column_diff_output_postings(ctx, + index_column_flags, + &missings, + "missings"); + } + grn_ctx_output_map_close(ctx); + } GRN_TABLE_EACH_END(ctx, cursor); + } + grn_ctx_output_array_close(ctx); + GRN_OBJ_FIN(ctx, &missings); + GRN_OBJ_FIN(ctx, &remains); + GRN_OBJ_FIN(ctx, &key); +} + +static grn_obj * +command_index_column_diff(grn_ctx *ctx, + int n_args, + grn_obj **args, + grn_user_data *user_data) +{ + grn_raw_string table_name; + grn_raw_string column_name; + grn_obj *table = NULL; + grn_obj *column = NULL; + grn_obj *diff = NULL; + + table_name.value = + grn_plugin_proc_get_var_string(ctx, user_data, + "table", -1, + &(table_name.length)); + column_name.value = + grn_plugin_proc_get_var_string(ctx, user_data, + "name", -1, + &(column_name.length)); + + table = grn_ctx_get(ctx, table_name.value, table_name.length); + if (!table) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "[index-column][diff] table doesn't exist: <%.*s>", + (int)(table_name.length), + table_name.value); + goto exit; + } + if (!grn_obj_is_lexicon(ctx, table)) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "[index-column][diff] table must be lexicon: <%.*s>: %s", + (int)(table_name.length), + table_name.value, + grn_obj_type_to_string(table->header.type)); + goto exit; + } + + column = grn_obj_column(ctx, table, column_name.value, column_name.length); + if (!column) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + "[index-column][diff] column doesn't exist: <%.*s>: <%.*s>", + (int)(table_name.length), + table_name.value, + (int)(column_name.length), + column_name.value); + goto exit; + } + if (!grn_obj_is_index_column(ctx, column)) { + GRN_PLUGIN_ERROR(ctx, + GRN_INVALID_ARGUMENT, + 
"[index-column][diff] column must be index column: " + "<%.*s>: <%.*s>: %s", + (int)(table_name.length), + table_name.value, + (int)(column_name.length), + column_name.value, + grn_obj_type_to_string(column->header.type)); + goto exit; + } + + grn_index_column_diff(ctx, column, &diff); + if (ctx->rc != GRN_SUCCESS) { + GRN_PLUGIN_ERROR(ctx, + ctx->rc, + "[index-column][diff] failed to diff: " + "<%.*s>: <%.*s>: %s", + (int)(table_name.length), + table_name.value, + (int)(column_name.length), + column_name.value, + ctx->errbuf); + goto exit; + } + + index_column_diff_output(ctx, + diff, + table, + grn_column_get_flags(ctx, column)); + +exit : + if (grn_obj_is_accessor(ctx, column)) { + grn_obj_close(ctx, column); + } + + if (diff) { + grn_obj_close(ctx, diff); + } + + return NULL; +} + +void +grn_proc_init_index_column_diff(grn_ctx *ctx) +{ + grn_expr_var vars[2]; + unsigned int n_vars = 0; + + grn_plugin_expr_var_init(ctx, &(vars[n_vars++]), "table", -1); + grn_plugin_expr_var_init(ctx, &(vars[n_vars++]), "name", -1); + grn_plugin_command_create(ctx, + "index_column_diff", -1, + command_index_column_diff, + n_vars, + vars); +} Modified: lib/proc/sources.am (+1 -0) =================================================================== --- lib/proc/sources.am 2019-03-18 18:50:39 +0900 (aa4782dce) +++ lib/proc/sources.am 2019-03-18 18:51:07 +0900 (23606348f) @@ -6,6 +6,7 @@ libgrnproc_la_SOURCES = \ proc_fuzzy_search.c \ proc_highlight.c \ proc_in_records.c \ + proc_index_column.c \ proc_lexicon.c \ proc_lock.c \ proc_normalize.c \ Added: test/command/suite/index_column_diff/missing/with_section.expected (+164 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/index_column_diff/missing/with_section.expected 2019-03-18 18:51:07 +0900 (dace34582) @@ -0,0 +1,164 @@ +table_create Data TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Data value1 COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +column_create Data 
value2 COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenNgram --normalizer NormalizerNFKC100 +[[0,0.0,0.0],true] +column_create Terms data_values_index COLUMN_INDEX|WITH_POSITION|WITH_SECTION Data value1,value2 +[[0,0.0,0.0],true] +load --table Data +[ +{"value1": "Hello World", + "value2": "Good-by World"}, +{"value1": "Hello Groonga", + "value2": "Good-by Groonga"} +] +[[0,0.0,0.0],2] +truncate Terms.data_values_index +[[0,0.0,0.0],true] +load --table Data +[ +{"value1": "Morning World", + "value2": "Good night World"}, +{"value1": "Morning Groonga", + "value2": "Good night Groonga"} +] +[[0,0.0,0.0],2] +index_column_diff Terms data_values_index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "token": { + "id": 1, + "value": "hello" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "section_id": 1, + "position": 0 + }, + { + "record_id": 2, + "section_id": 1, + "position": 0 + } + ] + }, + { + "token": { + "id": 2, + "value": "world" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "section_id": 1, + "position": 1 + }, + { + "record_id": 1, + "section_id": 2, + "position": 3 + } + ] + }, + { + "token": { + "id": 3, + "value": "good" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "section_id": 2, + "position": 0 + }, + { + "record_id": 2, + "section_id": 2, + "position": 0 + } + ] + }, + { + "token": { + "id": 4, + "value": "-" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "section_id": 2, + "position": 1 + }, + { + "record_id": 2, + "section_id": 2, + "position": 1 + } + ] + }, + { + "token": { + "id": 5, + "value": "by" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "section_id": 2, + "position": 2 + }, + { + "record_id": 2, + "section_id": 2, + "position": 2 + } + ] + }, + { + "token": { + "id": 6, + "value": "groonga" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 2, + "section_id": 1, + 
"position": 1 + }, + { + "record_id": 2, + "section_id": 2, + "position": 3 + } + ] + } + ] +] Added: test/command/suite/index_column_diff/missing/with_section.test (+30 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/index_column_diff/missing/with_section.test 2019-03-18 18:51:07 +0900 (39d0ea54c) @@ -0,0 +1,30 @@ +table_create Data TABLE_NO_KEY +column_create Data value1 COLUMN_SCALAR ShortText +column_create Data value2 COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenNgram \ + --normalizer NormalizerNFKC100 +column_create Terms data_values_index \ + COLUMN_INDEX|WITH_POSITION|WITH_SECTION \ + Data value1,value2 + +load --table Data +[ +{"value1": "Hello World", + "value2": "Good-by World"}, +{"value1": "Hello Groonga", + "value2": "Good-by Groonga"} +] + +truncate Terms.data_values_index + +load --table Data +[ +{"value1": "Morning World", + "value2": "Good night World"}, +{"value1": "Morning Groonga", + "value2": "Good night Groonga"} +] + +index_column_diff Terms data_values_index Added: test/command/suite/index_column_diff/missing/without_section.expected (+81 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/index_column_diff/missing/without_section.expected 2019-03-18 18:51:07 +0900 (47e54d6b4) @@ -0,0 +1,81 @@ +table_create Data TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Data value COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenNgram --normalizer NormalizerNFKC100 +[[0,0.0,0.0],true] +column_create Terms data_value_index COLUMN_INDEX|WITH_POSITION Data value +[[0,0.0,0.0],true] +load --table Data +[ +{"value": "Hello World"}, +{"value": "Hello Groonga"} +] +[[0,0.0,0.0],2] +truncate Terms.data_value_index +[[0,0.0,0.0],true] +load --table Data +[ +{"value": "Good-by World"}, +{"value": "Good-by Groonga"} 
+] +[[0,0.0,0.0],2] +index_column_diff Terms data_value_index +[ + [ + 0, + 0.0, + 0.0 + ], + [ + { + "token": { + "id": 1, + "value": "hello" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "position": 0 + }, + { + "record_id": 2, + "position": 0 + } + ] + }, + { + "token": { + "id": 2, + "value": "world" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 1, + "position": 1 + } + ] + }, + { + "token": { + "id": 3, + "value": "groonga" + }, + "remains": [ + + ], + "missings": [ + { + "record_id": 2, + "position": 1 + } + ] + } + ] +] Added: test/command/suite/index_column_diff/missing/without_section.test (+23 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/index_column_diff/missing/without_section.test 2019-03-18 18:51:07 +0900 (5429f610a) @@ -0,0 +1,23 @@ +table_create Data TABLE_NO_KEY +column_create Data value COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenNgram \ + --normalizer NormalizerNFKC100 +column_create Terms data_value_index COLUMN_INDEX|WITH_POSITION Data value + +load --table Data +[ +{"value": "Hello World"}, +{"value": "Hello Groonga"} +] + +truncate Terms.data_value_index + +load --table Data +[ +{"value": "Good-by World"}, +{"value": "Good-by Groonga"} +] + +index_column_diff Terms data_value_index -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190318/d1cc2a73/attachment-0001.html>