[Groonga-commit] groonga/groonga at 53624c3 [master] dump: support tokenizer options

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Apr 5 18:18:29 JST 2018


Kouhei Sutou	2018-04-05 18:18:29 +0900 (Thu, 05 Apr 2018)

  New Revision: 53624c39fc4e1507243c5c4494b383c2cd0ebd19
  https://github.com/groonga/groonga/commit/53624c39fc4e1507243c5c4494b383c2cd0ebd19

  Message:
    dump: support tokenizer options

  Added files:
    test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.expected
    test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.test
    test/command/suite/dump/schema/table/hash/tokenizer_with_options.expected
    test/command/suite/dump/schema/table/hash/tokenizer_with_options.test
    test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.expected
    test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.test
  Modified files:
    include/groonga/table.h
    lib/proc/proc_dump.c
    lib/table.c

  Modified: include/groonga/table.h (+6 -0)
===================================================================
--- include/groonga/table.h    2018-04-05 18:16:58 +0900 (dfaebdc88)
+++ include/groonga/table.h    2018-04-05 18:18:29 +0900 (934ea5f59)
@@ -246,6 +246,9 @@ typedef void *(*grn_tokenizer_open_options_func)(grn_ctx *ctx,
                                                  grn_obj *values,
                                                  void *user_data);
 
+GRN_API grn_rc
+grn_table_get_tokenizer_options(grn_ctx *ctx, grn_obj *table, grn_obj *options);
+
 GRN_API void *
 grn_table_cache_tokenizer_options(grn_ctx *ctx,
                                   grn_obj *table,
@@ -253,6 +256,9 @@ grn_table_cache_tokenizer_options(grn_ctx *ctx,
                                   grn_close_func close_options_func,
                                   void *user_data);
 
+GRN_API grn_rc
+grn_table_get_tokenizer_string(grn_ctx *ctx, grn_obj *table, grn_obj *output);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/proc/proc_dump.c (+20 -2)
===================================================================
--- lib/proc/proc_dump.c    2018-04-05 18:16:58 +0900 (9439e0cda)
+++ lib/proc/proc_dump.c    2018-04-05 18:18:29 +0900 (16a1e76bb)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2017 Brazil
+  Copyright(C) 2009-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -704,6 +704,20 @@ exit :
 }
 
 static void
+dump_optionable_obj_string(grn_ctx *ctx,
+                           grn_dumper *dumper,
+                           grn_obj *string)
+{
+  const char *value = GRN_TEXT_VALUE(string);
+  size_t length = GRN_TEXT_LEN(string);
+  if (length > 0 && value[length - 1] == ')') {
+    grn_text_otoj(ctx, dumper->output, string, NULL);
+  } else {
+    GRN_TEXT_PUT(ctx, dumper->output, value, length);
+  }
+}
+
+static void
 dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table)
 {
   grn_obj *domain = NULL;
@@ -763,8 +777,12 @@ dump_table(grn_ctx *ctx, grn_dumper *dumper, grn_obj *table)
     grn_obj_unlink(ctx, range);
   }
   if (default_tokenizer) {
+    grn_obj sub_output;
     GRN_TEXT_PUTS(ctx, dumper->output, " --default_tokenizer ");
-    dump_obj_name(ctx, dumper, default_tokenizer);
+    GRN_TEXT_INIT(&sub_output, 0);
+    grn_table_get_tokenizer_string(ctx, table, &sub_output);
+    dump_optionable_obj_string(ctx, dumper, &sub_output);
+    GRN_OBJ_FIN(ctx, &sub_output);
   }
   if (normalizer) {
     GRN_TEXT_PUTS(ctx, dumper->output, " --normalizer ");

  Modified: lib/table.c (+96 -2)
===================================================================
--- lib/table.c    2018-04-05 18:16:58 +0900 (7e7c1813e)
+++ lib/table.c    2018-04-05 18:18:29 +0900 (6201dbdf5)
@@ -184,6 +184,29 @@ grn_table_tokenizer_set_options(grn_ctx *ctx,
   }
 }
 
+grn_rc
+grn_table_get_tokenizer_options(grn_ctx *ctx,
+                                grn_obj *table,
+                                grn_obj *options)
+{
+  GRN_API_ENTER;
+
+  if (!grn_obj_is_lexicon(ctx, table)) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][tokenizer-options][get] table must be key table: %s",
+        table ? grn_obj_type_to_string(table->header.type) : "(null)");
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  grn_obj_get_option_values(ctx,
+                            table,
+                            "tokenizer",
+                            -1,
+                            GRN_OPTION_REVISION_NONE,
+                            options);
+  GRN_API_RETURN(ctx->rc);
+}
+
 void *
 grn_table_cache_tokenizer_options(grn_ctx *ctx,
                                   grn_obj *table,
@@ -200,7 +223,7 @@ grn_table_cache_tokenizer_options(grn_ctx *ctx,
 
   if (!table) {
     ERR(GRN_INVALID_ARGUMENT,
-        "[table][tokenizer-options][set] table is NULL");
+        "[table][tokenizer-options][cache] table is NULL");
     GRN_API_RETURN(NULL);
   }
 
@@ -216,7 +239,7 @@ grn_table_cache_tokenizer_options(grn_ctx *ctx,
     break;
   default :
     ERR(GRN_INVALID_ARGUMENT,
-        "[table][tokenizer-options][set] table must key table: %s",
+        "[table][tokenizer-options][cache] table must key table: %s",
         grn_obj_type_to_string(table->header.type));
     GRN_API_RETURN(NULL);
     break;
@@ -247,3 +270,74 @@ exit :
   GRN_API_RETURN(tokenizer->options);
 }
 
+grn_rc
+grn_table_get_tokenizer_string(grn_ctx *ctx,
+                               grn_obj *table,
+                               grn_obj *output)
+{
+  grn_obj *tokenizer;
+  char name[GRN_TABLE_MAX_KEY_SIZE];
+  unsigned int name_size;
+  grn_obj options;
+  unsigned int n = 0;
+
+  GRN_API_ENTER;
+
+  if (!grn_obj_is_lexicon(ctx, table)) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][tokenizer-options][get] table must be key table: %s",
+        table ? grn_obj_type_to_string(table->header.type) : "(null)");
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  grn_table_get_info(ctx, table, NULL, NULL, &tokenizer, NULL, NULL);
+  if (!tokenizer) {
+    GRN_API_RETURN(ctx->rc);
+  }
+
+  name_size = grn_obj_name(ctx, tokenizer, name, GRN_TABLE_MAX_KEY_SIZE);
+  GRN_TEXT_PUT(ctx, output, name, name_size);
+
+  GRN_VOID_INIT(&options);
+  grn_obj_get_option_values(ctx,
+                            table,
+                            "tokenizer",
+                            -1,
+                            GRN_OPTION_REVISION_NONE,
+                            &options);
+  if (options.header.type != GRN_DB_VOID) {
+    n = grn_vector_size(ctx, &options);
+  }
+  if (n > 0) {
+    unsigned int i;
+    grn_obj option;
+
+    GRN_VOID_INIT(&option);
+    GRN_TEXT_PUTS(ctx, output, "(");
+    for (i = 0; i < n; i++) {
+      const char *value;
+      unsigned int value_size;
+      grn_id domain;
+
+      if (i > 0) {
+        GRN_TEXT_PUTS(ctx, output, ", ");
+      }
+
+      value_size = grn_vector_get_element(ctx,
+                                          &options,
+                                          i,
+                                          &value,
+                                          NULL,
+                                          &domain);
+      grn_obj_reinit(ctx, &option, domain, 0);
+      grn_bulk_write(ctx, &option, value, value_size);
+      grn_text_otoj(ctx, output, &option, NULL);
+    }
+    GRN_TEXT_PUTS(ctx, output, ")");
+    GRN_OBJ_FIN(ctx, &option);
+  }
+  GRN_OBJ_FIN(ctx, &options);
+
+  GRN_API_RETURN(ctx->rc);
+}
+

  Added: test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.expected    2018-04-05 18:18:29 +0900 (b07eb6b32)
@@ -0,0 +1,4 @@
+table_create Users TABLE_DAT_KEY ShortText   --default_tokenizer 'TokenNgram("n", 4)'
+[[0,0.0,0.0],true]
+dump
+table_create Users TABLE_DAT_KEY ShortText --default_tokenizer "TokenNgram(\"n\", 4)"

  Added: test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/dump/schema/table/double_array_trie/tokenizer_with_options.test    2018-04-05 18:18:29 +0900 (a4c360ff8)
@@ -0,0 +1,4 @@
+table_create Users TABLE_DAT_KEY ShortText \
+  --default_tokenizer 'TokenNgram("n", 4)'
+
+dump

  Added: test/command/suite/dump/schema/table/hash/tokenizer_with_options.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/dump/schema/table/hash/tokenizer_with_options.expected    2018-04-05 18:18:29 +0900 (8034ec3ca)
@@ -0,0 +1,4 @@
+table_create Users TABLE_HASH_KEY ShortText   --default_tokenizer 'TokenNgram("n", 4)'
+[[0,0.0,0.0],true]
+dump
+table_create Users TABLE_HASH_KEY ShortText --default_tokenizer "TokenNgram(\"n\", 4)"

  Added: test/command/suite/dump/schema/table/hash/tokenizer_with_options.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/dump/schema/table/hash/tokenizer_with_options.test    2018-04-05 18:18:29 +0900 (10249ea5b)
@@ -0,0 +1,4 @@
+table_create Users TABLE_HASH_KEY ShortText \
+  --default_tokenizer 'TokenNgram("n", 4)'
+
+dump

  Added: test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.expected    2018-04-05 18:18:29 +0900 (314ecc527)
@@ -0,0 +1,4 @@
+table_create Users TABLE_PAT_KEY ShortText   --default_tokenizer 'TokenNgram("n", 4)'
+[[0,0.0,0.0],true]
+dump
+table_create Users TABLE_PAT_KEY ShortText --default_tokenizer "TokenNgram(\"n\", 4)"

  Added: test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/dump/schema/table/patricia_trie/tokenizer_with_options.test    2018-04-05 18:18:29 +0900 (bbc58f62f)
@@ -0,0 +1,4 @@
+table_create Users TABLE_PAT_KEY ShortText \
+  --default_tokenizer 'TokenNgram("n", 4)'
+
+dump
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180405/784c9048/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index