[Groonga-commit] groonga/groonga at edf9b24 [master] schema: support dumping tokenizer with options

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Apr 5 18:21:31 JST 2018


Kouhei Sutou	2018-04-05 18:21:31 +0900 (Thu, 05 Apr 2018)

  New Revision: edf9b24b80b1caa4db3f93852d7aed3966540d92
  https://github.com/groonga/groonga/commit/edf9b24b80b1caa4db3f93852d7aed3966540d92

  Message:
    schema: support dumping tokenizer with options

  Added files:
    test/command/suite/schema/tables/tokenizer_with_options.test
  Copied files:
    test/command/suite/schema/tables/tokenizer_with_options.expected
      (from test/command/suite/schema/tables/tokenizer.expected)
  Modified files:
    lib/proc/proc_schema.c
    test/command/suite/schema/tables/columns/type/index_medium.expected
    test/command/suite/schema/tables/columns/type/index_small.expected
    test/command/suite/schema/tables/token_filters.expected
    test/command/suite/schema/tables/tokenizer.expected

  Modified: lib/proc/proc_schema.c (+83 -7)
===================================================================
--- lib/proc/proc_schema.c    2018-04-05 18:18:29 +0900 (eb1e71943)
+++ lib/proc/proc_schema.c    2018-04-05 18:21:31 +0900 (a9b527c1e)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2015-2016 Brazil
+  Copyright(C) 2015-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -97,6 +97,30 @@ command_schema_output_value_type(grn_ctx *ctx, grn_obj *value_type)
   command_schema_output_type(ctx, "value_type", value_type);
 }
 
+static grn_bool
+command_schema_output_command_include_special_character(grn_ctx *ctx,
+                                                        const char *value,
+                                                        unsigned int size)
+{
+  const char *end = value + size;
+
+  for (; value < end; value++) {
+    switch (value[0]) {
+    case '(' :
+    case ')' :
+    case ' ' :
+    case '"' :
+    case '\'' :
+      return GRN_TRUE;
+      break;
+    default :
+      break;
+    }
+  }
+
+  return GRN_FALSE;
+}
+
 static void
 command_schema_output_command(grn_ctx *ctx,
                               const char *command_name,
@@ -145,12 +169,22 @@ command_schema_output_command(grn_ctx *ctx,
 
       name_size  = grn_vector_get_element(ctx, arguments, i, &name,
                                           NULL, NULL);
+      grn_text_printf(ctx, &command_line,
+                      " --%.*s ",
+                      name_size, name);
       value_size = grn_vector_get_element(ctx, arguments, i + 1, &value,
                                           NULL, NULL);
-      grn_text_printf(ctx, &command_line,
-                      " --%.*s %.*s",
-                      name_size, name,
-                      value_size, value);
+      if (command_schema_output_command_include_special_character(ctx,
+                                                                  value,
+                                                                  value_size)) {
+        grn_obj value_text;
+        GRN_TEXT_INIT(&value_text, GRN_OBJ_DO_SHALLOW_COPY);
+        GRN_TEXT_SET(ctx, &value_text, value, value_size);
+        grn_text_otoj(ctx, &command_line, &value_text, NULL);
+        GRN_OBJ_FIN(ctx, &value_text);
+      } else {
+        GRN_TEXT_PUT(ctx, &command_line, value, value_size);
+      }
     }
     grn_ctx_output_str(ctx,
                        GRN_TEXT_VALUE(&command_line),
@@ -506,7 +540,7 @@ command_schema_table_output_tokenizer(grn_ctx *ctx, grn_obj *table)
     return;
   }
 
-  grn_ctx_output_map_open(ctx, "tokenizer", 2);
+  grn_ctx_output_map_open(ctx, "tokenizer", 3);
 
   grn_ctx_output_cstr(ctx, "id");
   command_schema_output_id(ctx, tokenizer);
@@ -514,6 +548,43 @@ command_schema_table_output_tokenizer(grn_ctx *ctx, grn_obj *table)
   grn_ctx_output_cstr(ctx, "name");
   command_schema_output_name(ctx, tokenizer);
 
+  grn_ctx_output_cstr(ctx, "options");
+  {
+    grn_obj options;
+    unsigned int n;
+
+    GRN_VOID_INIT(&options);
+    grn_table_get_tokenizer_options(ctx, table, &options);
+    if (options.header.type == GRN_VOID) {
+      grn_ctx_output_null(ctx);
+    } else {
+      grn_obj option;
+      unsigned int i;
+
+      n = grn_vector_size(ctx, &options);
+      grn_ctx_output_array_open(ctx, "options", n);
+      GRN_VOID_INIT(&option);
+      for (i = 0; i < n; i++) {
+        const char *value;
+        unsigned int length;
+        grn_id domain;
+
+        length = grn_vector_get_element(ctx,
+                                        &options,
+                                        i,
+                                        &value,
+                                        NULL,
+                                        &domain);
+        grn_obj_reinit(ctx, &option, domain, 0);
+        grn_bulk_write(ctx, &option, value, length);
+        grn_ctx_output_obj(ctx, &option, NULL);
+      }
+      GRN_OBJ_FIN(ctx, &option);
+      grn_ctx_output_array_close(ctx);
+    }
+    GRN_OBJ_FIN(ctx, &options);
+  }
+
   grn_ctx_output_map_close(ctx);
 }
 
@@ -642,7 +713,12 @@ command_schema_table_command_collect_arguments(grn_ctx *ctx,
     grn_obj *tokenizer;
     tokenizer = grn_obj_get_info(ctx, table, GRN_INFO_DEFAULT_TOKENIZER, NULL);
     if (tokenizer) {
-      ADD_OBJECT_NAME("default_tokenizer", tokenizer);
+      grn_obj sub_output;
+      GRN_TEXT_INIT(&sub_output, 0);
+      grn_table_get_tokenizer_string(ctx, table, &sub_output);
+      GRN_TEXT_PUTC(ctx, &sub_output, '\0');
+      ADD("default_tokenizer", GRN_TEXT_VALUE(&sub_output));
+      GRN_OBJ_FIN(ctx, &sub_output);
     }
   }
 

  Modified: test/command/suite/schema/tables/columns/type/index_medium.expected (+2 -1)
===================================================================
--- test/command/suite/schema/tables/columns/type/index_medium.expected    2018-04-05 18:18:29 +0900 (246983f77)
+++ test/command/suite/schema/tables/columns/type/index_medium.expected    2018-04-05 18:21:31 +0900 (f028fb7cb)
@@ -338,7 +338,8 @@ schema
         "value_type": null,
         "tokenizer": {
           "id": 67,
-          "name": "TokenBigram"
+          "name": "TokenBigram",
+          "options": null
         },
         "normalizer": {
           "id": 79,

  Modified: test/command/suite/schema/tables/columns/type/index_small.expected (+2 -1)
===================================================================
--- test/command/suite/schema/tables/columns/type/index_small.expected    2018-04-05 18:18:29 +0900 (d1d7bb133)
+++ test/command/suite/schema/tables/columns/type/index_small.expected    2018-04-05 18:21:31 +0900 (0326d2757)
@@ -338,7 +338,8 @@ schema
         "value_type": null,
         "tokenizer": {
           "id": 67,
-          "name": "TokenBigram"
+          "name": "TokenBigram",
+          "options": null
         },
         "normalizer": {
           "id": 79,

  Modified: test/command/suite/schema/tables/token_filters.expected (+2 -1)
===================================================================
--- test/command/suite/schema/tables/token_filters.expected    2018-04-05 18:18:29 +0900 (82f07365b)
+++ test/command/suite/schema/tables/token_filters.expected    2018-04-05 18:21:31 +0900 (620433ea3)
@@ -227,7 +227,8 @@ schema
         "value_type": null,
         "tokenizer": {
           "id": 67,
-          "name": "TokenBigram"
+          "name": "TokenBigram",
+          "options": null
         },
         "normalizer": null,
         "token_filters": [

  Modified: test/command/suite/schema/tables/tokenizer.expected (+2 -1)
===================================================================
--- test/command/suite/schema/tables/tokenizer.expected    2018-04-05 18:18:29 +0900 (55128d980)
+++ test/command/suite/schema/tables/tokenizer.expected    2018-04-05 18:21:31 +0900 (b7cfcd2d2)
@@ -218,7 +218,8 @@ schema
         "value_type": null,
         "tokenizer": {
           "id": 67,
-          "name": "TokenBigram"
+          "name": "TokenBigram",
+          "options": null
         },
         "normalizer": null,
         "token_filters": [

  Copied: test/command/suite/schema/tables/tokenizer_with_options.expected (+9 -5) 93%
===================================================================
--- test/command/suite/schema/tables/tokenizer.expected    2018-04-05 18:18:29 +0900 (55128d980)
+++ test/command/suite/schema/tables/tokenizer_with_options.expected    2018-04-05 18:21:31 +0900 (41ed03ce6)
@@ -1,4 +1,4 @@
-table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer 'TokenNgram("n", 4)'
 [[0,0.0,0.0],true]
 schema
 [
@@ -217,8 +217,12 @@ schema
         },
         "value_type": null,
         "tokenizer": {
-          "id": 67,
-          "name": "TokenBigram"
+          "id": 78,
+          "name": "TokenNgram",
+          "options": [
+            "n",
+            4
+          ]
         },
         "normalizer": null,
         "token_filters": [
@@ -233,9 +237,9 @@ schema
             "name": "Terms",
             "flags": "TABLE_PAT_KEY",
             "key_type": "ShortText",
-            "default_tokenizer": "TokenBigram"
+            "default_tokenizer": "TokenNgram(\"n\", 4)"
           },
-          "command_line": "table_create --name Terms --flags TABLE_PAT_KEY --key_type ShortText --default_tokenizer TokenBigram"
+          "command_line": "table_create --name Terms --flags TABLE_PAT_KEY --key_type ShortText --default_tokenizer \"TokenNgram(\\\"n\\\", 4)\""
         },
         "columns": {
         }

  Added: test/command/suite/schema/tables/tokenizer_with_options.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/schema/tables/tokenizer_with_options.test    2018-04-05 18:21:31 +0900 (29d176baa)
@@ -0,0 +1,4 @@
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer 'TokenNgram("n", 4)'
+
+schema
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180405/251dbc30/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index