[Groonga-commit] groonga/groonga at 822622c [master] Add TokenNgram

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Apr 5 15:35:44 JST 2018


Kouhei Sutou	2018-04-05 15:35:44 +0900 (Thu, 05 Apr 2018)

  New Revision: 822622c003a1392b062f1220002998bca96eb358
  https://github.com/groonga/groonga/commit/822622c003a1392b062f1220002998bca96eb358

  Message:
    Add TokenNgram
    
    You can customize N by 'TokenNgram("n", 4)'.

  Added files:
    test/command/suite/table_create/default_tokenizer/ngram/options/n.expected
    test/command/suite/table_create/default_tokenizer/ngram/options/n.test
  Modified files:
    lib/tokenizers.c
    test/command/suite/schema/plugins.expected
    test/command/suite/schema/tables/columns/compress/lz4.expected
    test/command/suite/schema/tables/columns/compress/zlib.expected
    test/command/suite/schema/tables/columns/compress/zstd.expected
    test/command/suite/schema/tables/columns/type/index_medium.expected
    test/command/suite/schema/tables/columns/type/index_small.expected
    test/command/suite/schema/tables/columns/type/scalar.expected
    test/command/suite/schema/tables/columns/type/vector.expected
    test/command/suite/schema/tables/normalizer.expected
    test/command/suite/schema/tables/token_filters.expected
    test/command/suite/schema/tables/tokenizer.expected
    test/command/suite/schema/tables/type/array.expected
    test/command/suite/schema/tables/type/hash_table.expected
    test/command/suite/schema/tables/value_type/reference.expected
    test/command/suite/schema/tables/value_type/type.expected
    test/command/suite/table_create/default_tokenizer/nonexistent.expected
    test/command/suite/tokenizer_list/default.expected

  Modified: lib/tokenizers.c (+99 -13)
===================================================================
--- lib/tokenizers.c    2018-04-05 15:31:39 +0900 (9e4dc7429)
+++ lib/tokenizers.c    2018-04-05 15:35:44 +0900 (49fb90f46)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2009-2017 Brazil
+  Copyright(C) 2009-2018 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -19,6 +19,7 @@
 #include "grn_token_cursor.h"
 #include "grn_string.h"
 #include "grn_plugin.h"
+#include "grn_raw_string.h"
 #include <groonga/tokenizer.h>
 
 grn_obj *grn_tokenizer_uvector = NULL;
@@ -258,8 +259,15 @@ typedef struct {
 } grn_ngram_tokenizer;
 
 static grn_obj *
-ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data, uint8_t ngram_unit,
-           uint8_t uni_alpha, uint8_t uni_digit, uint8_t uni_symbol, uint8_t ignore_blank)
+ngram_init_raw(grn_ctx *ctx,
+               int nargs,
+               grn_obj **args,
+               grn_user_data *user_data,
+               uint8_t ngram_unit,
+               uint8_t uni_alpha,
+               uint8_t uni_digit,
+               uint8_t uni_symbol,
+               uint8_t ignore_blank)
 {
   unsigned int normalize_flags =
     GRN_STRING_REMOVE_BLANK |
@@ -311,43 +319,119 @@ ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data, ui
 
 static grn_obj *
 unigram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 1, 1, 1, 1, 0); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 1, 1, 1, 1, 0); }
 
 static grn_obj *
 bigram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 1, 1, 1, 0); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 1, 1, 1, 0); }
 
 static grn_obj *
 trigram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 3, 1, 1, 1, 0); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 3, 1, 1, 1, 0); }
 
 static grn_obj *
 bigrams_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 1, 1, 0, 0); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 1, 1, 0, 0); }
 
 static grn_obj *
 bigramsa_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 0, 1, 0, 0); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 0, 1, 0, 0); }
 
 static grn_obj *
 bigramsad_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 0, 0, 0, 0); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 0, 0, 0, 0); }
 
 static grn_obj *
 bigrami_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 1, 1, 1, 1); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 1, 1, 1, 1); }
 
 static grn_obj *
 bigramis_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 1, 1, 0, 1); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 1, 1, 0, 1); }
 
 static grn_obj *
 bigramisa_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 0, 1, 0, 1); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 0, 1, 0, 1); }
 
 static grn_obj *
 bigramisad_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
-{ return ngram_init(ctx, nargs, args, user_data, 2, 0, 0, 0, 1); }
+{ return ngram_init_raw(ctx, nargs, args, user_data, 2, 0, 0, 0, 1); }
+
+typedef struct {
+  uint8_t unit;
+  uint8_t uni_alpha;
+  uint8_t uni_digit;
+  uint8_t uni_symbol;
+  uint8_t ignore_blank;
+} ngram_options;
+
+static void *
+ngram_open_options(grn_ctx *ctx,
+                   grn_obj *lexicon,
+                   grn_obj *raw_options,
+                   void *user_data)
+{
+  ngram_options *options;
+
+  options = GRN_MALLOC(sizeof(ngram_options));
+  if (!options) {
+    return NULL;
+  }
+
+  options->unit = 2;
+  options->uni_alpha = 1;
+  options->uni_digit = 1;
+  options->uni_symbol = 1;
+  options->ignore_blank = 0;
+
+  GRN_OPTION_VALUES_EACH_BEGIN(ctx, raw_options, i, name, name_length) {
+    grn_raw_string name_raw;
+    name_raw.value = name;
+    name_raw.length = name_length;
+
+    if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "n")) {
+      options->unit = grn_vector_get_element_uint8(ctx,
+                                                   raw_options,
+                                                   i,
+                                                   options->unit);
+    }
+  } GRN_OPTION_VALUES_EACH_END();
+
+  return options;
+}
+
+static void
+ngram_close_options(grn_ctx *ctx, void *data)
+{
+  ngram_options *options = data;
+  GRN_FREE(options);
+}
+
+static grn_obj *
+ngram_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
+{
+  grn_obj *lexicon = args[0];
+  ngram_options *options;
+
+  options = grn_table_get_tokenizer_options(ctx,
+                                            lexicon,
+                                            ngram_open_options,
+                                            ngram_close_options,
+                                            NULL);
+  if (ctx->rc != GRN_SUCCESS) {
+    return NULL;
+  }
+
+  return ngram_init_raw(ctx,
+                        nargs,
+                        args,
+                        user_data,
+                        options->unit,
+                        options->uni_alpha,
+                        options->uni_digit,
+                        options->uni_symbol,
+                        options->ignore_blank);
+}
 
 static grn_obj *
 ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
@@ -873,5 +957,7 @@ grn_db_init_builtin_tokenizers(grn_ctx *ctx)
                 delimit_null_init, delimited_next, delimited_fin, vars);
   DEF_TOKENIZER("TokenRegexp",
                 regexp_init, regexp_next, regexp_fin, vars);
+  DEF_TOKENIZER("TokenNgram",
+                ngram_init, ngram_next, ngram_fin, vars);
   return GRN_SUCCESS;
 }

  Modified: test/command/suite/schema/plugins.expected (+6 -2)
===================================================================
--- test/command/suite/schema/plugins.expected    2018-04-05 15:31:39 +0900 (d2f84bb15)
+++ test/command/suite/schema/plugins.expected    2018-04-05 15:35:44 +0900 (e3bbcbcc7)
@@ -179,6 +179,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -194,11 +198,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/compress/lz4.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/columns/compress/lz4.expected    2018-04-05 15:31:39 +0900 (6283c63b4)
+++ test/command/suite/schema/tables/columns/compress/lz4.expected    2018-04-05 15:35:44 +0900 (bfab62c18)
@@ -178,6 +178,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -193,11 +197,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/compress/zlib.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/columns/compress/zlib.expected    2018-04-05 15:31:39 +0900 (2216c691b)
+++ test/command/suite/schema/tables/columns/compress/zlib.expected    2018-04-05 15:35:44 +0900 (6eba84ce0)
@@ -178,6 +178,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -193,11 +197,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/compress/zstd.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/columns/compress/zstd.expected    2018-04-05 15:31:39 +0900 (b8bbc048f)
+++ test/command/suite/schema/tables/columns/compress/zstd.expected    2018-04-05 15:35:44 +0900 (6bfeb981e)
@@ -178,6 +178,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -193,11 +197,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/type/index_medium.expected (+7 -3)
===================================================================
--- test/command/suite/schema/tables/columns/type/index_medium.expected    2018-04-05 15:31:39 +0900 (a6679d912)
+++ test/command/suite/schema/tables/columns/type/index_medium.expected    2018-04-05 15:35:44 +0900 (246983f77)
@@ -184,6 +184,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -199,11 +203,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },
@@ -337,7 +341,7 @@ schema
           "name": "TokenBigram"
         },
         "normalizer": {
-          "id": 78,
+          "id": 79,
           "name": "NormalizerAuto"
         },
         "token_filters": [

  Modified: test/command/suite/schema/tables/columns/type/index_small.expected (+7 -3)
===================================================================
--- test/command/suite/schema/tables/columns/type/index_small.expected    2018-04-05 15:31:39 +0900 (b3b7d1437)
+++ test/command/suite/schema/tables/columns/type/index_small.expected    2018-04-05 15:35:44 +0900 (d1d7bb133)
@@ -184,6 +184,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -199,11 +203,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },
@@ -337,7 +341,7 @@ schema
           "name": "TokenBigram"
         },
         "normalizer": {
-          "id": 78,
+          "id": 79,
           "name": "NormalizerAuto"
         },
         "token_filters": [

  Modified: test/command/suite/schema/tables/columns/type/scalar.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/columns/type/scalar.expected    2018-04-05 15:31:39 +0900 (b6de416ab)
+++ test/command/suite/schema/tables/columns/type/scalar.expected    2018-04-05 15:35:44 +0900 (1b5397061)
@@ -178,6 +178,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -193,11 +197,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/columns/type/vector.expected (+7 -3)
===================================================================
--- test/command/suite/schema/tables/columns/type/vector.expected    2018-04-05 15:31:39 +0900 (27c475f14)
+++ test/command/suite/schema/tables/columns/type/vector.expected    2018-04-05 15:35:44 +0900 (ff6017c2f)
@@ -180,6 +180,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -195,11 +199,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },
@@ -280,7 +284,7 @@ schema
         "value_type": null,
         "tokenizer": null,
         "normalizer": {
-          "id": 78,
+          "id": 79,
           "name": "NormalizerAuto"
         },
         "token_filters": [

  Modified: test/command/suite/schema/tables/normalizer.expected (+7 -3)
===================================================================
--- test/command/suite/schema/tables/normalizer.expected    2018-04-05 15:31:39 +0900 (3133e06a3)
+++ test/command/suite/schema/tables/normalizer.expected    2018-04-05 15:35:44 +0900 (62b64bdd2)
@@ -176,6 +176,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -191,11 +195,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },
@@ -214,7 +218,7 @@ schema
         "value_type": null,
         "tokenizer": null,
         "normalizer": {
-          "id": 78,
+          "id": 79,
           "name": "NormalizerAuto"
         },
         "token_filters": [

  Modified: test/command/suite/schema/tables/token_filters.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/token_filters.expected    2018-04-05 15:31:39 +0900 (37f7c2018)
+++ test/command/suite/schema/tables/token_filters.expected    2018-04-05 15:35:44 +0900 (82f07365b)
@@ -181,6 +181,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -196,11 +200,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/tokenizer.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/tokenizer.expected    2018-04-05 15:31:39 +0900 (14af4176f)
+++ test/command/suite/schema/tables/tokenizer.expected    2018-04-05 15:35:44 +0900 (55128d980)
@@ -176,6 +176,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -191,11 +195,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/type/array.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/type/array.expected    2018-04-05 15:31:39 +0900 (1dfccbc49)
+++ test/command/suite/schema/tables/type/array.expected    2018-04-05 15:35:44 +0900 (67566e936)
@@ -176,6 +176,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -191,11 +195,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/type/hash_table.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/type/hash_table.expected    2018-04-05 15:31:39 +0900 (1b1bef2f8)
+++ test/command/suite/schema/tables/type/hash_table.expected    2018-04-05 15:35:44 +0900 (3dda6cf5b)
@@ -176,6 +176,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -191,11 +195,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/value_type/reference.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/value_type/reference.expected    2018-04-05 15:31:39 +0900 (711c9b08a)
+++ test/command/suite/schema/tables/value_type/reference.expected    2018-04-05 15:35:44 +0900 (7f201461f)
@@ -178,6 +178,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -193,11 +197,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Modified: test/command/suite/schema/tables/value_type/type.expected (+6 -2)
===================================================================
--- test/command/suite/schema/tables/value_type/type.expected    2018-04-05 15:31:39 +0900 (09a58c1a9)
+++ test/command/suite/schema/tables/value_type/type.expected    2018-04-05 15:35:44 +0900 (964056fa7)
@@ -176,6 +176,10 @@ schema
         "id": 64,
         "name": "TokenMecab"
       },
+      "TokenNgram": {
+        "id": 78,
+        "name": "TokenNgram"
+      },
       "TokenRegexp": {
         "id": 77,
         "name": "TokenRegexp"
@@ -191,11 +195,11 @@ schema
     },
     "normalizers": {
       "NormalizerAuto": {
-        "id": 78,
+        "id": 79,
         "name": "NormalizerAuto"
       },
       "NormalizerNFKC51": {
-        "id": 79,
+        "id": 80,
         "name": "NormalizerNFKC51"
       }
     },

  Added: test/command/suite/table_create/default_tokenizer/ngram/options/n.expected (+98 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/default_tokenizer/ngram/options/n.expected    2018-04-05 15:35:44 +0900 (0e1c9d971)
@@ -0,0 +1,98 @@
+table_create Memos TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Memos content COLUMN_SCALAR Text
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer 'TokenNgram("n", 3)'
+[[0,0.0,0.0],true]
+column_create Terms memos_content COLUMN_INDEX Memos content
+[[0,0.0,0.0],true]
+load --table Memos
+[
+{"content": "This is a pen."},
+{"content": "これはペンです。"}
+]
+[[0,0.0,0.0],2]
+select Terms --output_columns _key --limit -1
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        21
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ]
+      ],
+      [
+        " a "
+      ],
+      [
+        " is"
+      ],
+      [
+        " pe"
+      ],
+      [
+        "."
+      ],
+      [
+        "Thi"
+      ],
+      [
+        "a p"
+      ],
+      [
+        "en."
+      ],
+      [
+        "his"
+      ],
+      [
+        "is "
+      ],
+      [
+        "n."
+      ],
+      [
+        "pen"
+      ],
+      [
+        "s a"
+      ],
+      [
+        "s i"
+      ],
+      [
+        "。"
+      ],
+      [
+        "これは"
+      ],
+      [
+        "す。"
+      ],
+      [
+        "です。"
+      ],
+      [
+        "はペン"
+      ],
+      [
+        "れはペ"
+      ],
+      [
+        "ペンで"
+      ],
+      [
+        "ンです"
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/table_create/default_tokenizer/ngram/options/n.test (+14 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/default_tokenizer/ngram/options/n.test    2018-04-05 15:35:44 +0900 (199aa2878)
@@ -0,0 +1,14 @@
+table_create Memos TABLE_NO_KEY
+column_create Memos content COLUMN_SCALAR Text
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer 'TokenNgram("n", 3)'
+column_create Terms memos_content COLUMN_INDEX Memos content
+
+load --table Memos
+[
+{"content": "This is a pen."},
+{"content": "これはペンです。"}
+]
+
+select Terms --output_columns _key --limit -1

  Modified: test/command/suite/table_create/default_tokenizer/nonexistent.expected (+3 -2)
===================================================================
--- test/command/suite/table_create/default_tokenizer/nonexistent.expected    2018-04-05 15:31:39 +0900 (4a3b61055)
+++ test/command/suite/table_create/default_tokenizer/nonexistent.expected    2018-04-05 15:35:44 +0900 (a110e8c93)
@@ -6,9 +6,10 @@ table_create Tags TABLE_PAT_KEY ShortText   --default_tokenizer TokenNonexistent
       0.0,
       0.0
     ],
-    "[table][create][Tags] unknown tokenizer: <TokenNonexistent>"
+    "[table][create][Tags] failed to set default tokenizer: <TokenNonexistent>: [info][set][default-tokenizer][Tags] unknown tokeniz"
   ],
   false
 ]
-#|e| [table][create][Tags] unknown tokenizer: <TokenNonexistent>
+#|e| [info][set][default-tokenizer][Tags] unknown tokenizer: <TokenNonexistent>
+#|e| [table][create][Tags] failed to set default tokenizer: <TokenNonexistent>: [info][set][default-tokenizer][Tags] unknown tokenizer: <TokenNonexistent>
 dump

  Modified: test/command/suite/tokenizer_list/default.expected (+3 -0)
===================================================================
--- test/command/suite/tokenizer_list/default.expected    2018-04-05 15:31:39 +0900 (a26eb0b5a)
+++ test/command/suite/tokenizer_list/default.expected    2018-04-05 15:35:44 +0900 (5c3e0b019)
@@ -47,6 +47,9 @@ tokenizer_list
     },
     {
       "name": "TokenRegexp"
+    },
+    {
+      "name": "TokenNgram"
     }
   ]
 ]
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180405/644e8696/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index