[Groonga-commit] groonga/groonga at 02c79b1 [master] TokenNgram: add unify_symbol option

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Sep 20 17:13:51 JST 2018


Kouhei Sutou	2018-09-20 17:13:51 +0900 (Thu, 20 Sep 2018)

  Revision: 02c79b13076fe0198da4e1238602f82e5946f025
  https://github.com/groonga/groonga/commit/02c79b13076fe0198da4e1238602f82e5946f025

  Message:
    TokenNgram: add unify_symbol option
    
    TokenNgram("unify_symbol", false) == TokenBigramSplitSymbol

  Added files:
    test/command/suite/tokenizers/ngram/unify_symbol.expected
    test/command/suite/tokenizers/ngram/unify_symbol.test
  Modified files:
    lib/tokenizers.c

  Modified: lib/tokenizers.c (+6 -0)
===================================================================
--- lib/tokenizers.c    2018-09-20 17:11:09 +0900 (ca3037e5c)
+++ lib/tokenizers.c    2018-09-20 17:13:51 +0900 (473e05d7c)
@@ -736,6 +736,12 @@ ngram_open_options(grn_ctx *ctx,
                                     raw_options,
                                     i,
                                     options->unify_alphabet);
+    } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_symbol")) {
+      options->unify_symbol =
+        grn_vector_get_element_bool(ctx,
+                                    raw_options,
+                                    i,
+                                    options->unify_symbol);
     }
   } GRN_OPTION_VALUES_EACH_END();
 

  Added: test/command/suite/tokenizers/ngram/unify_symbol.expected (+40 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/ngram/unify_symbol.expected    2018-09-20 17:13:51 +0900 (460e19461)
@@ -0,0 +1,40 @@
+tokenize   'TokenNgram("unify_symbol", false)'   "___---"   NormalizerAuto
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "__",
+      "position": 0,
+      "force_prefix": false
+    },
+    {
+      "value": "__",
+      "position": 1,
+      "force_prefix": false
+    },
+    {
+      "value": "_-",
+      "position": 2,
+      "force_prefix": false
+    },
+    {
+      "value": "--",
+      "position": 3,
+      "force_prefix": false
+    },
+    {
+      "value": "--",
+      "position": 4,
+      "force_prefix": false
+    },
+    {
+      "value": "-",
+      "position": 5,
+      "force_prefix": false
+    }
+  ]
+]

  Added: test/command/suite/tokenizers/ngram/unify_symbol.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/ngram/unify_symbol.test    2018-09-20 17:13:51 +0900 (470c37dab)
@@ -0,0 +1,4 @@
+tokenize \
+  'TokenNgram("unify_symbol", false)' \
+  "___---" \
+  NormalizerAuto
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180920/c196395d/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index