[Groonga-commit] groonga/groonga at 95f8fd3 [master] TokenNgram: add unify_digit option

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Sep 20 17:15:55 JST 2018


Kouhei Sutou	2018-09-20 17:15:55 +0900 (Thu, 20 Sep 2018)

  Revision: 95f8fd3600bc16c640808bb30c9dd49cae23ded1
  https://github.com/groonga/groonga/commit/95f8fd3600bc16c640808bb30c9dd49cae23ded1

  Message:
    TokenNgram: add unify_digit option
    
    TokenNgram("unify_digit", false) == TokenBigramSplitDigit (which does not exist ;p)

  Added files:
    test/command/suite/tokenizers/ngram/unify_digit.expected
    test/command/suite/tokenizers/ngram/unify_digit.test
  Modified files:
    lib/tokenizers.c

  Modified: lib/tokenizers.c (+6 -0)
===================================================================
--- lib/tokenizers.c    2018-09-20 17:13:51 +0900 (473e05d7c)
+++ lib/tokenizers.c    2018-09-20 17:15:55 +0900 (e988cff7f)
@@ -736,6 +736,12 @@ ngram_open_options(grn_ctx *ctx,
                                     raw_options,
                                     i,
                                     options->unify_alphabet);
+    } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_digit")) {
+      options->unify_digit =
+        grn_vector_get_element_bool(ctx,
+                                    raw_options,
+                                    i,
+                                    options->unify_digit);
     } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_symbol")) {
       options->unify_symbol =
         grn_vector_get_element_bool(ctx,

  Added: test/command/suite/tokenizers/ngram/unify_digit.expected (+40 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/ngram/unify_digit.expected    2018-09-20 17:15:55 +0900 (02889dc3f)
@@ -0,0 +1,40 @@
+tokenize   'TokenNgram("unify_digit", false)'   "012345"   NormalizerAuto
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "01",
+      "position": 0,
+      "force_prefix": false
+    },
+    {
+      "value": "12",
+      "position": 1,
+      "force_prefix": false
+    },
+    {
+      "value": "23",
+      "position": 2,
+      "force_prefix": false
+    },
+    {
+      "value": "34",
+      "position": 3,
+      "force_prefix": false
+    },
+    {
+      "value": "45",
+      "position": 4,
+      "force_prefix": false
+    },
+    {
+      "value": "5",
+      "position": 5,
+      "force_prefix": false
+    }
+  ]
+]

  Added: test/command/suite/tokenizers/ngram/unify_digit.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/ngram/unify_digit.test    2018-09-20 17:15:55 +0900 (777b6f8d4)
@@ -0,0 +1,4 @@
+tokenize \
+  'TokenNgram("unify_digit", false)' \
+  "012345" \
+  NormalizerAuto
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180920/f25b3b07/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index