[Groonga-commit] groonga/groonga at 1ed550f [master] TokenMecab: add no reading case test

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Sep 10 16:51:44 JST 2018


Kouhei Sutou	2018-09-10 16:51:44 +0900 (Mon, 10 Sep 2018)

  Revision: 1ed550f7f7b3603645bc311dd2bfb38143c00365
  https://github.com/groonga/groonga/commit/1ed550f7f7b3603645bc311dd2bfb38143c00365

  Message:
    TokenMecab: add no reading case test

  Modified files:
    plugins/tokenizers/mecab.c
    test/command/suite/tokenizers/mecab/options/include_reading.expected
    test/command/suite/tokenizers/mecab/options/include_reading.test

  Modified: plugins/tokenizers/mecab.c (+8 -5)
===================================================================
--- plugins/tokenizers/mecab.c    2018-09-10 16:37:06 +0900 (105033a99)
+++ plugins/tokenizers/mecab.c    2018-09-10 16:51:44 +0900 (0b51370e8)
@@ -642,7 +642,10 @@ mecab_init(grn_ctx *ctx, grn_tokenizer_query *query)
         GRN_PLUGIN_FREE(ctx, tokenizer);
         return NULL;
       }
-      {
+      if (mecab_tokenizer_options_need_default_output(tokenizer->options)) {
+        tokenizer->next = GRN_TEXT_VALUE(&(tokenizer->buf));
+        tokenizer->end = tokenizer->next + GRN_TEXT_LEN(&(tokenizer->buf));
+      } else {
         char *buf, *p;
         unsigned int bufsize;
 
@@ -670,11 +673,11 @@ mecab_next_default_format_skip_eos(grn_ctx *ctx,
   }
 
   if (strncmp(tokenizer->next, "EOS", 3) == 0) {
-    const char *current = tokenizer->next;
-    if (current + 1 < tokenizer->end && current[0] == '\r') {
+    const char *current = tokenizer->next + 3;
+    if (current < tokenizer->end && current[0] == '\r') {
       current++;
     }
-    if (current + 1 < tokenizer->end && current[0] == '\n') {
+    if (current < tokenizer->end && current[0] == '\n') {
       current++;
       tokenizer->next = current;
     }
@@ -702,7 +705,7 @@ mecab_next_default_format_add_feature(grn_ctx *ctx,
   size_t feature_length;
   grn_obj value;
 
-  if (i + 1 > n_locations) {
+  if (i + 2 > n_locations) {
     return;
   }
 

  Modified: test/command/suite/tokenizers/mecab/options/include_reading.expected (+16 -1)
===================================================================
--- test/command/suite/tokenizers/mecab/options/include_reading.expected    2018-09-10 16:37:06 +0900 (de2d54cf2)
+++ test/command/suite/tokenizers/mecab/options/include_reading.expected    2018-09-10 16:51:44 +0900 (4653e9054)
@@ -1,4 +1,4 @@
-tokenize   'TokenMecab("include_reading", true)'   '焼き肉と焼きにく'
+tokenize   'TokenMecab("include_reading", true)'   '焼き肉と焼きにくとyakiniku'
 [
   [
     0,
@@ -29,6 +29,21 @@ tokenize   'TokenMecab("include_reading", true)'   '焼き肉と焼きにく'
       "metadata": {
         "reading": "ヤキニク"
       }
+    },
+    {
+      "value": "と",
+      "position": 3,
+      "force_prefix": false,
+      "metadata": {
+        "reading": "ト"
+      }
+    },
+    {
+      "value": "yakiniku",
+      "position": 4,
+      "force_prefix": false,
+      "metadata": {
+      }
     }
   ]
 ]

  Modified: test/command/suite/tokenizers/mecab/options/include_reading.test (+1 -1)
===================================================================
--- test/command/suite/tokenizers/mecab/options/include_reading.test    2018-09-10 16:37:06 +0900 (0d637833f)
+++ test/command/suite/tokenizers/mecab/options/include_reading.test    2018-09-10 16:51:44 +0900 (3064d672e)
@@ -1,5 +1,5 @@
 #@on-error omit
 tokenize \
   'TokenMecab("include_reading", true)' \
-  '焼き肉と焼きにく'
+  '焼き肉と焼きにくとyakiniku'
 #@on-error default
-------------- next part --------------
An HTML attachment was scrubbed...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180910/6b933ca7/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index