[Groonga-commit] groonga/groonga at e262c28 [master] TokenMecab: add include_reading option

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Sep 10 16:37:06 JST 2018


Kouhei Sutou	2018-09-10 16:37:06 +0900 (Mon, 10 Sep 2018)

  Revision: e262c284afdbd6a0e92ba8e9116d4216040c6910
  https://github.com/groonga/groonga/commit/e262c284afdbd6a0e92ba8e9116d4216040c6910

  Message:
    TokenMecab: add include_reading option
    
    It adds reading to tokens.

  Added files:
    test/command/suite/tokenizers/mecab/options/include_reading.expected
    test/command/suite/tokenizers/mecab/options/include_reading.test
  Modified files:
    plugins/tokenizers/mecab.c

  Modified: plugins/tokenizers/mecab.c (+20 -0)
===================================================================
--- plugins/tokenizers/mecab.c    2018-09-10 16:17:57 +0900 (8a56d8872)
+++ plugins/tokenizers/mecab.c    2018-09-10 16:37:06 +0900 (105033a99)
@@ -50,6 +50,7 @@ typedef struct {
   grn_bool chunked_tokenize;
   int32_t chunk_size_threshold;
   grn_bool include_class;
+  grn_bool include_reading;
 } grn_mecab_tokenizer_options;
 
 typedef struct {
@@ -142,6 +143,7 @@ mecab_tokenizer_options_init(grn_mecab_tokenizer_options *options)
   options->chunked_tokenize = grn_mecab_chunked_tokenize_enabled;
   options->chunk_size_threshold = grn_mecab_chunk_size_threshold;
   options->include_class = GRN_FALSE;
+  options->include_reading = GRN_FALSE;
 }
 
 static grn_bool
@@ -155,6 +157,10 @@ mecab_tokenizer_options_need_default_output(grn_mecab_tokenizer_options *options
     return GRN_TRUE;
   }
 
+  if (options->include_reading) {
+    return GRN_TRUE;
+  }
+
   return GRN_FALSE;
 }
 
@@ -200,6 +206,12 @@ mecab_tokenizer_options_open(grn_ctx *ctx,
                                     raw_options,
                                     i,
                                     options->include_class);
+    } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "include_reading")) {
+      options->include_reading =
+        grn_vector_get_element_bool(ctx,
+                                    raw_options,
+                                    i,
+                                    options->include_reading);
     }
   } GRN_OPTION_VALUES_EACH_END();
 
@@ -802,6 +814,14 @@ mecab_next_default_format(grn_ctx *ctx,
     mecab_next_default_format_add_feature(ctx, &data, "subclass1", 2);
     mecab_next_default_format_add_feature(ctx, &data, "subclass2", 3);
   }
+  if (tokenizer->options->include_reading) {
+    add_feature_data data;
+    data.token = token;
+    data.features = &features;
+    data.ignore_empty_value = GRN_TRUE;
+    data.ignore_asterisk_value = GRN_FALSE;
+    mecab_next_default_format_add_feature(ctx, &data, "reading", 7);
+  }
   GRN_OBJ_FIN(ctx, &features);
 }
 

  Added: test/command/suite/tokenizers/mecab/options/include_reading.expected (+34 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/mecab/options/include_reading.expected    2018-09-10 16:37:06 +0900 (de2d54cf2)
@@ -0,0 +1,34 @@
+tokenize   'TokenMecab("include_reading", true)'   '焼き肉と焼きにく'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    {
+      "value": "焼き肉",
+      "position": 0,
+      "force_prefix": false,
+      "metadata": {
+        "reading": "ヤキニク"
+      }
+    },
+    {
+      "value": "と",
+      "position": 1,
+      "force_prefix": false,
+      "metadata": {
+        "reading": "ト"
+      }
+    },
+    {
+      "value": "焼きにく",
+      "position": 2,
+      "force_prefix": false,
+      "metadata": {
+        "reading": "ヤキニク"
+      }
+    }
+  ]
+]

  Added: test/command/suite/tokenizers/mecab/options/include_reading.test (+5 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/tokenizers/mecab/options/include_reading.test    2018-09-10 16:37:06 +0900 (0d637833f)
@@ -0,0 +1,5 @@
+#@on-error omit
+tokenize \
+  'TokenMecab("include_reading", true)' \
+  '焼き肉と焼きにく'
+#@on-error default
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180910/b2122980/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index