[Groonga-commit] groonga/groonga at ea62d52 [master] mecab: fix a bug that -Owakati is ignored

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Oct 17 23:59:40 JST 2015


Kouhei Sutou	2015-10-17 23:59:40 +0900 (Sat, 17 Oct 2015)

  New Revision: ea62d52e6d8d8e0c4b8079a19ab39f23f5ee58fe
  https://github.com/groonga/groonga/commit/ea62d52e6d8d8e0c4b8079a19ab39f23f5ee58fe

  Message:
    mecab: fix a bug that -Owakati is ignored

  Modified files:
    plugins/tokenizers/mecab.c

  Modified: plugins/tokenizers/mecab.c (+66 -57)
===================================================================
--- plugins/tokenizers/mecab.c    2015-10-17 23:47:03 +0900 (b5e0356)
+++ plugins/tokenizers/mecab.c    2015-10-17 23:59:40 +0900 (482ae0f)
@@ -271,6 +271,57 @@ chunked_tokenize_utf8(grn_ctx *ctx,
   }
 }
 
+static mecab_t *
+mecab_create(grn_ctx *ctx)
+{
+  mecab_t *mecab;
+  int argc = 0;
+  const char *argv[4];
+
+  argv[argc++] = "Groonga";
+  argv[argc++] = "-Owakati";
+#ifdef GRN_WITH_BUNDLED_MECAB
+  argv[argc++] = "--rcfile";
+# ifdef WIN32
+  {
+    static char windows_mecab_rc_file[PATH_MAX];
+
+    grn_strcpy(windows_mecab_rc_file,
+               PATH_MAX,
+               grn_plugin_windows_base_dir());
+    grn_strcat(windows_mecab_rc_file,
+               PATH_MAX,
+               "/");
+    grn_strcat(windows_mecab_rc_file,
+               PATH_MAX,
+               GRN_BUNDLED_MECAB_RELATIVE_RC_PATH);
+    argv[argc++] = windows_mecab_rc_file;
+  }
+# else /* WIN32 */
+  argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH;
+# endif /* WIN32 */
+#endif /* GRN_WITH_BUNDLED_MECAB */
+  mecab = mecab_new(argc, (char **)argv);
+
+  if (!mecab) {
+    GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                     "[tokenizer][mecab] "
+                     "failed to create mecab_t: mecab_new("
+                     "\"%s\""
+#ifdef GRN_WITH_BUNDLED_MECAB
+                     ", \"%s\", \"%s\""
+#endif /* GRN_WITH_BUNDLED_MECAB */
+                     "): %s",
+                     argv[0],
+#ifdef GRN_WITH_BUNDLED_MECAB
+                     argv[1], argv[2],
+#endif /* GRN_WITH_BUNDLED_MECAB */
+                     mecab_global_error_message());
+  }
+
+  return mecab;
+}
+
 /*
   This function is called for a full text search query or a document to be
   indexed. This means that both short/long strings are given.
@@ -294,46 +345,8 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   if (!sole_mecab) {
     grn_plugin_mutex_lock(ctx, sole_mecab_mutex);
     if (!sole_mecab) {
-      int argc = 0;
-      const char *argv[3];
-      argv[argc++] = "-Owakati";
-#ifdef GRN_WITH_BUNDLED_MECAB
-      argv[argc++] = "--rcfile";
-# ifdef WIN32
-      {
-        static char windows_mecab_rc_file[PATH_MAX];
-
-        grn_strcpy(windows_mecab_rc_file,
-                   PATH_MAX,
-                   grn_plugin_windows_base_dir());
-        grn_strcat(windows_mecab_rc_file,
-                   PATH_MAX,
-                   "/");
-        grn_strcat(windows_mecab_rc_file,
-                   PATH_MAX,
-                   GRN_BUNDLED_MECAB_RELATIVE_RC_PATH);
-        argv[argc++] = windows_mecab_rc_file;
-      }
-# else /* WIN32 */
-      argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH;
-# endif /* WIN32 */
-#endif /* GRN_WITH_BUNDLED_MECAB */
-      sole_mecab = mecab_new(argc, (char **)argv);
-      if (!sole_mecab) {
-        GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
-                         "[tokenizer][mecab] "
-                         "mecab_new("
-                         "\"%s\""
-#ifdef GRN_WITH_BUNDLED_MECAB
-                         ", \"%s\", \"%s\""
-#endif /* GRN_WITH_BUNDLED_MECAB */
-                         ") failed on mecab_init(): %s",
-                         argv[0],
-#ifdef GRN_WITH_BUNDLED_MECAB
-                         argv[1], argv[2],
-#endif /* GRN_WITH_BUNDLED_MECAB */
-                         mecab_global_error_message());
-      } else {
+      sole_mecab = mecab_create(ctx);
+      if (sole_mecab) {
         sole_mecab_encoding = get_mecab_encoding(sole_mecab);
       }
     }
@@ -512,28 +525,24 @@ check_mecab_dictionary_encoding(grn_ctx *ctx)
 {
 #ifdef HAVE_MECAB_DICTIONARY_INFO_T
   mecab_t *mecab;
+  grn_encoding encoding;
+  grn_bool have_same_encoding_dictionary;
 
-  mecab = mecab_new2("-Owakati");
-  if (mecab) {
-    grn_encoding encoding;
-    grn_bool have_same_encoding_dictionary;
+  mecab = mecab_create(ctx);
+  if (!mecab) {
+    return;
+  }
 
-    encoding = GRN_CTX_GET_ENCODING(ctx);
-    have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab));
-    mecab_destroy(mecab);
+  encoding = GRN_CTX_GET_ENCODING(ctx);
+  have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab));
+  mecab_destroy(mecab);
 
-    if (!have_same_encoding_dictionary) {
-      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
-                       "[tokenizer][mecab] "
-                       "MeCab has no dictionary that uses the context encoding"
-                       ": <%s>",
-                       grn_encoding_to_string(encoding));
-    }
-  } else {
+  if (!have_same_encoding_dictionary) {
     GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
                      "[tokenizer][mecab] "
-                     "mecab_new2 failed in check_mecab_dictionary_encoding: %s",
-                     mecab_global_error_message());
+                     "MeCab has no dictionary that uses the context encoding"
+                     ": <%s>",
+                     grn_encoding_to_string(encoding));
   }
 #endif
 }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index