Kouhei Sutou
null+****@clear*****
Sat Oct 17 23:59:40 JST 2015
Kouhei Sutou 2015-10-17 23:59:40 +0900 (Sat, 17 Oct 2015) New Revision: ea62d52e6d8d8e0c4b8079a19ab39f23f5ee58fe https://github.com/groonga/groonga/commit/ea62d52e6d8d8e0c4b8079a19ab39f23f5ee58fe Message: mecab: fix a bug that -Owakati is ignored Modified files: plugins/tokenizers/mecab.c Modified: plugins/tokenizers/mecab.c (+66 -57) =================================================================== --- plugins/tokenizers/mecab.c 2015-10-17 23:47:03 +0900 (b5e0356) +++ plugins/tokenizers/mecab.c 2015-10-17 23:59:40 +0900 (482ae0f) @@ -271,6 +271,57 @@ chunked_tokenize_utf8(grn_ctx *ctx, } } +static mecab_t * +mecab_create(grn_ctx *ctx) +{ + mecab_t *mecab; + int argc = 0; + const char *argv[4]; + + argv[argc++] = "Groonga"; + argv[argc++] = "-Owakati"; +#ifdef GRN_WITH_BUNDLED_MECAB + argv[argc++] = "--rcfile"; +# ifdef WIN32 + { + static char windows_mecab_rc_file[PATH_MAX]; + + grn_strcpy(windows_mecab_rc_file, + PATH_MAX, + grn_plugin_windows_base_dir()); + grn_strcat(windows_mecab_rc_file, + PATH_MAX, + "/"); + grn_strcat(windows_mecab_rc_file, + PATH_MAX, + GRN_BUNDLED_MECAB_RELATIVE_RC_PATH); + argv[argc++] = windows_mecab_rc_file; + } +# else /* WIN32 */ + argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH; +# endif /* WIN32 */ +#endif /* GRN_WITH_BUNDLED_MECAB */ + mecab = mecab_new(argc, (char **)argv); + + if (!mecab) { + GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, + "[tokenizer][mecab] " + "failed to create mecab_t: mecab_new(" + "\"%s\"" +#ifdef GRN_WITH_BUNDLED_MECAB + ", \"%s\", \"%s\"" +#endif /* GRN_WITH_BUNDLED_MECAB */ + "): %s", + argv[0], +#ifdef GRN_WITH_BUNDLED_MECAB + argv[1], argv[2], +#endif /* GRN_WITH_BUNDLED_MECAB */ + mecab_global_error_message()); + } + + return mecab; +} + /* This function is called for a full text search query or a document to be indexed. This means that both short/long strings are given. 
@@ -294,46 +345,8 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) if (!sole_mecab) { grn_plugin_mutex_lock(ctx, sole_mecab_mutex); if (!sole_mecab) { - int argc = 0; - const char *argv[3]; - argv[argc++] = "-Owakati"; -#ifdef GRN_WITH_BUNDLED_MECAB - argv[argc++] = "--rcfile"; -# ifdef WIN32 - { - static char windows_mecab_rc_file[PATH_MAX]; - - grn_strcpy(windows_mecab_rc_file, - PATH_MAX, - grn_plugin_windows_base_dir()); - grn_strcat(windows_mecab_rc_file, - PATH_MAX, - "/"); - grn_strcat(windows_mecab_rc_file, - PATH_MAX, - GRN_BUNDLED_MECAB_RELATIVE_RC_PATH); - argv[argc++] = windows_mecab_rc_file; - } -# else /* WIN32 */ - argv[argc++] = GRN_BUNDLED_MECAB_RC_PATH; -# endif /* WIN32 */ -#endif /* GRN_WITH_BUNDLED_MECAB */ - sole_mecab = mecab_new(argc, (char **)argv); - if (!sole_mecab) { - GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, - "[tokenizer][mecab] " - "mecab_new(" - "\"%s\"" -#ifdef GRN_WITH_BUNDLED_MECAB - ", \"%s\", \"%s\"" -#endif /* GRN_WITH_BUNDLED_MECAB */ - ") failed on mecab_init(): %s", - argv[0], -#ifdef GRN_WITH_BUNDLED_MECAB - argv[1], argv[2], -#endif /* GRN_WITH_BUNDLED_MECAB */ - mecab_global_error_message()); - } else { + sole_mecab = mecab_create(ctx); + if (sole_mecab) { sole_mecab_encoding = get_mecab_encoding(sole_mecab); } } @@ -512,28 +525,24 @@ check_mecab_dictionary_encoding(grn_ctx *ctx) { #ifdef HAVE_MECAB_DICTIONARY_INFO_T mecab_t *mecab; + grn_encoding encoding; + grn_bool have_same_encoding_dictionary; - mecab = mecab_new2("-Owakati"); - if (mecab) { - grn_encoding encoding; - grn_bool have_same_encoding_dictionary; + mecab = mecab_create(ctx); + if (!mecab) { + return; + } - encoding = GRN_CTX_GET_ENCODING(ctx); - have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab)); - mecab_destroy(mecab); + encoding = GRN_CTX_GET_ENCODING(ctx); + have_same_encoding_dictionary = (encoding == get_mecab_encoding(mecab)); + mecab_destroy(mecab); - if (!have_same_encoding_dictionary) { - 
GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, - "[tokenizer][mecab] " - "MeCab has no dictionary that uses the context encoding" - ": <%s>", - grn_encoding_to_string(encoding)); - } - } else { + if (!have_same_encoding_dictionary) { GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR, "[tokenizer][mecab] " - "mecab_new2 failed in check_mecab_dictionary_encoding: %s", - mecab_global_error_message()); + "MeCab has no dictionary that uses the context encoding" + ": <%s>", + grn_encoding_to_string(encoding)); } #endif } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download