[Groonga-commit] groonga/groonga [master] use mecab_sparse_tostr2() instead of mecab_sparse_tostr3().

Back to archive index

null+****@clear***** null+****@clear*****
2012年 2月 24日 (金) 15:07:15 JST


Susumu Yata	2012-02-24 15:07:15 +0900 (Fri, 24 Feb 2012)

  New Revision: 77f2afc7947913aa4b3cd47bfb58e9058fa86808

  Log:
    use mecab_sparse_tostr2() instead of mecab_sparse_tostr3().

  Modified files:
    plugins/tokenizers/mecab.c

  Modified: plugins/tokenizers/mecab.c (+19 -27)
===================================================================
--- plugins/tokenizers/mecab.c    2012-02-24 11:19:47 +0900 (f944656)
+++ plugins/tokenizers/mecab.c    2012-02-24 15:07:15 +0900 (3249376)
@@ -84,24 +84,24 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   grn_obj *str;
   int nflags = 0;
-  char *buf, *s, *p;
-  char mecab_err[256];
+  char *buf, *p;
+  const char *s;
   grn_obj *table = args[0];
   grn_obj_flags table_flags;
   grn_encoding table_encoding;
   grn_mecab_tokenizer *token;
-  unsigned int bufsize, maxtrial = 10, len;
+  unsigned int bufsize, len;
   if (!(str = grn_ctx_pop(ctx))) {
     ERR(GRN_INVALID_ARGUMENT, "missing argument");
     return NULL;
   }
-  mecab_err[sizeof(mecab_err) - 1] = '\0';
   if (!sole_mecab) {
     CRITICAL_SECTION_ENTER(sole_mecab_lock);
     if (!sole_mecab) {
       sole_mecab = mecab_new2("-Owakati");
       if (!sole_mecab) {
-        strncpy(mecab_err, mecab_strerror(NULL), sizeof(mecab_err) - 1);
+        ERR(GRN_TOKENIZER_ERROR, "mecab_new2 failed on grn_mecab_init: %s",
+            mecab_strerror(NULL));
       } else {
         sole_mecab_encoding = get_mecab_encoding(sole_mecab);
       }
@@ -109,8 +109,6 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
     CRITICAL_SECTION_LEAVE(sole_mecab_lock);
   }
   if (!sole_mecab) {
-    ERR(GRN_TOKENIZER_ERROR,
-        "mecab_new2 failed on grn_mecab_init: %s", mecab_err);
     return NULL;
   }
   grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL);
@@ -131,39 +129,33 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
     return NULL;
   }
   len = token->nstr->norm_blen;
-  for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
-    if (!(buf = GRN_MALLOC(bufsize + 1))) {
+  CRITICAL_SECTION_ENTER(sole_mecab_lock);
+  s = mecab_sparse_tostr2(token->mecab, token->nstr->norm, len);
+  if (!s) {
+    ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d err=%s",
+        len, mecab_strerror(token->mecab));
+  } else {
+    bufsize = strlen(s) + 1;
+    if (!(buf = GRN_MALLOC(bufsize))) {
       GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
-      grn_str_close(ctx, token->nstr);
-      GRN_FREE(token);
-      return NULL;
+    } else {
+      memcpy(buf, s, bufsize);
     }
-    CRITICAL_SECTION_ENTER(sole_mecab_lock);
-    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
-    if (!s) {
-      strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
-    }
-    CRITICAL_SECTION_LEAVE(sole_mecab_lock);
-    if (s) { break; }
-    GRN_FREE(buf);
-    if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
   }
-  if (!s) {
-    ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s",
-        len, bufsize, mecab_err);
+  CRITICAL_SECTION_LEAVE(sole_mecab_lock);
+  if (!s || !buf) {
     grn_str_close(ctx, token->nstr);
     GRN_FREE(token);
     return NULL;
   }
   /* A certain version of mecab returns trailing lf or spaces. */
-  for (p = buf + strlen(buf) - 1;
+  for (p = buf + bufsize - 2;
        buf <= p && isspace(*(unsigned char *)p);
        p--) { *p = '\0'; }
-  /* grn_log("sparsed='%s'", s); */
   user_data->ptr = token;
   token->buf = buf;
   token->next = buf;
-  token->end = buf + strlen(buf);
+  token->end = p + 1;
   GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
   GRN_UINT32_INIT(&token->stat_, 0);
   return NULL;




Groonga-commit メーリングリストの案内
Back to archive index