[Groonga-commit] groonga/groonga [master] Revert "[normalizer] changed to use grn_normalized_text instead of grn_str."

Back to archive index

null+****@clear***** null+****@clear*****
2012年 2月 14日 (火) 13:58:03 JST


Kouhei Sutou	2012-02-14 13:58:03 +0900 (Tue, 14 Feb 2012)

  New Revision: 17870f594194e90133bbccd15aa99b536444152a

  Log:
    Revert "[normalizer] changed to use grn_normalized_text instead of grn_str."
    
    This reverts commit 27aac3047ea6f54bd610e0917bbca0953a23a665.
    
    Conflicts:
    
    	lib/ii.c
    	lib/tokenizer.c
    	plugins/tokenizers/mecab.c

  Modified files:
    lib/db.c
    lib/db.h
    lib/ii.c
    lib/token.c
    lib/tokenizer.c
    plugins/tokenizers/mecab.c

  Modified: lib/db.c (+4 -9)
===================================================================
--- lib/db.c    2012-02-14 13:48:09 +0900 (e15e2b8)
+++ lib/db.c    2012-02-14 13:58:03 +0900 (d9eb95c)
@@ -1805,8 +1805,7 @@ exit :
 
 grn_rc
 grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags,
-                   grn_encoding *encoding, grn_obj **tokenizer,
-                   grn_obj **normalizer)
+                   grn_encoding *encoding, grn_obj **tokenizer)
 {
   grn_rc rc = GRN_INVALID_ARGUMENT;
   GRN_API_ENTER;
@@ -1816,28 +1815,24 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags,
       if (flags) { *flags = ((grn_pat *)table)->obj.header.flags; }
       if (encoding) { *encoding = ((grn_pat *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer; }
-      if (normalizer) { *normalizer = ((grn_pat *)table)->normalizer; }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_DAT_KEY :
       if (flags) { *flags = ((grn_dat *)table)->obj.header.flags; }
       if (encoding) { *encoding = ((grn_dat *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer; }
-      if (normalizer) { *normalizer = ((grn_dat *)table)->normalizer; }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_HASH_KEY :
       if (flags) { *flags = ((grn_hash *)table)->obj.header.flags; }
       if (encoding) { *encoding = ((grn_hash *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer; }
-      if (normalizer) { *normalizer = ((grn_hash *)table)->normalizer; }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_NO_KEY :
       if (flags) { *flags = 0; }
       if (encoding) { *encoding = GRN_ENC_NONE; }
       if (tokenizer) { *tokenizer = grn_uvector_tokenizer; }
-      if (normalizer) { *normalizer = NULL; }
       rc = GRN_SUCCESS;
       break;
     }
@@ -8079,7 +8074,7 @@ grn_column_index(grn_ctx *ctx, grn_obj *obj, grn_operator op,
         if (obj->header.type != GRN_COLUMN_FIX_SIZE) {
           grn_obj *tokenizer, *lexicon = grn_ctx_at(ctx, target->header.domain);
           if (!lexicon) { continue; }
-          grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL);
+          grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer);
           if (tokenizer) { continue; }
         }
         if (n < buf_size) {
@@ -8117,7 +8112,7 @@ grn_column_index(grn_ctx *ctx, grn_obj *obj, grn_operator op,
           if (!lexicon) { continue; }
           if (lexicon->header.type != GRN_TABLE_PAT_KEY) { continue; }
           /* FIXME: GRN_TABLE_DAT_KEY should be supported */
-          grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL);
+          grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer);
           if (tokenizer) { continue; }
         }
         if (n < buf_size) {
@@ -8197,7 +8192,7 @@ grn_column_index(grn_ctx *ctx, grn_obj *obj, grn_operator op,
               if (!lexicon) { continue; }
               if (lexicon->header.type != GRN_TABLE_PAT_KEY) { continue; }
               /* FIXME: GRN_TABLE_DAT_KEY should be supported */
-              grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer, NULL);
+              grn_table_get_info(ctx, lexicon, NULL, NULL, &tokenizer);
               if (tokenizer) { continue; }
             }
             if (n < buf_size) {

  Modified: lib/db.h (+1 -2)
===================================================================
--- lib/db.h    2012-02-14 13:48:09 +0900 (537b32f)
+++ lib/db.h    2012-02-14 13:58:03 +0900 (4f76d43)
@@ -92,8 +92,7 @@ grn_id grn_table_get_v(grn_ctx *ctx, grn_obj *table, const void *key, int key_si
 grn_id grn_table_add_v(grn_ctx *ctx, grn_obj *table, const void *key, int key_size,
                        void **value, int *added);
 GRN_API grn_rc grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_obj_flags *flags,
-                                  grn_encoding *encoding, grn_obj **tokenizer,
-                                  grn_obj **normalizer);
+                                  grn_encoding *encoding, grn_obj **tokenizer);
 const char *_grn_table_key(grn_ctx *ctx, grn_obj *table, grn_id id, uint32_t *key_size);
 
 grn_rc grn_table_search(grn_ctx *ctx, grn_obj *table,

  Modified: lib/ii.c (+3 -7)
===================================================================
--- lib/ii.c    2012-02-14 13:48:09 +0900 (96f959b)
+++ lib/ii.c    2012-02-14 13:58:03 +0900 (66dbbe3)
@@ -3405,9 +3405,7 @@ _grn_ii_create(grn_ctx *ctx, grn_ii *ii, const char *path, grn_obj *lexicon, uin
     free_histogram[i] = 0;
   }
   */
-  if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, NULL)) {
-    return NULL;
-  }
+  if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer)) { return NULL; }
   if (path && strlen(path) + 6 >= PATH_MAX) { return NULL; }
   seg = grn_io_create(ctx, path, sizeof(struct grn_ii_header),
                       S_SEGMENT, GRN_II_MAX_LSEG, grn_io_auto, GRN_IO_EXPIRE_SEGMENT);
@@ -3526,9 +3524,7 @@ grn_ii_open(grn_ctx *ctx, const char *path, grn_obj *lexicon)
   grn_obj_flags lflags;
   grn_encoding encoding;
   grn_obj *tokenizer;
-  if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer, NULL)) {
-    return NULL;
-  }
+  if (grn_table_get_info(ctx, lexicon, &lflags, &encoding, &tokenizer)) { return NULL; }
   if (strlen(path) + 6 >= PATH_MAX) { return NULL; }
   strcpy(path2, path);
   strcat(path2, ".c");
@@ -6552,7 +6548,7 @@ grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
       grn_obj *range = grn_ctx_at(ctx, DB_OBJ(ii_buffer->lexicon)->range);
       grn_obj *tokenizer;
       grn_obj_flags flags;
-      grn_table_get_info(ctx, ii_buffer->lexicon, &flags, NULL, &tokenizer, NULL);
+      grn_table_get_info(ctx, ii_buffer->lexicon, &flags, NULL, &tokenizer);
       flags &= ~GRN_OBJ_PERSISTENT;
       ii_buffer->tmp_lexicon = grn_table_create(ctx, NULL, 0, NULL, flags, domain, range);
       grn_obj_set_info(ctx, ii_buffer->tmp_lexicon, GRN_INFO_DEFAULT_TOKENIZER, tokenizer);

  Modified: lib/token.c (+28 -84)
===================================================================
--- lib/token.c    2012-02-14 13:48:09 +0900 (887491d)
+++ lib/token.c    2012-02-14 13:58:03 +0900 (3ef64d8)
@@ -79,8 +79,7 @@ uvector_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 }
 
 typedef struct {
-  grn_obj *normalized_text;
-  char *target_text;
+  grn_str *nstr;
   const uint8_t *delimiter;
   uint32_t delimiter_len;
   int32_t pos;
@@ -98,47 +97,29 @@ delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
                const uint8_t *delimiter, uint32_t delimiter_len)
 {
   grn_obj *str;
+  int nflags = 0;
   grn_delimited_tokenizer *token;
   grn_obj_flags table_flags;
-  grn_obj *normalizer;
   if (!(str = grn_ctx_pop(ctx))) {
     ERR(GRN_INVALID_ARGUMENT, "missing argument");
     return NULL;
   }
   if (!(token = GRN_MALLOC(sizeof(grn_delimited_tokenizer)))) { return NULL; }
   user_data->ptr = token;
-  token->normalized_text = NULL;
-  token->target_text = NULL;
   token->delimiter = delimiter;
   token->delimiter_len = delimiter_len;
   token->pos = 0;
-  grn_table_get_info(ctx, table, &table_flags,
-                     &token->encoding, NULL, &normalizer);
-  if (normalizer) {
-    unsigned int length_in_bytes;
-    if (!(token->normalized_text = grn_normalized_text_open(ctx,
-                                                            normalizer,
-                                                            GRN_TEXT_VALUE(str),
-                                                            GRN_TEXT_LEN(str),
-                                                            token->encoding,
-                                                            0))) {
-      GRN_FREE(token);
-      ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
-      return NULL;
-    }
-    grn_normalized_text_get_value(ctx, token->normalized_text,
-                                  (const char **)(&(token->next)),
-                                  &(token->len),
-                                  &length_in_bytes);
-    token->end = token->next + length_in_bytes;
-  } else {
-    token->len = GRN_TEXT_LEN(str);
-    token->target_text = GRN_MALLOC(token->len + 1);
-    memcpy(token->target_text, GRN_TEXT_VALUE(str), token->len);
-    token->target_text[token->len] = '\0';
-    token->next = (unsigned char *)token->target_text;
-    token->end = token->next + token->len;
+  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
+  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
+  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
+                                    nflags, token->encoding))) {
+    GRN_FREE(token);
+    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
+    return NULL;
   }
+  token->next = (unsigned char *)token->nstr->norm;
+  token->end = token->next + token->nstr->norm_blen;
+  token->len = token->nstr->length;
   GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
   GRN_UINT32_INIT(&token->stat_, 0);
   return NULL;
@@ -173,12 +154,7 @@ static grn_obj *
 delimited_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   grn_delimited_tokenizer *token = user_data->ptr;
-  if (token->normalized_text) {
-    grn_obj_unlink(ctx, token->normalized_text);
-  }
-  if (token->target_text) {
-    GRN_FREE(token->target_text);
-  }
+  grn_str_close(ctx, token->nstr);
   GRN_FREE(token);
   return NULL;
 }
@@ -202,7 +178,6 @@ delimit_null_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_d
 /* ngram tokenizer */
 
 typedef struct {
-  grn_obj *normalized_text;
   grn_str *nstr;
   uint8_t uni_alpha;
   uint8_t uni_digit;
@@ -215,7 +190,7 @@ typedef struct {
   grn_encoding encoding;
   const unsigned char *next;
   const unsigned char *end;
-  const uint_least8_t *ctypes;
+  uint_least8_t *ctypes;
   int32_t len;
   uint32_t tail;
   grn_obj curr_;
@@ -227,7 +202,6 @@ ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram
            uint8_t uni_alpha, uint8_t uni_digit, uint8_t uni_symbol, uint8_t ignore_blank)
 {
   grn_obj *str;
-  grn_obj *normalizer;
   int nflags = GRN_NORMALIZE_REMOVE_BLANK|GRN_NORMALIZE_WITH_TYPES;
   grn_ngram_tokenizer *token;
   grn_obj_flags table_flags;
@@ -237,8 +211,6 @@ ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram
   }
   if (!(token = GRN_MALLOC(sizeof(grn_ngram_tokenizer)))) { return NULL; }
   user_data->ptr = token;
-  token->normalized_text = NULL;
-  token->nstr = NULL;
   token->uni_alpha = uni_alpha;
   token->uni_digit = uni_digit;
   token->uni_symbol = uni_symbol;
@@ -247,39 +219,18 @@ ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram
   token->overlap = 0;
   token->pos = 0;
   token->skip = 0;
-  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL,
-                     &normalizer);
-  if (normalizer) {
-    unsigned int length_in_bytes;
-    if (!(token->normalized_text = grn_normalized_text_open(ctx,
-                                                            normalizer,
-                                                            GRN_TEXT_VALUE(str),
-                                                            GRN_TEXT_LEN(str),
-                                                            token->encoding,
-                                                            nflags))) {
-      GRN_FREE(token);
-      ERR(GRN_TOKENIZER_ERROR,
-          "[tokenizer][ngram][init] failed to open normalized text");
-      return NULL;
-    }
-    grn_normalized_text_get_value(ctx, token->normalized_text,
-                                  (const char **)(&(token->next)),
-                                  &(token->len),
-                                  &length_in_bytes);
-    token->end = token->next + length_in_bytes;
-    token->ctypes = grn_normalized_text_get_types(ctx, token->normalized_text);
-  } else {
-    if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
-                                      nflags, token->encoding))) {
-      GRN_FREE(token);
-      ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
-      return NULL;
-    }
-    token->next = (unsigned char *)token->nstr->norm;
-    token->end = token->next + token->nstr->norm_blen;
-    token->ctypes = token->nstr->ctypes;
-    token->len = token->nstr->length;
+  grn_table_get_info(ctx, table, &table_flags, &token->encoding, NULL);
+  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
+  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
+                                    nflags, token->encoding))) {
+    GRN_FREE(token);
+    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
+    return NULL;
   }
+  token->next = (unsigned char *)token->nstr->norm;
+  token->end = token->next + token->nstr->norm_blen;
+  token->ctypes = token->nstr->ctypes;
+  token->len = token->nstr->length;
   GRN_TEXT_INIT(&token->curr_, GRN_OBJ_DO_SHALLOW_COPY);
   GRN_UINT32_INIT(&token->stat_, 0);
   return NULL;
@@ -332,7 +283,7 @@ ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   grn_ngram_tokenizer *token = user_data->ptr;
   const unsigned char *p = token->next, *r = p, *e = token->end;
   int32_t len = 0, pos = token->pos + token->skip, status = 0;
-  const uint_least8_t *cp = token->ctypes ? token->ctypes + pos : NULL;
+  uint_least8_t *cp = token->ctypes ? token->ctypes + pos : NULL;
   if (cp && token->uni_alpha && GRN_CHAR_TYPE(*cp) == grn_char_alpha) {
     while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
       len++;
@@ -420,12 +371,7 @@ static grn_obj *
 ngram_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   grn_ngram_tokenizer *token = user_data->ptr;
-  if (token->normalized_text) {
-    grn_obj_unlink(ctx, token->normalized_text);
-  }
-  if (token->nstr) {
-    grn_str_close(ctx, token->nstr);
-  }
+  grn_str_close(ctx, token->nstr);
   GRN_FREE(token);
   return NULL;
 }
@@ -460,9 +406,7 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
   grn_token *token;
   grn_encoding encoding;
   grn_obj *tokenizer;
-  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer, NULL)) {
-    return NULL;
-  }
+  if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) { return NULL; }
   if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; }
   token->table = table;
   token->mode = mode;

  Modified: lib/tokenizer.c (+10 -23)
===================================================================
--- lib/tokenizer.c    2012-02-14 13:48:09 +0900 (b47f1db)
+++ lib/tokenizer.c    2012-02-14 13:58:03 +0900 (b33fed0)
@@ -103,29 +103,16 @@ grn_tokenizer_query *grn_tokenizer_query_create(grn_ctx *ctx,
 
     {
       grn_obj * const table = args[0];
-      grn_encoding table_encoding = GRN_ENC_NONE;
-      grn_obj *normalizer = NULL;
-      grn_table_get_info(ctx, table, NULL, &table_encoding, NULL, &normalizer);
-      if (normalizer != NULL) {
-        grn_obj * const normalized_query = grn_normalized_text_open(
-            ctx, normalizer, GRN_TEXT_VALUE(query_str),
-            GRN_TEXT_LEN(query_str), table_encoding, 0);
-        if (query->normalized_query == NULL) {
-          GRN_PLUGIN_FREE(ctx, query);
-          GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
-                           "[tokenizer] failed to open normalized text");
-          return NULL;
-        }
-        query->normalized_query = normalized_query;
-        grn_normalized_text_get_value(ctx, query->normalized_query,
-                                      &query->ptr, NULL, &query->length);
-      } else {
-        unsigned int query_length = GRN_TEXT_LEN(query_str);
-        char *query_buf = (char *)GRN_PLUGIN_MALLOC(ctx, query_length + 1);
-        if (query_buf == NULL) {
-          GRN_PLUGIN_FREE(ctx, query);
-          GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
-                           "[tokenizer] failed to duplicate query");
+      grn_encoding table_encoding;
+      int flags = 0;
+      grn_table_get_info(ctx, table, NULL, &table_encoding, NULL);
+      {
+        grn_str * const str = grn_str_open_(ctx, GRN_TEXT_VALUE(query_str),
+                                            GRN_TEXT_LEN(query_str),
+                                            flags | GRN_OBJ_KEY_NORMALIZE,
+                                            table_encoding);
+        if (str == NULL) {
+          GRN_PLUGIN_FREE(ctx, query);
           return NULL;
         }
         memcpy(query_buf, GRN_TEXT_VALUE(query_str), query_length);

  Modified: plugins/tokenizers/mecab.c (+15 -29)
===================================================================
--- plugins/tokenizers/mecab.c    2012-02-14 13:48:09 +0900 (dc9886d)
+++ plugins/tokenizers/mecab.c    2012-02-14 13:58:03 +0900 (f944656)
@@ -34,6 +34,7 @@ static grn_critical_section sole_mecab_lock;
 static grn_encoding sole_mecab_encoding = GRN_ENC_NONE;
 
 typedef struct {
+  grn_str *nstr;
   mecab_t *mecab;
   char *buf;
   char *next;
@@ -82,12 +83,11 @@ static grn_obj *
 mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   grn_obj *str;
-  const char *target_text;
+  int nflags = 0;
   char *buf, *s, *p;
   char mecab_err[256];
   grn_obj *table = args[0];
-  grn_obj *normalizer;
-  grn_obj *normalized_text;
+  grn_obj_flags table_flags;
   grn_encoding table_encoding;
   grn_mecab_tokenizer *token;
   unsigned int bufsize, maxtrial = 10, len;
@@ -113,7 +113,7 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
         "mecab_new2 failed on grn_mecab_init: %s", mecab_err);
     return NULL;
   }
-  grn_table_get_info(ctx, table, NULL, &table_encoding, NULL, &normalizer);
+  grn_table_get_info(ctx, table, &table_flags, &table_encoding, NULL);
   if (table_encoding != sole_mecab_encoding) {
     ERR(GRN_TOKENIZER_ERROR,
         "MeCab dictionary charset (%s) does not match the context encoding: <%s>",
@@ -123,36 +123,23 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   if (!(token = GRN_MALLOC(sizeof(grn_mecab_tokenizer)))) { return NULL; }
   token->mecab = sole_mecab;
   token->encoding = table_encoding;
-  normalized_text = NULL;
-  if (normalizer) {
-    if (!(normalized_text = grn_normalized_text_open(ctx,
-                                                     normalizer,
-                                                     GRN_TEXT_VALUE(str),
-                                                     GRN_TEXT_LEN(str),
-                                                     token->encoding,
-                                                     0))) {
-      GRN_FREE(token);
-      ERR(GRN_TOKENIZER_ERROR,
-          "[tokenizer][mecab] failed to open normalized text");
-      return NULL;
-    }
-    grn_normalized_text_get_value(ctx, normalized_text,
-                                  &target_text, NULL, &len);
-  } else {
-    target_text = GRN_TEXT_VALUE(str);
-    len = GRN_TEXT_LEN(str);
+  nflags |= (table_flags & GRN_OBJ_KEY_NORMALIZE);
+  if (!(token->nstr = grn_str_open_(ctx, GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
+                                    nflags, token->encoding))) {
+    GRN_FREE(token);
+    ERR(GRN_TOKENIZER_ERROR, "grn_str_open failed at grn_token_open");
+    return NULL;
   }
+  len = token->nstr->norm_blen;
   for (bufsize = len * 2 + 1; maxtrial; bufsize *= 2, maxtrial--) {
     if (!(buf = GRN_MALLOC(bufsize + 1))) {
       GRN_LOG(ctx, GRN_LOG_ALERT, "buffer allocation on mecab_init failed !");
-      if (normalized_text) {
-        grn_obj_unlink(ctx, normalized_text);
-      }
+      grn_str_close(ctx, token->nstr);
       GRN_FREE(token);
       return NULL;
     }
     CRITICAL_SECTION_ENTER(sole_mecab_lock);
-    s = mecab_sparse_tostr3(token->mecab, target_text, len, buf, bufsize);
+    s = mecab_sparse_tostr3(token->mecab, token->nstr->norm, len, buf, bufsize);
     if (!s) {
       strncpy(mecab_err, mecab_strerror(token->mecab), sizeof(mecab_err) - 1);
     }
@@ -161,12 +148,10 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
     GRN_FREE(buf);
     if (strstr(mecab_err, "output buffer overflow") == NULL) { break; }
   }
-  if (normalized_text) {
-    grn_obj_unlink(ctx, normalized_text);
-  }
   if (!s) {
     ERR(GRN_TOKENIZER_ERROR, "mecab_sparse_tostr failed len=%d bufsize=%d err=%s",
         len, bufsize, mecab_err);
+    grn_str_close(ctx, token->nstr);
     GRN_FREE(token);
     return NULL;
   }
@@ -221,6 +206,7 @@ static grn_obj *
 mecab_fin(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
   grn_mecab_tokenizer *token = user_data->ptr;
+  grn_str_close(ctx, token->nstr);
   GRN_FREE(token->buf);
   GRN_FREE(token);
   return NULL;




Groonga-commit メーリングリストの案内
Back to archive index