[Groonga-commit] groonga/groonga [master] Fixed index entries corresponding to the last token not being deleted.

Back to archive index

null+****@clear***** null+****@clear*****
2010年 10月 19日 (火) 01:23:09 JST


Daijiro MORI	2010-10-18 16:23:09 +0000 (Mon, 18 Oct 2010)

  New Revision: eb6f109acbb31a6fcb099bf9bcb7a1535e90eb94

  Log:
    Fixed index entries corresponding to the last token not being deleted.

  Modified files:
    lib/ii.c
    lib/token.c
    lib/token.h

  Modified: lib/ii.c (+10 -10)
===================================================================
--- lib/ii.c    2010-10-18 15:49:27 +0000 (6452a49)
+++ lib/ii.c    2010-10-18 16:23:09 +0000 (d022c4b)
@@ -4574,7 +4574,7 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr
   grn_rc r, rc = GRN_SUCCESS;
   grn_vgram_buf *sbuf = NULL;
   if (!rid) { return GRN_INVALID_ARGUMENT; }
-  if (!(token = grn_token_open(ctx, lexicon, value, value_len, 1))) {
+  if (!(token = grn_token_open(ctx, lexicon, value, value_len, grn_token_add))) {
     return GRN_NO_MEMORY_AVAILABLE;
   }
   if (vgram) { sbuf = grn_vgram_buf_open(value_len); }
@@ -4628,7 +4628,7 @@ index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr
   grn_ii_updspec **u;
   grn_id tid, *tp;
   if (!rid) { return GRN_INVALID_ARGUMENT; }
-  if (!(token = grn_token_open(ctx, lexicon, value, value_len, 0))) {
+  if (!(token = grn_token_open(ctx, lexicon, value, value_len, grn_token_del))) {
     return GRN_NO_MEMORY_AVAILABLE;
   }
   h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY);
@@ -4706,7 +4706,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i
       goto exit;
     }
     for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) {
-      if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, 1))) {
+      if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, grn_token_add))) {
         while (!token->status) {
           if ((tid = grn_token_next(ctx, token))) {
             if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u, NULL)) {
@@ -4749,7 +4749,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i
       goto exit;
     }
     for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) {
-      if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, 0))) {
+      if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, grn_token_del))) {
         while (!token->status) {
           if ((tid = grn_token_next(ctx, token))) {
             if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u, NULL)) {
@@ -4815,7 +4815,7 @@ exit :
 
 static grn_rc
 grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
-                    grn_obj *in, grn_obj *out, int add, grn_obj *posting)
+                    grn_obj *in, grn_obj *out, grn_token_mode mode, grn_obj *posting)
 {
   int j;
   grn_id tid;
@@ -4828,7 +4828,7 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
     const char *head = GRN_BULK_HEAD(in->u.v.body);
     for (j = in->u.v.n_sections, v = in->u.v.sections; j; j--, v++) {
       if (v->length &&
-          (token = grn_token_open(ctx, lexicon, head + v->offset, v->length, add))) {
+          (token = grn_token_open(ctx, lexicon, head + v->offset, v->length, mode))) {
         while (!token->status) {
           if ((tid = grn_token_next(ctx, token))) {
             if (posting) { GRN_RECORD_PUT(ctx, posting, tid); }
@@ -4924,7 +4924,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !");
         rc = GRN_NO_MEMORY_AVAILABLE;
       } else {
-        rc = grn_vector2updspecs(ctx, ii, rid, section, new_, new, 1, post);
+        rc = grn_vector2updspecs(ctx, ii, rid, section, new_, new, grn_token_add, post);
       }
       if (new_ != newvalue) { grn_obj_close(ctx, new_); }
       if (rc) { goto exit; }
@@ -5006,7 +5006,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
         GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!");
         rc = GRN_NO_MEMORY_AVAILABLE;
       } else {
-        rc = grn_vector2updspecs(ctx, ii, rid, section, old_, old, 0, NULL);
+        rc = grn_vector2updspecs(ctx, ii, rid, section, old_, old, grn_token_del, NULL);
       }
       if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
       if (rc) { goto exit; }
@@ -5275,7 +5275,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
   const char *key;
   uint32_t size;
   grn_rc rc = GRN_END_OF_DATA;
-  grn_token *token = grn_token_open(ctx, lexicon, string, string_len, 0);
+  grn_token *token = grn_token_open(ctx, lexicon, string, string_len, grn_token_get);
   if (!token) { return GRN_NO_MEMORY_AVAILABLE; }
   if (mode == GRN_OP_UNSPLIT) {
     if ((ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, token->orig_blen, 0, EX_BOTH))) {
@@ -5557,7 +5557,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
   if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) {
     return GRN_NO_MEMORY_AVAILABLE;
   }
-  if (!(token = grn_token_open(ctx, lexicon, string, string_len, 0))) {
+  if (!(token = grn_token_open(ctx, lexicon, string, string_len, grn_token_get))) {
     grn_hash_close(ctx, h);
     return GRN_NO_MEMORY_AVAILABLE;
   }

  Modified: lib/token.c (+7 -6)
===================================================================
--- lib/token.c    2010-10-18 15:49:27 +0000 (1f9d4bb)
+++ lib/token.c    2010-10-18 16:23:09 +0000 (41a5acd)
@@ -315,7 +315,7 @@ ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
     if (tid && (len > 1 || r == p)) {
       if (r != p && pos + len - 1 <= token->tail) { continue; }
       p += strlen(key);
-      if (!*p && !token->add) { token->status = grn_token_done; }
+      if (!*p && token->mode == grn_token_get) { token->status = grn_token_done; }
     }
 #endif /* PRE_DEFINED_UNSPLIT_WORDS */
     if ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
@@ -389,7 +389,8 @@ grn_token_fin(void)
 }
 
 grn_token *
-grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, int add)
+grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
+               grn_token_mode mode)
 {
   grn_token *token;
   grn_encoding encoding;
@@ -397,7 +398,7 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, in
   if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) { return NULL; }
   if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; }
   token->table = table;
-  token->add = add;
+  token->mode = mode;
   token->encoding = encoding;
   token->tokenizer = tokenizer;
   token->orig = str;
@@ -445,12 +446,12 @@ grn_token_next(grn_ctx *ctx, grn_token *token)
       token->curr_size = GRN_TEXT_LEN(curr_);
       status = GRN_UINT32_VALUE(stat_);
       token->status = ((status & GRN_TOKEN_LAST) ||
-                       (!token->add && (status & GRN_TOKEN_REACH_END)))
+                       (token->mode == grn_token_get && (status & GRN_TOKEN_REACH_END)))
         ? grn_token_done : grn_token_doing;
       token->force_prefix = 0;
       if (status & GRN_TOKEN_UNMATURED) {
         if (status & GRN_TOKEN_OVERLAP) {
-          if (!token->add) { token->pos++; continue; }
+          if (token->mode == grn_token_get) { token->pos++; continue; }
         } else {
           if (status & GRN_TOKEN_LAST) { token->force_prefix = 1; }
         }
@@ -460,7 +461,7 @@ grn_token_next(grn_ctx *ctx, grn_token *token)
       token->curr_size = token->orig_blen;
       token->status = grn_token_done;
     }
-    if (token->add) {
+    if (token->mode == grn_token_add) {
       switch (table->header.type) {
       case GRN_TABLE_PAT_KEY :
         if (grn_io_lock(ctx, ((grn_pat *)table)->io, 10000000)) {

  Modified: lib/token.h (+8 -2)
===================================================================
--- lib/token.h    2010-10-18 15:49:27 +0000 (2e3f5c3)
+++ lib/token.h    2010-10-18 16:23:09 +0000 (e91a267)
@@ -37,6 +37,12 @@
 extern "C" {
 #endif
 
+typedef enum {
+  grn_token_get = 0,
+  grn_token_add,
+  grn_token_del
+} grn_token_mode;
+
 typedef struct {
   grn_obj *table;
   const unsigned char *orig;
@@ -44,7 +50,7 @@ typedef struct {
   uint32_t orig_blen;
   uint32_t curr_size;
   int32_t pos;
-  int32_t add;
+  grn_token_mode mode;
   uint8_t status;
   uint8_t force_prefix;
   grn_obj_flags table_flags;
@@ -71,7 +77,7 @@ grn_rc grn_token_init(void);
 grn_rc grn_token_fin(void);
 
 GRN_API grn_token *grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str,
-                                  size_t str_len, int add);
+                                  size_t str_len, grn_token_mode mode);
 
 GRN_API grn_id grn_token_next(grn_ctx *ctx, grn_token *ng);
 GRN_API grn_rc grn_token_close(grn_ctx *ctx, grn_token *ng);




Groonga-commit メーリングリストの案内
Back to archive index