null+****@clear*****
null+****@clear*****
2010年 10月 19日 (火) 01:23:09 JST
Daijiro MORI 2010-10-18 16:23:09 +0000 (Mon, 18 Oct 2010)
New Revision: eb6f109acbb31a6fcb099bf9bcb7a1535e90eb94
Log:
Fixed a bug where index entries corresponding to the last token were not deleted.
Modified files:
lib/ii.c
lib/token.c
lib/token.h
Modified: lib/ii.c (+10 -10)
===================================================================
--- lib/ii.c 2010-10-18 15:49:27 +0000 (6452a49)
+++ lib/ii.c 2010-10-18 16:23:09 +0000 (d022c4b)
@@ -4574,7 +4574,7 @@ index_add(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr
grn_rc r, rc = GRN_SUCCESS;
grn_vgram_buf *sbuf = NULL;
if (!rid) { return GRN_INVALID_ARGUMENT; }
- if (!(token = grn_token_open(ctx, lexicon, value, value_len, 1))) {
+ if (!(token = grn_token_open(ctx, lexicon, value, value_len, grn_token_add))) {
return GRN_NO_MEMORY_AVAILABLE;
}
if (vgram) { sbuf = grn_vgram_buf_open(value_len); }
@@ -4628,7 +4628,7 @@ index_del(grn_ctx *ctx, grn_id rid, grn_obj *lexicon, grn_ii *ii, grn_vgram *vgr
grn_ii_updspec **u;
grn_id tid, *tp;
if (!rid) { return GRN_INVALID_ARGUMENT; }
- if (!(token = grn_token_open(ctx, lexicon, value, value_len, 0))) {
+ if (!(token = grn_token_open(ctx, lexicon, value, value_len, grn_token_del))) {
return GRN_NO_MEMORY_AVAILABLE;
}
h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(grn_ii_updspec *), GRN_HASH_TINY);
@@ -4706,7 +4706,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i
goto exit;
}
for (j = newvalues->n_values, v = newvalues->values; j; j--, v++) {
- if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, 1))) {
+ if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, grn_token_add))) {
while (!token->status) {
if ((tid = grn_token_next(ctx, token))) {
if (!grn_hash_add(ctx, new, &tid, sizeof(grn_id), (void **) &u, NULL)) {
@@ -4749,7 +4749,7 @@ grn_ii_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, grn_vgram *vgram, unsigned i
goto exit;
}
for (j = oldvalues->n_values, v = oldvalues->values; j; j--, v++) {
- if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, 0))) {
+ if ((token = grn_token_open(ctx, lexicon, v->str, v->str_len, grn_token_del))) {
while (!token->status) {
if ((tid = grn_token_next(ctx, token))) {
if (!grn_hash_add(ctx, old, &tid, sizeof(grn_id), (void **) &u, NULL)) {
@@ -4815,7 +4815,7 @@ exit :
static grn_rc
grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
- grn_obj *in, grn_obj *out, int add, grn_obj *posting)
+ grn_obj *in, grn_obj *out, grn_token_mode mode, grn_obj *posting)
{
int j;
grn_id tid;
@@ -4828,7 +4828,7 @@ grn_vector2updspecs(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
const char *head = GRN_BULK_HEAD(in->u.v.body);
for (j = in->u.v.n_sections, v = in->u.v.sections; j; j--, v++) {
if (v->length &&
- (token = grn_token_open(ctx, lexicon, head + v->offset, v->length, add))) {
+ (token = grn_token_open(ctx, lexicon, head + v->offset, v->length, mode))) {
while (!token->status) {
if ((tid = grn_token_next(ctx, token))) {
if (posting) { GRN_RECORD_PUT(ctx, posting, tid); }
@@ -4924,7 +4924,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create on grn_ii_update failed !");
rc = GRN_NO_MEMORY_AVAILABLE;
} else {
- rc = grn_vector2updspecs(ctx, ii, rid, section, new_, new, 1, post);
+ rc = grn_vector2updspecs(ctx, ii, rid, section, new_, new, grn_token_add, post);
}
if (new_ != newvalue) { grn_obj_close(ctx, new_); }
if (rc) { goto exit; }
@@ -5006,7 +5006,7 @@ grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id rid, unsigned int section,
GRN_LOG(ctx, GRN_LOG_ALERT, "grn_hash_create(ctx, NULL, old) on grn_ii_update failed!");
rc = GRN_NO_MEMORY_AVAILABLE;
} else {
- rc = grn_vector2updspecs(ctx, ii, rid, section, old_, old, 0, NULL);
+ rc = grn_vector2updspecs(ctx, ii, rid, section, old_, old, grn_token_del, NULL);
}
if (old_ != oldvalue) { grn_obj_close(ctx, old_); }
if (rc) { goto exit; }
@@ -5275,7 +5275,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
const char *key;
uint32_t size;
grn_rc rc = GRN_END_OF_DATA;
- grn_token *token = grn_token_open(ctx, lexicon, string, string_len, 0);
+ grn_token *token = grn_token_open(ctx, lexicon, string, string_len, grn_token_get);
if (!token) { return GRN_NO_MEMORY_AVAILABLE; }
if (mode == GRN_OP_UNSPLIT) {
if ((ti = token_info_open(ctx, lexicon, ii, (char *)token->orig, token->orig_blen, 0, EX_BOTH))) {
@@ -5557,7 +5557,7 @@ grn_ii_similar_search(grn_ctx *ctx, grn_ii *ii,
if (!(h = grn_hash_create(ctx, NULL, sizeof(grn_id), sizeof(int), 0))) {
return GRN_NO_MEMORY_AVAILABLE;
}
- if (!(token = grn_token_open(ctx, lexicon, string, string_len, 0))) {
+ if (!(token = grn_token_open(ctx, lexicon, string, string_len, grn_token_get))) {
grn_hash_close(ctx, h);
return GRN_NO_MEMORY_AVAILABLE;
}
Modified: lib/token.c (+7 -6)
===================================================================
--- lib/token.c 2010-10-18 15:49:27 +0000 (1f9d4bb)
+++ lib/token.c 2010-10-18 16:23:09 +0000 (41a5acd)
@@ -315,7 +315,7 @@ ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
if (tid && (len > 1 || r == p)) {
if (r != p && pos + len - 1 <= token->tail) { continue; }
p += strlen(key);
- if (!*p && !token->add) { token->status = grn_token_done; }
+ if (!*p && token->mode == grn_token_get) { token->status = grn_token_done; }
}
#endif /* PRE_DEFINED_UNSPLIT_WORDS */
if ((cl = grn_charlen_(ctx, (char *)r, (char *)e, token->encoding))) {
@@ -389,7 +389,8 @@ grn_token_fin(void)
}
grn_token *
-grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, int add)
+grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len,
+ grn_token_mode mode)
{
grn_token *token;
grn_encoding encoding;
@@ -397,7 +398,7 @@ grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str, size_t str_len, in
if (grn_table_get_info(ctx, table, NULL, &encoding, &tokenizer)) { return NULL; }
if (!(token = GRN_MALLOC(sizeof(grn_token)))) { return NULL; }
token->table = table;
- token->add = add;
+ token->mode = mode;
token->encoding = encoding;
token->tokenizer = tokenizer;
token->orig = str;
@@ -445,12 +446,12 @@ grn_token_next(grn_ctx *ctx, grn_token *token)
token->curr_size = GRN_TEXT_LEN(curr_);
status = GRN_UINT32_VALUE(stat_);
token->status = ((status & GRN_TOKEN_LAST) ||
- (!token->add && (status & GRN_TOKEN_REACH_END)))
+ (token->mode == grn_token_get && (status & GRN_TOKEN_REACH_END)))
? grn_token_done : grn_token_doing;
token->force_prefix = 0;
if (status & GRN_TOKEN_UNMATURED) {
if (status & GRN_TOKEN_OVERLAP) {
- if (!token->add) { token->pos++; continue; }
+ if (token->mode == grn_token_get) { token->pos++; continue; }
} else {
if (status & GRN_TOKEN_LAST) { token->force_prefix = 1; }
}
@@ -460,7 +461,7 @@ grn_token_next(grn_ctx *ctx, grn_token *token)
token->curr_size = token->orig_blen;
token->status = grn_token_done;
}
- if (token->add) {
+ if (token->mode == grn_token_add) {
switch (table->header.type) {
case GRN_TABLE_PAT_KEY :
if (grn_io_lock(ctx, ((grn_pat *)table)->io, 10000000)) {
Modified: lib/token.h (+8 -2)
===================================================================
--- lib/token.h 2010-10-18 15:49:27 +0000 (2e3f5c3)
+++ lib/token.h 2010-10-18 16:23:09 +0000 (e91a267)
@@ -37,6 +37,12 @@
extern "C" {
#endif
+typedef enum {
+ grn_token_get = 0,
+ grn_token_add,
+ grn_token_del
+} grn_token_mode;
+
typedef struct {
grn_obj *table;
const unsigned char *orig;
@@ -44,7 +50,7 @@ typedef struct {
uint32_t orig_blen;
uint32_t curr_size;
int32_t pos;
- int32_t add;
+ grn_token_mode mode;
uint8_t status;
uint8_t force_prefix;
grn_obj_flags table_flags;
@@ -71,7 +77,7 @@ grn_rc grn_token_init(void);
grn_rc grn_token_fin(void);
GRN_API grn_token *grn_token_open(grn_ctx *ctx, grn_obj *table, const char *str,
- size_t str_len, int add);
+ size_t str_len, grn_token_mode mode);
GRN_API grn_id grn_token_next(grn_ctx *ctx, grn_token *ng);
GRN_API grn_rc grn_token_close(grn_ctx *ctx, grn_token *ng);