Kouhei Sutou
null+****@clear*****
Mon Feb 16 00:22:27 JST 2015
Kouhei Sutou 2015-02-16 00:22:27 +0900 (Mon, 16 Feb 2015) New Revision: 2f81b917d6a314297d688003bffd100473e2990c https://github.com/groonga/groonga/commit/2f81b917d6a314297d688003bffd100473e2990c Message: Support skip posting list in grn_ii_select() It'll improve performance for searching popular term and rare term such as "a" and "dacicvs". (I don't know about "dacicvs".) TODO: * Benchmark me. We can enable the feature by GRN_II_CURSOR_SET_MIN_ENABLE=1 environment variable. It's disabled by default. Modified files: lib/ii.c Modified: lib/ii.c (+48 -2) =================================================================== --- lib/ii.c 2015-02-16 00:05:17 +0900 (427523d) +++ lib/ii.c 2015-02-16 00:22:27 +0900 (5c2a4c7) @@ -4092,6 +4092,18 @@ exit : return c; } +static inline void +grn_ii_cursor_set_min(grn_ctx *ctx, grn_ii_cursor *c, grn_id min) +{ + if (c->min > min) { + return; + } + + if (getenv("GRN_II_CURSOR_SET_MIN_ENABLE")) { + c->min = min; + } +} + grn_ii_posting * grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) { @@ -4116,6 +4128,14 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) c->pc.weight = 0; } c->pc.pos = 0; + if (c->pc.rid < c->min) { + if (c->curr_chunk < c->nchunks) { + if (c->pc.rid + c->cinfo[c->curr_chunk + 1].dgap < c->min) { + c->crp = c->cdp + c->cdf; + } + } + continue; + } /* { static int count = 0; @@ -4197,6 +4217,7 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) } } if (c->stat & BUFFER_USED) { + for (;;) { if (c->nextb) { uint32_t lrid = c->pb.rid, lsid = c->pb.sid; /* for check */ buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb); @@ -4214,6 +4235,24 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) { ERR(GRN_FILE_CORRUPT, "brokend!! 
(%d:%d) -> (%d:%d) (%d->%d)", lrid, lsid, c->pb.rid, c->pb.sid, c->buffer_pseg, *c->ppseg); } + if (c->pb.rid < c->min) { + c->pb.rid = 0; + if (br->jump > 0) { + buffer_rec *jump_br = BUFFER_REC_AT(c->buf, br->jump); + uint8_t *jump_bp; + uint32_t jump_rid; + jump_bp = NEXT_ADDR(jump_br); + GRN_B_DEC(jump_rid, jump_bp); + if (jump_rid < c->min) { + c->nextb = br->jump; + } else { + c->nextb = br->step; + } + } else { + c->nextb = br->step; + } + continue; + } c->nextb = br->step; GRN_B_DEC(c->pb.tf, c->bp); if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) { @@ -4226,6 +4265,8 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) } else { c->pb.rid = 0; } + break; + } } if (c->pb.rid) { if (c->pc.rid) { @@ -4272,6 +4313,10 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c) } else { c->post = &c->pb; c->stat |= SOLE_DOC_USED; + if (c->post->rid < c->min) { + c->post = NULL; + return NULL; + } } } return c->post; @@ -4551,10 +4596,11 @@ cursor_heap_recalc_min(cursor_heap *h) } static inline void -cursor_heap_pop(grn_ctx *ctx, cursor_heap *h) +cursor_heap_pop(grn_ctx *ctx, cursor_heap *h, grn_id min) { if (h->n_entries) { grn_ii_cursor *c = h->bins[0]; + grn_ii_cursor_set_min(ctx, c, min); if (!grn_ii_cursor_next(ctx, c)) { grn_ii_cursor_close(ctx, c); h->bins[0] = h->bins[--h->n_entries]; @@ -5401,7 +5447,7 @@ token_info_skip(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid) if (!(c = cursor_heap_min(ti->cursors))) { return GRN_END_OF_DATA; } p = c->post; if (p->rid > rid || (p->rid == rid && p->sid >= sid)) { break; } - cursor_heap_pop(ctx, ti->cursors); + cursor_heap_pop(ctx, ti->cursors, rid); } ti->pos = p->pos - ti->offset; ti->p = p; -------------- next part -------------- HTML attachment (archive notice unreadable due to character-encoding damage)...Download