[Groonga-commit] groonga/groonga at 2f81b91 [support-ii-select-skip] Support skip posting list in grn_ii_select()

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Feb 16 00:22:27 JST 2015


Kouhei Sutou	2015-02-16 00:22:27 +0900 (Mon, 16 Feb 2015)

  New Revision: 2f81b917d6a314297d688003bffd100473e2990c
  https://github.com/groonga/groonga/commit/2f81b917d6a314297d688003bffd100473e2990c

  Message:
    Support skip posting list in grn_ii_select()
    
    It'll improve performance when searching for a popular term together
    with a rare term, such as "a" and "dacicvs". (I don't know about "dacicvs".)
    
    TODO:
    
      * Benchmark me. We can enable the feature by setting the
        GRN_II_CURSOR_SET_MIN_ENABLE=1 environment variable. It's disabled
        by default.

  Modified files:
    lib/ii.c

  Modified: lib/ii.c (+48 -2)
===================================================================
--- lib/ii.c    2015-02-16 00:05:17 +0900 (427523d)
+++ lib/ii.c    2015-02-16 00:22:27 +0900 (5c2a4c7)
@@ -4092,6 +4092,18 @@ exit :
   return c;
 }
 
+static inline void /* Raise cursor c's lower bound (c->min) to min; an already-higher bound is kept. ctx is not used in this body. */
+grn_ii_cursor_set_min(grn_ctx *ctx, grn_ii_cursor *c, grn_id min)
+{
+  if (c->min > min) { /* the current bound is already above the requested one: nothing to do */
+    return;
+  }
+
+  if (getenv("GRN_II_CURSOR_SET_MIN_ENABLE")) { /* opt-in: any value of the variable enables it. NOTE(review): getenv() runs on every call that gets here; consider caching the result. */
+    c->min = min; /* grn_ii_cursor_next() compares posting record IDs against c->min */
+  }
+}
+
 grn_ii_posting *
 grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
 {
@@ -4116,6 +4128,14 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
               c->pc.weight = 0;
             }
             c->pc.pos = 0;
+            if (c->pc.rid < c->min) {
+              if (c->curr_chunk < c->nchunks) {
+                if (c->pc.rid + c->cinfo[c->curr_chunk + 1].dgap < c->min) {
+                  c->crp = c->cdp + c->cdf;
+                }
+              }
+              continue;
+            }
             /*
             {
               static int count = 0;
@@ -4197,6 +4217,7 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
         }
       }
       if (c->stat & BUFFER_USED) {
+        for (;;) {
         if (c->nextb) {
           uint32_t lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
           buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
@@ -4214,6 +4235,24 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
           if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) {
             ERR(GRN_FILE_CORRUPT, "brokend!! (%d:%d) -> (%d:%d) (%d->%d)", lrid, lsid, c->pb.rid, c->pb.sid, c->buffer_pseg, *c->ppseg);
           }
+          if (c->pb.rid < c->min) {
+            c->pb.rid = 0;
+            if (br->jump > 0) {
+              buffer_rec *jump_br = BUFFER_REC_AT(c->buf, br->jump);
+              uint8_t *jump_bp;
+              uint32_t jump_rid;
+              jump_bp = NEXT_ADDR(jump_br);
+              GRN_B_DEC(jump_rid, jump_bp);
+              if (jump_rid < c->min) {
+                c->nextb = br->jump;
+              } else {
+                c->nextb = br->step;
+              }
+            } else {
+              c->nextb = br->step;
+            }
+            continue;
+          }
           c->nextb = br->step;
           GRN_B_DEC(c->pb.tf, c->bp);
           if ((c->ii->header->flags & GRN_OBJ_WITH_WEIGHT)) {
@@ -4226,6 +4265,8 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
         } else {
           c->pb.rid = 0;
         }
+        break;
+        }
       }
       if (c->pb.rid) {
         if (c->pc.rid) {
@@ -4272,6 +4313,10 @@ grn_ii_cursor_next(grn_ctx *ctx, grn_ii_cursor *c)
     } else {
       c->post = &c->pb;
       c->stat |= SOLE_DOC_USED;
+      if (c->post->rid < c->min) {
+        c->post = NULL;
+        return NULL;
+      }
     }
   }
   return c->post;
@@ -4551,10 +4596,11 @@ cursor_heap_recalc_min(cursor_heap *h)
 }
 
 static inline void
-cursor_heap_pop(grn_ctx *ctx, cursor_heap *h)
+cursor_heap_pop(grn_ctx *ctx, cursor_heap *h, grn_id min)
 {
   if (h->n_entries) {
     grn_ii_cursor *c = h->bins[0];
+    grn_ii_cursor_set_min(ctx, c, min);
     if (!grn_ii_cursor_next(ctx, c)) {
       grn_ii_cursor_close(ctx, c);
       h->bins[0] = h->bins[--h->n_entries];
@@ -5401,7 +5447,7 @@ token_info_skip(grn_ctx *ctx, token_info *ti, uint32_t rid, uint32_t sid)
     if (!(c = cursor_heap_min(ti->cursors))) { return GRN_END_OF_DATA; }
     p = c->post;
     if (p->rid > rid || (p->rid == rid && p->sid >= sid)) { break; }
-    cursor_heap_pop(ctx, ti->cursors);
+    cursor_heap_pop(ctx, ti->cursors, rid);
   }
   ti->pos = p->pos - ti->offset;
   ti->p = p;
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index