[Groonga-commit] groonga/groonga at f706018 [master] ii regexp: fix wrong match detection

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Apr 12 17:31:11 JST 2017


Kouhei Sutou	2017-04-12 17:31:11 +0900 (Wed, 12 Apr 2017)

  New Revision: f7060180b72c8d939d94ca149c2bf945b6afcca8
  https://github.com/groonga/groonga/commit/f7060180b72c8d939d94ca149c2bf945b6afcca8

  Message:
    ii regexp: fix wrong match detection

  Modified files:
    lib/ii.c

  Modified: lib/ii.c (+33 -19)
===================================================================
--- lib/ii.c    2017-04-12 17:08:11 +0900 (344c7c4)
+++ lib/ii.c    2017-04-12 17:31:11 +0900 (2dcdeeb)
@@ -7693,13 +7693,22 @@ grn_ii_term_extract(grn_ctx *ctx, grn_ii *ii, const char *string,
 }
 
 typedef struct {
+  grn_id rid;
+  uint32_t sid;
+  uint32_t start_pos;
+  uint32_t end_pos;
+  uint32_t tf;
+  uint32_t weight;
+} grn_ii_select_cursor_posting;
+
+typedef struct {
   btr *bt;
   grn_ii *ii;
   token_info **tis;
   uint32_t n_tis;
   int max_interval;
   grn_operator mode;
-  grn_posting posting;
+  grn_ii_select_cursor_posting posting;
   const char *string;
   unsigned int string_len;
   grn_bool done;
@@ -7831,8 +7840,6 @@ grn_ii_select_cursor_open(grn_ctx *ctx,
           cursor->n_tis,
           string_len, string);
 
-  cursor->posting.rest = 0;
-
   cursor->string = string;
   cursor->string_len = string_len;
 
@@ -7841,7 +7848,7 @@ grn_ii_select_cursor_open(grn_ctx *ctx,
   return cursor;
 }
 
-grn_posting *
+grn_ii_select_cursor_posting *
 grn_ii_select_cursor_next(grn_ctx *ctx,
                           grn_ii_select_cursor *cursor)
 {
@@ -7879,7 +7886,8 @@ grn_ii_select_cursor_next(grn_ctx *ctx,
 
     if (tip == tie) {
       int n_occurs = 0;
-      int pos = 0;
+      int start_pos = 0;
+      int end_pos = 0;
       int score = 0;
       int tscore = 0;
 
@@ -7894,12 +7902,12 @@ grn_ii_select_cursor_next(grn_ctx *ctx,
       if (n_tis == 1) {
         n_occurs = (*tis)->p->tf;
         tscore = (*tis)->p->weight + (*tis)->cursors->bins[0]->weight;
-        pos = (*tis)->p->pos;
+        start_pos = end_pos = (*tis)->p->pos;
       } else if (mode == GRN_OP_NEAR) {
         bt_zap(bt);
         for (tip = tis; tip < tie; tip++) {
           token_info *ti = *tip;
-          SKIP_OR_BREAK(pos);
+          SKIP_OR_BREAK(end_pos);
           bt_push(bt, ti);
         }
         if (tip == tie) {
@@ -7950,20 +7958,23 @@ grn_ii_select_cursor_next(grn_ctx *ctx,
 
           if (tip == tie) { tip = tis; }
           ti = *tip;
-          SKIP_OR_BREAK(pos);
-          if (ti->pos == pos) {
+          SKIP_OR_BREAK(end_pos);
+          if (ti->pos == end_pos) {
             score += ti->p->weight + ti->cursors->bins[0]->weight;
             count++;
           } else {
             score = ti->p->weight + ti->cursors->bins[0]->weight;
             count = 1;
-            pos = ti->pos;
+            if (start_pos == 0) {
+              start_pos = ti->pos;
+            }
+            end_pos = ti->pos;
           }
           if (count == n_tis) {
             tscore += score;
             score = 0;
             count = 0;
-            pos++;
+            end_pos++;
             n_occurs++;
           }
         }
@@ -7971,10 +7982,11 @@ grn_ii_select_cursor_next(grn_ctx *ctx,
       if (n_occurs > 0) {
         cursor->posting.rid = rid;
         cursor->posting.sid = sid;
-        cursor->posting.pos = pos;
+        cursor->posting.start_pos = start_pos;
+        cursor->posting.end_pos = end_pos;
         cursor->posting.tf = n_occurs;
         cursor->posting.weight = tscore;
-        if (token_info_skip_pos(ctx, *tis, rid, sid, pos + 1) != GRN_SUCCESS) {
+        if (token_info_skip_pos(ctx, *tis, rid, sid, end_pos + 1) != GRN_SUCCESS) {
           if (token_info_skip(ctx, *tis, next_rid, next_sid) != GRN_SUCCESS) {
             cursor->done = GRN_TRUE;
           }
@@ -8120,7 +8132,7 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
     grn_ii_select_cursor **cursors;
     grn_bool have_error = GRN_FALSE;
     int keep_i = 0;
-    grn_posting keep_posting;
+    grn_ii_select_cursor_posting keep_posting;
 
     cursors = GRN_CALLOC(sizeof(grn_ii_select_cursor *) * n_parsed_strings);
     for (i = 0; i < n_parsed_strings; i++) {
@@ -8144,7 +8156,7 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
     }
 
     while (!have_error) {
-      grn_posting *posting;
+      grn_ii_select_cursor_posting *posting;
       uint32_t pos;
 
       posting = grn_ii_select_cursor_next(ctx, cursors[0]);
@@ -8152,9 +8164,9 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
         break;
       }
 
-      pos = posting->pos;
+      pos = posting->end_pos;
       for (i = 1; i < n_parsed_strings; i++) {
-        grn_posting *posting_i;
+        grn_ii_select_cursor_posting *posting_i;
 
         for (;;) {
           if (keep_i == i) {
@@ -8169,7 +8181,7 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
 
           if (posting_i->rid == posting->rid &&
               posting_i->sid == posting->sid &&
-              posting_i->pos > pos) {
+              posting_i->start_pos > pos) {
             keep_i = i;
             keep_posting = *posting_i;
             break;
@@ -8188,10 +8200,12 @@ grn_ii_select_regexp(grn_ctx *ctx, grn_ii *ii,
         if (posting_i->rid != posting->rid || posting_i->sid != posting->sid) {
           break;
         }
+
+        pos = posting_i->end_pos;
       }
 
       if (i == n_parsed_strings) {
-        grn_rset_posinfo pi = {posting->rid, posting->sid, 0};
+        grn_rset_posinfo pi = {posting->rid, posting->sid, pos};
         double record_score = 1.0;
         res_add(ctx, s, &pi, record_score, op);
       }
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index