[Groonga-commit] groonga/groonga at 771f617 [master] grn_ii_buffer: separate grn_ii_buffer_tokenize()

Back to archive index

susumu.yata null+****@clear*****
Fri Aug 14 12:56:08 JST 2015


susumu.yata	2015-08-14 12:56:08 +0900 (Fri, 14 Aug 2015)

  New Revision: 771f61768460ff1ae2bb3985e06aca2a0c513434
  https://github.com/groonga/groonga/commit/771f61768460ff1ae2bb3985e06aca2a0c513434

  Message:
    grn_ii_buffer: separate grn_ii_buffer_tokenize()
    
    grn_ii_buffer_tokenize_value() tokenizes a specified value.
    grn_ii_buffer_tokenize() tokenizes values in ii_buffer.
    
    GitHub: #374

  Modified files:
    lib/ii.c

  Modified: lib/ii.c (+86 -79)
===================================================================
--- lib/ii.c    2015-08-14 11:36:04 +0900 (eae3861)
+++ lib/ii.c    2015-08-14 12:56:08 +0900 (66e55f4)
@@ -7480,85 +7480,95 @@ get_buffer_counter(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
 }
 
 static void
-grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid,
-                       unsigned int sid, unsigned int weight,
-                       const char *value, uint32_t value_len)
-{
-  if (value_len) {
-    grn_obj *tmp_lexicon;
-    uint32_t est_len = value_len * 2 + 2;
-    if (ii_buffer->block_buf_size < ii_buffer->block_pos + est_len) {
-      return;
-    }
-    if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) {
-      unsigned int token_flags = 0;
-      grn_token_cursor *token_cursor;
-      grn_id *buffer = ii_buffer->block_buf;
-      uint32_t block_pos = ii_buffer->block_pos;
-      uint32_t ii_flags = ii_buffer->ii->header->flags;
-      buffer[block_pos++] = II_BUFFER_PACK(rid, II_BUFFER_TYPE_RID);
-      if (ii_flags & GRN_OBJ_WITH_SECTION) {
-        buffer[block_pos++] = sid;
-      }
-      if (weight) {
-        buffer[block_pos++] = II_BUFFER_PACK(weight, II_BUFFER_TYPE_WEIGHT);
-      }
-      if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon,
-                                                value, value_len,
-                                                GRN_TOKEN_ADD, token_flags))) {
-        while (!token_cursor->status) {
-          grn_id tid;
-          if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
-            ii_buffer_counter *counter;
-            counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid);
-            if (!counter) { return; }
-            buffer[block_pos++] = tid;
-            if (ii_flags & GRN_OBJ_WITH_POSITION) {
-              buffer[block_pos++] = token_cursor->pos;
+grn_ii_buffer_tokenize_value(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
+                             grn_id rid, const ii_buffer_value *value)
+{
+  grn_obj *tmp_lexicon;
+  if ((tmp_lexicon = get_tmp_lexicon(ctx, ii_buffer))) {
+    unsigned int token_flags = 0;
+    grn_token_cursor *token_cursor;
+    grn_id *buffer = ii_buffer->block_buf;
+    uint32_t block_pos = ii_buffer->block_pos;
+    uint32_t ii_flags = ii_buffer->ii->header->flags;
+    buffer[block_pos++] = II_BUFFER_PACK(rid, II_BUFFER_TYPE_RID);
+    if (ii_flags & GRN_OBJ_WITH_SECTION) {
+      buffer[block_pos++] = value->sid;
+    }
+    if (value->weight) {
+      buffer[block_pos++] = II_BUFFER_PACK(value->weight,
+                                           II_BUFFER_TYPE_WEIGHT);
+    }
+    if ((token_cursor = grn_token_cursor_open(ctx, tmp_lexicon,
+                                              value->p, value->len,
+                                              GRN_TOKEN_ADD, token_flags))) {
+      while (!token_cursor->status) {
+        grn_id tid;
+        if ((tid = grn_token_cursor_next(ctx, token_cursor))) {
+          ii_buffer_counter *counter;
+          counter = get_buffer_counter(ctx, ii_buffer, tmp_lexicon, tid);
+          if (!counter) { return; }
+          buffer[block_pos++] = tid;
+          if (ii_flags & GRN_OBJ_WITH_POSITION) {
+            buffer[block_pos++] = token_cursor->pos;
+          }
+          if (counter->last_rid != rid) {
+            counter->offset_rid += GRN_B_ENC_SIZE(rid - counter->last_rid);
+            counter->last_rid = rid;
+            counter->offset_sid += GRN_B_ENC_SIZE(value->sid - 1);
+            counter->last_sid = value->sid;
+            if (counter->last_tf) {
+              counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
+              counter->last_tf = 0;
+              counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
+              counter->last_weight = 0;
             }
-            if (counter->last_rid != rid) {
-              counter->offset_rid += GRN_B_ENC_SIZE(rid - counter->last_rid);
-              counter->last_rid = rid;
-              counter->offset_sid += GRN_B_ENC_SIZE(sid - 1);
-              counter->last_sid = sid;
-              if (counter->last_tf) {
-                counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
-                counter->last_tf = 0;
-                counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
-                counter->last_weight = 0;
-              }
-              counter->last_pos = 0;
-              counter->nrecs++;
-            } else if (counter->last_sid != sid) {
-              counter->offset_rid += GRN_B_ENC_SIZE(0);
-              counter->offset_sid +=
-                GRN_B_ENC_SIZE(sid - counter->last_sid - 1);
-              counter->last_sid = sid;
-              if (counter->last_tf) {
-                counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
-                counter->last_tf = 0;
-                counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
-                counter->last_weight = 0;
-              }
-              counter->last_pos = 0;
-              counter->nrecs++;
+            counter->last_pos = 0;
+            counter->nrecs++;
+          } else if (counter->last_sid != value->sid) {
+            counter->offset_rid += GRN_B_ENC_SIZE(0);
+            counter->offset_sid +=
+              GRN_B_ENC_SIZE(value->sid - counter->last_sid - 1);
+            counter->last_sid = value->sid;
+            if (counter->last_tf) {
+              counter->offset_tf += GRN_B_ENC_SIZE(counter->last_tf - 1);
+              counter->last_tf = 0;
+              counter->offset_weight += GRN_B_ENC_SIZE(counter->last_weight);
+              counter->last_weight = 0;
             }
-            counter->offset_pos +=
-              GRN_B_ENC_SIZE(token_cursor->pos - counter->last_pos);
-            counter->last_pos = token_cursor->pos;
-            counter->last_tf++;
-            counter->last_weight += weight;
-            counter->nposts++;
+            counter->last_pos = 0;
+            counter->nrecs++;
           }
+          counter->offset_pos +=
+            GRN_B_ENC_SIZE(token_cursor->pos - counter->last_pos);
+          counter->last_pos = token_cursor->pos;
+          counter->last_tf++;
+          counter->last_weight += value->weight;
+          counter->nposts++;
         }
-        grn_token_cursor_close(ctx, token_cursor);
       }
-      ii_buffer->block_pos = block_pos;
+      grn_token_cursor_close(ctx, token_cursor);
     }
+    ii_buffer->block_pos = block_pos;
   }
 }
 
 static void
+grn_ii_buffer_tokenize(grn_ctx *ctx, grn_ii_buffer *ii_buffer, grn_id rid)
+{
+  unsigned int i;
+  for (i = 0; i < ii_buffer->nvalues; i++) {
+    const ii_buffer_value *value = &ii_buffer->values[i];
+    if (value->len) {
+      uint32_t est_len = value->len * 2 + 2;
+      if (ii_buffer->block_buf_size >= ii_buffer->block_pos + est_len) {
+        grn_ii_buffer_tokenize_value(ctx, ii_buffer, rid, value);
+      }
+    }
+  }
+  ii_buffer->nvalues = 0;
+}
+
+static void
 grn_ii_buffer_fetch(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                     ii_buffer_block *block)
 {
@@ -7928,8 +7938,12 @@ grn_rc
 grn_ii_buffer_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                      grn_id rid, unsigned int sid, grn_obj *value)
 {
-  grn_ii_buffer_tokenize(ctx, ii_buffer, rid, sid, 0,
-                         GRN_TEXT_VALUE(value), GRN_TEXT_LEN(value));
+  ii_buffer_value tmp_value;
+  tmp_value.sid = sid;
+  tmp_value.weight = 0;
+  tmp_value.p = GRN_TEXT_VALUE(value);
+  tmp_value.len = GRN_TEXT_LEN(value);
+  grn_ii_buffer_tokenize_value(ctx, ii_buffer, rid, &tmp_value);
   return ctx->rc;
 }
 
@@ -8168,14 +8182,7 @@ grn_ii_buffer_parse(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
             ii_buffer->block_buf_size = est_len;
           }
         }
-
-        for (j = 0; j < ii_buffer->nvalues; j++) {
-          grn_ii_buffer_tokenize(ctx, ii_buffer, rid, ii_buffer->values[j].sid,
-                                 ii_buffer->values[j].weight,
-                                 ii_buffer->values[j].p,
-                                 ii_buffer->values[j].len);
-        }
-        ii_buffer->nvalues = 0;
+        grn_ii_buffer_tokenize(ctx, ii_buffer, rid);
       }
       grn_table_cursor_close(ctx, tc);
     }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index