[Groonga-commit] groonga/groonga [master] grn_ii_buffer: add 'sparsity' parameter to grn_ii_build().

Back to archive index

null+****@clear***** null+****@clear*****
2012年 2月 23日 (木) 17:49:35 JST


Daijiro MORI	2012-02-23 17:49:35 +0900 (Thu, 23 Feb 2012)

  New Revision: 0b6a766884a89ca5abadab5b47351e419698acab

  Log:
    grn_ii_buffer: add 'sparsity' parameter to grn_ii_build().

  Modified files:
    lib/db.c
    lib/ii.c
    lib/ii.h

  Modified: lib/db.c (+1 -1)
===================================================================
--- lib/db.c    2012-02-23 15:15:41 +0900 (775c063)
+++ lib/db.c    2012-02-23 17:49:35 +0900 (29a5a96)
@@ -5724,7 +5724,7 @@ build_index(grn_ctx *ctx, grn_obj *obj)
   if ((src = grn_ctx_at(ctx, *s))) {
     target = GRN_OBJ_TABLEP(src) ? src : grn_ctx_at(ctx, src->header.domain);
     if (target) {
-      grn_ii_build(ctx, (grn_ii *)obj);
+      grn_ii_build(ctx, (grn_ii *)obj, 10);
     } else {
       ERR(GRN_INVALID_ARGUMENT, "invalid target");
     }

  Modified: lib/ii.c (+28 -11)
===================================================================
--- lib/ii.c    2012-02-23 15:15:41 +0900 (6602d8c)
+++ lib/ii.c    2012-02-23 17:49:35 +0900 (7ccdf63)
@@ -6388,6 +6388,7 @@ struct _grn_ii_buffer {
   uint32_t nblocks;
   int tmpfd;
   char tmpfpath[PATH_MAX];
+  uint64_t update_buffer_size;
   // stuff for parsing
   off_t filepos;
   grn_id *block_buf;
@@ -6395,8 +6396,8 @@ struct _grn_ii_buffer {
   uint32_t block_pos;
   ii_buffer_counter *counters;
   uint32_t ncounters;
-  uint64_t total_nrecs;
-  uint64_t total_nposts;
+  uint64_t total_size;
+  uint64_t curr_size;
   // stuff for merging
   grn_ii *ii;
   uint32_t lseg;
@@ -6492,8 +6493,7 @@ encode_terms(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
       GRN_B_ENC(gtid, outbufp);
       GRN_B_ENC(counter->nrecs, outbufp);
       GRN_B_ENC(counter->nposts, outbufp);
-      ii_buffer->total_nrecs += counter->nrecs;
-      ii_buffer->total_nposts += counter->nposts;
+      ii_buffer->total_size += counter->nrecs + counter->nposts;
       counter->offset_rid = outbufp - outbuf;
       outbufp += offset_rid;
       if ((flags & GRN_OBJ_WITH_SECTION)) {
@@ -6853,6 +6853,7 @@ grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
   ii_buffer->packed_buf = NULL;
   ii_buffer->packed_len = 0;
   ii_buffer->packed_buf_size = 0;
+  ii_buffer->curr_size = 0;
 }
 
 static uint32_t
@@ -6869,6 +6870,7 @@ merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
     nrecs += block->nrecs;
     nposts += block->nposts;
   }
+  ii_buffer->curr_size += nrecs + nposts;
   max_size = nrecs * (ii_buffer->ii->n_elements);
   if (flags & GRN_OBJ_WITH_POSITION) { max_size += nposts - nrecs; }
   datavec_reset(ctx, ii_buffer->data_vectors,
@@ -7056,7 +7058,9 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
       bt->size_in_chunk = packed_len;
       bt->pos_in_chunk = ii_buffer->packed_len;
       ii_buffer->packed_len += packed_len;
-      if (term_buffer->header.nterms == II_BUFFER_NTERMS_PER_BUFFER) {
+      if (((ii_buffer->curr_size * ii_buffer->update_buffer_size) +
+           (ii_buffer->total_size * term_buffer->header.nterms * 16)) >=
+          (ii_buffer->total_size * II_BUFFER_NTERMS_PER_BUFFER * 16)) {
         grn_ii_buffer_chunk_flush(ctx, ii_buffer);
       }
     }
@@ -7064,7 +7068,7 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
 }
 
 grn_ii_buffer *
-grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii)
+grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii, uint64_t update_buffer_size)
 {
   if (ii && ii->lexicon) {
     grn_ii_buffer *ii_buffer = GRN_MALLOCN(grn_ii_buffer, 1);
@@ -7077,8 +7081,8 @@ grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii)
       ii_buffer->ncounters = II_BUFFER_NCOUNTERS_MARGIN;
       ii_buffer->block_pos = 0;
       ii_buffer->filepos = 0;
-      ii_buffer->total_nrecs = 0;
-      ii_buffer->total_nposts = 0;
+      ii_buffer->total_size = 0;
+      ii_buffer->update_buffer_size = update_buffer_size;
       ii_buffer->counters = GRN_CALLOC(ii_buffer->ncounters *
                                        sizeof(ii_buffer_counter));
       if (ii_buffer->counters) {
@@ -7136,7 +7140,20 @@ grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
   close(ii_buffer->tmpfd);
   GRN_FREE(ii_buffer->block_buf);
   GRN_FREE(ii_buffer->counters);
-  GRN_LOG(ctx, GRN_LOG_NOTICE, "nblocks: %d", ii_buffer->nblocks);
+
+  if (ii_buffer->update_buffer_size &&
+      ii_buffer->update_buffer_size < 20) {
+    if (ii_buffer->update_buffer_size < 10) {
+      ii_buffer->update_buffer_size =
+        ii_buffer->total_size >> (10 - ii_buffer->update_buffer_size);
+    } else {
+      ii_buffer->update_buffer_size =
+        ii_buffer->total_size << (ii_buffer->update_buffer_size - 10);
+    }
+  }
+
+  GRN_LOG(ctx, GRN_LOG_NOTICE, "nblocks=%d, update_buffer_size=%zu",
+          ii_buffer->nblocks, ii_buffer->update_buffer_size);
 
   ii_buffer->term_buffer = NULL;
   ii_buffer->packed_buf = NULL;
@@ -7241,9 +7258,9 @@ grn_ii_buffer_parse(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
 }
 
 grn_rc
-grn_ii_build(grn_ctx *ctx, grn_ii *ii)
+grn_ii_build(grn_ctx *ctx, grn_ii *ii, unsigned int sparsity)
 {
-  grn_ii_buffer *ii_buffer = grn_ii_buffer_open(ctx, ii);
+  grn_ii_buffer *ii_buffer = grn_ii_buffer_open(ctx, ii, sparsity);
   if (ii_buffer) {
     grn_id *s = ii->obj.source;
     if ((ii->obj.source_size) && s) {

  Modified: lib/ii.h (+3 -2)
===================================================================
--- lib/ii.h    2012-02-23 15:15:41 +0900 (4d9708d)
+++ lib/ii.h    2012-02-23 17:49:35 +0900 (c0fad19)
@@ -188,8 +188,9 @@ grn_rc grn_ii_at(grn_ctx *ctx, grn_ii *ii, grn_id id, grn_hash *s, grn_operator
 void grn_ii_inspect_elements(grn_ctx *ctx, grn_ii *ii, grn_obj *buf);
 void grn_ii_cursor_inspect(grn_ctx *ctx, grn_ii_cursor *c, grn_obj *buf);
 
-grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii);
-grn_ii_buffer *grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii);
+grn_rc grn_ii_build(grn_ctx *ctx, grn_ii *ii, unsigned int sparsity);
+grn_ii_buffer *grn_ii_buffer_open(grn_ctx *ctx, grn_ii *ii,
+                                  long long unsigned int update_buffer_size);
 grn_rc grn_ii_buffer_append(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                             grn_id rid, unsigned int section, grn_obj *value);
 grn_rc grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer);




Groonga-commit メーリングリストの案内
Back to archive index