[Groonga-commit] groonga/groonga at bdab98e [master] ii: add comments

Back to archive index

Susumu Yata null+****@clear*****
Tue Feb 23 19:41:38 JST 2016


Susumu Yata	2016-02-23 19:41:38 +0900 (Tue, 23 Feb 2016)

  New Revision: bdab98ebe59304c1c98e866bb6786444af5062f0
  https://github.com/groonga/groonga/commit/bdab98ebe59304c1c98e866bb6786444af5062f0

  Message:
    ii: add comments

  Modified files:
    lib/grn_ii.h
    lib/ii.c

  Modified: lib/grn_ii.h (+2 -1)
===================================================================
--- lib/grn_ii.h    2016-02-22 19:13:38 +0900 (8e91c4f)
+++ lib/grn_ii.h    2016-02-23 19:41:38 +0900 (7001490)
@@ -36,7 +36,8 @@ struct _grn_ii {
   grn_obj *lexicon;
   grn_obj_flags lflags;
   grn_encoding encoding;
-  uint32_t n_elements;
+  uint32_t n_elements; /* Number of elements in postings */
+                       /* rid, tf, [sid], [weight] and [pos] */
   struct grn_ii_header *header;
 };
 

  Modified: lib/ii.c (+29 -5)
===================================================================
--- lib/ii.c    2016-02-22 19:13:38 +0900 (403ca72)
+++ lib/ii.c    2016-02-23 19:41:38 +0900 (d1ef9e7)
@@ -7563,10 +7563,10 @@ typedef struct {
   uint32_t cap;        /* Buffer size */
 } ii_buffer_value;
 
-/* ii_buffer_counter is associated with a block. */
+/* ii_buffer_counter is associated with a combination of a block an a term. */
 typedef struct {
-  uint32_t nrecs;  /* Number of values */
-  uint32_t nposts; /* Number of values */
+  uint32_t nrecs;  /* Number of records or sections */
+  uint32_t nposts; /* Number of occurrences */
 
   /* Information of the last value */
   grn_id last_rid;      /* Record ID */
@@ -7703,7 +7703,7 @@ allocate_outbuf(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
  *         NextUnitSize (The first unit size is kept on memory)
  * Chunk = Term...
  * Term  = ID (gtid)
- *         NumSections (nrecs), NumValues (nposts)
+ *         NumRecordsOrSections (nrecs), NumOccurrences (nposts)
  *         RecordID... (rid, diff)
  *         [SectionID... (sid, diff)]
  *         TermFrequency... (tf, diff)
@@ -8162,6 +8162,7 @@ grn_ii_buffer_fetch(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
   }
 }
 
+/* grn_ii_buffer_chunk_flush flushes the current buffer for packed postings. */
 static void
 grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
 {
@@ -8194,6 +8195,10 @@ grn_ii_buffer_chunk_flush(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
   ii_buffer->curr_size = 0;
 }
 
+/*
+ * merge_hit_blocks merges hit blocks into ii_buffer->data_vectors.
+ * merge_hit_blocks returns the estimated maximum size in bytes.
+ */
 static size_t
 merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                  ii_buffer_block *hits[], int nhits)
@@ -8215,10 +8220,11 @@ merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                 ii_buffer->ii->n_elements, nrecs, max_size);
   {
     int i;
-    uint32_t lr = 0;
+    uint32_t lr = 0; /* Last rid */
     uint64_t spos = 0;
     uint32_t *ridp, *sidp = NULL, *tfp, *weightp = NULL, *posp = NULL;
     {
+      /* Get write positions in datavec. */
       int j = 0;
       ridp = ii_buffer->data_vectors[j++].data;
       if (flags & GRN_OBJ_WITH_SECTION) {
@@ -8233,6 +8239,7 @@ merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
       }
     }
     for (i = 0; i < nhits; i++) {
+      /* Read postings from hit blocks and join the postings into datavec. */
       ii_buffer_block *block = hits[i];
       uint8_t *p = block->bufcur;
       uint32_t n = block->nrecs;
@@ -8269,6 +8276,7 @@ merge_hit_blocks(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
       grn_ii_buffer_fetch(ctx, ii_buffer, block);
     }
     {
+      /* Set size and flags of datavec. */
       int j = 0;
       uint32_t f_s = (nrecs < 3) ? 0 : USE_P_ENC;
       uint32_t f_d = ((nrecs < 16) || (nrecs <= (lr >> 8))) ? 0 : USE_P_ENC;
@@ -8316,6 +8324,15 @@ get_term_buffer(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
   return ii_buffer->term_buffer;
 }
 
+/*
+ * try_in_place_packing tries to pack a posting in an array element.
+ *
+ * The requirements are as follows:
+ *  - nposts == 1
+ *   - nhits == 1 && nrecs == 1 && tf == 0
+ *  - weight == 0
+ *  - !(flags & GRN_OBJ_WITH_SECTION) || (rid < 0x100000 && sid < 0x800)
+ */
 static grn_bool
 try_in_place_packing(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                      grn_id tid, ii_buffer_block *hits[], int nhits)
@@ -8358,11 +8375,13 @@ try_in_place_packing(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
   return GRN_FALSE;
 }
 
+/* grn_ii_buffer_merge merges hit blocks and pack it. */
 static void
 grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
                     grn_id tid, ii_buffer_block *hits[], int nhits)
 {
   if (!try_in_place_packing(ctx, ii_buffer, tid, hits, nhits)) {
+    /* Merge hit blocks and reserve a buffer for packed data. */
     size_t max_size = merge_hit_blocks(ctx, ii_buffer, hits, nhits);
     if (ii_buffer->packed_buf &&
         ii_buffer->packed_buf_size < ii_buffer->packed_len + max_size) {
@@ -8376,6 +8395,7 @@ grn_ii_buffer_merge(grn_ctx *ctx, grn_ii_buffer *ii_buffer,
       }
     }
     {
+      /* Pack postings into the current buffer. */
       uint16_t nterm;
       size_t packed_len;
       buffer_term *bt;
@@ -8618,6 +8638,10 @@ grn_ii_buffer_commit(grn_ctx *ctx, grn_ii_buffer *ii_buffer)
                                  NULL, 0, NULL, 0, 0, -1, II_BUFFER_ORDER);
       if (tc) {
         while ((tid = grn_table_cursor_next(ctx, tc)) != GRN_ID_NIL) {
+          /*
+           * Find blocks which contain the current term.
+           * Then, merge the postings.
+           */
           int nrests = 0;
           int nhits = 0;
           uint32_t i;
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index