[Groonga-commit] groonga/groonga at f13164f [master] ii: show positions

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Jan 18 19:03:46 JST 2019


Kouhei Sutou	2019-01-18 19:03:46 +0900 (Fri, 18 Jan 2019)

  Revision: f13164f0bbee9311002ccd3d873530e52557a16b
  https://github.com/groonga/groonga/commit/f13164f0bbee9311002ccd3d873530e52557a16b

  Message:
    ii: show positions

  Modified files:
    lib/ii.c
    tools/generate-long-posting-list-data.rb

  Modified: lib/ii.c (+79 -40)
===================================================================
--- lib/ii.c    2019-01-18 18:29:54 +0900 (ab24c597d)
+++ lib/ii.c    2019-01-18 19:03:46 +0900 (9133e80fd)
@@ -1,7 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2018 Brazil
-  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
+  Copyright(C) 2018-2019 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -2744,30 +2744,10 @@ typedef struct {
 
 #define BUFFER_MERGE_DUMP_SOURCE_BATCH_SIZE 10
 
-static void
-buffer_merge_dump_source_add_entry(grn_ctx *ctx,
-                                   buffer_merge_dump_source_data *data,
-                                   grn_id record_id,
-                                   uint32_t section_id)
-{
-  if (GRN_TEXT_LEN(&(data->inspected_entries)) > 0) {
-    GRN_TEXT_PUTC(ctx, &(data->inspected_entries), ' ');
-  }
-  grn_text_printf(ctx,
-                  &(data->inspected_entries),
-                  "(%d:%d)",
-                  record_id,
-                  section_id);
-  data->n_inspected_entries++;
-  if (data->n_inspected_entries == BUFFER_MERGE_DUMP_SOURCE_BATCH_SIZE) {
-    GRN_LOG(ctx, data->log_level,
-            "%.*s",
-            (int)GRN_TEXT_LEN(&(data->inspected_entries)),
-            GRN_TEXT_VALUE(&(data->inspected_entries)));
-    GRN_BULK_REWIND(&(data->inspected_entries));
-    data->n_inspected_entries = 0;
-  }
-}
+typedef enum {
+  BUFFER_MERGE_DUMP_SOURCE_ENTRY_BUFFER,
+  BUFFER_MERGE_DUMP_SOURCE_ENTRY_CHUNK,
+} buffer_merge_dump_source_entry_type;
 
 static void
 buffer_merge_dump_source_flush_entries(grn_ctx *ctx,
@@ -2786,6 +2766,53 @@ buffer_merge_dump_source_flush_entries(grn_ctx *ctx,
 }
 
 static void
+buffer_merge_dump_source_add_entry(grn_ctx *ctx,
+                                   buffer_merge_dump_source_data *data,
+                                   buffer_merge_dump_source_entry_type type,
+                                   docinfo *info,
+                                   uint8_t *position_diffs)
+{
+  if (GRN_TEXT_LEN(&(data->inspected_entries)) > 0) {
+    GRN_TEXT_PUTC(ctx, &(data->inspected_entries), ' ');
+  }
+  grn_text_printf(ctx,
+                  &(data->inspected_entries),
+                  "(%u:%u:%u:%u)",
+                  info->rid,
+                  info->sid,
+                  info->tf,
+                  info->weight);
+  if (data->ii->header->flags & GRN_OBJ_WITH_POSITION) {
+    uint32_t i;
+    uint32_t position = 0;
+
+    GRN_TEXT_PUTC(ctx, &(data->inspected_entries), '[');
+    for (i = 0; i < info->tf; i++) {
+      uint32_t position_diff;
+
+      if (i > 0) {
+        GRN_TEXT_PUTC(ctx, &(data->inspected_entries), ',');
+      }
+      if (type == BUFFER_MERGE_DUMP_SOURCE_ENTRY_BUFFER) {
+        GRN_B_DEC(position_diff, position_diffs);
+      } else {
+        position_diff = ((uint32_t *)position_diffs)[i];
+      }
+      position += position_diff;
+      grn_text_printf(ctx,
+                      &(data->inspected_entries),
+                      "%u",
+                      position);
+    }
+    GRN_TEXT_PUTC(ctx, &(data->inspected_entries), ']');
+  }
+  data->n_inspected_entries++;
+  if (data->n_inspected_entries == BUFFER_MERGE_DUMP_SOURCE_BATCH_SIZE) {
+    buffer_merge_dump_source_flush_entries(ctx, data);
+  }
+}
+
+static void
 buffer_merge_dump_chunk_raw(grn_ctx *ctx,
                             buffer_merge_dump_source_data *data,
                             uint8_t *chunk_start,
@@ -2833,7 +2860,7 @@ buffer_merge_dump_chunk_raw(grn_ctx *ctx,
     uint32_t *section_id_diffs = NULL;
     uint32_t *n_terms_list;
     uint32_t *weights;
-    uint32_t *positions;
+    uint32_t *position_diffs = NULL;
 
     {
       int i = 0;
@@ -2847,7 +2874,7 @@ buffer_merge_dump_chunk_raw(grn_ctx *ctx,
         weights = data->data_vector[i++].data;
       }
       if (data->ii->header->flags & GRN_OBJ_WITH_POSITION) {
-        positions = data->data_vector[i++].data;
+        position_diffs = data->data_vector[i++].data;
       }
     }
 
@@ -2858,10 +2885,10 @@ buffer_merge_dump_chunk_raw(grn_ctx *ctx,
 
       for (i = 0; i < n_documents; i++) {
         uint32_t record_id_diff = record_id_diffs[i];
-        uint32_t n_terms;
-        uint32_t weight;
+        docinfo info;
 
         record_id += record_id_diff;
+        info.rid = record_id;
         if (data->ii->header->flags & GRN_OBJ_WITH_SECTION) {
           if (record_id_diff > 0) {
             section_id = 0;
@@ -2870,14 +2897,19 @@ buffer_merge_dump_chunk_raw(grn_ctx *ctx,
         } else {
           section_id = 1;
         }
-        n_terms = 1 + n_terms_list[i];
+        info.sid = section_id;
+        info.tf = 1 + n_terms_list[i];
         if (data->ii->header->flags & GRN_OBJ_WITH_WEIGHT) {
-          weight = weights[i];
+          info.weight = weights[i];
         } else {
-          weight = 0;
+          info.weight = 0;
         }
 
-        buffer_merge_dump_source_add_entry(ctx, data, record_id, section_id);
+        buffer_merge_dump_source_add_entry(ctx,
+                                           data,
+                                           BUFFER_MERGE_DUMP_SOURCE_ENTRY_CHUNK,
+                                           &info,
+                                           (uint8_t *)position_diffs);
       }
       buffer_merge_dump_source_flush_entries(ctx, data);
     }
@@ -3062,18 +3094,25 @@ buffer_merge_dump_source(grn_ctx *ctx,
     while (position > 0) {
       buffer_rec *record = BUFFER_REC_AT(buffer, position);
       uint8_t *record_data = GRN_NEXT_ADDR(record);
-      grn_id record_id;
-      grn_id section_id = 1;
-      uint32_t weight = 0;
+      docinfo info;
 
-      GRN_B_DEC(record_id, record_data);
+      GRN_B_DEC(info.rid, record_data);
       if (ii->header->flags & GRN_OBJ_WITH_SECTION) {
-        GRN_B_DEC(section_id, record_data);
+        GRN_B_DEC(info.sid, record_data);
+      } else {
+        info.sid = 1;
       }
+      GRN_B_DEC(info.tf, record_data);
       if (ii->header->flags & GRN_OBJ_WITH_WEIGHT) {
-        GRN_B_DEC(weight, record_data);
+        GRN_B_DEC(info.weight, record_data);
+      } else {
+        info.weight = 0;
       }
-      buffer_merge_dump_source_add_entry(ctx, &data, record_id, section_id);
+      buffer_merge_dump_source_add_entry(ctx,
+                                         &data,
+                                         BUFFER_MERGE_DUMP_SOURCE_ENTRY_BUFFER,
+                                         &info,
+                                         record_data);
       position = record->step;
     }
     buffer_merge_dump_source_flush_entries(ctx, &data);

  Modified: tools/generate-long-posting-list-data.rb (+16 -1)
===================================================================
--- tools/generate-long-posting-list-data.rb    2019-01-18 18:29:54 +0900 (9b9425b7e)
+++ tools/generate-long-posting-list-data.rb    2019-01-18 19:03:46 +0900 (5dd8e320c)
@@ -1,4 +1,19 @@
 #!/usr/bin/env ruby
+#
+# Copyright(C) 2019 Kouhei Sutou <kou****@clear*****>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 require "json"
 require "optparse"
@@ -49,7 +64,7 @@ LOAD
 n_records.times do
   record = {}
   columns.each do |column|
-    record[column] = "X"
+    record[column] = "XX XX XX"
   end
   puts(record.to_json)
 end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190118/72498c54/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index