[Groonga-commit] groonga/groonga at 2dfb8e5 [master] logical_range_filter: reduce needless sort

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Feb 25 14:12:15 JST 2015


Kouhei Sutou	2015-02-25 14:12:15 +0900 (Wed, 25 Feb 2015)

  New Revision: 2dfb8e57e101b6f97eb9d77bdba535ac098ae950
  https://github.com/groonga/groonga/commit/2dfb8e57e101b6f97eb9d77bdba535ac098ae950

  Message:
    logical_range_filter: reduce needless sort

  Modified files:
    lib/mrb/mrb_index_cursor.c
    lib/mrb/scripts/logger.rb
    plugins/sharding/logical_range_filter.rb

  Modified: lib/mrb/mrb_index_cursor.c (+49 -14)
===================================================================
--- lib/mrb/mrb_index_cursor.c    2015-02-25 14:08:33 +0900 (e49446a)
+++ lib/mrb/mrb_index_cursor.c    2015-02-25 14:12:15 +0900 (f04818f)
@@ -31,6 +31,7 @@
 #include "mrb_ctx.h"
 #include "mrb_index_cursor.h"
 #include "mrb_converter.h"
+#include "mrb_options.h"
 
 static struct mrb_data_type mrb_grn_index_cursor_type = {
   "Groonga::IndexCursor",
@@ -115,34 +116,59 @@ static mrb_value
 mrb_grn_index_cursor_select(mrb_state *mrb, mrb_value self)
 {
   grn_ctx *ctx = (grn_ctx *)mrb->ud;
-  mrb_value mrb_expr;
+  mrb_value mrb_result_set;
+  mrb_value mrb_options;
+  grn_obj *index_cursor;
   grn_obj *expr = NULL;
   grn_obj *expr_variable = NULL;
-  grn_obj *index_cursor;
+  int offset = 0;
+  int limit = 10;
+  int n_processed_records = 0;
   mrb_value mrb_index;
   grn_obj *index;
   grn_obj *lexicon;
   grn_obj *data_table;
-  grn_obj *result;
+  grn_hash *result_set;
   grn_posting *posting;
   grn_id term_id;
   grn_operator op = GRN_OP_OR;
 
-  mrb_get_args(mrb, "o", &mrb_expr);
+  mrb_get_args(mrb, "o|H", &mrb_result_set, &mrb_options);
 
   index_cursor = DATA_PTR(self);
-  if (!mrb_nil_p(mrb_expr)) {
-    expr = DATA_PTR(mrb_expr);
-    expr_variable = grn_expr_get_var_by_offset(ctx, expr, 0);
+  result_set = DATA_PTR(mrb_result_set);
+
+  if (!mrb_nil_p(mrb_options)) {
+    mrb_value mrb_expr;
+    mrb_value mrb_offset;
+    mrb_value mrb_limit;
+
+    mrb_expr = grn_mrb_options_get_lit(mrb, mrb_options, "expression");
+    if (!mrb_nil_p(mrb_expr)) {
+      expr = DATA_PTR(mrb_expr);
+      expr_variable = grn_expr_get_var_by_offset(ctx, expr, 0);
+    }
+
+    mrb_offset = grn_mrb_options_get_lit(mrb, mrb_options, "offset");
+    if (!mrb_nil_p(mrb_offset)) {
+      offset = mrb_fixnum(mrb_offset);
+    }
+
+    mrb_limit = grn_mrb_options_get_lit(mrb, mrb_options, "limit");
+    if (!mrb_nil_p(mrb_limit)) {
+      limit = mrb_fixnum(mrb_limit);
+    }
+  }
+
+  if (limit <= 0) {
+    return mrb_fixnum_value(n_processed_records);
   }
+
   mrb_index = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@index"));
   index = DATA_PTR(mrb_index);
   lexicon = ((grn_ii *)index)->lexicon;
   data_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index));
 
-  result = grn_table_create(ctx, NULL, 0, NULL,
-                            GRN_TABLE_HASH_KEY | GRN_OBJ_WITH_SUBREC,
-                            data_table, NULL);
   while ((posting = grn_index_cursor_next(ctx, index_cursor, &term_id))) {
     if (expr) {
       grn_bool matched_raw;
@@ -155,11 +181,20 @@ mrb_grn_index_cursor_select(mrb_state *mrb, mrb_value self)
         continue;
       }
     }
-    grn_ii_posting_add(ctx, (grn_ii_posting *)posting, (grn_hash *)result, op);
+    n_processed_records++;
+    if (offset > 0) {
+      offset--;
+      continue;
+    }
+    grn_ii_posting_add(ctx, (grn_ii_posting *)posting, result_set, op);
+    limit--;
+    if (limit == 0) {
+      break;
+    }
   }
-  grn_ii_resolve_sel_and(ctx, (grn_hash *)result, op);
+  grn_ii_resolve_sel_and(ctx, result_set, op);
 
-  return grn_mrb_value_from_grn_obj(mrb, result);
+  return mrb_fixnum_value(n_processed_records);
 }
 
 void
@@ -184,6 +219,6 @@ grn_mrb_index_cursor_init(grn_ctx *ctx)
   mrb_define_method(mrb, klass, "count",
                     mrb_grn_index_cursor_count, MRB_ARGS_NONE());
   mrb_define_method(mrb, klass, "select",
-                    mrb_grn_index_cursor_select, MRB_ARGS_REQ(1));
+                    mrb_grn_index_cursor_select, MRB_ARGS_ARG(1, 1));
 }
 #endif

  Modified: lib/mrb/scripts/logger.rb (+1 -0)
===================================================================
--- lib/mrb/scripts/logger.rb    2015-02-25 14:08:33 +0900 (a25d8f1)
+++ lib/mrb/scripts/logger.rb    2015-02-25 14:12:15 +0900 (cb747a4)
@@ -15,6 +15,7 @@ module Groonga
         file = last_entry.file
         line = last_entry.line
         method = last_entry.method
+        # message = "#{file}:#{line}:#{method}: #{message}"
       else
         file = ""
         line = 0

  Modified: plugins/sharding/logical_range_filter.rb (+91 -50)
===================================================================
--- plugins/sharding/logical_range_filter.rb    2015-02-25 14:08:33 +0900 (1cbc8bf)
+++ plugins/sharding/logical_range_filter.rb    2015-02-25 14:12:15 +0900 (764301a)
@@ -33,30 +33,13 @@ module Groonga
             end
           end
 
-          sort_keys = [
-            {
-              :key => executor.shard_key_name,
-              :order => executor.order,
-            },
-          ]
-          current_offset = executor.offset
-          current_limit = executor.limit
           writer.array("RESULTSET", n_elements) do
             first_result_set = result_sets.first
             if first_result_set
               writer.write_table_columns(first_result_set, output_columns)
             end
             result_sets.each do |result_set|
-              if result_set.size <= current_offset
-                current_offset -= result_set.size
-                next
-              end
-              sorted_result_set = result_set.sort(sort_keys,
-                                                  :offset => current_offset,
-                                                  :limit => current_limit)
-              writer.write_table_records(sorted_result_set, output_columns)
-              current_limit -= sorted_result_set.size
-              sorted_result_set.close
+              writer.write_table_records(result_set, output_columns)
             end
           end
         ensure
@@ -65,9 +48,6 @@ module Groonga
       end
 
       class Executor
-        attr_reader :order
-        attr_reader :offset
-        attr_reader :limit
         attr_reader :result_sets
         def initialize(input)
           @input = input
@@ -78,6 +58,9 @@ module Groonga
           @limit = (@input[:limit] || 10).to_i
 
           @result_sets = []
+          @unsorted_result_sets = []
+          @current_offset = 0
+          @current_limit = @limit
         end
 
         def shard_key_name
@@ -85,31 +68,24 @@ module Groonga
         end
 
         def execute
-          n_records = 0
-
           @enumerator.each do |table, shard_key, shard_range|
-            result_set = filter_shard(table, @filter,
-                                      shard_key, shard_range,
-                                      @enumerator.target_range)
-            next if result_set.nil?
-            if result_set.empty?
-              result_set.close if result_set.temporary?
-              next
-            end
-            @result_sets << result_set
-            n_records += result_set.size
-            break if n_records >= offset + limit
+            filter_shard(table, @filter,
+                         shard_key, shard_range,
+                         @enumerator.target_range)
+            break if @current_limit == 0
           end
         end
 
         def close
+          @unsorted_result_sets.each do |result_set|
+            result_set.close if result_set.temporary?
+          end
           @result_sets.each do |result_set|
             result_set.close if result_set.temporary?
           end
         end
 
         private
-
         def parse_order(input, name)
           order = input[name]
           return :ascending if order.nil?
@@ -128,12 +104,14 @@ module Groonga
         end
 
         def filter_shard(table, filter, shard_key, shard_range, target_range)
+          return if table.empty?
+
           cover_type = target_range.cover_type(shard_range)
-          return nil if cover_type == :none
+          return if cover_type == :none
 
           if cover_type == :all
             if filter.nil?
-              return table
+              return sort_result_set(table)
             else
               return filter_table(table, filter)
             end
@@ -219,6 +197,12 @@ module Groonga
           lexicon = range_index.domain
           data_table = range_index.range
           flags = TableCursorFlags::BY_KEY
+          case @order
+          when :ascending
+            flags |= TableCursorFlags::ASCENDING
+          when :descending
+            flags |= TableCursorFlags::DESCENDING
+          end
           case min_border
           when :include
             flags |= TableCursorFlags::GE
@@ -232,23 +216,50 @@ module Groonga
             flags |= TableCursorFlags::LT
           end
 
-          TableCursor.open(lexicon,
-                           :min => min,
-                           :max => max,
-                           :flags => flags) do |table_cursor|
-            if filter
-              create_expression(data_table) do |expression|
-                expression.parse(filter)
+          result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC,
+                                        :key_type => data_table)
+          n_processed_records = 0
+          begin
+            TableCursor.open(lexicon,
+                             :min => min,
+                             :max => max,
+                             :flags => flags) do |table_cursor|
+              options = {
+                :offset => @current_offset,
+                :limit => @current_limit,
+              }
+              if filter
+                create_expression(data_table) do |expression|
+                  expression.parse(filter)
+                  options[:expression] = expression
+                  IndexCursor.open(table_cursor, range_index) do |index_cursor|
+                    n_processed_records = index_cursor.select(result_set,
+                                                              options)
+                  end
+                end
+              else
                 IndexCursor.open(table_cursor, range_index) do |index_cursor|
-                  index_cursor.select(expression)
+                  n_processed_records = index_cursor.select(result_set,
+                                                            options)
                 end
               end
-            else
-              IndexCursor.open(table_cursor, range_index) do |index_cursor|
-                index_cursor.select(nil)
-              end
             end
+          rescue
+            result_set.close
+            raise
           end
+
+          if n_processed_records <= @current_offset
+            @current_offset -= n_processed_records
+            result_set.close
+            return
+          end
+
+          if @current_offset > 0
+            @current_offset = 0
+          end
+          @current_limit -= result_set.size
+          @result_sets << result_set
         end
 
         def filter_table(table, filter)
@@ -262,8 +273,38 @@ module Groonga
             else
               expression.parse(filter)
             end
-            table.select(expression)
+            result_set = table.select(expression)
+            sort_result_set(result_set)
+          end
+        end
+
+        def sort_result_set(result_set)
+          if result_set.empty?
+            result_set.close if result_set.temporary?
+            return
+          end
+
+          if result_set.size <= @current_offset
+            @current_offset -= result_set.size
+            result_set.close if result_set.temporary?
+            return
+          end
+
+          @unsorted_result_sets << result_set if result_set.temporary?
+          sort_keys = [
+            {
+              :key => @enumerator.shard_key_name,
+              :order => @order,
+            },
+          ]
+          sorted_result_set = result_set.sort(sort_keys,
+                                              :offset => @current_offset,
+                                              :limit => @current_limit)
+          @result_sets << sorted_result_set
+          if @current_offset > 0
+            @current_offset = 0
           end
+          @current_limit -= sorted_result_set.size
         end
       end
     end
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index