Kouhei Sutou
null+****@clear*****
Wed Feb 25 14:12:15 JST 2015
Kouhei Sutou 2015-02-25 14:12:15 +0900 (Wed, 25 Feb 2015) New Revision: 2dfb8e57e101b6f97eb9d77bdba535ac098ae950 https://github.com/groonga/groonga/commit/2dfb8e57e101b6f97eb9d77bdba535ac098ae950 Message: logical_range_filter: reduce needless sort Modified files: lib/mrb/mrb_index_cursor.c lib/mrb/scripts/logger.rb plugins/sharding/logical_range_filter.rb Modified: lib/mrb/mrb_index_cursor.c (+49 -14) =================================================================== --- lib/mrb/mrb_index_cursor.c 2015-02-25 14:08:33 +0900 (e49446a) +++ lib/mrb/mrb_index_cursor.c 2015-02-25 14:12:15 +0900 (f04818f) @@ -31,6 +31,7 @@ #include "mrb_ctx.h" #include "mrb_index_cursor.h" #include "mrb_converter.h" +#include "mrb_options.h" static struct mrb_data_type mrb_grn_index_cursor_type = { "Groonga::IndexCursor", @@ -115,34 +116,59 @@ static mrb_value mrb_grn_index_cursor_select(mrb_state *mrb, mrb_value self) { grn_ctx *ctx = (grn_ctx *)mrb->ud; - mrb_value mrb_expr; + mrb_value mrb_result_set; + mrb_value mrb_options; + grn_obj *index_cursor; grn_obj *expr = NULL; grn_obj *expr_variable = NULL; - grn_obj *index_cursor; + int offset = 0; + int limit = 10; + int n_processed_records = 0; mrb_value mrb_index; grn_obj *index; grn_obj *lexicon; grn_obj *data_table; - grn_obj *result; + grn_hash *result_set; grn_posting *posting; grn_id term_id; grn_operator op = GRN_OP_OR; - mrb_get_args(mrb, "o", &mrb_expr); + mrb_get_args(mrb, "o|H", &mrb_result_set, &mrb_options); index_cursor = DATA_PTR(self); - if (!mrb_nil_p(mrb_expr)) { - expr = DATA_PTR(mrb_expr); - expr_variable = grn_expr_get_var_by_offset(ctx, expr, 0); + result_set = DATA_PTR(mrb_result_set); + + if (!mrb_nil_p(mrb_options)) { + mrb_value mrb_expr; + mrb_value mrb_offset; + mrb_value mrb_limit; + + mrb_expr = grn_mrb_options_get_lit(mrb, mrb_options, "expression"); + if (!mrb_nil_p(mrb_expr)) { + expr = DATA_PTR(mrb_expr); + expr_variable = grn_expr_get_var_by_offset(ctx, expr, 0); + } + + mrb_offset = grn_mrb_options_get_lit(mrb, mrb_options, "offset"); + if (!mrb_nil_p(mrb_offset)) { + offset = mrb_fixnum(mrb_offset); + } + + mrb_limit = grn_mrb_options_get_lit(mrb, mrb_options, "limit"); + if (!mrb_nil_p(mrb_limit)) { + limit = mrb_fixnum(mrb_limit); + } + } + + if (limit <= 0) { + return mrb_fixnum_value(n_processed_records); } + mrb_index = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@index")); index = DATA_PTR(mrb_index); lexicon = ((grn_ii *)index)->lexicon; data_table = grn_ctx_at(ctx, grn_obj_get_range(ctx, index)); - result = grn_table_create(ctx, NULL, 0, NULL, - GRN_TABLE_HASH_KEY | GRN_OBJ_WITH_SUBREC, - data_table, NULL); while ((posting = grn_index_cursor_next(ctx, index_cursor, &term_id))) { if (expr) { grn_bool matched_raw; @@ -155,11 +181,20 @@ mrb_grn_index_cursor_select(mrb_state *mrb, mrb_value self) continue; } } - grn_ii_posting_add(ctx, (grn_ii_posting *)posting, (grn_hash *)result, op); + n_processed_records++; + if (offset > 0) { + offset--; + continue; + } + grn_ii_posting_add(ctx, (grn_ii_posting *)posting, result_set, op); + limit--; + if (limit == 0) { + break; + } } - grn_ii_resolve_sel_and(ctx, (grn_hash *)result, op); + grn_ii_resolve_sel_and(ctx, result_set, op); - return grn_mrb_value_from_grn_obj(mrb, result); + return mrb_fixnum_value(n_processed_records); } void @@ -184,6 +219,6 @@ grn_mrb_index_cursor_init(grn_ctx *ctx) mrb_define_method(mrb, klass, "count", mrb_grn_index_cursor_count, MRB_ARGS_NONE()); mrb_define_method(mrb, klass, "select", - mrb_grn_index_cursor_select, MRB_ARGS_REQ(1)); + mrb_grn_index_cursor_select, MRB_ARGS_ARG(1, 1)); } #endif Modified: lib/mrb/scripts/logger.rb (+1 -0) =================================================================== --- lib/mrb/scripts/logger.rb 2015-02-25 14:08:33 +0900 (a25d8f1) +++ lib/mrb/scripts/logger.rb 2015-02-25 14:12:15 +0900 (cb747a4) @@ -15,6 +15,7 @@ module Groonga file = last_entry.file line = last_entry.line method = last_entry.method + # message = "#{file}:#{line}:#{method}: #{message}" else file = "" line = 0 Modified: plugins/sharding/logical_range_filter.rb (+91 -50) =================================================================== --- plugins/sharding/logical_range_filter.rb 2015-02-25 14:08:33 +0900 (1cbc8bf) +++ plugins/sharding/logical_range_filter.rb 2015-02-25 14:12:15 +0900 (764301a) @@ -33,30 +33,13 @@ module Groonga end end - sort_keys = [ - { - :key => executor.shard_key_name, - :order => executor.order, - }, - ] - current_offset = executor.offset - current_limit = executor.limit writer.array("RESULTSET", n_elements) do first_result_set = result_sets.first if first_result_set writer.write_table_columns(first_result_set, output_columns) end result_sets.each do |result_set| - if result_set.size <= current_offset - current_offset -= result_set.size - next - end - sorted_result_set = result_set.sort(sort_keys, - :offset => current_offset, - :limit => current_limit) - writer.write_table_records(sorted_result_set, output_columns) - current_limit -= sorted_result_set.size - sorted_result_set.close + writer.write_table_records(result_set, output_columns) end end ensure @@ -65,9 +48,6 @@ module Groonga end class Executor - attr_reader :order - attr_reader :offset - attr_reader :limit attr_reader :result_sets def initialize(input) @input = input @@ -78,6 +58,9 @@ module Groonga @limit = (@input[:limit] || 10).to_i @result_sets = [] + @unsorted_result_sets = [] + @current_offset = 0 + @current_limit = @limit end def shard_key_name @@ -85,31 +68,24 @@ module Groonga end def execute - n_records = 0 - @enumerator.each do |table, shard_key, shard_range| - result_set = filter_shard(table, @filter, - shard_key, shard_range, - @enumerator.target_range) - next if result_set.nil? - if result_set.empty? - result_set.close if result_set.temporary? - next - end - @result_sets << result_set - n_records += result_set.size - break if n_records >= offset + limit + filter_shard(table, @filter, + shard_key, shard_range, + @enumerator.target_range) + break if @current_limit == 0 end end def close + @unsorted_result_sets.each do |result_set| + result_set.close if result_set.temporary? + end @result_sets.each do |result_set| result_set.close if result_set.temporary? end end private - def parse_order(input, name) order = input[name] return :ascending if order.nil? @@ -128,12 +104,14 @@ module Groonga end def filter_shard(table, filter, shard_key, shard_range, target_range) + return if table.empty? + cover_type = target_range.cover_type(shard_range) - return nil if cover_type == :none + return if cover_type == :none if cover_type == :all if filter.nil? - return table + return sort_result_set(table) else return filter_table(table, filter) end @@ -219,6 +197,12 @@ module Groonga lexicon = range_index.domain data_table = range_index.range flags = TableCursorFlags::BY_KEY + case @order + when :ascending + flags |= TableCursorFlags::ASCENDING + when :descending + flags |= TableCursorFlags::DESCENDING + end case min_border when :include flags |= TableCursorFlags::GE @@ -232,23 +216,50 @@ module Groonga flags |= TableCursorFlags::LT end - TableCursor.open(lexicon, - :min => min, - :max => max, - :flags => flags) do |table_cursor| - if filter - create_expression(data_table) do |expression| - expression.parse(filter) + result_set = HashTable.create(:flags => ObjectFlags::WITH_SUBREC, + :key_type => data_table) + n_processed_records = 0 + begin + TableCursor.open(lexicon, + :min => min, + :max => max, + :flags => flags) do |table_cursor| + options = { + :offset => @current_offset, + :limit => @current_limit, + } + if filter + create_expression(data_table) do |expression| + expression.parse(filter) + options[:expression] = expression + IndexCursor.open(table_cursor, range_index) do |index_cursor| + n_processed_records = index_cursor.select(result_set, + options) + end + end + else IndexCursor.open(table_cursor, range_index) do |index_cursor| - index_cursor.select(expression) + n_processed_records = index_cursor.select(result_set, + options) end end - else - IndexCursor.open(table_cursor, range_index) do |index_cursor| - index_cursor.select(nil) - end end + rescue + result_set.close + raise end + + if n_processed_records <= @current_offset + @current_offset -= n_processed_records + result_set.close + return + end + + if @current_offset > 0 + @current_offset = 0 + end + @current_limit -= result_set.size + @result_sets << result_set end def filter_table(table, filter) @@ -262,8 +273,38 @@ module Groonga else expression.parse(filter) end - table.select(expression) + result_set = table.select(expression) + sort_result_set(result_set) + end + end + + def sort_result_set(result_set) + if result_set.empty? + result_set.close if result_set.temporary? + return + end + + if result_set.size <= @current_offset + @current_offset -= result_set.size + result_set.close if result_set.temporary? + return + end + + @unsorted_result_sets << result_set if result_set.temporary? + sort_keys = [ + { + :key => @enumerator.shard_key_name, + :order => @order, + }, + ] + sorted_result_set = result_set.sort(sort_keys, + :offset => @current_offset, + :limit => @current_limit) + @result_sets << sorted_result_set + if @current_offset > 0 + @current_offset = 0 end + @current_limit -= sorted_result_set.size end end end -------------- next part -------------- HTML����������������������������...Download