[Groonga-commit] groonga/groonga at 6f37968 [sharding-strategy] sharding: add uint-range strategy

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jul 4 17:09:45 JST 2018


Kouhei Sutou	2018-07-04 17:09:45 +0900 (Wed, 04 Jul 2018)

  New Revision: 6f37968714776040a26fd92b974ced1d672d5ebe
  https://github.com/groonga/groonga/commit/6f37968714776040a26fd92b974ced1d672d5ebe

  Message:
    sharding: add uint-range strategy
    
    It's just a proof-of-concept implementation. It's not ready for
    production use yet.

  Added files:
    test/command/fixture/sharding/logical_range_filter/strategy/uint_range/schema.grn
    test/command/suite/sharding/logical_range_filter/strategy/uint_range/range/max_exclude.expected
    test/command/suite/sharding/logical_range_filter/strategy/uint_range/range/max_exclude.test
  Modified files:
    plugins/sharding/logical_enumerator.rb
    plugins/sharding/logical_range_filter.rb

  Modified: plugins/sharding/logical_enumerator.rb (+185 -78)
===================================================================
--- plugins/sharding/logical_enumerator.rb    2018-07-04 11:43:55 +0900 (d05a220fc)
+++ plugins/sharding/logical_enumerator.rb    2018-07-04 17:09:45 +0900 (0160b78e1)
@@ -23,72 +23,11 @@ module Groonga
 
       private
       def each_internal(order)
-        context = Context.instance
-        each_shard_with_around(order) do |prev_shard, current_shard, next_shard|
-          shard_range_data = current_shard.range_data
-          shard_range = nil
-
-          if shard_range_data.day.nil?
-            if order == :ascending
-              if next_shard
-                next_shard_range_data = next_shard.range_data
-              else
-                next_shard_range_data = nil
-              end
-            else
-              if prev_shard
-                next_shard_range_data = prev_shard.range_data
-              else
-                next_shard_range_data = nil
-              end
-            end
-            max_day = compute_month_shard_max_day(shard_range_data.year,
-                                                  shard_range_data.month,
-                                                  next_shard_range_data)
-            shard_range = MonthShardRange.new(shard_range_data.year,
-                                              shard_range_data.month,
-                                              max_day)
-          else
-            shard_range = DayShardRange.new(shard_range_data.year,
-                                            shard_range_data.month,
-                                            shard_range_data.day)
-          end
-
-          yield(current_shard, shard_range)
+        @strategy.each(order) do |shard, shard_range|
+          yield(shard, shard_range)
         end
       end
 
-      def each_shard_with_around(order)
-        context = Context.instance
-        prefix = "#{@logical_table}_"
-
-        shards = [nil]
-        context.database.each_name(:prefix => prefix,
-                                   :order_by => :key,
-                                   :order => order) do |name|
-          shard_range_raw = name[prefix.size..-1]
-
-          case shard_range_raw
-          when /\A(\d{4})(\d{2})\z/
-            shard_range_data = ShardRangeData.new($1.to_i, $2.to_i, nil)
-          when /\A(\d{4})(\d{2})(\d{2})\z/
-            shard_range_data = ShardRangeData.new($1.to_i, $2.to_i, $3.to_i)
-          else
-            next
-          end
-
-          shards << Shard.new(name, @shard_key_name, shard_range_data)
-          next if shards.size < 3
-          yield(*shards)
-          shards.shift
-        end
-
-        if shards.size == 2
-          yield(shards[0], shards[1], nil)
-        end
-      end
-
-      private
       def initialize_parameters
         @logical_table = @input[:logical_table]
         if @logical_table.nil?
@@ -104,15 +43,147 @@ module Groonga
           end
         end
 
-        @target_range = TargetRange.new(@command_name, @input)
+        prefix = "#{@logical_table}_"
+        @strategy_name = @input[:strategy] || "date-range"
+        case @strategy_name
+        when "date-range"
+          @strategy = DateStrategy.new(prefix, @shard_key_name)
+        when "uint-range"
+          @strategy = UIntStrategy.new(prefix, @shard_key_name)
+        else
+          message = "[#{@command_name}] strategy must be " +
+            "date-range or uint-ragne: #{@strategy_name}"
+          raise InvalidArgument, message
+        end
+
+        @target_range = TargetRange.new(@command_name, @strategy, @input)
+      end
+
+      class Strategy
+        def initialize(prefix, shard_key_name)
+          @prefix = prefix
+          @shard_key_name = shard_key_name
+          @context = Context.instance
+        end
+      end
+
+      class DateStrategy < Strategy
+        def parse_value(value)
+          Converter.convert(value, Time)
+        end
+
+        def each(order)
+          each_shard_with_around(order) do |prev_shard, current_shard, next_shard|
+            shard_range_data = current_shard.range_data
+            shard_range = nil
+
+            if shard_range_data.day.nil?
+              if order == :ascending
+                if next_shard
+                  next_shard_range_data = next_shard.range_data
+                else
+                  next_shard_range_data = nil
+                end
+              else
+                if prev_shard
+                  next_shard_range_data = prev_shard.range_data
+                else
+                  next_shard_range_data = nil
+                end
+              end
+              max_day = compute_month_shard_max_day(shard_range_data.year,
+                                                    shard_range_data.month,
+                                                    next_shard_range_data)
+              shard_range = MonthShardRange.new(shard_range_data.year,
+                                                shard_range_data.month,
+                                                max_day)
+            else
+              shard_range = DayShardRange.new(shard_range_data.year,
+                                              shard_range_data.month,
+                                              shard_range_data.day)
+            end
+
+            yield(current_shard, shard_range)
+          end
+        end
+
+        private
+        def each_shard_with_around(order)
+          shards = [nil]
+          @context.database.each_name(:prefix => @prefix,
+                                      :order_by => :key,
+                                      :order => order) do |name|
+            shard_range_raw = name[@prefix.size..-1]
+
+            case shard_range_raw
+            when /\A(\d{4})(\d{2})\z/
+              shard_range_data = DateShardRangeData.new($1.to_i, $2.to_i, nil)
+            when /\A(\d{4})(\d{2})(\d{2})\z/
+              shard_range_data = DateShardRangeData.new($1.to_i, $2.to_i, $3.to_i)
+            else
+              next
+            end
+
+            shards << Shard.new(name, @shard_key_name, shard_range_data)
+            next if shards.size < 3
+            yield(*shards)
+            shards.shift
+          end
+
+          if shards.size == 2
+            yield(shards[0], shards[1], nil)
+          end
+        end
+
+        def compute_month_shard_max_day(year, month, next_shard_range)
+          return nil if next_shard_range.nil?
+
+          return nil if month != next_shard_range.month
+
+          next_shard_range.day
+        end
       end
 
-      def compute_month_shard_max_day(year, month, next_shard_range)
-        return nil if next_shard_range.nil?
+      class UIntStrategy < Strategy
+        def parse_value(value)
+          Converter.convert(value, Fixnum)
+        end
+
+        def each(order)
+          each_shard_with_around(order) do |prev_shard, current_shard, next_shard|
+            min = current_shard.range_data.value
+            if next_shard
+              max = next_shard.range_data.value
+            else
+              max = nil
+            end
+            shard_range = UIntShardRange.new(min, max)
+            yield(current_shard, shard_range)
+          end
+        end
 
-        return nil if month != next_shard_range.month
+        private
+        def each_shard_with_around(order, &block)
+          shards = []
+          @context.database.each_name(:prefix => @prefix) do |name|
+            shard_range_raw = name[@prefix.size..-1]
+            case shard_range_raw
+            when /\A(\d+)\z/
+              shard_range_data = UIntShardRangeData.new($1.to_i)
+              shards << Shard.new(name, @shard_key_name, shard_range_data)
+            end
+          end
+          return if shards.empty?
 
-        next_shard_range.day
+          sorted_shards = shards.sort_by do |shard|
+            value = shard.range_data.value
+            value = -value if order == :descending
+            value
+          end
+          sorted_shards.unshift(nil)
+          sorted_shards << nil
+          sorted_shards.each_cons(3, &block)
+        end
       end
 
       class Shard
@@ -136,7 +207,7 @@ module Groonga
         end
       end
 
-      class ShardRangeData
+      class DateShardRangeData
         attr_reader :year, :month, :day
         def initialize(year, month, day)
           @year = year
@@ -153,6 +224,17 @@ module Groonga
         end
       end
 
+      class UIntShardRangeData
+        attr_reader :value
+        def initialize(value)
+          @value = value
+        end
+
+        def to_suffix
+          "_%d" % [@value]
+        end
+      end
+
       class DayShardRange
         attr_reader :year, :month, :day
         def initialize(year, month, day)
@@ -161,7 +243,7 @@ module Groonga
           @day = day
         end
 
-        def least_over_time
+        def least_over_value
           next_day = Time.local(@year, @month, @day) + (60 * 60 * 24)
           while next_day.day == @day # For leap second
             next_day += 1
@@ -169,7 +251,7 @@ module Groonga
           next_day
         end
 
-        def min_time
+        def min
           Time.local(@year, @month, @day)
         end
 
@@ -188,7 +270,7 @@ module Groonga
           @max_day = max_day
         end
 
-        def least_over_time
+        def least_over_value
           if @max_day.nil?
             if @month == 12
               Time.local(@year + 1, 1, 1)
@@ -200,7 +282,7 @@ module Groonga
           end
         end
 
-        def min_time
+        def min
           Time.local(@year, @month, 1)
         end
 
@@ -216,11 +298,36 @@ module Groonga
         end
       end
 
+      class UIntShardRange
+        attr_reader :min, :max
+        def initialize(min, max)
+          @min = min
+          @max = max
+        end
+
+        def least_over_value
+          @max
+        end
+
+        def include?(value)
+          if value < @min
+            false
+          else
+            if @max.nil?
+              true
+            else
+              value < @max
+            end
+          end
+        end
+      end
+
       class TargetRange
         attr_reader :min, :min_border
         attr_reader :max, :max_border
-        def initialize(command_name, input)
+        def initialize(command_name, strategy, input)
           @command_name = command_name
+          @strategy = strategy
           @input = input
           @min = parse_value(:min)
           @min_border = parse_border(:min_border)
@@ -267,7 +374,7 @@ module Groonga
           value = @input[name]
           return nil if value.nil?
 
-          Converter.convert(value, Time)
+          @strategy.parse_value(value)
         end
 
         def parse_border(name)
@@ -288,7 +395,7 @@ module Groonga
         end
 
         def in_min?(shard_range)
-          @min < shard_range.least_over_time
+          @min < shard_range.least_over_value
         end
 
         def in_min_partial?(shard_range)
@@ -296,11 +403,11 @@ module Groonga
 
           return true if @min_border == :exclude
 
-          shard_range.min_time != @min
+          shard_range.min != @min
         end
 
         def in_max?(shard_range)
-          max_base_time = shard_range.min_time
+          max_base_time = shard_range.min
           if @max_border == :include
             @max >= max_base_time
           else

  Modified: plugins/sharding/logical_range_filter.rb (+1 -1)
===================================================================
--- plugins/sharding/logical_range_filter.rb    2018-07-04 11:43:55 +0900 (b4587f462)
+++ plugins/sharding/logical_range_filter.rb    2018-07-04 17:09:45 +0900 (8f9f831dd)
@@ -322,7 +322,7 @@ module Groonga
             microsecond = max.usec
             border = @target_range.max_border
           else
-            next_shard_edge = @shard_range.least_over_time
+            next_shard_edge = @shard_range.least_over_value
             year = next_shard_edge.year
             month = next_shard_edge.month
             day = next_shard_edge.day

  Added: test/command/fixture/sharding/logical_range_filter/strategy/uint_range/schema.grn (+47 -0) 100644
===================================================================
--- /dev/null
+++ test/command/fixture/sharding/logical_range_filter/strategy/uint_range/schema.grn    2018-07-04 17:09:45 +0900 (1d242001c)
@@ -0,0 +1,47 @@
+#@disable-logging
+
+#@on-error omit
+register sharding
+#@on-error default
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto
+
+table_create Logs_2018030 TABLE_NO_KEY
+column_create Logs_2018030 year_day COLUMN_SCALAR UInt32
+column_create Logs_2018030 memo COLUMN_SCALAR ShortText
+column_create Logs_2018030 message COLUMN_SCALAR Text
+
+table_create YearDay_2018030 TABLE_PAT_KEY UInt32
+column_create YearDay_2018030 index COLUMN_INDEX Logs_2018030 year_day
+
+column_create Terms index_2018030 COLUMN_INDEX|WITH_POSITION|WITH_SECTION \
+  Logs_2018030 memo,message
+
+
+table_create Logs_2018040 TABLE_NO_KEY
+column_create Logs_2018040 year_day COLUMN_SCALAR UInt32
+column_create Logs_2018040 memo COLUMN_SCALAR ShortText
+column_create Logs_2018040 message COLUMN_SCALAR Text
+
+table_create YearDay_2018040 TABLE_PAT_KEY UInt32
+column_create YearDay_2018040 index COLUMN_INDEX Logs_2018040 year_day
+
+column_create Terms index_2018040 COLUMN_INDEX|WITH_POSITION|WITH_SECTION \
+  Logs_2018040 memo,message
+
+
+table_create Logs_2018050 TABLE_NO_KEY
+column_create Logs_2018050 year_day COLUMN_SCALAR UInt32
+column_create Logs_2018050 memo COLUMN_SCALAR ShortText
+column_create Logs_2018050 message COLUMN_SCALAR Text
+
+table_create YearDay_2018050 TABLE_PAT_KEY UInt32
+column_create YearDay_2018050 index COLUMN_INDEX Logs_2018050 year_day
+
+column_create Terms index_2018050 COLUMN_INDEX|WITH_POSITION|WITH_SECTION \
+  Logs_2018050 memo,message
+
+
+#@enable-logging

  Added: test/command/suite/sharding/logical_range_filter/strategy/uint_range/range/max_exclude.expected (+63 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/sharding/logical_range_filter/strategy/uint_range/range/max_exclude.expected    2018-07-04 17:09:45 +0900 (9299aa5f1)
@@ -0,0 +1,63 @@
+load --table Logs_2018030
+[
+{"year_day": 2018030, "memo": "2018-01-30 00:00:00", "message": "Start"},
+{"year_day": 2018038, "memo": "2018-02-07 00:00:00", "message": "Shutdown"},
+{"year_day": 2018039, "memo": "2018-02-08 00:00:00", "message": "Start"},
+{"year_day": 2018039, "memo": "2018-02-08 23:59:59", "message": "Shutdown"}
+]
+[[0,0.0,0.0],4]
+load --table Logs_2018040
+[
+{"year_day": 2018040, "memo": "2018-02-09 00:00:00", "message": "Start"},
+{"year_day": 2018048, "memo": "2018-02-17 00:00:00", "message": "Shutdown"},
+{"year_day": 2018049, "memo": "2018-02-18 00:00:00", "message": "Start"},
+{"year_day": 2018049, "memo": "2018-02-18 23:59:59", "message": "Shutdown"}
+]
+[[0,0.0,0.0],4]
+load --table Logs_2018050
+[
+{"year_day": 2018050, "memo": "2018-02-19 00:00:00", "message": "Start"},
+{"year_day": 2018058, "memo": "2018-02-27 00:00:00", "message": "Shutdown"},
+{"year_day": 2018059, "memo": "2018-02-28 00:00:00", "message": "Start"},
+{"year_day": 2018059, "memo": "2018-02-28 23:59:59", "message": "Shutdown"}
+]
+[[0,0.0,0.0],4]
+logical_range_filter   --logical_table Logs   --shard_key year_day   --strategy "uint-range"   --filter 'message == "Shutdown"'   --max 2018049   --max_border "exclude"
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        "memo",
+        "ShortText"
+      ],
+      [
+        "message",
+        "Text"
+      ],
+      [
+        "year_day",
+        "UInt32"
+      ]
+    ],
+    [
+      "2018-02-07 00:00:00",
+      "Shutdown",
+      2018038
+    ],
+    [
+      "2018-02-08 23:59:59",
+      "Shutdown",
+      2018039
+    ],
+    [
+      "2018-02-17 00:00:00",
+      "Shutdown",
+      2018048
+    ]
+  ]
+]

  Added: test/command/suite/sharding/logical_range_filter/strategy/uint_range/range/max_exclude.test (+34 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/sharding/logical_range_filter/strategy/uint_range/range/max_exclude.test    2018-07-04 17:09:45 +0900 (e62ba1a18)
@@ -0,0 +1,34 @@
+#@include fixture/sharding/logical_range_filter/strategy/uint_range/schema.grn
+
+load --table Logs_2018030
+[
+{"year_day": 2018030, "memo": "2018-01-30 00:00:00", "message": "Start"},
+{"year_day": 2018038, "memo": "2018-02-07 00:00:00", "message": "Shutdown"},
+{"year_day": 2018039, "memo": "2018-02-08 00:00:00", "message": "Start"},
+{"year_day": 2018039, "memo": "2018-02-08 23:59:59", "message": "Shutdown"}
+]
+
+load --table Logs_2018040
+[
+{"year_day": 2018040, "memo": "2018-02-09 00:00:00", "message": "Start"},
+{"year_day": 2018048, "memo": "2018-02-17 00:00:00", "message": "Shutdown"},
+{"year_day": 2018049, "memo": "2018-02-18 00:00:00", "message": "Start"},
+{"year_day": 2018049, "memo": "2018-02-18 23:59:59", "message": "Shutdown"}
+]
+
+load --table Logs_2018050
+[
+{"year_day": 2018050, "memo": "2018-02-19 00:00:00", "message": "Start"},
+{"year_day": 2018058, "memo": "2018-02-27 00:00:00", "message": "Shutdown"},
+{"year_day": 2018059, "memo": "2018-02-28 00:00:00", "message": "Start"},
+{"year_day": 2018059, "memo": "2018-02-28 23:59:59", "message": "Shutdown"}
+]
+
+logical_range_filter \
+  --logical_table Logs \
+  --shard_key year_day \
+  --strategy "uint-range" \
+  --filter 'message == "Shutdown"' \
+  --max 2018049 \
+  --max_border "exclude"
+
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180704/83783cae/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index