[Groonga-commit] groonga/groonga-command [master] parser: support stream parsing

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Nov 26 23:18:57 JST 2012


Kouhei Sutou	2012-11-26 23:18:57 +0900 (Mon, 26 Nov 2012)

  New Revision: 9d7b52c317c7c1d9013a60031a4aa3e560c1b0f0
  https://github.com/groonga/groonga-command/commit/9d7b52c317c7c1d9013a60031a4aa3e560c1b0f0

  Log:
    parser: support stream parsing
    
    But it is too dirty!!! TOO DIRTY!!!

  Added files:
    lib/groonga/command/error.rb
  Modified files:
    groonga-command.gemspec
    lib/groonga/command/base.rb
    lib/groonga/command/parser.rb
    test/test-parser.rb

  Modified: groonga-command.gemspec (+2 -0)
===================================================================
--- groonga-command.gemspec    2012-11-26 17:13:02 +0900 (9242aa0)
+++ groonga-command.gemspec    2012-11-26 23:18:57 +0900 (acd543b)
@@ -51,6 +51,8 @@ Gem::Specification.new do |spec|
   spec.licenses = ["LGPLv2.1+"]
   spec.require_paths = ["lib"]
 
+  spec.add_runtime_dependency("yajl")
+
   spec.add_development_dependency("test-unit")
   spec.add_development_dependency("test-unit-notify")
   spec.add_development_dependency("rake")

  Modified: lib/groonga/command/base.rb (+2 -1)
===================================================================
--- lib/groonga/command/base.rb    2012-11-26 17:13:02 +0900 (3bf72b3)
+++ lib/groonga/command/base.rb    2012-11-26 23:18:57 +0900 (b0b2566)
@@ -40,11 +40,12 @@ module Groonga
       end
 
       attr_reader :name, :arguments
-      attr_accessor :original_format
+      attr_accessor :original_format, :original_source
       def initialize(name, pair_arguments, ordered_arguments=[])
         @name = name
         @arguments = construct_arguments(pair_arguments, ordered_arguments)
         @original_format = nil
+        @original_source = nil
       end
 
       def [](name)

  Added: lib/groonga/command/error.rb (+24 -0) 100644
===================================================================
--- /dev/null
+++ lib/groonga/command/error.rb    2012-11-26 23:18:57 +0900 (03ca346)
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2012  Kouhei Sutou <kou@clear-code.com>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+module Groonga
+  module Command
+    class Error < StandardError
+    end
+  end
+end

  Modified: lib/groonga/command/parser.rb (+243 -3)
===================================================================
--- lib/groonga/command/parser.rb    2012-11-26 17:13:02 +0900 (5429c2a)
+++ lib/groonga/command/parser.rb    2012-11-26 23:18:57 +0900 (c947345)
@@ -19,6 +19,10 @@
 require "shellwords"
 require "cgi"
 
+require "yajl"
+
+require "groonga/command/error"
+
 require "groonga/command/base"
 require "groonga/command/get"
 require "groonga/command/select"
@@ -35,14 +39,235 @@ require "groonga/command/truncate"
 
 module Groonga
   module Command
+    class ParseError < Error
+    end
+
     class Parser
       class << self
-        def parse(input)
-          new.parse(input)
+        def parse(data, &block)
+          if block_given?
+            event_parse(data, &block)
+          else
+            stand_alone_parse(data)
+          end
+        end
+
+        private
+        def event_parse(data)
+          parser = new
+
+          parser.on_command do |command|
+            yield(:on_command, command)
+          end
+          parser.on_load_start do |command|
+            yield(:on_load_start, command)
+          end
+          parser.on_load_value do |command, value|
+            yield(:on_load_value, command, value)
+          end
+          parser.on_load_complete do |command|
+            yield(:on_load_complete, command)
+          end
+
+          consume_data(parser, data)
+        end
+
+        def stand_alone_parse(data)
+          parsed_command = nil
+
+          parser = new
+          parser.on_command do |command|
+            parsed_command = command
+          end
+          parser.on_load_complete do |command|
+            parsed_command = command
+          end
+
+          consume_data(parser, data)
+          if parsed_command.nil?
+            raise ParseError, "not completed: <#{data.inspect}>"
+          end
+
+          parsed_command
+        end
+
+        def consume_data(parser, data)
+          if data.respond_to?(:each)
+            data.each do |chunk|
+              parser << chunk
+            end
+          else
+            parser << data
+          end
+          parser.finish
         end
       end
 
       def initialize
+        reset
+        initialize_hooks
+      end
+
+      def <<(chunk)
+        @buffer << chunk
+        consume_buffer
+      end
+
+      def finish
+        if @loading
+          raise ParseError, "not completed"
+        else
+          catch do |tag|
+            consume_command(tag, @buffer)
+          end
+        end
+      end
+
+      # @overload on_command(command)
+      # @overload on_command {|command| }
+      def on_command(*arguments, &block)
+        if block_given?
+          @on_command_hook = block
+        else
+          @on_command_hook.call(*arguments) if @on_command_hook
+        end
+      end
+
+      # @overload on_load_start(command)
+      # @overload on_load_start {|command| }
+      def on_load_start(*arguments, &block)
+        if block_given?
+          @on_load_start_hook = block
+        else
+          @on_load_start_hook.call(*arguments) if @on_load_start_hook
+        end
+      end
+
+      # @overload on_load_value(command)
+      # @overload on_load_value {|command| }
+      def on_load_value(*arguments, &block)
+        if block_given?
+          @on_load_value_hook = block
+        else
+          @on_load_value_hook.call(*arguments) if @on_load_value_hook
+        end
+      end
+
+      # @overload on_load_complete(command)
+      # @overload on_load_complete(command) { }
+      def on_load_complete(*arguments, &block)
+        if block_given?
+          @on_load_complete_hook = block
+        else
+          @on_load_complete_hook.call(*arguments) if @on_load_complete_hook
+        end
+      end
+
+      private
+      def consume_buffer
+        catch do |tag|
+          loop do
+            if @loading
+              consume_load_values(tag)
+            else
+              consume_command(tag, consume_line(tag))
+            end
+          end
+        end
+      end
+
+      def consume_load_values(tag)
+        if @in_load_values
+          json, separator, rest = @buffer.partition(/[\]},]/)
+          if @load_value_completed
+            throw(tag) if separator.empty?
+            if separator == ","
+              if /\A\s*\z/ =~ json
+                @command.original_source << json << separator
+                @buffer = rest
+                @load_value_completed = false
+                return
+              else
+                raise ParseError, "garbage before value"
+              end
+            elsif separator == "]"
+              if /\A\s*\z/ =~ json
+                @command.original_source << json << separator
+                @buffer = rest
+                on_load_complete(@command)
+                reset
+                return
+              end
+            end
+            unless /\A\s*[\[\{]/ =~ json
+              raise ParseError, "garbage before value"
+            end
+          end
+          @buffer = rest
+          @command.original_source << json
+          @json_parser << json
+          if separator.empty?
+            throw(tag)
+          else
+            @command.original_source << separator
+            @load_value_completed = false
+            @json_parser << separator
+          end
+        else
+          spaces, start_json, rest = @buffer.partition("[")
+          unless /\A\s*\z/ =~ spaces
+            raise ParseError, "garbage before JSON"
+          end
+          if start_json.empty?
+            @command.original_source << @buffer
+            @buffer.clear
+            throw(tag)
+          else
+            @command.original_source << spaces << start_json
+            @buffer = rest
+            @json_parser = Yajl::Parser.new
+            @json_parser.on_parse_complete = lambda do |value|
+              on_load_value(@command, value)
+              @load_value_completed = true
+            end
+            @in_load_values = true
+          end
+        end
+      end
+
+      def consume_line(tag)
+        current_line, separator, rest = @buffer.partition(/\r?\n/)
+        throw(tag) if separator.empty?
+
+        if current_line.end_with?("\\")
+          @buffer.sub!(/\\\r?\n/, "")
+          consume_line(tag)
+        else
+          @buffer = rest
+          current_line
+        end
+      end
+
+      def consume_command(tag, line)
+        @command = parse(line)
+        @command.original_source = line
+        if @command.name == "load"
+          on_load_start(@command)
+          if @command[:values]
+            values = Yajl::Parser.parse(@command[:values])
+            values.each do |value|
+              on_load_value(@command, value)
+            end
+            on_load_complete(@command)
+            reset
+          else
+            @command.original_source << "\n"
+            @loading = true
+          end
+        else
+          on_command(@command)
+          reset
+        end
       end
 
       def parse(input)
@@ -53,7 +278,6 @@ module Groonga
         end
       end
 
-      private
       def parse_uri_path(path)
         name, arguments_string = path.split(/\?/, 2)
         arguments = {}
@@ -89,6 +313,22 @@ module Groonga
         command.original_format = :command
         command
       end
+
+      private
+      def reset
+        @command = nil
+        @loading = false
+        @in_load_values = false
+        @load_value_completed = false
+        @buffer = "".force_encoding("ASCII-8BIT")
+      end
+
+      def initialize_hooks
+        @on_command_hook = nil
+        @on_load_start_hook = nil
+        @on_load_value_hook = nil
+        @on_load_complete_hook = nil
+      end
     end
   end
 end

  Modified: test/test-parser.rb (+130 -0)
===================================================================
--- test/test-parser.rb    2012-11-26 17:13:02 +0900 (0f520df)
+++ test/test-parser.rb    2012-11-26 23:18:57 +0900 (ee4a49b)
@@ -64,5 +64,135 @@ class ParserTest < Test::Unit::TestCase
     class ParseTest < self
       include ParseTests
     end
+
+    class EventTest < self
+      def setup
+        @parser = Groonga::Command::Parser.new
+      end
+
+      class CommandTest < self
+        def test_newline
+          parsed_command = nil
+          @parser.on_command do |command|
+            parsed_command = command
+          end
+
+          @parser << "status"
+          assert_nil(parsed_command)
+          @parser << "\n"
+          assert_equal("status", parsed_command.name)
+        end
+
+        def test_finish
+          parsed_command = nil
+          @parser.on_command do |command|
+            parsed_command = command
+          end
+
+          @parser << "status"
+          assert_nil(parsed_command)
+          @parser.finish
+          assert_equal("status", parsed_command.name)
+        end
+      end
+
+      class LoadTest < self
+        def setup
+          super
+          @events = []
+          @parser.on_load_start do |command|
+            @events << [:load_start, command.original_source.dup]
+          end
+          @parser.on_load_value do |command, value|
+            @events << [:load_value, command.original_source.dup, value]
+          end
+          @parser.on_load_complete do |command|
+            @events << [:load_complete, command.original_source.dup]
+          end
+        end
+
+        def test_inline
+          command_line = "load --values '[{\"_key\": 1}]' --table IDs"
+          @parser << command_line
+          assert_equal([], @events)
+          @parser << "\n"
+          assert_equal([
+                         [:load_start, command_line],
+                         [:load_value, command_line, {"_key" => 1}],
+                         [:load_complete, command_line],
+                       ],
+                       @events)
+        end
+
+        def test_bracket
+          @parser << <<-EOC
+load --table Users
+[
+["_key", "name"],
+["alice", "Alice"]
+]
+EOC
+          expected_events = []
+          expected_events << [:load_start, <<-EOC.chomp]
+load --table Users
+EOC
+          # FIXME: It should be stored into command[:columns].
+          expected_events << [:load_value, <<-EOC.chomp, ["_key", "name"]]
+load --table Users
+[
+["_key", "name"]
+EOC
+          expected_events << [:load_value, <<-EOC.chomp, ["alice", "Alice"]]
+load --table Users
+[
+["_key", "name"],
+["alice", "Alice"]
+EOC
+          expected_events << [:load_complete, <<-EOC.chomp]
+load --table Users
+[
+["_key", "name"],
+["alice", "Alice"]
+]
+EOC
+          assert_equal(expected_events, @events)
+        end
+
+        def test_brace
+          @parser << <<-EOC
+load --table Users
+[
+{"_key": "alice", "name": "Alice"},
+{"_key": "bob",   "name": "Bob"}
+]
+EOC
+          expected_events = []
+          expected_events << [:load_start, <<-EOC.chomp]
+load --table Users
+EOC
+          value = {"_key" => "alice", "name" => "Alice"}
+          expected_events << [:load_value, <<-EOC.chomp, value]
+load --table Users
+[
+{"_key": "alice", "name": "Alice"}
+EOC
+          value = {"_key" => "bob", "name" => "Bob"}
+          expected_events << [:load_value, <<-EOC.chomp, value]
+load --table Users
+[
+{"_key": "alice", "name": "Alice"},
+{"_key": "bob",   "name": "Bob"}
+EOC
+          expected_events << [:load_complete, <<-EOC.chomp]
+load --table Users
+[
+{"_key": "alice", "name": "Alice"},
+{"_key": "bob",   "name": "Bob"}
+]
+EOC
+          assert_equal(expected_events, @events)
+        end
+      end
+    end
   end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index