[Groonga-commit] ranguba/chupa-text at 32b15bd [master] chupa-text: support URI as input

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Jan 3 17:01:41 JST 2014


Kouhei Sutou	2014-01-03 17:01:41 +0900 (Fri, 03 Jan 2014)

  New Revision: 32b15bd8a90fbcab3bc8584f2ce32192cfba76cd
  https://github.com/ranguba/chupa-text/commit/32b15bd8a90fbcab3bc8584f2ce32192cfba76cd

  Message:
    chupa-text: support URI as input

  Modified files:
    lib/chupa-text/command/chupa-text.rb
    test/command/test-chupa-text.rb

  Modified: lib/chupa-text/command/chupa-text.rb (+25 -9)
===================================================================
--- lib/chupa-text/command/chupa-text.rb    2014-01-03 16:02:31 +0900 (456e533)
+++ lib/chupa-text/command/chupa-text.rb    2014-01-03 17:01:41 +0900 (e30761e)
@@ -15,6 +15,8 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 require "optparse"
+require "uri"
+require "open-uri"
 
 module ChupaText
   module Command
@@ -27,7 +29,7 @@ module ChupaText
       end
 
       def initialize
-        @path = nil
+        @input = nil
         @configuration = Configuration.new
       end
 
@@ -35,12 +37,7 @@ module ChupaText
         return false unless parse_arguments(arguments)
 
         extractor = create_extractor
-        data = Data.new
-        if @path.nil?
-          data.body = $stdin.read
-        else
-          data.path = @path
-        end
+        data = create_data
         formatter = create_formatter
         formatter.format_start(data)
         extractor.extract(data) do |extracted|
@@ -64,13 +61,13 @@ module ChupaText
           puts(parser.help)
           return false
         end
-        @path, = rest
+        @input, = rest
         true
       end
 
       def create_option_parser
         parser = OptionParser.new
-        parser.banner += " [FILE]"
+        parser.banner += " [FILE_OR_URI]"
         parser.version = VERSION
         parser.on("--configuration=FILE",
                   "Read configuration from FILE.") do |path|
@@ -91,6 +88,25 @@ module ChupaText
         extractor
       end
 
+      def create_data
+        data = Data.new
+        if @input.nil?
+          data.body = $stdin.read
+        else
+          uri = URI.parse(@input)
+          if uri.is_a?(URI::HTTP)
+            open(uri) do |input|
+              data.body = input.read
+              data.content_type = input.content_type
+            end
+            data["uri"] = @input
+          else
+            data.path = @input
+          end
+        end
+        data
+      end
+
       def create_formatter
         Formatters::JSON.new($stdout)
       end

  Modified: test/command/test-chupa-text.rb (+66 -12)
===================================================================
--- test/command/test-chupa-text.rb    2014-01-03 16:02:31 +0900 (91d86b9)
+++ test/command/test-chupa-text.rb    2014-01-03 17:01:41 +0900 (ce68461)
@@ -14,6 +14,8 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
+require "socket"
+
 class TestCommandChupaText < Test::Unit::TestCase
   include Helper
 
@@ -21,27 +23,29 @@ class TestCommandChupaText < Test::Unit::TestCase
     setup_io
   end
 
-  def teardown
-    teardown_io
+  def setup_io
+    @stdin  = StringIO.new
+    @stdout = StringIO.new
   end
 
-  def setup_io
+  private
+  def wrap_io
     @original_stdin  = $stdin
     @original_stdout = $stdout
-    @stdin  = StringIO.new
-    @stdout = StringIO.new
     $stdin  = @stdin
     $stdout = @stdout
+    begin
+      yield
+    ensure
+      $stdin  = @original_stdin
+      $stdout = @original_stdout
+    end
   end
 
-  def teardown_io
-    $stdin  = @original_stdin
-    $stdout = @original_stdout
-  end
-
-  private
   def run_command(*arguments)
-    succeeded = ChupaText::Command::ChupaText.run(*arguments)
+    succeeded = wrap_io do
+      ChupaText::Command::ChupaText.run(*arguments)
+    end
     [succeeded, JSON.parse(@stdout.string)]
   end
 
@@ -74,6 +78,56 @@ class TestCommandChupaText < Test::Unit::TestCase
       end
     end
 
+    sub_test_case("URI") do
+      def setup
+        super
+        setup_www_server
+      end
+
+      def teardown
+        super
+        teardown_www_server
+      end
+
+      def setup_www_server
+        @www_server = TCPServer.new("127.0.0.1", 0)
+        _, port, host, = @www_server.addr
+        @uri = "http://#{host}:#{port}/"
+        @www_server_thread = Thread.new do
+          client = @www_server.accept
+          loop do
+            line = client.gets
+            break if line.chomp.empty?
+          end
+          client.print("HTTP/1.1 200 OK\r\n")
+          client.print("Content-Type: text/html\r\n")
+          client.print("\r\n")
+          client.print(@html)
+          client.close
+        end
+      end
+
+      def teardown_www_server
+        @www_server.close
+        @www_server_thread.kill
+      end
+
+      def test_single
+        @html = "<html><body>Hello</body></html>"
+        assert_equal([
+                       true,
+                       {
+                         "content-type" => "text/html",
+                         "size"         => @html.bytesize,
+                         "uri"          => @uri,
+                         "texts"        => [
+                         ],
+                       },
+                     ],
+                     run_command(@uri))
+      end
+    end
+
     sub_test_case("standard input") do
       def test_single
         body = "Hello\n"
-------------- next part --------------
An HTML attachment was scrubbed...
Download 



More information about the Groonga-commit mailing list
Back to archive index