[Groonga-commit] ranguba/chupa-text at 6adcd01 [master] Add InputData and VirtualFileData for specific use

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Jan 4 20:22:00 JST 2014


Kouhei Sutou	2014-01-04 20:22:00 +0900 (Sat, 04 Jan 2014)

  New Revision: 6adcd01e71da0c5b37537936d7af70b318e82a72
  https://github.com/ranguba/chupa-text/commit/6adcd01e71da0c5b37537936d7af70b318e82a72

  Message:
    Add InputData and VirtualFileData for specific use

  Copied files:
    lib/chupa-text/input-data.rb
      (from lib/chupa-text/decomposers/tar.rb)
    lib/chupa-text/virtual-file-data.rb
      (from lib/chupa-text.rb)
  Modified files:
    lib/chupa-text.rb
    lib/chupa-text/command/chupa-text.rb
    lib/chupa-text/data.rb
    lib/chupa-text/decomposers/gzip.rb
    lib/chupa-text/decomposers/tar.rb
    test/decomposers/test-gzip.rb
    test/decomposers/test-tar.rb

  Modified: lib/chupa-text.rb (+5 -1)
===================================================================
--- lib/chupa-text.rb    2014-01-04 19:13:00 +0900 (95599d1)
+++ lib/chupa-text.rb    2014-01-04 20:22:00 +0900 (bbd2244)
@@ -18,7 +18,6 @@ require "chupa-text/version"
 
 require "chupa-text/configuration"
 require "chupa-text/configuration-loader"
-require "chupa-text/data"
 require "chupa-text/decomposer"
 require "chupa-text/decomposer-registry"
 require "chupa-text/decomposers"
@@ -29,4 +28,9 @@ require "chupa-text/mime-type-registry"
 
 require "chupa-text/file-content"
 require "chupa-text/virtual-content"
+
+require "chupa-text/data"
+require "chupa-text/input-data"
+require "chupa-text/virtual-file-data"
+
 require "chupa-text/command"

  Modified: lib/chupa-text/command/chupa-text.rb (+2 -4)
===================================================================
--- lib/chupa-text/command/chupa-text.rb    2014-01-04 19:13:00 +0900 (c38fde8)
+++ lib/chupa-text/command/chupa-text.rb    2014-01-04 20:22:00 +0900 (1564212)
@@ -87,13 +87,11 @@ module ChupaText
       end
 
       def create_data
-        data = Data.new
         if @input.nil?
-          data.body = $stdin.read
+          VirtualFileData.new(nil, $stdin)
         else
-          data.uri = @input
+          InputData.new(@input)
         end
-        data
       end
 
       def create_formatter

  Modified: lib/chupa-text/data.rb (+28 -28)
===================================================================
--- lib/chupa-text/data.rb    2014-01-04 19:13:00 +0900 (89733c4)
+++ lib/chupa-text/data.rb    2014-01-04 20:22:00 +0900 (1ff4ede)
@@ -19,24 +19,44 @@ require "open-uri"
 
 module ChupaText
   class Data
-    attr_accessor :body
-    attr_accessor :attributes
-
     # @return [URI, nil] The URI of the data if the data is for remote
     #   or local file, `nil` if the data isn't associated with any
     #   URIs.
     attr_reader :uri
 
+    # @return [String, nil] The content of the data, `nil` if the data
+    #   doesn't have any content.
+    attr_accessor :body
+
+    # @return [Integer, nil] The byte size of the data, `nil` if the data
+    #   doesn't have any content.
+    attr_accessor :size
+
+    # @return [String, nil] The path associated with the content of
+    #   the data, `nil` if the data doesn't associated with any file.
+    #
+    #   The path may not be related with the original content. For
+    #   example, `"/tmp/XXX.txt"` may be returned for the data of
+    #   `"http://example.com/XXX.txt"`.
+    #
+    #   This value is useful to use an external command to extract
+    #   text and meta-data.
+    attr_accessor :path
+
+    attr_accessor :attributes
+
     # @return [Data, nil] The source of the data. For example, text
     #   data (`hello.txt`) in archive data (`hello.tar`) have the
     #   archive data in {#source}.
     attr_accessor :source
 
     def initialize
+      @uri = nil
       @body = nil
+      @size = nil
+      @path = nil
       @mime_type = nil
       @attributes = {}
-      @uri = nil
       @source = nil
     end
 
@@ -54,15 +74,10 @@ module ChupaText
         uri = URI.parse(uri.to_s)
       end
       @uri = uri
-      if @uri and @body.nil?
-        retrieve_info(@uri)
-      end
     end
 
-    def size
-      _body = body
-      return 0 if _body.nil?
-      _body.bytesize
+    def open
+      yield(StringIO.new(body))
     end
 
     def [](name)
@@ -103,21 +118,6 @@ module ChupaText
     end
 
     private
-    def retrieve_info(uri)
-      if uri.respond_to?(:open)
-        uri.open("rb") do |input|
-          @body = input.read
-          if input.respond_to?(:content_type)
-            self.mime_type = input.content_type.split(/;/).first
-          end
-        end
-      else
-        File.open(uri.path, "rb") do |file|
-          @body = file.read
-        end
-      end
-    end
-
     def guess_mime_type
       guess_mime_type_from_uri or
         guess_mime_type_from_body
@@ -129,8 +129,8 @@ module ChupaText
 
     def guess_mime_type_from_body
       mime_type = nil
-      change_encoding(body, "UTF-8") do |_body|
-        mime_type = "text/plain" if _body.valid_encoding?
+      change_encoding(body, "UTF-8") do |utf8_body|
+        mime_type = "text/plain" if utf8_body.valid_encoding?
       end
       mime_type
     end

  Modified: lib/chupa-text/decomposers/gzip.rb (+4 -4)
===================================================================
--- lib/chupa-text/decomposers/gzip.rb    2014-01-04 19:13:00 +0900 (0c27e81)
+++ lib/chupa-text/decomposers/gzip.rb    2014-01-04 20:22:00 +0900 (992e246)
@@ -37,14 +37,14 @@ module ChupaText
 
       def decompose(data)
         reader = Zlib::GzipReader.new(StringIO.new(data.body))
-        extracted = Data.new
-        extracted.body   = reader.read
+        uri = nil
         case data.extension
         when "gz"
-          extracted.uri  = data.uri.to_s.gsub(/\.gz\z/i, "")
+          uri = data.uri.to_s.gsub(/\.gz\z/i, "")
         when "tgz"
-          extracted.uri  = data.uri.to_s.gsub(/\.tgz\z/i, ".tar")
+          uri = data.uri.to_s.gsub(/\.tgz\z/i, ".tar")
         end
+        extracted = VirtualFileData.new(uri, reader)
         extracted.source = data
         yield(extracted)
       end

  Modified: lib/chupa-text/decomposers/tar.rb (+1 -3)
===================================================================
--- lib/chupa-text/decomposers/tar.rb    2014-01-04 19:13:00 +0900 (d5ad665)
+++ lib/chupa-text/decomposers/tar.rb    2014-01-04 20:22:00 +0900 (89f2422)
@@ -33,9 +33,7 @@ module ChupaText
         Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader|
           reader.each do |entry|
             next unless entry.file?
-            extracted = Data.new
-            extracted.body   = entry.read
-            extracted.uri    = entry.full_name
+            extracted = VirtualFileData.new(entry.full_name, entry)
             extracted.source = data
             yield(extracted)
           end

  Copied: lib/chupa-text/input-data.rb (+34 -22) 56%
===================================================================
--- lib/chupa-text/decomposers/tar.rb    2014-01-04 19:13:00 +0900 (d5ad665)
+++ lib/chupa-text/input-data.rb    2014-01-04 20:22:00 +0900 (cc4fa13)
@@ -14,32 +14,44 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
-require "stringio"
-require "rubygems/package"
-
-require "chupa-text"
+require "uri"
+require "open-uri"
 
 module ChupaText
-  module Decomposers
-    class Tar < Decomposer
-      registry.register("tar", self)
-
-      def target?(data)
-        data.extension == "tar" or
-          data.mime_type == "application/x-tar"
+  class InputData < Data
+    def initialize(uri)
+      super()
+      self.uri = uri
+      if @uri.class == URI::Generic
+        @content = FileContent.new(@uri.path)
+      else
+        @content = download
       end
+    end
+
+    def body
+      @content.body
+    end
+
+    def size
+      @content.size
+    end
+
+    def path
+      @content.path
+    end
+
+    def open(&block)
+      @content.open(&block)
+    end
 
-      def decompose(data)
-        Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader|
-          reader.each do |entry|
-            next unless entry.file?
-            extracted = Data.new
-            extracted.body   = entry.read
-            extracted.uri    = entry.full_name
-            extracted.source = data
-            yield(extracted)
-          end
-        end
+    private
+    def download
+      path = @uri.path
+      path += "index.html" if path.end_with?("/")
+      @uri.open("rb") do |input|
+        self.mime_type = input.content_type.split(/;/).first
+        VirtualContent.new(input, path)
       end
     end
   end

  Copied: lib/chupa-text/virtual-file-data.rb (+28 -14) 62%
===================================================================
--- lib/chupa-text.rb    2014-01-04 19:13:00 +0900 (95599d1)
+++ lib/chupa-text/virtual-file-data.rb    2014-01-04 20:22:00 +0900 (64e2685)
@@ -14,19 +14,33 @@
 # License along with this library; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
-require "chupa-text/version"
+module ChupaText
+  class VirtualFileData < Data
+    def initialize(uri, input)
+      super()
+      self.uri = uri
+      if @uri
+        path = @uri.path
+      else
+        path = nil
+      end
+      @content = VirtualContent.new(input, path)
+    end
 
-require "chupa-text/configuration"
-require "chupa-text/configuration-loader"
-require "chupa-text/data"
-require "chupa-text/decomposer"
-require "chupa-text/decomposer-registry"
-require "chupa-text/decomposers"
-require "chupa-text/extractor"
-require "chupa-text/formatters"
-require "chupa-text/mime-type"
-require "chupa-text/mime-type-registry"
+    def body
+      @content.body
+    end
 
-require "chupa-text/file-content"
-require "chupa-text/virtual-content"
-require "chupa-text/command"
+    def size
+      @content.size
+    end
+
+    def path
+      @content.path
+    end
+
+    def open(&block)
+      @content.open(&block)
+    end
+  end
+end

  Modified: test/decomposers/test-gzip.rb (+3 -6)
===================================================================
--- test/decomposers/test-gzip.rb    2014-01-04 19:13:00 +0900 (94bfa0e)
+++ test/decomposers/test-gzip.rb    2014-01-04 20:22:00 +0900 (6bfc469)
@@ -38,8 +38,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
     sub_test_case("gz") do
       def setup
         super
-        @data = ChupaText::Data.new
-        @data.uri = fixture_path("hello.txt.gz")
+        @data = ChupaText::InputData.new(fixture_path("hello.txt.gz"))
       end
 
       def test_path
@@ -61,8 +60,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
     sub_test_case("tar.gz") do
       def setup
         super
-        @data = ChupaText::Data.new
-        @data.uri = fixture_path("hello.tar.gz")
+        @data = ChupaText::InputData.new(fixture_path("hello.tar.gz"))
       end
 
       def test_uri
@@ -89,8 +87,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
     sub_test_case("tgz") do
       def setup
         super
-        @data = ChupaText::Data.new
-        @data.uri = fixture_path("hello.tgz")
+        @data = ChupaText::InputData.new(fixture_path("hello.tgz"))
       end
 
       def test_uri

  Modified: test/decomposers/test-tar.rb (+2 -4)
===================================================================
--- test/decomposers/test-tar.rb    2014-01-04 19:13:00 +0900 (35bb0f2)
+++ test/decomposers/test-tar.rb    2014-01-04 20:22:00 +0900 (a3c5531)
@@ -42,8 +42,7 @@ class TestDecomposersTar < Test::Unit::TestCase
     sub_test_case("top-level") do
       def setup
         super
-        @data = ChupaText::Data.new
-        @data.uri = fixture_path("top-level.tar")
+        @data = ChupaText::InputData.new(fixture_path("top-level.tar"))
       end
 
       def test_decompose
@@ -61,8 +60,7 @@ class TestDecomposersTar < Test::Unit::TestCase
     sub_test_case("directory") do
       def setup
         super
-        @data = ChupaText::Data.new
-        @data.uri = fixture_path("directory.tar")
+        @data = ChupaText::InputData.new(fixture_path("directory.tar"))
       end
 
       def test_decompose
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index