Kouhei Sutou
null+****@clear*****
Sat Jan 4 20:22:00 JST 2014
Kouhei Sutou 2014-01-04 20:22:00 +0900 (Sat, 04 Jan 2014) New Revision: 6adcd01e71da0c5b37537936d7af70b318e82a72 https://github.com/ranguba/chupa-text/commit/6adcd01e71da0c5b37537936d7af70b318e82a72 Message: Add InputData and VirtualFileData for specific use Copied files: lib/chupa-text/input-data.rb (from lib/chupa-text/decomposers/tar.rb) lib/chupa-text/virtual-file-data.rb (from lib/chupa-text.rb) Modified files: lib/chupa-text.rb lib/chupa-text/command/chupa-text.rb lib/chupa-text/data.rb lib/chupa-text/decomposers/gzip.rb lib/chupa-text/decomposers/tar.rb test/decomposers/test-gzip.rb test/decomposers/test-tar.rb Modified: lib/chupa-text.rb (+5 -1) =================================================================== --- lib/chupa-text.rb 2014-01-04 19:13:00 +0900 (95599d1) +++ lib/chupa-text.rb 2014-01-04 20:22:00 +0900 (bbd2244) @@ -18,7 +18,6 @@ require "chupa-text/version" require "chupa-text/configuration" require "chupa-text/configuration-loader" -require "chupa-text/data" require "chupa-text/decomposer" require "chupa-text/decomposer-registry" require "chupa-text/decomposers" @@ -29,4 +28,9 @@ require "chupa-text/mime-type-registry" require "chupa-text/file-content" require "chupa-text/virtual-content" + +require "chupa-text/data" +require "chupa-text/input-data" +require "chupa-text/virtual-file-data" + require "chupa-text/command" Modified: lib/chupa-text/command/chupa-text.rb (+2 -4) =================================================================== --- lib/chupa-text/command/chupa-text.rb 2014-01-04 19:13:00 +0900 (c38fde8) +++ lib/chupa-text/command/chupa-text.rb 2014-01-04 20:22:00 +0900 (1564212) @@ -87,13 +87,11 @@ module ChupaText end def create_data - data = Data.new if @input.nil?
- data.body = $stdin.read + VirtualFileData.new(nil, $stdin) else - data.uri = @input + InputData.new(@input) end - data end def create_formatter Modified: lib/chupa-text/data.rb (+28 -28) =================================================================== --- lib/chupa-text/data.rb 2014-01-04 19:13:00 +0900 (89733c4) +++ lib/chupa-text/data.rb 2014-01-04 20:22:00 +0900 (1ff4ede) @@ -19,24 +19,44 @@ require "open-uri" module ChupaText class Data - attr_accessor :body - attr_accessor :attributes - # @return [URI, nil] The URI of the data if the data is for remote # or local file, `nil` if the data isn't associated with any # URIs. attr_reader :uri + # @return [String, nil] The content of the data, `nil` if the data + # doesn't have any content. + attr_accessor :body + + # @return [Integer, nil] The byte size of the data, `nil` if the data + # doesn't have any content. + attr_accessor :size + + # @return [String, nil] The path associated with the content of + # the data, `nil` if the data doesn't associated with any file. + # + # The path may not be related with the original content. For + # example, `"/tmp/XXX.txt"` may be returned for the data of + # `"http://example.com/XXX.txt"`. + # + # This value is useful to use an external command to extract + # text and meta-data. + attr_accessor :path + + attr_accessor :attributes + # @return [Data, nil] The source of the data. For example, text # data (`hello.txt`) in archive data (`hello.tar`) have the # archive data in {#source}. attr_accessor :source def initialize + @uri = nil @body = nil + @size = nil + @path = nil @mime_type = nil @attributes = {} - @uri = nil @source = nil end @@ -54,15 +74,10 @@ module ChupaText uri = URI.parse(uri.to_s) end @uri = uri - if @uri and @body.nil?
- _body.bytesize + def open + yield(StringIO.new(body)) end def [](name) @@ -103,21 +118,6 @@ module ChupaText end private - def retrieve_info(uri) - if uri.respond_to?(:open) - uri.open("rb") do |input| - @body = input.read - if input.respond_to?(:content_type) - self.mime_type = input.content_type.split(/;/).first - end - end - else - File.open(uri.path, "rb") do |file| - @body = file.read - end - end - end - def guess_mime_type guess_mime_type_from_uri or guess_mime_type_from_body @@ -129,8 +129,8 @@ module ChupaText def guess_mime_type_from_body mime_type = nil - change_encoding(body, "UTF-8") do |_body| - mime_type = "text/plain" if _body.valid_encoding? + change_encoding(body, "UTF-8") do |utf8_body| + mime_type = "text/plain" if utf8_body.valid_encoding? end mime_type end Modified: lib/chupa-text/decomposers/gzip.rb (+4 -4) =================================================================== --- lib/chupa-text/decomposers/gzip.rb 2014-01-04 19:13:00 +0900 (0c27e81) +++ lib/chupa-text/decomposers/gzip.rb 2014-01-04 20:22:00 +0900 (992e246) @@ -37,14 +37,14 @@ module ChupaText def decompose(data) reader = Zlib::GzipReader.new(StringIO.new(data.body)) - extracted = Data.new - extracted.body = reader.read + uri = nil case data.extension when "gz" - extracted.uri = data.uri.to_s.gsub(/\.gz\z/i, "") + uri = data.uri.to_s.gsub(/\.gz\z/i, "") when "tgz" - extracted.uri = data.uri.to_s.gsub(/\.tgz\z/i, ".tar") + uri = data.uri.to_s.gsub(/\.tgz\z/i, ".tar") end + extracted = VirtualFileData.new(uri, reader) extracted.source = data yield(extracted) end Modified: lib/chupa-text/decomposers/tar.rb (+1 -3) =================================================================== --- lib/chupa-text/decomposers/tar.rb 2014-01-04 19:13:00 +0900 (d5ad665) +++ lib/chupa-text/decomposers/tar.rb 2014-01-04 20:22:00 +0900 (89f2422) @@ -33,9 +33,7 @@ module ChupaText Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader| reader.each do |entry| next unless entry.file? 
- extracted = Data.new - extracted.body = entry.read - extracted.uri = entry.full_name + extracted = VirtualFileData.new(entry.full_name, entry) extracted.source = data yield(extracted) end Copied: lib/chupa-text/input-data.rb (+34 -22) 56% =================================================================== --- lib/chupa-text/decomposers/tar.rb 2014-01-04 19:13:00 +0900 (d5ad665) +++ lib/chupa-text/input-data.rb 2014-01-04 20:22:00 +0900 (cc4fa13) @@ -14,32 +14,44 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -require "stringio" -require "rubygems/package" - -require "chupa-text" +require "uri" +require "open-uri" module ChupaText - module Decomposers - class Tar < Decomposer - registry.register("tar", self) - - def target?(data) - data.extension == "tar" or - data.mime_type == "application/x-tar" + class InputData < Data + def initialize(uri) + super() + self.uri = uri + if @uri.class == URI::Generic + @content = FileContent.new(@uri.path) + else + @content = download end + end + + def body + @content.body + end + + def size + @content.size + end + + def path + @content.path + end + + def open(&block) + @content.open(&block) + end - def decompose(data) - Gem::Package::TarReader.new(StringIO.new(data.body)) do |reader| - reader.each do |entry| - next unless entry.file?
- extracted = Data.new - extracted.body = entry.read - extracted.uri = entry.full_name - extracted.source = data - yield(extracted) - end - end + private + def download + path = @uri.path + path += "index.html" if path.end_with?("/") + @uri.open("rb") do |input| + self.mime_type = input.content_type.split(/;/).first + VirtualContent.new(input, path) end end end Copied: lib/chupa-text/virtual-file-data.rb (+28 -14) 62% =================================================================== --- lib/chupa-text.rb 2014-01-04 19:13:00 +0900 (95599d1) +++ lib/chupa-text/virtual-file-data.rb 2014-01-04 20:22:00 +0900 (64e2685) @@ -14,19 +14,33 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -require "chupa-text/version" +module ChupaText + class VirtualFileData < Data + def initialize(uri, input) + super() + self.uri = uri + if @uri + path = @uri.path + else + path = nil + end + @content = VirtualContent.new(input, path) + end -require "chupa-text/configuration" -require "chupa-text/configuration-loader" -require "chupa-text/data" -require "chupa-text/decomposer" -require "chupa-text/decomposer-registry" -require "chupa-text/decomposers" -require "chupa-text/extractor" -require "chupa-text/formatters" -require "chupa-text/mime-type" -require "chupa-text/mime-type-registry" + def body + @content.body + end -require "chupa-text/file-content" -require "chupa-text/virtual-content" -require "chupa-text/command" + def size + @content.size + end + + def path + @content.path + end + + def open(&block) + @content.open(&block) + end + end +end Modified: test/decomposers/test-gzip.rb (+3 -6) =================================================================== --- test/decomposers/test-gzip.rb 2014-01-04 19:13:00 +0900 (94bfa0e) +++ test/decomposers/test-gzip.rb 2014-01-04 20:22:00 +0900 (6bfc469) @@ -38,8 +38,7 @@ class TestDecomposersGzip < Test::Unit::TestCase
sub_test_case("gz") do def setup super - @data = ChupaText::Data.new - @data.uri = fixture_path("hello.txt.gz") + @data = ChupaText::InputData.new(fixture_path("hello.txt.gz")) end def test_path @@ -61,8 +60,7 @@ class TestDecomposersGzip < Test::Unit::TestCase sub_test_case("tar.gz") do def setup super - @data = ChupaText::Data.new - @data.uri = fixture_path("hello.tar.gz") + @data = ChupaText::InputData.new(fixture_path("hello.tar.gz")) end def test_uri @@ -89,8 +87,7 @@ class TestDecomposersGzip < Test::Unit::TestCase sub_test_case("tgz") do def setup super - @data = ChupaText::Data.new - @data.uri = fixture_path("hello.tgz") + @data = ChupaText::InputData.new(fixture_path("hello.tgz")) end def test_uri Modified: test/decomposers/test-tar.rb (+2 -4) =================================================================== --- test/decomposers/test-tar.rb 2014-01-04 19:13:00 +0900 (35bb0f2) +++ test/decomposers/test-tar.rb 2014-01-04 20:22:00 +0900 (a3c5531) @@ -42,8 +42,7 @@ class TestDecomposersTar < Test::Unit::TestCase sub_test_case("top-level") do def setup super - @data = ChupaText::Data.new - @data.uri = fixture_path("top-level.tar") + @data = ChupaText::InputData.new(fixture_path("top-level.tar")) end def test_decompose @@ -61,8 +60,7 @@ class TestDecomposersTar < Test::Unit::TestCase sub_test_case("directory") do def setup super - @data = ChupaText::Data.new - @data.uri = fixture_path("directory.tar") + @data = ChupaText::InputData.new(fixture_path("directory.tar")) end def test_decompose -------------- next part -------------- HTML����������������������������...Download