Kouhei Sutou
null+****@clear*****
Thu Jan 2 00:38:14 JST 2014
Kouhei Sutou 2014-01-02 00:38:14 +0900 (Thu, 02 Jan 2014) New Revision: c266bbfbdd2bd1d228802ae8ee82902657b5a9af https://github.com/ranguba/chupa-text/commit/c266bbfbdd2bd1d228802ae8ee82902657b5a9af Message: Support multi decomposed data Modified files: lib/chupa-text/data.rb lib/chupa-text/extractor.rb test/test-extractor.rb Modified: lib/chupa-text/data.rb (+6 -0) =================================================================== --- lib/chupa-text/data.rb 2014-01-02 00:15:02 +0900 (a1b7d9a) +++ lib/chupa-text/data.rb 2014-01-02 00:38:14 +0900 (ac8c19e) @@ -36,6 +36,12 @@ module ChupaText @source = nil end + def initialize_copy(object) + super + @attributes = @attributes.dup + self + end + def body @body ||= read_body end Modified: lib/chupa-text/extractor.rb (+14 -7) =================================================================== --- lib/chupa-text/extractor.rb 2014-01-02 00:15:02 +0900 (77b4ab3) +++ lib/chupa-text/extractor.rb 2014-01-02 00:38:14 +0900 (05ca766) @@ -27,14 +27,21 @@ module ChupaText end def extract(data) - loop do - if data.text? - yield(data) - return + processed = {} + targets = [data] + until targets.empty? + target = targets.pop + decomposer = find_decomposer(target) + processed_key = [target, decomposer] + decomposer = nil if processed[processed_key] + if decomposer.nil? + yield(target) if target.text? + next + end + processed[processed_key] = true + decomposer.decompose(target) do |decomposed| + targets.push(decomposed) end - decomposer = find_decomposer(data) - return if decomposer.nil? 
- data = decomposer.decompose(data) end end Modified: test/test-extractor.rb (+29 -1) =================================================================== --- test/test-extractor.rb 2014-01-02 00:15:02 +0900 (215df31) +++ test/test-extractor.rb 2014-01-02 00:38:14 +0900 (7341edd) @@ -55,7 +55,7 @@ class TestExtractor < Test::Unit::TestCase extracted = ChupaText::Data.new extracted.content_type = "text/plain" extracted.body = data.body.gsub(/<.+?>/, "") - extracted + yield(extracted) end end @@ -72,5 +72,33 @@ class TestExtractor < Test::Unit::TestCase assert_equal(["Hello"], extract(data)) end end + + sub_test_case("multi decomposed") do + class CopyDecomposer < ChupaText::Decomposer + def target?(data) + data["copied"].nil? + end + + def decompose(data) + copied_data = data.dup + copied_data["copied"] = true + yield(copied_data.dup) + yield(copied_data.dup) + end + end + + def setup + super + decomposer = CopyDecomposer.new + @extractor.add_decomposer(decomposer) + end + + def test_decompose + data = ChupaText::Data.new + data.content_type = "text/plain" + data.body = "Hello" + assert_equal(["Hello", "Hello"], extract(data)) + end + end end end -------------- next part -------------- HTML����������������������������... Download