[Groonga-commit] ranguba/chupa-text at c266bbf [master] Support multi decomposed data

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Jan 2 00:38:14 JST 2014


Kouhei Sutou	2014-01-02 00:38:14 +0900 (Thu, 02 Jan 2014)

  New Revision: c266bbfbdd2bd1d228802ae8ee82902657b5a9af
  https://github.com/ranguba/chupa-text/commit/c266bbfbdd2bd1d228802ae8ee82902657b5a9af

  Message:
    Support multi decomposed data

  Modified files:
    lib/chupa-text/data.rb
    lib/chupa-text/extractor.rb
    test/test-extractor.rb

  Modified: lib/chupa-text/data.rb (+6 -0)
===================================================================
--- lib/chupa-text/data.rb    2014-01-02 00:15:02 +0900 (a1b7d9a)
+++ lib/chupa-text/data.rb    2014-01-02 00:38:14 +0900 (ac8c19e)
@@ -36,6 +36,12 @@ module ChupaText
       @source = nil
     end
 
+    def initialize_copy(object)
+      super
+      @attributes = @attributes.dup
+      self
+    end
+
     def body
       @body ||= read_body
     end

  Modified: lib/chupa-text/extractor.rb (+14 -7)
===================================================================
--- lib/chupa-text/extractor.rb    2014-01-02 00:15:02 +0900 (77b4ab3)
+++ lib/chupa-text/extractor.rb    2014-01-02 00:38:14 +0900 (05ca766)
@@ -27,14 +27,21 @@ module ChupaText
     end
 
     def extract(data)
-      loop do
-        if data.text?
-          yield(data)
-          return
+      processed = {}
+      targets = [data]
+      until targets.empty?
+        target = targets.pop
+        decomposer = find_decomposer(target)
+        processed_key = [target, decomposer]
+        decomposer = nil if processed[processed_key]
+        if decomposer.nil?
+          yield(target) if target.text?
+          next
+        end
+        processed[processed_key] = true
+        decomposer.decompose(target) do |decomposed|
+          targets.push(decomposed)
         end
-        decomposer = find_decomposer(data)
-        return if decomposer.nil?
-        data = decomposer.decompose(data)
       end
     end
 

  Modified: test/test-extractor.rb (+29 -1)
===================================================================
--- test/test-extractor.rb    2014-01-02 00:15:02 +0900 (215df31)
+++ test/test-extractor.rb    2014-01-02 00:38:14 +0900 (7341edd)
@@ -55,7 +55,7 @@ class TestExtractor < Test::Unit::TestCase
           extracted = ChupaText::Data.new
           extracted.content_type = "text/plain"
           extracted.body = data.body.gsub(/<.+?>/, "")
-          extracted
+          yield(extracted)
         end
       end
 
@@ -72,5 +72,33 @@ class TestExtractor < Test::Unit::TestCase
         assert_equal(["Hello"], extract(data))
       end
     end
+
+    sub_test_case("multi decomposed") do
+      class CopyDecomposer < ChupaText::Decomposer
+        def target?(data)
+          data["copied"].nil?
+        end
+
+        def decompose(data)
+          copied_data = data.dup
+          copied_data["copied"] = true
+          yield(copied_data.dup)
+          yield(copied_data.dup)
+        end
+      end
+
+      def setup
+        super
+        decomposer = CopyDecomposer.new
+        @extractor.add_decomposer(decomposer)
+      end
+
+      def test_decompose
+        data = ChupaText::Data.new
+        data.content_type = "text/plain"
+        data.body = "Hello"
+        assert_equal(["Hello", "Hello"], extract(data))
+      end
+    end
   end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index