Kouhei Sutou
null+****@clear*****
Fri Jan 3 23:26:42 JST 2014
Kouhei Sutou 2014-01-03 23:26:42 +0900 (Fri, 03 Jan 2014) New Revision: 1e3fced54239d0553c3d6b8e15b3ea19bb8f655e https://github.com/ranguba/chupa-text/commit/1e3fced54239d0553c3d6b8e15b3ea19bb8f655e Message: Extractor#extract accepts URI as input Added files: test/fixture/extractor/hello.txt Modified files: lib/chupa-text/extractor.rb test/test-extractor.rb Modified: lib/chupa-text/extractor.rb (+29 -2) =================================================================== --- lib/chupa-text/extractor.rb 2014-01-03 23:23:33 +0900 (52e6f15) +++ lib/chupa-text/extractor.rb 2014-01-03 23:26:42 +0900 (5c139ad) @@ -14,6 +14,9 @@ # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +require "pathname" +require "uri" + module ChupaText class Extractor def initialize @@ -39,8 +42,21 @@ module ChupaText @decomposers << decomposer end - def extract(data) - targets = [data] + # Extracts texts from input. Each extracted text is passed to the + # given block. + # + # @param [Data, String] input The input to be extracted texts. + # If `input` is `String`, it is treated as the local file path or URI + # of input data. + # + # @yield [text_data] Gives extracted text data to the block. + # The block may be called zero or more times. + # @yieldparam [Data] text_data The extracted text data. + # You can get text data by `text_data.body`. + # + # @return [void] + def extract(input) + targets = [ensure_data(input)] until targets.empty? 
target = targets.pop decomposer = find_decomposer(target) @@ -55,6 +71,17 @@ module ChupaText end private + def ensure_data(input) + case input + when String, Pathname, URI::Generic + data = Data.new + data.uri = input.to_s + data + else + input + end + end + def find_decomposer(data) @decomposers.find do |decomposer| decomposer.target?(data) Added: test/fixture/extractor/hello.txt (+1 -0) 100644 =================================================================== --- /dev/null +++ test/fixture/extractor/hello.txt 2014-01-03 23:26:42 +0900 (e965047) @@ -0,0 +1 @@ +Hello Modified: test/test-extractor.rb (+21 -0) =================================================================== --- test/test-extractor.rb 2014-01-03 23:23:33 +0900 (093284f) +++ test/test-extractor.rb 2014-01-03 23:26:42 +0900 (c46311f) @@ -15,10 +15,17 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA class TestExtractor < Test::Unit::TestCase + include Helper + def setup @extractor = ChupaText::Extractor.new end + private + def fixture_path(*components) + super("extractor", *components) + end + sub_test_case("extract") do private def extract(data) @@ -29,6 +36,20 @@ class TestExtractor < Test::Unit::TestCase texts end + sub_test_case("input") do + def test_string + extract(fixture_path("hello.txt").to_s) + end + + def test_uri + extract(URI.parse(fixture_path("hello.txt").to_s)) + end + + def test_path + extract(fixture_path("hello.txt")) + end + end + sub_test_case("no decomposers") do def test_text data = ChupaText::Data.new -------------- next part -------------- HTMLの添付ファイルを保管しました...Download