Kouhei Sutou
null+****@clear*****
Mon Feb 20 23:44:14 JST 2017
Kouhei Sutou	2017-02-20 23:44:14 +0900 (Mon, 20 Feb 2017)

  New Revision: b7579365c3e9358760be9cd81a51954ac6142973
  https://github.com/ranguba/ranguba-server/commit/b7579365c3e9358760be9cd81a51954ac6142973

  Message:
    Accept nil MIME type

  Removed files:
    bin/ranguba
  Modified files:
    app/models/scraping.rb

  Modified: app/models/scraping.rb (+3 -1)
===================================================================
--- app/models/scraping.rb    2017-02-20 00:19:30 +0900 (2b2bd44)
+++ app/models/scraping.rb    2017-02-20 23:44:14 +0900 (9491b57)
@@ -11,9 +11,11 @@ class Scraping
     @entry = Entry.new
     extractor = ChupaText::Extractor.new
     extractor.apply_configuration(ChupaText::Configuration.default)
-    data = ChupaText::TextData.new(body)
+    data = ChupaText::Data.new
     data.uri = uri
     data.mime_type = mime_type
+    data.body = body
+    data.size = body.bytesize
     texts = []
     extractor.extract(data) do |extracted_data|
       texts << extracted_data.body

  Deleted: bin/ranguba (+0 -27) 100644
===================================================================
--- bin/ranguba    2017-02-20 00:19:30 +0900 (ffdf001)
+++ /dev/null
@@ -1,27 +0,0 @@
-# -*- ruby -*-
-
-require "net/http"
-require "uri"
-require "pathname"
-require "json"
-require "pp"
-
-def path_to_uri(path)
-  components = path.expand_path.to_s.split(Pathname::SEPARATOR_PAT)
-  escaped_components = components.collect do |component|
-    CGI.escape(component)
-  end
-  "file://" + File.join(*escaped_components)
-end
-
-api_uri = URI("http://localhost:3000/scraping.json")
-ARGV.each do |path|
-  response = Net::HTTP.post_form(api_uri,
-                                 {
-                                   "uri" => path_to_uri(Pathname(path)),
-                                   "mime_type" => "message/rfc822",
-                                   "body" => File.read(path),
-                                 })
-  p response
-  pp JSON.parse(response.body)
-end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download