[Groonga-commit] ranguba/ranguba-server at 701b5e5 [master] Support scraping

Back to archive index

Kouhei Sutou null+****@clear*****
Sun Feb 19 23:40:10 JST 2017


Kouhei Sutou	2017-02-19 23:40:10 +0900 (Sun, 19 Feb 2017)

  New Revision: 701b5e5463e5d0945b36383658ac649b761be074
  https://github.com/ranguba/ranguba-server/commit/701b5e5463e5d0945b36383658ac649b761be074

  Message:
    Support scraping

  Added files:
    app/assets/javascripts/scrapings.coffee
    app/assets/stylesheets/scrapings.scss
    app/controllers/application_api_controller.rb
    app/controllers/scrapings_controller.rb
    app/helpers/scrapings_helper.rb
    app/models/scraping.rb
    bin/ranguba
    config/initializers/chupa_text.rb
    test/controllers/scrapings_controller_test.rb
    test/factories/scrapings.rb
    test/models/scraping_test.rb
  Modified files:
    Gemfile
    Gemfile.lock
    app/views/entries/_entry.json.jbuilder
    config/routes.rb

  Modified: Gemfile (+3 -0)
===================================================================
--- Gemfile    2017-02-19 22:18:11 +0900 (88f3b53)
+++ Gemfile    2017-02-19 23:40:10 +0900 (90b4cb7)
@@ -51,3 +51,6 @@ end
 gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby]
 
 gem 'groonga-client-model'
+gem 'chupa-text', path: '../chupa-text'
+gem 'chupa-text-decomposer-html', path: '../chupa-text-decomposer-html'
+gem 'chupa-text-decomposer-mail', path: '../chupa-text-decomposer-mail'

  Modified: Gemfile.lock (+22 -0)
===================================================================
--- Gemfile.lock    2017-02-19 22:18:11 +0900 (966f6e5)
+++ Gemfile.lock    2017-02-19 23:40:10 +0900 (1b7f6d3)
@@ -1,3 +1,22 @@
+PATH
+  remote: ../chupa-text-decomposer-html
+  specs:
+    chupa-text-decomposer-html (1.0.2)
+      chupa-text
+      nokogiri
+
+PATH
+  remote: ../chupa-text-decomposer-mail
+  specs:
+    chupa-text-decomposer-mail (1.0.0)
+      chupa-text
+      mail
+
+PATH
+  remote: ../chupa-text
+  specs:
+    chupa-text (1.0.5)
+
 GEM
   remote: https://rubygems.org/
   specs:
@@ -209,6 +228,9 @@ PLATFORMS
 
 DEPENDENCIES
   byebug
+  chupa-text!
+  chupa-text-decomposer-html!
+  chupa-text-decomposer-mail!
   coffee-rails (~> 4.2)
   factory_girl_rails
   groonga-client-model

  Added: app/assets/javascripts/scrapings.coffee (+3 -0) 100644
===================================================================
--- /dev/null
+++ app/assets/javascripts/scrapings.coffee    2017-02-19 23:40:10 +0900 (24f83d1)
@@ -0,0 +1,3 @@
+# Place all the behaviors and hooks related to the matching controller here.
+# All this logic will automatically be available in application.js.
+# You can use CoffeeScript in this file: http://coffeescript.org/

  Added: app/assets/stylesheets/scrapings.scss (+3 -0) 100644
===================================================================
--- /dev/null
+++ app/assets/stylesheets/scrapings.scss    2017-02-19 23:40:10 +0900 (ea8de25)
@@ -0,0 +1,3 @@
+// Place all the styles related to the scrapings controller here.
+// They will automatically be included in application.css.
+// You can use Sass (SCSS) here: http://sass-lang.com/

  Added: app/controllers/application_api_controller.rb (+3 -0) 100644
===================================================================
--- /dev/null
+++ app/controllers/application_api_controller.rb    2017-02-19 23:40:10 +0900 (f8dedc1)
@@ -0,0 +1,3 @@
+class ApplicationApiController < ActionController::API
+  include ActionController::MimeResponds
+end

  Added: app/controllers/scrapings_controller.rb (+22 -0) 100644
===================================================================
--- /dev/null
+++ app/controllers/scrapings_controller.rb    2017-02-19 23:40:10 +0900 (45f829c)
@@ -0,0 +1,22 @@
+class ScrapingsController < ApplicationApiController
+  # POST /scraping.json
+  def create
+    @scraping = Scraping.new(scraping_params)
+
+    respond_to do |format|
+      if @scraping.scrape
+        @entry = @scraping.entry
+        format.html { redirect_to @entry, notice: 'Scraped successfully.' }
+        format.json { render "entries/show", status: :created, location: @entry }
+      else
+        format.html { render :new }
+        format.json { render json: @scraping.errors, status: :unprocessable_entity }
+      end
+    end
+  end
+
+  private
+  def scraping_params
+    params.permit(:uri, :mime_type, :body)
+  end
+end

  Added: app/helpers/scrapings_helper.rb (+2 -0) 100644
===================================================================
--- /dev/null
+++ app/helpers/scrapings_helper.rb    2017-02-19 23:40:10 +0900 (c52d30c)
@@ -0,0 +1,2 @@
+module ScrapingsHelper
+end

  Added: app/models/scraping.rb (+27 -0) 100644
===================================================================
--- /dev/null
+++ app/models/scraping.rb    2017-02-19 23:40:10 +0900 (2b2bd44)
@@ -0,0 +1,27 @@
+class Scraping
+  include ActiveModel::Model
+
+  attr_accessor :uri
+  attr_accessor :mime_type
+  attr_accessor :body
+
+  attr_reader :entry
+
+  def scrape
+    @entry = Entry.new
+    extractor = ChupaText::Extractor.new
+    extractor.apply_configuration(ChupaText::Configuration.default)
+    data = ChupaText::TextData.new(body)
+    data.uri = uri
+    data.mime_type = mime_type
+    texts = []
+    extractor.extract(data) do |extracted_data|
+      texts << extracted_data.body
+    end
+    @entry._key = data.uri.to_s
+    @entry.mime_type = data.mime_type
+    @entry.body = texts.join("\n")
+    @entry.size = @entry.body.bytesize
+    @entry.save
+  end
+end

  Modified: app/views/entries/_entry.json.jbuilder (+2 -2)
===================================================================
--- app/views/entries/_entry.json.jbuilder    2017-02-19 22:18:11 +0900 (f60ac0a)
+++ app/views/entries/_entry.json.jbuilder    2017-02-19 23:40:10 +0900 (8bc949f)
@@ -1,2 +1,2 @@
-json.extract! entry, :id, :_key, :title, :body, :size, :created_at, :updated_at
-json.url entry_url(entry, format: :json)
\ No newline at end of file
+json.extract! entry, :id, :_key, :title, :body, :size#, :created_at, :updated_at
+json.url entry_url(entry, format: :json)

  Added: bin/ranguba (+27 -0) 100644
===================================================================
--- /dev/null
+++ bin/ranguba    2017-02-19 23:40:10 +0900 (ffdf001)
@@ -0,0 +1,27 @@
+# -*- ruby -*-
+
+require "net/http"
+require "uri"
+require "pathname"
+require "json"
+require "pp"
+
+def path_to_uri(path)
+  components = path.expand_path.to_s.split(Pathname::SEPARATOR_PAT)
+  escaped_components = components.collect do |component|
+    CGI.escape(component)
+  end
+  "file://" + File.join(*escaped_components)
+end
+
+api_uri = URI("http://localhost:3000/scraping.json")
+ARGV.each do |path|
+  response = Net::HTTP.post_form(api_uri,
+                                 {
+                                   "uri" => path_to_uri(Pathname(path)),
+                                   "mime_type" => "message/rfc822",
+                                   "body" => File.read(path),
+                                 })
+  p response
+  pp JSON.parse(response.body)
+end

  Added: config/initializers/chupa_text.rb (+1 -0) 100644
===================================================================
--- /dev/null
+++ config/initializers/chupa_text.rb    2017-02-19 23:40:10 +0900 (b949701)
@@ -0,0 +1 @@
+ChupaText::Decomposers.load

  Modified: config/routes.rb (+1 -0)
===================================================================
--- config/routes.rb    2017-02-19 22:18:11 +0900 (b69de7e)
+++ config/routes.rb    2017-02-19 23:40:10 +0900 (ffa876a)
@@ -1,4 +1,5 @@
 Rails.application.routes.draw do
+  resource :scraping, only: ["create"]
   resources :entries
   # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
 end

  Added: test/controllers/scrapings_controller_test.rb (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/controllers/scrapings_controller_test.rb    2017-02-19 23:40:10 +0900 (cdb6793)
@@ -0,0 +1,7 @@
+require 'test_helper'
+
+class ScrapingsControllerTest < ActionDispatch::IntegrationTest
+  # test "the truth" do
+  #   assert true
+  # end
+end

  Added: test/factories/scrapings.rb (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/factories/scrapings.rb    2017-02-19 23:40:10 +0900 (b4c5d9f)
@@ -0,0 +1,7 @@
+FactoryGirl.define do
+  factory :scraping do
+    uri "MyString"
+    mime_type "MyString"
+    body "MyText"
+  end
+end

  Added: test/models/scraping_test.rb (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/models/scraping_test.rb    2017-02-19 23:40:10 +0900 (dfe068f)
@@ -0,0 +1,7 @@
+require 'test_helper'
+
+class ScrapingTest < ActiveSupport::TestCase
+  # test "the truth" do
+  #   assert true
+  # end
+end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index