[Groonga-commit] droonga/wikipedia-search at 9ffa222 [master] Introduce path object

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Apr 7 11:16:38 JST 2014


Kouhei Sutou	2014-04-07 11:16:38 +0900 (Mon, 07 Apr 2014)

  New Revision: 9ffa2221e5048855d52c41212e921b9755932c02
  https://github.com/droonga/wikipedia-search/commit/9ffa2221e5048855d52c41212e921b9755932c02

  Message:
    Introduce path object

  Added files:
    lib/wikipedia-search/path.rb
  Modified files:
    lib/wikipedia-search/task.rb

  Added: lib/wikipedia-search/path.rb (+131 -0) 100644
===================================================================
--- /dev/null
+++ lib/wikipedia-search/path.rb    2014-04-07 11:16:38 +0900 (debcaa5)
@@ -0,0 +1,131 @@
+require "pathname"
+
+module WikipediaSearch
+  class Path
+    def initialize(base, language)
+      @base = Pathname.new(base)
+      @language = language
+    end
+
+    def data_dir
+      @base + "data"
+    end
+
+    def download_dir
+      data_dir + "download"
+    end
+
+    def config_dir
+      @base + "config"
+    end
+
+    def wikipedia
+      WikipediaPath.new(self, @language)
+    end
+
+    def groonga
+      GroongaPath.new(self, @language)
+    end
+
+    def droonga
+      DroongaPath.new(self, @language)
+    end
+  end
+
+  class WikipediaPath
+    def initialize(base_path, language)
+      @base_path = base_path
+      @language = language
+    end
+
+    def download_base_url
+      "http://dumps.wikimedia.org/#{@language}wiki/latest"
+    end
+
+    def pages
+      @base_path.download_dir + pages_base_name
+    end
+
+    def pages_base_name
+      "#{@language}wiki-latest-pages-articles.xml.bz2"
+    end
+
+    def pages_url
+      "#{download_base_url}/#{pages_base_name}"
+    end
+
+    def titles
+      @base_path.download_dir + titles_base_name
+    end
+
+    def titles_base_name
+      "#{@language}wiki-latest-all-titles.gz"
+    end
+
+    def titles_url
+      "#{download_base_url}/#{titles_base_name}"
+    end
+  end
+
+  class GroongaPath
+    def initialize(base_path, language)
+      @base_path = base_path
+      @language = language
+    end
+
+    def config_dir
+      @base_path.config_dir + "groonga"
+    end
+
+    def data_dir
+      @base_path.data_dir + "groonga"
+    end
+
+    def schema
+      config_dir + "schema.grn"
+    end
+
+    def indexes
+      config_dir + "indexes.grn"
+    end
+
+    def pages
+      data_dir + "#{@language}-pages.grn"
+    end
+
+    def database_dir
+      data_dir + "db"
+    end
+
+    def database
+      database_dir + "wikipedia"
+    end
+
+    def log
+      database_dir + "groonga.log"
+    end
+
+    def query_log
+      database_dir + "query.log"
+    end
+  end
+
+  class DroongaPath
+    def initialize(base_path, language)
+      @base_path = base_path
+      @language = language
+    end
+
+    def config_dir
+      @base_path.config_dir + "droonga"
+    end
+
+    def data_dir
+      @base_path.data_dir + "droonga"
+    end
+
+    def pages
+      data_dir + "#{@language}-pages.jsons"
+    end
+  end
+end

  Modified: lib/wikipedia-search/task.rb (+35 -83)
===================================================================
--- lib/wikipedia-search/task.rb    2014-04-07 10:52:09 +0900 (12e8479)
+++ lib/wikipedia-search/task.rb    2014-04-07 11:16:38 +0900 (159beca)
@@ -2,6 +2,7 @@ require "rbconfig"
 require "shellwords"
 
 require "wikipedia-search/downloader"
+require "wikipedia-search/path"
 
 module WikipediaSearch
   class Task
@@ -12,6 +13,10 @@ module WikipediaSearch
     end
     include Rake::DSL
 
+    def initialize
+      @path = Path.new(".", "ja")
+    end
+
     def define
       define_data_tasks
       define_groonga_tasks
@@ -20,37 +25,40 @@ module WikipediaSearch
     private
     def define_data_tasks
       namespace :data do
-        directory data_dir_path.to_s
         define_data_download_tasks
         define_data_convert_tasks
       end
     end
 
     def define_data_download_tasks
+      path = @path.wikipedia
+      directory @path.download_dir.to_s
+
       namespace :download do
         namespace :pages do
-          file ja_pages_path.to_s => data_dir_path.to_s do
-            url = "#{ja_download_base_url}/#{ja_pages_base_name}"
-            WikipediaSearch::Downloader.download(url, ja_pages_path)
+          file path.pages.to_s => @path.download_dir.to_s do
+            WikipediaSearch::Downloader.download(path.pages_url, path.pages)
           end
 
           desc "Download the latest Japanese Wikipedia pages."
-          task :ja => ja_pages_path.to_s
+          task :ja => path.pages.to_s
         end
 
         namespace :titles do
-          file ja_titles_path.to_s => data_dir_path.to_s do
-            url = "#{ja_download_base_url}/#{ja_titles_base_name}"
-            WikipediaSearch::Downloader.download(url, ja_titles_path)
+          file path.titles.to_s => @path.download_dir.to_s do
+            WikipediaSearch::Downloader.download(path.titles_url,
+                                                 path.titles)
           end
 
           desc "Download the latest Japanese Wikipedia titles."
-          task :ja => ja_titles_path.to_s
+          task :ja => path.titles.to_s
         end
       end
     end
 
     def define_data_convert_tasks
+      direc****@path*****_dir.to_s
+
       namespace :convert do
         define_data_convert_groonga_tasks
         define_data_convert_droonga_tasks
@@ -59,10 +67,10 @@ module WikipediaSearch
 
     def define_data_convert_groonga_tasks
       namespace :groonga do
-        file ja_groonga_pages_path.to_s => ja_pages_path.to_s do
+        file @path.groonga.pages.to_s => @path.wikipedia.pages.to_s do
           command_line = []
           command_line << "bzcat"
-          command_line << Shellwords.escape(ja_pages_path.to_s)
+          command_line << Shellwords.escape(@path.wikipedia.pages.to_s)
           command_line << "|"
           command_line << RbConfig.ruby
           command_line << "bin/wikipedia-to-groonga.rb"
@@ -71,38 +79,38 @@ module WikipediaSearch
           command_line << "--max-n-characters"
           command_line << "1000"
           command_line << "--output"
-          command_line << ja_groonga_pages_path.to_s
+          command_line << @path.groonga.pages.to_s
           sh(command_line.join(" "))
         end
 
         desc "Convert Japanese Wikipedia page data to Groonga page data."
-        task :ja => ja_groonga_pages_path.to_s
+        task :ja => @path.groonga.pages.to_s
       end
     end
 
     def define_data_convert_droonga_tasks
       namespace :droonga do
-        file ja_droonga_pages_path.to_s => ja_groonga_pages_path.to_s do
+        file @path.droonga.pages.to_s => @path.groonga.pages.to_s do
           sh("grn2drn",
              "--dataset", "Wikipedia",
-             "--output", ja_droonga_pages_path.to_s,
-             ja_groonga_pages_path.to_s)
+             "--output", @path.droonga.pages.to_s,
+             @path.groonga.pages.to_s)
         end
 
         desc "Convert Japanese Wikipedia page data to Droonga page data."
-        task :ja => ja_droonga_pages_path.to_s
+        task :ja => @path.droonga.pages.to_s
       end
     end
 
     def define_groonga_tasks
       namespace :groonga do
         desc "Load data."
-        task :load do
-          rm_rf(groonga_database_dir_path.to_s)
-          mkdir_p(groonga_database_dir_path.to_s)
-          groonga_run(groonga_schema_path.to_s)
-          groonga_run(ja_groonga_pages_path.to_s.to_s)
-          groonga_run(groonga_indexes_path.to_s)
+        task :load => @path.groonga.pages.to_s do
+          rm_rf(@path.groonga.database_dir.to_s)
+          mkdir_p(@path.groonga.database_dir.to_s)
+          groonga_run(@path.groonga.schema.to_s)
+          groonga_run(@path.groonga.pages.to_s)
+          groonga_run(@path.groonga.indexes.to_s)
         end
       end
     end
@@ -110,71 +118,15 @@ module WikipediaSearch
     def groonga_run(input)
       command_line = [
         "groonga",
-        "--log-path", (groonga_database_dir_path + "groonga.log").to_s,
-        "--query-log-path", (groonga_database_dir_path + "query.log").to_s,
+        "--log-path", @path.groonga.log.to_s,
+        "--query-log-path", @path.groonga.query_log.to_s,
         "--file", input,
       ]
-      unless groonga_database_path.exist?
+      unless @path.groonga.database.exist?
         command_line << "-n"
       end
-      command_line << groonga_database_path.to_s
+      command_line << @path.groonga.database.to_s
       sh(*command_line)
     end
-
-    def download_base_url(language)
-      "http://dumps.wikimedia.org/#{language}wiki/latest"
-    end
-
-    def ja_download_base_url
-      download_base_url("ja")
-    end
-
-    def data_dir_path
-      Pathname.new("data")
-    end
-
-    def ja_pages_path
-      data_dir_path + ja_pages_base_name
-    end
-
-    def ja_pages_base_name
-      "jawiki-latest-pages-articles.xml.bz2"
-    end
-
-    def ja_groonga_pages_path
-       data_dir_path + "ja-pages.grn"
-    end
-
-    def ja_droonga_pages_path
-      data_dir_path + "ja-pages.jsons"
-    end
-
-    def ja_titles_path
-      data_dir_path + ja_titles_base_name
-    end
-
-    def ja_titles_base_name
-      "jawiki-latest-all-titles.gz"
-    end
-
-    def config_dir
-      Pathname.new("config")
-    end
-
-    def groonga_schema_path
-      config_dir + "groonga" + "schema.grn"
-    end
-
-    def groonga_indexes_path
-      config_dir + "groonga" + "indexes.grn"
-    end
-
-    def groonga_database_dir_path
-      data_dir_path + "groonga"
-    end
-
-    def groonga_database_path
-      groonga_database_dir_path + "db"
-    end
   end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index