Kouhei Sutou
null+****@clear*****
Mon Apr 7 11:16:38 JST 2014
Kouhei Sutou 2014-04-07 11:16:38 +0900 (Mon, 07 Apr 2014) New Revision: 9ffa2221e5048855d52c41212e921b9755932c02 https://github.com/droonga/wikipedia-search/commit/9ffa2221e5048855d52c41212e921b9755932c02 Message: Introduce path object Added files: lib/wikipedia-search/path.rb Modified files: lib/wikipedia-search/task.rb Added: lib/wikipedia-search/path.rb (+131 -0) 100644 =================================================================== --- /dev/null +++ lib/wikipedia-search/path.rb 2014-04-07 11:16:38 +0900 (debcaa5) @@ -0,0 +1,131 @@ +require "pathname" + +module WikipediaSearch + class Path + def initialize(base, language) + @base = Pathname.new(base) + @language = language + end + + def data_dir + @base + "data" + end + + def download_dir + data_dir + "download" + end + + def config_dir + @base + "config" + end + + def wikipedia + WikipediaPath.new(self, @language) + end + + def groonga + GroongaPath.new(self, @language) + end + + def droonga + DroongaPath.new(self, @language) + end + end + + class WikipediaPath + def initialize(base_path, language) + @base_path = base_path + @language = language + end + + def download_base_url + "http://dumps.wikimedia.org/#{@language}wiki/latest" + end + + def pages + @base_path.download_dir + pages_base_name + end + + def pages_base_name + "#{@language}wiki-latest-pages-articles.xml.bz2" + end + + def pages_url + "#{download_base_url}/#{pages_base_name}" + end + + def titles + @base_path.download_dir + titles_base_name + end + + def titles_base_name + "#{@language}wiki-latest-all-titles.gz" + end + + def titles_url + "#{download_base_url}/#{titles_base_name}" + end + end + + class GroongaPath + def initialize(base_path, language) + @base_path = base_path + @language = language + end + + def config_dir + @base_path.config_dir + "groonga" + end + + def data_dir + @base_path.data_dir + "groonga" + end + + def schema + config_dir + "schema.grn" + end + + def indexes + config_dir + "indexes.grn" + end + + def pages + data_dir 
+ "#{@language}-pages.grn" + end + + def database_dir + data_dir + "db" + end + + def database + database_dir + "wikipedia" + end + + def log + database_dir + "groonga.log" + end + + def query_log + database_dir + "query.log" + end + end + + class DroongaPath + def initialize(base_path, language) + @base_path = base_path + @language = language + end + + def config_dir + @base_path.config_dir + "droonga" + end + + def data_dir + @base_path.data_dir + "droonga" + end + + def pages + data_dir + "#{@language}-pages.jsons" + end + end +end Modified: lib/wikipedia-search/task.rb (+35 -83) =================================================================== --- lib/wikipedia-search/task.rb 2014-04-07 10:52:09 +0900 (12e8479) +++ lib/wikipedia-search/task.rb 2014-04-07 11:16:38 +0900 (159beca) @@ -2,6 +2,7 @@ require "rbconfig" require "shellwords" require "wikipedia-search/downloader" +require "wikipedia-search/path" module WikipediaSearch class Task @@ -12,6 +13,10 @@ module WikipediaSearch end include Rake::DSL + def initialize + @path = Path.new(".", "ja") + end + def define define_data_tasks define_groonga_tasks @@ -20,37 +25,40 @@ module WikipediaSearch private def define_data_tasks namespace :data do - directory data_dir_path.to_s define_data_download_tasks define_data_convert_tasks end end def define_data_download_tasks + path = @path.wikipedia + directory @path.download_dir.to_s + + namespace :download do namespace :pages do - file ja_pages_path.to_s => data_dir_path.to_s do - url = "#{ja_download_base_url}/#{ja_pages_base_name}" - WikipediaSearch::Downloader.download(url, ja_pages_path) + file path.pages.to_s => @path.download_dir.to_s do + WikipediaSearch::Downloader.download(path.pages_url, path.pages) end desc "Download the latest Japanese Wikipedia pages." 
- task :ja => ja_pages_path.to_s + task :ja => path.pages.to_s end namespace :titles do - file ja_titles_path.to_s => data_dir_path.to_s do - url = "#{ja_download_base_url}/#{ja_titles_base_name}" - WikipediaSearch::Downloader.download(url, ja_titles_path) + file path.titles.to_s => @path.download_dir.to_s do + WikipediaSearch::Downloader.download(path.titles_url, + path.titles) end desc "Download the latest Japanese Wikipedia titles." - task :ja => ja_titles_path.to_s + task :ja => path.titles.to_s end end end def define_data_convert_tasks + direc****@path*****_dir.to_s + namespace :convert do define_data_convert_groonga_tasks define_data_convert_droonga_tasks @@ -59,10 +67,10 @@ module WikipediaSearch def define_data_convert_groonga_tasks namespace :groonga do - file ja_groonga_pages_path.to_s => ja_pages_path.to_s do + file @path.groonga.pages.to_s => @path.wikipedia.pages.to_s do command_line = [] command_line << "bzcat" - command_line << Shellwords.escape(ja_pages_path.to_s) + command_line << Shellwords.escape(@path.wikipedia.pages.to_s) command_line << "|" command_line << RbConfig.ruby command_line << "bin/wikipedia-to-groonga.rb" @@ -71,38 +79,38 @@ module WikipediaSearch command_line << "--max-n-characters" command_line << "1000" command_line << "--output" - command_line << ja_groonga_pages_path.to_s + command_line << @path.groonga.pages.to_s sh(command_line.join(" ")) end desc "Convert Japanese Wikipedia page data to Groonga page data." - task :ja => ja_groonga_pages_path.to_s + task :ja => @path.groonga.pages.to_s end end def define_data_convert_droonga_tasks namespace :droonga do - file ja_droonga_pages_path.to_s => ja_groonga_pages_path.to_s do + file @path.droonga.pages.to_s => @path.groonga.pages.to_s do sh("grn2drn", "--dataset", "Wikipedia", - "--output", ja_droonga_pages_path.to_s, - ja_groonga_pages_path.to_s) + "--output", @path.droonga.pages.to_s, + @path.groonga.pages.to_s) end desc "Convert Japanese Wikipedia page data to Droonga page data." 
- task :ja => ja_droonga_pages_path.to_s + task :ja => @path.droonga.pages.to_s end end def define_groonga_tasks namespace :groonga do desc "Load data." - task :load do - rm_rf(groonga_database_dir_path.to_s) - mkdir_p(groonga_database_dir_path.to_s) - groonga_run(groonga_schema_path.to_s) - groonga_run(ja_groonga_pages_path.to_s.to_s) - groonga_run(groonga_indexes_path.to_s) + task :load => @path.groonga.pages.to_s do + rm_rf(@path.groonga.database_dir.to_s) + mkdir_p(@path.groonga.database_dir.to_s) + groonga_run(@path.groonga.schema.to_s) + groonga_run(@path.groonga.pages.to_s) + groonga_run(@path.groonga.indexes.to_s) end end end @@ -110,71 +118,15 @@ module WikipediaSearch def groonga_run(input) command_line = [ "groonga", - "--log-path", (groonga_database_dir_path + "groonga.log").to_s, - "--query-log-path", (groonga_database_dir_path + "query.log").to_s, + "--log-path", @path.groonga.log.to_s, + "--query-log-path", @path.groonga.query_log.to_s, "--file", input, ] - unless groonga_database_path.exist? + unless @path.groonga.database.exist? 
command_line << "-n" end - command_line << groonga_database_path.to_s + command_line << @path.groonga.database.to_s sh(*command_line) end - - def download_base_url(language) - "http://dumps.wikimedia.org/#{language}wiki/latest" - end - - def ja_download_base_url - download_base_url("ja") - end - - def data_dir_path - Pathname.new("data") - end - - def ja_pages_path - data_dir_path + ja_pages_base_name - end - - def ja_pages_base_name - "jawiki-latest-pages-articles.xml.bz2" - end - - def ja_groonga_pages_path - data_dir_path + "ja-pages.grn" - end - - def ja_droonga_pages_path - data_dir_path + "ja-pages.jsons" - end - - def ja_titles_path - data_dir_path + ja_titles_base_name - end - - def ja_titles_base_name - "jawiki-latest-all-titles.gz" - end - - def config_dir - Pathname.new("config") - end - - def groonga_schema_path - config_dir + "groonga" + "schema.grn" - end - - def groonga_indexes_path - config_dir + "groonga" + "indexes.grn" - end - - def groonga_database_dir_path - data_dir_path + "groonga" - end - - def groonga_database_path - groonga_database_dir_path + "db" - end end end -------------- next part -------------- HTMLの添付ファイルを保管しました... Download