Kouhei Sutou
null+****@clear*****
Fri Apr 4 17:22:59 JST 2014
Kouhei Sutou 2014-04-04 17:22:59 +0900 (Fri, 04 Apr 2014) New Revision: a3c84f4e901d179611407d292f7ba7527a2e0edc https://github.com/droonga/wikipedia-search/commit/a3c84f4e901d179611407d292f7ba7527a2e0edc Message: Add data:convert:ja:droonga task Modified files: lib/wikipedia-search/task.rb Modified: lib/wikipedia-search/task.rb (+21 -5) =================================================================== --- lib/wikipedia-search/task.rb 2014-04-04 17:00:24 +0900 (3b6fa8d) +++ lib/wikipedia-search/task.rb 2014-04-04 17:22:59 +0900 (700d68a) @@ -36,8 +36,7 @@ module WikipediaSearch def define_convert_tasks namespace :convert do namespace :ja do - desc "Convert Japanese Wikipedia data to Groonga data." - task :groonga => ja_data_path.to_s do + file ja_groonga_data_path.to_s => ja_data_path.to_s do command_line = [] command_line << "bzcat" command_line << Shellwords.escape(ja_data_path.to_s) @@ -47,9 +46,22 @@ module WikipediaSearch command_line << "--max-n-records" command_line << "5000" command_line << "--output" - command_line << ja_groonga_output_path.to_s + command_line << ja_groonga_data_path.to_s sh(command_line.join(" ")) end + + desc "Convert Japanese Wikipedia data to Groonga data." + task :groonga => ja_groonga_data_path.to_s + + file ja_droonga_data_path.to_s => ja_groonga_data_path.to_s do + sh("grn2drn", + "--dataset", "Wikipedia", + "--output", ja_droonga_data_path.to_s, + ja_groonga_data_path.to_s) + end + + desc "Convert Japanese Wikipedia data to Droonga data." 
+ task :droonga => ja_droonga_data_path.to_s end end end @@ -66,8 +78,12 @@ module WikipediaSearch "jawiki-latest-pages-articles.xml.bz2" end - def ja_groonga_output_path - @ja_groonga_output_path ||= data_dir_path + "ja-data.grn" + def ja_groonga_data_path + @ja_groonga_data_path ||= data_dir_path + "ja-data.grn" + end + + def ja_droonga_data_path + @ja_droonga_data_path ||= data_dir_path + "ja-data.jsons" end end end -------------- next part -------------- HTMLの添付ファイルを保管しました...Download