Kouhei Sutou
null+****@clear*****
Fri Apr 4 17:00:24 JST 2014
Kouhei Sutou 2014-04-04 17:00:24 +0900 (Fri, 04 Apr 2014) New Revision: 6e1c73d5586e09d65756b57067993908b2201358 https://github.com/droonga/wikipedia-search/commit/6e1c73d5586e09d65756b57067993908b2201358 Message: Add data:convert:ja:groonga task Modified files: bin/wikipedia-to-groonga.rb lib/wikipedia-search/task.rb Modified: bin/wikipedia-to-groonga.rb (+20 -5) =================================================================== --- bin/wikipedia-to-groonga.rb 2014-04-04 17:00:06 +0900 (ae4d03c) +++ bin/wikipedia-to-groonga.rb 2014-04-04 17:00:24 +0900 (8dde77d) @@ -1,6 +1,7 @@ #!/usr/bin/env ruby require "pathname" +require "ostruct" require "optparse" base_dir_path = Pathname.new(__FILE__).dirname @@ -10,16 +11,30 @@ $LOAD_PATH.unshift(lib_dir_path.to_s) require "wikipedia-search/groonga-converter" -options = { +options = OpenStruct.new +options.output = "-" +converter_options = { :max_n_records => -1, } parser = OptionParser.new parser.on("--max-n-records=N", Integer, "The number of maximum records. -1 means unlimited.", - "(#{options[:max_n_records]})") do |n| - options[:max_n_records] = n + "(#{converter_options[:max_n_records]})") do |n| + converter_options[:max_n_records] = n +end +parser.on("--output=PATH", + "Output to PATH. 
'-' means the standard output.", + "(#{options.output})") do |path| + options.output = path end parser.parse!(ARGV) -converter = WikipediaSearch::GroongaConverter.new(ARGF, options) -converter.convert($stdout) +converter = WikipediaSearch::GroongaConverter.new(ARGF, converter_options) +if options.output == "-" + output = $stdout + converter.convert(output) +else + File.open(options.output, "w") do |output| + converter.convert(output) + end +end Modified: lib/wikipedia-search/task.rb (+29 -0) =================================================================== --- lib/wikipedia-search/task.rb 2014-04-04 17:00:06 +0900 (f94c768) +++ lib/wikipedia-search/task.rb 2014-04-04 17:00:24 +0900 (3b6fa8d) @@ -1,3 +1,6 @@ +require "rbconfig" +require "shellwords" + require "wikipedia-search/downloader" module WikipediaSearch @@ -13,6 +16,7 @@ module WikipediaSearch namespace :data do directory data_dir_path.to_s define_download_tasks + define_convert_tasks end end @@ -29,6 +33,27 @@ module WikipediaSearch end end + def define_convert_tasks + namespace :convert do + namespace :ja do + desc "Convert Japanese Wikipedia data to Groonga data." + task :groonga => ja_data_path.to_s do + command_line = [] + command_line << "bzcat" + command_line << Shellwords.escape(ja_data_path.to_s) + command_line << "|" + command_line << RbConfig.ruby + command_line << "bin/wikipedia-to-groonga.rb" + command_line << "--max-n-records" + command_line << "5000" + command_line << "--output" + command_line << ja_groonga_output_path.to_s + sh(command_line.join(" ")) + end + end + end + end + def data_dir_path @data_dir_path ||= Pathname.new("data") end @@ -40,5 +65,9 @@ module WikipediaSearch def ja_data_base_name "jawiki-latest-pages-articles.xml.bz2" end + + def ja_groonga_output_path + @ja_groonga_output_path ||= data_dir_path + "ja-data.grn" + end end end -------------- next part -------------- HTMLの添付ファイルを保管しました...Download