[Groonga-commit] droonga/wikipedia-search at 6e1c73d [master] Add data:convert:ja:groonga task

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Apr 4 17:00:24 JST 2014


Kouhei Sutou	2014-04-04 17:00:24 +0900 (Fri, 04 Apr 2014)

  New Revision: 6e1c73d5586e09d65756b57067993908b2201358
  https://github.com/droonga/wikipedia-search/commit/6e1c73d5586e09d65756b57067993908b2201358

  Message:
    Add data:convert:ja:groonga task

  Modified files:
    bin/wikipedia-to-groonga.rb
    lib/wikipedia-search/task.rb

  Modified: bin/wikipedia-to-groonga.rb (+20 -5)
===================================================================
--- bin/wikipedia-to-groonga.rb    2014-04-04 17:00:06 +0900 (ae4d03c)
+++ bin/wikipedia-to-groonga.rb    2014-04-04 17:00:24 +0900 (8dde77d)
@@ -1,6 +1,7 @@
 #!/usr/bin/env ruby
 
 require "pathname"
+require "ostruct"
 require "optparse"
 
 base_dir_path = Pathname.new(__FILE__).dirname
@@ -10,16 +11,30 @@ $LOAD_PATH.unshift(lib_dir_path.to_s)
 
 require "wikipedia-search/groonga-converter"
 
-options = {
+options = OpenStruct.new
+options.output = "-"
+converter_options = {
   :max_n_records => -1,
 }
 parser = OptionParser.new
 parser.on("--max-n-records=N", Integer,
           "The number of maximum records. -1 means unlimited.",
-          "(#{options[:max_n_records]})") do |n|
-  options[:max_n_records] = n
+          "(#{converter_options[:max_n_records]})") do |n|
+  converter_options[:max_n_records] = n
+end
+parser.on("--output=PATH",
+          "Output to PATH. '-' means the standard output.",
+          "(#{options.output})") do |path|
+  options.output = path
 end
 parser.parse!(ARGV)
 
-converter = WikipediaSearch::GroongaConverter.new(ARGF, options)
-converter.convert($stdout)
+converter = WikipediaSearch::GroongaConverter.new(ARGF, converter_options)
+if options.output == "-"
+  output = $stdout
+  converter.convert(output)
+else
+  File.open(options.output, "w") do |output|
+    converter.convert(output)
+  end
+end

  Modified: lib/wikipedia-search/task.rb (+29 -0)
===================================================================
--- lib/wikipedia-search/task.rb    2014-04-04 17:00:06 +0900 (f94c768)
+++ lib/wikipedia-search/task.rb    2014-04-04 17:00:24 +0900 (3b6fa8d)
@@ -1,3 +1,6 @@
+require "rbconfig"
+require "shellwords"
+
 require "wikipedia-search/downloader"
 
 module WikipediaSearch
@@ -13,6 +16,7 @@ module WikipediaSearch
       namespace :data do
         directory data_dir_path.to_s
         define_download_tasks
+        define_convert_tasks
       end
     end
 
@@ -29,6 +33,27 @@ module WikipediaSearch
       end
     end
 
+    def define_convert_tasks
+      namespace :convert do
+        namespace :ja do
+          desc "Convert Japanese Wikipedia data to Groonga data."
+          task :groonga => ja_data_path.to_s do
+            command_line = []
+            command_line << "bzcat"
+            command_line << Shellwords.escape(ja_data_path.to_s)
+            command_line << "|"
+            command_line << RbConfig.ruby
+            command_line << "bin/wikipedia-to-groonga.rb"
+            command_line << "--max-n-records"
+            command_line << "5000"
+            command_line << "--output"
+            command_line << ja_groonga_output_path.to_s
+            sh(command_line.join(" "))
+          end
+        end
+      end
+    end
+
     def data_dir_path
       @data_dir_path ||= Pathname.new("data")
     end
@@ -40,5 +65,9 @@ module WikipediaSearch
     def ja_data_base_name
       "jawiki-latest-pages-articles.xml.bz2"
     end
+
+    def ja_groonga_output_path
+      @ja_groonga_output_path ||= data_dir_path + "ja-data.grn"
+    end
   end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index