[Groonga-commit] droonga/wikipedia-search at 0508bb6 [master] Support creating all pages

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Jul 17 23:22:21 JST 2014


Kouhei Sutou	2014-07-17 23:22:21 +0900 (Thu, 17 Jul 2014)

  New Revision: 0508bb6d2bd88bbfdfe383db2f319f0719e6c18c
  https://github.com/droonga/wikipedia-search/commit/0508bb6d2bd88bbfdfe383db2f319f0719e6c18c

  Message:
    Support creating all pages

  Modified files:
    lib/wikipedia-search/path.rb
    lib/wikipedia-search/task.rb

  Modified: lib/wikipedia-search/path.rb (+8 -0)
===================================================================
--- lib/wikipedia-search/path.rb    2014-07-17 23:14:32 +0900 (7b8bbeb)
+++ lib/wikipedia-search/path.rb    2014-07-17 23:22:21 +0900 (8e55e18)
@@ -93,6 +93,10 @@ module WikipediaSearch
       data_dir + "#{@language}-pages.grn"
     end
 
+    def all_pages
+      data_dir + "#{@language}-all-pages.grn"
+    end
+
     def database_dir
       data_dir + "db"
     end
@@ -128,6 +132,10 @@ module WikipediaSearch
       data_dir + "#{@language}-pages.jsons"
     end
 
+    def all_pages
+      data_dir + "#{@language}-all-pages.jsons"
+    end
+
     def schema
       data_dir + "schema.json"
     end

  Modified: lib/wikipedia-search/task.rb (+21 -6)
===================================================================
--- lib/wikipedia-search/task.rb    2014-07-17 23:14:32 +0900 (4e6431e)
+++ lib/wikipedia-search/task.rb    2014-07-17 23:22:21 +0900 (60d4313)
@@ -69,13 +69,15 @@ module WikipediaSearch
 
     def define_data_convert_groonga_tasks
       namespace :groonga do
+        base_command_line = [
+          "bzcat",
+          Shellwords.escape(@path.wikipedia.pages.to_s),
+          "|",
+          RbConfig.ruby,
+          "bin/wikipedia-to-groonga.rb",
+        ]
         file @path.groonga.pages.to_s => @path.wikipedia.pages.to_s do
-          command_line = []
-          command_line << "bzcat"
-          command_line << Shellwords.escape(@path.wikipedia.pages.to_s)
-          command_line << "|"
-          command_line << RbConfig.ruby
-          command_line << "bin/wikipedia-to-groonga.rb"
+          command_line = base_command_line.dup
           command_line << "--max-n-records"
           command_line << "5000"
           command_line << "--max-n-characters"
@@ -85,6 +87,13 @@ module WikipediaSearch
           sh(command_line.join(" "))
         end
 
+        file @path.groonga.all_pages.to_s => @path.wikipedia.pages.to_s do
+          command_line = base_command_line.dup
+          command_line << "--output"
+          command_line << @path.groonga.all_pages.to_s
+          sh(command_line.join(" "))
+        end
+
         desc "Convert Japanese Wikipedia page data to Groonga page data."
         task :ja => @path.groonga.pages.to_s
       end
@@ -113,6 +122,12 @@ module WikipediaSearch
                @path.groonga.pages.to_s)
           end
 
+          file @path.droonga.all_pages.to_s => @path.groonga.all_pages.to_s do
+            sh("grn2drn",
+               "--output", @path.droonga.all_pages.to_s,
+               @path.groonga.all_pages.to_s)
+          end
+
           desc "Convert Japanese Wikipedia page data to Droonga page data."
           task :ja => @path.droonga.pages.to_s
         end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index