[Groonga-commit] droonga/wikipedia-search at 7fd702d [master] Add --max-n-records option

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Apr 4 16:24:03 JST 2014


Kouhei Sutou	2014-04-04 16:24:03 +0900 (Fri, 04 Apr 2014)

  New Revision: 7fd702d44b2aab43e0861b279ef5e7afc49e51e9
  https://github.com/droonga/wikipedia-search/commit/7fd702d44b2aab43e0861b279ef5e7afc49e51e9

  Message:
    Add --max-n-records option

  Modified files:
    bin/wikipedia-to-groonga.rb
    lib/wikipedia-search/groonga-converter.rb
    test/test-groonga-converter.rb

  Modified: bin/wikipedia-to-groonga.rb (+13 -1)
===================================================================
--- bin/wikipedia-to-groonga.rb    2014-04-04 15:47:17 +0900 (518160c)
+++ bin/wikipedia-to-groonga.rb    2014-04-04 16:24:03 +0900 (ae4d03c)
@@ -1,6 +1,7 @@
 #!/usr/bin/env ruby
 
 require "pathname"
+require "optparse"
 
 base_dir_path = Pathname.new(__FILE__).dirname
 lib_dir_path = base_dir_path + "lib"
@@ -9,5 +10,16 @@ $LOAD_PATH.unshift(lib_dir_path.to_s)
 
 require "wikipedia-search/groonga-converter"
 
-converter = WikipediaSearch::GroongaConverter.new(ARGF)
+options = {
+  :max_n_records => -1,
+}
+parser = OptionParser.new
+parser.on("--max-n-records=N", Integer,
+          "The number of maximum records. -1 means unlimited.",
+          "(#{options[:max_n_records]})") do |n|
+  options[:max_n_records] = n
+end
+parser.parse!(ARGV)
+
+converter = WikipediaSearch::GroongaConverter.new(ARGF, options)
 converter.convert($stdout)

  Modified: lib/wikipedia-search/groonga-converter.rb (+20 -5)
===================================================================
--- lib/wikipedia-search/groonga-converter.rb    2014-04-04 15:47:17 +0900 (c7a0057)
+++ lib/wikipedia-search/groonga-converter.rb    2014-04-04 16:24:03 +0900 (f8f1f8e)
@@ -5,24 +5,35 @@ require "rexml/parsers/streamparser"
 
 module WikipediaSearch
   class GroongaConverter
-    def initialize(input)
+    def initialize(input, options={})
       @input = input
+      @options = options
     end
 
     def convert(output)
-      listener = Listener.new(output)
-      parser = REXML::Parsers::StreamParser.new(@input, listener)
-      parser.parse
+      listener = Listener.new(output, @options)
+      catch do |tag|
+        parser = REXML::Parsers::StreamParser.new(@input, listener)
+        listener.start(tag)
+        parser.parse
+      end
       listener.finish
     end
 
     class Listener
       include REXML::StreamListener
 
-      def initialize(output)
+      def initialize(output, options)
         @output = output
+        @options = options
         @text_stack = [""]
         @first_page = true
+        @n_records = 0
+        @max_n_records = @options[:max_n_records]
+      end
+
+      def start(abort_tag)
+        @abort_tag = abort_tag
         @output.puts("load --table Pages")
         @output.puts("[")
       end
@@ -45,6 +56,9 @@ module WikipediaSearch
       def tag_end(name)
         case name
         when "page"
+          if @max_n_records and @n_records >= @max_n_records
+            throw(@abort_tag)
+          end
           if @first_page
             @first_page = false
           else
@@ -56,6 +70,7 @@ module WikipediaSearch
             "text"  => @text,
           }
           @output.print(JSON.generate(page))
+          @n_records += 1
         when "title"
           @title = @text_stack.last
         when "id"

  Modified: test/test-groonga-converter.rb (+31 -2)
===================================================================
--- test/test-groonga-converter.rb    2014-04-04 15:47:17 +0900 (4032f8a)
+++ test/test-groonga-converter.rb    2014-04-04 16:24:03 +0900 (c4e1e14)
@@ -2,10 +2,10 @@ require "stringio"
 require "wikipedia-search/groonga-converter"
 
 class TestGroongaConverter < Test::Unit::TestCase
-  def convert(xml)
+  def convert(xml, options={})
     input = StringIO.new(xml)
     output = StringIO.new
-    converter = WikipediaSearch::GroongaConverter.new(input)
+    converter = WikipediaSearch::GroongaConverter.new(input, options)
     converter.convert(output)
     output.string
   end
@@ -42,4 +42,33 @@ load --table Pages
 ]
     GROONGA
   end
+
+  def test_max_n_records
+    xml = <<-XML
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.8/">
+  <page>
+    <title>Title1</title>
+    <id>1</id>
+    <revision>
+      <id>1001</id>
+      <text>Text1</text>
+    </revision>
+  </page>
+  <page>
+    <title>Title2</title>
+    <id>2</id>
+    <revision>
+      <id>1002</id>
+      <text>Text2</text>
+    </revision>
+  </page>
+</mediawiki>
+    XML
+    assert_equal(<<-GROONGA, convert(xml, :max_n_records => 1))
+load --table Pages
+[
+{"_key":1,"title":"Title1","text":"Text1"}
+]
+    GROONGA
+  end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index