[Groonga-commit] groonga/groonga-normalizer-mysql at 3947f91 [master] Add --debug

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Jul 18 12:46:11 JST 2018


Kouhei Sutou	2018-07-18 12:46:11 +0900 (Wed, 18 Jul 2018)

  New Revision: 3947f91fc107bf6784b8a9e39017832be10648eb
  https://github.com/groonga/groonga-normalizer-mysql/commit/3947f91fc107bf6784b8a9e39017832be10648eb

  Message:
    Add --debug

  Modified files:
    tool/generate_uca900_table.rb
    tool/generate_uca_table.rb
    tool/parser.rb

  Modified: tool/generate_uca900_table.rb (+12 -5)
===================================================================
--- tool/generate_uca900_table.rb    2018-07-17 17:57:52 +0900 (44a777b)
+++ tool/generate_uca900_table.rb    2018-07-18 12:46:11 +0900 (724f2d4)
@@ -21,7 +21,10 @@ require "optparse"
 $LOAD_PATH.unshift(File.dirname(__FILE__))
 require "parser"
 
-@weight_level = 1
+@options = {
+  weight_level: 1,
+  debug: false,
+}
 @suffix = nil
 
 option_parser = OptionParser.new
@@ -29,8 +32,8 @@ option_parser.banner += " MYSQL_SOURCE/strings/uca900_data.h"
 
 option_parser.on("--weight-level=N", Integer,
                  "Use N level weights",
-                 "(#{@weight_level})") do |level|
-  @weight_level = level
+                 "(#{@options[:weight_level]})") do |level|
+  @options[:weight_level] = level
 end
 option_parser.on("--tailoring-locale=LOCALE",
                  "Use LOCALE tailoring",
@@ -45,6 +48,10 @@ end
 option_parser.on("--suffix=SUFFIX", "Add SUFFIX to names") do |suffix|
   @suffix = suffix
 end
+option_parser.on("--[no-]debug",
+                 "Enable debug output") do |boolean|
+  @options[:debug] = boolean
+end
 
 begin
   option_parser.parse!(ARGV)
@@ -60,7 +67,7 @@ end
 
 uca_h_path = ARGV[0]
 
-parser = UCA900Parser.new
+parser = UCA900Parser.new(@options)
 if @tailoring_path
   File.open(@tailoring_path) do |tailoring_file|
     parser.parse_tailoring(tailoring_file, @tailoring_locale)
@@ -70,7 +77,7 @@ File.open(uca_h_path) do |uca_h|
   parser.parse(uca_h)
 end
 
-normalization_table = parser.normalization_table(level: @weight_level)
+normalization_table = parser.normalization_table
 
 normalized_uca_h_path =
   uca_h_path.sub(/\A.*\/([^\/]+\/strings\/uca900_data\.h)\z/, "\\1")

  Modified: tool/generate_uca_table.rb (+8 -2)
===================================================================
--- tool/generate_uca_table.rb    2018-07-17 17:57:52 +0900 (c5d4221)
+++ tool/generate_uca_table.rb    2018-07-18 12:46:11 +0900 (5a7a1e6)
@@ -27,6 +27,7 @@ require "parser"
   split_small_kana: false,
   split_kana_with_voiced_sound_mark: false,
   split_kana_with_semi_voiced_sound_mark: false,
+  debug: false,
 }
 
 option_parser = OptionParser.new
@@ -59,6 +60,11 @@ option_parser.on("--[no-]split-kana-with-semi-voiced-sound-mark",
   @options[:split_kana_with_semi_voiced_sound_mark] = boolean
 end
 
+option_parser.on("--[no-]debug",
+                 "Enable debug output") do |boolean|
+  @options[:debug] = boolean
+end
+
 begin
   option_parser.parse!(ARGV)
 rescue OptionParser::ParseError
@@ -73,12 +79,12 @@ end
 
 ctype_uca_c_path = ARGV[0]
 
-parser = CTypeUCAParser.new(@version)
+parser = CTypeUCAParser.new(@version, @options)
 File.open(ctype_uca_c_path) do |ctype_uca_c|
   parser.parse(ctype_uca_c)
 end
 
-normalization_table = parser.normalization_table(@options)
+normalization_table = parser.normalization_table
 
 normalized_ctype_uca_c_path =
   ctype_uca_c_path.sub(/\A.*\/([^\/]+\/strings\/ctype-uca\.c)\z/, "\\1")

  Modified: tool/parser.rb (+36 -22)
===================================================================
--- tool/parser.rb    2018-07-17 17:57:52 +0900 (51ce805)
+++ tool/parser.rb    2018-07-18 12:46:11 +0900 (8d083b8)
@@ -157,25 +157,35 @@ class CTypeUTF8Parser
 end
 
 class UCAParser
-  def initialize
+  def initialize(options)
+    @options = options
     @pages = {}
   end
 
-  def normalization_table(options={})
+  def normalization_table
     table = {}
-    group_characters(options).each do |characters|
+    group_characters.each do |characters|
       characters.extend(CharacterArray)
-      next if characters.size == 1
+      if characters.size == 1
+        if @options[:debug]
+          p ["U+%04x" % characters.first.code_point,
+             characters.first.utf8,
+             characters.first.weights]
+        end
+        next
+      end
       representative_character =
-        characters.find_representative_character(options)
+        characters.find_representative_character(@options)
       representative_code_point = representative_character.code_point
       rest_characters = characters.reject do |character|
         character == representative_character
       end
-      # p ["U+%04x" % representative_character.code_point,
-      #    representative_character.utf8,
-      #    representative_character.weights,
-      #    rest_characters.collect {|x| [x.utf8, x.weights]}]
+      if @options[:debug]
+        p ["U+%04x" % representative_character.code_point,
+           representative_character.utf8,
+           representative_character.weights,
+           rest_characters.collect {|x| [x.utf8, x.weights]}]
+      end
       rest_characters.each do |character|
         code_point = character.code_point
         page = code_point >> 8
@@ -211,23 +221,23 @@ class UCAParser
     weight_based_characters
   end
 
-  def group_characters(options={})
+  def group_characters
     grouped_characters = []
-    level = options[:level] || 1
+    level = @options[:weight_level] || 1
     weight_based_characters(level).each do |weight, characters|
-      grouped_characters.concat(split_characters(characters, options))
+      grouped_characters.concat(split_characters(characters))
     end
     grouped_characters
   end
 
-  def split_characters(characters, options)
+  def split_characters(characters)
     grouped_characters = characters.group_by do |character|
-      if options[:split_small_kana] and character.small_kana?
+      if @options[:split_small_kana] and character.small_kana?
         :small_kana
-      elsif options[:split_kana_with_voiced_sound_mark] and
+      elsif @options[:split_kana_with_voiced_sound_mark] and
           character.kana_with_voiced_sound_mark?
         :kana_with_voiced_sound_mark
-      elsif options[:split_kana_with_semi_voiced_sound_mark] and
+      elsif @options[:split_kana_with_semi_voiced_sound_mark] and
           character.kana_with_semi_voiced_sound_mark?
         :kana_with_semi_voiced_sound_mark
       else
@@ -239,8 +249,8 @@ class UCAParser
 end
 
 class CTypeUCAParser < UCAParser
-  def initialize(version=nil)
-    super()
+  def initialize(version=nil, options)
+    super(options)
     @version = version
     @lengths = []
   end
@@ -419,8 +429,8 @@ class ICUCollationCustomizationRuleParser
 end
 
 class UCA900Parser < UCAParser
-  def initialize
-    super
+  def initialize(options={})
+    super(options)
     @tailoring = {}
   end
 
@@ -538,7 +548,9 @@ class UCA900Parser < UCAParser
       rule = @tailoring[utf8]
       next if rule.nil?
       target_character = all_characters[rule[:target]]
-      # p [utf8, rule, character.weights, target_character.weights]
+      if @options[:debug]
+        p [utf8, rule, character.weights, target_character.weights]
+      end
       nth_weight = rule[:nth_weight]
       if nth_weight
         character.weights.each_with_index do |weight, i|
@@ -554,7 +566,9 @@ class UCA900Parser < UCAParser
       else
         target_character.weights = character.weights
       end
-      # p [utf8, rule, character.weights, target_character.weights]
+      if @options[:debug]
+        p [utf8, rule, character.weights, target_character.weights]
+      end
     end
   end
 end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180718/7d0a6807/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index