Kouhei Sutou
null+****@clear*****
Wed Jul 18 12:46:11 JST 2018
Kouhei Sutou 2018-07-18 12:46:11 +0900 (Wed, 18 Jul 2018) New Revision: 3947f91fc107bf6784b8a9e39017832be10648eb https://github.com/groonga/groonga-normalizer-mysql/commit/3947f91fc107bf6784b8a9e39017832be10648eb Message: Add --debug Modified files: tool/generate_uca900_table.rb tool/generate_uca_table.rb tool/parser.rb Modified: tool/generate_uca900_table.rb (+12 -5) =================================================================== --- tool/generate_uca900_table.rb 2018-07-17 17:57:52 +0900 (44a777b) +++ tool/generate_uca900_table.rb 2018-07-18 12:46:11 +0900 (724f2d4) @@ -21,7 +21,10 @@ require "optparse" $LOAD_PATH.unshift(File.dirname(__FILE__)) require "parser" -@weight_level = 1 +@options = { + weight_level: 1, + debug: false, +} @suffix = nil option_parser = OptionParser.new @@ -29,8 +32,8 @@ option_parser.banner += " MYSQL_SOURCE/strings/uca900_data.h" option_parser.on("--weight-level=N", Integer, "Use N level weights", - "(#{@weight_level})") do |level| - @weight_level = level + "(#{@options[:weight_level]})") do |level| + @options[:weight_level] = level end option_parser.on("--tailoring-locale=LOCALE", "Use LOCALE tailoring", @@ -45,6 +48,10 @@ end option_parser.on("--suffix=SUFFIX", "Add SUFFIX to names") do |suffix| @suffix = suffix end +option_parser.on("--[no-]debug", + "Enable debug output") do |boolean| + @options[:debug] = boolean +end begin option_parser.parse!(ARGV) @@ -60,7 +67,7 @@ end uca_h_path = ARGV[0] -parser = UCA900Parser.new +parser = UCA900Parser.new(@options) if @tailoring_path File.open(@tailoring_path) do |tailoring_file| parser.parse_tailoring(tailoring_file, @tailoring_locale) @@ -70,7 +77,7 @@ File.open(uca_h_path) do |uca_h| parser.parse(uca_h) end -normalization_table = parser.normalization_table(level: @weight_level) +normalization_table = parser.normalization_table normalized_uca_h_path = uca_h_path.sub(/\A.*\/([^\/]+\/strings\/uca900_data\.h)\z/, "\\1") Modified: tool/generate_uca_table.rb (+8 -2) 
=================================================================== --- tool/generate_uca_table.rb 2018-07-17 17:57:52 +0900 (c5d4221) +++ tool/generate_uca_table.rb 2018-07-18 12:46:11 +0900 (5a7a1e6) @@ -27,6 +27,7 @@ require "parser" split_small_kana: false, split_kana_with_voiced_sound_mark: false, split_kana_with_semi_voiced_sound_mark: false, + debug: false, } option_parser = OptionParser.new @@ -59,6 +60,11 @@ option_parser.on("--[no-]split-kana-with-semi-voiced-sound-mark", @options[:split_kana_with_semi_voiced_sound_mark] = boolean end +option_parser.on("--[no-]debug", + "Enable debug output") do |boolean| + @options[:debug] = boolean +end + begin option_parser.parse!(ARGV) rescue OptionParser::ParseError @@ -73,12 +79,12 @@ end ctype_uca_c_path = ARGV[0] -parser = CTypeUCAParser.new(@version) +parser = CTypeUCAParser.new(@version, @options) File.open(ctype_uca_c_path) do |ctype_uca_c| parser.parse(ctype_uca_c) end -normalization_table = parser.normalization_table(@options) +normalization_table = parser.normalization_table normalized_ctype_uca_c_path = ctype_uca_c_path.sub(/\A.*\/([^\/]+\/strings\/ctype-uca\.c)\z/, "\\1") Modified: tool/parser.rb (+36 -22) =================================================================== --- tool/parser.rb 2018-07-17 17:57:52 +0900 (51ce805) +++ tool/parser.rb 2018-07-18 12:46:11 +0900 (8d083b8) @@ -157,25 +157,35 @@ class CTypeUTF8Parser end class UCAParser - def initialize + def initialize(options) + @options = options @pages = {} end - def normalization_table(options={}) + def normalization_table table = {} - group_characters(options).each do |characters| + group_characters.each do |characters| characters.extend(CharacterArray) - next if characters.size == 1 + if characters.size == 1 + if @options[:debug] + p ["U+%04x" % characters.first.code_point, + characters.first.utf8, + characters.first.weights] + end + next + end representative_character = - characters.find_representative_character(options) + 
characters.find_representative_character(@options) representative_code_point = representative_character.code_point rest_characters = characters.reject do |character| character == representative_character end - # p ["U+%04x" % representative_character.code_point, - # representative_character.utf8, - # representative_character.weights, - # rest_characters.collect {|x| [x.utf8, x.weights]}] + if @options[:debug] + p ["U+%04x" % representative_character.code_point, + representative_character.utf8, + representative_character.weights, + rest_characters.collect {|x| [x.utf8, x.weights]}] + end rest_characters.each do |character| code_point = character.code_point page = code_point >> 8 @@ -211,23 +221,23 @@ class UCAParser weight_based_characters end - def group_characters(options={}) + def group_characters grouped_characters = [] - level = options[:level] || 1 + level = @options[:weight_level] || 1 weight_based_characters(level).each do |weight, characters| - grouped_characters.concat(split_characters(characters, options)) + grouped_characters.concat(split_characters(characters)) end grouped_characters end - def split_characters(characters, options) + def split_characters(characters) grouped_characters = characters.group_by do |character| - if options[:split_small_kana] and character.small_kana? + if @options[:split_small_kana] and character.small_kana? :small_kana - elsif options[:split_kana_with_voiced_sound_mark] and + elsif @options[:split_kana_with_voiced_sound_mark] and character.kana_with_voiced_sound_mark? :kana_with_voiced_sound_mark - elsif options[:split_kana_with_semi_voiced_sound_mark] and + elsif @options[:split_kana_with_semi_voiced_sound_mark] and character.kana_with_semi_voiced_sound_mark? 
:kana_with_semi_voiced_sound_mark else @@ -239,8 +249,8 @@ class UCAParser end class CTypeUCAParser < UCAParser - def initialize(version=nil) - super() + def initialize(version=nil, options) + super(options) @version = version @lengths = [] end @@ -419,8 +429,8 @@ class ICUCollationCustomizationRuleParser end class UCA900Parser < UCAParser - def initialize - super + def initialize(options={}) + super(options) @tailoring = {} end @@ -538,7 +548,9 @@ class UCA900Parser < UCAParser rule = @tailoring[utf8] next if rule.nil? target_character = all_characters[rule[:target]] - # p [utf8, rule, character.weights, target_character.weights] + if @options[:debug] + p [utf8, rule, character.weights, target_character.weights] + end nth_weight = rule[:nth_weight] if nth_weight character.weights.each_with_index do |weight, i| @@ -554,7 +566,9 @@ class UCA900Parser < UCAParser else target_character.weights = character.weights end - # p [utf8, rule, character.weights, target_character.weights] + if @options[:debug] + p [utf8, rule, character.weights, target_character.weights] + end end end end -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180718/7d0a6807/attachment-0001.htm