Kouhei Sutou
null+****@clear*****
Mon Feb 11 13:07:51 JST 2013
Kouhei Sutou 2013-02-11 13:07:51 +0900 (Mon, 11 Feb 2013) New Revision: 95d98a0b04458c0484ef6fe162e272acab80ebfe https://github.com/groonga/groonga-normalizer-mysql/commit/95d98a0b04458c0484ef6fe162e272acab80ebfe Log: Use more normal character for representative character Modified files: tool/generate_uca_table.rb Modified: tool/generate_uca_table.rb (+23 -2) =================================================================== --- tool/generate_uca_table.rb 2013-02-11 12:42:16 +0900 (6b09d42) +++ tool/generate_uca_table.rb 2013-02-11 13:07:51 +0900 (0e2dad0) @@ -1,4 +1,5 @@ #!/usr/bin/env ruby +# -*- coding: utf-8 -*- # # Copyright (C) 2013 Kouhei Sutou <kou ＠ clear-code.com> # @@ -31,12 +32,32 @@ File.open(ctype_uca_c_path) do |ctype_uca_c| parser.parse(ctype_uca_c) end +def find_representative_character(characters) + representative_character = nil + case characters.first[:utf8] + when "⺄", "⺇", "⺈", "⺊", "⺌", "⺗" + representative_character = characters.last + when "⺜", "⺝", "⺧", "⺫", "⺬", "⺮", "⺶", "⺻", "⺼", "⺽" + representative_character = characters[1] + when "⻆", "⻊", "⻏", "⻑", "⻕", "⻗", "⻝", "⻡", "⻣", "⻤" + representative_character = characters.last + when "⻱", "⼀", "⼆", "⼈" + representative_character = characters[1] + when "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "っ", "ゃ", "ゅ", "ょ", "ゎ" + representative_character = characters[1] + end + representative_character ||= characters.first + representative_character +end + target_pages = {} parser.weight_based_characters.each do |weight, characters| next if characters.size == 1 - representative_character = characters.first + representative_character = find_representative_character(characters) representative_code_point = representative_character[:code_point] - rest_characters = characters[1..-1] + rest_characters = characters.reject do |character| + character == representative_character + end rest_characters.each do |character| code_point = character[:code_point] page = code_point >> 8 -------------- next part -------------- 
HTMLの添付ファイルを保管しました...Download