Kouhei Sutou
null+****@clear*****
Sat Apr 20 20:40:16 JST 2013
Kouhei Sutou 2013-04-20 20:40:16 +0900 (Sat, 20 Apr 2013) New Revision: 39460c6c19c22c154bc2210f18d72bf93b92939f https://github.com/groonga/groonga-normalizer-mysql/commit/39460c6c19c22c154bc2210f18d72bf93b92939f Message: Use capital character as representative character for Greek characters It is an incompatible change, but it doesn't affect anyone because Greek character normalization didn't work due to a bug. :p Modified files: normalizers/mysql_unicode_ci_table.h test/suite/unicode_ci/character_length_2.expected tool/generate_uca_table.rb Modified: normalizers/mysql_unicode_ci_table.h (+47 -47) =================================================================== --- normalizers/mysql_unicode_ci_table.h 2013-04-20 20:22:01 +0900 (36dd541) +++ normalizers/mysql_unicode_ci_table.h 2013-04-20 20:40:16 +0900 (861c202) @@ -69,7 +69,7 @@ static uint32_t unicode_ci_page_00[] = { 0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00020, 0x000a1, 0x000a2, 0x000a3, 0x000a4, 0x000a5, 0x000a6, 0x000a7, 0x000a8, 0x000a9, 0x00041, 0x000ab, 0x000ac, 0x000ad, 0x000ae, 0x000af, - 0x000b0, 0x000b1, 0x00032, 0x00033, 0x000b4, 0x000b5, 0x000b6, 0x000b7, + 0x000b0, 0x000b1, 0x00032, 0x00033, 0x000b4, 0x0039c, 0x000b6, 0x000b7, 0x000b8, 0x00031, 0x0004f, 0x000bb, 0x000bc, 0x000bd, 0x000be, 0x000bf, 0x00041, 0x00041, 0x00041, 0x00041, 0x00041, 0x00041, 0x000c6, 0x00043, 0x00045, 0x00045, 0x00045, 0x00045, 0x00049, 0x00049, 0x00049, 0x00049, @@ -167,22 +167,22 @@ static uint32_t unicode_ci_page_03[] = { 0x00000, 0x00000, 0x00000, 0x00041, 0x00045, 0x00049, 0x0004f, 0x00055, 0x00043, 0x00044, 0x00048, 0x0004d, 0x00052, 0x00054, 0x00056, 0x00058, 0x00370, 0x00371, 0x00372, 0x00373, 0x002b9, 0x00375, 0x00376, 0x00377, - 0x00378, 0x00379, 0x0037a, 0x0037b, 0x0037c, 0x0037d, 0x0003b, 0x0037f, - 0x00380, 0x00381, 0x00382, 0x00383, 0x000b4, 0x000a8, 0x00386, 0x000b7, - 0x00388, 0x00389, 0x0037a, 0x0038b, 0x0038c, 0x0038d, 0x0038e, 0x0038f, - 0x0037a, 0x00386, 0x00392, 0x00393, 
0x00394, 0x00388, 0x00396, 0x00389, - 0x00398, 0x0037a, 0x0039a, 0x0039b, 0x000b5, 0x0039d, 0x0039e, 0x0038c, - 0x003a0, 0x003a1, 0x003a2, 0x003a3, 0x003a4, 0x0038e, 0x003a6, 0x003a7, - 0x003a8, 0x0038f, 0x0037a, 0x0038e, 0x00386, 0x00388, 0x00389, 0x0037a, - 0x0038e, 0x00386, 0x00392, 0x00393, 0x00394, 0x00388, 0x00396, 0x00389, - 0x00398, 0x0037a, 0x0039a, 0x0039b, 0x000b5, 0x0039d, 0x0039e, 0x0038c, - 0x003a0, 0x003a1, 0x003a3, 0x003a3, 0x003a4, 0x0038e, 0x003a6, 0x003a7, - 0x003a8, 0x0038f, 0x0037a, 0x0038e, 0x0038c, 0x0038e, 0x0038f, 0x003cf, - 0x00392, 0x00398, 0x0038e, 0x0038e, 0x0038e, 0x003a6, 0x003a0, 0x003d7, + 0x00378, 0x00379, 0x00399, 0x0037b, 0x0037c, 0x0037d, 0x0003b, 0x0037f, + 0x00380, 0x00381, 0x00382, 0x00383, 0x000b4, 0x000a8, 0x00391, 0x000b7, + 0x00395, 0x00397, 0x00399, 0x0038b, 0x0039f, 0x0038d, 0x003a5, 0x003a9, + 0x00399, 0x00391, 0x00392, 0x00393, 0x00394, 0x00395, 0x00396, 0x00397, + 0x00398, 0x00399, 0x0039a, 0x0039b, 0x0039c, 0x0039d, 0x0039e, 0x0039f, + 0x003a0, 0x003a1, 0x003a2, 0x003a3, 0x003a4, 0x003a5, 0x003a6, 0x003a7, + 0x003a8, 0x003a9, 0x00399, 0x003a5, 0x00391, 0x00395, 0x00397, 0x00399, + 0x003a5, 0x00391, 0x00392, 0x00393, 0x00394, 0x00395, 0x00396, 0x00397, + 0x00398, 0x00399, 0x0039a, 0x0039b, 0x0039c, 0x0039d, 0x0039e, 0x0039f, + 0x003a0, 0x003a1, 0x003a3, 0x003a3, 0x003a4, 0x003a5, 0x003a6, 0x003a7, + 0x003a8, 0x003a9, 0x00399, 0x003a5, 0x0039f, 0x003a5, 0x003a9, 0x003cf, + 0x00392, 0x00398, 0x003a5, 0x003a5, 0x003a5, 0x003a6, 0x003a0, 0x003d7, 0x003d8, 0x003d8, 0x003da, 0x003da, 0x003dc, 0x003dc, 0x003de, 0x003de, 0x003e0, 0x003e0, 0x003e2, 0x003e2, 0x003e4, 0x003e4, 0x003e6, 0x003e6, 0x003e8, 0x003e8, 0x003ea, 0x003ea, 0x003ec, 0x003ec, 0x003ee, 0x003ee, - 0x0039a, 0x003a1, 0x003a3, 0x003f3, 0x00398, 0x00388, 0x003f6, 0x003f7, + 0x0039a, 0x003a1, 0x003a3, 0x003f3, 0x00398, 0x00395, 0x003f6, 0x003f7, 0x003f7, 0x003a3, 0x003fa, 0x003fa, 0x003fc, 0x003fd, 0x003fe, 0x003ff }; @@ -852,38 +852,38 @@ static uint32_t 
unicode_ci_page_1e[] = { }; static uint32_t unicode_ci_page_1f[] = { - 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, - 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, - 0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x01f16, 0x01f17, - 0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x01f1e, 0x01f1f, - 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, - 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, - 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, - 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, - 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x01f46, 0x01f47, - 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x01f4e, 0x01f4f, - 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, - 0x01f58, 0x0038e, 0x01f5a, 0x0038e, 0x01f5c, 0x0038e, 0x01f5e, 0x0038e, - 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, - 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, - 0x00386, 0x00386, 0x00388, 0x00388, 0x00389, 0x00389, 0x0037a, 0x0037a, - 0x0038c, 0x0038c, 0x0038e, 0x0038e, 0x0038f, 0x0038f, 0x01f7e, 0x01f7f, - 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, - 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, - 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, - 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, - 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, - 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, - 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x01fb5, 0x00386, 0x00386, - 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x01fbd, 0x0037a, 0x01fbd, - 0x01fc0, 0x000a8, 0x00389, 0x00389, 0x00389, 0x01fc5, 0x00389, 0x00389, - 0x00388, 0x00388, 0x00389, 0x00389, 0x00389, 0x01fbd, 0x01fbd, 0x01fbd, - 
0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x01fd4, 0x01fd5, 0x0037a, 0x0037a, - 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x01fdc, 0x01fdd, 0x01fdd, 0x01fdd, - 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x003a1, 0x003a1, 0x0038e, 0x0038e, - 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x003a1, 0x000a8, 0x000a8, 0x00060, - 0x01ff0, 0x01ff1, 0x0038f, 0x0038f, 0x0038f, 0x01ff5, 0x0038f, 0x0038f, - 0x0038c, 0x0038c, 0x0038f, 0x0038f, 0x0038f, 0x000b4, 0x01fdd, 0x01fff + 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, + 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, + 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x01f16, 0x01f17, + 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x01f1e, 0x01f1f, + 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, + 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, + 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, + 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, + 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x01f46, 0x01f47, + 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x01f4e, 0x01f4f, + 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, + 0x01f58, 0x003a5, 0x01f5a, 0x003a5, 0x01f5c, 0x003a5, 0x01f5e, 0x003a5, + 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, + 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, + 0x00391, 0x00391, 0x00395, 0x00395, 0x00397, 0x00397, 0x00399, 0x00399, + 0x0039f, 0x0039f, 0x003a5, 0x003a5, 0x003a9, 0x003a9, 0x01f7e, 0x01f7f, + 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, + 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, + 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, + 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, + 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, + 
0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, + 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x01fb5, 0x00391, 0x00391, + 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x01fbd, 0x00399, 0x01fbd, + 0x01fc0, 0x000a8, 0x00397, 0x00397, 0x00397, 0x01fc5, 0x00397, 0x00397, + 0x00395, 0x00395, 0x00397, 0x00397, 0x00397, 0x01fbd, 0x01fbd, 0x01fbd, + 0x00399, 0x00399, 0x00399, 0x00399, 0x01fd4, 0x01fd5, 0x00399, 0x00399, + 0x00399, 0x00399, 0x00399, 0x00399, 0x01fdc, 0x01fdd, 0x01fdd, 0x01fdd, + 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a1, 0x003a1, 0x003a5, 0x003a5, + 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a1, 0x000a8, 0x000a8, 0x00060, + 0x01ff0, 0x01ff1, 0x003a9, 0x003a9, 0x003a9, 0x01ff5, 0x003a9, 0x003a9, + 0x0039f, 0x0039f, 0x003a9, 0x003a9, 0x003a9, 0x000b4, 0x01fdd, 0x01fff }; static uint32_t unicode_ci_page_20[] = { @@ -926,7 +926,7 @@ static uint32_t unicode_ci_page_21[] = { 0x02108, 0x02109, 0x00047, 0x00048, 0x00048, 0x00048, 0x00048, 0x00126, 0x00049, 0x00049, 0x0004c, 0x0004c, 0x02114, 0x0004e, 0x02116, 0x02117, 0x02118, 0x00050, 0x00051, 0x00052, 0x00052, 0x00052, 0x0211e, 0x0211f, - 0x02120, 0x02121, 0x02122, 0x02123, 0x0005a, 0x02125, 0x0038f, 0x02127, + 0x02120, 0x02121, 0x02122, 0x02123, 0x0005a, 0x02125, 0x003a9, 0x02127, 0x0005a, 0x02129, 0x0004b, 0x00041, 0x00042, 0x00043, 0x0212e, 0x00045, 0x00045, 0x00046, 0x02132, 0x0004d, 0x0004f, 0x005d0, 0x005d1, 0x005d2, 0x005d3, 0x00049, 0x0213a, 0x0213b, 0x0213c, 0x00393, 0x00393, 0x003a0, Modified: test/suite/unicode_ci/character_length_2.expected (+1 -1) =================================================================== --- test/suite/unicode_ci/character_length_2.expected 2013-04-20 20:22:01 +0900 (c60c8cd) +++ test/suite/unicode_ci/character_length_2.expected 2013-04-20 20:40:16 +0900 (84a3d14) @@ -1,4 +1,4 @@ register normalizers/mysql [[0,0.0,0.0],true] normalize NormalizerMySQLUnicodeCI "λογία" -[[0,0.0,0.0],{"normalized":"ΛΌΓͺΆ","types":[]}] 
+[[0,0.0,0.0],{"normalized":"ΛΟΓΙΑ","types":[]}] Modified: tool/generate_uca_table.rb (+9 -0) =================================================================== --- tool/generate_uca_table.rb 2013-04-20 20:22:01 +0900 (0e2dad0) +++ tool/generate_uca_table.rb 2013-04-20 20:40:16 +0900 (ca7f5ff) @@ -32,6 +32,13 @@ File.open(ctype_uca_c_path) do |ctype_uca_c| parser.parse(ctype_uca_c) end +GREEK_CAPITAL_UNICODE_RANGE = Unicode.from_utf8("Α")..Unicode.from_utf8("Ω") +def find_greek_capital_character(characters) + characters.find do |character| + GREEK_CAPITAL_UNICODE_RANGE.cover?(character[:code_point]) + end +end + def find_representative_character(characters) representative_character = nil case characters.first[:utf8] @@ -45,6 +52,8 @@ def find_representative_character(characters) representative_character = characters[1] when "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "っ", "ゃ", "ゅ", "ょ", "ゎ" representative_character = characters[1] + else + representative_character ||= find_greek_capital_character(characters) end representative_character ||= characters.first representative_character -------------- next part -------------- HTML����������������������������...Download