[Groonga-commit] groonga/groonga-normalizer-mysql at 39460c6 [master] Use capital character as representative character for Greek characters

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Apr 20 20:40:16 JST 2013


Kouhei Sutou	2013-04-20 20:40:16 +0900 (Sat, 20 Apr 2013)

  New Revision: 39460c6c19c22c154bc2210f18d72bf93b92939f
  https://github.com/groonga/groonga-normalizer-mysql/commit/39460c6c19c22c154bc2210f18d72bf93b92939f

  Message:
    Use capital character as representative character for Greek characters
    
    It is an incompatible change, but it doesn't affect anyone because Greek
    character normalization didn't work due to a bug. :p

  Modified files:
    normalizers/mysql_unicode_ci_table.h
    test/suite/unicode_ci/character_length_2.expected
    tool/generate_uca_table.rb

  Modified: normalizers/mysql_unicode_ci_table.h (+47 -47)
===================================================================
--- normalizers/mysql_unicode_ci_table.h    2013-04-20 20:22:01 +0900 (36dd541)
+++ normalizers/mysql_unicode_ci_table.h    2013-04-20 20:40:16 +0900 (861c202)
@@ -69,7 +69,7 @@ static uint32_t unicode_ci_page_00[] = {
   0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00000, 0x00000,
   0x00020, 0x000a1, 0x000a2, 0x000a3, 0x000a4, 0x000a5, 0x000a6, 0x000a7,
   0x000a8, 0x000a9, 0x00041, 0x000ab, 0x000ac, 0x000ad, 0x000ae, 0x000af,
-  0x000b0, 0x000b1, 0x00032, 0x00033, 0x000b4, 0x000b5, 0x000b6, 0x000b7,
+  0x000b0, 0x000b1, 0x00032, 0x00033, 0x000b4, 0x0039c, 0x000b6, 0x000b7,
   0x000b8, 0x00031, 0x0004f, 0x000bb, 0x000bc, 0x000bd, 0x000be, 0x000bf,
   0x00041, 0x00041, 0x00041, 0x00041, 0x00041, 0x00041, 0x000c6, 0x00043,
   0x00045, 0x00045, 0x00045, 0x00045, 0x00049, 0x00049, 0x00049, 0x00049,
@@ -167,22 +167,22 @@ static uint32_t unicode_ci_page_03[] = {
   0x00000, 0x00000, 0x00000, 0x00041, 0x00045, 0x00049, 0x0004f, 0x00055,
   0x00043, 0x00044, 0x00048, 0x0004d, 0x00052, 0x00054, 0x00056, 0x00058,
   0x00370, 0x00371, 0x00372, 0x00373, 0x002b9, 0x00375, 0x00376, 0x00377,
-  0x00378, 0x00379, 0x0037a, 0x0037b, 0x0037c, 0x0037d, 0x0003b, 0x0037f,
-  0x00380, 0x00381, 0x00382, 0x00383, 0x000b4, 0x000a8, 0x00386, 0x000b7,
-  0x00388, 0x00389, 0x0037a, 0x0038b, 0x0038c, 0x0038d, 0x0038e, 0x0038f,
-  0x0037a, 0x00386, 0x00392, 0x00393, 0x00394, 0x00388, 0x00396, 0x00389,
-  0x00398, 0x0037a, 0x0039a, 0x0039b, 0x000b5, 0x0039d, 0x0039e, 0x0038c,
-  0x003a0, 0x003a1, 0x003a2, 0x003a3, 0x003a4, 0x0038e, 0x003a6, 0x003a7,
-  0x003a8, 0x0038f, 0x0037a, 0x0038e, 0x00386, 0x00388, 0x00389, 0x0037a,
-  0x0038e, 0x00386, 0x00392, 0x00393, 0x00394, 0x00388, 0x00396, 0x00389,
-  0x00398, 0x0037a, 0x0039a, 0x0039b, 0x000b5, 0x0039d, 0x0039e, 0x0038c,
-  0x003a0, 0x003a1, 0x003a3, 0x003a3, 0x003a4, 0x0038e, 0x003a6, 0x003a7,
-  0x003a8, 0x0038f, 0x0037a, 0x0038e, 0x0038c, 0x0038e, 0x0038f, 0x003cf,
-  0x00392, 0x00398, 0x0038e, 0x0038e, 0x0038e, 0x003a6, 0x003a0, 0x003d7,
+  0x00378, 0x00379, 0x00399, 0x0037b, 0x0037c, 0x0037d, 0x0003b, 0x0037f,
+  0x00380, 0x00381, 0x00382, 0x00383, 0x000b4, 0x000a8, 0x00391, 0x000b7,
+  0x00395, 0x00397, 0x00399, 0x0038b, 0x0039f, 0x0038d, 0x003a5, 0x003a9,
+  0x00399, 0x00391, 0x00392, 0x00393, 0x00394, 0x00395, 0x00396, 0x00397,
+  0x00398, 0x00399, 0x0039a, 0x0039b, 0x0039c, 0x0039d, 0x0039e, 0x0039f,
+  0x003a0, 0x003a1, 0x003a2, 0x003a3, 0x003a4, 0x003a5, 0x003a6, 0x003a7,
+  0x003a8, 0x003a9, 0x00399, 0x003a5, 0x00391, 0x00395, 0x00397, 0x00399,
+  0x003a5, 0x00391, 0x00392, 0x00393, 0x00394, 0x00395, 0x00396, 0x00397,
+  0x00398, 0x00399, 0x0039a, 0x0039b, 0x0039c, 0x0039d, 0x0039e, 0x0039f,
+  0x003a0, 0x003a1, 0x003a3, 0x003a3, 0x003a4, 0x003a5, 0x003a6, 0x003a7,
+  0x003a8, 0x003a9, 0x00399, 0x003a5, 0x0039f, 0x003a5, 0x003a9, 0x003cf,
+  0x00392, 0x00398, 0x003a5, 0x003a5, 0x003a5, 0x003a6, 0x003a0, 0x003d7,
   0x003d8, 0x003d8, 0x003da, 0x003da, 0x003dc, 0x003dc, 0x003de, 0x003de,
   0x003e0, 0x003e0, 0x003e2, 0x003e2, 0x003e4, 0x003e4, 0x003e6, 0x003e6,
   0x003e8, 0x003e8, 0x003ea, 0x003ea, 0x003ec, 0x003ec, 0x003ee, 0x003ee,
-  0x0039a, 0x003a1, 0x003a3, 0x003f3, 0x00398, 0x00388, 0x003f6, 0x003f7,
+  0x0039a, 0x003a1, 0x003a3, 0x003f3, 0x00398, 0x00395, 0x003f6, 0x003f7,
   0x003f7, 0x003a3, 0x003fa, 0x003fa, 0x003fc, 0x003fd, 0x003fe, 0x003ff
 };
 
@@ -852,38 +852,38 @@ static uint32_t unicode_ci_page_1e[] = {
 };
 
 static uint32_t unicode_ci_page_1f[] = {
-  0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386,
-  0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386,
-  0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x01f16, 0x01f17,
-  0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x00388, 0x01f1e, 0x01f1f,
-  0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389,
-  0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389,
-  0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a,
-  0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x0037a,
-  0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x01f46, 0x01f47,
-  0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x0038c, 0x01f4e, 0x01f4f,
-  0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x0038e,
-  0x01f58, 0x0038e, 0x01f5a, 0x0038e, 0x01f5c, 0x0038e, 0x01f5e, 0x0038e,
-  0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f,
-  0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f,
-  0x00386, 0x00386, 0x00388, 0x00388, 0x00389, 0x00389, 0x0037a, 0x0037a,
-  0x0038c, 0x0038c, 0x0038e, 0x0038e, 0x0038f, 0x0038f, 0x01f7e, 0x01f7f,
-  0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386,
-  0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x00386,
-  0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389,
-  0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389, 0x00389,
-  0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f,
-  0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f, 0x0038f,
-  0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x01fb5, 0x00386, 0x00386,
-  0x00386, 0x00386, 0x00386, 0x00386, 0x00386, 0x01fbd, 0x0037a, 0x01fbd,
-  0x01fc0, 0x000a8, 0x00389, 0x00389, 0x00389, 0x01fc5, 0x00389, 0x00389,
-  0x00388, 0x00388, 0x00389, 0x00389, 0x00389, 0x01fbd, 0x01fbd, 0x01fbd,
-  0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x01fd4, 0x01fd5, 0x0037a, 0x0037a,
-  0x0037a, 0x0037a, 0x0037a, 0x0037a, 0x01fdc, 0x01fdd, 0x01fdd, 0x01fdd,
-  0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x003a1, 0x003a1, 0x0038e, 0x0038e,
-  0x0038e, 0x0038e, 0x0038e, 0x0038e, 0x003a1, 0x000a8, 0x000a8, 0x00060,
-  0x01ff0, 0x01ff1, 0x0038f, 0x0038f, 0x0038f, 0x01ff5, 0x0038f, 0x0038f,
-  0x0038c, 0x0038c, 0x0038f, 0x0038f, 0x0038f, 0x000b4, 0x01fdd, 0x01fff
+  0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391,
+  0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391,
+  0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x01f16, 0x01f17,
+  0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x01f1e, 0x01f1f,
+  0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397,
+  0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397,
+  0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399,
+  0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399, 0x00399,
+  0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x01f46, 0x01f47,
+  0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x0039f, 0x01f4e, 0x01f4f,
+  0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a5,
+  0x01f58, 0x003a5, 0x01f5a, 0x003a5, 0x01f5c, 0x003a5, 0x01f5e, 0x003a5,
+  0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9,
+  0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9,
+  0x00391, 0x00391, 0x00395, 0x00395, 0x00397, 0x00397, 0x00399, 0x00399,
+  0x0039f, 0x0039f, 0x003a5, 0x003a5, 0x003a9, 0x003a9, 0x01f7e, 0x01f7f,
+  0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391,
+  0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391,
+  0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397,
+  0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397, 0x00397,
+  0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9,
+  0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9, 0x003a9,
+  0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x01fb5, 0x00391, 0x00391,
+  0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x01fbd, 0x00399, 0x01fbd,
+  0x01fc0, 0x000a8, 0x00397, 0x00397, 0x00397, 0x01fc5, 0x00397, 0x00397,
+  0x00395, 0x00395, 0x00397, 0x00397, 0x00397, 0x01fbd, 0x01fbd, 0x01fbd,
+  0x00399, 0x00399, 0x00399, 0x00399, 0x01fd4, 0x01fd5, 0x00399, 0x00399,
+  0x00399, 0x00399, 0x00399, 0x00399, 0x01fdc, 0x01fdd, 0x01fdd, 0x01fdd,
+  0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a1, 0x003a1, 0x003a5, 0x003a5,
+  0x003a5, 0x003a5, 0x003a5, 0x003a5, 0x003a1, 0x000a8, 0x000a8, 0x00060,
+  0x01ff0, 0x01ff1, 0x003a9, 0x003a9, 0x003a9, 0x01ff5, 0x003a9, 0x003a9,
+  0x0039f, 0x0039f, 0x003a9, 0x003a9, 0x003a9, 0x000b4, 0x01fdd, 0x01fff
 };
 
 static uint32_t unicode_ci_page_20[] = {
@@ -926,7 +926,7 @@ static uint32_t unicode_ci_page_21[] = {
   0x02108, 0x02109, 0x00047, 0x00048, 0x00048, 0x00048, 0x00048, 0x00126,
   0x00049, 0x00049, 0x0004c, 0x0004c, 0x02114, 0x0004e, 0x02116, 0x02117,
   0x02118, 0x00050, 0x00051, 0x00052, 0x00052, 0x00052, 0x0211e, 0x0211f,
-  0x02120, 0x02121, 0x02122, 0x02123, 0x0005a, 0x02125, 0x0038f, 0x02127,
+  0x02120, 0x02121, 0x02122, 0x02123, 0x0005a, 0x02125, 0x003a9, 0x02127,
   0x0005a, 0x02129, 0x0004b, 0x00041, 0x00042, 0x00043, 0x0212e, 0x00045,
   0x00045, 0x00046, 0x02132, 0x0004d, 0x0004f, 0x005d0, 0x005d1, 0x005d2,
   0x005d3, 0x00049, 0x0213a, 0x0213b, 0x0213c, 0x00393, 0x00393, 0x003a0,

  Modified: test/suite/unicode_ci/character_length_2.expected (+1 -1)
===================================================================
--- test/suite/unicode_ci/character_length_2.expected    2013-04-20 20:22:01 +0900 (c60c8cd)
+++ test/suite/unicode_ci/character_length_2.expected    2013-04-20 20:40:16 +0900 (84a3d14)
@@ -1,4 +1,4 @@
 register normalizers/mysql
 [[0,0.0,0.0],true]
 normalize NormalizerMySQLUnicodeCI "λογία"
-[[0,0.0,0.0],{"normalized":"ΛΌΓͺΆ","types":[]}]
+[[0,0.0,0.0],{"normalized":"ΛΟΓΙΑ","types":[]}]

  Modified: tool/generate_uca_table.rb (+9 -0)
===================================================================
--- tool/generate_uca_table.rb    2013-04-20 20:22:01 +0900 (0e2dad0)
+++ tool/generate_uca_table.rb    2013-04-20 20:40:16 +0900 (ca7f5ff)
@@ -32,6 +32,13 @@ File.open(ctype_uca_c_path) do |ctype_uca_c|
   parser.parse(ctype_uca_c)
 end
 
+GREEK_CAPITAL_UNICODE_RANGE = Unicode.from_utf8("Α")..Unicode.from_utf8("Ω")
+def find_greek_capital_character(characters)
+  characters.find do |character|
+    GREEK_CAPITAL_UNICODE_RANGE.cover?(character[:code_point])
+  end
+end
+
 def find_representative_character(characters)
   representative_character = nil
   case characters.first[:utf8]
@@ -45,6 +52,8 @@ def find_representative_character(characters)
     representative_character = characters[1]
   when "ぁ", "ぃ", "ぅ", "ぇ", "ぉ", "っ", "ゃ", "ゅ", "ょ", "ゎ"
     representative_character = characters[1]
+  else
+    representative_character ||= find_greek_capital_character(characters)
   end
   representative_character ||= characters.first
   representative_character
-------------- next part --------------
[An HTML attachment was scrubbed by the mailing-list archiver; the original notice text was mis-encoded and is not recoverable here.]
Download 



More information about the Groonga-commit mailing list
Back to archive index