Kouhei Sutou
null+****@clear*****
Wed Apr 11 17:48:39 JST 2018
Kouhei Sutou 2018-04-11 17:48:39 +0900 (Wed, 11 Apr 2018) New Revision: bd79f8e0c5da9847cf02552f1f62abf9778efbfd https://github.com/groonga/groonga/commit/bd79f8e0c5da9847cf02552f1f62abf9778efbfd Message: NormalizerNFKC100 unify_kana_case: add missing "tu" support Modified files: lib/normalizer.c test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test Modified: lib/normalizer.c (+6 -0) =================================================================== --- lib/normalizer.c 2018-04-11 17:40:22 +0900 (11780f52b) +++ lib/normalizer.c 2018-04-11 17:48:39 +0900 (3eab50117) @@ -667,11 +667,14 @@ utf8_normalize_unify_hiragana_case(const unsigned char *utf8_char, if (utf8_char[0] == 0xe3) { if ((utf8_char[1] == 0x81 && (0x81 <= utf8_char[2] && utf8_char[2] <= 0x89)) || + (utf8_char[1] == 0x81 && utf8_char[2] == 0xa3) || (utf8_char[1] == 0x82 && (0x83 <= utf8_char[2] && utf8_char[2] <= 0x87))) { /* U+3041 HIRAGANA LETTER SMALL A .. * U+3049 HIRAGANA LETTER SMALL O * + * U+3063 HIRAGANA LETTER SMALL TU + * * U+3083 HIRAGANA LETTER SMALL YA .. * U+3087 HIRAGANA LETTER SMALL YO */ if (utf8_char[2] & 0x1) { @@ -711,11 +714,14 @@ utf8_normalize_unify_katakana_case(const unsigned char *utf8_char, if (utf8_char[0] == 0xe3) { if ((utf8_char[1] == 0x82 && (0xa1 <= utf8_char[2] && utf8_char[2] <= 0xa9)) || + (utf8_char[1] == 0x83 && utf8_char[2] == 0x83) || (utf8_char[1] == 0x83 && (0xa3 <= utf8_char[2] && utf8_char[2] <= 0xa7))) { /* U+30A1 KATAKANA LETTER SMALL A .. * U+30A9 KATAKANA LETTER SMALL O * + * U+30C3 KATAKANA LETTER SMALL TU + * * U+30E3 KATAKANA LETTER SMALL YA .. 
* U+30E7 KATAKANA LETTER SMALL YO */ if (utf8_char[2] & 0x1) { Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected (+4 -2) =================================================================== --- test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected 2018-04-11 17:40:22 +0900 (d646bb8d8) +++ test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected 2018-04-11 17:48:39 +0900 (deebbd0c4) @@ -1,4 +1,4 @@ -normalize 'NormalizerNFKC100("unify_kana_case", true)' "ぁあぃいぅうぇえぉおゃやゅゆょよゎわゕかゖけ" WITH_TYPES +normalize 'NormalizerNFKC100("unify_kana_case", true)' "ぁあぃいぅうぇえぉおっつゃやゅゆょよゎわゕかゖけ" WITH_TYPES [ [ 0, @@ -6,7 +6,7 @@ normalize 'NormalizerNFKC100("unify_kana_case", true)' "ぁあぃいぅう 0.0 ], { - "normalized": "ああいいううええおおややゆゆよよわわかかけけ", + "normalized": "ああいいううええおおつつややゆゆよよわわかかけけ", "types": [ "hiragana", "hiragana", @@ -29,6 +29,8 @@ normalize 'NormalizerNFKC100("unify_kana_case", true)' "ぁあぃいぅう "hiragana", "hiragana", "hiragana", + "hiragana", + "hiragana", "hiragana" ], "checks": [ Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test (+1 -1) =================================================================== --- test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test 2018-04-11 17:40:22 +0900 (93006d064) +++ test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test 2018-04-11 17:48:39 +0900 (104b679b8) @@ -1,4 +1,4 @@ normalize \ 'NormalizerNFKC100("unify_kana_case", true)' \ - "ぁあぃいぅうぇえぉおゃやゅゆょよゎわゕかゖけ" \ + "ぁあぃいぅうぇえぉおっつゃやゅゆょよゎわゕかゖけ" \ WITH_TYPES Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected (+4 -2) =================================================================== --- test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected 2018-04-11 17:40:22 +0900 (9ac121551) +++ test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected 2018-04-11 17:48:39 +0900 (abbb3a0e8) @@ -1,4 +1,4 @@ -normalize 
'NormalizerNFKC100("unify_kana_case", true)' "ァアィイゥウェエォオャヤュユョヨヮワヵカヶケ" WITH_TYPES +normalize 'NormalizerNFKC100("unify_kana_case", true)' "ァアィイゥウェエォオッツャヤュユョヨヮワヵカヶケ" WITH_TYPES [ [ 0, @@ -6,7 +6,7 @@ normalize 'NormalizerNFKC100("unify_kana_case", true)' "ァアィイゥウ 0.0 ], { - "normalized": "アアイイウウエエオオヤヤユユヨヨワワカカケケ", + "normalized": "アアイイウウエエオオツツヤヤユユヨヨワワカカケケ", "types": [ "katakana", "katakana", @@ -29,6 +29,8 @@ normalize 'NormalizerNFKC100("unify_kana_case", true)' "ァアィイゥウ "katakana", "katakana", "katakana", + "katakana", + "katakana", "katakana" ], "checks": [ Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test (+1 -1) =================================================================== --- test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test 2018-04-11 17:40:22 +0900 (683a3ddca) +++ test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test 2018-04-11 17:48:39 +0900 (60b39b7ba) @@ -1,4 +1,4 @@ normalize \ 'NormalizerNFKC100("unify_kana_case", true)' \ - "ァアィイゥウェエォオャヤュユョヨヮワヵカヶケ" \ + "ァアィイゥウェエォオッツャヤュユョヨヮワヵカヶケ" \ WITH_TYPES -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180411/b3e66938/attachment-0001.htm