[Groonga-commit] groonga/groonga at bd79f8e [master] NormalizerNFKC100 unify_kana_case: add missing "tu" support

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Apr 11 17:48:39 JST 2018


Kouhei Sutou	2018-04-11 17:48:39 +0900 (Wed, 11 Apr 2018)

  New Revision: bd79f8e0c5da9847cf02552f1f62abf9778efbfd
  https://github.com/groonga/groonga/commit/bd79f8e0c5da9847cf02552f1f62abf9778efbfd

  Message:
    NormalizerNFKC100 unify_kana_case: add missing "tu" support

  Modified files:
    lib/normalizer.c
    test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected
    test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test
    test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected
    test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test

  Modified: lib/normalizer.c (+6 -0)
===================================================================
--- lib/normalizer.c    2018-04-11 17:40:22 +0900 (11780f52b)
+++ lib/normalizer.c    2018-04-11 17:48:39 +0900 (3eab50117)
@@ -667,11 +667,14 @@ utf8_normalize_unify_hiragana_case(const unsigned char *utf8_char,
   if (utf8_char[0] == 0xe3) {
     if ((utf8_char[1] == 0x81 && (0x81 <= utf8_char[2] &&
                                   utf8_char[2] <= 0x89)) ||
+        (utf8_char[1] == 0x81 && utf8_char[2] == 0xa3) ||
         (utf8_char[1] == 0x82 && (0x83 <= utf8_char[2] &&
                                   utf8_char[2] <= 0x87))) {
       /* U+3041 HIRAGANA LETTER SMALL A ..
        * U+3049 HIRAGANA LETTER SMALL O
        *
+       * U+3063 HIRAGANA LETTER SMALL TU
+       *
        * U+3083 HIRAGANA LETTER SMALL YA ..
        * U+3087 HIRAGANA LETTER SMALL YO */
       if (utf8_char[2] & 0x1) {
@@ -711,11 +714,14 @@ utf8_normalize_unify_katakana_case(const unsigned char *utf8_char,
   if (utf8_char[0] == 0xe3) {
     if ((utf8_char[1] == 0x82 && (0xa1 <= utf8_char[2] &&
                                   utf8_char[2] <= 0xa9)) ||
+        (utf8_char[1] == 0x83 && utf8_char[2] == 0x83) ||
         (utf8_char[1] == 0x83 && (0xa3 <= utf8_char[2] &&
                                   utf8_char[2] <= 0xa7))) {
       /* U+30A1 KATAKANA LETTER SMALL A ..
        * U+30A9 KATAKANA LETTER SMALL O
        *
+       * U+30C3 KATAKANA LETTER SMALL TU
+       *
        * U+30E3 KATAKANA LETTER SMALL YA ..
        * U+30E7 KATAKANA LETTER SMALL YO */
       if (utf8_char[2] & 0x1) {

  Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected (+4 -2)
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected    2018-04-11 17:40:22 +0900 (d646bb8d8)
+++ test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.expected    2018-04-11 17:48:39 +0900 (deebbd0c4)
@@ -1,4 +1,4 @@
-normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ぁあぃいぅうぇえぉおゃやゅゆょよゎわゕかゖけ"   WITH_TYPES
+normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ぁあぃいぅうぇえぉおっつゃやゅゆょよゎわゕかゖけ"   WITH_TYPES
 [
   [
     0,
@@ -6,7 +6,7 @@ normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ぁあぃいぅう
     0.0
   ],
   {
-    "normalized": "ああいいううええおおややゆゆよよわわかかけけ",
+    "normalized": "ああいいううええおおつつややゆゆよよわわかかけけ",
     "types": [
       "hiragana",
       "hiragana",
@@ -29,6 +29,8 @@ normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ぁあぃいぅう
       "hiragana",
       "hiragana",
       "hiragana",
+      "hiragana",
+      "hiragana",
       "hiragana"
     ],
     "checks": [

  Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test (+1 -1)
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test    2018-04-11 17:40:22 +0900 (93006d064)
+++ test/command/suite/normalizers/nfkc100/unify_kana_case_hiragana.test    2018-04-11 17:48:39 +0900 (104b679b8)
@@ -1,4 +1,4 @@
 normalize \
   'NormalizerNFKC100("unify_kana_case", true)' \
-  "ぁあぃいぅうぇえぉおゃやゅゆょよゎわゕかゖけ" \
+  "ぁあぃいぅうぇえぉおっつゃやゅゆょよゎわゕかゖけ" \
   WITH_TYPES

  Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected (+4 -2)
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected    2018-04-11 17:40:22 +0900 (9ac121551)
+++ test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.expected    2018-04-11 17:48:39 +0900 (abbb3a0e8)
@@ -1,4 +1,4 @@
-normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ァアィイゥウェエォオャヤュユョヨヮワヵカヶケ"   WITH_TYPES
+normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ァアィイゥウェエォオッツャヤュユョヨヮワヵカヶケ"   WITH_TYPES
 [
   [
     0,
@@ -6,7 +6,7 @@ normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ァアィイゥウ
     0.0
   ],
   {
-    "normalized": "アアイイウウエエオオヤヤユユヨヨワワカカケケ",
+    "normalized": "アアイイウウエエオオツツヤヤユユヨヨワワカカケケ",
     "types": [
       "katakana",
       "katakana",
@@ -29,6 +29,8 @@ normalize   'NormalizerNFKC100("unify_kana_case", true)'   "ァアィイゥウ
       "katakana",
       "katakana",
       "katakana",
+      "katakana",
+      "katakana",
       "katakana"
     ],
     "checks": [

  Modified: test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test (+1 -1)
===================================================================
--- test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test    2018-04-11 17:40:22 +0900 (683a3ddca)
+++ test/command/suite/normalizers/nfkc100/unify_kana_case_katakana.test    2018-04-11 17:48:39 +0900 (60b39b7ba)
@@ -1,4 +1,4 @@
 normalize \
   'NormalizerNFKC100("unify_kana_case", true)' \
-  "ァアィイゥウェエォオャヤュユョヨヮワヵカヶケ" \
+  "ァアィイゥウェエォオッツャヤュユョヨヮワヵカヶケ" \
   WITH_TYPES
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180411/b3e66938/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index