[Groonga-commit] groonga/groonga-normalizer-mysql at d167006 [master] Ignore U+0000 as normalized character

Back to archive index

Kouhei Sutou null+****@clear*****
Sat May 18 15:44:27 JST 2013


Kouhei Sutou	2013-05-18 15:44:27 +0900 (Sat, 18 May 2013)

  New Revision: d167006e4d3c16ceef38aef5da5aeba30e2bfee4
  https://github.com/groonga/groonga-normalizer-mysql/commit/d167006e4d3c16ceef38aef5da5aeba30e2bfee4

  Message:
    Ignore U+0000 as normalized character
    
    https://gist.github.com/y-ken/eb49eaad879f47b27cec
    
    Reported by Y.Kentaro. Thanks!!!

  Added files:
    test/suite/unicode_ci/halfwidth_katakana_voiced_sound_mark.expected
    test/suite/unicode_ci/halfwidth_katakana_voiced_sound_mark.test
    test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/halfwidth_katakana_voiced_sound_mark.expected
    test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/halfwidth_katakana_voiced_sound_mark.test
  Modified files:
    normalizers/mysql.c

  Modified: normalizers/mysql.c (+5 -3)
===================================================================
--- normalizers/mysql.c    2013-05-09 17:34:10 +0900 (de2f9f5)
+++ normalizers/mysql.c    2013-05-18 15:44:27 +0900 (413b7e9)
@@ -159,9 +159,11 @@ normalize(grn_ctx *ctx, grn_obj *string, uint32_t **normalize_table)
         uint32_t normalized_code;
         unsigned int n_bytes;
         normalized_code = normalize_table[page][low_code];
-        n_bytes = unichar_to_utf8(normalized_code,
-                                  normalized + normalized_length_in_bytes);
-        normalized_length_in_bytes += n_bytes;
+        if (normalized_code != 0) {
+          n_bytes = unichar_to_utf8(normalized_code,
+                                    normalized + normalized_length_in_bytes);
+          normalized_length_in_bytes += n_bytes;
+        }
       } else {
         int i;
         for (i = 0; i < character_length; i++) {

  Added: test/suite/unicode_ci/halfwidth_katakana_voiced_sound_mark.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/suite/unicode_ci/halfwidth_katakana_voiced_sound_mark.expected    2013-05-18 15:44:27 +0900 (685fd1f)
@@ -0,0 +1,4 @@
+register normalizers/mysql
+[[0,0.0,0.0],true]
+normalize NormalizerMySQLUnicodeCI "beforeゲafter"
+[[0,0.0,0.0],{"normalized":"BEFOREけAFTER","types":[]}]

  Added: test/suite/unicode_ci/halfwidth_katakana_voiced_sound_mark.test (+3 -0) 100644
===================================================================
--- /dev/null
+++ test/suite/unicode_ci/halfwidth_katakana_voiced_sound_mark.test    2013-05-18 15:44:27 +0900 (c9f0a5e)
@@ -0,0 +1,3 @@
+register normalizers/mysql
+
+normalize NormalizerMySQLUnicodeCI "beforeゲafter"

  Added: test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/halfwidth_katakana_voiced_sound_mark.expected (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/halfwidth_katakana_voiced_sound_mark.expected    2013-05-18 15:44:27 +0900 (3ff6818)
@@ -0,0 +1,4 @@
+register normalizers/mysql
+[[0,0.0,0.0],true]
+normalize NormalizerMySQLUnicodeCIExceptKanaCIKanaWithVoicedSoundMark   "beforeゲafter"
+[[0,0.0,0.0],{"normalized":"BEFOREけAFTER","types":[]}]

  Added: test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/halfwidth_katakana_voiced_sound_mark.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/halfwidth_katakana_voiced_sound_mark.test    2013-05-18 15:44:27 +0900 (c6ac08f)
@@ -0,0 +1,4 @@
+register normalizers/mysql
+
+normalize NormalizerMySQLUnicodeCIExceptKanaCIKanaWithVoicedSoundMark \
+  "beforeゲafter"
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index