Kouhei Sutou
null+****@clear*****
Wed Jan 29 00:11:08 JST 2014
Kouhei Sutou 2014-01-29 00:11:08 +0900 (Wed, 29 Jan 2014) New Revision: acc4bdfd606756a97bde78b324f94565a4dece19 https://github.com/groonga/groonga-normalizer-mysql/commit/acc4bdfd606756a97bde78b324f94565a4dece19 Message: Support checks Added files: test/suite/general_ci/with_checks.expected test/suite/general_ci/with_checks.test test/suite/unicode_ci/with_checks.expected test/suite/unicode_ci/with_checks.test test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/remobe_blank.expected test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/remobe_blank.test test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/with_checks.expected test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/with_checks.test Modified files: normalizers/mysql.c test/suite/general_ci/remove_blank.expected test/suite/general_ci/remove_blank.test test/suite/unicode_ci/remove_blank.expected test/suite/unicode_ci/remove_blank.test Modified: normalizers/mysql.c (+33 -1) =================================================================== --- normalizers/mysql.c 2014-01-28 23:03:11 +0900 (1cd32c8) +++ normalizers/mysql.c 2014-01-29 00:11:08 +0900 (1f0bb4d) @@ -1,6 +1,6 @@ /* -*- c-basic-offset: 2 -*- */ /* - Copyright(C) 2013 Kouhei Sutou <kou ＠ clear-code.com> + Copyright(C) 2013-2014 Kouhei Sutou <kou ＠ clear-code.com> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public @@ -47,6 +47,7 @@ typedef grn_bool (*normalizer_func)(grn_ctx *ctx, int rest_length, uint32_t **normalize_table, char *normalized, + unsigned int *normalized_characer_length, unsigned int *normalized_length_in_bytes, unsigned int *normalized_n_characters); @@ -179,6 +180,7 @@ static inline void normalize_character(const char *utf8, int character_length, uint32_t **normalize_table, char *normalized, + unsigned int *normalized_character_length, unsigned int *normalized_length_in_bytes, unsigned int 
*normalized_n_characters) { @@ -192,6 +194,7 @@ normalize_character(const char *utf8, int character_length, if (normalized_code != 0) { n_bytes = unichar_to_utf8(normalized_code, normalized + *normalized_length_in_bytes); + *normalized_character_length = n_bytes; *normalized_length_in_bytes += n_bytes; } } else { @@ -199,6 +202,7 @@ normalize_character(const char *utf8, int character_length, for (i = 0; i < character_length; i++) { normalized[*normalized_length_in_bytes + i] = utf8[i]; } + *normalized_character_length = character_length; *normalized_length_in_bytes += character_length; } (*normalized_n_characters)++; @@ -298,6 +302,8 @@ normalize(grn_ctx *ctx, grn_obj *string, unsigned int normalized_n_characters = 0; unsigned char *types = NULL; unsigned char *current_type = NULL; + short *checks = NULL; + short *current_check = NULL; grn_encoding encoding; int flags; grn_bool remove_blank_p; @@ -315,6 +321,12 @@ normalize(grn_ctx *ctx, grn_obj *string, types = GRN_PLUGIN_MALLOC(ctx, max_normalized_n_characters); current_type = types; } + if (flags & GRN_STRING_WITH_CHECKS) { + unsigned int max_normalized_length_in_bytes = original_length_in_bytes + 1; + checks = GRN_PLUGIN_MALLOC(ctx, max_normalized_length_in_bytes); + current_check = checks; + current_check[0] = 0; + } rest = original; rest_length = original_length_in_bytes; while (rest_length > 0) { @@ -329,8 +341,12 @@ normalize(grn_ctx *ctx, grn_obj *string, if (current_type > types) { current_type[-1] |= GRN_CHAR_BLANK; } + if (current_check) { + current_check[0]++; + } } else { grn_bool custom_normalized = GRN_FALSE; + unsigned int normalized_character_length; if (custom_normalizer) { custom_normalized = custom_normalizer(ctx, rest, @@ -338,12 +354,14 @@ normalize(grn_ctx *ctx, grn_obj *string, rest_length - character_length, normalize_table, normalized, + &normalized_character_length, &normalized_length_in_bytes, &normalized_n_characters); } if (!custom_normalized) { normalize_character(rest, 
character_length, normalize_table, normalized, + &normalized_character_length, &normalized_length_in_bytes, &normalized_n_characters); } @@ -355,6 +373,16 @@ normalize(grn_ctx *ctx, grn_obj *string, grn_nfkc_char_type((unsigned char *)current_normalized); current_type++; } + if (current_check) { + unsigned int i; + current_check[0] += character_length; + current_check++; + for (i = 1; i < normalized_character_length; i++) { + current_check[0] = 0; + current_check++; + } + current_check[0] = 0; + } } rest += character_length; @@ -382,6 +410,7 @@ normalize(grn_ctx *ctx, grn_obj *string, normalized_length_in_bytes, normalized_n_characters); grn_string_set_types(ctx, string, types); + grn_string_set_checks(ctx, string, checks); } static grn_obj * @@ -476,6 +505,7 @@ normalize_halfwidth_katakana_with_voiced_sound_mark( int rest_length, GNUC_UNUSED uint32_t **normalize_table, char *normalized, + unsigned int *normalized_character_length, unsigned int *normalized_length_in_bytes, unsigned int *normalized_n_characters) { @@ -543,6 +573,7 @@ normalize_halfwidth_katakana_with_voiced_sound_mark( normalized + *normalized_length_in_bytes); } *character_length += next_character_length; + *normalized_character_length = n_bytes; *normalized_length_in_bytes += n_bytes; (*normalized_n_characters)++; custom_normalized = GRN_TRUE; @@ -556,6 +587,7 @@ normalize_halfwidth_katakana_with_voiced_sound_mark( HIRAGANA_HA_LINE_GAP), normalized + *normalized_length_in_bytes); *character_length += next_character_length; + *normalized_character_length = n_bytes; *normalized_length_in_bytes += n_bytes; (*normalized_n_characters)++; custom_normalized = GRN_TRUE; Modified: test/suite/general_ci/remove_blank.expected (+2 -2) =================================================================== --- test/suite/general_ci/remove_blank.expected 2014-01-28 23:03:11 +0900 (8c9e949) +++ test/suite/general_ci/remove_blank.expected 2014-01-29 00:11:08 +0900 (e22989e) @@ -1,4 +1,4 @@ register normalizers/mysql 
[[0,0.0,0.0],true] -normalize NormalizerMySQLGeneralCI "a b c" REMOVE_BLANK -[[0,0.0,0.0],{"normalized":"ABC","types":[],"checks":[]}] +normalize NormalizerMySQLGeneralCI " a b c " REMOVE_BLANK|WITH_CHECKS +[[0,0.0,0.0],{"normalized":"ABC","types":[],"checks":[2,3,4]}] Modified: test/suite/general_ci/remove_blank.test (+1 -1) =================================================================== --- test/suite/general_ci/remove_blank.test 2014-01-28 23:03:11 +0900 (a070785) +++ test/suite/general_ci/remove_blank.test 2014-01-29 00:11:08 +0900 (464759c) @@ -1,3 +1,3 @@ register normalizers/mysql -normalize NormalizerMySQLGeneralCI "a b c" REMOVE_BLANK +normalize NormalizerMySQLGeneralCI " a b c " REMOVE_BLANK|WITH_CHECKS Added: test/suite/general_ci/with_checks.expected (+28 -0) 100644 =================================================================== --- /dev/null +++ test/suite/general_ci/with_checks.expected 2014-01-29 00:11:08 +0900 (b751588) @@ -0,0 +1,28 @@ +register normalizers/mysql +[[0,0.0,0.0],true] +normalize NormalizerMySQLGeneralCI "ア㌕AZ" WITH_CHECKS +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "ア㌕AZ", + "types": [ + + ], + "checks": [ + 3, + 0, + 0, + 3, + 0, + 0, + 3, + 0, + 0, + 1 + ] + } +] Added: test/suite/general_ci/with_checks.test (+3 -0) 100644 =================================================================== --- /dev/null +++ test/suite/general_ci/with_checks.test 2014-01-29 00:11:08 +0900 (a61151a) @@ -0,0 +1,3 @@ +register normalizers/mysql + +normalize NormalizerMySQLGeneralCI "ア㌕AZ" WITH_CHECKS Modified: test/suite/unicode_ci/remove_blank.expected (+2 -2) =================================================================== --- test/suite/unicode_ci/remove_blank.expected 2014-01-28 23:03:11 +0900 (2b22a4b) +++ test/suite/unicode_ci/remove_blank.expected 2014-01-29 00:11:08 +0900 (dc1df15) @@ -1,4 +1,4 @@ register normalizers/mysql [[0,0.0,0.0],true] -normalize NormalizerMySQLUnicodeCI "a b c" REMOVE_BLANK 
-[[0,0.0,0.0],{"normalized":"ABC","types":[],"checks":[]}] +normalize NormalizerMySQLUnicodeCI " a b c" REMOVE_BLANK|WITH_CHECKS +[[0,0.0,0.0],{"normalized":"ABC","types":[],"checks":[2,3,4]}] Modified: test/suite/unicode_ci/remove_blank.test (+1 -1) =================================================================== --- test/suite/unicode_ci/remove_blank.test 2014-01-28 23:03:11 +0900 (ba58c02) +++ test/suite/unicode_ci/remove_blank.test 2014-01-29 00:11:08 +0900 (0e02b32) @@ -1,3 +1,3 @@ register normalizers/mysql -normalize NormalizerMySQLUnicodeCI "a b c" REMOVE_BLANK +normalize NormalizerMySQLUnicodeCI " a b c" REMOVE_BLANK|WITH_CHECKS Added: test/suite/unicode_ci/with_checks.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci/with_checks.expected 2014-01-29 00:11:08 +0900 (9ab7691) @@ -0,0 +1,4 @@ +register normalizers/mysql +[[0,0.0,0.0],true] +normalize NormalizerMySQLUnicodeCI "ア㌕AZ" WITH_CHECKS +[[0,0.0,0.0],{"normalized":"あ㌕AZ","types":[],"checks":[3,0,0,3,0,0,3,1]}] Added: test/suite/unicode_ci/with_checks.test (+3 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci/with_checks.test 2014-01-29 00:11:08 +0900 (f581eec) @@ -0,0 +1,3 @@ +register normalizers/mysql + +normalize NormalizerMySQLUnicodeCI "ア㌕AZ" WITH_CHECKS Added: test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/remobe_blank.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/remobe_blank.expected 2014-01-29 00:11:08 +0900 (2decaf7) @@ -0,0 +1,4 @@ +register normalizers/mysql +[[0,0.0,0.0],true] +normalize NormalizerMySQLUnicodeCIExceptKanaCIKanaWithVoicedSoundMark " a b c" REMOVE_BLANK|WITH_CHECKS +[[0,0.0,0.0],{"normalized":"ABC","types":[],"checks":[2,3,4]}] Added: 
test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/remobe_blank.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/remobe_blank.test 2014-01-29 00:11:08 +0900 (195326c) @@ -0,0 +1,4 @@ +register normalizers/mysql + +normalize NormalizerMySQLUnicodeCIExceptKanaCIKanaWithVoicedSoundMark \ + " a b c" REMOVE_BLANK|WITH_CHECKS Added: test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/with_checks.expected (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/with_checks.expected 2014-01-29 00:11:08 +0900 (9931731) @@ -0,0 +1,4 @@ +register normalizers/mysql +[[0,0.0,0.0],true] +normalize NormalizerMySQLUnicodeCIExceptKanaCIKanaWithVoicedSoundMark "ア㌕AZ" WITH_CHECKS +[[0,0.0,0.0],{"normalized":"あ㌕AZ","types":[],"checks":[3,0,0,3,0,0,3,1]}] Added: test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/with_checks.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/suite/unicode_ci_except_kana_ci_kana_with_voiced_sound_mark/with_checks.test 2014-01-29 00:11:08 +0900 (833a5c9) @@ -0,0 +1,4 @@ +register normalizers/mysql + +normalize NormalizerMySQLUnicodeCIExceptKanaCIKanaWithVoicedSoundMark \ + "ア㌕AZ" WITH_CHECKS -------------- next part -------------- HTMLの添付ファイルを保管しました...Download