Kouhei Sutou
null+****@clear*****
Fri Feb 1 18:17:44 JST 2013
Kouhei Sutou 2013-02-01 18:17:44 +0900 (Fri, 01 Feb 2013) New Revision: 6f0e184dbd1a9f1ad706eefee7000535a7d76d2f https://github.com/groonga/groonga/commit/6f0e184dbd1a9f1ad706eefee7000535a7d76d2f Log: normalize: support types output Added files: test/command/suite/normalize/with_types.expected test/command/suite/normalize/with_types.test Modified files: lib/proc.c test/command/suite/normalize/ascii.expected test/command/suite/normalize/remove_blank.expected test/command/suite/normalize/remove_tokenized_delimiter.expected Modified: lib/proc.c (+57 -1) =================================================================== --- lib/proc.c 2013-02-01 17:19:16 +0900 (1f61644) +++ lib/proc.c 2013-02-01 18:17:44 +0900 (ed29c82) @@ -2786,6 +2786,41 @@ parse_normalize_flags(grn_ctx *ctx, grn_obj *flag_names) return flags; } +static const char * +char_type_name(grn_char_type type) +{ + const char *name = "unknown"; + + switch (type) { + case GRN_CHAR_NULL : + name = "null"; + break; + case GRN_CHAR_ALPHA : + name = "alpha"; + break; + case GRN_CHAR_DIGIT : + name = "digit"; + break; + case GRN_CHAR_SYMBOL : + name = "symbol"; + break; + case GRN_CHAR_HIRAGANA : + name = "hiragana"; + break; + case GRN_CHAR_KATAKANA : + name = "katakana"; + break; + case GRN_CHAR_KANJI : + name = "kanji"; + break; + case GRN_CHAR_OTHERS : + name = "others"; + break; + } + + return name; +} + static grn_obj * proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { @@ -2806,6 +2841,7 @@ proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data grn_obj *normalizer; grn_obj *grn_string; int flags; + unsigned int normalized_n_characters; flags = parse_normalize_flags(ctx, flag_names); normalizer = grn_ctx_get(ctx, @@ -2825,6 +2861,7 @@ proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data normalizer, flags); grn_obj_unlink(ctx, normalizer); + GRN_OUTPUT_MAP_OPEN("RESULT", 2); { const char *normalized; unsigned int 
normalized_length_in_bytes; @@ -2832,9 +2869,28 @@ proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data grn_string_get_normalized(ctx, grn_string, &normalized, &normalized_length_in_bytes, - NULL); + &normalized_n_characters); + GRN_OUTPUT_CSTR("normalized"); GRN_OUTPUT_STR(normalized, normalized_length_in_bytes); } + { + const unsigned char *types; + + types = grn_string_get_types(ctx, grn_string); + GRN_OUTPUT_CSTR("types"); + if (types) { + unsigned int i; + GRN_OUTPUT_ARRAY_OPEN("types", normalized_n_characters); + for (i = 0; i < normalized_n_characters; i++) { + GRN_OUTPUT_CSTR(char_type_name(types[i])); + } + GRN_OUTPUT_ARRAY_CLOSE(); + } else { + GRN_OUTPUT_ARRAY_OPEN("types", 0); + GRN_OUTPUT_ARRAY_CLOSE(); + } + } + GRN_OUTPUT_MAP_CLOSE(); grn_obj_unlink(ctx, grn_string); } Modified: test/command/suite/normalize/ascii.expected (+1 -1) =================================================================== --- test/command/suite/normalize/ascii.expected 2013-02-01 17:19:16 +0900 (948c499) +++ test/command/suite/normalize/ascii.expected 2013-02-01 18:17:44 +0900 (8ac1422) @@ -1,2 +1,2 @@ normalize NormalizerAuto "aBcDe 123" -[[0,0.0,0.0],"abcde 123"] +[[0,0.0,0.0],{"normalized":"abcde 123","types":[]}] Modified: test/command/suite/normalize/remove_blank.expected (+1 -1) =================================================================== --- test/command/suite/normalize/remove_blank.expected 2013-02-01 17:19:16 +0900 (c34a63c) +++ test/command/suite/normalize/remove_blank.expected 2013-02-01 18:17:44 +0900 (7937d1e) @@ -1,2 +1,2 @@ normalize NormalizerAuto "a b c" REMOVE_BLANK -[[0,0.0,0.0],"abc"] +[[0,0.0,0.0],{"normalized":"abc","types":[]}] Modified: test/command/suite/normalize/remove_tokenized_delimiter.expected (+1 -1) =================================================================== --- test/command/suite/normalize/remove_tokenized_delimiter.expected 2013-02-01 17:19:16 +0900 (8ef370e) +++ 
test/command/suite/normalize/remove_tokenized_delimiter.expected 2013-02-01 18:17:44 +0900 (8c18cf6) @@ -1,2 +1,2 @@ normalize NormalizerAuto "abc" REMOVE_TOKENIZED_DELIMITER -[[0,0.0,0.0],"abc"] +[[0,0.0,0.0],{"normalized":"abc","types":[]}] Added: test/command/suite/normalize/with_types.expected (+21 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalize/with_types.expected 2013-02-01 18:17:44 +0900 (e6e4be8) @@ -0,0 +1,21 @@ +normalize NormalizerAuto "a1!あア亜💕 " WITH_TYPES +[ + [ + 0, + 0.0, + 0.0 + ], + { + "normalized": "a1!あア亜💕 ", + "types": [ + "alpha", + "digit", + "symbol", + "hiragana", + "katakana", + "kanji", + "others", + "others" + ] + } +] Added: test/command/suite/normalize/with_types.test (+1 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalize/with_types.test 2013-02-01 18:17:44 +0900 (448b3f6) @@ -0,0 +1 @@ +normalize NormalizerAuto "a1!あア亜💕 " WITH_TYPES -------------- next part -------------- An HTML attachment was scrubbed... Download