Kouhei Sutou
null+****@clear*****
Fri Nov 9 16:48:08 JST 2012
Kouhei Sutou 2012-11-09 16:48:08 +0900 (Fri, 09 Nov 2012) New Revision: d3dc2aacc2a83b428b0fd3ef7ed9f81a55c82e82 https://github.com/groonga/groonga/commit/d3dc2aacc2a83b428b0fd3ef7ed9f81a55c82e82 Log: tokenizer -> tokenized Modified files: include/groonga.h lib/string.c lib/token.c test/unit/util/test-string.c Modified: include/groonga.h (+1 -1) =================================================================== --- include/groonga.h 2012-11-09 16:46:19 +0900 (74704b8) +++ include/groonga.h 2012-11-09 16:48:08 +0900 (8d50e7f) @@ -2550,7 +2550,7 @@ GRN_API grn_rc grn_str_close(grn_ctx *ctx, grn_str *nstr); #define GRN_STRING_REMOVE_BLANK (0x01<<0) #define GRN_STRING_WITH_TYPES (0x01<<1) #define GRN_STRING_WITH_CHECKS (0x01<<2) -#define GRN_STRING_REMOVE_TOKENIZER_DELIMITER (0x01<<3) +#define GRN_STRING_REMOVE_TOKENIZED_DELIMITER (0x01<<3) #define GRN_NORMALIZER_AUTO ((grn_obj *)1) Modified: lib/string.c (+4 -4) =================================================================== --- lib/string.c 2012-11-09 16:46:19 +0900 (3881295) +++ lib/string.c 2012-11-09 16:48:08 +0900 (b84e645) @@ -559,8 +559,8 @@ utf8_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data grn_string *nstr = (grn_string *)args[0]; size_t length = 0, ls, lp, size = nstr->original_length_in_bytes, ds = size * 3; int removeblankp = nstr->flags & GRN_STRING_REMOVE_BLANK; - grn_bool remove_tokenizer_delimiter_p = - nstr->flags & GRN_STRING_REMOVE_TOKENIZER_DELIMITER; + grn_bool remove_tokenized_delimiter_p = + nstr->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER; if (!(nstr->normalized = GRN_MALLOC(ds + 1))) { ERR(GRN_NO_MEMORY_AVAILABLE, "[strinig][utf8] failed to allocate normalized text space"); @@ -594,7 +594,7 @@ utf8_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data if (!(ls = grn_str_charlen_utf8(ctx, s, e))) { break; } - if (remove_tokenizer_delimiter_p && + if (remove_tokenized_delimiter_p && grn_tokenizer_is_tokenized_delimiter(ctx, s, ls, 
GRN_ENC_UTF8)) { continue; } @@ -1076,7 +1076,7 @@ grn_fake_string_open(grn_ctx *ctx, grn_string *string) return NULL; } - if (nstr->flags & GRN_STRING_REMOVE_TOKENIZER_DELIMITER && + if (nstr->flags & GRN_STRING_REMOVE_TOKENIZED_DELIMITER && ctx->encoding == GRN_ENC_UTF8) { int char_length; const char *source_current = str; Modified: lib/token.c (+1 -1) =================================================================== --- lib/token.c 2012-11-09 16:46:19 +0900 (764c5c1) +++ lib/token.c 2012-11-09 16:48:08 +0900 (bd55c20) @@ -233,7 +233,7 @@ ngram_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, uint8_t ngram int nflags = GRN_STRING_REMOVE_BLANK | GRN_STRING_WITH_TYPES | - GRN_STRING_REMOVE_TOKENIZER_DELIMITER; + GRN_STRING_REMOVE_TOKENIZED_DELIMITER; const char *normalized; unsigned int normalized_length_in_bytes; grn_ngram_tokenizer *token; Modified: test/unit/util/test-string.c (+5 -5) =================================================================== --- test/unit/util/test-string.c 2012-11-09 16:46:19 +0900 (81d0a27) +++ test/unit/util/test-string.c 2012-11-09 16:48:08 +0900 (b97118f) @@ -31,8 +31,8 @@ void data_normalize(void); void test_normalize(gconstpointer data); void data_normalize_broken(void); void test_normalize_broken(gconstpointer data); -void data_remove_tokenizer_delimiter(void); -void test_remove_tokenizer_delimiter(gconstpointer data); +void data_remove_tokenized_delimiter(void); +void test_remove_tokenized_delimiter(gconstpointer data); void data_charlen_broken(void); void test_charlen_broken(gconstpointer data); void data_urlenc(void); @@ -263,7 +263,7 @@ test_normalize_broken(gconstpointer data) } void -data_remove_tokenizer_delimiter(void) +data_remove_tokenized_delimiter(void) { #define ADD_DATUM(label, expected, input, flags) \ gcut_add_datum(label, \ @@ -297,7 +297,7 @@ data_remove_tokenizer_delimiter(void) } void -test_remove_tokenizer_delimiter(gconstpointer data) +test_remove_tokenized_delimiter(gconstpointer data) { 
grn_obj *string; grn_obj *normalizer = NULL; @@ -305,7 +305,7 @@ test_remove_tokenizer_delimiter(gconstpointer data) const gchar *input; const gchar *normalized; unsigned int length_in_bytes; - int flags = GRN_STRING_REMOVE_TOKENIZER_DELIMITER; + int flags = GRN_STRING_REMOVE_TOKENIZED_DELIMITER; GRN_CTX_SET_ENCODING(&context, GRN_ENC_UTF8); -------------- next part -------------- HTML attachment (original notice text unreadable due to mis-encoding in the archive)...Download