Kouhei Sutou
null+****@clear*****
Fri Nov 9 16:46:19 JST 2012
Kouhei Sutou 2012-11-09 16:46:19 +0900 (Fri, 09 Nov 2012) New Revision: d0b86c01e129930d43917ed4a715b395991775ec https://github.com/groonga/groonga/commit/d0b86c01e129930d43917ed4a715b395991775ec Log: Add 'tokenized_' prefix Modified files: include/groonga/tokenizer.h lib/string.c lib/token.c lib/tokenizer.c test/unit/core/test-tokenizer.c Modified: include/groonga/tokenizer.h (+14 -14) =================================================================== --- include/groonga/tokenizer.h 2012-11-09 16:41:13 +0900 (ea34ffe) +++ include/groonga/tokenizer.h 2012-11-09 16:46:19 +0900 (9ddd770) @@ -47,24 +47,24 @@ int grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr, unsigned int str_length, grn_encoding encoding); /* - grn_tokenizer_is_delimiter() returns whether is the first character - in the string specified by `str_ptr' and `str_length' the special - delimiter character or not. + grn_tokenizer_is_tokenized_delimiter() returns whether is the first + character in the string specified by `str_ptr' and `str_length' the + special tokenized delimiter character or not. */ -grn_bool grn_tokenizer_is_delimiter(grn_ctx *ctx, - const char *str_ptr, - unsigned int str_length, - grn_encoding encoding); +grn_bool grn_tokenizer_is_tokenized_delimiter(grn_ctx *ctx, + const char *str_ptr, + unsigned int str_length, + grn_encoding encoding); /* - grn_tokenizer_have_delimiter() returns whether is there the special - delimiter character in the string specified by `str_ptr' and - `str_length' the special delimiter character or not. + grn_tokenizer_have_tokenized_delimiter() returns whether is there + the special delimiter character in the string specified by `str_ptr' + and `str_length' the special tokenized delimiter character or not. 
*/ -grn_bool grn_tokenizer_have_delimiter(grn_ctx *ctx, - const char *str_ptr, - unsigned int str_length, - grn_encoding encoding); +grn_bool grn_tokenizer_have_tokenized_delimiter(grn_ctx *ctx, + const char *str_ptr, + unsigned int str_length, + grn_encoding encoding); /* grn_tokenizer_query is a structure for storing a query. See the following Modified: lib/string.c (+4 -4) =================================================================== --- lib/string.c 2012-11-09 16:41:13 +0900 (0f90636) +++ lib/string.c 2012-11-09 16:46:19 +0900 (3881295) @@ -595,7 +595,7 @@ utf8_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data break; } if (remove_tokenizer_delimiter_p && - grn_tokenizer_is_delimiter(ctx, s, ls, GRN_ENC_UTF8)) { + grn_tokenizer_is_tokenized_delimiter(ctx, s, ls, GRN_ENC_UTF8)) { continue; } if ((p = (unsigned char *)grn_nfkc_map1(s))) { @@ -1084,9 +1084,9 @@ grn_fake_string_open(grn_ctx *ctx, grn_string *string) char *destination = nstr->normalized; unsigned int destination_length = 0; while ((char_length = grn_charlen(ctx, source_current, source_end)) > 0) { - if (!grn_tokenizer_is_delimiter(ctx, - source_current, char_length, - ctx->encoding)) { + if (!grn_tokenizer_is_tokenized_delimiter(ctx, + source_current, char_length, + ctx->encoding)) { memcpy(destination, source_current, char_length); destination += char_length; destination_length += char_length; Modified: lib/token.c (+4 -3) =================================================================== --- lib/token.c 2012-11-09 16:41:13 +0900 (bd54953) +++ lib/token.c 2012-11-09 16:46:19 +0900 (764c5c1) @@ -114,9 +114,10 @@ delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data, grn_table_get_info(ctx, table, &table_flags, &tokenizer->encoding, NULL); tokenizer->have_tokenized_delimiter = - grn_tokenizer_have_delimiter(ctx, - GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str), - tokenizer->encoding); + grn_tokenizer_have_tokenized_delimiter(ctx, + GRN_TEXT_VALUE(str), + 
GRN_TEXT_LEN(str), + tokenizer->encoding); if (tokenizer->have_tokenized_delimiter) { tokenizer->delimiter = GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8; tokenizer->delimiter_len = strlen(tokenizer->delimiter); Modified: lib/tokenizer.c (+11 -5) =================================================================== --- lib/tokenizer.c 2012-11-09 16:41:13 +0900 (7e3581b) +++ lib/tokenizer.c 2012-11-09 16:46:19 +0900 (566d403) @@ -82,8 +82,10 @@ grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr, } grn_bool -grn_tokenizer_is_delimiter(grn_ctx *ctx, const char *str_ptr, - unsigned int str_length, grn_encoding encoding) +grn_tokenizer_is_tokenized_delimiter(grn_ctx *ctx, + const char *str_ptr, + unsigned int str_length, + grn_encoding encoding) { const unsigned char *binary_string = str_ptr; @@ -101,8 +103,10 @@ grn_tokenizer_is_delimiter(grn_ctx *ctx, const char *str_ptr, } grn_bool -grn_tokenizer_have_delimiter(grn_ctx *ctx, const char *str_ptr, - unsigned int str_length, grn_encoding encoding) +grn_tokenizer_have_tokenized_delimiter(grn_ctx *ctx, + const char *str_ptr, + unsigned int str_length, + grn_encoding encoding) { int char_length; const char *current = str_ptr; @@ -117,7 +121,9 @@ grn_tokenizer_have_delimiter(grn_ctx *ctx, const char *str_ptr, } while ((char_length = grn_charlen_(ctx, current, end, encoding)) > 0) { - if (grn_tokenizer_is_delimiter(ctx, current, char_length, encoding)) { + if (grn_tokenizer_is_tokenized_delimiter(ctx, + current, char_length, + encoding)) { return GRN_TRUE; } current += char_length; Modified: test/unit/core/test-tokenizer.c (+20 -16) =================================================================== --- test/unit/core/test-tokenizer.c 2012-11-09 16:41:13 +0900 (46903e1) +++ test/unit/core/test-tokenizer.c 2012-11-09 16:46:19 +0900 (f2218a8) @@ -23,10 +23,10 @@ #include "../lib/grn-assertions.h" -void data_is_delimiter(void); -void test_is_delimiter(gconstpointer data); -void data_have_delimiter(void); -void 
test_have_delimiter(gconstpointer data); +void data_is_tokenized_delimiter(void); +void test_is_tokenized_delimiter(gconstpointer data); +void data_have_tokenized_delimiter(void); +void test_have_tokenized_delimiter(gconstpointer data); static grn_ctx context; static grn_obj *db; @@ -46,7 +46,7 @@ teardown (void) } void -data_is_delimiter(void) +data_is_tokenized_delimiter(void) { #define ADD_DATUM(label, expected, input, encoding) \ gcut_add_datum(label, \ @@ -68,7 +68,7 @@ data_is_delimiter(void) } void -test_is_delimiter(gconstpointer data) +test_is_tokenized_delimiter(gconstpointer data) { const gchar *input; grn_encoding encoding; @@ -77,16 +77,18 @@ test_is_delimiter(gconstpointer data) GRN_CTX_SET_ENCODING(&context, encoding); input = gcut_data_get_string(data, "input"); if (gcut_data_get_boolean(data, "expected")) { - cut_assert_true(grn_tokenizer_is_delimiter(&context, input, strlen(input), - encoding)); + cut_assert_true(grn_tokenizer_is_tokenized_delimiter(&context, + input, strlen(input), + encoding)); } else { - cut_assert_false(grn_tokenizer_is_delimiter(&context, input, strlen(input), - encoding)); + cut_assert_false(grn_tokenizer_is_tokenized_delimiter(&context, + input, strlen(input), + encoding)); } } void -data_have_delimiter(void) +data_have_tokenized_delimiter(void) { #define ADD_DATUM(label, expected, input) \ gcut_add_datum(label, \ @@ -105,7 +107,7 @@ data_have_delimiter(void) } void -test_have_delimiter(gconstpointer data) +test_have_tokenized_delimiter(gconstpointer data) { const gchar *input; grn_encoding encoding = GRN_ENC_UTF8; @@ -113,10 +115,12 @@ test_have_delimiter(gconstpointer data) GRN_CTX_SET_ENCODING(&context, encoding); input = gcut_data_get_string(data, "input"); if (gcut_data_get_boolean(data, "expected")) { - cut_assert_true(grn_tokenizer_have_delimiter(&context, input, strlen(input), - encoding)); + cut_assert_true(grn_tokenizer_have_tokenized_delimiter(&context, + input, strlen(input), + encoding)); } else { - 
cut_assert_false(grn_tokenizer_have_delimiter(&context, input, strlen(input), - encoding)); + cut_assert_false(grn_tokenizer_have_tokenized_delimiter(&context, + input, strlen(input), + encoding)); } } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download