Kouhei Sutou
null+****@clear*****
Mon Feb 4 12:52:15 JST 2013
Kouhei Sutou 2013-02-04 12:52:15 +0900 (Mon, 04 Feb 2013) New Revision: d972baadf6513e4d622de30405d08bb384e27c5c https://github.com/groonga/groonga/commit/d972baadf6513e4d622de30405d08bb384e27c5c Log: Move generic functions to grn_plugin_* from grn_tokenizer_* grn_tokenizer_charlen() -> grn_plugin_charlen() grn_tokenizer_isspace() -> grn_plugin_isspace() They are not tokenizer specific functions. They are useful for normalizer. So the prefix is changed to grn_plugin_. grn_tokenizer_*() still exists for backward compatibility. Use grn_plugin_*() for newly written code. Modified files: include/groonga/plugin.h include/groonga/tokenizer.h lib/plugin.c lib/tokenizer.c Modified: include/groonga/plugin.h (+18 -0) =================================================================== --- include/groonga/plugin.h 2013-02-04 12:43:51 +0900 (1f0bfe9) +++ include/groonga/plugin.h 2013-02-04 12:52:15 +0900 (fda1dca) @@ -180,6 +180,24 @@ GRN_API grn_obj *grn_plugin_proc_alloc(grn_ctx *ctx, grn_user_data *user_data, */ GRN_API const char *grn_plugin_win32_base_dir(void); +/* + grn_plugin_charlen() returns the length (#bytes) of the first character + in the string specified by `str_ptr' and `str_length'. If the starting bytes + are invalid as a character, grn_plugin_charlen() returns 0. See + grn_encoding in "groonga.h" for more details of `encoding' + */ +int grn_plugin_charlen(grn_ctx *ctx, const char *str_ptr, + unsigned int str_length, grn_encoding encoding); + +/* + grn_plugin_isspace() returns the length (#bytes) of the first character + in the string specified by `str_ptr' and `str_length' if it is a space + character. Otherwise, grn_plugin_isspace() returns 0. 
+ */ +int grn_plugin_isspace(grn_ctx *ctx, const char *str_ptr, + unsigned int str_length, grn_encoding encoding); + + #ifdef __cplusplus } Modified: include/groonga/tokenizer.h (+4 -0) =================================================================== --- include/groonga/tokenizer.h 2013-02-04 12:43:51 +0900 (30d061b) +++ include/groonga/tokenizer.h 2013-02-04 12:52:15 +0900 (5865ae4) @@ -34,6 +34,8 @@ extern "C" { in the string specified by `str_ptr' and `str_length'. If the starting bytes are invalid as a character, grn_tokenizer_charlen() returns 0. See grn_encoding in "groonga.h" for more details of `encoding' + + Deprecated. Use grn_plugin_charlen() instead. */ int grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr, unsigned int str_length, grn_encoding encoding); @@ -42,6 +44,8 @@ int grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr, grn_tokenizer_isspace() returns the length (#bytes) of the first character in the string specified by `str_ptr' and `str_length' if it is a space character. Otherwise, grn_tokenizer_isspace() returns 0. + + Deprecated. Use grn_plugin_isspace() instead. */ int grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr, unsigned int str_length, grn_encoding encoding); Modified: lib/plugin.c (+53 -0) =================================================================== --- lib/plugin.c 2013-02-04 12:43:51 +0900 (c96dfc5) +++ lib/plugin.c 2013-02-04 12:52:15 +0900 (2bda3fd) @@ -588,3 +588,56 @@ grn_plugin_win32_base_dir(void) return NULL; #endif /* WIN32 */ } + +/* + grn_plugin_charlen() takes the length of a string, unlike grn_charlen_(). + */ +int +grn_plugin_charlen(grn_ctx *ctx, const char *str_ptr, + unsigned int str_length, grn_encoding encoding) +{ + return grn_charlen_(ctx, str_ptr, str_ptr + str_length, encoding); +} + +/* + grn_plugin_isspace() takes the length of a string, unlike grn_isspace(). 
+ */ +int +grn_plugin_isspace(grn_ctx *ctx, const char *str_ptr, + unsigned int str_length, grn_encoding encoding) +{ + if ((str_ptr == NULL) || (str_length == 0)) { + return 0; + } + switch ((unsigned char)str_ptr[0]) { + case ' ' : + case '\f' : + case '\n' : + case '\r' : + case '\t' : + case '\v' : + return 1; + case 0x81 : + if ((encoding == GRN_ENC_SJIS) && (str_length >= 2) && + ((unsigned char)str_ptr[1] == 0x40)) { + return 2; + } + break; + case 0xA1 : + if ((encoding == GRN_ENC_EUC_JP) && (str_length >= 2) && + ((unsigned char)str_ptr[1] == 0xA1)) { + return 2; + } + break; + case 0xE3 : + if ((encoding == GRN_ENC_UTF8) && (str_length >= 3) && + ((unsigned char)str_ptr[1] == 0x80) && + ((unsigned char)str_ptr[2] == 0x80)) { + return 3; + } + break; + default : + break; + } + return 0; +} Modified: lib/tokenizer.c (+4 -37) =================================================================== --- lib/tokenizer.c 2013-02-04 12:43:51 +0900 (ba65c01) +++ lib/tokenizer.c 2013-02-04 12:52:15 +0900 (f609db6) @@ -29,56 +29,23 @@ #include "token.h" /* - grn_tokenizer_charlen() takes the length of a string, unlike grn_charlen_(). + Just for backward compatibility. See grn_plugin_charlen() instead. */ int grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr, unsigned int str_length, grn_encoding encoding) { - return grn_charlen_(ctx, str_ptr, str_ptr + str_length, encoding); + return grn_plugin_charlen(ctx, str_ptr, str_length, encoding); } /* - grn_tokenizer_isspace() takes the length of a string, unlike grn_isspace(). + Just for backward compatibility. See grn_plugin_isspace() instead. 
*/ int grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr, unsigned int str_length, grn_encoding encoding) { - if ((str_ptr == NULL) || (str_length == 0)) { - return 0; - } - switch ((unsigned char)str_ptr[0]) { - case ' ' : - case '\f' : - case '\n' : - case '\r' : - case '\t' : - case '\v' : - return 1; - case 0x81 : - if ((encoding == GRN_ENC_SJIS) && (str_length >= 2) && - ((unsigned char)str_ptr[1] == 0x40)) { - return 2; - } - break; - case 0xA1 : - if ((encoding == GRN_ENC_EUC_JP) && (str_length >= 2) && - ((unsigned char)str_ptr[1] == 0xA1)) { - return 2; - } - break; - case 0xE3 : - if ((encoding == GRN_ENC_UTF8) && (str_length >= 3) && - ((unsigned char)str_ptr[1] == 0x80) && - ((unsigned char)str_ptr[2] == 0x80)) { - return 3; - } - break; - default : - break; - } - return 0; + return grn_plugin_isspace(ctx, str_ptr, str_length, encoding); } grn_bool -------------- next part -------------- HTMLの添付ファイルを保管しました...Download