Kouhei Sutou
null+****@clear*****
Mon Nov 17 22:35:20 JST 2014
Kouhei Sutou 2014-11-17 22:35:20 +0900 (Mon, 17 Nov 2014) New Revision: 2ca44e921d1f1db6765770ce35dd2ffb1e627934 https://github.com/groonga/groonga/commit/2ca44e921d1f1db6765770ce35dd2ffb1e627934 Message: Rename grn_tokenizer_status to grn_token_status Because it's status of grn_token not grn_tokenizer. grn_tokenizer_status still available but it's deprecated. Modified files: include/groonga/tokenizer.h lib/grn_token_cursor.h lib/token_cursor.c lib/tokenizer.c lib/tokenizers.c Modified: include/groonga/tokenizer.h (+113 -21) =================================================================== --- include/groonga/tokenizer.h 2014-11-17 22:32:04 +0900 (7908af6) +++ include/groonga/tokenizer.h 2014-11-17 22:35:20 +0900 (3b4a483) @@ -163,28 +163,120 @@ GRN_PLUGIN_EXPORT void grn_tokenizer_token_init(grn_ctx *ctx, grn_tokenizer_toke GRN_PLUGIN_EXPORT void grn_tokenizer_token_fin(grn_ctx *ctx, grn_tokenizer_token *token); /* + * grn_token_status is a flag set for tokenizer status codes. + * If a document or query contains no tokens, push an empty string with + * GRN_TOKEN_LAST as a token. + * + * @since 4.0.7 + */ +typedef unsigned int grn_token_status; + +/* + * GRN_TOKEN_CONTINUE means that the next token is not the last one. + * + * @since 4.0.7 + */ +#define GRN_TOKEN_CONTINUE (0) +/* + * GRN_TOKEN_LAST means that the next token is the last one. + * + * @since 4.0.7 + */ +#define GRN_TOKEN_LAST (0x01L<<0) +/* + * GRN_TOKEN_OVERLAP means that ... + * + * @since 4.0.7 + */ +#define GRN_TOKEN_OVERLAP (0x01L<<1) +/* + * GRN_TOKEN_UNMATURED means that ... + * + * @since 4.0.7 + */ +#define GRN_TOKEN_UNMATURED (0x01L<<2) +/* + * GRN_TOKEN_REACH_END means that ... 
+ * + * @since 4.0.7 + */ +#define GRN_TOKEN_REACH_END (0x01L<<3) +/* + * GRN_TOKEN_SKIP means that the token is skipped + * + * @since 4.0.7 + */ +#define GRN_TOKEN_SKIP (0x01L<<4) +/* + * GRN_TOKEN_SKIP_WITH_POSITION means that the token and postion is skipped + * + * @since 4.0.7 + */ +#define GRN_TOKEN_SKIP_WITH_POSITION (0x01L<<5) +/* + * GRN_TOKEN_FORCE_PREIX that the token is used common prefix search + * + * @since 4.0.7 + */ +#define GRN_TOKEN_FORCE_PREFIX (0x01L<<6) + +/* * grn_tokenizer_status is a flag set for tokenizer status codes. * If a document or query contains no tokens, push an empty string with * GRN_TOKENIZER_TOKEN_LAST as a token. + * + * @deprecated since 4.0.7. Use grn_token_status instead. + */ +typedef grn_token_status grn_tokenizer_status; + +/* + * GRN_TOKENIZER_TOKEN_CONTINUE means that the next token is not the last one. + * + * @deprecated since 4.0.7. Use GRN_TOKEN_CONTINUE instead. + */ +#define GRN_TOKENIZER_TOKEN_CONTINUE GRN_TOKEN_CONTINUE +/* + * GRN_TOKENIZER_TOKEN_LAST means that the next token is the last one. + * + * @deprecated since 4.0.7. Use GRN_TOKEN_LAST instead. + */ +#define GRN_TOKENIZER_TOKEN_LAST GRN_TOKEN_LAST +/* + * GRN_TOKENIZER_TOKEN_OVERLAP means that ... + * + * @deprecated since 4.0.7. Use GRN_TOKEN_OVERLAP instead. + */ +#define GRN_TOKENIZER_TOKEN_OVERLAP GRN_TOKEN_OVERLAP +/* + * GRN_TOKENIZER_TOKEN_UNMATURED means that ... + * + * @deprecated since 4.0.7. Use GRN_TOKEN_UNMATURED instead. + */ +#define GRN_TOKENIZER_TOKEN_UNMATURED GRN_TOKEN_UNMATURED +/* + * GRN_TOKENIZER_TOKEN_REACH_END means that ... + * + * @deprecated since 4.0.7. Use GRN_TOKEN_REACH_END instead. + */ +#define GRN_TOKENIZER_TOKEN_REACH_END GRN_TOKEN_REACH_END +/* + * GRN_TOKENIZER_TOKEN_SKIP means that the token is skipped + * + * @deprecated since 4.0.7. Use GRN_TOKEN_SKIP instead. 
+ */ +#define GRN_TOKENIZER_TOKEN_SKIP GRN_TOKEN_SKIP +/* + * GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION means that the token and postion is skipped + * + * @deprecated since 4.0.7. Use GRN_TOKEN_SKIP_WITH_POSITION instead. + */ +#define GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION GRN_TOKEN_SKIP_WITH_POSITION +/* + * GRN_TOKENIZER_TOKEN_FORCE_PREIX that the token is used common prefix search + * + * @deprecated since 4.0.7. Use GRN_TOKEN_FORCE_PREIX instead. */ -typedef unsigned int grn_tokenizer_status; - -/* GRN_TOKENIZER_TOKEN_CONTINUE means that the next token is not the last one. */ -#define GRN_TOKENIZER_TOKEN_CONTINUE (0) -/* GRN_TOKENIZER_TOKEN_LAST means that the next token is the last one. */ -#define GRN_TOKENIZER_TOKEN_LAST (0x01L<<0) -/* GRN_TOKENIZER_TOKEN_OVERLAP means that ... */ -#define GRN_TOKENIZER_TOKEN_OVERLAP (0x01L<<1) -/* GRN_TOKENIZER_TOKEN_UNMATURED means that ... */ -#define GRN_TOKENIZER_TOKEN_UNMATURED (0x01L<<2) -/* GRN_TOKENIZER_TOKEN_REACH_END means that ... */ -#define GRN_TOKENIZER_TOKEN_REACH_END (0x01L<<3) -/* GRN_TOKENIZER_TOKEN_SKIP means that the token is skipped */ -#define GRN_TOKENIZER_TOKEN_SKIP (0x01L<<4) -/* GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION means that the token and postion is skipped */ -#define GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION (0x01L<<5) -/* GRN_TOKENIZER_TOKEN_FORCE_PREIX that the token is used common prefix search */ -#define GRN_TOKENIZER_TOKEN_FORCE_PREFIX (0x01L<<6) +#define GRN_TOKENIZER_TOKEN_FORCE_PREFIX GRN_TOKEN_FORCE_PREFIX /* * GRN_TOKENIZER_CONTINUE and GRN_TOKENIZER_LAST are deprecated. They @@ -207,7 +299,7 @@ GRN_PLUGIN_EXPORT grn_tokenizer_status grn_token_get_status(grn_ctx *ctx, grn_token *token); GRN_PLUGIN_EXPORT grn_rc grn_token_set_status(grn_ctx *ctx, grn_token *token, - grn_tokenizer_status status); + grn_token_status status); /* @@ -215,12 +307,12 @@ GRN_PLUGIN_EXPORT grn_rc grn_token_set_status(grn_ctx *ctx, grn_tokenizer_token_push() does not make a copy of the given string. 
This means that you have to maintain a memory space allocated to the string. Also note that the grn_tokenizer_token object must be maintained until the - request for the next token or finalization comes. See grn_tokenizer_status in + request for the next token or finalization comes. See grn_token_status in this header for more details of `status'. */ GRN_PLUGIN_EXPORT void grn_tokenizer_token_push(grn_ctx *ctx, grn_tokenizer_token *token, const char *str_ptr, unsigned int str_length, - grn_tokenizer_status status); + grn_token_status status); /* grn_tokenizer_tokenized_delimiter_next() extracts the next token Modified: lib/grn_token_cursor.h (+1 -1) =================================================================== --- lib/grn_token_cursor.h 2014-11-17 22:32:04 +0900 (060c8cf) +++ lib/grn_token_cursor.h 2014-11-17 22:35:20 +0900 (2b6682d) @@ -35,7 +35,7 @@ typedef enum { struct _grn_token { grn_obj data; - grn_tokenizer_status status; + grn_token_status status; }; typedef struct { Modified: lib/token_cursor.c (+10 -10) =================================================================== --- lib/token_cursor.c 2014-11-17 22:32:04 +0900 (99cde24) +++ lib/token_cursor.c 2014-11-17 22:35:20 +0900 (41e5e95) @@ -153,8 +153,8 @@ grn_token_cursor_next_apply_token_filters(grn_ctx *ctx, grn_proc *token_filter = (grn_proc *)token_filter_object; #define SKIP_FLAGS\ - (GRN_TOKENIZER_TOKEN_SKIP |\ - GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION) + (GRN_TOKEN_SKIP |\ + GRN_TOKEN_SKIP_WITH_POSITION) if (current_token.status & SKIP_FLAGS) { break; } @@ -193,15 +193,15 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) status = grn_token_cursor_next_apply_token_filters(ctx, token_cursor, curr_, stat_); token_cursor->status = - ((status & GRN_TOKENIZER_TOKEN_LAST) || + ((status & GRN_TOKEN_LAST) || (token_cursor->mode == GRN_TOKEN_GET && - (status & GRN_TOKENIZER_TOKEN_REACH_END))) + (status & GRN_TOKEN_REACH_END))) ? 
GRN_TOKEN_CURSOR_DONE : GRN_TOKEN_CURSOR_DOING; token_cursor->force_prefix = GRN_FALSE; #define SKIP_FLAGS \ - (GRN_TOKENIZER_TOKEN_SKIP | GRN_TOKENIZER_TOKEN_SKIP_WITH_POSITION) + (GRN_TOKEN_SKIP | GRN_TOKEN_SKIP_WITH_POSITION) if (status & SKIP_FLAGS) { - if (status & GRN_TOKENIZER_TOKEN_SKIP) { + if (status & GRN_TOKEN_SKIP) { token_cursor->pos++; } if (token_cursor->status == GRN_TOKEN_CURSOR_DONE && tid == GRN_ID_NIL) { @@ -212,7 +212,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) } } #undef SKIP_FLAGS - if (status & GRN_TOKENIZER_TOKEN_FORCE_PREFIX) { + if (status & GRN_TOKEN_FORCE_PREFIX) { token_cursor->force_prefix = GRN_TRUE; } if (token_cursor->curr_size == 0) { @@ -236,14 +236,14 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) token_cursor->curr_size, token_cursor->curr); continue; } - if (status & GRN_TOKENIZER_TOKEN_UNMATURED) { - if (status & GRN_TOKENIZER_TOKEN_OVERLAP) { + if (status & GRN_TOKEN_UNMATURED) { + if (status & GRN_TOKEN_OVERLAP) { if (token_cursor->mode == GRN_TOKEN_GET) { token_cursor->pos++; continue; } } else { - if (status & GRN_TOKENIZER_TOKEN_LAST) { + if (status & GRN_TOKEN_LAST) { token_cursor->force_prefix = GRN_TRUE; } } Modified: lib/tokenizer.c (+5 -5) =================================================================== --- lib/tokenizer.c 2014-11-17 22:32:04 +0900 (e5692ff) +++ lib/tokenizer.c 2014-11-17 22:35:20 +0900 (d1921fd) @@ -241,7 +241,7 @@ grn_tokenizer_token_fin(grn_ctx *ctx, grn_tokenizer_token *token) void grn_tokenizer_token_push(grn_ctx *ctx, grn_tokenizer_token *token, const char *str_ptr, unsigned int str_length, - grn_tokenizer_status status) + grn_token_status status) { GRN_TEXT_SET_REF(&token->str, str_ptr, str_length); GRN_UINT32_SET(ctx, &token->status, status); @@ -262,7 +262,7 @@ grn_tokenizer_tokenized_delimiter_next(grn_ctx *ctx, const char *end = str_ptr + str_length; const char *next_start = NULL; unsigned int token_length; - grn_tokenizer_status 
status; + grn_token_status status; for (current = start; current < end; current += char_length) { char_length = grn_charlen_(ctx, current, end, encoding); @@ -349,13 +349,13 @@ exit: GRN_API_RETURN(ctx->rc); } -grn_tokenizer_status +grn_token_status grn_token_get_status(grn_ctx *ctx, grn_token *token) { GRN_API_ENTER; if (!token) { ERR(GRN_INVALID_ARGUMENT, "token must not be NULL"); - GRN_API_RETURN(GRN_TOKENIZER_TOKEN_CONTINUE); + GRN_API_RETURN(GRN_TOKEN_CONTINUE); } GRN_API_RETURN(token->status); } @@ -363,7 +363,7 @@ grn_token_get_status(grn_ctx *ctx, grn_token *token) grn_rc grn_token_set_status(grn_ctx *ctx, grn_token *token, - grn_tokenizer_status status) + grn_token_status status) { GRN_API_ENTER; if (!token) { Modified: lib/tokenizers.c (+13 -12) =================================================================== --- lib/tokenizers.c 2014-11-17 22:32:04 +0900 (cfee35a) +++ lib/tokenizers.c 2014-11-17 22:35:20 +0900 (3f6df15) @@ -70,13 +70,13 @@ uvector_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) if (tokenizer->tail < p) { grn_tokenizer_token_push(ctx, &(tokenizer->token), (const char *)tokenizer->curr, 0, - GRN_TOKENIZER_TOKEN_LAST); + GRN_TOKEN_LAST); } else { - grn_tokenizer_status status; + grn_token_status status; if (tokenizer->tail == p) { - status = GRN_TOKENIZER_TOKEN_LAST; + status = GRN_TOKEN_LAST; } else { - status = GRN_TOKENIZER_TOKEN_CONTINUE; + status = GRN_TOKEN_CONTINUE; } grn_tokenizer_token_push(ctx, &(tokenizer->token), (const char *)tokenizer->curr, tokenizer->unit, @@ -171,7 +171,7 @@ delimited_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data size_t cl; const unsigned char *p = tokenizer->next, *r; const unsigned char *e = tokenizer->end; - grn_tokenizer_status status; + grn_token_status status; for (r = p; r < e; r += cl) { if (!(cl = grn_charlen_(ctx, (char *)r, (char *)e, tokenizer->query->encoding))) { @@ -194,9 +194,9 @@ delimited_next(grn_ctx *ctx, int nargs, grn_obj **args, 
grn_user_data *user_data } } if (r == e) { - status = GRN_TOKENIZER_LAST; + status = GRN_TOKEN_LAST; } else { - status = GRN_TOKENIZER_CONTINUE; + status = GRN_TOKEN_CONTINUE; } grn_tokenizer_token_push(ctx, &(tokenizer->token), @@ -350,7 +350,8 @@ ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) size_t cl; grn_ngram_tokenizer *tokenizer = user_data->ptr; const unsigned char *p = tokenizer->next, *r = p, *e = tokenizer->end; - int32_t len = 0, pos = tokenizer->pos + tokenizer->skip, status = 0; + int32_t len = 0, pos = tokenizer->pos + tokenizer->skip; + grn_token_status status = 0; const uint_least8_t *cp = tokenizer->ctypes ? tokenizer->ctypes + pos : NULL; if (cp && tokenizer->uni_alpha && GRN_STR_CTYPE(*cp) == GRN_CHAR_ALPHA) { while ((cl = grn_charlen_(ctx, (char *)r, (char *)e, @@ -427,10 +428,10 @@ ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) r += cl; } if (tokenizer->overlap) { - status |= GRN_TOKENIZER_TOKEN_OVERLAP; + status |= GRN_TOKEN_OVERLAP; } if (len < tokenizer->ngram_unit) { - status |= GRN_TOKENIZER_TOKEN_UNMATURED; + status |= GRN_TOKEN_UNMATURED; } tokenizer->overlap = (len > 1) ? 1 : 0; } @@ -440,11 +441,11 @@ ngram_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) tokenizer->tail = pos + len - 1; if (p == r || tokenizer->next == e) { tokenizer->skip = 0; - status |= GRN_TOKENIZER_TOKEN_LAST; + status |= GRN_TOKEN_LAST; } else { tokenizer->skip = tokenizer->overlap ? 1 : len; } - if (r == e) { status |= GRN_TOKENIZER_TOKEN_REACH_END; } + if (r == e) { status |= GRN_TOKEN_REACH_END; } grn_tokenizer_token_push(ctx, &(tokenizer->token), (const char *)p, -------------- next part -------------- HTMLの添付ファイルを保管しました...Download