Kouhei Sutou 2018-11-12 14:14:04 +0900 (Mon, 12 Nov 2018) Revision: de1a2af87cd39e0c898e23ad7036e01cf073fcb0 https://github.com/groonga/groonga/commit/de1a2af87cd39e0c898e23ad7036e01cf073fcb0 Message: Export new APIs for grn_token: * grn_token_get_force_prefix_search() * grn_token_set_force_prefix_search() * grn_token_get_position() * grn_token_set_position() Modified files: include/groonga/token.h lib/grn_token.h lib/grn_token_cursor.h lib/highlighter.c lib/ii.c lib/proc/proc_tokenize.c lib/token.c lib/token_cursor.c Modified: include/groonga/token.h (+14 -0) =================================================================== --- include/groonga/token.h 2018-11-12 11:47:23 +0900 (25f7345d9) +++ include/groonga/token.h 2018-11-12 14:14:04 +0900 (10109daa0) @@ -165,6 +165,20 @@ grn_token_set_overlap(grn_ctx *ctx, GRN_API grn_obj * grn_token_get_metadata(grn_ctx *ctx, grn_token *token); +GRN_API grn_bool +grn_token_get_force_prefix_search(grn_ctx *ctx, + grn_token *token); +GRN_API grn_rc +grn_token_set_force_prefix_search(grn_ctx *ctx, + grn_token *token, + grn_bool force); +GRN_API uint32_t +grn_token_get_position(grn_ctx *ctx, + grn_token *token); +GRN_API grn_rc +grn_token_set_position(grn_ctx *ctx, + grn_token *token, + uint32_t position); #ifdef __cplusplus } /* extern "C" */ Modified: lib/grn_token.h (+2 -0) =================================================================== --- lib/grn_token.h 2018-11-12 11:47:23 +0900 (f93a3c936) +++ lib/grn_token.h 2018-11-12 14:14:04 +0900 (69f484833) @@ -33,6 +33,8 @@ struct _grn_token { uint32_t source_first_character_length; grn_bool have_overlap; grn_obj metadata; + grn_bool force_prefix_search; + uint32_t position; }; grn_rc grn_token_init(grn_ctx *ctx, grn_token *token); Modified: lib/grn_token_cursor.h (+1 -1) =================================================================== --- lib/grn_token_cursor.h 2018-11-12 11:47:23 +0900 (c144939ae) +++ lib/grn_token_cursor.h 2018-11-12 14:14:04 +0900 (79c6c9337) @@ -1,6 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2009-2016 Brazil + Copyright(C) 2018 Kouhei Sutou <kou****@clear*****> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -36,7 +37,6 @@ struct _grn_token_cursor { int32_t pos; grn_tokenize_mode mode; grn_token_cursor_status status; - grn_bool force_prefix; grn_obj_flags table_flags; grn_encoding encoding; struct { Modified: lib/highlighter.c (+3 -3) =================================================================== --- lib/highlighter.c 2018-11-12 11:47:23 +0900 (0bffb4d73) +++ lib/highlighter.c 2018-11-12 14:14:04 +0900 (d7940cae8) @@ -306,14 +306,14 @@ grn_highlighter_prepare_lexicon(grn_ctx *ctx, } GRN_BULK_REWIND(token_id_chunk); while ((token_id = grn_token_cursor_next(ctx, cursor)) != GRN_ID_NIL) { + grn_token *token; GRN_TEXT_PUT(ctx, token_id_chunk, &token_id, sizeof(grn_id)); - if (cursor->force_prefix && + token = grn_token_cursor_get_token(ctx, cursor); + if (grn_token_get_force_prefix_search(ctx, token) && highlighter->lexicon.object->header.type != GRN_TABLE_HASH_KEY) { - grn_token *token; const char *data; size_t data_length; - token = grn_token_cursor_get_token(ctx, cursor); data = grn_token_get_data_raw(ctx, token, &data_length); grn_vector_add_element(ctx, lazy_keywords, Modified: lib/ii.c (+9 -3) =================================================================== --- lib/ii.c 2018-11-12 11:47:23 +0900 (4cd9386f6) +++ lib/ii.c 2018-11-12 14:14:04 +0900 (c1c1d3080) @@ -7251,7 +7251,9 @@ token_candidate_init(grn_ctx *ctx, grn_ii *ii, grn_token_cursor *token_cursor, } tid = grn_token_cursor_next(ctx, token_cursor); if (token_cursor->status != GRN_TOKEN_CURSOR_DONE_SKIP) { - if (token_cursor->force_prefix) { ef |= EX_PREFIX; } + grn_token *token; + token = grn_token_cursor_get_token(ctx, token_cursor); + if (grn_token_get_force_prefix_search(ctx, token)) { ef |= EX_PREFIX; } TOKEN_CANDIDATE_NODE_SET(); token_candidate_adjacent_set(ctx, token_cursor, top, curr); if (curr->estimated_size > *max_estimated_size) { @@ -7511,6 +7513,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, } else { grn_id tid; int ef; + grn_token *token; switch (mode) { case GRN_OP_PREFIX : ef = EX_PREFIX; @@ -7526,7 +7529,8 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, break; } tid = grn_token_cursor_next(ctx, token_cursor); - if (token_cursor->force_prefix) { ef |= EX_PREFIX; } + token = grn_token_cursor_get_token(ctx, token_cursor); + if (grn_token_get_force_prefix_search(ctx, token)) { ef |= EX_PREFIX; } switch (token_cursor->status) { case GRN_TOKEN_CURSOR_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); @@ -7556,8 +7560,10 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, } while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) { + grn_token *token; tid = grn_token_cursor_next(ctx, token_cursor); - if (token_cursor->force_prefix) { ef |= EX_PREFIX; } + token = grn_token_cursor_get_token(ctx, token_cursor); + if (grn_token_get_force_prefix_search(ctx, token)) { ef |= EX_PREFIX; } switch (token_cursor->status) { case GRN_TOKEN_CURSOR_DONE_SKIP : continue; Modified: lib/proc/proc_tokenize.c (+2 -2) =================================================================== --- lib/proc/proc_tokenize.c 2018-11-12 11:47:23 +0900 (9bf0b0c06) +++ lib/proc/proc_tokenize.c 2018-11-12 14:14:04 +0900 (6d7535016) @@ -249,8 +249,8 @@ tokenize(grn_ctx *ctx, grn_bulk_space(ctx, tokens, sizeof(tokenize_token)); current_token = ((tokenize_token *)(GRN_BULK_CURR(tokens))) - 1; current_token->id = token_id; - current_token->position = token_cursor->pos; - current_token->force_prefix = token_cursor->force_prefix; + current_token->position = grn_token_get_position(ctx, token); + current_token->force_prefix = grn_token_get_force_prefix_search(ctx, token); current_token->source_offset = grn_token_get_source_offset(ctx, token); current_token->source_length = grn_token_get_source_length(ctx, token); current_token->source_first_character_length = Modified: lib/token.c (+58 -2) =================================================================== --- lib/token.c 2018-11-12 11:47:23 +0900 (f98e512c5) +++ lib/token.c 2018-11-12 14:14:04 +0900 (9fc812946) @@ -31,6 +31,8 @@ grn_token_init(grn_ctx *ctx, grn_token *token) token->source_first_character_length = 0; token->have_overlap = GRN_FALSE; grn_token_metadata_init(ctx, &(token->metadata)); + token->force_prefix_search = GRN_FALSE; + token->position = 0; GRN_API_RETURN(ctx->rc); } @@ -225,7 +227,7 @@ grn_token_set_overlap(grn_ctx *ctx, GRN_API_ENTER; if (!token) { ERR(GRN_INVALID_ARGUMENT, - "[token][overlapping][set] token must not be NULL"); + "[token][overlap][set] token must not be NULL"); goto exit; } token->have_overlap = have_overlap; @@ -239,12 +241,62 @@ grn_token_get_metadata(grn_ctx *ctx, grn_token *token) GRN_API_ENTER; if (!token) { ERR(GRN_INVALID_ARGUMENT, - "[token][data][get][metadata] token must not be NULL"); + "[token][metadata][get] token must not be NULL"); GRN_API_RETURN(NULL); } GRN_API_RETURN(&(token->metadata)); } +grn_bool +grn_token_get_force_prefix_search(grn_ctx *ctx, grn_token *token) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, + "[token][force-prefix-search][get] token must not be NULL"); + GRN_API_RETURN(GRN_FALSE); + } + GRN_API_RETURN(token->force_prefix_search); +} + +grn_rc +grn_token_set_force_prefix_search(grn_ctx *ctx, grn_token *token, grn_bool force) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, + "[token][force-prefix-search][set] token must not be NULL"); + GRN_API_RETURN(ctx->rc); + } + token->force_prefix_search = force; + GRN_API_RETURN(ctx->rc); +} + +uint32_t +grn_token_get_position(grn_ctx *ctx, grn_token *token) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, + "[token][position][get] token must not be NULL"); + GRN_API_RETURN(0); + } + GRN_API_RETURN(token->position); +} + +grn_rc +grn_token_set_position(grn_ctx *ctx, grn_token *token, uint32_t position) +{ + GRN_API_ENTER; + if (!token) { + ERR(GRN_INVALID_ARGUMENT, + "[token][position][set] token must not be NULL"); + GRN_API_RETURN(ctx->rc); + } + token->position = position; + GRN_API_RETURN(ctx->rc); +} + grn_rc grn_token_reset(grn_ctx *ctx, grn_token *token) { @@ -260,6 +312,8 @@ grn_token_reset(grn_ctx *ctx, grn_token *token) token->source_first_character_length = 0; token->have_overlap = GRN_FALSE; grn_token_metadata_reset(ctx, &(token->metadata)); + token->force_prefix_search = GRN_FALSE; + token->position = 0; exit: GRN_API_RETURN(ctx->rc); } @@ -285,6 +339,8 @@ grn_token_copy(grn_ctx *ctx, token->have_overlap = source->have_overlap; grn_token_metadata_reset(ctx, &(token->metadata)); grn_token_metadata_copy(ctx, &(token->metadata), &(source->metadata)); + token->force_prefix_search = source->force_prefix_search; + token->position = source->position; exit: GRN_API_RETURN(ctx->rc); } Modified: lib/token_cursor.c (+3 -4) =================================================================== --- lib/token_cursor.c 2018-11-12 11:47:23 +0900 (cd9de1bd2) +++ lib/token_cursor.c 2018-11-12 14:14:04 +0900 (2d589c8de) @@ -102,7 +102,6 @@ grn_token_cursor_open(grn_ctx *ctx, grn_obj *table, token_cursor->curr_size = 0; token_cursor->pos = -1; token_cursor->status = GRN_TOKEN_CURSOR_DOING; - token_cursor->force_prefix = GRN_FALSE; if (tokenizer) { grn_proc *tokenizer_proc = (grn_proc *)tokenizer; if (tokenizer_proc->callbacks.tokenizer.init) { @@ -259,7 +258,6 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) (token_cursor->mode == GRN_TOKENIZE_GET && (status & GRN_TOKEN_REACH_END))) ? GRN_TOKEN_CURSOR_DONE : GRN_TOKEN_CURSOR_DOING; - token_cursor->force_prefix = GRN_FALSE; #define SKIP_FLAGS \ (GRN_TOKEN_SKIP | GRN_TOKEN_SKIP_WITH_POSITION) if (status & SKIP_FLAGS) { @@ -275,7 +273,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) } #undef SKIP_FLAGS if (status & GRN_TOKEN_FORCE_PREFIX) { - token_cursor->force_prefix = GRN_TRUE; + grn_token_set_force_prefix_search(ctx, current_token, GRN_TRUE); } if (token_cursor->curr_size == 0) { if (token_cursor->status != GRN_TOKEN_CURSOR_DONE) { @@ -308,7 +306,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) } } else { if (status & GRN_TOKEN_REACH_END) { - token_cursor->force_prefix = GRN_TRUE; + grn_token_set_force_prefix_search(ctx, current_token, GRN_TRUE); } } } @@ -377,6 +375,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor) token_cursor->status = GRN_TOKEN_CURSOR_NOT_FOUND; } token_cursor->pos++; + grn_token_set_position(ctx, current_token, token_cursor->pos); break; } GRN_API_RETURN(tid); -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181112/cd33bbe1/attachment-0001.html>