Kouhei Sutou
null+****@clear*****
Tue Nov 13 11:27:41 JST 2012
Kouhei Sutou 2012-11-13 11:27:41 +0900 (Tue, 13 Nov 2012) New Revision: f755fc70fdb5fd50e997478d87155745965a6b0d https://github.com/groonga/groonga/commit/f755fc70fdb5fd50e997478d87155745965a6b0d Log: Use grn_tokenizer_tokenized_delimiter_next() in TokenMecab Modified files: plugins/tokenizers/mecab.c Modified: plugins/tokenizers/mecab.c (+21 -25) =================================================================== --- plugins/tokenizers/mecab.c 2012-11-13 11:23:30 +0900 (55be0bd) +++ plugins/tokenizers/mecab.c 2012-11-13 11:27:41 +0900 (e4fafde) @@ -37,8 +37,8 @@ typedef struct { grn_str *nstr; mecab_t *mecab; char *buf; - char *next; - char *end; + const char *next; + const char *end; grn_encoding encoding; grn_tokenizer_token token; grn_bool have_tokenized_delimiter; @@ -180,48 +180,44 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) static grn_obj * mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { - size_t cl; /* grn_obj *table = args[0]; */ grn_mecab_tokenizer *tokenizer = user_data->ptr; - char *p = tokenizer->next, *r; - char *e = tokenizer->end; grn_encoding encoding = tokenizer->encoding; - grn_tokenizer_status status; if (tokenizer->have_tokenized_delimiter) { - for (r = p; r < e; r += cl) { - cl = grn_charlen_(ctx, r, e, encoding); - if (cl > 0) { - if (grn_tokenizer_is_tokenized_delimiter(ctx, r, cl, encoding)) { - tokenizer->next = r + cl; - break; - } - } else { - tokenizer->next = e; - break; - } - } + tokenizer->next = + grn_tokenizer_tokenized_delimiter_next(ctx, + &(tokenizer->token), + tokenizer->next, + tokenizer->end - tokenizer->next, + encoding); } else { + size_t cl; + const char *p = tokenizer->next, *r; + const char *e = tokenizer->end; + grn_tokenizer_status status; + for (r = p; r < e; r += cl) { if (!(cl = grn_charlen_(ctx, r, e, encoding))) { tokenizer->next = e; break; } if (grn_isspace(r, encoding)) { - char *q = r; + const char *q = r; while ((cl = grn_isspace(q, 
encoding))) { q += cl; } tokenizer->next = q; break; } } - } - if (r == e) { - status = GRN_TOKENIZER_LAST; - } else { - status = GRN_TOKENIZER_CONTINUE; + if (r == e) { + status = GRN_TOKENIZER_LAST; + } else { + status = GRN_TOKENIZER_CONTINUE; + } + grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status); } - grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status); + return NULL; } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download