Kouhei Sutou
null+****@clear*****
Sun Mar 1 19:00:44 JST 2015
Kouhei Sutou 2015-03-01 19:00:44 +0900 (Sun, 01 Mar 2015)

  New Revision: ce81963a9ec57c65e9d784d25ecfc0f156487c43
  https://github.com/groonga/groonga/commit/ce81963a9ec57c65e9d784d25ecfc0f156487c43

  Message:
    mecab: remove a trailing LF in tokenized chunk

  Modified files:
    plugins/tokenizers/mecab.c

  Modified: plugins/tokenizers/mecab.c (+11 -1)
===================================================================
--- plugins/tokenizers/mecab.c 2015-03-01 18:52:20 +0900 (a9e6b65)
+++ plugins/tokenizers/mecab.c 2015-03-01 19:00:44 +0900 (b48ea01)
@@ -147,6 +147,7 @@ chunked_tokenize_utf8_chunk(grn_ctx *ctx,
                             unsigned int chunk_bytes)
 {
   const char *tokenized_chunk;
+  size_t tokenized_chunk_length;
 
   tokenized_chunk = mecab_sparse_tostr2(tokenizer->mecab, chunk, chunk_bytes);
   if (!tokenized_chunk) {
@@ -161,7 +162,16 @@ chunked_tokenize_utf8_chunk(grn_ctx *ctx,
   if (GRN_TEXT_LEN(&(tokenizer->buf)) > 0) {
     GRN_TEXT_PUTS(ctx, &(tokenizer->buf), " ");
   }
-  GRN_TEXT_PUTS(ctx, &(tokenizer->buf), tokenized_chunk);
+
+  tokenized_chunk_length = strlen(tokenized_chunk);
+  if (tokenized_chunk_length >= 1 &&
+      isspace(tokenized_chunk[tokenized_chunk_length - 1])) {
+    GRN_TEXT_PUT(ctx, &(tokenizer->buf),
+                 tokenized_chunk, tokenized_chunk_length - 1);
+  } else {
+    GRN_TEXT_PUT(ctx, &(tokenizer->buf),
+                 tokenized_chunk, tokenized_chunk_length);
+  }
 
   return GRN_TRUE;
 }
-------------- next part --------------
HTML [attachment notice; the remaining text is unreadable in the archive due to a character-encoding error] ...Download