Kouhei Sutou
null+****@clear*****
Tue May 22 16:39:29 JST 2018
Kouhei Sutou 2018-05-22 16:39:29 +0900 (Tue, 22 May 2018) New Revision: 27eee0a827203a927fb42873ea161b2806a410b6 https://github.com/groonga/groonga/commit/27eee0a827203a927fb42873ea161b2806a410b6 Message: highlighter: fix wrong character length detection Added files: test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.expected test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.test Modified files: lib/highlighter.c Modified: lib/highlighter.c (+2 -9) =================================================================== --- lib/highlighter.c 2018-05-22 16:38:48 +0900 (d9e730c3c) +++ lib/highlighter.c 2018-05-22 16:39:29 +0900 (9c27b6b60) @@ -455,7 +455,6 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, grn_obj *output) { grn_token_cursor *cursor; - grn_encoding encoding = highlighter->lexicon.encoding; grn_obj *token_ids = &(highlighter->lexicon.token_ids); grn_obj *token_locations = &(highlighter->lexicon.token_locations); grn_obj *candidates = &(highlighter->lexicon.candidates); @@ -487,15 +486,9 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, GRN_RECORD_PUT(ctx, token_ids, token_id); location.offset = grn_token_get_source_offset(ctx, token); location.length = grn_token_get_source_length(ctx, token); + location.first_character_length = + grn_token_get_source_first_character_length(ctx, token); location.have_overlap = grn_token_have_overlap(ctx, token); - { - const char *data; - size_t data_length; - - data = grn_token_get_data_raw(ctx, token, &data_length); - location.first_character_length = - grn_charlen_(ctx, data, data + data_length, encoding); - } GRN_TEXT_PUT(ctx, token_locations, &location, sizeof(location)); } grn_token_cursor_close(ctx, cursor); Added: test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.expected (+37 -0) 100644 =================================================================== --- /dev/null +++ 
test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.expected 2018-05-22 16:39:29 +0900 (962e6f444) @@ -0,0 +1,37 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer 'TokenNgram("loose_symbol", true, "report_source_location", true)' --normalizer 'NormalizerNFKC100("unify_hyphen_and_prolonged_sound_mark", true)' +[[0,0.0,0.0],true] +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "[クリアコード]"} +] +[[0,0.0,0.0],1] +select Entries --match_columns body --query 'クリアコード' --output_columns 'highlight_html(body, Terms)' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_html", + null + ] + ], + [ + "<span class=\"keyword\">[クリアコード</span>]" + ] + ] + ] +] Added: test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.test (+19 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_html/lexicon/loose_and_unify.test 2018-05-22 16:39:29 +0900 (fb4556c79) @@ -0,0 +1,19 @@ + +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer 'TokenNgram("loose_symbol", true, \ + "report_source_location", true)' \ + --normalizer 'NormalizerNFKC100("unify_hyphen_and_prolonged_sound_mark", true)' +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body + +load --table Entries +[ +{"body": "[クリアコード]"} +] + +select Entries \ + --match_columns body \ + --query 'クリアコード' \ + --output_columns 'highlight_html(body, Terms)' -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180522/3567b240/attachment-0001.htm