Kouhei Sutou
null+****@clear*****
Fri Mar 10 15:00:36 JST 2017
Kouhei Sutou 2017-03-10 15:00:36 +0900 (Fri, 10 Mar 2017) New Revision: 4af8c8c0e4f4c5acd0c7c678001942e7a068492e https://github.com/groonga/groonga/commit/4af8c8c0e4f4c5acd0c7c678001942e7a068492e Message: grn_pat_scan: ignore blanks in target string. With this change, highlight family functions support highlighting text that has some blanks. See the test in this change for example. Added files: test/command/suite/select/function/highlight_html/space_in_target.expected test/command/suite/select/function/highlight_html/space_in_target.test Modified files: lib/pat.c Modified: lib/pat.c (+22 -4) =================================================================== --- lib/pat.c 2017-03-10 14:57:01 +0900 (0fa4da9) +++ lib/pat.c 2017-03-10 15:00:36 +0900 (624e84e) @@ -2161,10 +2161,15 @@ grn_pat_scan(grn_ctx *ctx, grn_pat *pat, const char *str, unsigned int str_len, return 0; } if (pat->normalizer) { + int flags = + GRN_STRING_REMOVE_BLANK | + GRN_STRING_WITH_TYPES | + GRN_STRING_WITH_CHECKS; grn_obj *nstr = grn_string_open(ctx, str, str_len, - pat->normalizer, GRN_STRING_WITH_CHECKS); + pat->normalizer, flags); if (nstr) { const short *cp = grn_string_get_checks(ctx, nstr); + const unsigned char *tp = grn_string_get_types(ctx, nstr); unsigned int offset = 0, offset0 = 0; unsigned int normalized_length_in_bytes; const char *sp, *se; @@ -2173,18 +2178,31 @@ grn_pat_scan(grn_ctx *ctx, grn_pat *pat, const char *str, unsigned int str_len, se = sp + normalized_length_in_bytes; while (n < sh_size) { if ((tid = grn_pat_lcp_search(ctx, pat, sp, se - sp))) { + const char *key; uint32_t len; - _grn_pat_key(ctx, pat, tid, &len); + key = _grn_pat_key(ctx, pat, tid, &len); sh[n].id = tid; sh[n].offset = (*cp > 0) ? offset : offset0; + if (sh[n].offset > 0 && + GRN_CHAR_IS_BLANK(tp[-1]) && + grn_charlen(ctx, key, key + len) == 1 && + key[0] != ' ') { + /* Remove leading spaces. 
*/ + const char *original_str = str + sh[n].offset; + while (grn_charlen(ctx, original_str, str + str_len) == 1 && + original_str[0] == ' ') { + original_str++; + sh[n].offset++; + } + } while (len--) { - if (*cp > 0) { offset0 = offset; offset += *cp; } + if (*cp > 0) { offset0 = offset; offset += *cp; tp++; } sp++; cp++; } sh[n].length = offset - sh[n].offset; n++; } else { - if (*cp > 0) { offset0 = offset; offset += *cp; } + if (*cp > 0) { offset0 = offset; offset += *cp; tp++; } do { sp++; cp++; } while (sp < se && !*cp); Added: test/command/suite/select/function/highlight_html/space_in_target.expected (+37 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_html/space_in_target.expected 2017-03-10 15:00:36 +0900 (56e5f95) @@ -0,0 +1,37 @@ +table_create Entries TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Entries body COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto +[[0,0.0,0.0],true] +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body +[[0,0.0,0.0],true] +load --table Entries +[ +{"body": "高速な Mroonga ストレージエンジン"} +] +[[0,0.0,0.0],1] +select Entries --output_columns --match_columns body --query 'Mroongaストレージ' --output_columns 'highlight_html(body)' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "highlight_html", + null + ] + ], + [ + "高速な <span class=\"keyword\">Mroonga ストレージ</span>エンジン" + ] + ] + ] +] Added: test/command/suite/select/function/highlight_html/space_in_target.test (+14 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/highlight_html/space_in_target.test 2017-03-10 15:00:36 +0900 (184d314) @@ -0,0 +1,14 @@ +table_create Entries TABLE_NO_KEY +column_create Entries body COLUMN_SCALAR ShortText + +table_create Terms TABLE_PAT_KEY ShortText 
--default_tokenizer TokenBigram --normalizer NormalizerAuto +column_create Terms document_index COLUMN_INDEX|WITH_POSITION Entries body + +load --table Entries +[ +{"body": "高速な Mroonga ストレージエンジン"} +] + +select Entries --output_columns \ + --match_columns body --query 'Mroongaストレージ' \ + --output_columns 'highlight_html(body)' -------------- next part -------------- HTMLの添付ファイルを保管しました...Download