Kouhei Sutou
null+****@clear*****
Mon May 28 10:25:21 JST 2018
Kouhei Sutou 2018-05-28 10:25:21 +0900 (Mon, 28 May 2018) New Revision: f8ef46906ebd6463cfa55464def5f7d1a845807e https://github.com/groonga/groonga/commit/f8ef46906ebd6463cfa55464def5f7d1a845807e Message: NormalizerNFKC100: add include_removed_source_location option It's for more strict highlight. Added files: test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.expected test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.test Modified files: lib/normalizer.c Modified: lib/normalizer.c (+13 -1) =================================================================== --- lib/normalizer.c 2018-05-23 17:54:52 +0900 (c9555446c) +++ lib/normalizer.c 2018-05-28 10:25:21 +0900 (0be649903) @@ -620,6 +620,7 @@ typedef struct { grn_nfkc_char_type_func char_type_func; grn_nfkc_decompose_func decompose_func; grn_nfkc_compose_func compose_func; + grn_bool include_removed_source_location; grn_bool unify_kana; grn_bool unify_kana_case; grn_bool unify_kana_voiced_sound_mark; @@ -640,6 +641,7 @@ utf8_normalize_options_init(grn_utf8_normalize_options *options, options->char_type_func = char_type_func; options->decompose_func = decompose_func; options->compose_func = compose_func; + options->include_removed_source_location = GRN_TRUE; options->unify_kana = GRN_FALSE; options->unify_kana_case = GRN_FALSE; options->unify_kana_voiced_sound_mark = GRN_FALSE; @@ -1153,6 +1155,9 @@ utf8_normalize(grn_ctx *ctx, } if ((*p == ' ' && removeblankp) || *p < 0x20 /* skip unprintable ascii */ ) { if (cp > nstr->ctypes) { *(cp - 1) |= GRN_CHAR_BLANK; } + if (!options->include_removed_source_location) { + s_ += lp; + } } else { size_t lp_original = lp; grn_char_type char_type; @@ -1780,7 +1785,14 @@ nfkc100_open_options(grn_ctx *ctx, name_raw.value = name; name_raw.length = name_length; - if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_kana")) { + if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, + "include_removed_source_location")) { + 
options->include_removed_source_location = + grn_vector_get_element_bool(ctx, + raw_options, + i, + options->include_removed_source_location); + } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_kana")) { options->unify_kana = grn_vector_get_element_bool(ctx, raw_options, i, Added: test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.expected (+2 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.expected 2018-05-28 10:25:21 +0900 (6ea9a6092) @@ -0,0 +1,2 @@ +normalize 'NormalizerNFKC100("include_removed_source_location", false)' " aa bbb c " REMOVE_BLANK|WITH_CHECKS +[[0,0.0,0.0],{"normalized":"aabbbc","types":[],"checks":[1,1,1,1,1,1]}] Added: test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.test (+4 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.test 2018-05-28 10:25:21 +0900 (d1f8788ce) @@ -0,0 +1,4 @@ +normalize \ + 'NormalizerNFKC100("include_removed_source_location", false)' \ + " aa bbb c " \ + REMOVE_BLANK|WITH_CHECKS -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180528/7894c7c1/attachment-0001.htm