[Groonga-commit] groonga/groonga at f8ef469 [master] NormalizerNFKC100: add include_removed_source_location option

Back to archive index

Kouhei Sutou null+****@clear*****
Mon May 28 10:25:21 JST 2018


Kouhei Sutou	2018-05-28 10:25:21 +0900 (Mon, 28 May 2018)

  New Revision: f8ef46906ebd6463cfa55464def5f7d1a845807e
  https://github.com/groonga/groonga/commit/f8ef46906ebd6463cfa55464def5f7d1a845807e

  Message:
    NormalizerNFKC100: add include_removed_source_location option
    
    It's for stricter highlighting.

  Added files:
    test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.expected
    test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.test
  Modified files:
    lib/normalizer.c

  Modified: lib/normalizer.c (+13 -1)
===================================================================
--- lib/normalizer.c    2018-05-23 17:54:52 +0900 (c9555446c)
+++ lib/normalizer.c    2018-05-28 10:25:21 +0900 (0be649903)
@@ -620,6 +620,7 @@ typedef struct {
   grn_nfkc_char_type_func char_type_func;
   grn_nfkc_decompose_func decompose_func;
   grn_nfkc_compose_func compose_func;
+  grn_bool include_removed_source_location;
   grn_bool unify_kana;
   grn_bool unify_kana_case;
   grn_bool unify_kana_voiced_sound_mark;
@@ -640,6 +641,7 @@ utf8_normalize_options_init(grn_utf8_normalize_options *options,
   options->char_type_func = char_type_func;
   options->decompose_func = decompose_func;
   options->compose_func = compose_func;
+  options->include_removed_source_location = GRN_TRUE;
   options->unify_kana = GRN_FALSE;
   options->unify_kana_case = GRN_FALSE;
   options->unify_kana_voiced_sound_mark = GRN_FALSE;
@@ -1153,6 +1155,9 @@ utf8_normalize(grn_ctx *ctx,
       }
       if ((*p == ' ' && removeblankp) || *p < 0x20  /* skip unprintable ascii */ ) {
         if (cp > nstr->ctypes) { *(cp - 1) |= GRN_CHAR_BLANK; }
+        if (!options->include_removed_source_location) {
+          s_ += lp;
+        }
       } else {
         size_t lp_original = lp;
         grn_char_type char_type;
@@ -1780,7 +1785,14 @@ nfkc100_open_options(grn_ctx *ctx,
     name_raw.value = name;
     name_raw.length = name_length;
 
-    if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_kana")) {
+    if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw,
+                                     "include_removed_source_location")) {
+      options->include_removed_source_location =
+        grn_vector_get_element_bool(ctx,
+                                    raw_options,
+                                    i,
+                                    options->include_removed_source_location);
+    } else if (GRN_RAW_STRING_EQUAL_CSTRING(name_raw, "unify_kana")) {
       options->unify_kana = grn_vector_get_element_bool(ctx,
                                                         raw_options,
                                                         i,

  Added: test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.expected (+2 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.expected    2018-05-28 10:25:21 +0900 (6ea9a6092)
@@ -0,0 +1,2 @@
+normalize   'NormalizerNFKC100("include_removed_source_location", false)'   " aa bbb   c "   REMOVE_BLANK|WITH_CHECKS
+[[0,0.0,0.0],{"normalized":"aabbbc","types":[],"checks":[1,1,1,1,1,1]}]

  Added: test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.test (+4 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalizers/nfkc100/include_removed_source_location_blank.test    2018-05-28 10:25:21 +0900 (d1f8788ce)
@@ -0,0 +1,4 @@
+normalize \
+  'NormalizerNFKC100("include_removed_source_location", false)' \
+  " aa bbb   c " \
+  REMOVE_BLANK|WITH_CHECKS
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180528/7894c7c1/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index