[Groonga-commit] groonga/groonga at b8f7950 [master] Extract code not for unification from unify function

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Nov 2 18:41:26 JST 2018


Kouhei Sutou	2018-11-02 18:41:26 +0900 (Fri, 02 Nov 2018)

  Revision: b8f7950446676aa4989d7aa697d916945b1122af
  https://github.com/groonga/groonga/commit/b8f7950446676aa4989d7aa697d916945b1122af

  Message:
    Extract code not for unification from unify function

  Modified files:
    lib/normalizer.c

  Modified: lib/normalizer.c (+36 -28)
===================================================================
--- lib/normalizer.c    2018-11-02 18:14:53 +0900 (0bb240515)
+++ lib/normalizer.c    2018-11-02 18:41:26 +0900 (d6b2b5624)
@@ -582,6 +582,13 @@ typedef struct {
   size_t ds;
   grn_bool remove_blank_p;
   grn_bool remove_tokenized_delimiter_p;
+
+  unsigned char unified_kana[3];
+  unsigned char unified_kana_case[3];
+  unsigned char unified_kana_voiced_sound_mark[3];
+  unsigned char unified_hyphen[1];
+  unsigned char unified_prolonged_sound_mark[3];
+  unsigned char unified_middle_dot[2];
 } grn_nfkc_normalize_data;
 
 grn_inline static void
@@ -632,6 +639,15 @@ grn_nfkc_normalize_data_init(grn_ctx *ctx,
   data->de = data->d + data->ds;
   data->d_ = NULL;
   data->e = (unsigned char *)(data->string->original) + data->size;
+
+  data->unified_hyphen[0] = '-';
+  /* U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK */
+  data->unified_prolonged_sound_mark[0] = 0xe3;
+  data->unified_prolonged_sound_mark[1] = 0x83;
+  data->unified_prolonged_sound_mark[2] = 0xbc;
+  /* U+00B7 MIDDLE DOT */
+  data->unified_middle_dot[0] = 0xc2;
+  data->unified_middle_dot[1] = 0xb7;
 }
 
 grn_inline static void
@@ -1115,21 +1131,11 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
                          grn_nfkc_normalize_data *data,
                          grn_char_type char_type)
 {
-  const unsigned char *p_original = data->p;
-  unsigned char unified_kana[3];
-  unsigned char unified_kana_case[3];
-  unsigned char unified_kana_voiced_sound_mark[3];
-  const unsigned char unified_hyphen[] = {'-'};
-  /* U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK */
-  const unsigned char unified_prolonged_sound_mark[] = {0xe3, 0x83, 0xbc};
-  /* U+00B7 MIDDLE DOT */
-  const unsigned char unified_middle_dot[] = {0xc2, 0xb7};
-
   if (data->options->unify_kana &&
       char_type == GRN_CHAR_KATAKANA &&
       data->lp == 3) {
-    data->p = grn_nfkc_normalize_unify_kana(data->p, unified_kana);
-    if (data->p == unified_kana) {
+    data->p = grn_nfkc_normalize_unify_kana(data->p, data->unified_kana);
+    if (data->p == data->unified_kana) {
       char_type = GRN_CHAR_HIRAGANA;
     }
   }
@@ -1139,13 +1145,13 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     case GRN_CHAR_HIRAGANA :
       if (data->lp == 3) {
         data->p = grn_nfkc_normalize_unify_hiragana_case(
-          data->p, unified_kana_case);
+          data->p, data->unified_kana_case);
       }
       break;
     case GRN_CHAR_KATAKANA :
       if (data->lp == 3) {
         data->p = grn_nfkc_normalize_unify_katakana_case(
-          data->p, unified_kana_case);
+          data->p, data->unified_kana_case);
       }
       break;
     default :
@@ -1158,13 +1164,13 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     case GRN_CHAR_HIRAGANA :
       if (data->lp == 3) {
         data->p = grn_nfkc_normalize_unify_hiragana_voiced_sound_mark(
-          data->p, unified_kana_voiced_sound_mark);
+          data->p, data->unified_kana_voiced_sound_mark);
       }
       break;
     case GRN_CHAR_KATAKANA :
       if (data->lp == 3) {
         data->p = grn_nfkc_normalize_unify_katakana_voiced_sound_mark(
-          data->p, unified_kana_voiced_sound_mark);
+          data->p, data->unified_kana_voiced_sound_mark);
       }
       break;
     default :
@@ -1174,16 +1180,16 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
 
   if (data->options->unify_hyphen) {
     if (grn_nfkc_normalize_is_hyphen_famity(data->p, data->lp)) {
-      data->p = unified_hyphen;
-      data->lp = sizeof(unified_hyphen);
+      data->p = data->unified_hyphen;
+      data->lp = sizeof(data->unified_hyphen);
       char_type = GRN_CHAR_SYMBOL;
     }
   }
 
   if (data->options->unify_prolonged_sound_mark) {
     if (grn_nfkc_normalize_is_prolonged_sound_mark_famity(data->p, data->lp)) {
-      data->p = unified_prolonged_sound_mark;
-      data->lp = sizeof(unified_prolonged_sound_mark);
+      data->p = data->unified_prolonged_sound_mark;
+      data->lp = sizeof(data->unified_prolonged_sound_mark);
       char_type = GRN_CHAR_KATAKANA;
     }
   }
@@ -1191,16 +1197,16 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
   if (data->options->unify_hyphen_and_prolonged_sound_mark) {
     if (grn_nfkc_normalize_is_hyphen_famity(data->p, data->lp) ||
         grn_nfkc_normalize_is_prolonged_sound_mark_famity(data->p, data->lp)) {
-      data->p = unified_hyphen;
-      data->lp = sizeof(unified_hyphen);
+      data->p = data->unified_hyphen;
+      data->lp = sizeof(data->unified_hyphen);
       char_type = GRN_CHAR_SYMBOL;
     }
   }
 
   if (data->options->unify_middle_dot) {
     if (grn_nfkc_normalize_is_middle_dot_family(data->p, data->lp)) {
-      data->p = unified_middle_dot;
-      data->lp = sizeof(unified_middle_dot);
+      data->p = data->unified_middle_dot;
+      data->lp = sizeof(data->unified_middle_dot);
       char_type = GRN_CHAR_SYMBOL;
     }
   }
@@ -1217,9 +1223,6 @@ grn_nfkc_normalize_unify(grn_ctx *ctx,
     }
   }
 
-  grn_memcpy(data->d, data->p, data->lp);
-  data->p = p_original;
-
   return char_type;
 }
 
@@ -1293,7 +1296,12 @@ grn_nfkc_normalize(grn_ctx *ctx,
           }
         }
 
-        char_type = grn_nfkc_normalize_unify(ctx, &data, char_type);
+        {
+          const unsigned char *p = data.p;
+          char_type = grn_nfkc_normalize_unify(ctx, &data, char_type);
+          grn_memcpy(data.d, data.p, data.lp);
+          data.p = p;
+        }
 
         data.d_ = data.d;
         if (data.lp > 0) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181102/dbe3e654/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index