[Groonga-commit] groonga/groonga at 08bb9d3 [master] Remove duplicated function

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Nov 2 17:04:00 JST 2018


Kouhei Sutou	2018-11-02 17:04:00 +0900 (Fri, 02 Nov 2018)

  Revision: 08bb9d393f44f0779f9b8702e79162bbc8fa4389
  https://github.com/groonga/groonga/commit/08bb9d393f44f0779f9b8702e79162bbc8fa4389

  Message:
    Remove duplicated function

  Modified files:
    lib/normalizer.c

  Modified: lib/normalizer.c (+2 -52)
===================================================================
--- lib/normalizer.c    2018-11-02 16:55:56 +0900 (f01e4297a)
+++ lib/normalizer.c    2018-11-02 17:04:00 +0900 (bd15647b7)
@@ -584,56 +584,6 @@ typedef struct {
   grn_bool remove_tokenized_delimiter_p;
 } grn_nfkc_normalize_data;
 
-static grn_inline int
-grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char *end)
-{
-  /* MEMO: This function allows non-null-terminated string as str. */
-  /*       But requires the end of string. */
-  const unsigned char *p = str;
-  if (end <= p || !*p) { return 0; }
-  if (*p & 0x80) {
-    int b, w;
-    int size;
-    int i;
-    for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
-    if (!w) {
-      GRN_LOG(ctx, GRN_LOG_WARNING,
-              "invalid utf8 string: the first bit is 0x80: <%.*s>: <%.*s>",
-              (int)(end - p), p,
-              (int)(end - str), str);
-      return 0;
-    }
-    size = w + 1;
-    for (i = 1; i < size; i++) {
-      if (++p >= end) {
-        GRN_LOG(ctx, GRN_LOG_WARNING,
-                "invalid utf8 string: too short: "
-                "%d byte is required but %d byte is given: <%.*s>",
-                size, i,
-                (int)(end - str), str);
-        return 0;
-      }
-      if (!*p) {
-        GRN_LOG(ctx, GRN_LOG_WARNING,
-                "invalid utf8 string: NULL character is found: <%.*s>",
-                (int)(end - str), str);
-        return 0;
-      }
-      if ((*p & 0xc0) != 0x80) {
-        GRN_LOG(ctx, GRN_LOG_WARNING,
-                "invalid utf8 string: 0x80 is not allowed: <%.*s>: <%.*s>",
-                (int)(end - p), p,
-                (int)(end - str), str);
-        return 0;
-      }
-    }
-    return size;
-  } else {
-    return 1;
-  }
-  return 0;
-}
-
 grn_inline static const unsigned char *
 grn_nfkc_normalize_unify_kana(const unsigned char *utf8_char,
                               unsigned char *unified)
@@ -1111,7 +1061,7 @@ grn_nfkc_normalize(grn_ctx *ctx,
   for (data.s = data.s_ = (unsigned char *)(data.string->original);
        ;
        data.s += data.ls) {
-    if (!(data.ls = grn_str_charlen_utf8(ctx, data.s, data.e))) {
+    if (!(data.ls = grn_charlen_(ctx, data.s, data.e, GRN_ENC_UTF8))) {
       break;
     }
     if (data.remove_tokenized_delimiter_p &&
@@ -1145,7 +1095,7 @@ grn_nfkc_normalize(grn_ctx *ctx,
       data.length--;
     }
     for (; ; data.p += data.lp) {
-      if (!(data.lp = grn_str_charlen_utf8(ctx, data.p, data.pe))) {
+      if (!(data.lp = grn_charlen_(ctx, data.p, data.pe, GRN_ENC_UTF8))) {
         break;
       }
       if ((*(data.p) == ' ' && data.remove_blank_p) ||
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181102/eb148be0/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index