[Groonga-commit] groonga/groonga [master] Improve error message of charlen for invalid UTF-8 string

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Nov 30 18:43:25 JST 2012


Kouhei Sutou	2012-11-30 18:43:25 +0900 (Fri, 30 Nov 2012)

  New Revision: d9c9a88826479f0e4fe256b5dc71ebc7d8edaee1
  https://github.com/groonga/groonga/commit/d9c9a88826479f0e4fe256b5dc71ebc7d8edaee1

  Log:
    Improve error message of charlen for invalid UTF-8 string

  Modified files:
    lib/string.c

  Modified: lib/string.c (+26 -4)
===================================================================
--- lib/string.c    2012-11-30 16:55:21 +0900 (19aea58)
+++ lib/string.c    2012-11-30 18:43:25 +0900 (0105eb8)
@@ -531,14 +531,36 @@ grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char
   if (*p & 0x80) {
     int b, w;
     int size;
+    int i;
     for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
     if (!w) {
-      GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen_utf8");
+      GRN_LOG(ctx, GRN_LOG_WARNING,
+              "invalid utf8 string: the first bit is 0x80: <%.*s>: <%.*s>",
+              (int)(end - p), p,
+              (int)(end - str), str);
       return 0;
     }
-    for (size = 1; w--; size++) {
-      if (++p >= end || !*p || (*p & 0xc0) != 0x80) {
-        GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen_utf8");
+    size = w + 1;
+    for (i = 1; i < size; i++) {
+      if (++p >= end) {
+        GRN_LOG(ctx, GRN_LOG_WARNING,
+                "invalid utf8 string: too short: "
+                "%d byte is required but %d byte is given: <%.*s>",
+                size, i,
+                (int)(end - str), str);
+        return 0;
+      }
+      if (!*p) {
+        GRN_LOG(ctx, GRN_LOG_WARNING,
+                "invalid utf8 string: NULL character is found: <%.*s>",
+                (int)(end - str), str);
+        return 0;
+      }
+      if ((*p & 0xc0) != 0x80) {
+        GRN_LOG(ctx, GRN_LOG_WARNING,
+                "invalid utf8 string: 0x80 is not allowed: <%.*s>: <%.*s>",
+                (int)(end - p), p,
+                (int)(end - str), str);
         return 0;
       }
     }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index