Kouhei Sutou
null+****@clear*****
Fri Nov 30 18:43:25 JST 2012
Kouhei Sutou 2012-11-30 18:43:25 +0900 (Fri, 30 Nov 2012) New Revision: d9c9a88826479f0e4fe256b5dc71ebc7d8edaee1 https://github.com/groonga/groonga/commit/d9c9a88826479f0e4fe256b5dc71ebc7d8edaee1 Log: Improve error message of charlen for invalid UTF-8 string Modified files: lib/string.c Modified: lib/string.c (+26 -4) =================================================================== --- lib/string.c 2012-11-30 16:55:21 +0900 (19aea58) +++ lib/string.c 2012-11-30 18:43:25 +0900 (0105eb8) @@ -531,14 +531,36 @@ grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char if (*p & 0x80) { int b, w; int size; + int i; for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++); if (!w) { - GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen_utf8"); + GRN_LOG(ctx, GRN_LOG_WARNING, + "invalid utf8 string: the first bit is 0x80: <%.*s>: <%.*s>", + (int)(end - p), p, + (int)(end - str), str); return 0; } - for (size = 1; w--; size++) { - if (++p >= end || !*p || (*p & 0xc0) != 0x80) { - GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen_utf8"); + size = w + 1; + for (i = 1; i < size; i++) { + if (++p >= end) { + GRN_LOG(ctx, GRN_LOG_WARNING, + "invalid utf8 string: too short: " + "%d byte is required but %d byte is given: <%.*s>", + size, i, + (int)(end - str), str); + return 0; + } + if (!*p) { + GRN_LOG(ctx, GRN_LOG_WARNING, + "invalid utf8 string: NULL character is found: <%.*s>", + (int)(end - str), str); + return 0; + } + if ((*p & 0xc0) != 0x80) { + GRN_LOG(ctx, GRN_LOG_WARNING, + "invalid utf8 string: 0x80 is not allowed: <%.*s>: <%.*s>", + (int)(end - p), p, + (int)(end - str), str); return 0; } } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download