susumu.yata
null+****@clear*****
Tue Mar 5 12:05:48 JST 2013
susumu.yata 2013-03-05 12:05:48 +0900 (Tue, 05 Mar 2013) New Revision: 9325846e8755aa25fe97b10df68b1bbeea40c41f https://github.com/groonga/groonga/commit/9325846e8755aa25fe97b10df68b1bbeea40c41f Log: Improve the speed of grn_str_charlen_utf8 Use GRN_BIT_SCAN_REV to remove the first for-loop. Simplify the second for-loop. Modified files: lib/str.c Modified: lib/str.c (+20 -12) =================================================================== --- lib/str.c 2013-03-05 00:02:30 +0900 (cc9fd67) +++ lib/str.c 2013-03-05 12:05:48 +0900 (d112ad0) @@ -31,27 +31,35 @@ grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char { /* MEMO: This function allows non-null-terminated string as str. */ /* But requires the end of string. */ - const unsigned char *p = str; - if (end <= p || !*p) { return 0; } - if (*p & 0x80) { - int b, w; - int size; - for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++); - if (!w) { + if (end <= str || !*str) { + return 0; + } + if (*str & 0x80) { + int i; + int len; + GRN_BIT_SCAN_REV(~(*str << 24), len); + len = 31 - len; + if ((unsigned int)(len - 2) >= 3) { /* (len == 1 || len >= 5) */ + /* Error: invalid first byte. */ GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen_utf8"); return 0; } - for (size = 1; w--; size++) { - if (++p >= end || !*p || (*p & 0xc0) != 0x80) { - GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen_utf8"); + if (str + len > end) { + /* Error: the character is incomplete. */ + GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen_utf8"); + return 0; + } + for (i = 1; i < len; ++i) { + if ((str[i] & 0xc0) != 0x80) { + /* Error: the (i+1)-th byte is invalid. */ + GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(3) on grn_str_charlen_utf8"); return 0; } } - return size; + return len; } else { return 1; } - return 0; } unsigned int -------------- next part -------------- HTMLの添付ファイルを保管しました...Download