[Groonga-commit] groonga/groonga [master] Improve the speed of grn_str_charlen_utf8

Back to archive index

susumu.yata null+****@clear*****
Tue Mar 5 12:05:48 JST 2013


susumu.yata	2013-03-05 12:05:48 +0900 (Tue, 05 Mar 2013)

  New Revision: 9325846e8755aa25fe97b10df68b1bbeea40c41f
  https://github.com/groonga/groonga/commit/9325846e8755aa25fe97b10df68b1bbeea40c41f

  Log:
    Improve the speed of grn_str_charlen_utf8
    
    Use GRN_BIT_SCAN_REV to remove the first for-loop.
    Simplify the second for-loop.

  Modified files:
    lib/str.c

  Modified: lib/str.c (+20 -12)
===================================================================
--- lib/str.c    2013-03-05 00:02:30 +0900 (cc9fd67)
+++ lib/str.c    2013-03-05 12:05:48 +0900 (d112ad0)
@@ -31,27 +31,35 @@ grn_str_charlen_utf8(grn_ctx *ctx, const unsigned char *str, const unsigned char
 {
   /* MEMO: This function allows non-null-terminated string as str. */
   /*       But requires the end of string. */
-  const unsigned char *p = str;
-  if (end <= p || !*p) { return 0; }
-  if (*p & 0x80) {
-    int b, w;
-    int size;
-    for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
-    if (!w) {
+  if (end <= str || !*str) {
+    return 0;
+  }
+  if (*str & 0x80) {
+    int i;
+    int len;
+    GRN_BIT_SCAN_REV(~(*str << 24), len);
+    len = 31 - len;
+    if ((unsigned int)(len - 2) >= 3) {  /* (len == 1 || len >= 5) */
+      /* Error: invalid first byte. */
       GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(1) on grn_str_charlen_utf8");
       return 0;
     }
-    for (size = 1; w--; size++) {
-      if (++p >= end || !*p || (*p & 0xc0) != 0x80) {
-        GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen_utf8");
+    if (str + len > end) {
+      /* Error: the character is incomplete. */
+      GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(2) on grn_str_charlen_utf8");
+      return 0;
+    }
+    for (i = 1; i < len; ++i) {
+      if ((str[i] & 0xc0) != 0x80) {
+        /* Error: the (i+1)-th byte is invalid. */
+        GRN_LOG(ctx, GRN_LOG_WARNING, "invalid utf8 string(3) on grn_str_charlen_utf8");
         return 0;
       }
     }
-    return size;
+    return len;
   } else {
     return 1;
   }
-  return 0;
 }
 
 unsigned int
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index