[Groonga-commit] groonga/groonga-normalizer-mysql [master] Extract decompose code

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Feb 1 17:40:13 JST 2013


Kouhei Sutou	2013-02-01 17:40:13 +0900 (Fri, 01 Feb 2013)

  New Revision: 6de4c6a3aaaed8c068827a81c229923d3316ab2d
  https://github.com/groonga/groonga-normalizer-mysql/commit/6de4c6a3aaaed8c068827a81c229923d3316ab2d

  Log:
    Extract decompose code
    
    UTF-8 character ->
    Plane + low unicode code point

  Modified files:
    normalizers/mysql.c

  Modified: normalizers/mysql.c (+40 -33)
===================================================================
--- normalizers/mysql.c    2013-02-01 14:52:45 +0900 (482b634)
+++ normalizers/mysql.c    2013-02-01 17:40:13 +0900 (161cdb6)
@@ -1575,6 +1575,45 @@ unichar_to_utf8(uint32_t unichar, char *output)
   return n_bytes;
 }
 
+static inline void
+decompose_character(const char *rest, int character_length,
+                    int *plane, uint32_t *low_code)
+{
+  switch (character_length) {
+  case 1 :
+    *plane = 0x00;
+    *low_code = rest[0] & 0x7f;
+    break;
+  case 2 :
+    *plane = 0x00;
+    *low_code = (rest[0] & 0x1f << 6) + (rest[1] & 0x3f);
+    break;
+  case 3 :
+    *plane = rest[0] & 0x0f;
+    *low_code =
+      ((rest[0] & 0x0f) << 12) +
+      ((rest[1] & 0x3f) << 6) +
+      (rest[2] & 0x3f);
+    break;
+  case 4 :
+    *plane = ((rest[0] & 0x07) << 6) + (rest[1] & 0x3f);
+    *low_code =
+      ((rest[0] & 0x07) << 15) +
+      ((rest[1] & 0x3f) << 12) +
+      ((rest[2] & 0x3f) << 6) +
+      (rest[3] & 0x3f);
+    if (*plane > 0xff) {
+      *plane = -1;
+    }
+    break;
+  default :
+    *plane = -1;
+    *low_code = 0x00;
+    break;
+  }
+}
+
+
 static void
 normalize(grn_ctx *ctx, grn_obj *string)
 {
@@ -1604,39 +1643,7 @@ normalize(grn_ctx *ctx, grn_obj *string)
       break;
     }
 
-    switch (character_length) {
-    case 1 :
-      plane = 0x00;
-      low_code = rest[0] & 0x7f;
-      break;
-    case 2 :
-      plane = 0x00;
-      low_code = (rest[0] & 0x1f << 6) + (rest[1] & 0x3f);
-      break;
-    case 3 :
-      plane = rest[0] & 0x0f;
-      low_code =
-        ((rest[0] & 0x0f) << 12) +
-        ((rest[1] & 0x3f) << 6) +
-        (rest[2] & 0x3f);
-      break;
-    case 4 :
-      plane = ((rest[0] & 0x07) << 6) + (rest[1] & 0x3f);
-      low_code =
-        ((rest[0] & 0x07) << 15) +
-        ((rest[1] & 0x3f) << 12) +
-        ((rest[2] & 0x3f) << 6) +
-        (rest[3] & 0x3f);
-      if (plane > 0xff) {
-        plane = -1;
-      }
-      break;
-    default :
-      plane = -1;
-      low_code = 0x00;
-      break;
-    }
-
+    decompose_character(rest, character_length, &plane, &low_code);
     if (remove_blank_p && character_length == 1 && rest[0] == ' ') {
       /* TODO: set GRN_CHAR_BLANK */
     } else {
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index