[Groonga-commit] groonga/groonga [master] normalize: support types output

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Feb 1 18:17:44 JST 2013


Kouhei Sutou	2013-02-01 18:17:44 +0900 (Fri, 01 Feb 2013)

  New Revision: 6f0e184dbd1a9f1ad706eefee7000535a7d76d2f
  https://github.com/groonga/groonga/commit/6f0e184dbd1a9f1ad706eefee7000535a7d76d2f

  Log:
    normalize: support types output

  Added files:
    test/command/suite/normalize/with_types.expected
    test/command/suite/normalize/with_types.test
  Modified files:
    lib/proc.c
    test/command/suite/normalize/ascii.expected
    test/command/suite/normalize/remove_blank.expected
    test/command/suite/normalize/remove_tokenized_delimiter.expected

  Modified: lib/proc.c (+57 -1)
===================================================================
--- lib/proc.c    2013-02-01 17:19:16 +0900 (1f61644)
+++ lib/proc.c    2013-02-01 18:17:44 +0900 (ed29c82)
@@ -2786,6 +2786,41 @@ parse_normalize_flags(grn_ctx *ctx, grn_obj *flag_names)
   return flags;
 }
 
+static const char *
+char_type_name(grn_char_type type)
+{
+  const char *name = "unknown";
+
+  switch (type) {
+  case GRN_CHAR_NULL :
+    name = "null";
+    break;
+  case GRN_CHAR_ALPHA :
+    name = "alpha";
+    break;
+  case GRN_CHAR_DIGIT :
+    name = "digit";
+    break;
+  case GRN_CHAR_SYMBOL :
+    name = "symbol";
+    break;
+  case GRN_CHAR_HIRAGANA :
+    name = "hiragana";
+    break;
+  case GRN_CHAR_KATAKANA :
+    name = "katakana";
+    break;
+  case GRN_CHAR_KANJI :
+    name = "kanji";
+    break;
+  case GRN_CHAR_OTHERS :
+    name = "others";
+    break;
+  }
+
+  return name;
+}
+
 static grn_obj *
 proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
@@ -2806,6 +2841,7 @@ proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
     grn_obj *normalizer;
     grn_obj *grn_string;
     int flags;
+    unsigned int normalized_n_characters;
 
     flags = parse_normalize_flags(ctx, flag_names);
     normalizer = grn_ctx_get(ctx,
@@ -2825,6 +2861,7 @@ proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
                                  normalizer, flags);
     grn_obj_unlink(ctx, normalizer);
 
+    GRN_OUTPUT_MAP_OPEN("RESULT", 2);
     {
       const char *normalized;
       unsigned int normalized_length_in_bytes;
@@ -2832,9 +2869,28 @@ proc_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
       grn_string_get_normalized(ctx, grn_string,
                                 &normalized,
                                 &normalized_length_in_bytes,
-                                NULL);
+                                &normalized_n_characters);
+      GRN_OUTPUT_CSTR("normalized");
       GRN_OUTPUT_STR(normalized, normalized_length_in_bytes);
     }
+    {
+      const unsigned char *types;
+
+      types = grn_string_get_types(ctx, grn_string);
+      GRN_OUTPUT_CSTR("types");
+      if (types) {
+        unsigned int i;
+        GRN_OUTPUT_ARRAY_OPEN("types", normalized_n_characters);
+        for (i = 0; i < normalized_n_characters; i++) {
+          GRN_OUTPUT_CSTR(char_type_name(types[i]));
+        }
+        GRN_OUTPUT_ARRAY_CLOSE();
+      } else {
+        GRN_OUTPUT_ARRAY_OPEN("types", 0);
+        GRN_OUTPUT_ARRAY_CLOSE();
+      }
+    }
+    GRN_OUTPUT_MAP_CLOSE();
 
     grn_obj_unlink(ctx, grn_string);
   }

  Modified: test/command/suite/normalize/ascii.expected (+1 -1)
===================================================================
--- test/command/suite/normalize/ascii.expected    2013-02-01 17:19:16 +0900 (948c499)
+++ test/command/suite/normalize/ascii.expected    2013-02-01 18:17:44 +0900 (8ac1422)
@@ -1,2 +1,2 @@
 normalize NormalizerAuto "aBcDe 123"
-[[0,0.0,0.0],"abcde 123"]
+[[0,0.0,0.0],{"normalized":"abcde 123","types":[]}]

  Modified: test/command/suite/normalize/remove_blank.expected (+1 -1)
===================================================================
--- test/command/suite/normalize/remove_blank.expected    2013-02-01 17:19:16 +0900 (c34a63c)
+++ test/command/suite/normalize/remove_blank.expected    2013-02-01 18:17:44 +0900 (7937d1e)
@@ -1,2 +1,2 @@
 normalize NormalizerAuto "a b c" REMOVE_BLANK
-[[0,0.0,0.0],"abc"]
+[[0,0.0,0.0],{"normalized":"abc","types":[]}]

  Modified: test/command/suite/normalize/remove_tokenized_delimiter.expected (+1 -1)
===================================================================
--- test/command/suite/normalize/remove_tokenized_delimiter.expected    2013-02-01 17:19:16 +0900 (8ef370e)
+++ test/command/suite/normalize/remove_tokenized_delimiter.expected    2013-02-01 18:17:44 +0900 (8c18cf6)
@@ -1,2 +1,2 @@
 normalize NormalizerAuto "a￾b￾c" REMOVE_TOKENIZED_DELIMITER
-[[0,0.0,0.0],"abc"]
+[[0,0.0,0.0],{"normalized":"abc","types":[]}]

  Added: test/command/suite/normalize/with_types.expected (+21 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalize/with_types.expected    2013-02-01 18:17:44 +0900 (e6e4be8)
@@ -0,0 +1,21 @@
+normalize NormalizerAuto "a1!あア亜💕 " WITH_TYPES
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  {
+    "normalized": "a1!あア亜💕 ",
+    "types": [
+      "alpha",
+      "digit",
+      "symbol",
+      "hiragana",
+      "katakana",
+      "kanji",
+      "others",
+      "others"
+    ]
+  }
+]

  Added: test/command/suite/normalize/with_types.test (+1 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/normalize/with_types.test    2013-02-01 18:17:44 +0900 (448b3f6)
@@ -0,0 +1 @@
+normalize NormalizerAuto "a1!あア亜💕 " WITH_TYPES
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index