[Groonga-commit] groonga/groonga [master] Add 'tokenized_' prefix

Kouhei Sutou null+****@clear*****
Fri Nov 9 16:46:19 JST 2012


Kouhei Sutou	2012-11-09 16:46:19 +0900 (Fri, 09 Nov 2012)

  New Revision: d0b86c01e129930d43917ed4a715b395991775ec
  https://github.com/groonga/groonga/commit/d0b86c01e129930d43917ed4a715b395991775ec

  Log:
    Add 'tokenized_' prefix

  Modified files:
    include/groonga/tokenizer.h
    lib/string.c
    lib/token.c
    lib/tokenizer.c
    test/unit/core/test-tokenizer.c

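  In short, this commit renames two public tokenizer API functions; their
  signatures are unchanged:

    grn_tokenizer_is_delimiter()   -> grn_tokenizer_is_tokenized_delimiter()
    grn_tokenizer_have_delimiter() -> grn_tokenizer_have_tokenized_delimiter()
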
  Modified: include/groonga/tokenizer.h (+14 -14)
===================================================================
--- include/groonga/tokenizer.h    2012-11-09 16:41:13 +0900 (ea34ffe)
+++ include/groonga/tokenizer.h    2012-11-09 16:46:19 +0900 (9ddd770)
@@ -47,24 +47,24 @@ int grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr,
                           unsigned int str_length, grn_encoding encoding);
 
 /*
-  grn_tokenizer_is_delimiter() returns whether is the first character
-  in the string specified by `str_ptr' and `str_length' the special
-  delimiter character or not.
+  grn_tokenizer_is_tokenized_delimiter() returns whether is the first
+  character in the string specified by `str_ptr' and `str_length' the
+  special tokenized delimiter character or not.
  */
-grn_bool grn_tokenizer_is_delimiter(grn_ctx *ctx,
-                                    const char *str_ptr,
-                                    unsigned int str_length,
-                                    grn_encoding encoding);
+grn_bool grn_tokenizer_is_tokenized_delimiter(grn_ctx *ctx,
+                                              const char *str_ptr,
+                                              unsigned int str_length,
+                                              grn_encoding encoding);
 
 /*
-  grn_tokenizer_have_delimiter() returns whether is there the special
-  delimiter character in the string specified by `str_ptr' and
-  `str_length' the special delimiter character or not.
+  grn_tokenizer_have_tokenized_delimiter() returns whether is there
+  the special delimiter character in the string specified by `str_ptr'
+  and `str_length' the special tokenized delimiter character or not.
  */
-grn_bool grn_tokenizer_have_delimiter(grn_ctx *ctx,
-                                      const char *str_ptr,
-                                      unsigned int str_length,
-                                      grn_encoding encoding);
+grn_bool grn_tokenizer_have_tokenized_delimiter(grn_ctx *ctx,
+                                                const char *str_ptr,
+                                                unsigned int str_length,
+                                                grn_encoding encoding);
 
 /*
   grn_tokenizer_query is a structure for storing a query. See the following

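  For reference, a minimal caller-side sketch of the renamed functions. The
  standalone main(), the grn_init()/grn_ctx_init() setup and the sample
  strings are illustrative assumptions and not part of this commit; the
  function signatures come from the header above.

    #include <stdio.h>
    #include <string.h>

    #include <groonga.h>
    #include <groonga/tokenizer.h>

    int
    main(void)
    {
      grn_ctx ctx;
      /* A pre-tokenized string: tokens joined by the special tokenized
         delimiter character (GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8). */
      char pre_tokenized[64];
      const char *plain = "full text";

      grn_init();
      grn_ctx_init(&ctx, 0);

      snprintf(pre_tokenized, sizeof(pre_tokenized), "full%stext",
               GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8);

      /* Is the first character the tokenized delimiter? -> 1 (true) */
      printf("%d\n",
             grn_tokenizer_is_tokenized_delimiter(
               &ctx,
               GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8,
               strlen(GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8),
               GRN_ENC_UTF8));

      /* Does the string contain the tokenized delimiter anywhere? */
      printf("%d\n",  /* -> 1 (true) */
             grn_tokenizer_have_tokenized_delimiter(&ctx,
                                                    pre_tokenized,
                                                    strlen(pre_tokenized),
                                                    GRN_ENC_UTF8));
      printf("%d\n",  /* -> 0 (false) */
             grn_tokenizer_have_tokenized_delimiter(&ctx,
                                                    plain,
                                                    strlen(plain),
                                                    GRN_ENC_UTF8));

      grn_ctx_fin(&ctx);
      grn_fin();
      return 0;
    }

  Because GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8 marks already-tokenized
  input, grn_tokenizer_have_tokenized_delimiter() is what lib/token.c uses
  below to decide whether to split on that marker.
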
  Modified: lib/string.c (+4 -4)
===================================================================
--- lib/string.c    2012-11-09 16:41:13 +0900 (0f90636)
+++ lib/string.c    2012-11-09 16:46:19 +0900 (3881295)
@@ -595,7 +595,7 @@ utf8_normalize(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
       break;
     }
     if (remove_tokenizer_delimiter_p &&
-        grn_tokenizer_is_delimiter(ctx, s, ls, GRN_ENC_UTF8)) {
+        grn_tokenizer_is_tokenized_delimiter(ctx, s, ls, GRN_ENC_UTF8)) {
       continue;
     }
     if ((p = (unsigned char *)grn_nfkc_map1(s))) {
@@ -1084,9 +1084,9 @@ grn_fake_string_open(grn_ctx *ctx, grn_string *string)
     char *destination = nstr->normalized;
     unsigned int destination_length = 0;
     while ((char_length = grn_charlen(ctx, source_current, source_end)) > 0) {
-      if (!grn_tokenizer_is_delimiter(ctx,
-                                      source_current, char_length,
-                                      ctx->encoding)) {
+      if (!grn_tokenizer_is_tokenized_delimiter(ctx,
+                                                source_current, char_length,
+                                                ctx->encoding)) {
         memcpy(destination, source_current, char_length);
         destination += char_length;
         destination_length += char_length;

  Modified: lib/token.c (+4 -3)
===================================================================
--- lib/token.c    2012-11-09 16:41:13 +0900 (bd54953)
+++ lib/token.c    2012-11-09 16:46:19 +0900 (764c5c1)
@@ -114,9 +114,10 @@ delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
   grn_table_get_info(ctx, table, &table_flags, &tokenizer->encoding, NULL);
 
   tokenizer->have_tokenized_delimiter =
-    grn_tokenizer_have_delimiter(ctx,
-                                 GRN_TEXT_VALUE(str), GRN_TEXT_LEN(str),
-                                 tokenizer->encoding);
+    grn_tokenizer_have_tokenized_delimiter(ctx,
+                                           GRN_TEXT_VALUE(str),
+                                           GRN_TEXT_LEN(str),
+                                           tokenizer->encoding);
   if (tokenizer->have_tokenized_delimiter) {
     tokenizer->delimiter = GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8;
     tokenizer->delimiter_len = strlen(tokenizer->delimiter);

  Modified: lib/tokenizer.c (+11 -5)
===================================================================
--- lib/tokenizer.c    2012-11-09 16:41:13 +0900 (7e3581b)
+++ lib/tokenizer.c    2012-11-09 16:46:19 +0900 (566d403)
@@ -82,8 +82,10 @@ grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr,
 }
 
 grn_bool
-grn_tokenizer_is_delimiter(grn_ctx *ctx, const char *str_ptr,
-                           unsigned int str_length, grn_encoding encoding)
+grn_tokenizer_is_tokenized_delimiter(grn_ctx *ctx,
+                                     const char *str_ptr,
+                                     unsigned int str_length,
+                                     grn_encoding encoding)
 {
   const unsigned char *binary_string = str_ptr;
 
@@ -101,8 +103,10 @@ grn_tokenizer_is_delimiter(grn_ctx *ctx, const char *str_ptr,
 }
 
 grn_bool
-grn_tokenizer_have_delimiter(grn_ctx *ctx, const char *str_ptr,
-                             unsigned int str_length, grn_encoding encoding)
+grn_tokenizer_have_tokenized_delimiter(grn_ctx *ctx,
+                                       const char *str_ptr,
+                                       unsigned int str_length,
+                                       grn_encoding encoding)
 {
   int char_length;
   const char *current = str_ptr;
@@ -117,7 +121,9 @@ grn_tokenizer_have_delimiter(grn_ctx *ctx, const char *str_ptr,
   }
 
   while ((char_length = grn_charlen_(ctx, current, end, encoding)) > 0) {
-    if (grn_tokenizer_is_delimiter(ctx, current, char_length, encoding)) {
+    if (grn_tokenizer_is_tokenized_delimiter(ctx,
+                                             current, char_length,
+                                             encoding)) {
       return GRN_TRUE;
     }
     current += char_length;

  Modified: test/unit/core/test-tokenizer.c (+20 -16)
===================================================================
--- test/unit/core/test-tokenizer.c    2012-11-09 16:41:13 +0900 (46903e1)
+++ test/unit/core/test-tokenizer.c    2012-11-09 16:46:19 +0900 (f2218a8)
@@ -23,10 +23,10 @@
 
 #include "../lib/grn-assertions.h"
 
-void data_is_delimiter(void);
-void test_is_delimiter(gconstpointer data);
-void data_have_delimiter(void);
-void test_have_delimiter(gconstpointer data);
+void data_is_tokenized_delimiter(void);
+void test_is_tokenized_delimiter(gconstpointer data);
+void data_have_tokenized_delimiter(void);
+void test_have_tokenized_delimiter(gconstpointer data);
 
 static grn_ctx context;
 static grn_obj *db;
@@ -46,7 +46,7 @@ teardown (void)
 }
 
 void
-data_is_delimiter(void)
+data_is_tokenized_delimiter(void)
 {
 #define ADD_DATUM(label, expected, input, encoding)                     \
   gcut_add_datum(label,                                                 \
@@ -68,7 +68,7 @@ data_is_delimiter(void)
 }
 
 void
-test_is_delimiter(gconstpointer data)
+test_is_tokenized_delimiter(gconstpointer data)
 {
   const gchar *input;
   grn_encoding encoding;
@@ -77,16 +77,18 @@ test_is_delimiter(gconstpointer data)
   GRN_CTX_SET_ENCODING(&context, encoding);
   input = gcut_data_get_string(data, "input");
   if (gcut_data_get_boolean(data, "expected")) {
-    cut_assert_true(grn_tokenizer_is_delimiter(&context, input, strlen(input),
-                                               encoding));
+    cut_assert_true(grn_tokenizer_is_tokenized_delimiter(&context,
+                                                         input, strlen(input),
+                                                         encoding));
   } else {
-    cut_assert_false(grn_tokenizer_is_delimiter(&context, input, strlen(input),
-                                                encoding));
+    cut_assert_false(grn_tokenizer_is_tokenized_delimiter(&context,
+                                                          input, strlen(input),
+                                                          encoding));
   }
 }
 
 void
-data_have_delimiter(void)
+data_have_tokenized_delimiter(void)
 {
 #define ADD_DATUM(label, expected, input)                               \
   gcut_add_datum(label,                                                 \
@@ -105,7 +107,7 @@ data_have_delimiter(void)
 }
 
 void
-test_have_delimiter(gconstpointer data)
+test_have_tokenized_delimiter(gconstpointer data)
 {
   const gchar *input;
   grn_encoding encoding = GRN_ENC_UTF8;
@@ -113,10 +115,12 @@ test_have_delimiter(gconstpointer data)
   GRN_CTX_SET_ENCODING(&context, encoding);
   input = gcut_data_get_string(data, "input");
   if (gcut_data_get_boolean(data, "expected")) {
-    cut_assert_true(grn_tokenizer_have_delimiter(&context, input, strlen(input),
-                                                 encoding));
+    cut_assert_true(grn_tokenizer_have_tokenized_delimiter(&context,
+                                                           input, strlen(input),
+                                                           encoding));
   } else {
-    cut_assert_false(grn_tokenizer_have_delimiter(&context, input, strlen(input),
-                                                  encoding));
+    cut_assert_false(grn_tokenizer_have_tokenized_delimiter(&context,
+                                                            input, strlen(input),
+                                                            encoding));
   }
 }