[Groonga-commit] groonga/groonga [master] Move generic functions to grn_plugin_* from grn_tokenizer_*

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Feb 4 12:52:15 JST 2013


Kouhei Sutou	2013-02-04 12:52:15 +0900 (Mon, 04 Feb 2013)

  New Revision: d972baadf6513e4d622de30405d08bb384e27c5c
  https://github.com/groonga/groonga/commit/d972baadf6513e4d622de30405d08bb384e27c5c

  Log:
    Move generic functions to grn_plugin_* from grn_tokenizer_*
    
    grn_tokenizer_charlen() ->
    grn_plugin_charlen()
    
    grn_tokenizer_isspace() ->
    grn_plugin_isspace()
    
    They are not tokenizer-specific functions. They are also useful for
    normalizers, so the prefix is changed to grn_plugin_.
    
    grn_tokenizer_*() still exists for backward compatibility. Use
    grn_plugin_*() for newly written code.

  Modified files:
    include/groonga/plugin.h
    include/groonga/tokenizer.h
    lib/plugin.c
    lib/tokenizer.c

  Modified: include/groonga/plugin.h (+18 -0)
===================================================================
--- include/groonga/plugin.h    2013-02-04 12:43:51 +0900 (1f0bfe9)
+++ include/groonga/plugin.h    2013-02-04 12:52:15 +0900 (fda1dca)
@@ -180,6 +180,24 @@ GRN_API grn_obj *grn_plugin_proc_alloc(grn_ctx *ctx, grn_user_data *user_data,
  */
 GRN_API const char *grn_plugin_win32_base_dir(void);
 
+/*
+  grn_plugin_charlen() returns the length (#bytes) of the first character
+  in the string specified by `str_ptr' and `str_length'. If the starting bytes
+  are invalid as a character, grn_plugin_charlen() returns 0. See
+  grn_encoding in "groonga.h" for more details of `encoding'
+ */
+int grn_plugin_charlen(grn_ctx *ctx, const char *str_ptr,
+                       unsigned int str_length, grn_encoding encoding);
+
+/*
+  grn_plugin_isspace() returns the length (#bytes) of the first character
+  in the string specified by `str_ptr' and `str_length' if it is a space
+  character. Otherwise, grn_plugin_isspace() returns 0.
+ */
+int grn_plugin_isspace(grn_ctx *ctx, const char *str_ptr,
+                       unsigned int str_length, grn_encoding encoding);
+
+
 
 #ifdef __cplusplus
 }

  Modified: include/groonga/tokenizer.h (+4 -0)
===================================================================
--- include/groonga/tokenizer.h    2013-02-04 12:43:51 +0900 (30d061b)
+++ include/groonga/tokenizer.h    2013-02-04 12:52:15 +0900 (5865ae4)
@@ -34,6 +34,8 @@ extern "C" {
   in the string specified by `str_ptr' and `str_length'. If the starting bytes
   are invalid as a character, grn_tokenizer_charlen() returns 0. See
   grn_encoding in "groonga.h" for more details of `encoding'
+
+  Deprecated. Use grn_plugin_charlen() instead.
  */
 int grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr,
                           unsigned int str_length, grn_encoding encoding);
@@ -42,6 +44,8 @@ int grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr,
   grn_tokenizer_isspace() returns the length (#bytes) of the first character
   in the string specified by `str_ptr' and `str_length' if it is a space
   character. Otherwise, grn_tokenizer_isspace() returns 0.
+
+  Deprecated. Use grn_plugin_isspace() instead.
  */
 int grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr,
                           unsigned int str_length, grn_encoding encoding);

  Modified: lib/plugin.c (+53 -0)
===================================================================
--- lib/plugin.c    2013-02-04 12:43:51 +0900 (c96dfc5)
+++ lib/plugin.c    2013-02-04 12:52:15 +0900 (2bda3fd)
@@ -588,3 +588,56 @@ grn_plugin_win32_base_dir(void)
   return NULL;
 #endif /* WIN32 */
 }
+
+/*
+  grn_plugin_charlen() takes the length of a string, unlike grn_charlen_().
+ */
+int
+grn_plugin_charlen(grn_ctx *ctx, const char *str_ptr,
+                   unsigned int str_length, grn_encoding encoding)
+{
+  return grn_charlen_(ctx, str_ptr, str_ptr + str_length, encoding);
+}
+
+/*
+  grn_plugin_isspace() takes the length of a string, unlike grn_isspace().
+ */
+int
+grn_plugin_isspace(grn_ctx *ctx, const char *str_ptr,
+                   unsigned int str_length, grn_encoding encoding)
+{
+  if ((str_ptr == NULL) || (str_length == 0)) {
+    return 0;
+  }
+  switch ((unsigned char)str_ptr[0]) {
+  case ' ' :
+  case '\f' :
+  case '\n' :
+  case '\r' :
+  case '\t' :
+  case '\v' :
+    return 1;
+  case 0x81 :
+    if ((encoding == GRN_ENC_SJIS) && (str_length >= 2) &&
+        ((unsigned char)str_ptr[1] == 0x40)) {
+      return 2;
+    }
+    break;
+  case 0xA1 :
+    if ((encoding == GRN_ENC_EUC_JP) && (str_length >= 2) &&
+        ((unsigned char)str_ptr[1] == 0xA1)) {
+      return 2;
+    }
+    break;
+  case 0xE3 :
+    if ((encoding == GRN_ENC_UTF8) && (str_length >= 3) &&
+        ((unsigned char)str_ptr[1] == 0x80) &&
+        ((unsigned char)str_ptr[2] == 0x80)) {
+      return 3;
+    }
+    break;
+  default :
+    break;
+  }
+  return 0;
+}

  Modified: lib/tokenizer.c (+4 -37)
===================================================================
--- lib/tokenizer.c    2013-02-04 12:43:51 +0900 (ba65c01)
+++ lib/tokenizer.c    2013-02-04 12:52:15 +0900 (f609db6)
@@ -29,56 +29,23 @@
 #include "token.h"
 
 /*
-  grn_tokenizer_charlen() takes the length of a string, unlike grn_charlen_().
+  Just for backward compatibility. See grn_plugin_charlen() instead.
  */
 int
 grn_tokenizer_charlen(grn_ctx *ctx, const char *str_ptr,
                       unsigned int str_length, grn_encoding encoding)
 {
-  return grn_charlen_(ctx, str_ptr, str_ptr + str_length, encoding);
+  return grn_plugin_charlen(ctx, str_ptr, str_length, encoding);
 }
 
 /*
-  grn_tokenizer_isspace() takes the length of a string, unlike grn_isspace().
+  Just for backward compatibility. See grn_plugin_isspace() instead.
  */
 int
 grn_tokenizer_isspace(grn_ctx *ctx, const char *str_ptr,
                       unsigned int str_length, grn_encoding encoding)
 {
-  if ((str_ptr == NULL) || (str_length == 0)) {
-    return 0;
-  }
-  switch ((unsigned char)str_ptr[0]) {
-  case ' ' :
-  case '\f' :
-  case '\n' :
-  case '\r' :
-  case '\t' :
-  case '\v' :
-    return 1;
-  case 0x81 :
-    if ((encoding == GRN_ENC_SJIS) && (str_length >= 2) &&
-        ((unsigned char)str_ptr[1] == 0x40)) {
-      return 2;
-    }
-    break;
-  case 0xA1 :
-    if ((encoding == GRN_ENC_EUC_JP) && (str_length >= 2) &&
-        ((unsigned char)str_ptr[1] == 0xA1)) {
-      return 2;
-    }
-    break;
-  case 0xE3 :
-    if ((encoding == GRN_ENC_UTF8) && (str_length >= 3) &&
-        ((unsigned char)str_ptr[1] == 0x80) &&
-        ((unsigned char)str_ptr[2] == 0x80)) {
-      return 3;
-    }
-    break;
-  default :
-    break;
-  }
-  return 0;
+  return grn_plugin_isspace(ctx, str_ptr, str_length, encoding);
 }
 
 grn_bool
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index