[Groonga-commit] groonga/groonga at e199214 [master] Support grn_tokenizer_query on stack for internal use

Back to archive index

Kouhei Sutou null+****@clear*****
Wed May 9 10:49:16 JST 2018


Kouhei Sutou	2018-05-09 10:49:16 +0900 (Wed, 09 May 2018)

  New Revision: e199214f7e35cab4afdfddc914e8cfb1918c3433
  https://github.com/groonga/groonga/commit/e199214f7e35cab4afdfddc914e8cfb1918c3433

  Message:
    Support grn_tokenizer_query on stack for internal use

  Modified files:
    lib/grn_tokenizer.h
    lib/tokenizer.c

  Modified: lib/grn_tokenizer.h (+25 -0)
===================================================================
--- lib/grn_tokenizer.h    2018-05-09 10:47:07 +0900 (a1ad61517)
+++ lib/grn_tokenizer.h    2018-05-09 10:49:16 +0900 (badf7bc45)
@@ -45,8 +45,33 @@ typedef struct _grn_tokenizer_query {
   grn_obj *lexicon;
   unsigned int normalize_flags;
   grn_bool need_normalize;
+  grn_bool need_delimiter_check;
 } grn_tokenizer_query;
 
+grn_rc
+grn_tokenizer_query_init(grn_ctx *ctx,
+                         grn_tokenizer_query *query);
+void
+grn_tokenizer_query_fin(grn_ctx *ctx,
+                        grn_tokenizer_query *query);
+grn_rc
+grn_tokenizer_query_set_raw_string(grn_ctx *ctx,
+                                   grn_tokenizer_query *query,
+                                   const char *string,
+                                   size_t string_length);
+grn_rc
+grn_tokenizer_query_set_flags(grn_ctx *ctx,
+                              grn_tokenizer_query *query,
+                              unsigned int flags);
+grn_rc
+grn_tokenizer_query_set_mode(grn_ctx *ctx,
+                             grn_tokenizer_query *query,
+                             grn_tokenize_mode mode);
+grn_rc
+grn_tokenizer_query_set_lexicon(grn_ctx *ctx,
+                                grn_tokenizer_query *query,
+                                grn_obj *lexicon);
+
 #ifdef __cplusplus
 }
 #endif

  Modified: lib/tokenizer.c (+149 -45)
===================================================================
--- lib/tokenizer.c    2018-05-09 10:47:07 +0900 (bd7a4dff5)
+++ lib/tokenizer.c    2018-05-09 10:49:16 +0900 (6ad0121a5)
@@ -120,6 +120,21 @@ grn_tokenizer_query_ensure_normalized(grn_ctx *ctx, grn_tokenizer_query *query)
     return;
   }
 
+  query->need_delimiter_check = GRN_TRUE;
+}
+
+static void
+grn_tokenizer_query_ensure_have_tokenized_delimiter(grn_ctx *ctx,
+                                                    grn_tokenizer_query *query)
+{
+  grn_tokenizer_query_ensure_normalized(ctx, query);
+
+  if (!query->need_delimiter_check) {
+    return;
+  }
+
+  query->need_delimiter_check = GRN_FALSE;
+
   if (query->flags & GRN_TOKEN_CURSOR_ENABLE_TOKENIZED_DELIMITER) {
     const char *normalized_string;
     unsigned int normalized_string_length;
@@ -139,6 +154,26 @@ grn_tokenizer_query_ensure_normalized(grn_ctx *ctx, grn_tokenizer_query *query)
   }
 }
 
+grn_rc
+grn_tokenizer_query_init(grn_ctx *ctx, grn_tokenizer_query *query)
+{
+  query->normalize_flags = 0;
+  query->normalized_query = NULL;
+  query->query_buf = NULL;
+  query->ptr = NULL;
+  query->length = 0;
+  query->flags = 0;
+  query->tokenize_mode = GRN_TOKENIZE_ADD;
+  query->token_mode = query->tokenize_mode;
+  query->lexicon = NULL;
+  query->encoding = ctx->encoding;
+
+  query->need_normalize = GRN_TRUE;
+  query->need_delimiter_check = GRN_TRUE;
+
+  return ctx->rc;
+}
+
 grn_tokenizer_query *
 grn_tokenizer_query_open(grn_ctx *ctx, int num_args, grn_obj **args,
                          unsigned int normalize_flags)
@@ -166,51 +201,28 @@ grn_tokenizer_query_open(grn_ctx *ctx, int num_args, grn_obj **args,
   {
     grn_tokenizer_query * const query =
         GRN_PLUGIN_MALLOC(ctx, sizeof(grn_tokenizer_query));
-    if (query == NULL) {
+    if (!query) {
+      GRN_API_RETURN(NULL);
+    }
+    grn_tokenizer_query_init(ctx, query);
+    grn_tokenizer_query_set_raw_string(ctx,
+                                       query,
+                                       GRN_TEXT_VALUE(query_str),
+                                       GRN_TEXT_LEN(query_str));
+    if (ctx->rc != GRN_SUCCESS) {
+      GRN_PLUGIN_FREE(ctx, query);
       GRN_API_RETURN(NULL);
     }
-    query->normalize_flags = normalize_flags;
-    query->normalized_query = NULL;
-    query->query_buf = NULL;
     if (flags) {
-      query->flags = GRN_UINT32_VALUE(flags);
-    } else {
-      query->flags = 0;
+      grn_tokenizer_query_set_flags(ctx, query, GRN_UINT32_VALUE(flags));
     }
     if (tokenize_mode) {
-      query->tokenize_mode = GRN_UINT32_VALUE(tokenize_mode);
-    } else {
-      query->tokenize_mode = GRN_TOKENIZE_ADD;
+      grn_tokenizer_query_set_mode(ctx, query, GRN_UINT32_VALUE(tokenize_mode));
     }
-    query->token_mode = query->tokenize_mode;
+    grn_tokenizer_query_set_normalize_flags(ctx, query, normalize_flags);
+    grn_tokenizer_query_set_lexicon(ctx, query, args[0]);
 
-    {
-      grn_obj * const table = args[0];
-      grn_encoding table_encoding;
-      query->lexicon = table;
-      grn_table_get_info(ctx, table, NULL, &table_encoding, NULL,
-                         NULL, NULL);
-      query->encoding = table_encoding;
-    }
-    {
-      unsigned int query_length = GRN_TEXT_LEN(query_str);
-      char *query_buf = (char *)GRN_PLUGIN_MALLOC(ctx, query_length + 1);
-
-      if (query_buf == NULL) {
-        GRN_PLUGIN_FREE(ctx, query);
-        GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
-                         "[tokenizer] failed to duplicate query");
-        GRN_API_RETURN(NULL);
-      }
-      grn_memcpy(query_buf, GRN_TEXT_VALUE(query_str), query_length);
-      query_buf[query_length] = '\0';
-      query->query_buf = query_buf;
-      query->ptr = query_buf;
-      query->length = query_length;
-    }
-
-    query->need_normalize = GRN_TRUE;
-    grn_tokenizer_query_ensure_normalized(ctx, query);
+    grn_tokenizer_query_ensure_have_tokenized_delimiter(ctx, query);
 
     GRN_API_RETURN(query);
   }
@@ -223,16 +235,22 @@ grn_tokenizer_query_create(grn_ctx *ctx, int num_args, grn_obj **args)
 }
 
 void
+grn_tokenizer_query_fin(grn_ctx *ctx, grn_tokenizer_query *query)
+{
+  if (query->normalized_query) {
+    grn_obj_unlink(ctx, query->normalized_query);
+  }
+  if (query->query_buf) {
+    GRN_PLUGIN_FREE(ctx, query->query_buf);
+  }
+}
+
+void
 grn_tokenizer_query_close(grn_ctx *ctx, grn_tokenizer_query *query)
 {
   GRN_API_ENTER;
   if (query) {
-    if (query->normalized_query) {
-      grn_obj_unlink(ctx, query->normalized_query);
-    }
-    if (query->query_buf) {
-      GRN_PLUGIN_FREE(ctx, query->query_buf);
-    }
+    grn_tokenizer_query_fin(ctx, query);
     GRN_PLUGIN_FREE(ctx, query);
   }
   GRN_API_RETURN();
@@ -274,6 +292,39 @@ grn_tokenizer_query_get_normalized_string(grn_ctx *ctx,
   GRN_API_RETURN(query->normalized_query);
 }
 
+grn_rc
+grn_tokenizer_query_set_raw_string(grn_ctx *ctx,
+                                   grn_tokenizer_query *query,
+                                   const char *string,
+                                   size_t string_length)
+{
+  GRN_API_ENTER;
+
+  if (query->query_buf) {
+    GRN_PLUGIN_FREE(ctx, query->query_buf);
+  }
+
+  if (string_length == 0) {
+    query->query_buf = NULL;
+    query->ptr = NULL;
+    query->length = 0;
+    query->need_normalize = GRN_TRUE;
+  } else {
+    query->query_buf = (char *)GRN_PLUGIN_MALLOC(ctx, string_length + 1);
+    if (!query->query_buf) {
+      GRN_PLUGIN_ERROR(ctx, GRN_TOKENIZER_ERROR,
+                       "[tokenizer][query] failed to duplicate query");
+      GRN_API_RETURN(ctx->rc);
+    }
+    grn_memcpy(query->query_buf, string, string_length);
+    query->query_buf[string_length] = '\0';
+    query->ptr = query->query_buf;
+    query->length = string_length;
+  }
+
+  GRN_API_RETURN(ctx->rc);
+}
+
 const char *
 grn_tokenizer_query_get_raw_string(grn_ctx *ctx,
                                    grn_tokenizer_query *query,
@@ -293,6 +344,19 @@ grn_tokenizer_query_get_encoding(grn_ctx *ctx, grn_tokenizer_query *query)
   GRN_API_RETURN(query->encoding);
 }
 
+grn_rc
+grn_tokenizer_query_set_flags(grn_ctx *ctx,
+                              grn_tokenizer_query *query,
+                              unsigned int flags)
+{
+  GRN_API_ENTER;
+  if (query->flags != flags) {
+    query->flags = flags;
+    query->need_normalize = GRN_TRUE;
+  }
+  GRN_API_RETURN(ctx->rc);
+}
+
 unsigned int
 grn_tokenizer_query_get_flags(grn_ctx *ctx, grn_tokenizer_query *query)
 {
@@ -305,10 +369,24 @@ grn_tokenizer_query_have_tokenized_delimiter(grn_ctx *ctx,
                                              grn_tokenizer_query *query)
 {
   GRN_API_ENTER;
-  grn_tokenizer_query_ensure_normalized(ctx, query);
+  grn_tokenizer_query_ensure_have_tokenized_delimiter(ctx, query);
   GRN_API_RETURN(query->have_tokenized_delimiter);
 }
 
+grn_rc
+grn_tokenizer_query_set_mode(grn_ctx *ctx,
+                             grn_tokenizer_query *query,
+                             grn_tokenize_mode mode)
+{
+  GRN_API_ENTER;
+  if (query->tokenize_mode != mode) {
+    query->tokenize_mode = mode;
+    query->token_mode = query->tokenize_mode;
+    query->need_normalize = GRN_TRUE;
+  }
+  GRN_API_RETURN(ctx->rc);
+}
+
 grn_tokenize_mode
 grn_tokenizer_query_get_mode(grn_ctx *ctx, grn_tokenizer_query *query)
 {
@@ -316,6 +394,32 @@ grn_tokenizer_query_get_mode(grn_ctx *ctx, grn_tokenizer_query *query)
   GRN_API_RETURN(query->tokenize_mode);
 }
 
+grn_rc
+grn_tokenizer_query_set_lexicon(grn_ctx *ctx,
+                                grn_tokenizer_query *query,
+                                grn_obj *lexicon)
+{
+  GRN_API_ENTER;
+
+  if (query->lexicon != lexicon) {
+    query->lexicon = lexicon;
+    if (query->lexicon) {
+      grn_table_get_info(ctx,
+                         query->lexicon,
+                         NULL,
+                         &(query->encoding),
+                         NULL,
+                         NULL,
+                         NULL);
+    } else {
+      query->encoding = ctx->encoding;
+    }
+    query->need_normalize = GRN_TRUE;
+  }
+
+  GRN_API_RETURN(ctx->rc);
+}
+
 grn_obj *
 grn_tokenizer_query_get_lexicon(grn_ctx *ctx, grn_tokenizer_query *query)
 {
-------------- next part --------------
HTML����������������������������...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180509/5ff2d8d8/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index