[Groonga-commit] groonga/groonga at de1a2af [master] Export new APIs for grn_token:

Back to archive index
Kouhei Sutou null+****@clear*****
Mon Nov 12 14:14:04 JST 2018


Kouhei Sutou	2018-11-12 14:14:04 +0900 (Mon, 12 Nov 2018)

  Revision: de1a2af87cd39e0c898e23ad7036e01cf073fcb0
  https://github.com/groonga/groonga/commit/de1a2af87cd39e0c898e23ad7036e01cf073fcb0

  Message:
    Export new APIs for grn_token:
    
      * grn_token_get_force_prefix_search()
      * grn_token_set_force_prefix_search()
      * grn_token_get_position()
      * grn_token_set_position()

  Modified files:
    include/groonga/token.h
    lib/grn_token.h
    lib/grn_token_cursor.h
    lib/highlighter.c
    lib/ii.c
    lib/proc/proc_tokenize.c
    lib/token.c
    lib/token_cursor.c

  Modified: include/groonga/token.h (+14 -0)
===================================================================
--- include/groonga/token.h    2018-11-12 11:47:23 +0900 (25f7345d9)
+++ include/groonga/token.h    2018-11-12 14:14:04 +0900 (10109daa0)
@@ -165,6 +165,20 @@ grn_token_set_overlap(grn_ctx *ctx,
 GRN_API grn_obj *
 grn_token_get_metadata(grn_ctx *ctx,
                        grn_token *token);
+GRN_API grn_bool
+grn_token_get_force_prefix_search(grn_ctx *ctx,
+                                  grn_token *token);
+GRN_API grn_rc
+grn_token_set_force_prefix_search(grn_ctx *ctx,
+                                  grn_token *token,
+                                  grn_bool force);
+GRN_API uint32_t
+grn_token_get_position(grn_ctx *ctx,
+                       grn_token *token);
+GRN_API grn_rc
+grn_token_set_position(grn_ctx *ctx,
+                       grn_token *token,
+                       uint32_t position);
 
 #ifdef __cplusplus
 }  /* extern "C" */

  Modified: lib/grn_token.h (+2 -0)
===================================================================
--- lib/grn_token.h    2018-11-12 11:47:23 +0900 (f93a3c936)
+++ lib/grn_token.h    2018-11-12 14:14:04 +0900 (69f484833)
@@ -33,6 +33,8 @@ struct _grn_token {
   uint32_t source_first_character_length;
   grn_bool have_overlap;
   grn_obj metadata;
+  grn_bool force_prefix_search;
+  uint32_t position;
 };
 
 grn_rc grn_token_init(grn_ctx *ctx, grn_token *token);

  Modified: lib/grn_token_cursor.h (+1 -1)
===================================================================
--- lib/grn_token_cursor.h    2018-11-12 11:47:23 +0900 (c144939ae)
+++ lib/grn_token_cursor.h    2018-11-12 14:14:04 +0900 (79c6c9337)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2016 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -36,7 +37,6 @@ struct _grn_token_cursor {
   int32_t pos;
   grn_tokenize_mode mode;
   grn_token_cursor_status status;
-  grn_bool force_prefix;
   grn_obj_flags table_flags;
   grn_encoding encoding;
   struct {

  Modified: lib/highlighter.c (+3 -3)
===================================================================
--- lib/highlighter.c    2018-11-12 11:47:23 +0900 (0bffb4d73)
+++ lib/highlighter.c    2018-11-12 14:14:04 +0900 (d7940cae8)
@@ -306,14 +306,14 @@ grn_highlighter_prepare_lexicon(grn_ctx *ctx,
     }
     GRN_BULK_REWIND(token_id_chunk);
     while ((token_id = grn_token_cursor_next(ctx, cursor)) != GRN_ID_NIL) {
+      grn_token *token;
       GRN_TEXT_PUT(ctx, token_id_chunk, &token_id, sizeof(grn_id));
-      if (cursor->force_prefix &&
+      token = grn_token_cursor_get_token(ctx, cursor);
+      if (grn_token_get_force_prefix_search(ctx, token) &&
           highlighter->lexicon.object->header.type != GRN_TABLE_HASH_KEY) {
-        grn_token *token;
         const char *data;
         size_t data_length;
 
-        token = grn_token_cursor_get_token(ctx, cursor);
         data = grn_token_get_data_raw(ctx, token, &data_length);
         grn_vector_add_element(ctx,
                                lazy_keywords,

  Modified: lib/ii.c (+9 -3)
===================================================================
--- lib/ii.c    2018-11-12 11:47:23 +0900 (4cd9386f6)
+++ lib/ii.c    2018-11-12 14:14:04 +0900 (c1c1d3080)
@@ -7251,7 +7251,9 @@ token_candidate_init(grn_ctx *ctx, grn_ii *ii, grn_token_cursor *token_cursor,
     }
     tid = grn_token_cursor_next(ctx, token_cursor);
     if (token_cursor->status != GRN_TOKEN_CURSOR_DONE_SKIP) {
-      if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
+      grn_token *token;
+      token = grn_token_cursor_get_token(ctx, token_cursor);
+      if (grn_token_get_force_prefix_search(ctx, token)) { ef |= EX_PREFIX; }
       TOKEN_CANDIDATE_NODE_SET();
       token_candidate_adjacent_set(ctx, token_cursor, top, curr);
       if (curr->estimated_size > *max_estimated_size) {
@@ -7511,6 +7513,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
   } else {
     grn_id tid;
     int ef;
+    grn_token *token;
     switch (mode) {
     case GRN_OP_PREFIX :
       ef = EX_PREFIX;
@@ -7526,7 +7529,8 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
       break;
     }
     tid = grn_token_cursor_next(ctx, token_cursor);
-    if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
+    token = grn_token_cursor_get_token(ctx, token_cursor);
+    if (grn_token_get_force_prefix_search(ctx, token)) { ef |= EX_PREFIX; }
     switch (token_cursor->status) {
     case GRN_TOKEN_CURSOR_DOING :
       key = _grn_table_key(ctx, lexicon, tid, &size);
@@ -7556,8 +7560,10 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string,
     }
 
     while (token_cursor->status == GRN_TOKEN_CURSOR_DOING) {
+      grn_token *token;
       tid = grn_token_cursor_next(ctx, token_cursor);
-      if (token_cursor->force_prefix) { ef |= EX_PREFIX; }
+      token = grn_token_cursor_get_token(ctx, token_cursor);
+      if (grn_token_get_force_prefix_search(ctx, token)) { ef |= EX_PREFIX; }
       switch (token_cursor->status) {
       case GRN_TOKEN_CURSOR_DONE_SKIP :
         continue;

  Modified: lib/proc/proc_tokenize.c (+2 -2)
===================================================================
--- lib/proc/proc_tokenize.c    2018-11-12 11:47:23 +0900 (9bf0b0c06)
+++ lib/proc/proc_tokenize.c    2018-11-12 14:14:04 +0900 (6d7535016)
@@ -249,8 +249,8 @@ tokenize(grn_ctx *ctx,
     grn_bulk_space(ctx, tokens, sizeof(tokenize_token));
     current_token = ((tokenize_token *)(GRN_BULK_CURR(tokens))) - 1;
     current_token->id = token_id;
-    current_token->position = token_cursor->pos;
-    current_token->force_prefix = token_cursor->force_prefix;
+    current_token->position = grn_token_get_position(ctx, token);
+    current_token->force_prefix = grn_token_get_force_prefix_search(ctx, token);
     current_token->source_offset = grn_token_get_source_offset(ctx, token);
     current_token->source_length = grn_token_get_source_length(ctx, token);
     current_token->source_first_character_length =

  Modified: lib/token.c (+58 -2)
===================================================================
--- lib/token.c    2018-11-12 11:47:23 +0900 (f98e512c5)
+++ lib/token.c    2018-11-12 14:14:04 +0900 (9fc812946)
@@ -31,6 +31,8 @@ grn_token_init(grn_ctx *ctx, grn_token *token)
   token->source_first_character_length = 0;
   token->have_overlap = GRN_FALSE;
   grn_token_metadata_init(ctx, &(token->metadata));
+  token->force_prefix_search = GRN_FALSE;
+  token->position = 0;
   GRN_API_RETURN(ctx->rc);
 }
 
@@ -225,7 +227,7 @@ grn_token_set_overlap(grn_ctx *ctx,
   GRN_API_ENTER;
   if (!token) {
     ERR(GRN_INVALID_ARGUMENT,
-        "[token][overlapping][set] token must not be NULL");
+        "[token][overlap][set] token must not be NULL");
     goto exit;
   }
   token->have_overlap = have_overlap;
@@ -239,12 +241,62 @@ grn_token_get_metadata(grn_ctx *ctx, grn_token *token)
   GRN_API_ENTER;
   if (!token) {
     ERR(GRN_INVALID_ARGUMENT,
-        "[token][data][get][metadata] token must not be NULL");
+        "[token][metadata][get] token must not be NULL");
     GRN_API_RETURN(NULL);
   }
   GRN_API_RETURN(&(token->metadata));
 }
 
+grn_bool
+grn_token_get_force_prefix_search(grn_ctx *ctx, grn_token *token)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][force-prefix-search][get] token must not be NULL");
+    GRN_API_RETURN(GRN_FALSE);
+  }
+  GRN_API_RETURN(token->force_prefix_search);
+}
+
+grn_rc
+grn_token_set_force_prefix_search(grn_ctx *ctx, grn_token *token, grn_bool force)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][force-prefix-search][set] token must not be NULL");
+    GRN_API_RETURN(ctx->rc);
+  }
+  token->force_prefix_search = force;
+  GRN_API_RETURN(ctx->rc);
+}
+
+uint32_t
+grn_token_get_position(grn_ctx *ctx, grn_token *token)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][position][get] token must not be NULL");
+    GRN_API_RETURN(0);
+  }
+  GRN_API_RETURN(token->position);
+}
+
+grn_rc
+grn_token_set_position(grn_ctx *ctx, grn_token *token, uint32_t position)
+{
+  GRN_API_ENTER;
+  if (!token) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[token][position][set] token must not be NULL");
+    GRN_API_RETURN(ctx->rc);
+  }
+  token->position = position;
+  GRN_API_RETURN(ctx->rc);
+}
+
 grn_rc
 grn_token_reset(grn_ctx *ctx, grn_token *token)
 {
@@ -260,6 +312,8 @@ grn_token_reset(grn_ctx *ctx, grn_token *token)
   token->source_first_character_length = 0;
   token->have_overlap = GRN_FALSE;
   grn_token_metadata_reset(ctx, &(token->metadata));
+  token->force_prefix_search = GRN_FALSE;
+  token->position = 0;
 exit:
   GRN_API_RETURN(ctx->rc);
 }
@@ -285,6 +339,8 @@ grn_token_copy(grn_ctx *ctx,
   token->have_overlap = source->have_overlap;
   grn_token_metadata_reset(ctx, &(token->metadata));
   grn_token_metadata_copy(ctx, &(token->metadata), &(source->metadata));
+  token->force_prefix_search = source->force_prefix_search;
+  token->position = source->position;
 exit:
   GRN_API_RETURN(ctx->rc);
 }

  Modified: lib/token_cursor.c (+3 -4)
===================================================================
--- lib/token_cursor.c    2018-11-12 11:47:23 +0900 (cd9de1bd2)
+++ lib/token_cursor.c    2018-11-12 14:14:04 +0900 (2d589c8de)
@@ -102,7 +102,6 @@ grn_token_cursor_open(grn_ctx *ctx, grn_obj *table,
   token_cursor->curr_size = 0;
   token_cursor->pos = -1;
   token_cursor->status = GRN_TOKEN_CURSOR_DOING;
-  token_cursor->force_prefix = GRN_FALSE;
   if (tokenizer) {
     grn_proc *tokenizer_proc = (grn_proc *)tokenizer;
     if (tokenizer_proc->callbacks.tokenizer.init) {
@@ -259,7 +258,6 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor)
          (token_cursor->mode == GRN_TOKENIZE_GET &&
           (status & GRN_TOKEN_REACH_END)))
         ? GRN_TOKEN_CURSOR_DONE : GRN_TOKEN_CURSOR_DOING;
-      token_cursor->force_prefix = GRN_FALSE;
 #define SKIP_FLAGS \
       (GRN_TOKEN_SKIP | GRN_TOKEN_SKIP_WITH_POSITION)
       if (status & SKIP_FLAGS) {
@@ -275,7 +273,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor)
       }
 #undef SKIP_FLAGS
       if (status & GRN_TOKEN_FORCE_PREFIX) {
-        token_cursor->force_prefix = GRN_TRUE;
+        grn_token_set_force_prefix_search(ctx, current_token, GRN_TRUE);
       }
       if (token_cursor->curr_size == 0) {
         if (token_cursor->status != GRN_TOKEN_CURSOR_DONE) {
@@ -308,7 +306,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor)
           }
         } else {
           if (status & GRN_TOKEN_REACH_END) {
-            token_cursor->force_prefix = GRN_TRUE;
+            grn_token_set_force_prefix_search(ctx, current_token, GRN_TRUE);
           }
         }
       }
@@ -377,6 +375,7 @@ grn_token_cursor_next(grn_ctx *ctx, grn_token_cursor *token_cursor)
       token_cursor->status = GRN_TOKEN_CURSOR_NOT_FOUND;
     }
     token_cursor->pos++;
+    grn_token_set_position(ctx, current_token, token_cursor->pos);
     break;
   }
   GRN_API_RETURN(tid);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181112/cd33bbe1/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index