[Groonga-commit] groonga/groonga at 8f4852d [master] TokenRegexp: don't require character types to normalizer

Back to archive index

Kouhei Sutou null+****@clear*****
Sun May 10 00:05:08 JST 2015


Kouhei Sutou	2015-05-10 00:05:08 +0900 (Sun, 10 May 2015)

  New Revision: 8f4852d9ae082eb595f17d52e3348b0262099577
  https://github.com/groonga/groonga/commit/8f4852d9ae082eb595f17d52e3348b0262099577

  Message:
    TokenRegexp: don't require character types to normalizer

  Modified files:
    lib/tokenizers.c

  Modified: lib/tokenizers.c (+16 -9)
===================================================================
--- lib/tokenizers.c    2015-05-10 00:02:05 +0900 (e983ef1)
+++ lib/tokenizers.c    2015-05-10 00:05:08 +0900 (8e83da2)
@@ -582,8 +582,7 @@ regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   grn_obj *buffer = &(tokenizer->buffer);
   const char *current = tokenizer->next;
   const char *end = tokenizer->end;
-  const const uint_least8_t *char_types =
-    tokenizer->char_types + tokenizer->nth_char;
+  const const uint_least8_t *char_types = tokenizer->char_types;
   grn_tokenize_mode mode = tokenizer->query->tokenize_mode;
   grn_bool is_first_token = tokenizer->is_first_token;
   grn_bool escaping = GRN_FALSE;
@@ -592,6 +591,10 @@ regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
   GRN_BULK_REWIND(buffer);
   tokenizer->is_first_token = GRN_FALSE;
 
+  if (char_types) {
+    char_types += tokenizer->nth_char;
+  }
+
   if (mode == GRN_TOKEN_GET) {
     if (tokenizer->get.have_begin) {
       grn_tokenizer_token_push(ctx,
@@ -646,9 +649,10 @@ regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
         char_len == 1 && current[0] == '\\') {
       current += char_len;
       escaping = GRN_TRUE;
-      char_types++;
+      if (char_types) {
+        char_types++;
+      }
     } else {
-      uint_least8_t char_type;
       n_characters++;
       GRN_TEXT_PUT(ctx, buffer, current, char_len);
       current += char_len;
@@ -660,11 +664,14 @@ regexp_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
         }
       }
       escaping = GRN_FALSE;
-      char_type = char_types[0];
-      char_types++;
-      if (GRN_STR_ISBLANK(char_type)) {
-        break_by_blank = GRN_TRUE;
-        break;
+      if (char_types) {
+        uint_least8_t char_type;
+        char_type = char_types[0];
+        char_types++;
+        if (GRN_STR_ISBLANK(char_type)) {
+          break_by_blank = GRN_TRUE;
+          break;
+        }
       }
       if (n_characters == ngram_unit) {
         break;
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index