[Groonga-commit] groonga/groonga [master] Use grn_tokenizer_tokenized_delimiter_next() in TokenDelimiter

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Nov 13 11:23:30 JST 2012


Kouhei Sutou	2012-11-13 11:23:30 +0900 (Tue, 13 Nov 2012)

  New Revision: a8caf9567c96785df96c4ea032e95beca507f2f1
  https://github.com/groonga/groonga/commit/a8caf9567c96785df96c4ea032e95beca507f2f1

  Log:
    Use grn_tokenizer_tokenized_delimiter_next() in TokenDelimiter

  Modified files:
    lib/token.c

  Modified: lib/token.c (+33 -25)
===================================================================
--- lib/token.c    2012-11-13 11:23:18 +0900 (bd55c20)
+++ lib/token.c    2012-11-13 11:23:30 +0900 (bdcbafe)
@@ -118,13 +118,8 @@ delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
                                            GRN_TEXT_VALUE(str),
                                            GRN_TEXT_LEN(str),
                                            tokenizer->encoding);
-  if (tokenizer->have_tokenized_delimiter) {
-    tokenizer->delimiter = GRN_TOKENIZER_TOKENIZED_DELIMITER_UTF8;
-    tokenizer->delimiter_len = strlen(tokenizer->delimiter);
-  } else {
-    tokenizer->delimiter = delimiter;
-    tokenizer->delimiter_len = delimiter_len;
-  }
+  tokenizer->delimiter = delimiter;
+  tokenizer->delimiter_len = delimiter_len;
 
   if (table_flags & GRN_OBJ_KEY_NORMALIZE) {
     normalizer = GRN_NORMALIZER_AUTO;
@@ -151,28 +146,41 @@ delimited_init(grn_ctx *ctx, grn_obj *table, grn_user_data *user_data,
 static grn_obj *
 delimited_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
-  size_t cl;
   grn_delimited_tokenizer *tokenizer = user_data->ptr;
-  const unsigned char *p = tokenizer->next, *r;
-  const unsigned char *e = tokenizer->end;
-  grn_tokenizer_status status;
-  for (r = p; r < e; r += cl) {
-    if (!(cl = grn_charlen_(ctx, (char *)r, (char *)e, tokenizer->encoding))) {
-      tokenizer->next = (unsigned char *)e;
-      break;
+
+  if (tokenizer->have_tokenized_delimiter) {
+    unsigned int rest_length;
+    rest_length = tokenizer->end - tokenizer->next;
+    tokenizer->next =
+      grn_tokenizer_tokenized_delimiter_next(ctx,
+                                             &(tokenizer->token),
+                                             tokenizer->next,
+                                             rest_length,
+                                             tokenizer->encoding);
+  } else {
+    size_t cl;
+    const unsigned char *p = tokenizer->next, *r;
+    const unsigned char *e = tokenizer->end;
+    grn_tokenizer_status status;
+    for (r = p; r < e; r += cl) {
+      if (!(cl = grn_charlen_(ctx, (char *)r, (char *)e, tokenizer->encoding))) {
+        tokenizer->next = (unsigned char *)e;
+        break;
+      }
+      if (r + tokenizer->delimiter_len <= e &&
+          !memcmp(r, tokenizer->delimiter, tokenizer->delimiter_len)) {
+        tokenizer->next = r + tokenizer->delimiter_len;
+        break;
+      }
     }
-    if (r + tokenizer->delimiter_len <= e &&
-        !memcmp(r, tokenizer->delimiter, tokenizer->delimiter_len)) {
-      tokenizer->next = r + tokenizer->delimiter_len;
-      break;
+    if (r == e) {
+      status = GRN_TOKENIZER_LAST;
+    } else {
+      status = GRN_TOKENIZER_CONTINUE;
     }
+    grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status);
   }
-  if (r == e) {
-    status = GRN_TOKENIZER_LAST;
-  } else {
-    status = GRN_TOKENIZER_CONTINUE;
-  }
-  grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status);
+
   return NULL;
 }
 
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index