[Groonga-commit] groonga/groonga [master] Use grn_tokenizer_tokenized_delimiter_next() in TokenMecab

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Nov 13 11:27:41 JST 2012


Kouhei Sutou	2012-11-13 11:27:41 +0900 (Tue, 13 Nov 2012)

  New Revision: f755fc70fdb5fd50e997478d87155745965a6b0d
  https://github.com/groonga/groonga/commit/f755fc70fdb5fd50e997478d87155745965a6b0d

  Log:
    Use grn_tokenizer_tokenized_delimiter_next() in TokenMecab

  Modified files:
    plugins/tokenizers/mecab.c

  Modified: plugins/tokenizers/mecab.c (+21 -25)
===================================================================
--- plugins/tokenizers/mecab.c    2012-11-13 11:23:30 +0900 (55be0bd)
+++ plugins/tokenizers/mecab.c    2012-11-13 11:27:41 +0900 (e4fafde)
@@ -37,8 +37,8 @@ typedef struct {
   grn_str *nstr;
   mecab_t *mecab;
   char *buf;
-  char *next;
-  char *end;
+  const char *next;
+  const char *end;
   grn_encoding encoding;
   grn_tokenizer_token token;
   grn_bool have_tokenized_delimiter;
@@ -180,48 +180,44 @@ mecab_init(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 static grn_obj *
 mecab_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
-  size_t cl;
   /* grn_obj *table = args[0]; */
   grn_mecab_tokenizer *tokenizer = user_data->ptr;
-  char *p = tokenizer->next, *r;
-  char *e = tokenizer->end;
   grn_encoding encoding = tokenizer->encoding;
-  grn_tokenizer_status status;
 
   if (tokenizer->have_tokenized_delimiter) {
-    for (r = p; r < e; r += cl) {
-      cl = grn_charlen_(ctx, r, e, encoding);
-      if (cl > 0) {
-        if (grn_tokenizer_is_tokenized_delimiter(ctx, r, cl, encoding)) {
-          tokenizer->next = r + cl;
-          break;
-        }
-      } else {
-        tokenizer->next = e;
-        break;
-      }
-    }
+    tokenizer->next =
+      grn_tokenizer_tokenized_delimiter_next(ctx,
+                                             &(tokenizer->token),
+                                             tokenizer->next,
+                                             tokenizer->end - tokenizer->next,
+                                             encoding);
   } else {
+    size_t cl;
+    const char *p = tokenizer->next, *r;
+    const char *e = tokenizer->end;
+    grn_tokenizer_status status;
+
     for (r = p; r < e; r += cl) {
       if (!(cl = grn_charlen_(ctx, r, e, encoding))) {
         tokenizer->next = e;
         break;
       }
       if (grn_isspace(r, encoding)) {
-        char *q = r;
+        const char *q = r;
         while ((cl = grn_isspace(q, encoding))) { q += cl; }
         tokenizer->next = q;
         break;
       }
     }
-  }
 
-  if (r == e) {
-    status = GRN_TOKENIZER_LAST;
-  } else {
-    status = GRN_TOKENIZER_CONTINUE;
+    if (r == e) {
+      status = GRN_TOKENIZER_LAST;
+    } else {
+      status = GRN_TOKENIZER_CONTINUE;
+    }
+    grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status);
   }
-  grn_tokenizer_token_push(ctx, &(tokenizer->token), p, r - p, status);
+
   return NULL;
 }
 
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index