[Groonga-commit] groonga/groonga at 9acd0fb [master] token delimit: treat continuous spaces as a space

Back to archive index

Kouhei Sutou null+****@clear*****
Wed Sep 18 11:24:48 JST 2013


Kouhei Sutou	2013-09-18 11:24:48 +0900 (Wed, 18 Sep 2013)

  New Revision: 9acd0fb5ae4718c7ba99d1afb303a8ae1aacece7
  https://github.com/groonga/groonga/commit/9acd0fb5ae4718c7ba99d1afb303a8ae1aacece7

  Message:
    token delimit: treat continuous spaces as a space
    
    Now "a  b" doesn't report a warning.

  Modified files:
    lib/token.c
    test/command/suite/tokenizers/delimit/invalid/empty.expected

  Modified: lib/token.c (+13 -4)
===================================================================
--- lib/token.c    2013-09-18 09:41:53 +0900 (bbd688a)
+++ lib/token.c    2013-09-18 11:24:48 +0900 (189517c)
@@ -180,10 +180,19 @@ delimited_next(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data
         tokenizer->next = (unsigned char *)e;
         break;
       }
-      if (r + tokenizer->delimiter_len <= e &&
-          !memcmp(r, tokenizer->delimiter, tokenizer->delimiter_len)) {
-        tokenizer->next = r + tokenizer->delimiter_len;
-        break;
+      {
+        grn_bool found_delimiter = GRN_FALSE;
+        const unsigned char *current_end = r;
+        while (current_end + tokenizer->delimiter_len <= e &&
+               !memcmp(current_end,
+                       tokenizer->delimiter, tokenizer->delimiter_len)) {
+          current_end += tokenizer->delimiter_len;
+          tokenizer->next = current_end;
+          found_delimiter = GRN_TRUE;
+        }
+        if (found_delimiter) {
+          break;
+        }
       }
     }
     if (r == e) {

  Modified: test/command/suite/tokenizers/delimit/invalid/empty.expected (+0 -1)
===================================================================
--- test/command/suite/tokenizers/delimit/invalid/empty.expected    2013-09-18 09:41:53 +0900 (405c284)
+++ test/command/suite/tokenizers/delimit/invalid/empty.expected    2013-09-18 11:24:48 +0900 (96d13e2)
@@ -1,3 +1,2 @@
 tokenize TokenDelimit "A  B"
 [[0,0.0,0.0],[{"value":"A","position":0},{"value":"B","position":1}]]
-#|w| [token_next] ignore an empty token.
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index