[Groonga-commit] groonga/groonga at 46beac9 [master] table_create: support multiple token filters

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Oct 11 22:53:32 JST 2014


Kouhei Sutou	2014-10-11 22:53:32 +0900 (Sat, 11 Oct 2014)

  New Revision: 46beac9dbec71a7a6a44c3f4657e51b1dae8627a
  https://github.com/groonga/groonga/commit/46beac9dbec71a7a6a44c3f4657e51b1dae8627a

  Message:
    table_create: support multiple token filters

  Added files:
    test/command/suite/table_create/token_filters/empty_between_comma.expected
    test/command/suite/table_create/token_filters/empty_between_comma.test
    test/command/suite/table_create/token_filters/leading_comma.expected
    test/command/suite/table_create/token_filters/leading_comma.test
    test/command/suite/table_create/token_filters/leading_spaces.expected
    test/command/suite/table_create/token_filters/leading_spaces.test
    test/command/suite/table_create/token_filters/multiple.expected
    test/command/suite/table_create/token_filters/multiple.test
    test/command/suite/table_create/token_filters/spaces_around_comma.expected
    test/command/suite/table_create/token_filters/spaces_around_comma.test
    test/command/suite/table_create/token_filters/trailing_comma.expected
    test/command/suite/table_create/token_filters/trailing_comma.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+95 -6)
===================================================================
--- lib/proc.c    2014-10-11 22:14:23 +0900 (656b9e9)
+++ lib/proc.c    2014-10-11 22:53:32 +0900 (df4927f)
@@ -1194,6 +1194,96 @@ grn_column_create_flags_to_text(grn_ctx *ctx, grn_obj *buf, grn_obj_flags flags)
   }
 }
 
+static grn_bool
+proc_table_create_set_token_filters_put(grn_ctx *ctx,
+                                        grn_obj *token_filters,
+                                        const char *token_filter_name,
+                                        int token_filter_name_length)
+{
+  grn_obj *token_filter;
+
+  token_filter = grn_ctx_get(ctx,
+                             token_filter_name,
+                             token_filter_name_length);
+  if (token_filter) {
+    GRN_PTR_PUT(ctx, token_filters, token_filter);
+    return GRN_TRUE;
+  } else {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][create][token-filter] nonexistent token filter: <%.*s>",
+        token_filter_name_length, token_filter_name);
+    return GRN_FALSE;
+  }
+}
+
+static grn_bool
+proc_table_create_set_token_filters_fill(grn_ctx *ctx,
+                                         grn_obj *token_filters,
+                                         grn_obj *token_filter_names)
+{
+  const char *start, *current, *end;
+  const char *name_start, *name_end;
+  const char *last_name_end;
+
+  start = GRN_TEXT_VALUE(token_filter_names);
+  end = start + GRN_TEXT_LEN(token_filter_names);
+  current = start;
+  name_start = NULL;
+  name_end = NULL;
+  last_name_end = start;
+  while (current < end) {
+    switch (current[0]) {
+    case ' ' :
+      if (name_start && !name_end) {
+        name_end = current;
+      }
+      break;
+    case ',' :
+      if (!name_start) {
+        goto break_loop;
+      }
+      if (!name_end) {
+        name_end = current;
+      }
+      proc_table_create_set_token_filters_put(ctx,
+                                              token_filters,
+                                              name_start,
+                                              name_end - name_start);
+      last_name_end = name_end + 1;
+      name_start = NULL;
+      name_end = NULL;
+      break;
+    default :
+      if (!name_start) {
+        name_start = current;
+      }
+      break;
+    }
+    current++;
+  }
+
+break_loop:
+  if (!name_start) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "[table][create][token-filter] empty token filter name: "
+        "<%.*s|%.*s|%.*s>",
+        (int)(last_name_end - start), start,
+        (int)(current - last_name_end), last_name_end,
+        (int)(end - current), current);
+    return GRN_FALSE;
+  }
+
+  if (!name_end) {
+    name_end = current;
+  }
+  proc_table_create_set_token_filters_put(ctx,
+                                          token_filters,
+                                          name_start,
+                                          name_end - name_start);
+
+  return GRN_TRUE;
+}
+
 static void
 proc_table_create_set_token_filters(grn_ctx *ctx,
                                     grn_obj *table,
@@ -1206,12 +1296,11 @@ proc_table_create_set_token_filters(grn_ctx *ctx,
   }
 
   GRN_PTR_INIT(&token_filters, GRN_OBJ_VECTOR, 0);
-  GRN_PTR_PUT(ctx,
-              &token_filters,
-              grn_ctx_get(ctx,
-                          GRN_TEXT_VALUE(token_filter_names),
-                          GRN_TEXT_LEN(token_filter_names)));
-  grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters);
+  if (proc_table_create_set_token_filters_fill(ctx,
+                                               &token_filters,
+                                               token_filter_names)) {
+    grn_obj_set_info(ctx, table, GRN_INFO_TOKEN_FILTERS, &token_filters);
+  }
   grn_obj_unlink(ctx, &token_filters);
 }
 

  Added: test/command/suite/table_create/token_filters/empty_between_comma.expected (+18 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/empty_between_comma.expected    2014-10-11 22:53:32 +0900 (4cfbf35)
@@ -0,0 +1,18 @@
+register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters "TokenFilterStopWord, ,TokenFilterStopWord"
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[table][create][token-filter] empty token filter name: <TokenFilterStopWord,| |,TokenFilterStopWord>"
+  ],
+  false
+]
+#|e| [table][create][token-filter] empty token filter name: <TokenFilterStopWord,| |,TokenFilterStopWord>
+dump
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto
+

  Added: test/command/suite/table_create/token_filters/empty_between_comma.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/empty_between_comma.test    2014-10-11 22:53:32 +0900 (492002b)
@@ -0,0 +1,8 @@
+register token_filters/stop_word
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters "TokenFilterStopWord, ,TokenFilterStopWord"
+
+dump

  Added: test/command/suite/table_create/token_filters/leading_comma.expected (+18 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/leading_comma.expected    2014-10-11 22:53:32 +0900 (0f4c044)
@@ -0,0 +1,18 @@
+register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters ",TokenFilterStopWord"
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[table][create][token-filter] empty token filter name: <||,TokenFilterStopWord>"
+  ],
+  false
+]
+#|e| [table][create][token-filter] empty token filter name: <||,TokenFilterStopWord>
+dump
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto
+

  Added: test/command/suite/table_create/token_filters/leading_comma.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/leading_comma.test    2014-10-11 22:53:32 +0900 (f2b793d)
@@ -0,0 +1,8 @@
+register token_filters/stop_word
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters ",TokenFilterStopWord"
+
+dump

  Added: test/command/suite/table_create/token_filters/leading_spaces.expected (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/leading_spaces.expected    2014-10-11 22:53:32 +0900 (3fc3481)
@@ -0,0 +1,7 @@
+register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters "  TokenFilterStopWord"
+[[0,0.0,0.0],true]
+dump
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord
+

  Added: test/command/suite/table_create/token_filters/leading_spaces.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/leading_spaces.test    2014-10-11 22:53:32 +0900 (6915b88)
@@ -0,0 +1,8 @@
+register token_filters/stop_word
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters "  TokenFilterStopWord"
+
+dump

  Added: test/command/suite/table_create/token_filters/multiple.expected (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/multiple.expected    2014-10-11 22:53:32 +0900 (bc3a2c4)
@@ -0,0 +1,7 @@
+register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters TokenFilterStopWord,TokenFilterStopWord
+[[0,0.0,0.0],true]
+dump
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord
+

  Added: test/command/suite/table_create/token_filters/multiple.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/multiple.test    2014-10-11 22:53:32 +0900 (82c8ed3)
@@ -0,0 +1,8 @@
+register token_filters/stop_word
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters TokenFilterStopWord,TokenFilterStopWord
+
+dump

  Added: test/command/suite/table_create/token_filters/spaces_around_comma.expected (+7 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/spaces_around_comma.expected    2014-10-11 22:53:32 +0900 (e3a6a55)
@@ -0,0 +1,7 @@
+register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters "TokenFilterStopWord , TokenFilterStopWord"
+[[0,0.0,0.0],true]
+dump
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters TokenFilterStopWord,TokenFilterStopWord
+

  Added: test/command/suite/table_create/token_filters/spaces_around_comma.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/spaces_around_comma.test    2014-10-11 22:53:32 +0900 (4879a70)
@@ -0,0 +1,8 @@
+register token_filters/stop_word
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters "TokenFilterStopWord , TokenFilterStopWord"
+
+dump

  Added: test/command/suite/table_create/token_filters/trailing_comma.expected (+18 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/trailing_comma.expected    2014-10-11 22:53:32 +0900 (c7b9bdf)
@@ -0,0 +1,18 @@
+register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_PAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters "TokenFilterStopWord,"
+[
+  [
+    [
+      -22,
+      0.0,
+      0.0
+    ],
+    "[table][create][token-filter] empty token filter name: <TokenFilterStopWord,||>"
+  ],
+  false
+]
+#|e| [table][create][token-filter] empty token filter name: <TokenFilterStopWord,||>
+dump
+table_create Terms TABLE_PAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto
+

  Added: test/command/suite/table_create/token_filters/trailing_comma.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/trailing_comma.test    2014-10-11 22:53:32 +0900 (dace8c1)
@@ -0,0 +1,8 @@
+register token_filters/stop_word
+
+table_create Terms TABLE_PAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters "TokenFilterStopWord,"
+
+dump
-------------- next part --------------
An HTML attachment was scrubbed and is not reproduced in this text archive.
Download 



More information about the Groonga-commit mailing list
Back to archive index