[Groonga-commit] groonga/groonga at 6e53454 [master] TABLE_DAT_KEY: support token filter options

Back to archive index
Kouhei Sutou null+****@clear*****
Mon Oct 29 16:39:34 JST 2018


Kouhei Sutou	2018-10-29 16:39:34 +0900 (Mon, 29 Oct 2018)

  Revision: 6e53454d844deff22b97b3a085a456d74fcefc0b
  https://github.com/groonga/groonga/commit/6e53454d844deff22b97b3a085a456d74fcefc0b

  Message:
    TABLE_DAT_KEY: support token filter options

  Added files:
    test/command/suite/table_create/token_filters/options/double_array_trie.expected
    test/command/suite/table_create/token_filters/options/double_array_trie.test
  Modified files:
    lib/dat.cpp
    lib/db.c
    lib/grn_dat.h

  Modified: lib/dat.cpp (+25 -4)
===================================================================
--- lib/dat.cpp    2018-10-29 16:31:56 +0900 (7b15e6e64)
+++ lib/dat.cpp    2018-10-29 16:39:34 +0900 (fc58dbdfc)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2011-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -127,11 +128,29 @@ grn_dat_init(grn_ctx *, grn_dat *dat)
   dat->encoding = GRN_ENC_DEFAULT;
   dat->trie = NULL;
   dat->old_trie = NULL;
-  GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_TEXT_INIT(&(dat->token_filters), 0);
+  GRN_PTR_INIT(&(dat->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL);
   CRITICAL_SECTION_INIT(dat->lock);
   dat->is_dirty = GRN_FALSE;
 }
 
+static void
+grn_dat_close_token_filters(grn_ctx *ctx, grn_dat *dat)
+{
+  grn_obj *token_filters = &(dat->token_filters);
+  grn_table_module *raw_token_filters =
+    (grn_table_module *)GRN_BULK_HEAD(token_filters);
+  size_t i, n;
+
+  n = GRN_BULK_VSIZE(token_filters) / sizeof(grn_table_module);
+  for (i = 0; i < n; i++) {
+    grn_table_module *raw_token_filter = raw_token_filters + i;
+    grn_table_module_fin(ctx, raw_token_filter);
+  }
+  GRN_OBJ_FIN(ctx, token_filters);
+  GRN_OBJ_FIN(ctx, &(dat->token_filter_procs));
+}
+
 void
 grn_dat_fin(grn_ctx *ctx, grn_dat *dat)
 {
@@ -150,7 +169,7 @@ grn_dat_fin(grn_ctx *ctx, grn_dat *dat)
   }
   grn_table_module_fin(ctx, &(dat->tokenizer));
   grn_table_module_fin(ctx, &(dat->normalizer));
-  GRN_OBJ_FIN(ctx, &(dat->token_filters));
+  grn_dat_close_token_filters(ctx, dat);
 }
 
 /*
@@ -356,7 +375,8 @@ grn_dat_create(grn_ctx *ctx, const char *path, uint32_t,
   dat->encoding = encoding;
   grn_table_module_init(ctx, &(dat->tokenizer), GRN_ID_NIL);
   grn_table_module_init(ctx, &(dat->normalizer), dat->header->normalizer);
-  GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_TEXT_INIT(&(dat->token_filters), 0);
+  GRN_PTR_INIT(&(dat->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL);
 
   dat->obj.header.flags = dat->header->flags;
 
@@ -398,7 +418,8 @@ grn_dat_open(grn_ctx *ctx, const char *path)
     dat->header->normalizer = grn_obj_id(ctx, normalizer);
   }
   grn_table_module_init(ctx, &(dat->normalizer), dat->header->normalizer);
-  GRN_PTR_INIT(&(dat->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_TEXT_INIT(&(dat->token_filters), 0);
+  GRN_PTR_INIT(&(dat->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL);
   dat->obj.header.flags = dat->header->flags;
   return dat;
 }

  Modified: lib/db.c (+14 -7)
===================================================================
--- lib/db.c    2018-10-29 16:31:56 +0900 (dc30bd7f0)
+++ lib/db.c    2018-10-29 16:39:34 +0900 (7671e6c41)
@@ -2427,7 +2427,9 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
       if (encoding) { *encoding = ((grn_pat *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_pat *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_pat *)table)->normalizer.proc; }
-      if (token_filters) { *token_filters = &(((grn_pat *)table)->token_filter_procs); }
+      if (token_filters) {
+        *token_filters = &(((grn_pat *)table)->token_filter_procs);
+      }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_DAT_KEY :
@@ -2435,7 +2437,9 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
       if (encoding) { *encoding = ((grn_dat *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_dat *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_dat *)table)->normalizer.proc; }
-      if (token_filters) { *token_filters = &(((grn_dat *)table)->token_filters); }
+      if (token_filters) {
+        *token_filters = &(((grn_dat *)table)->token_filter_procs);
+      }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_HASH_KEY :
@@ -2443,7 +2447,9 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
       if (encoding) { *encoding = ((grn_hash *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_hash *)table)->normalizer.proc; }
-      if (token_filters) { *token_filters = &(((grn_hash *)table)->token_filter_procs); }
+      if (token_filters) {
+        *token_filters = &(((grn_hash *)table)->token_filter_procs);
+      }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_NO_KEY :
@@ -8379,7 +8385,7 @@ grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valueb
         valuebuf = &(((grn_pat *)obj)->token_filter_procs);
         break;
       case GRN_TABLE_DAT_KEY :
-        valuebuf = &(((grn_dat *)obj)->token_filters);
+        valuebuf = &(((grn_dat *)obj)->token_filter_procs);
         break;
       default :
         ERR(GRN_INVALID_ARGUMENT,
@@ -8662,7 +8668,7 @@ grn_obj_spec_save(grn_ctx *ctx, grn_db_obj *obj)
     grn_vector_delimit(ctx, &v, 0, 0);
     break;
   case GRN_TABLE_DAT_KEY :
-    grn_token_filters_pack(ctx, &(((grn_dat *)obj)->token_filters), b);
+    grn_token_filters_pack(ctx, &(((grn_dat *)obj)->token_filter_procs), b);
     grn_vector_delimit(ctx, &v, 0, 0);
     break;
   case GRN_EXPR :
@@ -9429,7 +9435,8 @@ grn_obj_set_info_token_filters(grn_ctx *ctx,
     token_filter_procs = &(((grn_pat *)table)->token_filter_procs);
     break;
   case GRN_TABLE_DAT_KEY :
-    token_filter_procs = &(((grn_dat *)table)->token_filters);
+    token_filters = &(((grn_dat *)table)->token_filters);
+    token_filter_procs = &(((grn_dat *)table)->token_filter_procs);
     break;
   default :
     break;
@@ -11194,7 +11201,7 @@ grn_ctx_at(grn_ctx *ctx, grn_id id)
                   UNPACK_INFO(spec, &decoded_spec);
                   vp->ptr->header.flags = flags;
                   grn_token_filters_unpack(ctx,
-                                           &(dat->token_filters),
+                                           &(dat->token_filter_procs),
                                            &decoded_spec);
                 }
                 break;

  Modified: lib/grn_dat.h (+3 -0)
===================================================================
--- lib/grn_dat.h    2018-10-29 16:31:56 +0900 (c15aefad7)
+++ lib/grn_dat.h    2018-10-29 16:39:34 +0900 (4098c8b6a)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2011-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -37,6 +38,8 @@ struct _grn_dat {
   grn_table_module tokenizer;
   grn_table_module normalizer;
   grn_obj token_filters;
+  /* For backward compatibility */
+  grn_obj token_filter_procs;
   grn_critical_section lock;
   grn_bool is_dirty;
 };

  Added: test/command/suite/table_create/token_filters/options/double_array_trie.expected (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/options/double_array_trie.expected    2018-10-29 16:39:34 +0900 (d814bd753)
@@ -0,0 +1,8 @@
+plugin_register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_DAT_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters 'TokenFilterStopWord("column", "ignore")'
+[[0,0.0,0.0],true]
+dump
+plugin_register token_filters/stop_word
+
+table_create Terms TABLE_DAT_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord(\"column\", \"ignore\")"

  Added: test/command/suite/table_create/token_filters/options/double_array_trie.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/options/double_array_trie.test    2018-10-29 16:39:34 +0900 (b51c27a77)
@@ -0,0 +1,8 @@
+plugin_register token_filters/stop_word
+
+table_create Terms TABLE_DAT_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters 'TokenFilterStopWord("column", "ignore")'
+
+dump
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181029/67d4880c/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index