[Groonga-commit] groonga/groonga at bbb8879 [master] TABLE_HASH_KEY: support token filter options

Back to archive index
Kouhei Sutou null+****@clear*****
Mon Oct 29 16:31:56 JST 2018


Kouhei Sutou	2018-10-29 16:31:56 +0900 (Mon, 29 Oct 2018)

  Revision: bbb8879eb55c70d346c8988ac2b0af72730d3746
  https://github.com/groonga/groonga/commit/bbb8879eb55c70d346c8988ac2b0af72730d3746

  Message:
    TABLE_HASH_KEY: support token filter options

  Added files:
    test/command/suite/table_create/token_filters/options/hash.expected
    test/command/suite/table_create/token_filters/options/hash.test
  Modified files:
    lib/db.c
    lib/grn_hash.h
    lib/hash.c

  Modified: lib/db.c (+7 -6)
===================================================================
--- lib/db.c    2018-10-29 16:16:15 +0900 (cf953636a)
+++ lib/db.c    2018-10-29 16:31:56 +0900 (dc30bd7f0)
@@ -2443,7 +2443,7 @@ grn_table_get_info(grn_ctx *ctx, grn_obj *table, grn_table_flags *flags,
       if (encoding) { *encoding = ((grn_hash *)table)->encoding; }
       if (tokenizer) { *tokenizer = ((grn_hash *)table)->tokenizer.proc; }
       if (normalizer) { *normalizer = ((grn_hash *)table)->normalizer.proc; }
-      if (token_filters) { *token_filters = &(((grn_hash *)table)->token_filters); }
+      if (token_filters) { *token_filters = &(((grn_hash *)table)->token_filter_procs); }
       rc = GRN_SUCCESS;
       break;
     case GRN_TABLE_NO_KEY :
@@ -8373,7 +8373,7 @@ grn_obj_get_info(grn_ctx *ctx, grn_obj *obj, grn_info_type type, grn_obj *valueb
     case GRN_INFO_TOKEN_FILTERS :
       switch (obj->header.type) {
       case GRN_TABLE_HASH_KEY :
-        valuebuf = &(((grn_hash *)obj)->token_filters);
+        valuebuf = &(((grn_hash *)obj)->token_filter_procs);
         break;
       case GRN_TABLE_PAT_KEY :
         valuebuf = &(((grn_pat *)obj)->token_filter_procs);
@@ -8654,7 +8654,7 @@ grn_obj_spec_save(grn_ctx *ctx, grn_db_obj *obj)
   grn_vector_delimit(ctx, &v, 0, 0);
   switch (obj->header.type) {
   case GRN_TABLE_HASH_KEY :
-    grn_token_filters_pack(ctx, &(((grn_hash *)obj)->token_filters), b);
+    grn_token_filters_pack(ctx, &(((grn_hash *)obj)->token_filter_procs), b);
     grn_vector_delimit(ctx, &v, 0, 0);
     break;
   case GRN_TABLE_PAT_KEY :
@@ -9421,7 +9421,8 @@ grn_obj_set_info_token_filters(grn_ctx *ctx,
 
   switch (table->header.type) {
   case GRN_TABLE_HASH_KEY :
-    token_filter_procs = &(((grn_hash *)table)->token_filters);
+    token_filters = &(((grn_hash *)table)->token_filters);
+    token_filter_procs = &(((grn_hash *)table)->token_filter_procs);
     break;
   case GRN_TABLE_PAT_KEY :
     token_filters = &(((grn_pat *)table)->token_filters);
@@ -11162,7 +11163,7 @@ grn_ctx_at(grn_ctx *ctx, grn_id id)
                   UNPACK_INFO(spec, &decoded_spec);
                   vp->ptr->header.flags = flags;
                   grn_token_filters_unpack(ctx,
-                                           &(hash->token_filters),
+                                           &(hash->token_filter_procs),
                                            &decoded_spec);
                 }
                 break;
@@ -11175,7 +11176,7 @@ grn_ctx_at(grn_ctx *ctx, grn_id id)
                   UNPACK_INFO(spec, &decoded_spec);
                   vp->ptr->header.flags = flags;
                   grn_token_filters_unpack(ctx,
-                                           &(pat->token_filters),
+                                           &(pat->token_filter_procs),
                                            &decoded_spec);
                   if (pat->tokenizer.proc) {
                     grn_pat_cache_enable(ctx,

  Modified: lib/grn_hash.h (+3 -0)
===================================================================
--- lib/grn_hash.h    2018-10-29 16:16:15 +0900 (3426b2dba)
+++ lib/grn_hash.h    2018-10-29 16:31:56 +0900 (4bdc1f9f7)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -196,6 +197,8 @@ struct _grn_hash {
   grn_table_module tokenizer;
   grn_table_module normalizer;
   grn_obj token_filters;
+  /* For backward compatibility */
+  grn_obj token_filter_procs;
 
   /* For grn_io_hash. */
   grn_io *io;

  Modified: lib/hash.c (+26 -5)
===================================================================
--- lib/hash.c    2018-10-29 16:16:15 +0900 (2b2bfc108)
+++ lib/hash.c    2018-10-29 16:31:56 +0900 (fc6412594)
@@ -1,6 +1,7 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
   Copyright(C) 2009-2018 Brazil
+  Copyright(C) 2018 Kouhei Sutou <kou****@clear*****>
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -1713,7 +1714,8 @@ grn_io_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
     header->normalizer = GRN_ID_NIL;
   }
   header->truncated = GRN_FALSE;
-  GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_TEXT_INIT(&(hash->token_filters), 0);
+  GRN_PTR_INIT(&(hash->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL);
 
   hash->obj.header.flags = (header->flags & GRN_OBJ_FLAGS_MASK);
   hash->ctx = ctx;
@@ -1788,7 +1790,8 @@ grn_tiny_hash_init(grn_ctx *ctx, grn_hash *hash, const char *path,
   hash->garbages = GRN_ID_NIL;
   grn_table_module_init(ctx, &(hash->tokenizer), GRN_ID_NIL);
   grn_table_module_init(ctx, &(hash->normalizer), GRN_ID_NIL);
-  GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_TEXT_INIT(&(hash->token_filters), 0);
+  GRN_PTR_INIT(&(hash->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL);
   grn_tiny_array_init(ctx, &hash->a, entry_size, GRN_TINY_ARRAY_CLEAR);
   grn_tiny_bitmap_init(ctx, &hash->bitmap);
   return GRN_SUCCESS;
@@ -1862,7 +1865,8 @@ grn_hash_open(grn_ctx *ctx, const char *path)
               header->normalizer = grn_obj_id(ctx, normalizer);
             }
             grn_table_module_init(ctx, &(hash->normalizer), header->normalizer);
-            GRN_PTR_INIT(&(hash->token_filters), GRN_OBJ_VECTOR, GRN_ID_NIL);
+            GRN_TEXT_INIT(&(hash->token_filters), 0);
+            GRN_PTR_INIT(&(hash->token_filter_procs), GRN_OBJ_VECTOR, GRN_ID_NIL);
             hash->obj.header.flags = header->flags;
             return hash;
           } else {
@@ -1901,6 +1905,23 @@ grn_hash_error_if_truncated(grn_ctx *ctx, grn_hash *hash)
   return GRN_SUCCESS;
 }
 
+static void
+grn_hash_close_token_filters(grn_ctx *ctx, grn_hash *hash)
+{
+  grn_obj *token_filters = &(hash->token_filters);
+  grn_table_module *raw_token_filters =
+    (grn_table_module *)GRN_BULK_HEAD(token_filters);
+  size_t i, n;
+
+  n = GRN_BULK_VSIZE(token_filters) / sizeof(grn_table_module);
+  for (i = 0; i < n; i++) {
+    grn_table_module *raw_token_filter = raw_token_filters + i;
+    grn_table_module_fin(ctx, raw_token_filter);
+  }
+  GRN_OBJ_FIN(ctx, token_filters);
+  GRN_OBJ_FIN(ctx, &(hash->token_filter_procs));
+}
+
 static grn_rc
 grn_io_hash_fin(grn_ctx *ctx, grn_hash *hash)
 {
@@ -1909,7 +1930,7 @@ grn_io_hash_fin(grn_ctx *ctx, grn_hash *hash)
   rc = grn_io_close(ctx, hash->io);
   grn_table_module_fin(ctx, &(hash->tokenizer));
   grn_table_module_fin(ctx, &(hash->normalizer));
-  GRN_OBJ_FIN(ctx, &(hash->token_filters));
+  grn_hash_close_token_filters(ctx, hash);
   return rc;
 }
 
@@ -1922,7 +1943,7 @@ grn_tiny_hash_fin(grn_ctx *ctx, grn_hash *hash)
 
   grn_table_module_fin(ctx, &(hash->tokenizer));
   grn_table_module_fin(ctx, &(hash->normalizer));
-  GRN_OBJ_FIN(ctx, &(hash->token_filters));
+  grn_hash_close_token_filters(ctx, hash);
 
   if (hash->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) {
     uint32_t num_remaining_entries = *hash->n_entries;

  Added: test/command/suite/table_create/token_filters/options/hash.expected (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/options/hash.expected    2018-10-29 16:31:56 +0900 (935510df2)
@@ -0,0 +1,8 @@
+plugin_register token_filters/stop_word
+[[0,0.0,0.0],true]
+table_create Terms TABLE_HASH_KEY ShortText   --default_tokenizer TokenBigram   --normalizer NormalizerAuto   --token_filters 'TokenFilterStopWord("column", "ignore")'
+[[0,0.0,0.0],true]
+dump
+plugin_register token_filters/stop_word
+
+table_create Terms TABLE_HASH_KEY ShortText --default_tokenizer TokenBigram --normalizer NormalizerAuto --token_filters "TokenFilterStopWord(\"column\", \"ignore\")"

  Added: test/command/suite/table_create/token_filters/options/hash.test (+8 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/table_create/token_filters/options/hash.test    2018-10-29 16:31:56 +0900 (064715e36)
@@ -0,0 +1,8 @@
+plugin_register token_filters/stop_word
+
+table_create Terms TABLE_HASH_KEY ShortText \
+  --default_tokenizer TokenBigram \
+  --normalizer NormalizerAuto \
+  --token_filters 'TokenFilterStopWord("column", "ignore")'
+
+dump
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181029/3c89f3d4/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index