[Groonga-commit] groonga/groonga at 80256b8 [master] Support operator per selector

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Aug 26 17:45:55 JST 2016


Kouhei Sutou	2016-08-07 17:56:05 +0900 (Sun, 07 Aug 2016)

  New Revision: 80256b8d2a490cdb43180737f41cbe7d1602ca7c
  https://github.com/groonga/groonga/commit/80256b8d2a490cdb43180737f41cbe7d1602ca7c

  Message:
    Support operator per selector
    
    Now, we can choose correct index for selector.
    
    GitHub: fix #589
    
    Reported by Naoya Murakami. Thanks!!!

  Added files:
    test/command/suite/select/function/in_values/with_index/hash_key.expected
    test/command/suite/select/function/in_values/with_index/hash_key.test
  Modified files:
    include/groonga/groonga.h
    lib/db.c
    lib/expr.c
    lib/grn_db.h
    lib/mrb/mrb_procedure.c
    lib/mrb/scripts/scan_info_data.rb
    lib/proc.c
    lib/proc/proc_fuzzy_search.c

  Modified: include/groonga/groonga.h (+5 -0)
===================================================================
--- include/groonga/groonga.h    2016-08-07 17:26:42 +0900 (9e6bdc2)
+++ include/groonga/groonga.h    2016-08-07 17:56:05 +0900 (f8dc664)
@@ -814,6 +814,11 @@ typedef grn_rc grn_selector_func(grn_ctx *ctx, grn_obj *table, grn_obj *index,
 
 GRN_API grn_rc grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc,
                                      grn_selector_func selector);
+GRN_API grn_rc grn_proc_set_selector_operator(grn_ctx *ctx,
+                                              grn_obj *proc,
+                                              grn_operator selector_op);
+GRN_API grn_operator grn_proc_get_selector_operator(grn_ctx *ctx,
+                                                    grn_obj *proc);
 
 /*-------------------------------------------------------------
  * grn_vector

  Modified: lib/db.c (+1 -0)
===================================================================
--- lib/db.c    2016-08-07 17:26:42 +0900 (3e1adff)
+++ lib/db.c    2016-08-07 17:56:05 +0900 (9e86f36)
@@ -910,6 +910,7 @@ grn_proc_create(grn_ctx *ctx, const char *name, int name_size, grn_proc_type typ
     res->funcs[PROC_NEXT] = next;
     res->funcs[PROC_FIN] = fin;
     memset(&(res->callbacks), 0, sizeof(res->callbacks));
+    res->callbacks.function.selector_op = GRN_OP_NOP;
     GRN_TEXT_INIT(&res->name_buf, 0);
     res->vars = NULL;
     res->nvars = 0;

  Modified: lib/expr.c (+33 -4)
===================================================================
--- lib/expr.c    2016-08-07 17:26:42 +0900 (e1015ce)
+++ lib/expr.c    2016-08-07 17:56:05 +0900 (1ffa403)
@@ -215,6 +215,27 @@ grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc, grn_selector_func selector)
   return GRN_SUCCESS;
 }
 
+grn_rc
+grn_proc_set_selector_operator(grn_ctx *ctx, grn_obj *proc, grn_operator op)
+{
+  grn_proc *proc_ = (grn_proc *)proc;
+  if (!grn_obj_is_function_proc(ctx, proc)) {
+    return GRN_INVALID_ARGUMENT;
+  }
+  proc_->callbacks.function.selector_op = op;
+  return GRN_SUCCESS;
+}
+
+grn_operator
+grn_proc_get_selector_operator(grn_ctx *ctx, grn_obj *proc)
+{
+  grn_proc *proc_ = (grn_proc *)proc;
+  if (!grn_obj_is_function_proc(ctx, proc)) {
+    return GRN_OP_NOP;
+  }
+  return proc_->callbacks.function.selector_op;
+}
+
 /* grn_expr */
 
 grn_obj *
@@ -4804,13 +4825,21 @@ grn_scan_info_build_full(grn_ctx *ctx, grn_obj *expr, int *n,
         si->end = c - e->codes;
         sis[i++] = si;
         /* better index resolving framework for functions should be implemented */
-        {
-          grn_obj **p = si->args, **pe = si->args + si->nargs;
+        if (grn_obj_is_selector_proc(ctx, si->args[0])) {
+          grn_obj *selector;
+          grn_obj **p;
+          grn_obj **pe;
+          grn_operator selector_op;
+
+          selector = si->args[0];
+          p = si->args + 1;
+          pe = si->args + si->nargs;
+          selector_op = grn_proc_get_selector_operator(ctx, selector);
           for (; p < pe; p++) {
             if (GRN_DB_OBJP(*p)) {
               grn_index_datum index_datum;
               unsigned int n_index_data;
-              n_index_data = grn_column_find_index_data(ctx, *p, c->op,
+              n_index_data = grn_column_find_index_data(ctx, *p, selector_op,
                                                         &index_datum, 1);
               if (n_index_data > 0) {
                 scan_info_put_index(ctx, si,
@@ -4821,7 +4850,7 @@ grn_scan_info_build_full(grn_ctx *ctx, grn_obj *expr, int *n,
               grn_index_datum index_datum;
               unsigned int n_index_data;
               si->flags |= SCAN_ACCESSOR;
-              n_index_data = grn_column_find_index_data(ctx, *p, c->op,
+              n_index_data = grn_column_find_index_data(ctx, *p, selector_op,
                                                         &index_datum, 1);
               if (n_index_data > 0) {
                 scan_info_put_index(ctx, si,

  Modified: lib/grn_db.h (+1 -0)
===================================================================
--- lib/grn_db.h    2016-08-07 17:26:42 +0900 (ad8cd13)
+++ lib/grn_db.h    2016-08-07 17:56:05 +0900 (7f679f0)
@@ -186,6 +186,7 @@ struct _grn_proc {
   union {
     struct {
       grn_selector_func *selector;
+      grn_operator selector_op;
     } function;
     struct {
       grn_command_run_func *run;

  Modified: lib/mrb/mrb_procedure.c (+17 -1)
===================================================================
--- lib/mrb/mrb_procedure.c    2016-08-07 17:26:42 +0900 (06ca210)
+++ lib/mrb/mrb_procedure.c    2016-08-07 17:56:05 +0900 (2b425d6)
@@ -1,6 +1,6 @@
 /* -*- c-basic-offset: 2 -*- */
 /*
-  Copyright(C) 2014 Brazil
+  Copyright(C) 2014-2016 Brazil
 
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -25,6 +25,8 @@
 
 #include "mrb_procedure.h"
 
+#include "mrb_operator.h"
+
 static struct mrb_data_type mrb_grn_procedure_type = {
   "Groonga::Procedure",
   NULL
@@ -68,6 +70,17 @@ mrb_grn_procedure_scorer_p(mrb_state *mrb, mrb_value self)
   return mrb_bool_value(grn_obj_is_scorer_proc(ctx, proc));
 }
 
+static mrb_value
+mrb_grn_procedure_get_selector_operator(mrb_state *mrb, mrb_value self)
+{
+  grn_ctx *ctx = (grn_ctx *)mrb->ud;
+  grn_obj *proc = DATA_PTR(self);
+  grn_operator selector_op;
+
+  selector_op = grn_proc_get_selector_operator(ctx, proc);
+  return grn_mrb_value_from_operator(mrb, selector_op);
+}
+
 void
 grn_mrb_procedure_init(grn_ctx *ctx)
 {
@@ -88,5 +101,8 @@ grn_mrb_procedure_init(grn_ctx *ctx)
                     mrb_grn_procedure_selector_only_p, MRB_ARGS_NONE());
   mrb_define_method(mrb, klass, "scorer?",
                     mrb_grn_procedure_scorer_p, MRB_ARGS_NONE());
+
+  mrb_define_method(mrb, klass, "selector_operator",
+                    mrb_grn_procedure_get_selector_operator, MRB_ARGS_NONE());
 }
 #endif

  Modified: lib/mrb/scripts/scan_info_data.rb (+13 -10)
===================================================================
--- lib/mrb/scripts/scan_info_data.rb    2016-08-07 17:26:42 +0900 (e46bbe7)
+++ lib/mrb/scripts/scan_info_data.rb    2016-08-07 17:56:05 +0900 (0a572e9)
@@ -38,9 +38,12 @@ module Groonga
     end
 
     def call_relational_resolve_indexes
-      # better index resolving framework for functions should be implemented
-      @args.each do |arg|
-        call_relational_resolve_index(arg)
+      procedure, *args = *@args
+      return unless procedure.selector?
+
+      selector_op = procedure.selector_operator
+      args.each do |arg|
+        call_relational_resolve_index(arg, selector_op)
       end
     end
 
@@ -285,26 +288,26 @@ module Groonga
       end
     end
 
-    def call_relational_resolve_index(object)
+    def call_relational_resolve_index(object, selector_op)
       case object
       when Accessor
-        call_relational_resolve_index_accessor(object)
+        call_relational_resolve_index_accessor(object, selector_op)
       when Bulk
         self.query = object
       when Indexable
-        call_relational_resolve_index_indexable(object)
+        call_relational_resolve_index_indexable(object, selector_op)
       end
     end
 
-    def call_relational_resolve_index_indexable(indexable)
-      index_info = indexable.find_index(op)
+    def call_relational_resolve_index_indexable(indexable, selector_op)
+      index_info = indexable.find_index(selector_op)
       return if index_info.nil?
       put_search_index(index_info.index, index_info.section_id, 1)
     end
 
-    def call_relational_resolve_index_accessor(accessor)
+    def call_relational_resolve_index_accessor(accessor, selector_op)
       self.flags |= ScanInfo::Flags::ACCESSOR
-      index_info = accessor.find_index(op)
+      index_info = accessor.find_index(selector_op)
       return if index_info.nil?
       put_search_index(index_info.index, index_info.section_id, 1)
     end

  Modified: lib/proc.c (+10 -0)
===================================================================
--- lib/proc.c    2016-08-07 17:26:42 +0900 (4e2ac54)
+++ lib/proc.c    2016-08-07 17:56:05 +0900 (aeca678)
@@ -3441,11 +3441,15 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
     selector_proc = grn_proc_create(ctx, "geo_in_circle", -1, GRN_PROC_FUNCTION,
                                     func_geo_in_circle, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, grn_selector_geo_in_circle);
+    /* We may need GRN_OP_GEO_IN_CIRCLE. */
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_MATCH);
 
     selector_proc = grn_proc_create(ctx, "geo_in_rectangle", -1,
                                     GRN_PROC_FUNCTION,
                                     func_geo_in_rectangle, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, grn_selector_geo_in_rectangle);
+    /* We may need GRN_OP_GEO_IN_RECTANGLE. */
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_MATCH);
   }
 
   grn_proc_create(ctx, "geo_distance", -1, GRN_PROC_FUNCTION,
@@ -3467,6 +3471,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
     selector_proc = grn_proc_create(ctx, "all_records", -1, GRN_PROC_FUNCTION,
                                     func_all_records, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_all_records);
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
   }
 
   /* experimental */
@@ -3478,6 +3483,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
     selector_proc = grn_proc_create(ctx, "query", -1, GRN_PROC_FUNCTION,
                                     func_query, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_query);
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
   }
 
   {
@@ -3486,6 +3492,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
     selector_proc = grn_proc_create(ctx, "sub_filter", -1, GRN_PROC_FUNCTION,
                                     NULL, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_sub_filter);
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
   }
 
   grn_proc_create(ctx, "html_untag", -1, GRN_PROC_FUNCTION,
@@ -3497,6 +3504,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
     selector_proc = grn_proc_create(ctx, "between", -1, GRN_PROC_FUNCTION,
                                     func_between, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_between);
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_LESS);
   }
 
   grn_proc_init_highlight_html(ctx);
@@ -3508,6 +3516,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
     selector_proc = grn_proc_create(ctx, "in_values", -1, GRN_PROC_FUNCTION,
                                     func_in_values, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_in_values);
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_EQUAL);
   }
 
   DEF_VAR(vars[0], "table");
@@ -3556,6 +3565,7 @@ grn_db_init_builtin_commands(grn_ctx *ctx)
                                     GRN_PROC_FUNCTION,
                                     NULL, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_prefix_rk_search);
+    grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_NOP);
   }
 
   grn_proc_init_config_get(ctx);

  Modified: lib/proc/proc_fuzzy_search.c (+1 -0)
===================================================================
--- lib/proc/proc_fuzzy_search.c    2016-08-07 17:26:42 +0900 (f38114a)
+++ lib/proc/proc_fuzzy_search.c    2016-08-07 17:56:05 +0900 (87c038a)
@@ -454,4 +454,5 @@ grn_proc_init_fuzzy_search(grn_ctx *ctx)
                                   GRN_PROC_FUNCTION,
                                   NULL, NULL, NULL, 0, NULL);
   grn_proc_set_selector(ctx, selector_proc, selector_fuzzy_search);
+  grn_proc_set_selector_operator(ctx, selector_proc, GRN_OP_FUZZY);
 }

  Added: test/command/suite/select/function/in_values/with_index/hash_key.expected (+48 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/in_values/with_index/hash_key.expected    2016-08-07 17:56:05 +0900 (0fc569a)
@@ -0,0 +1,48 @@
+table_create Memos TABLE_HASH_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Memos tag COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+table_create Tags TABLE_HASH_KEY ShortText --normalizer NormalizerAuto
+[[0,0.0,0.0],true]
+column_create Tags memos_tag COLUMN_INDEX Memos tag
+[[0,0.0,0.0],true]
+load --table Memos
+[
+{"_key": "Groonga is fast", "tag": "Groonga"},
+{"_key": "Mroonga is fast", "tag": "Mroonga"},
+{"_key": "Rroonga is fast", "tag": "Rroonga"}
+]
+[[0,0.0,0.0],3]
+select Memos   --filter 'in_values(tag, "groonga", "mroonga")'   --output_columns _key,tag
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        2
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "tag",
+          "ShortText"
+        ]
+      ],
+      [
+        "Groonga is fast",
+        "Groonga"
+      ],
+      [
+        "Mroonga is fast",
+        "Mroonga"
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/function/in_values/with_index/hash_key.test (+16 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/in_values/with_index/hash_key.test    2016-08-07 17:56:05 +0900 (68e0e5a)
@@ -0,0 +1,16 @@
+table_create Memos TABLE_HASH_KEY ShortText
+column_create Memos tag COLUMN_SCALAR ShortText
+
+table_create Tags TABLE_HASH_KEY ShortText --normalizer NormalizerAuto
+column_create Tags memos_tag COLUMN_INDEX Memos tag
+
+load --table Memos
+[
+{"_key": "Groonga is fast", "tag": "Groonga"},
+{"_key": "Mroonga is fast", "tag": "Mroonga"},
+{"_key": "Rroonga is fast", "tag": "Rroonga"}
+]
+
+select Memos \
+  --filter 'in_values(tag, "groonga", "mroonga")' \
+  --output_columns _key,tag
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index