[Groonga-commit] groonga/groonga at f912b9e [master] sub_filter: optimize for too filtered case

Back to archive index

Kouhei Sutou null+****@clear*****
Tue May 15 15:07:56 JST 2018


Kouhei Sutou	2018-05-15 15:07:56 +0900 (Tue, 15 May 2018)

  New Revision: f912b9ed13541913147fab4be4869e53e6ba8494
  https://github.com/groonga/groonga/commit/f912b9ed13541913147fab4be4869e53e6ba8494

  Message:
    sub_filter: optimize for too filtered case
    
    TODO:
    
      * Support non vector column scope case
      * Support customizing "too filtered" threshold

  Copied files:
    test/command/suite/select/function/sub_filter/column/vector/filtered.expected
      (from test/command/suite/select/function/sub_filter/column/vector.expected)
    test/command/suite/select/function/sub_filter/column/vector/filtered.test
      (from test/command/suite/select/function/sub_filter/column/vector.test)
  Modified files:
    lib/proc.c
  Renamed files:
    test/command/suite/select/function/sub_filter/column/vector/only.expected
      (from test/command/suite/select/function/sub_filter/column/vector.expected)
    test/command/suite/select/function/sub_filter/column/vector/only.test
      (from test/command/suite/select/function/sub_filter/column/vector.test)

  Modified: lib/proc.c (+47 -1)
===================================================================
--- lib/proc.c    2018-05-15 11:22:54 +0900 (edae76eb1)
+++ lib/proc.c    2018-05-15 15:07:56 +0900 (1b3cfb114)
@@ -2092,6 +2092,47 @@ selector_query(grn_ctx *ctx, grn_obj *table, grn_obj *index,
   return run_query(ctx, table, nargs - 1, args + 1, res, op);
 }
 
+static grn_bool
+sub_filter_restrict_base_res(grn_ctx *ctx,
+                             grn_obj *res,
+                             grn_obj *scope,
+                             grn_obj *base_res)
+{
+  grn_posting posting;
+  grn_obj values;
+
+  /* TODO: Customizable. Give up when res has more than 10 records. */
+  if (grn_table_size(ctx, res) > 10) {
+    return GRN_FALSE;
+  }
+  /* Only a vector column scope is handled; otherwise give up as well. */
+  if (!grn_obj_is_vector_column(ctx, scope)) {
+    return GRN_FALSE;
+  }
+  /* For each record in res, add every element of its scope value to base_res. */
+  memset(&posting, 0, sizeof(grn_posting));
+  GRN_VOID_INIT(&values);
+  GRN_TABLE_EACH_BEGIN(ctx, res, cursor, id) {
+    grn_id *matched_id;
+    unsigned int i, n;
+
+    grn_table_cursor_get_key(ctx, cursor, (void **)&matched_id);
+    GRN_BULK_REWIND(&values);
+    grn_obj_get_value(ctx, scope, *matched_id, &values);
+    n = grn_vector_size(ctx, &values);
+    for (i = 0; i < n; i++) {
+      posting.rid = grn_uvector_get_element(ctx,
+                                            &values,
+                                            i,
+                                            &(posting.weight));
+      grn_ii_posting_add(ctx, &posting, (grn_hash *)base_res, GRN_OP_OR);
+    }
+  } GRN_TABLE_EACH_END(ctx, cursor);
+  GRN_OBJ_FIN(ctx, &values);
+  /* base_res now holds the candidate records; tell the caller it was filled. */
+  return GRN_TRUE;
+}
+
 static grn_rc
 run_sub_filter(grn_ctx *ctx, grn_obj *table,
                int nargs, grn_obj **args,
@@ -2163,11 +2204,16 @@ run_sub_filter(grn_ctx *ctx, grn_obj *table,
 
   {
     grn_obj *base_res = NULL;
+    grn_operator select_op = GRN_OP_OR;
 
     base_res = grn_table_create(ctx, NULL, 0, NULL,
                                 GRN_OBJ_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC,
                                 scope_domain, NULL);
-    grn_table_select(ctx, scope_domain, sub_filter, base_res, GRN_OP_OR);
+    if (op == GRN_OP_AND &&
+        sub_filter_restrict_base_res(ctx, res, scope, base_res)) {
+      select_op = GRN_OP_AND;
+    }
+    grn_table_select(ctx, scope_domain, sub_filter, base_res, select_op);
     if (scope->header.type == GRN_ACCESSOR) {
       rc = grn_accessor_resolve(ctx, scope, -1, base_res, res, op);
     } else {

  Copied: test/command/suite/select/function/sub_filter/column/vector/filtered.expected (+2 -13) 83%
===================================================================
--- test/command/suite/select/function/sub_filter/column/vector.expected    2018-05-15 11:22:54 +0900 (708a29abe)
+++ test/command/suite/select/function/sub_filter/column/vector/filtered.expected    2018-05-15 15:07:56 +0900 (c3ee255f5)
@@ -29,7 +29,7 @@ load --table Packages
 {"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
 ]
 [[0,0.0,0.0],3]
-select Packages   --filter 'sub_filter(files, "revision >= 10 && revision < 40")'   --output_columns '_key, files, files.revision'
+select Packages   --filter '_key == "rroonga" &&             sub_filter(files, "revision >= 10 && revision < 40")'   --output_columns '_key, files, files.revision'
 [
   [
     0,
@@ -39,7 +39,7 @@ select Packages   --filter 'sub_filter(files, "revision >= 10 && revision < 40")
   [
     [
       [
-        2
+        1
       ],
       [
         [
@@ -65,17 +65,6 @@ select Packages   --filter 'sub_filter(files, "revision >= 10 && revision < 40")
           12,
           24
         ]
-      ],
-      [
-        "groonga",
-        [
-          "include/groonga.h",
-          "src/groonga.c"
-        ],
-        [
-          100,
-          29
-        ]
       ]
     ]
   ]

  Copied: test/command/suite/select/function/sub_filter/column/vector/filtered.test (+2 -1) 90%
===================================================================
--- test/command/suite/select/function/sub_filter/column/vector.test    2018-05-15 11:22:54 +0900 (95d359909)
+++ test/command/suite/select/function/sub_filter/column/vector/filtered.test    2018-05-15 15:07:56 +0900 (c5cefefa1)
@@ -27,5 +27,6 @@ load --table Packages
 ]
 
 select Packages \
-  --filter 'sub_filter(files, "revision >= 10 && revision < 40")' \
+  --filter '_key == "rroonga" && \
+            sub_filter(files, "revision >= 10 && revision < 40")' \
   --output_columns '_key, files, files.revision'

  Renamed: test/command/suite/select/function/sub_filter/column/vector/only.expected (+0 -0) 100%
===================================================================

  Renamed: test/command/suite/select/function/sub_filter/column/vector/only.test (+0 -0) 100%
===================================================================
-------------- next part --------------
HTMLの添付ファイルを保管しました...
URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180515/53deef52/attachment-0001.htm 



More information about the Groonga-commit mailing list
Back to archive index