[Groonga-commit] groonga/groonga [master] Add a new function: sub_filter()

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Mar 7 15:46:31 JST 2013


Kouhei Sutou	2013-03-07 15:46:31 +0900 (Thu, 07 Mar 2013)

  New Revision: 5a179f498e509f5b74fa08034973a06d59c4b3f9
  https://github.com/groonga/groonga/commit/5a179f498e509f5b74fa08034973a06d59c4b3f9

  Message:
    Add a new function: sub_filter()
    
    sub_filter() resolves the following situation:
    
    Schema:
    
      table_create Files TABLE_PAT_KEY ShortText
      column_create Files revision COLUMN_SCALAR UInt32
    
      table_create Packages TABLE_PAT_KEY ShortText
      column_create Packages files COLUMN_VECTOR Files
    
      column_create Files packages_files_index COLUMN_INDEX Packages files
    
      table_create Revisions TABLE_PAT_KEY UInt32
      column_create Revisions files_revision COLUMN_INDEX Files revision
    
    Data:
    
      load --table Files
      [
      {"_key": "include/groonga.h", "revision": 100},
      {"_key": "src/groonga.c",     "revision": 29},
      {"_key": "lib/groonga.rb",    "revision": 12},
      {"_key": "README.textile",    "revision": 24},
      {"_key": "ha_mroonga.cc",     "revision": 40},
      {"_key": "ha_mroonga.hpp",    "revision": 6}
      ]
    
      load --table Packages
      [
      {"_key": "groonga", "files": ["include/groonga.h", "src/groonga.c"]},
      {"_key": "rroonga", "files": ["lib/groonga.rb", "README.textile"]},
      {"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
      ]
    
    We want to search packages that have any files where 10 <= revision < 40.
    
    We can't use the following query for this purpose:
    
      select Packages \
        --filter 'files.revision >= 10 && files.revision < 40' \
        --output_columns '_key, files, files.revision'
    
    It matches all packages because the query means that all of the
    following conditions should be true:
    
      * revision >= 10 for some file in files
      * revision < 40 for some (possibly different) file in files
    
    Let's consider the mroonga package. "revision >= 10" is matched
    by "ha_mroonga.cc" (revision: 40). "revision < 40" is matched
    by "ha_mroonga.hpp" (revision: 6). So the mroonga package is
    matched.
    
    It is not what we want.
    
    We want packages that match the following condition:
    
      * There is at least one file that matches both revision >= 10 and revision < 40.
    
    Let's consider the mroonga package. "ha_mroonga.cc" (revision: 40)
    doesn't match "revision >= 10 && revision < 40".
    "ha_mroonga.hpp" (revision: 6) doesn't match "revision >= 10 &&
    revision < 40" either. So the mroonga package isn't matched.
    
    It is what we want.
    
    The following query is for the search condition:
    
      select Packages \
        --filter 'sub_filter(files, "revision >= 10 && revision < 40")' \
        --output_columns '_key, files, files.revision'
    
    It requires sub_filter().

  Added files:
    test/command/suite/select/function/sub_filter/accessor.expected
    test/command/suite/select/function/sub_filter/accessor.test
    test/command/suite/select/function/sub_filter/column.expected
    test/command/suite/select/function/sub_filter/column.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+185 -0)
===================================================================
--- lib/proc.c    2013-03-07 14:48:55 +0900 (ed29c82)
+++ lib/proc.c    2013-03-07 15:46:31 +0900 (dda990c)
@@ -3611,6 +3611,183 @@ selector_query(grn_ctx *ctx, grn_obj *table, grn_obj *index,
   return run_query(ctx, table, nargs - 1, args + 1, res, op);
 }
 
+static grn_rc /* shared worker called by func_sub_filter() and selector_sub_filter() */
+run_sub_filter(grn_ctx *ctx, grn_obj *table, /* table is not referenced in this body */
+               int nargs, grn_obj **args, /* exactly two args: args[0] = scope, args[1] = filter text */
+               grn_obj *res, grn_operator op) /* res and op are handed to grn_accessor_resolve() */
+{
+  grn_rc rc = GRN_SUCCESS; /* returned as-is when no step fails */
+  grn_obj *scope;
+  grn_obj *sub_filter_string;
+  grn_obj *scope_domain = NULL; /* NULL-initialized so the exit path can test before unlinking */
+  grn_obj *sub_filter = NULL; /* same: only unlinked at exit when it was created */
+  grn_obj *dummy_variable = NULL; /* filled by GRN_EXPR_CREATE_FOR_QUERY; not read afterwards */
+
+  if (nargs != 2) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "sub_filter(): wrong number of arguments (%d for 2)", nargs);
+    rc = ctx->rc;
+    goto exit;
+  }
+
+  scope = args[0];
+  sub_filter_string = args[1];
+
+  switch (scope->header.type) { /* only accessors and fix/var size columns are accepted as scope */
+  case GRN_ACCESSOR :
+  case GRN_COLUMN_FIX_SIZE :
+  case GRN_COLUMN_VAR_SIZE :
+    break;
+  default :
+    /* TODO: include the inspected 1st argument in the message.
+       NOTE(review): the message text below says 1nd; 1st is presumably meant. */
+    ERR(GRN_INVALID_ARGUMENT,
+        "sub_filter(): the 1nd argument must be column or accessor");
+    rc = ctx->rc;
+    goto exit;
+    break; /* not reached: follows the goto above */
+  }
+
+  scope_domain = grn_ctx_at(ctx, grn_obj_get_range(ctx, scope)); /* object for the range id of scope; the sub filter runs against it */
+
+  if (sub_filter_string->header.domain != GRN_DB_TEXT) {
+    /* TODO: include the inspected 2nd argument in the message */
+    ERR(GRN_INVALID_ARGUMENT,
+        "sub_filter(): the 2nd argument must be String");
+    rc = ctx->rc;
+    goto exit;
+  }
+  if (GRN_TEXT_LEN(sub_filter_string) == 0) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "sub_filter(): the 2nd argument must not be empty String");
+    rc = ctx->rc;
+    goto exit;
+  }
+
+  GRN_EXPR_CREATE_FOR_QUERY(ctx, scope_domain, sub_filter, dummy_variable); /* expected to set sub_filter; verified on the next line */
+  if (!sub_filter) {
+    rc = ctx->rc;
+    goto exit;
+  }
+
+  grn_expr_parse(ctx, sub_filter, /* parse the filter text into the expression just created */
+                 GRN_TEXT_VALUE(sub_filter_string),
+                 GRN_TEXT_LEN(sub_filter_string),
+                 NULL, GRN_OP_MATCH, GRN_OP_AND,
+                 GRN_EXPR_SYNTAX_SCRIPT); /* text is parsed with the script syntax flag */
+  if (ctx->rc != GRN_SUCCESS) { /* parse failure is detected through ctx->rc, not a return value */
+    rc = ctx->rc;
+    goto exit;
+  }
+
+  {
+    grn_obj *base_res = NULL; /* temporary table receiving the sub filter matches */
+
+    base_res = grn_table_create(ctx, NULL, 0, NULL, /* NOTE(review): result is not checked for NULL before use */
+                                GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC,
+                                scope_domain, NULL);
+    grn_table_select(ctx, scope_domain, sub_filter, base_res, GRN_OP_OR); /* run sub_filter over scope_domain into base_res */
+    if (scope->header.type == GRN_ACCESSOR) {
+      rc = grn_accessor_resolve(ctx, scope, -1, base_res, res, op, NULL); /* scope is already an accessor: resolve directly */
+    } else {
+      grn_accessor accessor; /* stack accessor wrapping the plain column so the same resolve call applies */
+      accessor.header.type = GRN_ACCESSOR; /* NOTE(review): only the four fields set here are initialized */
+      accessor.obj = scope;
+      accessor.action = GRN_ACCESSOR_GET_COLUMN_VALUE;
+      accessor.next = NULL; /* single-element chain */
+      rc = grn_accessor_resolve(ctx, (grn_obj *)&accessor, -1, base_res,
+                                res, op, NULL);
+    }
+    grn_obj_unlink(ctx, base_res); /* temporary table is released on both branches */
+  }
+
+exit: /* common cleanup for success and every error path */
+  if (scope_domain) {
+    grn_obj_unlink(ctx, scope_domain);
+  }
+  if (sub_filter) {
+    grn_obj_unlink(ctx, sub_filter);
+  }
+
+  return rc;
+}
+
+static grn_obj * /* function form of sub_filter(): returns a Bool object telling whether the current record matches */
+func_sub_filter(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) /* nargs/args are forwarded unchanged to run_sub_filter() */
+{
+  grn_obj *found; /* return value; stays GRN_FALSE on every early exit after allocation */
+  grn_obj *command = ctx->impl->curr_expr; /* expression currently being evaluated */
+  grn_obj *condition_ptr = NULL;
+  grn_obj *condition = NULL;
+  grn_obj *variable;
+  grn_obj *table = NULL; /* NULL-initialized so the exit path can test before unlinking */
+  grn_obj *res = NULL; /* same */
+
+  found = GRN_PROC_ALLOC(GRN_DB_BOOL, 0);
+  if (!found) {
+    goto exit; /* returns NULL: nothing could be allocated */
+  }
+  GRN_BOOL_SET(ctx, found, GRN_FALSE); /* default answer */
+
+  condition_ptr = grn_expr_get_var(ctx, command, /* look up the variable named by GRN_SELECT_INTERNAL_VAR_CONDITION */
+                                   GRN_SELECT_INTERNAL_VAR_CONDITION,
+                                   strlen(GRN_SELECT_INTERNAL_VAR_CONDITION));
+  if (!condition_ptr) {
+    goto exit;
+  }
+
+  condition = GRN_PTR_VALUE(condition_ptr); /* the variable holds a pointer to the condition expression */
+  if (!condition) {
+    goto exit;
+  }
+
+  variable = grn_expr_get_var_by_offset(ctx, condition, 0); /* first variable of the condition; its record value is read below */
+  if (!variable) {
+    goto exit;
+  }
+
+  table = grn_ctx_at(ctx, variable->header.domain); /* object identified by the domain of that variable */
+  if (!table) {
+    goto exit;
+  }
+
+  res = grn_table_create(ctx, NULL, 0, NULL, /* temporary result table keyed by table */
+                         GRN_TABLE_HASH_KEY|GRN_OBJ_WITH_SUBREC, table, NULL);
+  if (!res) {
+    goto exit;
+  }
+  {
+    grn_rset_posinfo pi; /* key for the single seed entry added to res */
+    unsigned int key_size;
+    memset(&pi, 0, sizeof(grn_rset_posinfo)); /* zero every field before setting rid */
+    pi.rid = GRN_RECORD_VALUE(variable); /* record id currently held by variable */
+    key_size = ((grn_hash *)res)->key_size; /* res is accessed as grn_hash to read its key size */
+    if (grn_table_add(ctx, res, &pi, key_size, NULL) == GRN_ID_NIL) {
+      goto exit;
+    }
+  }
+  if (run_sub_filter(ctx, table, nargs, args, res, GRN_OP_AND) == GRN_SUCCESS) { /* GRN_OP_AND against the one-entry res */
+    GRN_BOOL_SET(ctx, found, grn_table_size(ctx, res) > 0); /* true when res is non-empty after the call */
+  }
+
+exit: /* common cleanup; found is returned whatever its current value */
+  if (res) {
+    grn_obj_unlink(ctx, res);
+  }
+  if (table) {
+    grn_obj_unlink(ctx, table);
+  }
+
+  return found;
+}
+
+static grn_rc /* selector form of sub_filter(): thin wrapper around run_sub_filter() */
+selector_sub_filter(grn_ctx *ctx, grn_obj *table, grn_obj *index, /* index is not referenced in this body */
+                    int nargs, grn_obj **args,
+                    grn_obj *res, grn_operator op)
+{
+  return run_sub_filter(ctx, table, nargs - 1, args + 1, res, op); /* skips args[0]; presumably the function object itself - TODO confirm */
+}
+
 #define DEF_VAR(v,name_str) do {\
   (v).name = (name_str);\
   (v).name_size = GRN_STRLEN(name_str);\
@@ -3801,4 +3978,12 @@ grn_db_init_builtin_query(grn_ctx *ctx)
                                     func_query, NULL, NULL, 0, NULL);
     grn_proc_set_selector(ctx, selector_proc, selector_query);
   }
+
+  {
+    grn_obj *selector_proc;
+
+    selector_proc = grn_proc_create(ctx, "sub_filter", -1, GRN_PROC_FUNCTION,
+                                    func_sub_filter, NULL, NULL, 0, NULL);
+    grn_proc_set_selector(ctx, selector_proc, selector_sub_filter);
+  }
 }

  Added: test/command/suite/select/function/sub_filter/accessor.expected (+95 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/sub_filter/accessor.expected    2013-03-07 15:46:31 +0900 (c755006)
@@ -0,0 +1,95 @@
+table_create Users TABLE_PAT_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Users birthday COLUMN_SCALAR Time
+[[0,0.0,0.0],true]
+table_create Files TABLE_PAT_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Files author COLUMN_SCALAR Users
+[[0,0.0,0.0],true]
+table_create Packages TABLE_PAT_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Packages files COLUMN_VECTOR Files
+[[0,0.0,0.0],true]
+column_create Users files_author_index COLUMN_INDEX Files author
+[[0,0.0,0.0],true]
+column_create Files packages_files_index COLUMN_INDEX Packages files
+[[0,0.0,0.0],true]
+table_create Birthdays TABLE_PAT_KEY Time
+[[0,0.0,0.0],true]
+column_create Birthdays users_birthday COLUMN_INDEX Users birthday
+[[0,0.0,0.0],true]
+load --table Users
+[
+{"_key": "Alice",  "birthday": "1992-02-09 00:00:00"},
+{"_key": "Bob",    "birthday": "1988-01-04 00:00:00"},
+{"_key": "Carlos", "birthday": "1982-12-29 00:00:00"}
+]
+[[0,0.0,0.0],3]
+load --table Files
+[
+{"_key": "include/groonga.h", "author": "Alice"},
+{"_key": "src/groonga.c",     "author": "Bob"},
+{"_key": "lib/groonga.rb",    "author": "Carlos"},
+{"_key": "README.textile",    "author": "Alice"},
+{"_key": "ha_mroonga.cc",     "author": "Bob"},
+{"_key": "ha_mroonga.hpp",    "author": "Carlos"}
+]
+[[0,0.0,0.0],6]
+load --table Packages
+[
+{"_key": "groonga", "files": ["include/groonga.h", "src/groonga.c"]},
+{"_key": "rroonga", "files": ["lib/groonga.rb", "README.textile"]},
+{"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
+]
+[[0,0.0,0.0],3]
+select Packages   --filter 'sub_filter(files.author, "birthday >= \\"1988-01-04 00:00:00\\" && birthday < \\"1992-02-09 00:00:00\\"")'   --output_columns '_key, files, files.author.birthday'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        2
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "files",
+          "Files"
+        ],
+        [
+          "files.author.birthday",
+          "Time"
+        ]
+      ],
+      [
+        "groonga",
+        [
+          "include/groonga.h",
+          "src/groonga.c"
+        ],
+        [
+          697561200.0,
+          568220400.0
+        ]
+      ],
+      [
+        "mroonga",
+        [
+          "ha_mroonga.cc",
+          "ha_mroonga.hpp"
+        ],
+        [
+          568220400.0,
+          409935600.0
+        ]
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/function/sub_filter/accessor.test (+42 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/sub_filter/accessor.test    2013-03-07 15:46:31 +0900 (e7f0094)
@@ -0,0 +1,42 @@
+table_create Users TABLE_PAT_KEY ShortText
+column_create Users birthday COLUMN_SCALAR Time
+
+table_create Files TABLE_PAT_KEY ShortText
+column_create Files author COLUMN_SCALAR Users
+
+table_create Packages TABLE_PAT_KEY ShortText
+column_create Packages files COLUMN_VECTOR Files
+
+column_create Users files_author_index COLUMN_INDEX Files author
+column_create Files packages_files_index COLUMN_INDEX Packages files
+
+table_create Birthdays TABLE_PAT_KEY Time
+column_create Birthdays users_birthday COLUMN_INDEX Users birthday
+
+load --table Users
+[
+{"_key": "Alice",  "birthday": "1992-02-09 00:00:00"},
+{"_key": "Bob",    "birthday": "1988-01-04 00:00:00"},
+{"_key": "Carlos", "birthday": "1982-12-29 00:00:00"}
+]
+
+load --table Files
+[
+{"_key": "include/groonga.h", "author": "Alice"},
+{"_key": "src/groonga.c",     "author": "Bob"},
+{"_key": "lib/groonga.rb",    "author": "Carlos"},
+{"_key": "README.textile",    "author": "Alice"},
+{"_key": "ha_mroonga.cc",     "author": "Bob"},
+{"_key": "ha_mroonga.hpp",    "author": "Carlos"}
+]
+
+load --table Packages
+[
+{"_key": "groonga", "files": ["include/groonga.h", "src/groonga.c"]},
+{"_key": "rroonga", "files": ["lib/groonga.rb", "README.textile"]},
+{"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
+]
+
+select Packages \
+  --filter 'sub_filter(files.author, "birthday >= \\"1988-01-04 00:00:00\\" && birthday < \\"1992-02-09 00:00:00\\"")' \
+  --output_columns '_key, files, files.author.birthday'

  Added: test/command/suite/select/function/sub_filter/column.expected (+144 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/sub_filter/column.expected    2013-03-07 15:46:31 +0900 (d94e820)
@@ -0,0 +1,144 @@
+table_create Files TABLE_PAT_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Files revision COLUMN_SCALAR UInt32
+[[0,0.0,0.0],true]
+table_create Packages TABLE_PAT_KEY ShortText
+[[0,0.0,0.0],true]
+column_create Packages files COLUMN_VECTOR Files
+[[0,0.0,0.0],true]
+column_create Files packages_files_index COLUMN_INDEX Packages files
+[[0,0.0,0.0],true]
+table_create Revisions TABLE_PAT_KEY UInt32
+[[0,0.0,0.0],true]
+column_create Revisions files_revision COLUMN_INDEX Files revision
+[[0,0.0,0.0],true]
+load --table Files
+[
+{"_key": "include/groonga.h", "revision": 100},
+{"_key": "src/groonga.c",     "revision": 29},
+{"_key": "lib/groonga.rb",    "revision": 12},
+{"_key": "README.textile",    "revision": 24},
+{"_key": "ha_mroonga.cc",     "revision": 40},
+{"_key": "ha_mroonga.hpp",    "revision": 6}
+]
+[[0,0.0,0.0],6]
+load --table Packages
+[
+{"_key": "groonga", "files": ["include/groonga.h", "src/groonga.c"]},
+{"_key": "rroonga", "files": ["lib/groonga.rb", "README.textile"]},
+{"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
+]
+[[0,0.0,0.0],3]
+select Packages   --filter 'sub_filter(files, "revision >= 10 && revision < 40")'   --output_columns '_key, files, files.revision'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        2
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "files",
+          "Files"
+        ],
+        [
+          "files.revision",
+          "UInt32"
+        ]
+      ],
+      [
+        "rroonga",
+        [
+          "lib/groonga.rb",
+          "README.textile"
+        ],
+        [
+          12,
+          24
+        ]
+      ],
+      [
+        "groonga",
+        [
+          "include/groonga.h",
+          "src/groonga.c"
+        ],
+        [
+          100,
+          29
+        ]
+      ]
+    ]
+  ]
+]
+select Packages   --filter 'files.revision >= 10 && files.revision < 40'   --output_columns '_key, files, files.revision'
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        3
+      ],
+      [
+        [
+          "_key",
+          "ShortText"
+        ],
+        [
+          "files",
+          "Files"
+        ],
+        [
+          "files.revision",
+          "UInt32"
+        ]
+      ],
+      [
+        "rroonga",
+        [
+          "lib/groonga.rb",
+          "README.textile"
+        ],
+        [
+          12,
+          24
+        ]
+      ],
+      [
+        "groonga",
+        [
+          "include/groonga.h",
+          "src/groonga.c"
+        ],
+        [
+          100,
+          29
+        ]
+      ],
+      [
+        "mroonga",
+        [
+          "ha_mroonga.cc",
+          "ha_mroonga.hpp"
+        ],
+        [
+          40,
+          6
+        ]
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/function/sub_filter/column.test (+31 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/sub_filter/column.test    2013-03-07 15:46:31 +0900 (95d3599)
@@ -0,0 +1,31 @@
+table_create Files TABLE_PAT_KEY ShortText
+column_create Files revision COLUMN_SCALAR UInt32
+
+table_create Packages TABLE_PAT_KEY ShortText
+column_create Packages files COLUMN_VECTOR Files
+
+column_create Files packages_files_index COLUMN_INDEX Packages files
+
+table_create Revisions TABLE_PAT_KEY UInt32
+column_create Revisions files_revision COLUMN_INDEX Files revision
+
+load --table Files
+[
+{"_key": "include/groonga.h", "revision": 100},
+{"_key": "src/groonga.c",     "revision": 29},
+{"_key": "lib/groonga.rb",    "revision": 12},
+{"_key": "README.textile",    "revision": 24},
+{"_key": "ha_mroonga.cc",     "revision": 40},
+{"_key": "ha_mroonga.hpp",    "revision": 6}
+]
+
+load --table Packages
+[
+{"_key": "groonga", "files": ["include/groonga.h", "src/groonga.c"]},
+{"_key": "rroonga", "files": ["lib/groonga.rb", "README.textile"]},
+{"_key": "mroonga", "files": ["ha_mroonga.cc", "ha_mroonga.hpp"]}
+]
+
+select Packages \
+  --filter 'sub_filter(files, "revision >= 10 && revision < 40")' \
+  --output_columns '_key, files, files.revision'
-------------- next part --------------
An HTML attachment was scrubbed...
Download 



More information about the Groonga-commit mailing list
Back to archive index