[Groonga-commit] groonga/groonga [master] Support custom query expansion by plugin

Back to archive index

Kouhei Sutou null+****@clear*****
Thu Oct 11 17:40:27 JST 2012


Kouhei Sutou	2012-10-11 17:40:27 +0900 (Thu, 11 Oct 2012)

  New Revision: b1fe08cf2435525ab33877dd6f9c24ad1aeadf3b
  https://github.com/groonga/groonga/commit/b1fe08cf2435525ab33877dd6f9c24ad1aeadf3b

  Log:
    Support custom query expansion by plugin

  Modified files:
    lib/db.h
    lib/proc.c
    test/benchmark/bench-geo-distance.c

  Modified: lib/db.h (+3 -0)
===================================================================
--- lib/db.h    2012-10-11 17:39:01 +0900 (5d9d792)
+++ lib/db.h    2012-10-11 17:40:27 +0900 (7258b76)
@@ -205,6 +205,9 @@ GRN_API grn_obj *grn_proc_alloc(grn_ctx *ctx, grn_user_data *user_data,
 grn_rc grn_proc_set_selector(grn_ctx *ctx, grn_obj *proc,
                              grn_selector_func selector);
 
+GRN_API grn_rc grn_proc_call(grn_ctx *ctx, grn_obj *proc,
+                             int nargs, grn_obj *caller);
+
 grn_obj *grn_expr_get_or_add_var(grn_ctx *ctx, grn_obj *expr,
                                  const char *name, unsigned int name_size);
 

  Modified: lib/proc.c (+136 -25)
===================================================================
--- lib/proc.c    2012-10-11 17:39:01 +0900 (ec75270)
+++ lib/proc.c    2012-10-11 17:40:27 +0900 (6c595f0)
@@ -33,6 +33,16 @@
 #define O_NOFOLLOW 0
 #endif
 
+typedef grn_rc (*grn_substitute_term_func) (grn_ctx *ctx,
+                                            const char *term,
+                                            unsigned int term_len,
+                                            grn_obj *substituted_term,
+                                            grn_user_data *user_data);
+typedef struct {
+  grn_obj *table;
+  grn_obj *column;
+} grn_substitute_term_by_column_data;
+
 /**** globals for procs ****/
 const char *grn_document_root = NULL;
 
@@ -92,12 +102,45 @@ exit :
 /**** query expander ****/
 
 static grn_rc
-substitute_term(grn_ctx *ctx, grn_obj *table, grn_obj *column,
-                const char *term, size_t term_size, grn_obj *expanded_term)
+substitute_term_by_func(grn_ctx *ctx, const char *term, unsigned int term_len,
+                        grn_obj *expanded_term, grn_user_data *user_data)
+{
+  grn_rc rc;
+  grn_obj *expander = user_data->ptr;
+  grn_obj grn_term;
+  grn_obj *caller;
+  grn_obj *rc_object;
+  int nargs = 0;
+
+  GRN_TEXT_INIT(&grn_term, GRN_OBJ_DO_SHALLOW_COPY);
+  GRN_TEXT_SET(ctx, &grn_term, term, term_len);
+  grn_ctx_push(ctx, &grn_term);
+  nargs++;
+  grn_ctx_push(ctx, expanded_term);
+  nargs++;
+
+  caller = grn_expr_create(ctx, NULL, 0);
+  rc = grn_proc_call(ctx, expander, nargs, caller);
+  GRN_OBJ_FIN(ctx, &grn_term);
+  rc_object = grn_ctx_pop(ctx);
+  rc = GRN_INT32_VALUE(rc_object);
+  grn_obj_unlink(ctx, caller);
+
+  return rc;
+}
+
+static grn_rc
+substitute_term_by_column(grn_ctx *ctx, const char *term, unsigned int term_len,
+                          grn_obj *expanded_term, grn_user_data *user_data)
 {
-  grn_id id;
   grn_rc rc = GRN_END_OF_DATA;
-  if ((id = grn_table_get(ctx, table, (const void *)term, (unsigned int)term_size))) {
+  grn_id id;
+  grn_substitute_term_by_column_data *data = user_data->ptr;
+  grn_obj *table, *column;
+
+  table = data->table;
+  column = data->column;
+  if ((id = grn_table_get(ctx, table, term, term_len))) {
     if ((column->header.type == GRN_COLUMN_VAR_SIZE) &&
         ((column->header.flags & GRN_OBJ_COLUMN_TYPE_MASK) == GRN_OBJ_COLUMN_VECTOR)) {
       unsigned int i, n;
@@ -128,8 +171,11 @@ substitute_term(grn_ctx *ctx, grn_obj *table, grn_obj *column,
 }
 
 static grn_rc
-expand_query(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_expr_flags flags,
-             const char *query, unsigned int query_len, grn_obj *expanded_query)
+substitute_terms(grn_ctx *ctx, const char *query, unsigned int query_len,
+                 grn_expr_flags flags,
+                 grn_obj *expanded_query,
+                 grn_substitute_term_func substitute_term_func,
+                 grn_user_data *user_data)
 {
   grn_obj buf;
   unsigned int len;
@@ -173,7 +219,8 @@ expand_query(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_expr_flags flags
         }
         GRN_TEXT_PUT(ctx, &buf, cur, len);
       }
-      if (substitute_term(ctx, table, column, GRN_TEXT_VALUE(&buf), GRN_TEXT_LEN(&buf), expanded_query)) {
+      if (substitute_term_func(ctx, GRN_TEXT_VALUE(&buf), GRN_TEXT_LEN(&buf),
+                               expanded_query, user_data)) {
         GRN_TEXT_PUT(ctx, expanded_query, start, cur - start);
       }
       break;
@@ -226,8 +273,8 @@ expand_query(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_expr_flags flags
         }
       }
       if (start < cur) {
-        if (substitute_term(ctx, table, column, start, cur - start,
-                            expanded_query)) {
+        if (substitute_term_func(ctx, start, cur - start,
+                                 expanded_query, user_data)) {
           GRN_TEXT_PUT(ctx, expanded_query, start, cur - start);
         }
       }
@@ -239,6 +286,81 @@ exit :
   return GRN_SUCCESS;
 }
 
+static grn_rc
+expand_query(grn_ctx *ctx, const char *query, unsigned int query_len,
+             grn_expr_flags flags,
+             const char *query_expander_name,
+             unsigned int query_expander_name_len,
+             grn_obj *expanded_query)
+{
+  grn_rc rc = GRN_SUCCESS;
+  grn_obj *query_expander;
+
+  query_expander = grn_ctx_get(ctx,
+                               query_expander_name, query_expander_name_len);
+  if (!query_expander) {
+    ERR(GRN_INVALID_ARGUMENT,
+        "nonexistent query expansion column: <%.*s>",
+        query_expander_name_len, query_expander_name);
+    return GRN_INVALID_ARGUMENT;
+  }
+
+  switch (query_expander->header.type) {
+  case GRN_PROC :
+    if (((grn_proc *)query_expander)->type == GRN_PROC_FUNCTION) {
+      grn_user_data user_data;
+      user_data.ptr = query_expander;
+      substitute_terms(ctx, query, query_len, flags, expanded_query,
+                       substitute_term_by_func, &user_data);
+    } else {
+      rc = GRN_INVALID_ARGUMENT;
+      ERR(rc,
+          "[expand-query] must be function proc: <%.*s>",
+          query_expander_name_len, query_expander_name);
+    }
+    break;
+  case GRN_COLUMN_FIX_SIZE :
+  case GRN_COLUMN_VAR_SIZE :
+    {
+      grn_obj *query_expansion_table;
+      query_expansion_table = grn_column_table(ctx, query_expander);
+      if (query_expansion_table) {
+        grn_user_data user_data;
+        grn_substitute_term_by_column_data data;
+        user_data.ptr = &data;
+        data.table = query_expansion_table;
+        data.column = query_expander;
+        substitute_terms(ctx, query, query_len, flags, expanded_query,
+                         substitute_term_by_column, &user_data);
+        grn_obj_unlink(ctx, query_expansion_table);
+      } else {
+        rc = GRN_INVALID_ARGUMENT;
+        ERR(rc,
+            "[expand-query] failed to get table of column: <%.*s>",
+            query_expander_name_len, query_expander_name);
+      }
+    }
+    break;
+  default :
+    rc = GRN_INVALID_ARGUMENT;
+    {
+      grn_obj type_name;
+      GRN_TEXT_INIT(&type_name, 0);
+      grn_inspect_type(ctx, &type_name, query_expander->header.type);
+      ERR(rc,
+          "[expand-query] must be a column or function proc: <%.*s>(%.*s)",
+          query_expander_name_len, query_expander_name,
+          (int)GRN_TEXT_LEN(&type_name), GRN_TEXT_VALUE(&type_name));
+      GRN_OBJ_FIN(ctx, &type_name);
+    }
+    break;
+  }
+  grn_obj_unlink(ctx, query_expander);
+
+  return rc;
+}
+
+
 /**** procs ****/
 
 #define DEFAULT_LIMIT           10
@@ -400,23 +522,12 @@ grn_select(grn_ctx *ctx, const char *table, unsigned int table_len,
             }
           }
           if (query_expansion_len) {
-            grn_obj *query_expansion_column;
-            query_expansion_column = grn_ctx_get(ctx, query_expansion, query_expansion_len);
-            if (query_expansion_column) {
-              grn_obj *query_expansion_table;
-              query_expansion_table = grn_column_table(ctx, query_expansion_column);
-              if (query_expansion_table) {
-                expand_query(ctx, query_expansion_table, query_expansion_column, flags,
-                             query, query_len, &query_expansion_buf);
-                query = GRN_TEXT_VALUE(&query_expansion_buf);
-                query_len = GRN_TEXT_LEN(&query_expansion_buf);
-                grn_obj_unlink(ctx, query_expansion_table);
-              }
-              grn_obj_unlink(ctx, query_expansion_column);
+            if (expand_query(ctx, query, query_len, flags,
+                             query_expansion, query_expansion_len,
+                             &query_expansion_buf) == GRN_SUCCESS) {
+              query = GRN_TEXT_VALUE(&query_expansion_buf);
+              query_len = GRN_TEXT_LEN(&query_expansion_buf);
             } else {
-              ERR(GRN_INVALID_ARGUMENT,
-                  "nonexistent query expansion column: <%.*s>",
-                  query_expansion_len, query_expansion);
               grn_obj_unlink(ctx, cond);
               GRN_OBJ_FIN(ctx, &query_expansion_buf);
               goto exit;

  Modified: test/benchmark/bench-geo-distance.c (+0 -1)
===================================================================
--- test/benchmark/bench-geo-distance.c    2012-10-11 17:39:01 +0900 (19dba72)
+++ test/benchmark/bench-geo-distance.c    2012-10-11 17:40:27 +0900 (0607353)
@@ -56,7 +56,6 @@
 
 #define GET(context, name) (grn_ctx_get(context, name, strlen(name)))
 
-grn_rc grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller);
 grn_obj *grn_expr_get_value(grn_ctx *ctx, grn_obj *expr, int offset);
 
 typedef struct _BenchmarkData
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index