[Groonga-commit] groonga/groonga at 1763431 [master] Support scorer with weight

Back to archive index

Kouhei Sutou null+****@clear*****
Sun Mar 1 23:39:45 JST 2015


Kouhei Sutou	2015-03-01 23:39:45 +0900 (Sun, 01 Mar 2015)

  New Revision: 176343145ebae0e5e7f5aa6df28f847a1acce323
  https://github.com/groonga/groonga/commit/176343145ebae0e5e7f5aa6df28f847a1acce323

  Message:
    Support scorer with weight

  Modified files:
    lib/expr.c
    lib/grn_expr.h
    lib/mrb/mrb_accessor.c
    lib/mrb/mrb_expr.c
    lib/mrb/scripts/scan_info.rb
    lib/mrb/scripts/scan_info_data.rb
    lib/mrb/scripts/scan_info_search_index.rb
    test/command/suite/select/match_columns/scorer/tf_at_most/weight.test

  Modified: lib/expr.c (+122 -113)
===================================================================
--- lib/expr.c    2015-03-01 23:37:41 +0900 (1bd454f)
+++ lib/expr.c    2015-03-01 23:39:45 +0900 (e6a8234)
@@ -3785,14 +3785,17 @@ struct _grn_scan_info {
   grn_obj *args[GRN_SCAN_INFO_MAX_N_ARGS];
   int max_interval;
   int similarity_threshold;
-  grn_obj *scorer;
-  grn_obj *scorer_args_expr;
-  uint32_t scorer_args_expr_offset;
+  grn_obj scorers;
+  grn_obj scorer_args_exprs;
+  grn_obj scorer_args_expr_offsets;
 };
 
 #define SI_FREE(si) do {\
   GRN_OBJ_FIN(ctx, &(si)->wv);\
   GRN_OBJ_FIN(ctx, &(si)->index);\
+  GRN_OBJ_FIN(ctx, &(si)->scorers);\
+  GRN_OBJ_FIN(ctx, &(si)->scorer_args_exprs);\
+  GRN_OBJ_FIN(ctx, &(si)->scorer_args_expr_offsets);\
   GRN_FREE(si);\
 } while (0)
 
@@ -3811,9 +3814,9 @@ struct _grn_scan_info {
   (si)->max_interval = DEFAULT_MAX_INTERVAL;\
   (si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\
   (si)->start = (st);\
-  (si)->scorer = NULL;\
-  (si)->scorer_args_expr = NULL;\
-  (si)->scorer_args_expr_offset = 0;\
+  GRN_PTR_INIT(&(si)->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);\
+  GRN_PTR_INIT(&(si)->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);\
+  GRN_UINT32_INIT(&(si)->scorer_args_expr_offsets, GRN_OBJ_VECTOR);\
 } while (0)
 
 static scan_info **
@@ -4039,11 +4042,18 @@ grn_expr_inspect_internal(grn_ctx *ctx, grn_obj *buf, grn_obj *expr)
 
 
 static void
-scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight)
+scan_info_put_index(grn_ctx *ctx, scan_info *si,
+                    grn_obj *index, uint32_t sid, int32_t weight,
+                    grn_obj *scorer,
+                    grn_obj *scorer_args_expr,
+                    uint32_t scorer_args_expr_offset)
 {
   GRN_PTR_PUT(ctx, &si->index, index);
   GRN_UINT32_PUT(ctx, &si->wv, sid);
   GRN_INT32_PUT(ctx, &si->wv, weight);
+  GRN_PTR_PUT(ctx, &si->scorers, scorer);
+  GRN_PTR_PUT(ctx, &si->scorer_args_exprs, scorer_args_expr);
+  GRN_UINT32_PUT(ctx, &si->scorer_args_expr_offsets, scorer_args_expr_offset);
   {
     int i, ni = (GRN_BULK_VSIZE(&si->index) / sizeof(grn_obj *)) - 1;
     grn_obj **pi = &GRN_PTR_VALUE_AT(&si->index, ni);
@@ -4064,10 +4074,13 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i
 }
 
 static int32_t
-get_weight(grn_ctx *ctx, grn_expr_code *ec)
+get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset)
 {
   if (ec->modify == 2 && ec[2].op == GRN_OP_STAR &&
       ec[1].value && ec[1].value->header.type == GRN_BULK) {
+    if (offset) {
+      *offset = 2;
+    }
     if (ec[1].value->header.domain == GRN_DB_INT32 ||
         ec[1].value->header.domain == GRN_DB_UINT32) {
       return GRN_INT32_VALUE(ec[1].value);
@@ -4082,6 +4095,9 @@ get_weight(grn_ctx *ctx, grn_expr_code *ec)
       return weight;
     }
   } else {
+    if (offset) {
+      *offset = 0;
+    }
     return 1;
   }
 }
@@ -4103,9 +4119,9 @@ grn_scan_info_open(grn_ctx *ctx, int start)
   si->max_interval = DEFAULT_MAX_INTERVAL;
   si->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;
   si->start = start;
-  si->scorer = NULL;
-  si->scorer_args_expr = NULL;
-  si->scorer_args_expr_offset = 0;
+  GRN_PTR_INIT(&si->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_PTR_INIT(&si->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);
+  GRN_UINT32_INIT(&si->scorer_args_expr_offsets, GRN_OBJ_VECTOR);
 
   return si;
 }
@@ -4117,9 +4133,16 @@ grn_scan_info_close(grn_ctx *ctx, scan_info *si)
 }
 
 void
-grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight)
+grn_scan_info_put_index(grn_ctx *ctx, scan_info *si,
+                        grn_obj *index, uint32_t sid, int32_t weight,
+                        grn_obj *scorer,
+                        grn_obj *scorer_args_expr,
+                        uint32_t scorer_args_expr_offset)
 {
-  scan_info_put_index(ctx, si, index, sid, weight);
+  scan_info_put_index(ctx, si, index, sid, weight,
+                      scorer,
+                      scorer_args_expr,
+                      scorer_args_expr_offset);
 }
 
 scan_info **
@@ -4130,9 +4153,9 @@ grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip,
 }
 
 int32_t
-grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec)
+grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset)
 {
-  return get_weight(ctx, ec);
+  return get_weight(ctx, ec, offset);
 }
 
 int
@@ -4207,42 +4230,6 @@ grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold)
   si->similarity_threshold = similarity_threshold;
 }
 
-grn_obj *
-grn_scan_info_get_scorer(scan_info *si)
-{
-  return si->scorer;
-}
-
-void
-grn_scan_info_set_scorer(scan_info *si, grn_obj *scorer)
-{
-  si->scorer = scorer;
-}
-
-grn_obj *
-grn_scan_info_get_scorer_args_expr(scan_info *si)
-{
-  return si->scorer_args_expr;
-}
-
-void
-grn_scan_info_set_scorer_args_expr(scan_info *si, grn_obj *expr)
-{
-  si->scorer_args_expr = expr;
-}
-
-uint32_t
-grn_scan_info_get_scorer_args_expr_offset(scan_info *si)
-{
-  return si->scorer_args_expr_offset;
-}
-
-void
-grn_scan_info_set_scorer_args_expr_offset(scan_info *si, uint32_t offset)
-{
-  si->scorer_args_expr_offset = offset;
-}
-
 grn_bool
 grn_scan_info_push_arg(scan_info *si, grn_obj *arg)
 {
@@ -4264,32 +4251,51 @@ grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i)
 }
 
 static uint32_t
-scan_info_build_find_index_column_index(grn_ctx *ctx,
-                                        scan_info *si,
-                                        grn_expr_code *ec,
-                                        uint32_t n_rest_codes,
-                                        grn_operator op)
+scan_info_build_match_expr_codes_find_index(grn_ctx *ctx, scan_info *si,
+                                            grn_expr *expr, uint32_t i,
+                                            grn_obj **index,
+                                            int *sid)
 {
-  uint32_t offset = 0;
-  grn_obj *index;
-  int sid = 0;
-  int32_t weight;
-
-  index = ec->value;
-  if (n_rest_codes >= 2 &&
-      ec[1].value &&
-      (ec[1].value->header.domain == GRN_DB_INT32 ||
-       ec[1].value->header.domain == GRN_DB_UINT32) &&
-      ec[2].op == GRN_OP_GET_MEMBER) {
-    if (ec[1].value->header.domain == GRN_DB_INT32) {
-      sid = GRN_INT32_VALUE(ec[1].value) + 1;
-    } else {
-      sid = GRN_UINT32_VALUE(ec[1].value) + 1;
+  grn_expr_code *ec;
+  uint32_t offset = 1;
+
+  ec = &(expr->codes[i]);
+  switch (ec->value->header.type) {
+  case GRN_ACCESSOR :
+    if (grn_column_index(ctx, ec->value, si->op, index, 1, sid)) {
+      if (((grn_accessor *)ec->value)->next) {
+        *index = ec->value;
+      }
     }
-    offset = 2;
+    break;
+  case GRN_COLUMN_FIX_SIZE :
+  case GRN_COLUMN_VAR_SIZE :
+    grn_column_index(ctx, ec->value, si->op, index, 1, sid);
+    break;
+  case GRN_COLUMN_INDEX :
+    {
+      uint32_t n_rest_codes;
+
+      *index = ec->value;
+
+      n_rest_codes = expr->codes_curr - i;
+      if (n_rest_codes >= 2 &&
+          ec[1].value &&
+          (ec[1].value->header.domain == GRN_DB_INT32 ||
+           ec[1].value->header.domain == GRN_DB_UINT32) &&
+          ec[2].op == GRN_OP_GET_MEMBER) {
+        if (ec[1].value->header.domain == GRN_DB_INT32) {
+          *sid = GRN_INT32_VALUE(ec[1].value) + 1;
+        } else {
+          *sid = GRN_UINT32_VALUE(ec[1].value) + 1;
+        }
+        offset += 2;
+      }
+    }
+    break;
+  default :
+    break;
   }
-  weight = get_weight(ctx, ec + offset);
-  scan_info_put_index(ctx, si, index, sid, weight);
 
   return offset;
 }
@@ -4299,8 +4305,9 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
                                  grn_expr *expr, uint32_t i)
 {
   grn_expr_code *ec;
-  grn_obj *index;
-  int sid;
+  grn_obj *index = NULL;
+  int sid = 0;
+  uint32_t offset = 0;
 
   ec = &(expr->codes[i]);
   if (!ec->value) {
@@ -4309,29 +4316,19 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
 
   switch (ec->value->header.type) {
   case GRN_ACCESSOR :
-    if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) {
-      int32_t weight = get_weight(ctx, ec);
-      si->flags |= SCAN_ACCESSOR;
-      if (((grn_accessor *)ec->value)->next) {
-        scan_info_put_index(ctx, si, ec->value, sid, weight);
-      } else {
-        scan_info_put_index(ctx, si, index, sid, weight);
-      }
-    }
-    break;
   case GRN_COLUMN_FIX_SIZE :
   case GRN_COLUMN_VAR_SIZE :
-    if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) {
-      scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec));
-    }
-    break;
   case GRN_COLUMN_INDEX :
-    {
-      uint32_t n_rest_codes;
-      uint32_t offset;
-      n_rest_codes = expr->codes_curr - i;
-      offset = scan_info_build_find_index_column_index(ctx, si, ec,
-                                                       n_rest_codes, si->op);
+    offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i,
+                                                         &index, &sid);
+    i += offset - 1;
+    if (index) {
+      if (ec->value->header.type == GRN_ACCESSOR) {
+        si->flags |= SCAN_ACCESSOR;
+      }
+      scan_info_put_index(ctx, si, index, sid,
+                          get_weight(ctx, &(expr->codes[i]), &offset),
+                          NULL, NULL, 0);
       i += offset;
     }
     break;
@@ -4347,17 +4344,27 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si,
       GRN_OBJ_FIN(ctx, &inspected);
       return expr->codes_curr;
     }
-    si->scorer = ec->value;
-    i = scan_info_build_match_expr_codes(ctx, si, expr, i + 1);
-    if (expr->codes[i].op != GRN_OP_CALL) {
-      si->scorer_args_expr = (grn_obj *)expr;
-      si->scorer_args_expr_offset = i;
+    i++;
+    offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i,
+                                                         &index, &sid);
+    i += offset;
+    if (index) {
+      uint32_t scorer_args_expr_offset = 0;
+      if (expr->codes[i].op != GRN_OP_CALL) {
+        scorer_args_expr_offset = i;
+      }
+      while (i < expr->codes_curr && expr->codes[i].op != GRN_OP_CALL) {
+        i++;
+      }
+      scan_info_put_index(ctx, si, index, sid,
+                          get_weight(ctx, &(expr->codes[i]), &offset),
+                          ec->value,
+                          (grn_obj *)expr,
+                          scorer_args_expr_offset);
+      i += offset;
     }
     break;
-  case GRN_TABLE_NO_KEY :
-  case GRN_TABLE_HASH_KEY :
-  case GRN_TABLE_PAT_KEY :
-  case GRN_TABLE_DAT_KEY :
+  default :
     {
       char name[GRN_TABLE_MAX_KEY_SIZE];
       int name_size;
@@ -4393,15 +4400,15 @@ scan_info_build_match(grn_ctx *ctx, scan_info *si)
       scan_info_build_match_expr(ctx, si, (grn_expr *)(*p));
     } else if (GRN_DB_OBJP(*p)) {
       if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) {
-        scan_info_put_index(ctx, si, index, sid, 1);
+        scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0);
       }
     } else if (GRN_ACCESSORP(*p)) {
       si->flags |= SCAN_ACCESSOR;
       if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) {
         if (((grn_accessor *)(*p))->next) {
-          scan_info_put_index(ctx, si, *p, sid, 1);
+          scan_info_put_index(ctx, si, *p, sid, 1, NULL, NULL, 0);
         } else {
-          scan_info_put_index(ctx, si, index, sid, 1);
+          scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0);
         }
       }
     } else {
@@ -4632,12 +4639,12 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n,
           for (; p < pe; p++) {
             if (GRN_DB_OBJP(*p)) {
               if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
-                scan_info_put_index(ctx, si, index, sid, 1);
+                scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0);
               }
             } else if (GRN_ACCESSORP(*p)) {
               si->flags |= SCAN_ACCESSOR;
               if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) {
-                scan_info_put_index(ctx, si, index, sid, 1);
+                scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0);
               }
             } else {
               si->query = *p;
@@ -5272,9 +5279,6 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
         optarg.vector_size = 1;
         optarg.proc = NULL;
         optarg.max_size = 0;
-        optarg.scorer = si->scorer;
-        optarg.scorer_args_expr = si->scorer_args_expr;
-        optarg.scorer_args_expr_offset = si->scorer_args_expr_offset;
         ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND;
         for (; j--; ip++, wp += 2) {
           uint32_t sid = (uint32_t) wp[0];
@@ -5288,6 +5292,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si,
             optarg.weight_vector = NULL;
             optarg.vector_size = weight;
           }
+          optarg.scorer = GRN_PTR_VALUE_AT(&(si->scorers), j);
+          optarg.scorer_args_expr =
+            GRN_PTR_VALUE_AT(&(si->scorer_args_exprs), j);
+          optarg.scorer_args_expr_offset =
+            GRN_UINT32_VALUE_AT(&(si->scorer_args_expr_offsets), j);
           if (j) {
             if (sid && ip[0] == ip[1]) { continue; }
           } else {

  Modified: lib/grn_expr.h (+5 -10)
===================================================================
--- lib/grn_expr.h    2015-03-01 23:37:41 +0900 (7bb134b)
+++ lib/grn_expr.h    2015-03-01 23:39:45 +0900 (c59330f)
@@ -44,7 +44,10 @@ typedef grn_bool (*grn_scan_info_each_arg_callback)(grn_ctx *ctx, grn_obj *obj,
 scan_info *grn_scan_info_open(grn_ctx *ctx, int start);
 void grn_scan_info_close(grn_ctx *ctx, scan_info *si);
 void grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index,
-                             uint32_t sid, int32_t weight);
+                             uint32_t sid, int32_t weight,
+                             grn_obj *scorer,
+                             grn_obj *scorer_args_expr,
+                             uint32_t scorer_args_expr_offset);
 scan_info **grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip,
                                          grn_operator op, int start);
 int grn_scan_info_get_flags(scan_info *si);
@@ -59,18 +62,10 @@ int grn_scan_info_get_max_interval(scan_info *si);
 void grn_scan_info_set_max_interval(scan_info *si, int max_interval);
 int grn_scan_info_get_similarity_threshold(scan_info *si);
 void grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold);
-grn_obj *grn_scan_info_get_scorer(scan_info *si);
-void grn_scan_info_set_scorer(scan_info *si, grn_obj *scorer);
-grn_obj *grn_scan_info_get_scorer_args_expr(scan_info *si);
-void grn_scan_info_set_scorer_args_expr(scan_info *si,
-                                        grn_obj *scorer_args_expr);
-uint32_t grn_scan_info_get_scorer_args_expr_offset(scan_info *si);
-void grn_scan_info_set_scorer_args_expr_offset(scan_info *si,
-                                               uint32_t offset);
 grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg);
 grn_obj *grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i);
 
-int32_t grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec);
+int32_t grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset);
 void grn_expr_take_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj);
 grn_obj *grn_expr_alloc_const(grn_ctx *ctx, grn_obj *expr);
 

  Modified: lib/mrb/mrb_accessor.c (+11 -0)
===================================================================
--- lib/mrb/mrb_accessor.c    2015-03-01 23:37:41 +0900 (bd288af)
+++ lib/mrb/mrb_accessor.c    2015-03-01 23:39:45 +0900 (f9dfaec)
@@ -53,6 +53,15 @@ mrb_grn_accessor_next(mrb_state *mrb, mrb_value self)
   return mrb_cptr_value(mrb, accessor->next);
 }
 
+static mrb_value
+mrb_grn_accessor_have_next_p(mrb_state *mrb, mrb_value self)
+{
+  grn_accessor *accessor;
+
+  accessor = DATA_PTR(self);
+  return mrb_bool_value(accessor->next);
+}
+
 void
 grn_mrb_accessor_init(grn_ctx *ctx)
 {
@@ -67,5 +76,7 @@ grn_mrb_accessor_init(grn_ctx *ctx)
                     mrb_grn_accessor_initialize, MRB_ARGS_REQ(1));
   mrb_define_method(mrb, klass, "next",
                     mrb_grn_accessor_next, MRB_ARGS_NONE());
+  mrb_define_method(mrb, klass, "have_next?",
+                    mrb_grn_accessor_have_next_p, MRB_ARGS_NONE());
 }
 #endif

  Modified: lib/mrb/mrb_expr.c (+32 -99)
===================================================================
--- lib/mrb/mrb_expr.c    2015-03-01 23:37:41 +0900 (a80d24d)
+++ lib/mrb/mrb_expr.c    2015-03-01 23:39:45 +0900 (8e862eb)
@@ -101,17 +101,35 @@ mrb_grn_expr_code_initialize(mrb_state *mrb, mrb_value self)
 static mrb_value
 mrb_grn_scan_info_put_index(mrb_state *mrb, mrb_value self)
 {
+  grn_ctx *ctx = (grn_ctx *)mrb->ud;
+  scan_info *si;
+  mrb_value mrb_index;
   int sid;
   int32_t weight;
-  scan_info *si;
-  grn_ctx *ctx = (grn_ctx *)mrb->ud;
+  mrb_value mrb_scorer;
+  mrb_value mrb_scorer_args_expr;
+  int32_t scorer_args_expr_offset;
   grn_obj *index;
-  mrb_value mrb_index;
-
-  mrb_get_args(mrb, "oii", &mrb_index, &sid, &weight);
+  grn_obj *scorer = NULL;
+  grn_obj *scorer_args_expr = NULL;
+
+  mrb_get_args(mrb, "oiiooi",
+               &mrb_index, &sid, &weight,
+               &mrb_scorer,
+               &mrb_scorer_args_expr,
+               &scorer_args_expr_offset);
   si = DATA_PTR(self);
   index = DATA_PTR(mrb_index);
-  grn_scan_info_put_index(ctx, si, index, sid, weight);
+  if (!mrb_nil_p(mrb_scorer)) {
+    scorer = DATA_PTR(mrb_scorer);
+  }
+  if (!mrb_nil_p(mrb_scorer_args_expr)) {
+    scorer_args_expr = DATA_PTR(mrb_scorer_args_expr);
+  }
+  grn_scan_info_put_index(ctx, si, index, sid, weight,
+                          scorer,
+                          scorer_args_expr,
+                          scorer_args_expr_offset);
   return self;
 }
 
@@ -263,83 +281,6 @@ mrb_grn_scan_info_get_similarity_threshold(mrb_state *mrb, mrb_value self)
 }
 
 static mrb_value
-mrb_grn_scan_info_set_scorer(mrb_state *mrb, mrb_value self)
-{
-  scan_info *si;
-  mrb_value mrb_scorer;
-
-  mrb_get_args(mrb, "o", &mrb_scorer);
-  si = DATA_PTR(self);
-  if (mrb_nil_p(mrb_scorer)) {
-    grn_scan_info_set_scorer(si, NULL);
-  } else {
-    grn_scan_info_set_scorer(si, DATA_PTR(mrb_scorer));
-  }
-  return self;
-}
-
-static mrb_value
-mrb_grn_scan_info_get_scorer(mrb_state *mrb, mrb_value self)
-{
-  scan_info *si;
-  grn_obj *scorer;
-
-  si = DATA_PTR(self);
-  scorer = grn_scan_info_get_scorer(si);
-  return grn_mrb_value_from_grn_obj(mrb, scorer);
-}
-
-static mrb_value
-mrb_grn_scan_info_get_scorer_args_expr(mrb_state *mrb, mrb_value self)
-{
-  scan_info *si;
-  grn_obj *scorer_args_expr;
-
-  si = DATA_PTR(self);
-  scorer_args_expr = grn_scan_info_get_scorer_args_expr(si);
-  return grn_mrb_value_from_grn_obj(mrb, scorer_args_expr);
-}
-
-static mrb_value
-mrb_grn_scan_info_set_scorer_args_expr(mrb_state *mrb, mrb_value self)
-{
-  scan_info *si;
-  mrb_value mrb_scorer_args_expr;
-
-  mrb_get_args(mrb, "o", &mrb_scorer_args_expr);
-  si = DATA_PTR(self);
-  if (mrb_nil_p(mrb_scorer_args_expr)) {
-    grn_scan_info_set_scorer_args_expr(si, NULL);
-  } else {
-    grn_scan_info_set_scorer_args_expr(si, DATA_PTR(mrb_scorer_args_expr));
-  }
-  return self;
-}
-
-static mrb_value
-mrb_grn_scan_info_get_scorer_args_expr_offset(mrb_state *mrb, mrb_value self)
-{
-  scan_info *si;
-  uint32_t offset;
-
-  si = DATA_PTR(self);
-  offset = grn_scan_info_get_scorer_args_expr_offset(si);
-  return mrb_fixnum_value(offset);
-}
-
-static mrb_value
-mrb_grn_scan_info_set_scorer_args_expr_offset(mrb_state *mrb, mrb_value self)
-{
-  scan_info *si;
-  mrb_int offset;
-
-  mrb_get_args(mrb, "i", &offset);
-  si = DATA_PTR(self);
-  grn_scan_info_set_scorer_args_expr_offset(si, offset);
-  return self;
-}
-
-static mrb_value
 mrb_grn_scan_info_get_arg(mrb_state *mrb, mrb_value self)
 {
   grn_ctx *ctx = (grn_ctx *)mrb->ud;
@@ -374,8 +315,14 @@ static mrb_value
 mrb_grn_expr_code_get_weight(mrb_state *mrb, mrb_value self)
 {
   grn_ctx *ctx = (grn_ctx *)mrb->ud;
+  int32_t weight;
+  uint32_t offset;
+  mrb_value mrb_values[2];
 
-  return mrb_fixnum_value(grn_expr_code_get_weight(ctx, DATA_PTR(self)));
+  weight = grn_expr_code_get_weight(ctx, DATA_PTR(self), &offset);
+  mrb_values[0] = mrb_fixnum_value(weight);
+  mrb_values[1] = mrb_fixnum_value(offset);
+  return mrb_ary_new_from_values(mrb, 2, mrb_values);
 }
 
 static mrb_value
@@ -710,7 +657,7 @@ grn_mrb_expr_init(grn_ctx *ctx)
   mrb_define_method(mrb, klass, "initialize",
                     mrb_grn_scan_info_initialize, MRB_ARGS_REQ(1));
   mrb_define_method(mrb, klass, "put_index",
-                    mrb_grn_scan_info_put_index, MRB_ARGS_REQ(3));
+                    mrb_grn_scan_info_put_index, MRB_ARGS_REQ(6));
   mrb_define_method(mrb, klass, "op",
                     mrb_grn_scan_info_get_op, MRB_ARGS_NONE());
   mrb_define_method(mrb, klass, "op=",
@@ -735,20 +682,6 @@ grn_mrb_expr_init(grn_ctx *ctx)
                     mrb_grn_scan_info_get_similarity_threshold, MRB_ARGS_NONE());
   mrb_define_method(mrb, klass, "similarity_threshold=",
                     mrb_grn_scan_info_set_similarity_threshold, MRB_ARGS_REQ(1));
-  mrb_define_method(mrb, klass, "scorer",
-                    mrb_grn_scan_info_get_scorer, MRB_ARGS_NONE());
-  mrb_define_method(mrb, klass, "scorer=",
-                    mrb_grn_scan_info_set_scorer, MRB_ARGS_REQ(1));
-  mrb_define_method(mrb, klass, "scorer_args_expr",
-                    mrb_grn_scan_info_get_scorer_args_expr, MRB_ARGS_NONE());
-  mrb_define_method(mrb, klass, "scorer_args_expr=",
-                    mrb_grn_scan_info_set_scorer_args_expr, MRB_ARGS_REQ(1));
-  mrb_define_method(mrb, klass, "scorer_args_expr_offset",
-                    mrb_grn_scan_info_get_scorer_args_expr_offset,
-                    MRB_ARGS_NONE());
-  mrb_define_method(mrb, klass, "scorer_args_expr_offset=",
-                    mrb_grn_scan_info_set_scorer_args_expr_offset,
-                    MRB_ARGS_REQ(1));
   mrb_define_method(mrb, klass, "get_arg",
                     mrb_grn_scan_info_get_arg, MRB_ARGS_REQ(1));
   mrb_define_method(mrb, klass, "push_arg",

  Modified: lib/mrb/scripts/scan_info.rb (+4 -8)
===================================================================
--- lib/mrb/scripts/scan_info.rb    2015-03-01 23:37:41 +0900 (af7eea1)
+++ lib/mrb/scripts/scan_info.rb    2015-03-01 23:39:45 +0900 (a98cf79)
@@ -19,20 +19,16 @@ module Groonga
       if data.similarity_threshold
         self.similarity_threshold = data.similarity_threshold
       end
-      if data.scorer
-        self.scorer = data.scorer
-        if data.scorer_args_expr
-          self.scorer_args_expr = data.scorer_args_expr
-          self.scorer_args_expr_offset = data.scorer_args_expr_offset
-        end
-      end
       data.args.each do |arg|
         push_arg(arg)
       end
       data.search_indexes.each do |search_index|
         put_index(search_index.index_column,
                   search_index.section_id,
-                  search_index.weight)
+                  search_index.weight,
+                  search_index.scorer,
+                  search_index.scorer_args_expr,
+                  search_index.scorer_args_expr_offset || 0)
       end
     end
   end

  Modified: lib/mrb/scripts/scan_info_data.rb (+69 -34)
===================================================================
--- lib/mrb/scripts/scan_info_data.rb    2015-03-01 23:37:41 +0900 (9b061ee)
+++ lib/mrb/scripts/scan_info_data.rb    2015-03-01 23:39:45 +0900 (bda89fc)
@@ -12,9 +12,6 @@ module Groonga
     attr_accessor :flags
     attr_accessor :max_interval
     attr_accessor :similarity_threshold
-    attr_accessor :scorer
-    attr_accessor :scorer_args_expr
-    attr_accessor :scorer_args_expr_offset
     def initialize(start)
       @start = start
       @end = 0
@@ -26,9 +23,6 @@ module Groonga
       @flags = ScanInfo::Flags::PUSH
       @max_interval = nil
       @similarity_threshold = nil
-      @scorer = nil
-      @scorer_args_expr = nil
-      @scorer_args_expr_offset = nil
     end
 
     def match_resolve_index
@@ -122,12 +116,75 @@ module Groonga
     def match_resolve_index_expression_codes(expression, codes, i, n_codes)
       code = codes[i]
       value = code.value
+      return i + 1 if value.nil?
+
+      case value
+      when Accessor, Column
+        :xxx # TODO: To avoid mruby bug...
+        index_info, offset =
+          match_resolve_index_expression_find_index(expression,
+                                                    codes, i, n_codes)
+        i += offset - 1
+        if index_info
+          if value.is_a?(Accessor)
+            self.flags |= ScanInfo::Flags::ACCESSOR
+          end
+          weight, offset = codes[i].weight
+          i += offset
+          put_search_index(index_info.index, index_info.section_id, weight)
+       end
+      when Procedure
+        unless value.scorer?
+          message = "procedure must be scorer: #{scorer.name}>"
+          raise ErrorMessage, message
+        end
+        scorer = value
+        i += 1
+        index_info, offset =
+          match_resolve_index_expression_find_index(expression,
+                                                    codes, i, n_codes)
+        i += offset
+        if index_info
+          scorer_args_expr_offset = 0
+          if codes[i].op != Operator::CALL
+            scorer_args_expr_offset = i
+          end
+          while i < n_codes and codes[i].op != Operator::CALL
+            i += 1
+          end
+          weight, offset = codes[i].weight
+          i += offset
+          search_index = ScanInfoSearchIndex.new(index_info.index,
+                                                 index_info.section_id,
+                                                 weight,
+                                                 scorer,
+                                                 expression,
+                                                 scorer_args_expr_offset)
+          @search_indexes << search_index
+        end
+      when Table
+        raise ErrorMessage, "invalid match target: <#{value.name}>"
+      end
+      i + 1
+    end
+
+    def match_resolve_index_expression_find_index(expression, codes, i, n_codes)
+      code = codes[i]
+      value = code.value
+      index_info = nil
+      offset = 1
       case value
       when Accessor
-        match_resolve_index_expression_accessor(code)
+        accessor = value
+        index_info = accessor.find_index(@op)
+        index_info.nil? # TODO: To avoid mruby bug...
+        if index_info and accessor.have_next?
+          index_info = IndexInfo.new(accessor, index_info.section_id)
+        end
       when FixedSizeColumn, VariableSizeColumn
-        match_resolve_index_expression_data_column(code)
+        index_info = value.find_index(@op)
       when IndexColumn
+        index = value
         section_id = 0
         rest_n_codes = n_codes - i
         if rest_n_codes >= 2 and
@@ -136,34 +193,12 @@ module Groonga
            codes[i + 1].value.domain == ID::INT32) and
           codes[i + 2].op == Operator::GET_MEMBER
           section_id = codes[i + 1].value.value + 1
-          code = codes[i + 2]
-          i += 2
-        end
-        put_search_index(value, section_id, code.weight)
-      when Procedure
-        unless value.scorer?
-          message = "procedure must be scorer: #{scorer.name}>"
-          raise ErrorMessage, message
-        end
-        @scorer = value
-        rest_n_codes = n_codes - i
-        if rest_n_codes == 0
-          message = "match target is required as an argument: <#{scorer.name}>"
-          raise ErrorMessage, message
-        end
-        i = match_resolve_index_expression_codes(expression, codes, i + 1,
-                                                 n_codes)
-        unless codes[i].op == Operator::CALL
-          @scorer_args_expr = expression
-          @scorer_args_expr_offset = i
-          until codes[i].op == Operator::CALL
-            i += 1
-          end
+          offset += 2
         end
-      when Table
-        raise ErrorMessage, "invalid match target: <#{value.name}>"
+        index_info = IndexInfo.new(index, section_id)
       end
-      i + 1
+
+      [index_info, offset]
     end
 
     def match_resolve_index_expression_accessor(expr_code)

  Modified: lib/mrb/scripts/scan_info_search_index.rb (+6 -1)
===================================================================
--- lib/mrb/scripts/scan_info_search_index.rb    2015-03-01 23:37:41 +0900 (9114a04)
+++ lib/mrb/scripts/scan_info_search_index.rb    2015-03-01 23:39:45 +0900 (a281816)
@@ -1,4 +1,9 @@
 module Groonga
-  class ScanInfoSearchIndex < Struct.new(:index_column, :section_id, :weight)
+  class ScanInfoSearchIndex < Struct.new(:index_column,
+                                         :section_id,
+                                         :weight,
+                                         :scorer,
+                                         :scorer_args_expr,
+                                         :scorer_args_expr_offset)
   end
 end

  Modified: test/command/suite/select/match_columns/scorer/tf_at_most/weight.test (+0 -1)
===================================================================
--- test/command/suite/select/match_columns/scorer/tf_at_most/weight.test    2015-03-01 23:37:41 +0900 (5acc4e7)
+++ test/command/suite/select/match_columns/scorer/tf_at_most/weight.test    2015-03-01 23:39:45 +0900 (f514a2c)
@@ -1,4 +1,3 @@
-#@omit "TODO: Support this case"
 table_create Memos TABLE_NO_KEY
 column_create Memos title COLUMN_SCALAR ShortText
 column_create Memos content COLUMN_SCALAR Text
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index