Kouhei Sutou
null+****@clear*****
Sun Mar 1 23:39:45 JST 2015
Kouhei Sutou 2015-03-01 23:39:45 +0900 (Sun, 01 Mar 2015) New Revision: 176343145ebae0e5e7f5aa6df28f847a1acce323 https://github.com/groonga/groonga/commit/176343145ebae0e5e7f5aa6df28f847a1acce323 Message: Support scorer with weight Modified files: lib/expr.c lib/grn_expr.h lib/mrb/mrb_accessor.c lib/mrb/mrb_expr.c lib/mrb/scripts/scan_info.rb lib/mrb/scripts/scan_info_data.rb lib/mrb/scripts/scan_info_search_index.rb test/command/suite/select/match_columns/scorer/tf_at_most/weight.test Modified: lib/expr.c (+122 -113) =================================================================== --- lib/expr.c 2015-03-01 23:37:41 +0900 (1bd454f) +++ lib/expr.c 2015-03-01 23:39:45 +0900 (e6a8234) @@ -3785,14 +3785,17 @@ struct _grn_scan_info { grn_obj *args[GRN_SCAN_INFO_MAX_N_ARGS]; int max_interval; int similarity_threshold; - grn_obj *scorer; - grn_obj *scorer_args_expr; - uint32_t scorer_args_expr_offset; + grn_obj scorers; + grn_obj scorer_args_exprs; + grn_obj scorer_args_expr_offsets; }; #define SI_FREE(si) do {\ GRN_OBJ_FIN(ctx, &(si)->wv);\ GRN_OBJ_FIN(ctx, &(si)->index);\ + GRN_OBJ_FIN(ctx, &(si)->scorers);\ + GRN_OBJ_FIN(ctx, &(si)->scorer_args_exprs);\ + GRN_OBJ_FIN(ctx, &(si)->scorer_args_expr_offsets);\ GRN_FREE(si);\ } while (0) @@ -3811,9 +3814,9 @@ struct _grn_scan_info { (si)->max_interval = DEFAULT_MAX_INTERVAL;\ (si)->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD;\ (si)->start = (st);\ - (si)->scorer = NULL;\ - (si)->scorer_args_expr = NULL;\ - (si)->scorer_args_expr_offset = 0;\ + GRN_PTR_INIT(&(si)->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL);\ + GRN_PTR_INIT(&(si)->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL);\ + GRN_UINT32_INIT(&(si)->scorer_args_expr_offsets, GRN_OBJ_VECTOR);\ } while (0) static scan_info ** @@ -4039,11 +4042,18 @@ grn_expr_inspect_internal(grn_ctx *ctx, grn_obj *buf, grn_obj *expr) static void -scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight) +scan_info_put_index(grn_ctx *ctx, scan_info 
*si, + grn_obj *index, uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, + uint32_t scorer_args_expr_offset) { GRN_PTR_PUT(ctx, &si->index, index); GRN_UINT32_PUT(ctx, &si->wv, sid); GRN_INT32_PUT(ctx, &si->wv, weight); + GRN_PTR_PUT(ctx, &si->scorers, scorer); + GRN_PTR_PUT(ctx, &si->scorer_args_exprs, scorer_args_expr); + GRN_UINT32_PUT(ctx, &si->scorer_args_expr_offsets, scorer_args_expr_offset); { int i, ni = (GRN_BULK_VSIZE(&si->index) / sizeof(grn_obj *)) - 1; grn_obj **pi = &GRN_PTR_VALUE_AT(&si->index, ni); @@ -4064,10 +4074,13 @@ scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, i } static int32_t -get_weight(grn_ctx *ctx, grn_expr_code *ec) +get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset) { if (ec->modify == 2 && ec[2].op == GRN_OP_STAR && ec[1].value && ec[1].value->header.type == GRN_BULK) { + if (offset) { + *offset = 2; + } if (ec[1].value->header.domain == GRN_DB_INT32 || ec[1].value->header.domain == GRN_DB_UINT32) { return GRN_INT32_VALUE(ec[1].value); @@ -4082,6 +4095,9 @@ get_weight(grn_ctx *ctx, grn_expr_code *ec) return weight; } } else { + if (offset) { + *offset = 0; + } return 1; } } @@ -4103,9 +4119,9 @@ grn_scan_info_open(grn_ctx *ctx, int start) si->max_interval = DEFAULT_MAX_INTERVAL; si->similarity_threshold = DEFAULT_SIMILARITY_THRESHOLD; si->start = start; - si->scorer = NULL; - si->scorer_args_expr = NULL; - si->scorer_args_expr_offset = 0; + GRN_PTR_INIT(&si->scorers, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_PTR_INIT(&si->scorer_args_exprs, GRN_OBJ_VECTOR, GRN_ID_NIL); + GRN_UINT32_INIT(&si->scorer_args_expr_offsets, GRN_OBJ_VECTOR); return si; } @@ -4117,9 +4133,16 @@ grn_scan_info_close(grn_ctx *ctx, scan_info *si) } void -grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, uint32_t sid, int32_t weight) +grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, + grn_obj *index, uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, 
+ uint32_t scorer_args_expr_offset) { - scan_info_put_index(ctx, si, index, sid, weight); + scan_info_put_index(ctx, si, index, sid, weight, + scorer, + scorer_args_expr, + scorer_args_expr_offset); } scan_info ** @@ -4130,9 +4153,9 @@ grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, } int32_t -grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec) +grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset) { - return get_weight(ctx, ec); + return get_weight(ctx, ec, offset); } int @@ -4207,42 +4230,6 @@ grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold) si->similarity_threshold = similarity_threshold; } -grn_obj * -grn_scan_info_get_scorer(scan_info *si) -{ - return si->scorer; -} - -void -grn_scan_info_set_scorer(scan_info *si, grn_obj *scorer) -{ - si->scorer = scorer; -} - -grn_obj * -grn_scan_info_get_scorer_args_expr(scan_info *si) -{ - return si->scorer_args_expr; -} - -void -grn_scan_info_set_scorer_args_expr(scan_info *si, grn_obj *expr) -{ - si->scorer_args_expr = expr; -} - -uint32_t -grn_scan_info_get_scorer_args_expr_offset(scan_info *si) -{ - return si->scorer_args_expr_offset; -} - -void -grn_scan_info_set_scorer_args_expr_offset(scan_info *si, uint32_t offset) -{ - si->scorer_args_expr_offset = offset; -} - grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg) { @@ -4264,32 +4251,51 @@ grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i) } static uint32_t -scan_info_build_find_index_column_index(grn_ctx *ctx, - scan_info *si, - grn_expr_code *ec, - uint32_t n_rest_codes, - grn_operator op) +scan_info_build_match_expr_codes_find_index(grn_ctx *ctx, scan_info *si, + grn_expr *expr, uint32_t i, + grn_obj **index, + int *sid) { - uint32_t offset = 0; - grn_obj *index; - int sid = 0; - int32_t weight; - - index = ec->value; - if (n_rest_codes >= 2 && - ec[1].value && - (ec[1].value->header.domain == GRN_DB_INT32 || - ec[1].value->header.domain == GRN_DB_UINT32) && - ec[2].op 
== GRN_OP_GET_MEMBER) { - if (ec[1].value->header.domain == GRN_DB_INT32) { - sid = GRN_INT32_VALUE(ec[1].value) + 1; - } else { - sid = GRN_UINT32_VALUE(ec[1].value) + 1; + grn_expr_code *ec; + uint32_t offset = 1; + + ec = &(expr->codes[i]); + switch (ec->value->header.type) { + case GRN_ACCESSOR : + if (grn_column_index(ctx, ec->value, si->op, index, 1, sid)) { + if (((grn_accessor *)ec->value)->next) { + *index = ec->value; + } } - offset = 2; + break; + case GRN_COLUMN_FIX_SIZE : + case GRN_COLUMN_VAR_SIZE : + grn_column_index(ctx, ec->value, si->op, index, 1, sid); + break; + case GRN_COLUMN_INDEX : + { + uint32_t n_rest_codes; + + *index = ec->value; + + n_rest_codes = expr->codes_curr - i; + if (n_rest_codes >= 2 && + ec[1].value && + (ec[1].value->header.domain == GRN_DB_INT32 || + ec[1].value->header.domain == GRN_DB_UINT32) && + ec[2].op == GRN_OP_GET_MEMBER) { + if (ec[1].value->header.domain == GRN_DB_INT32) { + *sid = GRN_INT32_VALUE(ec[1].value) + 1; + } else { + *sid = GRN_UINT32_VALUE(ec[1].value) + 1; + } + offset += 2; + } + } + break; + default : + break; } - weight = get_weight(ctx, ec + offset); - scan_info_put_index(ctx, si, index, sid, weight); return offset; } @@ -4299,8 +4305,9 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, grn_expr *expr, uint32_t i) { grn_expr_code *ec; - grn_obj *index; - int sid; + grn_obj *index = NULL; + int sid = 0; + uint32_t offset = 0; ec = &(expr->codes[i]); if (!ec->value) { @@ -4309,29 +4316,19 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, switch (ec->value->header.type) { case GRN_ACCESSOR : - if (grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) { - int32_t weight = get_weight(ctx, ec); - si->flags |= SCAN_ACCESSOR; - if (((grn_accessor *)ec->value)->next) { - scan_info_put_index(ctx, si, ec->value, sid, weight); - } else { - scan_info_put_index(ctx, si, index, sid, weight); - } - } - break; case GRN_COLUMN_FIX_SIZE : case GRN_COLUMN_VAR_SIZE : - if 
(grn_column_index(ctx, ec->value, si->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, get_weight(ctx, ec)); - } - break; case GRN_COLUMN_INDEX : - { - uint32_t n_rest_codes; - uint32_t offset; - n_rest_codes = expr->codes_curr - i; - offset = scan_info_build_find_index_column_index(ctx, si, ec, - n_rest_codes, si->op); + offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i, + &index, &sid); + i += offset - 1; + if (index) { + if (ec->value->header.type == GRN_ACCESSOR) { + si->flags |= SCAN_ACCESSOR; + } + scan_info_put_index(ctx, si, index, sid, + get_weight(ctx, &(expr->codes[i]), &offset), + NULL, NULL, 0); i += offset; } break; @@ -4347,17 +4344,27 @@ scan_info_build_match_expr_codes(grn_ctx *ctx, scan_info *si, GRN_OBJ_FIN(ctx, &inspected); return expr->codes_curr; } - si->scorer = ec->value; - i = scan_info_build_match_expr_codes(ctx, si, expr, i + 1); - if (expr->codes[i].op != GRN_OP_CALL) { - si->scorer_args_expr = (grn_obj *)expr; - si->scorer_args_expr_offset = i; + i++; + offset = scan_info_build_match_expr_codes_find_index(ctx, si, expr, i, + &index, &sid); + i += offset; + if (index) { + uint32_t scorer_args_expr_offset = 0; + if (expr->codes[i].op != GRN_OP_CALL) { + scorer_args_expr_offset = i; + } + while (i < expr->codes_curr && expr->codes[i].op != GRN_OP_CALL) { + i++; + } + scan_info_put_index(ctx, si, index, sid, + get_weight(ctx, &(expr->codes[i]), &offset), + ec->value, + (grn_obj *)expr, + scorer_args_expr_offset); + i += offset; } break; - case GRN_TABLE_NO_KEY : - case GRN_TABLE_HASH_KEY : - case GRN_TABLE_PAT_KEY : - case GRN_TABLE_DAT_KEY : + default : { char name[GRN_TABLE_MAX_KEY_SIZE]; int name_size; @@ -4393,15 +4400,15 @@ scan_info_build_match(grn_ctx *ctx, scan_info *si) scan_info_build_match_expr(ctx, si, (grn_expr *)(*p)); } else if (GRN_DB_OBJP(*p)) { if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + scan_info_put_index(ctx, si, index, 
sid, 1, NULL, NULL, 0); } } else if (GRN_ACCESSORP(*p)) { si->flags |= SCAN_ACCESSOR; if (grn_column_index(ctx, *p, si->op, &index, 1, &sid)) { if (((grn_accessor *)(*p))->next) { - scan_info_put_index(ctx, si, *p, sid, 1); + scan_info_put_index(ctx, si, *p, sid, 1, NULL, NULL, 0); } else { - scan_info_put_index(ctx, si, index, sid, 1); + scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0); } } } else { @@ -4632,12 +4639,12 @@ scan_info_build(grn_ctx *ctx, grn_obj *expr, int *n, for (; p < pe; p++) { if (GRN_DB_OBJP(*p)) { if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0); } } else if (GRN_ACCESSORP(*p)) { si->flags |= SCAN_ACCESSOR; if (grn_column_index(ctx, *p, c->op, &index, 1, &sid)) { - scan_info_put_index(ctx, si, index, sid, 1); + scan_info_put_index(ctx, si, index, sid, 1, NULL, NULL, 0); } } else { si->query = *p; @@ -5272,9 +5279,6 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, optarg.vector_size = 1; optarg.proc = NULL; optarg.max_size = 0; - optarg.scorer = si->scorer; - optarg.scorer_args_expr = si->scorer_args_expr; - optarg.scorer_args_expr_offset = si->scorer_args_expr_offset; ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; for (; j--; ip++, wp += 2) { uint32_t sid = (uint32_t) wp[0]; @@ -5288,6 +5292,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, optarg.weight_vector = NULL; optarg.vector_size = weight; } + optarg.scorer = GRN_PTR_VALUE_AT(&(si->scorers), j); + optarg.scorer_args_expr = + GRN_PTR_VALUE_AT(&(si->scorer_args_exprs), j); + optarg.scorer_args_expr_offset = + GRN_UINT32_VALUE_AT(&(si->scorer_args_expr_offsets), j); if (j) { if (sid && ip[0] == ip[1]) { continue; } } else { Modified: lib/grn_expr.h (+5 -10) =================================================================== --- lib/grn_expr.h 2015-03-01 23:37:41 +0900 (7bb134b) +++ lib/grn_expr.h 
2015-03-01 23:39:45 +0900 (c59330f) @@ -44,7 +44,10 @@ typedef grn_bool (*grn_scan_info_each_arg_callback)(grn_ctx *ctx, grn_obj *obj, scan_info *grn_scan_info_open(grn_ctx *ctx, int start); void grn_scan_info_close(grn_ctx *ctx, scan_info *si); void grn_scan_info_put_index(grn_ctx *ctx, scan_info *si, grn_obj *index, - uint32_t sid, int32_t weight); + uint32_t sid, int32_t weight, + grn_obj *scorer, + grn_obj *scorer_args_expr, + uint32_t scorer_args_expr_offset); scan_info **grn_scan_info_put_logical_op(grn_ctx *ctx, scan_info **sis, int *ip, grn_operator op, int start); int grn_scan_info_get_flags(scan_info *si); @@ -59,18 +62,10 @@ int grn_scan_info_get_max_interval(scan_info *si); void grn_scan_info_set_max_interval(scan_info *si, int max_interval); int grn_scan_info_get_similarity_threshold(scan_info *si); void grn_scan_info_set_similarity_threshold(scan_info *si, int similarity_threshold); -grn_obj *grn_scan_info_get_scorer(scan_info *si); -void grn_scan_info_set_scorer(scan_info *si, grn_obj *scorer); -grn_obj *grn_scan_info_get_scorer_args_expr(scan_info *si); -void grn_scan_info_set_scorer_args_expr(scan_info *si, - grn_obj *scorer_args_expr); -uint32_t grn_scan_info_get_scorer_args_expr_offset(scan_info *si); -void grn_scan_info_set_scorer_args_expr_offset(scan_info *si, - uint32_t offset); grn_bool grn_scan_info_push_arg(scan_info *si, grn_obj *arg); grn_obj *grn_scan_info_get_arg(grn_ctx *ctx, scan_info *si, int i); -int32_t grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec); +int32_t grn_expr_code_get_weight(grn_ctx *ctx, grn_expr_code *ec, uint32_t *offset); void grn_expr_take_obj(grn_ctx *ctx, grn_obj *expr, grn_obj *obj); grn_obj *grn_expr_alloc_const(grn_ctx *ctx, grn_obj *expr); Modified: lib/mrb/mrb_accessor.c (+11 -0) =================================================================== --- lib/mrb/mrb_accessor.c 2015-03-01 23:37:41 +0900 (bd288af) +++ lib/mrb/mrb_accessor.c 2015-03-01 23:39:45 +0900 (f9dfaec) @@ -53,6 +53,15 @@ 
mrb_grn_accessor_next(mrb_state *mrb, mrb_value self) return mrb_cptr_value(mrb, accessor->next); } +static mrb_value +mrb_grn_accessor_have_next_p(mrb_state *mrb, mrb_value self) +{ + grn_accessor *accessor; + + accessor = DATA_PTR(self); + return mrb_bool_value(accessor->next); +} + void grn_mrb_accessor_init(grn_ctx *ctx) { @@ -67,5 +76,7 @@ grn_mrb_accessor_init(grn_ctx *ctx) mrb_grn_accessor_initialize, MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "next", mrb_grn_accessor_next, MRB_ARGS_NONE()); + mrb_define_method(mrb, klass, "have_next?", + mrb_grn_accessor_have_next_p, MRB_ARGS_NONE()); } #endif Modified: lib/mrb/mrb_expr.c (+32 -99) =================================================================== --- lib/mrb/mrb_expr.c 2015-03-01 23:37:41 +0900 (a80d24d) +++ lib/mrb/mrb_expr.c 2015-03-01 23:39:45 +0900 (8e862eb) @@ -101,17 +101,35 @@ mrb_grn_expr_code_initialize(mrb_state *mrb, mrb_value self) static mrb_value mrb_grn_scan_info_put_index(mrb_state *mrb, mrb_value self) { + grn_ctx *ctx = (grn_ctx *)mrb->ud; + scan_info *si; + mrb_value mrb_index; int sid; int32_t weight; - scan_info *si; - grn_ctx *ctx = (grn_ctx *)mrb->ud; + mrb_value mrb_scorer; + mrb_value mrb_scorer_args_expr; + int32_t scorer_args_expr_offset; grn_obj *index; - mrb_value mrb_index; - - mrb_get_args(mrb, "oii", &mrb_index, &sid, &weight); + grn_obj *scorer = NULL; + grn_obj *scorer_args_expr = NULL; + + mrb_get_args(mrb, "oiiooi", + &mrb_index, &sid, &weight, + &mrb_scorer, + &mrb_scorer_args_expr, + &scorer_args_expr_offset); si = DATA_PTR(self); index = DATA_PTR(mrb_index); - grn_scan_info_put_index(ctx, si, index, sid, weight); + if (!mrb_nil_p(mrb_scorer)) { + scorer = DATA_PTR(mrb_scorer); + } + if (!mrb_nil_p(mrb_scorer_args_expr)) { + scorer_args_expr = DATA_PTR(mrb_scorer_args_expr); + } + grn_scan_info_put_index(ctx, si, index, sid, weight, + scorer, + scorer_args_expr, + scorer_args_expr_offset); return self; } @@ -263,83 +281,6 @@ 
mrb_grn_scan_info_get_similarity_threshold(mrb_state *mrb, mrb_value self) } static mrb_value -mrb_grn_scan_info_set_scorer(mrb_state *mrb, mrb_value self) -{ - scan_info *si; - mrb_value mrb_scorer; - - mrb_get_args(mrb, "o", &mrb_scorer); - si = DATA_PTR(self); - if (mrb_nil_p(mrb_scorer)) { - grn_scan_info_set_scorer(si, NULL); - } else { - grn_scan_info_set_scorer(si, DATA_PTR(mrb_scorer)); - } - return self; -} - -static mrb_value -mrb_grn_scan_info_get_scorer(mrb_state *mrb, mrb_value self) -{ - scan_info *si; - grn_obj *scorer; - - si = DATA_PTR(self); - scorer = grn_scan_info_get_scorer(si); - return grn_mrb_value_from_grn_obj(mrb, scorer); -} - -static mrb_value -mrb_grn_scan_info_get_scorer_args_expr(mrb_state *mrb, mrb_value self) -{ - scan_info *si; - grn_obj *scorer_args_expr; - - si = DATA_PTR(self); - scorer_args_expr = grn_scan_info_get_scorer_args_expr(si); - return grn_mrb_value_from_grn_obj(mrb, scorer_args_expr); -} - -static mrb_value -mrb_grn_scan_info_set_scorer_args_expr(mrb_state *mrb, mrb_value self) -{ - scan_info *si; - mrb_value mrb_scorer_args_expr; - - mrb_get_args(mrb, "o", &mrb_scorer_args_expr); - si = DATA_PTR(self); - if (mrb_nil_p(mrb_scorer_args_expr)) { - grn_scan_info_set_scorer_args_expr(si, NULL); - } else { - grn_scan_info_set_scorer_args_expr(si, DATA_PTR(mrb_scorer_args_expr)); - } - return self; -} - -static mrb_value -mrb_grn_scan_info_get_scorer_args_expr_offset(mrb_state *mrb, mrb_value self) -{ - scan_info *si; - uint32_t offset; - - si = DATA_PTR(self); - offset = grn_scan_info_get_scorer_args_expr_offset(si); - return mrb_fixnum_value(offset); -} - -static mrb_value -mrb_grn_scan_info_set_scorer_args_expr_offset(mrb_state *mrb, mrb_value self) -{ - scan_info *si; - mrb_int offset; - - mrb_get_args(mrb, "i", &offset); - si = DATA_PTR(self); - grn_scan_info_set_scorer_args_expr_offset(si, offset); - return self; -} - -static mrb_value mrb_grn_scan_info_get_arg(mrb_state *mrb, mrb_value self) { grn_ctx *ctx = 
(grn_ctx *)mrb->ud; @@ -374,8 +315,14 @@ static mrb_value mrb_grn_expr_code_get_weight(mrb_state *mrb, mrb_value self) { grn_ctx *ctx = (grn_ctx *)mrb->ud; + int32_t weight; + uint32_t offset; + mrb_value mrb_values[2]; - return mrb_fixnum_value(grn_expr_code_get_weight(ctx, DATA_PTR(self))); + weight = grn_expr_code_get_weight(ctx, DATA_PTR(self), &offset); + mrb_values[0] = mrb_fixnum_value(weight); + mrb_values[1] = mrb_fixnum_value(offset); + return mrb_ary_new_from_values(mrb, 2, mrb_values); } static mrb_value @@ -710,7 +657,7 @@ grn_mrb_expr_init(grn_ctx *ctx) mrb_define_method(mrb, klass, "initialize", mrb_grn_scan_info_initialize, MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "put_index", - mrb_grn_scan_info_put_index, MRB_ARGS_REQ(3)); + mrb_grn_scan_info_put_index, MRB_ARGS_REQ(6)); mrb_define_method(mrb, klass, "op", mrb_grn_scan_info_get_op, MRB_ARGS_NONE()); mrb_define_method(mrb, klass, "op=", @@ -735,20 +682,6 @@ grn_mrb_expr_init(grn_ctx *ctx) mrb_grn_scan_info_get_similarity_threshold, MRB_ARGS_NONE()); mrb_define_method(mrb, klass, "similarity_threshold=", mrb_grn_scan_info_set_similarity_threshold, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, klass, "scorer", - mrb_grn_scan_info_get_scorer, MRB_ARGS_NONE()); - mrb_define_method(mrb, klass, "scorer=", - mrb_grn_scan_info_set_scorer, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, klass, "scorer_args_expr", - mrb_grn_scan_info_get_scorer_args_expr, MRB_ARGS_NONE()); - mrb_define_method(mrb, klass, "scorer_args_expr=", - mrb_grn_scan_info_set_scorer_args_expr, MRB_ARGS_REQ(1)); - mrb_define_method(mrb, klass, "scorer_args_expr_offset", - mrb_grn_scan_info_get_scorer_args_expr_offset, - MRB_ARGS_NONE()); - mrb_define_method(mrb, klass, "scorer_args_expr_offset=", - mrb_grn_scan_info_set_scorer_args_expr_offset, - MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "get_arg", mrb_grn_scan_info_get_arg, MRB_ARGS_REQ(1)); mrb_define_method(mrb, klass, "push_arg", Modified: lib/mrb/scripts/scan_info.rb (+4 -8) 
=================================================================== --- lib/mrb/scripts/scan_info.rb 2015-03-01 23:37:41 +0900 (af7eea1) +++ lib/mrb/scripts/scan_info.rb 2015-03-01 23:39:45 +0900 (a98cf79) @@ -19,20 +19,16 @@ module Groonga if data.similarity_threshold self.similarity_threshold = data.similarity_threshold end - if data.scorer - self.scorer = data.scorer - if data.scorer_args_expr - self.scorer_args_expr = data.scorer_args_expr - self.scorer_args_expr_offset = data.scorer_args_expr_offset - end - end data.args.each do |arg| push_arg(arg) end data.search_indexes.each do |search_index| put_index(search_index.index_column, search_index.section_id, - search_index.weight) + search_index.weight, + search_index.scorer, + search_index.scorer_args_expr, + search_index.scorer_args_expr_offset || 0) end end end Modified: lib/mrb/scripts/scan_info_data.rb (+69 -34) =================================================================== --- lib/mrb/scripts/scan_info_data.rb 2015-03-01 23:37:41 +0900 (9b061ee) +++ lib/mrb/scripts/scan_info_data.rb 2015-03-01 23:39:45 +0900 (bda89fc) @@ -12,9 +12,6 @@ module Groonga attr_accessor :flags attr_accessor :max_interval attr_accessor :similarity_threshold - attr_accessor :scorer - attr_accessor :scorer_args_expr - attr_accessor :scorer_args_expr_offset def initialize(start) @start = start @end = 0 @@ -26,9 +23,6 @@ module Groonga @flags = ScanInfo::Flags::PUSH @max_interval = nil @similarity_threshold = nil - @scorer = nil - @scorer_args_expr = nil - @scorer_args_expr_offset = nil end def match_resolve_index @@ -122,12 +116,75 @@ module Groonga def match_resolve_index_expression_codes(expression, codes, i, n_codes) code = codes[i] value = code.value + return i + 1 if value.nil? + + case value + when Accessor, Column + :xxx # TODO: To avoid mruby bug... 
+ index_info, offset = + match_resolve_index_expression_find_index(expression, + codes, i, n_codes) + i += offset - 1 + if index_info + if value.is_a?(Accessor) + self.flags |= ScanInfo::Flags::ACCESSOR + end + weight, offset = codes[i].weight + i += offset + put_search_index(index_info.index, index_info.section_id, weight) + end + when Procedure + unless value.scorer? + message = "procedure must be scorer: #{scorer.name}>" + raise ErrorMessage, message + end + scorer = value + i += 1 + index_info, offset = + match_resolve_index_expression_find_index(expression, + codes, i, n_codes) + i += offset + if index_info + scorer_args_expr_offset = 0 + if codes[i].op != Operator::CALL + scorer_args_expr_offset = i + end + while i < n_codes and codes[i].op != Operator::CALL + i += 1 + end + weight, offset = codes[i].weight + i += offset + search_index = ScanInfoSearchIndex.new(index_info.index, + index_info.section_id, + weight, + scorer, + expression, + scorer_args_expr_offset) + @search_indexes << search_index + end + when Table + raise ErrorMessage, "invalid match target: <#{value.name}>" + end + i + 1 + end + + def match_resolve_index_expression_find_index(expression, codes, i, n_codes) + code = codes[i] + value = code.value + index_info = nil + offset = 1 case value when Accessor - match_resolve_index_expression_accessor(code) + accessor = value + index_info = accessor.find_index(@op) + index_info.nil? # TODO: To avoid mruby bug... + if index_info and accessor.have_next? 
+ index_info = IndexInfo.new(accessor, index_info.section_id) + end when FixedSizeColumn, VariableSizeColumn - match_resolve_index_expression_data_column(code) + index_info = value.find_index(@op) when IndexColumn + index = value section_id = 0 rest_n_codes = n_codes - i if rest_n_codes >= 2 and @@ -136,34 +193,12 @@ module Groonga codes[i + 1].value.domain == ID::INT32) and codes[i + 2].op == Operator::GET_MEMBER section_id = codes[i + 1].value.value + 1 - code = codes[i + 2] - i += 2 - end - put_search_index(value, section_id, code.weight) - when Procedure - unless value.scorer? - message = "procedure must be scorer: #{scorer.name}>" - raise ErrorMessage, message - end - @scorer = value - rest_n_codes = n_codes - i - if rest_n_codes == 0 - message = "match target is required as an argument: <#{scorer.name}>" - raise ErrorMessage, message - end - i = match_resolve_index_expression_codes(expression, codes, i + 1, - n_codes) - unless codes[i].op == Operator::CALL - @scorer_args_expr = expression - @scorer_args_expr_offset = i - until codes[i].op == Operator::CALL - i += 1 - end + offset += 2 end - when Table - raise ErrorMessage, "invalid match target: <#{value.name}>" + index_info = IndexInfo.new(index, section_id) end - i + 1 + + [index_info, offset] end def match_resolve_index_expression_accessor(expr_code) Modified: lib/mrb/scripts/scan_info_search_index.rb (+6 -1) =================================================================== --- lib/mrb/scripts/scan_info_search_index.rb 2015-03-01 23:37:41 +0900 (9114a04) +++ lib/mrb/scripts/scan_info_search_index.rb 2015-03-01 23:39:45 +0900 (a281816) @@ -1,4 +1,9 @@ module Groonga - class ScanInfoSearchIndex < Struct.new(:index_column, :section_id, :weight) + class ScanInfoSearchIndex < Struct.new(:index_column, + :section_id, + :weight, + :scorer, + :scorer_args_expr, + :scorer_args_expr_offset) end end Modified: test/command/suite/select/match_columns/scorer/tf_at_most/weight.test (+0 -1) 
=================================================================== --- test/command/suite/select/match_columns/scorer/tf_at_most/weight.test 2015-03-01 23:37:41 +0900 (5acc4e7) +++ test/command/suite/select/match_columns/scorer/tf_at_most/weight.test 2015-03-01 23:39:45 +0900 (f514a2c) @@ -1,4 +1,3 @@ -#@omit "TODO: Support this case" table_create Memos TABLE_NO_KEY column_create Memos title COLUMN_SCALAR ShortText column_create Memos content COLUMN_SCALAR Text -------------- next part -------------- HTMLの添付ファイルを保管しました...Download