Naoya Murakami
null+****@clear*****
Tue Dec 27 17:00:11 JST 2016
Naoya Murakami 2016-12-27 17:00:11 +0900 (Tue, 27 Dec 2016) New Revision: 4fae36bdbd6750e1c3af9294cfdaf1bfd80531e4 https://github.com/groonga/groonga/commit/4fae36bdbd6750e1c3af9294cfdaf1bfd80531e4 Merged 6142da4: Merge pull request #618 from naoa/ii-set-min-for-and-operator Message: Support setting minimum record id to ii_cursor if operator is GRN_OP_AND Modified files: include/groonga/groonga.h lib/expr.c lib/grn_ii.h lib/ii.c lib/proc.c lib/proc/proc_fuzzy_search.c lib/proc/proc_select.c Modified: include/groonga/groonga.h (+1 -0) =================================================================== --- include/groonga/groonga.h 2016-12-27 01:02:50 +0900 (74e1919) +++ include/groonga/groonga.h 2016-12-27 17:00:11 +0900 (f3a23cd) @@ -814,6 +814,7 @@ struct _grn_search_optarg { grn_obj *scorer_args_expr; unsigned int scorer_args_expr_offset; grn_fuzzy_search_optarg fuzzy; + grn_id *min; }; GRN_API grn_rc grn_obj_search(grn_ctx *ctx, grn_obj *obj, grn_obj *query, Modified: lib/expr.c (+20 -2) =================================================================== --- lib/expr.c 2016-12-27 01:02:50 +0900 (828842e) +++ lib/expr.c 2016-12-27 17:00:11 +0900 (6259980) @@ -6545,7 +6545,7 @@ grn_table_select_index_range(grn_ctx *ctx, grn_obj *table, grn_obj *index, static inline grn_bool grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, - grn_obj *res) + grn_obj *res, grn_id *min) { grn_bool processed = GRN_FALSE; if (!si->query) { @@ -6662,6 +6662,7 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, int n_indexes = GRN_BULK_VSIZE(&si->index)/sizeof(grn_obj *); int32_t *wp = &GRN_INT32_VALUE(&si->wv); grn_search_optarg optarg; + grn_id previous_min = GRN_ID_NIL; GRN_INT32_INIT(&wv, GRN_OBJ_VECTOR); if (si->op == GRN_OP_MATCH) { optarg.mode = GRN_OP_EXACT; @@ -6686,10 +6687,18 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, optarg.vector_size = 1; optarg.proc = NULL; optarg.max_size = 0; + if (min) { + previous_min = 
*min; + } ctx->flags |= GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; for (j = 0; j < n_indexes; j++, ip++, wp += 2) { uint32_t sid = (uint32_t) wp[0]; int32_t weight = wp[1]; + grn_id current_min; + if (min) { + current_min = previous_min; + optarg.min = &current_min; + } if (sid) { int weight_index = sid - 1; int current_vector_size; @@ -6723,6 +6732,11 @@ grn_table_select_index(grn_ctx *ctx, grn_obj *table, scan_info *si, } } GRN_BULK_REWIND(&wv); + if (min) { + if (previous_min < *optarg.min && (j == 0 || *optarg.min < *min)) { + *min = *optarg.min; + } + } } GRN_OBJ_FIN(ctx, &wv); } @@ -6843,6 +6857,7 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, grn_expr *e = (grn_expr *)scanner->expr; grn_expr_code *codes = e->codes; uint32_t codes_curr = e->codes_curr; + grn_id min = GRN_ID_NIL; v = grn_expr_get_var_by_offset(ctx, (grn_obj *)e, 0); GRN_PTR_INIT(&res_stack, GRN_OBJ_VECTOR, GRN_ID_NIL); for (i = 0; i < scanner->n_sis; i++) { @@ -6865,7 +6880,10 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, GRN_PTR_PUT(ctx, &res_stack, res); res = res_; } - processed = grn_table_select_index(ctx, table, si, res); + if (si->logical_op != GRN_OP_AND) { + min = GRN_ID_NIL; + } + processed = grn_table_select_index(ctx, table, si, res, &min); if (!processed) { if (ctx->rc) { break; } e->codes = codes + si->start; Modified: lib/grn_ii.h (+1 -0) =================================================================== --- lib/grn_ii.h 2016-12-27 01:02:50 +0900 (752dc0e) +++ lib/grn_ii.h 2016-12-27 17:00:11 +0900 (ea25474) @@ -154,6 +154,7 @@ struct _grn_select_optarg { grn_obj *scorer_args_expr; unsigned int scorer_args_expr_offset; grn_fuzzy_search_optarg fuzzy; + grn_id *min; }; GRN_API grn_rc grn_ii_column_update(grn_ctx *ctx, grn_ii *ii, grn_id id, Modified: lib/ii.c (+49 -35) =================================================================== --- lib/ii.c 2016-12-27 01:02:50 +0900 (67714f1) +++ lib/ii.c 2016-12-27 17:00:11 +0900 (51eecb0) @@ -5420,7 
+5420,7 @@ cursor_heap_open(grn_ctx *ctx, int max) static inline grn_rc cursor_heap_push(grn_ctx *ctx, cursor_heap *h, grn_ii *ii, grn_id tid, uint32_t offset2, - int weight) + int weight, grn_id min) { int n, n2; grn_ii_cursor *c, *c2; @@ -5433,7 +5433,7 @@ cursor_heap_push(grn_ctx *ctx, cursor_heap *h, grn_ii *ii, grn_id tid, uint32_t h->bins = bins; } { - if (!(c = grn_ii_cursor_open(ctx, ii, tid, GRN_ID_NIL, GRN_ID_MAX, + if (!(c = grn_ii_cursor_open(ctx, ii, tid, min, GRN_ID_MAX, ii->n_elements, 0))) { GRN_LOG(ctx, GRN_LOG_ERROR, "cursor open failed"); return ctx->rc; @@ -6361,7 +6361,7 @@ token_info_expand_both(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, !(lexicon->header.flags & GRN_OBJ_KEY_WITH_SIS) || key2_size <= 2) { // todo: refine if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { - cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0); + cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0, GRN_ID_NIL); ti->ntoken++; ti->size += s; } @@ -6373,7 +6373,7 @@ token_info_expand_both(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, GRN_HASH_EACH(ctx, g, id, &tq, NULL, &offset2, { if ((s = grn_ii_estimate_size(ctx, ii, *tq))) { cursor_heap_push(ctx, ti->cursors, ii, *tq, - /* *offset2 */ 0, 0); + /* *offset2 */ 0, 0, GRN_ID_NIL); ti->ntoken++; ti->size += s; } @@ -6401,7 +6401,7 @@ token_info_close(grn_ctx *ctx, token_info *ti) inline static token_info * token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *key, unsigned int key_size, uint32_t offset, - int mode, grn_fuzzy_search_optarg *args) + int mode, grn_fuzzy_search_optarg *args, grn_id min) { int s = 0; grn_hash *h; @@ -6422,7 +6422,7 @@ token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, if ((tid = grn_table_get(ctx, lexicon, key, key_size)) && (s = grn_ii_estimate_size(ctx, ii, tid)) && (ti->cursors = cursor_heap_open(ctx, 1))) { - cursor_heap_push(ctx, ti->cursors, ii, tid, 0, 0); + cursor_heap_push(ctx, ti->cursors, ii, tid, 0, 0, min); ti->ntoken++; ti->size = s; } @@ -6435,7 +6435,7 @@ 
token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, if ((ti->cursors = cursor_heap_open(ctx, GRN_HASH_SIZE(h)))) { GRN_HASH_EACH(ctx, h, id, &tp, NULL, NULL, { if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { - cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0); + cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, 0, min); ti->ntoken++; ti->size += s; } @@ -6454,7 +6454,7 @@ token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, uint32_t *offset2; GRN_HASH_EACH(ctx, h, id, &tp, NULL, &offset2, { if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { - cursor_heap_push(ctx, ti->cursors, ii, *tp, /* *offset2 */ 0, 0); + cursor_heap_push(ctx, ti->cursors, ii, *tp, /* *offset2 */ 0, 0, min); ti->ntoken++; ti->size += s; } @@ -6475,7 +6475,7 @@ token_info_open(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, grn_rset_recinfo *ri; GRN_HASH_EACH(ctx, h, id, &tp, NULL, (void **)&ri, { if ((s = grn_ii_estimate_size(ctx, ii, *tp))) { - cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, ri->score - 1); + cursor_heap_push(ctx, ti->cursors, ii, *tp, 0, ri->score - 1, min); ti->ntoken++; ti->size += s; } @@ -6792,7 +6792,7 @@ inline static grn_rc token_candidate_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, token_info **tis, uint32_t *n, token_candidate_node *nodes, uint32_t selected_candidate, - int offset) + int offset, grn_id min) { grn_rc rc = GRN_END_OF_DATA; token_info *ti; @@ -6807,19 +6807,19 @@ token_candidate_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, case GRN_TOKEN_CURSOR_DOING : key = _grn_table_key(ctx, lexicon, node->tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, node->pos, - EX_NONE, NULL); + EX_NONE, NULL, min); break; case GRN_TOKEN_CURSOR_DONE : if (node->tid) { key = _grn_table_key(ctx, lexicon, node->tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, node->pos, - node->ef & EX_PREFIX, NULL); + node->ef & EX_PREFIX, NULL, min); break; } /* else fallthru */ default : ti = token_info_open(ctx, lexicon, ii, (char *)node->token, 
node->token_size, node->pos, - node->ef & EX_PREFIX, NULL); + node->ef & EX_PREFIX, NULL, min); break; } if (!ti) { @@ -6839,7 +6839,7 @@ inline static grn_rc token_info_build_skipping_overlap(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, token_info **tis, uint32_t *n, grn_token_cursor *token_cursor, - grn_id tid, int ef) + grn_id tid, int ef, grn_id min) { grn_rc rc; token_candidate_node *nodes = NULL; @@ -6857,7 +6857,7 @@ token_info_build_skipping_overlap(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, if (rc != GRN_SUCCESS) { goto exit; } - rc = token_candidate_build(ctx, lexicon, ii, tis, n, nodes, selected_candidate, offset); + rc = token_candidate_build(ctx, lexicon, ii, tis, n, nodes, selected_candidate, offset, min); if (rc != GRN_SUCCESS) { goto exit; } @@ -6873,7 +6873,7 @@ exit : inline static grn_rc token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int string_len, - token_info **tis, uint32_t *n, grn_bool *only_skip_token, + token_info **tis, uint32_t *n, grn_bool *only_skip_token, grn_id min, grn_operator mode) { token_info *ti; @@ -6889,7 +6889,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, if (!token_cursor) { return GRN_NO_MEMORY_AVAILABLE; } if (mode == GRN_OP_UNSPLIT) { if ((ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, - token_cursor->orig_blen, 0, EX_BOTH, NULL))) { + token_cursor->orig_blen, 0, EX_BOTH, NULL, min))) { tis[(*n)++] = ti; rc = GRN_SUCCESS; } @@ -6916,21 +6916,21 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, case GRN_TOKEN_CURSOR_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, - ef & EX_SUFFIX, NULL); + ef & EX_SUFFIX, NULL, min); break; case GRN_TOKEN_CURSOR_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, - token_cursor->curr_size, 0, ef, NULL); + token_cursor->curr_size, 0, ef, NULL, min); /* 
key = _grn_table_key(ctx, lexicon, tid, &size); - ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, NULL); + ti = token_info_open(ctx, lexicon, ii, token_cursor->curr, token_cursor->curr_size, token_cursor->pos, ef, NULL, GRN_ID_NIL); ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, - token_cursor->orig_blen, token_cursor->pos, ef, NULL); + token_cursor->orig_blen, token_cursor->pos, ef, NULL, GRN_ID_NIL); */ break; case GRN_TOKEN_CURSOR_NOT_FOUND : ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->orig, - token_cursor->orig_blen, 0, ef, NULL); + token_cursor->orig_blen, 0, ef, NULL, min); break; case GRN_TOKEN_CURSOR_DONE_SKIP : *only_skip_token = GRN_TRUE; @@ -6942,7 +6942,7 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, tis[(*n)++] = ti; if (grn_ii_overlap_token_skip_enable) { - rc = token_info_build_skipping_overlap(ctx, lexicon, ii, tis, n, token_cursor, tid, ef); + rc = token_info_build_skipping_overlap(ctx, lexicon, ii, tis, n, token_cursor, tid, ef, min); goto exit; } @@ -6955,19 +6955,19 @@ token_info_build(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, case GRN_TOKEN_CURSOR_DOING : key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, - EX_NONE, NULL); + EX_NONE, NULL, min); break; case GRN_TOKEN_CURSOR_DONE : if (tid) { key = _grn_table_key(ctx, lexicon, tid, &size); ti = token_info_open(ctx, lexicon, ii, key, size, token_cursor->pos, - ef & EX_PREFIX, NULL); + ef & EX_PREFIX, NULL, min); break; } /* else fallthru */ default : ti = token_info_open(ctx, lexicon, ii, (char *)token_cursor->curr, token_cursor->curr_size, token_cursor->pos, - ef & EX_PREFIX, NULL); + ef & EX_PREFIX, NULL, min); break; } if (!ti) { @@ -6986,7 +6986,7 @@ inline static grn_rc token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, const char *string, unsigned int 
string_len, token_info **tis, uint32_t *n, grn_bool *only_skip_token, - grn_operator mode, grn_fuzzy_search_optarg *args) + grn_id min, grn_operator mode, grn_fuzzy_search_optarg *args) { token_info *ti; grn_rc rc = GRN_END_OF_DATA; @@ -7006,7 +7006,7 @@ token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, case GRN_TOKEN_CURSOR_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, token_cursor->curr_size, token_cursor->pos, EX_FUZZY, - args); + args, min); break; default : ti = NULL; @@ -7025,7 +7025,7 @@ token_info_build_fuzzy(grn_ctx *ctx, grn_obj *lexicon, grn_ii *ii, case GRN_TOKEN_CURSOR_DONE : ti = token_info_open(ctx, lexicon, ii, (const char *)token_cursor->curr, token_cursor->curr_size, token_cursor->pos, EX_FUZZY, - args); + args, min); break; default : break; @@ -7790,6 +7790,7 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, grn_obj *lexicon = ii->lexicon; grn_scorer_score_func *score_func = NULL; grn_scorer_matched_record record; + grn_id current_min = GRN_ID_NIL; if (!lexicon || !ii || !s) { return GRN_INVALID_ARGUMENT; } if (optarg) { @@ -7821,14 +7822,14 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, } if (mode == GRN_OP_FUZZY) { if (token_info_build_fuzzy(ctx, lexicon, ii, string, string_len, - tis, &n, &only_skip_token, mode, - &(optarg->fuzzy)) || + tis, &n, &only_skip_token, *optarg->min, + mode, &(optarg->fuzzy)) || !n) { goto exit; } } else { if (token_info_build(ctx, lexicon, ii, string, string_len, - tis, &n, &only_skip_token, mode) || + tis, &n, &only_skip_token, *optarg->min, mode) || !n) { goto exit; } @@ -8027,6 +8028,11 @@ grn_ii_select(grn_ctx *ctx, grn_ii *ii, } else { record_score = (noccur + tscore) * weight; } + if (optarg->min) { + if (current_min == GRN_ID_NIL) { + current_min = rid; + } + } res_add(ctx, s, &pi, record_score, op); } #undef SKIP_OR_BREAK @@ -8039,6 +8045,13 @@ exit : GRN_OBJ_FIN(ctx, &(record.terms)); GRN_OBJ_FIN(ctx, &(record.term_weights)); } + + if (optarg->min) { + if (current_min 
> *optarg->min) { + *optarg->min = current_min; + } + } + for (tip = tis; tip < tis + n; tip++) { if (*tip) { token_info_close(ctx, *tip); } } @@ -8144,12 +8157,12 @@ grn_ii_estimate_size_for_query(grn_ctx *ctx, grn_ii *ii, switch (mode) { case GRN_OP_FUZZY : rc = token_info_build_fuzzy(ctx, lexicon, ii, query, query_len, - tis, &n_tis, &only_skip_token, mode, - &(optarg->fuzzy)); + tis, &n_tis, &only_skip_token, *optarg->min, + mode, &(optarg->fuzzy)); break; default : rc = token_info_build(ctx, lexicon, ii, query, query_len, - tis, &n_tis, &only_skip_token, mode); + tis, &n_tis, &only_skip_token, *optarg->min, mode); break; } @@ -8243,6 +8256,7 @@ grn_ii_sel(grn_ctx *ctx, grn_ii *ii, const char *string, unsigned int string_len arg.scorer = optarg->scorer; arg.scorer_args_expr = optarg->scorer_args_expr; arg.scorer_args_expr_offset = optarg->scorer_args_expr_offset; + arg.min = optarg->min; } /* todo : support subrec grn_rset_init(ctx, s, grn_rec_document, 0, grn_rec_none, 0, 0); Modified: lib/proc.c (+1 -0) =================================================================== --- lib/proc.c 2016-12-27 01:02:50 +0900 (b59a23f) +++ lib/proc.c 2016-12-27 17:00:11 +0900 (a639055) @@ -2855,6 +2855,7 @@ selector_in_values(grn_ctx *ctx, grn_obj *table, grn_obj *index, search_options.proc = NULL; search_options.max_size = 0; search_options.scorer = NULL; + search_options.min = NULL; if (i == n_values - 1) { ctx->flags &= ~GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; } Modified: lib/proc/proc_fuzzy_search.c (+1 -0) =================================================================== --- lib/proc/proc_fuzzy_search.c 2016-12-27 01:02:50 +0900 (bb1b6a6) +++ lib/proc/proc_fuzzy_search.c 2016-12-27 17:00:11 +0900 (dd1ef41) @@ -447,6 +447,7 @@ selector_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *index, options.fuzzy.max_distance = max_distance; options.fuzzy.max_expansion = max_expansion; options.fuzzy.flags = flags; + options.min = NULL; grn_obj_search(ctx, target, 
query, res, op, &options); } Modified: lib/proc/proc_select.c (+1 -0) =================================================================== --- lib/proc/proc_select.c 2016-12-27 01:02:50 +0900 (a78dbde) +++ lib/proc/proc_select.c 2016-12-27 17:00:11 +0900 (f37c232) @@ -1517,6 +1517,7 @@ grn_select_apply_adjuster_execute_adjust(grn_ctx *ctx, options.proc = NULL; options.max_size = 0; options.scorer = NULL; + options.min = NULL; grn_obj_search(ctx, index, value, table, GRN_OP_ADJUST, &options); } -------------- next part -------------- HTML����������������������������...Download