Kouhei Sutou 2019-03-04 16:23:13 +0900 (Mon, 04 Mar 2019) Revision: 57fe8e4eb6d151a90fb8c2b9a8653663fd998376 https://github.com/groonga/groonga/commit/57fe8e4eb6d151a90fb8c2b9a8653663fd998376 Message: Fix a stop word handling bug If the first token is skipped as a stop word, following search was buggy. Modified files: lib/expr.c lib/ii.c test/command/suite/token_filters/stop_word/column.expected test/command/suite/token_filters/stop_word/column.test test/command/suite/token_filters/stop_word/offline_index_construction.expected test/command/suite/token_filters/stop_word/offline_index_construction.test test/command/suite/token_filters/stop_word/skip.expected test/command/suite/token_filters/stop_word/skip.test Modified: lib/expr.c (+17 -1) =================================================================== --- lib/expr.c 2019-03-04 16:13:40 +0900 (40ae8d247) +++ lib/expr.c 2019-03-04 16:23:13 +0900 (b2cc25ad9) @@ -3474,6 +3474,8 @@ typedef struct { scan_info *scan_info; grn_obj *res; grn_id min_id; + grn_bool is_skipped; + grn_bool is_first_unskipped_scan_info; } grn_table_select_data; static void @@ -4125,7 +4127,9 @@ grn_table_select_index_match(grn_ctx *ctx, } } ctx->flags &= ~GRN_CTX_TEMPORARY_DISABLE_II_RESOLVE_SEL_AND; - if (!(optarg.match_info.flags & GRN_MATCH_INFO_ONLY_SKIP_TOKEN)) { + if (optarg.match_info.flags & GRN_MATCH_INFO_ONLY_SKIP_TOKEN) { + data->is_skipped = GRN_TRUE; + } else { grn_ii_resolve_sel_and(ctx, (grn_hash *)res, si->logical_op); } if ((si->logical_op == GRN_OP_AND) || @@ -4905,6 +4909,8 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, data.scanner = scanner; data.res = res; data.min_id = GRN_ID_NIL; + data.is_skipped = GRN_FALSE; + data.is_first_unskipped_scan_info = GRN_TRUE; if (res_size > 0 && op == GRN_OP_AND) { grn_bool have_push = GRN_FALSE; for (i = 0; i < scanner->n_sis; i++) { @@ -4941,6 +4947,16 @@ grn_table_select(grn_ctx *ctx, grn_obj *table, grn_obj *expr, scan_info *si = scanner->sis[i]; data.nth_scan_info = i; data.scan_info = si; + if (i > 0 && data.is_first_unskipped_scan_info) { + if (data.is_skipped) { + if (si->logical_op == GRN_OP_AND) { + si->logical_op = GRN_OP_OR; + } + } else { + data.is_first_unskipped_scan_info = GRN_FALSE; + } + } + data.is_skipped = GRN_FALSE; if (si->flags & SCAN_POP) { grn_obj *res_; GRN_PTR_POP(&res_stack, res_); Modified: lib/ii.c (+2 -1) =================================================================== --- lib/ii.c 2019-03-04 16:13:40 +0900 (5a723bad4) +++ lib/ii.c 2019-03-04 16:23:13 +0900 (019c72a3c) @@ -9043,7 +9043,8 @@ grn_ii_select_data_fin(grn_ctx *ctx, GRN_OBJ_FIN(ctx, &(data->record.term_weights)); } - if (data->set_min_enable_for_and_query) { + if (data->set_min_enable_for_and_query && + !data->only_skip_token) { if (data->current_min > data->previous_min) { data->optarg->match_info->min = data->current_min; } Modified: test/command/suite/token_filters/stop_word/column.expected (+1 -1) =================================================================== --- test/command/suite/token_filters/stop_word/column.expected 2019-03-04 16:13:40 +0900 (3c563762c) +++ test/command/suite/token_filters/stop_word/column.expected 2019-03-04 16:23:13 +0900 (4b27062ff) @@ -22,7 +22,7 @@ load --table Memos {"content": "Good-bye"} ] [[0,0.0,0.0],3] -select Memos --match_columns content --query "Hello and" +select Memos --match_columns content --query "Hello and" --match_escalation_threshold -1 [ [ 0, Modified: test/command/suite/token_filters/stop_word/column.test (+4 -1) =================================================================== --- test/command/suite/token_filters/stop_word/column.test 2019-03-04 16:13:40 +0900 (fc4ebd34a) +++ test/command/suite/token_filters/stop_word/column.test 2019-03-04 16:23:13 +0900 (7c33b9ddf) @@ -19,4 +19,7 @@ load --table Memos {"content": "Good-bye"} ] -select Memos --match_columns content --query "Hello and" +select Memos \ + --match_columns content \ + --query "Hello and" \ + --match_escalation_threshold -1 Modified: test/command/suite/token_filters/stop_word/offline_index_construction.expected (+1 -1) =================================================================== --- test/command/suite/token_filters/stop_word/offline_index_construction.expected 2019-03-04 16:13:40 +0900 (3f8a18044) +++ test/command/suite/token_filters/stop_word/offline_index_construction.expected 2019-03-04 16:23:13 +0900 (cc858813e) @@ -22,7 +22,7 @@ load --table Terms {"_key": "and", "is_stop_word": true} ] [[0,0.0,0.0],1] -select Memos --match_columns content --query "Hello and" +select Memos --match_columns content --query "Hello and" --match_escalation_threshold -1 --sort_keys -_score [ [ 0, Modified: test/command/suite/token_filters/stop_word/offline_index_construction.test (+5 -1) =================================================================== --- test/command/suite/token_filters/stop_word/offline_index_construction.test 2019-03-04 16:13:40 +0900 (75c516e47) +++ test/command/suite/token_filters/stop_word/offline_index_construction.test 2019-03-04 16:23:13 +0900 (dfc5c07e4) @@ -22,4 +22,8 @@ load --table Terms {"_key": "and", "is_stop_word": true} ] -select Memos --match_columns content --query "Hello and" +select Memos \ + --match_columns content \ + --query "Hello and" \ + --match_escalation_threshold -1 \ + --sort_keys -_score Modified: test/command/suite/token_filters/stop_word/skip.expected (+1 -1) =================================================================== --- test/command/suite/token_filters/stop_word/skip.expected 2019-03-04 16:13:40 +0900 (d2f6583b0) +++ test/command/suite/token_filters/stop_word/skip.expected 2019-03-04 16:23:13 +0900 (ea73614d9) @@ -22,7 +22,7 @@ load --table Memos {"content": "Good-bye"} ] [[0,0.0,0.0],3] -select Memos --match_columns content --query "Hello and" +select Memos --match_columns content --query "Hello and" --match_escalation_threshold -1 --sort_keys -_score [ [ 0, Modified: test/command/suite/token_filters/stop_word/skip.test (+5 -1) =================================================================== --- test/command/suite/token_filters/stop_word/skip.test 2019-03-04 16:13:40 +0900 (364e09026) +++ test/command/suite/token_filters/stop_word/skip.test 2019-03-04 16:23:13 +0900 (c7acc5fef) @@ -22,4 +22,8 @@ load --table Memos {"content": "Good-bye"} ] -select Memos --match_columns content --query "Hello and" +select Memos \ + --match_columns content \ + --query "Hello and" \ + --match_escalation_threshold -1 \ + --sort_keys -_score -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190304/6df20cbe/attachment-0001.html>