Kouhei Sutou
null+****@clear*****
Fri Aug 7 11:39:11 JST 2015
Kouhei Sutou 2015-08-07 11:39:11 +0900 (Fri, 07 Aug 2015) New Revision: 2598febc44401f9905c96653b3d807c332159ca6 https://github.com/groonga/groonga/commit/2598febc44401f9905c96653b3d807c332159ca6 Message: Support 'vector_column @~ "regular expression"' without index Added files: test/command/suite/select/filter/no_index/regexp/vector_text.expected test/command/suite/select/filter/no_index/regexp/vector_text.test Modified files: lib/operator.c Modified: lib/operator.c (+92 -20) =================================================================== --- lib/operator.c 2015-08-06 21:16:04 +0900 (e57408e) +++ lib/operator.c 2015-08-07 11:39:11 +0900 (6b01dcc) @@ -698,19 +698,17 @@ string_have_prefix(grn_ctx *ctx, strncmp(target, prefix, prefix_len) == 0); } -static grn_bool -string_match_regexp(grn_ctx *ctx, - const char *target, unsigned int target_len, - const char *pattern, unsigned int pattern_len) -{ #ifdef GRN_SUPPORT_REGEXP +static OnigRegex +regexp_compile(grn_ctx *ctx, const char *pattern, unsigned int pattern_len) +{ OnigRegex regex; OnigEncoding onig_encoding; int onig_result; OnigErrorInfo onig_error_info; if (ctx->encoding == GRN_ENC_NONE) { - return GRN_FALSE; + return NULL; } switch (ctx->encoding) { @@ -730,7 +728,7 @@ string_match_regexp(grn_ctx *ctx, onig_encoding = ONIG_ENCODING_KOI8_R; break; default : - return GRN_FALSE; + return NULL; } onig_result = onig_new(®ex, @@ -748,21 +746,46 @@ string_match_regexp(grn_ctx *ctx, "failed to create regular expression object: <%.*s>: %s", pattern_len, pattern, message); - return GRN_FALSE; + return NULL; } - { - OnigPosition position; - position = onig_search(regex, - target, - target + target_len, - target, - target + target_len, - NULL, - ONIG_OPTION_NONE); - onig_free(regex); - return position != ONIG_MISMATCH; + return regex; +} + +static grn_bool +regexp_is_match(grn_ctx *ctx, OnigRegex regex, + const char *target, unsigned int target_len) +{ + OnigPosition position; + + position = onig_search(regex, + target, + target + target_len, + target, + target + target_len, + NULL, + ONIG_OPTION_NONE); + return position != ONIG_MISMATCH; +} +#endif + +static grn_bool +string_match_regexp(grn_ctx *ctx, + const char *target, unsigned int target_len, + const char *pattern, unsigned int pattern_len) +{ +#ifdef GRN_SUPPORT_REGEXP + OnigRegex regex; + grn_bool matched; + + regex = regexp_compile(ctx, pattern, pattern_len); + if (!regex) { + return GRN_FALSE; } + + matched = regexp_is_match(ctx, regex, target, target_len); + onig_free(regex); + return matched; #else return GRN_FALSE; #endif @@ -982,11 +1005,60 @@ grn_operator_exec_prefix(grn_ctx *ctx, grn_obj *target, grn_obj *prefix) GRN_API_RETURN(matched); } +static grn_bool +exec_regexp_vector_bulk(grn_ctx *ctx, grn_obj *vector, grn_obj *pattern) +{ +#ifdef GRN_SUPPORT_REGEXP + grn_bool matched = GRN_FALSE; + unsigned int i, size; + OnigRegex regex; + + size = grn_vector_size(ctx, vector); + if (size == 0) { + return GRN_FALSE; + } + + regex = regexp_compile(ctx, GRN_TEXT_VALUE(pattern), GRN_TEXT_LEN(pattern)); + if (!regex) { + return GRN_FALSE; + } + + for (i = 0; i < size; i++) { + const char *content; + unsigned int content_size; + grn_id domain_id; + + content_size = grn_vector_get_element(ctx, vector, i, + &content, NULL, &domain_id); + if (regexp_is_match(ctx, regex, content, content_size)) { + matched = GRN_TRUE; + break; + } + } + + onig_free(regex); + + return matched; +#else + return GRN_FALSE; +#endif +} + grn_bool grn_operator_exec_regexp(grn_ctx *ctx, grn_obj *target, grn_obj *pattern) { grn_bool matched; GRN_API_ENTER; - matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_REGEXP, target, pattern); + switch (target->header.type) { + case GRN_VECTOR : + matched = exec_regexp_vector_bulk(ctx, target, pattern); + break; + case GRN_BULK : + matched = exec_text_operator_bulk_bulk(ctx, GRN_OP_REGEXP, target, pattern); + break; + default : + matched = GRN_FALSE; + break; + } GRN_API_RETURN(matched); } Added: test/command/suite/select/filter/no_index/regexp/vector_text.expected (+43 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/filter/no_index/regexp/vector_text.expected 2015-08-07 11:39:11 +0900 (deaa379) @@ -0,0 +1,43 @@ +table_create Memos TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Memos tags COLUMN_VECTOR Text +[[0,0.0,0.0],true] +load --table Memos +[ +{"tags": ["Groonga", "Rroonga", "Mroonga"]}, +{"tags": ["Groonga", "PGroonga", "Mroonga"]} +] +[[0,0.0,0.0],2] +select Memos --filter 'tags @~ "\\\\APGr"' +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 1 + ], + [ + [ + "_id", + "UInt32" + ], + [ + "tags", + "Text" + ] + ], + [ + 2, + [ + "Groonga", + "PGroonga", + "Mroonga" + ] + ] + ] + ] +] Added: test/command/suite/select/filter/no_index/regexp/vector_text.test (+10 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/filter/no_index/regexp/vector_text.test 2015-08-07 11:39:11 +0900 (114aff1) @@ -0,0 +1,10 @@ +table_create Memos TABLE_NO_KEY +column_create Memos tags COLUMN_VECTOR Text + +load --table Memos +[ +{"tags": ["Groonga", "Rroonga", "Mroonga"]}, +{"tags": ["Groonga", "PGroonga", "Mroonga"]} +] + +select Memos --filter 'tags @~ "\\\\APGr"' -------------- next part -------------- HTML����������������������������...Download