Kouhei Sutou
null+****@clear*****
Tue Mar 10 16:07:34 JST 2015
Kouhei Sutou 2015-03-10 16:07:34 +0900 (Tue, 10 Mar 2015) New Revision: 0743a8ef7d62c7abe96235c61d1a2b4143bb99f9 https://github.com/groonga/groonga/commit/0743a8ef7d62c7abe96235c61d1a2b4143bb99f9 Message: Add grn_operator_exec_match() Modified files: include/groonga/groonga.h lib/expr.c lib/operator.c test/unit/core/test-operator.c Modified: include/groonga/groonga.h (+2 -0) =================================================================== --- include/groonga/groonga.h 2015-03-10 15:50:23 +0900 (59d505b) +++ include/groonga/groonga.h 2015-03-10 16:07:34 +0900 (915a89b) @@ -724,6 +724,8 @@ GRN_API grn_bool grn_operator_exec_less_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y); GRN_API grn_bool grn_operator_exec_greater_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y); +GRN_API grn_bool grn_operator_exec_match(grn_ctx *ctx, + grn_obj *target, grn_obj *pattern); struct _grn_table_group_result { grn_obj *table; Modified: lib/expr.c (+1 -148) =================================================================== --- lib/expr.c 2015-03-10 15:50:23 +0900 (c1d2f9d) +++ lib/expr.c 2015-03-10 16:07:34 +0900 (1f627ce) @@ -2223,153 +2223,6 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller) } while (0) static grn_bool -string_is_contained(grn_ctx *ctx, - const char *text, unsigned int text_len, - const char *sub_text, unsigned int sub_text_len) -{ - /* TODO: Use more fast algorithm such as Boyer-Moore algorithm that - * is used in snip.c. 
*/ - const char *text_end = text + text_len; - unsigned int sub_text_current = 0; - - for (; text < text_end; text++) { - if (text[0] == sub_text[sub_text_current]) { - sub_text_current++; - if (sub_text_current == sub_text_len) { - return GRN_TRUE; - } - } else { - sub_text_current = 0; - } - } - - return GRN_FALSE; -} - -static grn_bool -pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx, - const char *x, unsigned int x_len, - const char *y, unsigned int y_len) -{ - grn_obj *normalizer; - grn_obj *norm_x; - grn_obj *norm_y; - const char *norm_x_raw; - const char *norm_y_raw; - unsigned int norm_x_raw_length_in_bytes; - unsigned int norm_y_raw_length_in_bytes; - grn_bool matched = GRN_FALSE; - - if (x_len == 0 || y_len == 0) { - return GRN_FALSE; - } - - normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); - norm_x = grn_string_open(ctx, x, x_len, normalizer, 0); - norm_y = grn_string_open(ctx, y, y_len, normalizer, 0); - grn_string_get_normalized(ctx, norm_x, - &norm_x_raw, &norm_x_raw_length_in_bytes, - NULL); - grn_string_get_normalized(ctx, norm_y, - &norm_y_raw, &norm_y_raw_length_in_bytes, - NULL); - matched = string_is_contained(ctx, - norm_x_raw, norm_x_raw_length_in_bytes, - norm_y_raw, norm_y_raw_length_in_bytes); - - grn_obj_close(ctx, norm_x); - grn_obj_close(ctx, norm_y); - grn_obj_unlink(ctx, normalizer); - - return matched; -} - -static grn_bool -pseudo_query_scan_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table, - grn_obj *y) -{ - grn_obj *normalizer; - char x_key[GRN_TABLE_MAX_KEY_SIZE]; - int x_key_len; - grn_bool matched = GRN_FALSE; - - if (table->header.domain != GRN_DB_SHORT_TEXT) { - return GRN_FALSE; - } - - x_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record), - x_key, GRN_TABLE_MAX_KEY_SIZE); - grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL); - if (normalizer) { - grn_obj *norm_y; - const char *norm_y_raw; - unsigned int norm_y_raw_length_in_bytes; - norm_y = grn_string_open(ctx, 
GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y), - normalizer, 0); - grn_string_get_normalized(ctx, norm_y, - &norm_y_raw, &norm_y_raw_length_in_bytes, - NULL); - matched = string_is_contained(ctx, - x_key, x_key_len, - norm_y_raw, norm_y_raw_length_in_bytes); - grn_obj_close(ctx, norm_y); - } else { - matched = pseudo_query_scan_raw_text_raw_text(ctx, - x_key, - x_key_len, - GRN_TEXT_VALUE(y), - GRN_TEXT_LEN(y)); - } - - return matched; -} - -static grn_bool -pseudo_query_scan_text_text(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - return pseudo_query_scan_raw_text_raw_text(ctx, - GRN_TEXT_VALUE(x), - GRN_TEXT_LEN(x), - GRN_TEXT_VALUE(y), - GRN_TEXT_LEN(y)); -} - -static grn_bool -pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y) -{ - switch (x->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - switch (y->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - return pseudo_query_scan_text_text(ctx, x, y); - default : - break; - } - return GRN_FALSE; - default: - { - grn_obj *domain; - domain = grn_ctx_at(ctx, x->header.domain); - if (GRN_OBJ_TABLEP(domain)) { - switch (y->header.domain) { - case GRN_DB_SHORT_TEXT : - case GRN_DB_TEXT : - case GRN_DB_LONG_TEXT : - return pseudo_query_scan_record_text(ctx, x, domain, y); - default : - break; - } - } - } - return GRN_FALSE; - } -} - -static grn_bool pseudo_prefix_search_match(grn_ctx *ctx, const char *x, unsigned int x_len, const char *y, unsigned int y_len) @@ -3202,7 +3055,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs) POP1(y); POP1(x); WITH_SPSAVE({ - matched = pseudo_query_scan(ctx, x, y); + matched = grn_operator_exec_match(ctx, x, y); }); ALLOC1(res); grn_obj_reinit(ctx, res, GRN_DB_INT32, 0); Modified: lib/operator.c (+175 -0) =================================================================== --- lib/operator.c 2015-03-10 15:50:23 +0900 (27aa5df) +++ lib/operator.c 2015-03-10 16:07:34 +0900 (11693a6) @@ -19,6 +19,7 @@ #include 
"grn.h" #include "grn_db.h" #include "grn_str.h" +#include "grn_normalizer.h" #include <string.h> @@ -594,3 +595,177 @@ grn_operator_exec_greater_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y) DO_COMPARE(x, y, r, >=); GRN_API_RETURN(r); } + +static grn_bool +string_is_contained(grn_ctx *ctx, + const char *text, unsigned int text_len, + const char *sub_text, unsigned int sub_text_len) +{ + /* TODO: Use more fast algorithm such as Boyer-Moore algorithm that + * is used in snip.c. */ + const char *text_end = text + text_len; + unsigned int sub_text_current = 0; + + for (; text < text_end; text++) { + if (text[0] == sub_text[sub_text_current]) { + sub_text_current++; + if (sub_text_current == sub_text_len) { + return GRN_TRUE; + } + } else { + sub_text_current = 0; + } + } + + return GRN_FALSE; +} + +static grn_bool +grn_operator_exec_match_raw_text_raw_text(grn_ctx *ctx, + const char *target, + unsigned int target_len, + const char *sub_text, + unsigned int sub_text_len) +{ + grn_obj *normalizer; + grn_obj *norm_target; + grn_obj *norm_sub_text; + const char *norm_target_raw; + const char *norm_sub_text_raw; + unsigned int norm_target_raw_length_in_bytes; + unsigned int norm_sub_text_raw_length_in_bytes; + grn_bool matched = GRN_FALSE; + + if (target_len == 0 || sub_text_len == 0) { + return GRN_FALSE; + } + + normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1); + norm_target = grn_string_open(ctx, target, target_len, normalizer, 0); + norm_sub_text = grn_string_open(ctx, sub_text, sub_text_len, normalizer, 0); + grn_string_get_normalized(ctx, norm_target, + &norm_target_raw, + &norm_target_raw_length_in_bytes, + NULL); + grn_string_get_normalized(ctx, norm_sub_text, + &norm_sub_text_raw, + &norm_sub_text_raw_length_in_bytes, + NULL); + matched = string_is_contained(ctx, + norm_target_raw, + norm_target_raw_length_in_bytes, + norm_sub_text_raw, + norm_sub_text_raw_length_in_bytes); + + grn_obj_close(ctx, norm_target); + grn_obj_close(ctx, norm_sub_text); + 
grn_obj_unlink(ctx, normalizer); + + return matched; +} + +static grn_bool +grn_operator_exec_match_record_text(grn_ctx *ctx, + grn_obj *record, grn_obj *table, + grn_obj *sub_text) +{ + grn_obj *normalizer; + char record_key[GRN_TABLE_MAX_KEY_SIZE]; + int record_key_len; + grn_bool matched = GRN_FALSE; + + if (table->header.domain != GRN_DB_SHORT_TEXT) { + return GRN_FALSE; + } + + record_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record), + record_key, GRN_TABLE_MAX_KEY_SIZE); + grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL); + if (normalizer) { + grn_obj *norm_sub_text; + const char *norm_sub_text_raw; + unsigned int norm_sub_text_raw_length_in_bytes; + norm_sub_text = grn_string_open(ctx, + GRN_TEXT_VALUE(sub_text), + GRN_TEXT_LEN(sub_text), + normalizer, + 0); + grn_string_get_normalized(ctx, norm_sub_text, + &norm_sub_text_raw, + &norm_sub_text_raw_length_in_bytes, + NULL); + matched = string_is_contained(ctx, + record_key, + record_key_len, + norm_sub_text_raw, + norm_sub_text_raw_length_in_bytes); + grn_obj_close(ctx, norm_sub_text); + } else { + matched = grn_operator_exec_match_raw_text_raw_text(ctx, + record_key, + record_key_len, + GRN_TEXT_VALUE(sub_text), + GRN_TEXT_LEN(sub_text)); + } + + return matched; +} + +static grn_bool +grn_operator_exec_match_text_text(grn_ctx *ctx, + grn_obj *target, + grn_obj *sub_text) +{ + return grn_operator_exec_match_raw_text_raw_text(ctx, + GRN_TEXT_VALUE(target), + GRN_TEXT_LEN(target), + GRN_TEXT_VALUE(sub_text), + GRN_TEXT_LEN(sub_text)); +} + +static grn_bool +grn_operator_exec_match_bulk_bulk(grn_ctx *ctx, + grn_obj *target, + grn_obj *sub_text) +{ + switch (target->header.domain) { + case GRN_DB_SHORT_TEXT : + case GRN_DB_TEXT : + case GRN_DB_LONG_TEXT : + switch (sub_text->header.domain) { + case GRN_DB_SHORT_TEXT : + case GRN_DB_TEXT : + case GRN_DB_LONG_TEXT : + return grn_operator_exec_match_text_text(ctx, target, sub_text); + default : + break; + } + return GRN_FALSE; + 
default: + { + grn_obj *domain; + domain = grn_ctx_at(ctx, target->header.domain); + if (GRN_OBJ_TABLEP(domain)) { + switch (sub_text->header.domain) { + case GRN_DB_SHORT_TEXT : + case GRN_DB_TEXT : + case GRN_DB_LONG_TEXT : + return grn_operator_exec_match_record_text(ctx, target, domain, + sub_text); + default : + break; + } + } + } + return GRN_FALSE; + } +} + +grn_bool +grn_operator_exec_match(grn_ctx *ctx, grn_obj *target, grn_obj *sub_text) +{ + grn_bool matched; + GRN_API_ENTER; + matched = grn_operator_exec_match_bulk_bulk(ctx, target, sub_text); + GRN_API_RETURN(matched); +} Modified: test/unit/core/test-operator.c (+23 -25) =================================================================== --- test/unit/core/test-operator.c 2015-03-10 15:50:23 +0900 (0b17d7b) +++ test/unit/core/test-operator.c 2015-03-10 16:07:34 +0900 (ce3a137) @@ -49,6 +49,10 @@ void data_exec_greater_equal_true(void); void test_exec_greater_equal_true(gconstpointer data); void data_exec_greater_equal_false(void); void test_exec_greater_equal_false(gconstpointer data); +void data_exec_match_true(void); +void test_exec_match_true(gconstpointer data); +void data_exec_match_false(void); +void test_exec_match_false(gconstpointer data); static gchar *tmp_directory; @@ -110,11 +114,17 @@ cut_teardown(void) } static void +set_text(grn_obj *bulk, const gchar *value) +{ + grn_obj_reinit(context, bulk, GRN_DB_TEXT, 0); + GRN_TEXT_SETS(context, bulk, value); +} + +static void set_one(grn_obj *value, const gchar *type) { if (strcmp(type, "text") == 0) { - grn_obj_reinit(context, value, GRN_DB_TEXT, 0); - GRN_TEXT_SETS(context, value, "1"); + set_text(value, "1"); } else if (strcmp(type, "int32") == 0) { grn_obj_reinit(context, value, GRN_DB_INT32, 0); GRN_INT32_SET(context, value, 1); @@ -125,8 +135,7 @@ static void set_two(grn_obj *value, const gchar *type) { if (strcmp(type, "text") == 0) { - grn_obj_reinit(context, value, GRN_DB_TEXT, 0); - GRN_TEXT_SETS(context, value, "2"); + set_text(value, 
"2"); } else if (strcmp(type, "int32") == 0) { grn_obj_reinit(context, value, GRN_DB_INT32, 0); GRN_INT32_SET(context, value, 2); @@ -453,7 +462,7 @@ test_exec_less_equal_false(gconstpointer data) } void -data_exec_greater_equal_true(void) +data_exec_match_true(void) { #define ADD_DATA(lhs_type, rhs_type) \ gcut_add_datum(lhs_type " >= " rhs_type, \ @@ -461,16 +470,13 @@ data_exec_greater_equal_true(void) "rhs_type", G_TYPE_STRING, rhs_type, \ NULL) - ADD_DATA("int32", "int32"); ADD_DATA("text", "text"); - ADD_DATA("text", "int32"); - ADD_DATA("int32", "text"); #undef ADD_DATA } void -test_exec_greater_equal_true(gconstpointer data) +test_exec_match_true(gconstpointer data) { const gchar *lhs_type; const gchar *rhs_type; @@ -478,17 +484,13 @@ test_exec_greater_equal_true(gconstpointer data) lhs_type = gcut_data_get_string(data, "lhs_type"); rhs_type = gcut_data_get_string(data, "rhs_type"); - set_two(&lhs, lhs_type); - - set_two(&rhs, rhs_type); - cut_assert_true(grn_operator_exec_greater_equal(context, &lhs, &rhs)); - - set_one(&rhs, rhs_type); - cut_assert_true(grn_operator_exec_greater_equal(context, &lhs, &rhs)); + set_text(&lhs, "Hello"); + set_text(&rhs, "ll"); + cut_assert_true(grn_operator_exec_match(context, &lhs, &rhs)); } void -data_exec_greater_equal_false(void) +data_exec_match_false(void) { #define ADD_DATA(lhs_type, rhs_type) \ gcut_add_datum(lhs_type " >= " rhs_type, \ @@ -496,16 +498,13 @@ data_exec_greater_equal_false(void) "rhs_type", G_TYPE_STRING, rhs_type, \ NULL) - ADD_DATA("int32", "int32"); ADD_DATA("text", "text"); - ADD_DATA("text", "int32"); - ADD_DATA("int32", "text"); #undef ADD_DATA } void -test_exec_greater_equal_false(gconstpointer data) +test_exec_match_false(gconstpointer data) { const gchar *lhs_type; const gchar *rhs_type; @@ -513,8 +512,7 @@ test_exec_greater_equal_false(gconstpointer data) lhs_type = gcut_data_get_string(data, "lhs_type"); rhs_type = gcut_data_get_string(data, "rhs_type"); - set_one(&lhs, lhs_type); - 
set_two(&rhs, rhs_type); - - cut_assert_false(grn_operator_exec_greater_equal(context, &lhs, &rhs)); + set_text(&lhs, "Hello"); + set_text(&rhs, "lo!"); + cut_assert_false(grn_operator_exec_match(context, &lhs, &rhs)); } -------------- next part -------------- HTMLの添付ファイルを保管しました...Download