naoa
null+****@clear*****
Fri Feb 5 13:03:40 JST 2016
naoa 2016-02-05 13:03:40 +0900 (Fri, 05 Feb 2016) New Revision: 184b63dfe20818a1a11c0c356722c8e1873e8a06 https://github.com/groonga/groonga/commit/184b63dfe20818a1a11c0c356722c8e1873e8a06 Merged a300aa1: Merge pull request #464 from naoa/master Message: Support transposition flags for edit_distance Added files: test/command/suite/select/function/edit_distance/no_transposition.expected test/command/suite/select/function/edit_distance/no_transposition.test test/command/suite/select/function/edit_distance/transposition.expected test/command/suite/select/function/edit_distance/transposition.test test/command/suite/select/function/fuzzy_search/sequential/transposition.expected test/command/suite/select/function/fuzzy_search/sequential/transposition.test Modified files: lib/proc.c Modified: lib/proc.c (+22 -7) =================================================================== --- lib/proc.c 2016-02-05 12:42:02 +0900 (6467e92) +++ lib/proc.c 2016-02-05 13:03:40 +0900 (0c21915) @@ -4319,7 +4319,7 @@ func_geo_distance3(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_ #define DIST(ox,oy) (dists[((lx + 1) * (oy)) + (ox)]) static uint32_t -calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey) +calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey, int flags) { int d = 0; uint32_t cx, lx, cy, ly, *dists; @@ -4341,6 +4341,13 @@ calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey) uint32_t b = DIST(x, y - 1) + 1; uint32_t c = DIST(x - 1, y - 1) + 1; DIST(x, y) = ((a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c)); + if (flags == GRN_TABLE_FUZZY_WITH_TRANSPOSITION + && x > 1 && y > 1 && cx == cy + && memcmp(px, py - cy, cx) == 0 + && memcmp(px - cx, py, cx) == 0) { + uint32_t t = DIST(x - 2, y - 2) + 1; + DIST(x, y) = ((DIST(x, y) < t) ? 
DIST(x, y) : t); + } } } } @@ -4353,16 +4360,24 @@ calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey) static grn_obj * func_edit_distance(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data) { +#define N_REQUIRED_ARGS 2 +#define MAX_ARGS 3 int d = 0; + int flags = 0; grn_obj *obj; - if (nargs == 2) { + if (nargs >= N_REQUIRED_ARGS && nargs <= MAX_ARGS) { + if (nargs == MAX_ARGS && GRN_BOOL_VALUE(args[2])) { + flags = GRN_TABLE_FUZZY_WITH_TRANSPOSITION; + } d = calc_edit_distance(ctx, GRN_TEXT_VALUE(args[0]), GRN_BULK_CURR(args[0]), - GRN_TEXT_VALUE(args[1]), GRN_BULK_CURR(args[1])); + GRN_TEXT_VALUE(args[1]), GRN_BULK_CURR(args[1]), flags); } if ((obj = GRN_PROC_ALLOC(GRN_DB_UINT32, 0))) { GRN_UINT32_SET(ctx, obj, d); } return obj; +#undef N_REQUIRED_ARGS +#undef MAX_ARGS } static grn_obj * @@ -6916,7 +6931,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj * !memcmp(sx, vector_value, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, (char *)vector_value, - (char *)vector_value + length); + (char *)vector_value + length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); break; @@ -6937,7 +6952,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj * (prefix_match_size > 0 && key_length >= prefix_match_size && !memcmp(sx, key_name, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, - key_name, key_name + key_length); + key_name, key_name + key_length, flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); break; @@ -6955,7 +6970,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj * (prefix_match_size > 0 && key_length >= prefix_match_size && !memcmp(sx, key_name, prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, - key_name, key_name + key_length); + key_name, key_name + key_length, flags); if (distance <= max_distance) { 
score_heap_push(ctx, heap, id, distance); } @@ -6966,7 +6981,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj * !memcmp(sx, GRN_TEXT_VALUE(&value), prefix_match_size))) { distance = calc_edit_distance(ctx, sx, ex, GRN_TEXT_VALUE(&value), - GRN_BULK_CURR(&value)); + GRN_BULK_CURR(&value), flags); if (distance <= max_distance) { score_heap_push(ctx, heap, id, distance); } Added: test/command/suite/select/function/edit_distance/no_transposition.expected (+9 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/edit_distance/no_transposition.expected 2016-02-05 13:03:40 +0900 (2407b15) @@ -0,0 +1,9 @@ +table_create Tags TABLE_PAT_KEY ShortText UInt32 +[[0,0.0,0.0],true] +load --table Tags +[ +{"_key": "Groonga"} +] +[[0,0.0,0.0],1] +select Tags --filter 'all_records()' --output_columns '_key, _score' --scorer '_score = edit_distance(_key, "Goronga")' +[[0,0.0,0.0],[[[1],[["_key","ShortText"],["_score","Int32"]],["Groonga",2]]]] Added: test/command/suite/select/function/edit_distance/no_transposition.test (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/edit_distance/no_transposition.test 2016-02-05 13:03:40 +0900 (e2de27c) @@ -0,0 +1,11 @@ +table_create Tags TABLE_PAT_KEY ShortText UInt32 + +load --table Tags +[ +{"_key": "Groonga"} +] + +select Tags \ + --filter 'all_records()' \ + --output_columns '_key, _score' \ + --scorer '_score = edit_distance(_key, "Goronga")' Added: test/command/suite/select/function/edit_distance/transposition.expected (+9 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/edit_distance/transposition.expected 2016-02-05 13:03:40 +0900 (d4f9781) @@ -0,0 +1,9 @@ +table_create Tags TABLE_PAT_KEY ShortText UInt32 +[[0,0.0,0.0],true] +load --table Tags +[ +{"_key": 
"Groonga"} +] +[[0,0.0,0.0],1] +select Tags --filter 'all_records()' --output_columns '_key, _score' --scorer '_score = edit_distance(_key, "Goronga", true)' +[[0,0.0,0.0],[[[1],[["_key","ShortText"],["_score","Int32"]],["Groonga",1]]]] Added: test/command/suite/select/function/edit_distance/transposition.test (+11 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/edit_distance/transposition.test 2016-02-05 13:03:40 +0900 (f18261a) @@ -0,0 +1,11 @@ +table_create Tags TABLE_PAT_KEY ShortText UInt32 + +load --table Tags +[ +{"_key": "Groonga"} +] + +select Tags \ + --filter 'all_records()' \ + --output_columns '_key, _score' \ + --scorer '_score = edit_distance(_key, "Goronga", true)' Added: test/command/suite/select/function/fuzzy_search/sequential/transposition.expected (+44 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/fuzzy_search/sequential/transposition.expected 2016-02-05 13:03:40 +0900 (78e2290) @@ -0,0 +1,44 @@ +table_create Users TABLE_NO_KEY +[[0,0.0,0.0],true] +column_create Users name COLUMN_SCALAR ShortText +[[0,0.0,0.0],true] +load --table Users +[ +{"name": "Tom"}, +{"name": "Tomy"}, +{"name": "Ken"} +] +[[0,0.0,0.0],3] +select Users --filter 'fuzzy_search(name, "Toym", 1, 0, 0, true)' --output_columns 'name, _score' --match_escalation_threshold -1 +[ + [ + 0, + 0.0, + 0.0 + ], + [ + [ + [ + 2 + ], + [ + [ + "name", + "ShortText" + ], + [ + "_score", + "Int32" + ] + ], + [ + "Tom", + 1 + ], + [ + "Tomy", + 1 + ] + ] + ] +] Added: test/command/suite/select/function/fuzzy_search/sequential/transposition.test (+13 -0) 100644 =================================================================== --- /dev/null +++ test/command/suite/select/function/fuzzy_search/sequential/transposition.test 2016-02-05 13:03:40 +0900 (7a421bf) @@ -0,0 +1,13 @@ +table_create Users TABLE_NO_KEY +column_create 
Users name COLUMN_SCALAR ShortText + +load --table Users +[ +{"name": "Tom"}, +{"name": "Tomy"}, +{"name": "Ken"} +] + +select Users --filter 'fuzzy_search(name, "Toym", 1, 0, 0, true)' \ + --output_columns 'name, _score' \ + --match_escalation_threshold -1 -------------- next part -------------- HTMLの添付ファイルを保管しました...Download