[Groonga-commit] groonga/groonga at 184b63d [master] Support transposition flags for edit_distance

Back to archive index

naoa null+****@clear*****
Fri Feb 5 13:03:40 JST 2016


naoa	2016-02-05 13:03:40 +0900 (Fri, 05 Feb 2016)

  New Revision: 184b63dfe20818a1a11c0c356722c8e1873e8a06
  https://github.com/groonga/groonga/commit/184b63dfe20818a1a11c0c356722c8e1873e8a06

  Merged a300aa1: Merge pull request #464 from naoa/master

  Message:
    Support transposition flags for edit_distance

  Added files:
    test/command/suite/select/function/edit_distance/no_transposition.expected
    test/command/suite/select/function/edit_distance/no_transposition.test
    test/command/suite/select/function/edit_distance/transposition.expected
    test/command/suite/select/function/edit_distance/transposition.test
    test/command/suite/select/function/fuzzy_search/sequential/transposition.expected
    test/command/suite/select/function/fuzzy_search/sequential/transposition.test
  Modified files:
    lib/proc.c

  Modified: lib/proc.c (+22 -7)
===================================================================
--- lib/proc.c    2016-02-05 12:42:02 +0900 (6467e92)
+++ lib/proc.c    2016-02-05 13:03:40 +0900 (0c21915)
@@ -4319,7 +4319,7 @@ func_geo_distance3(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_
 #define DIST(ox,oy) (dists[((lx + 1) * (oy)) + (ox)])
 
 static uint32_t
-calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey)
+calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey, int flags)
 {
   int d = 0;
   uint32_t cx, lx, cy, ly, *dists;
@@ -4341,6 +4341,13 @@ calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey)
           uint32_t b = DIST(x, y - 1) + 1;
           uint32_t c = DIST(x - 1, y - 1) + 1;
           DIST(x, y) = ((a < b) ? ((a < c) ? a : c) : ((b < c) ? b : c));
+          if (flags == GRN_TABLE_FUZZY_WITH_TRANSPOSITION
+              && x > 1 && y > 1 && cx == cy
+              && memcmp(px, py - cy, cx) == 0
+              && memcmp(px - cx, py, cx) == 0) {
+            uint32_t t = DIST(x - 2, y - 2) + 1;
+            DIST(x, y) = ((DIST(x, y) < t) ? DIST(x, y) : t);
+          }
         }
       }
     }
@@ -4353,16 +4360,24 @@ calc_edit_distance(grn_ctx *ctx, char *sx, char *ex, char *sy, char *ey)
 static grn_obj *
 func_edit_distance(grn_ctx *ctx, int nargs, grn_obj **args, grn_user_data *user_data)
 {
+#define N_REQUIRED_ARGS 2
+#define MAX_ARGS 3
   int d = 0;
+  int flags = 0;
   grn_obj *obj;
-  if (nargs == 2) {
+  if (nargs >= N_REQUIRED_ARGS && nargs <= MAX_ARGS) {
+    if (nargs == MAX_ARGS && GRN_BOOL_VALUE(args[2])) {
+      flags = GRN_TABLE_FUZZY_WITH_TRANSPOSITION;
+    }
     d = calc_edit_distance(ctx, GRN_TEXT_VALUE(args[0]), GRN_BULK_CURR(args[0]),
-                           GRN_TEXT_VALUE(args[1]), GRN_BULK_CURR(args[1]));
+                           GRN_TEXT_VALUE(args[1]), GRN_BULK_CURR(args[1]), flags);
   }
   if ((obj = GRN_PROC_ALLOC(GRN_DB_UINT32, 0))) {
     GRN_UINT32_SET(ctx, obj, d);
   }
   return obj;
+#undef N_REQUIRED_ARGS
+#undef MAX_ARGS
 }
 
 static grn_obj *
@@ -6916,7 +6931,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *
                !memcmp(sx, vector_value, prefix_match_size))) {
             distance = calc_edit_distance(ctx, sx, ex,
                                           (char *)vector_value,
-                                          (char *)vector_value + length);
+                                          (char *)vector_value + length, flags);
             if (distance <= max_distance) {
               score_heap_push(ctx, heap, id, distance);
               break;
@@ -6937,7 +6952,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *
               (prefix_match_size > 0 && key_length >= prefix_match_size &&
                !memcmp(sx, key_name, prefix_match_size))) {
             distance = calc_edit_distance(ctx, sx, ex,
-                                          key_name, key_name + key_length);
+                                          key_name, key_name + key_length, flags);
             if (distance <= max_distance) {
               score_heap_push(ctx, heap, id, distance);
               break;
@@ -6955,7 +6970,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *
               (prefix_match_size > 0 && key_length >= prefix_match_size &&
                !memcmp(sx, key_name, prefix_match_size))) {
             distance = calc_edit_distance(ctx, sx, ex,
-                                          key_name, key_name + key_length);
+                                          key_name, key_name + key_length, flags);
             if (distance <= max_distance) {
               score_heap_push(ctx, heap, id, distance);
             }
@@ -6966,7 +6981,7 @@ sequential_fuzzy_search(grn_ctx *ctx, grn_obj *table, grn_obj *column, grn_obj *
                !memcmp(sx, GRN_TEXT_VALUE(&value), prefix_match_size))) {
             distance = calc_edit_distance(ctx, sx, ex,
                                           GRN_TEXT_VALUE(&value),
-                                          GRN_BULK_CURR(&value));
+                                          GRN_BULK_CURR(&value), flags);
             if (distance <= max_distance) {
               score_heap_push(ctx, heap, id, distance);
             }

  Added: test/command/suite/select/function/edit_distance/no_transposition.expected (+9 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/edit_distance/no_transposition.expected    2016-02-05 13:03:40 +0900 (2407b15)
@@ -0,0 +1,9 @@
+table_create Tags TABLE_PAT_KEY ShortText UInt32
+[[0,0.0,0.0],true]
+load --table Tags
+[
+{"_key": "Groonga"}
+]
+[[0,0.0,0.0],1]
+select Tags   --filter 'all_records()'   --output_columns '_key, _score'   --scorer '_score = edit_distance(_key, "Goronga")'
+[[0,0.0,0.0],[[[1],[["_key","ShortText"],["_score","Int32"]],["Groonga",2]]]]

  Added: test/command/suite/select/function/edit_distance/no_transposition.test (+11 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/edit_distance/no_transposition.test    2016-02-05 13:03:40 +0900 (e2de27c)
@@ -0,0 +1,11 @@
+table_create Tags TABLE_PAT_KEY ShortText UInt32
+
+load --table Tags
+[
+{"_key": "Groonga"}
+]
+
+select Tags \
+  --filter 'all_records()' \
+  --output_columns '_key, _score' \
+  --scorer '_score = edit_distance(_key, "Goronga")'

  Added: test/command/suite/select/function/edit_distance/transposition.expected (+9 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/edit_distance/transposition.expected    2016-02-05 13:03:40 +0900 (d4f9781)
@@ -0,0 +1,9 @@
+table_create Tags TABLE_PAT_KEY ShortText UInt32
+[[0,0.0,0.0],true]
+load --table Tags
+[
+{"_key": "Groonga"}
+]
+[[0,0.0,0.0],1]
+select Tags   --filter 'all_records()'   --output_columns '_key, _score'   --scorer '_score = edit_distance(_key, "Goronga", true)'
+[[0,0.0,0.0],[[[1],[["_key","ShortText"],["_score","Int32"]],["Groonga",1]]]]

  Added: test/command/suite/select/function/edit_distance/transposition.test (+11 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/edit_distance/transposition.test    2016-02-05 13:03:40 +0900 (f18261a)
@@ -0,0 +1,11 @@
+table_create Tags TABLE_PAT_KEY ShortText UInt32
+
+load --table Tags
+[
+{"_key": "Groonga"}
+]
+
+select Tags \
+  --filter 'all_records()' \
+  --output_columns '_key, _score' \
+  --scorer '_score = edit_distance(_key, "Goronga", true)'

  Added: test/command/suite/select/function/fuzzy_search/sequential/transposition.expected (+44 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/fuzzy_search/sequential/transposition.expected    2016-02-05 13:03:40 +0900 (78e2290)
@@ -0,0 +1,44 @@
+table_create Users TABLE_NO_KEY
+[[0,0.0,0.0],true]
+column_create Users name COLUMN_SCALAR ShortText
+[[0,0.0,0.0],true]
+load --table Users
+[
+{"name": "Tom"},
+{"name": "Tomy"},
+{"name": "Ken"}
+]
+[[0,0.0,0.0],3]
+select Users --filter 'fuzzy_search(name, "Toym", 1, 0, 0, true)'   --output_columns 'name, _score'   --match_escalation_threshold -1
+[
+  [
+    0,
+    0.0,
+    0.0
+  ],
+  [
+    [
+      [
+        2
+      ],
+      [
+        [
+          "name",
+          "ShortText"
+        ],
+        [
+          "_score",
+          "Int32"
+        ]
+      ],
+      [
+        "Tom",
+        1
+      ],
+      [
+        "Tomy",
+        1
+      ]
+    ]
+  ]
+]

  Added: test/command/suite/select/function/fuzzy_search/sequential/transposition.test (+13 -0) 100644
===================================================================
--- /dev/null
+++ test/command/suite/select/function/fuzzy_search/sequential/transposition.test    2016-02-05 13:03:40 +0900 (7a421bf)
@@ -0,0 +1,13 @@
+table_create Users TABLE_NO_KEY
+column_create Users name COLUMN_SCALAR ShortText
+
+load --table Users
+[
+{"name": "Tom"},
+{"name": "Tomy"},
+{"name": "Ken"}
+]
+
+select Users --filter 'fuzzy_search(name, "Toym", 1, 0, 0, true)' \
+  --output_columns 'name, _score' \
+  --match_escalation_threshold -1
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index