[Groonga-commit] groonga/groonga at b1e3db9 [master] Implement no NUL-terminated support strstr()

Back to archive index

Kouhei Sutou null+****@clear*****
Mon Jan 12 22:51:06 JST 2015


Kouhei Sutou	2015-01-12 22:51:06 +0900 (Mon, 12 Jan 2015)

  New Revision: b1e3db99f52bea9b68e43ffa9eb88c0de5d79ffc
  https://github.com/groonga/groonga/commit/b1e3db99f52bea9b68e43ffa9eb88c0de5d79ffc

  Message:
    Implement no NUL-terminated support strstr()
    
    It should be implemented by more fast algorithm such as Boyer-Moor
    algorithm. The algorithm is used in snip.c.

  Modified files:
    lib/expr.c
    test/command/suite/select/query/match/no_index.expected
    test/command/suite/select/query/match/no_index.test

  Modified: lib/expr.c (+42 -18)
===================================================================
--- lib/expr.c    2015-01-10 23:29:00 +0900 (b4e0f1e)
+++ lib/expr.c    2015-01-12 22:51:06 +0900 (988c76c)
@@ -2666,6 +2666,30 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
 } while (0)
 
 static grn_bool
+string_is_contained(grn_ctx *ctx,
+                    const char *text, unsigned int text_len,
+                    const char *sub_text, unsigned int sub_text_len)
+{
+  /* TODO: Use more fast algorithm such as Boyer-Moore algorithm that
+   * is used in snip.c. */
+  const char *text_end = text + text_len;
+  unsigned int sub_text_current = 0;
+
+  for (; text < text_end; text++) {
+    if (text[0] == sub_text[sub_text_current]) {
+      sub_text_current++;
+      if (sub_text_current == sub_text_len) {
+        return GRN_TRUE;
+      }
+    } else {
+      sub_text_current = 0;
+    }
+  }
+
+  return GRN_FALSE;
+}
+
+static grn_bool
 pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx,
                                     const char *x, unsigned int x_len,
                                     const char *y, unsigned int y_len)
@@ -2675,15 +2699,22 @@ pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx,
   grn_obj *norm_y;
   const char *norm_x_raw;
   const char *norm_y_raw;
+  unsigned int norm_x_raw_length_in_bytes;
+  unsigned int norm_y_raw_length_in_bytes;
   grn_bool matched = GRN_FALSE;
 
   normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
   norm_x = grn_string_open(ctx, x, x_len, normalizer, 0);
   norm_y = grn_string_open(ctx, y, y_len, normalizer, 0);
-  grn_string_get_normalized(ctx, norm_x, &norm_x_raw, NULL, NULL);
-  grn_string_get_normalized(ctx, norm_y, &norm_y_raw, NULL, NULL);
-  /* normalized str doesn't contain '\0'. */
-  matched = (strstr(norm_x_raw, norm_y_raw) != NULL);
+  grn_string_get_normalized(ctx, norm_x,
+                            &norm_x_raw, &norm_x_raw_length_in_bytes,
+                            NULL);
+  grn_string_get_normalized(ctx, norm_y,
+                            &norm_y_raw, &norm_y_raw_length_in_bytes,
+                            NULL);
+  matched = string_is_contained(ctx,
+                                norm_x_raw, norm_x_raw_length_in_bytes,
+                                norm_y_raw, norm_y_raw_length_in_bytes);
 
   grn_obj_close(ctx, norm_x);
   grn_obj_close(ctx, norm_y);
@@ -2711,22 +2742,15 @@ pseudo_query_scan_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table,
   if (normalizer) {
     grn_obj *norm_y;
     const char *norm_y_raw;
+    unsigned int norm_y_raw_length_in_bytes;
     norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
                              normalizer, 0);
-    grn_string_get_normalized(ctx, norm_y, &norm_y_raw, NULL, NULL);
-
-    if (x_key_len == GRN_TABLE_MAX_KEY_SIZE) {
-      grn_obj x_key_buffer;
-      GRN_TEXT_INIT(&x_key_buffer, 0);
-      GRN_TEXT_PUT(ctx, &x_key_buffer, x_key, x_key_len);
-      GRN_TEXT_PUTC(ctx, &x_key_buffer, '\0');
-      matched = (strstr(GRN_TEXT_VALUE(&x_key_buffer), norm_y_raw) != NULL);
-      GRN_OBJ_FIN(ctx, &x_key_buffer);
-    } else {
-      x_key[x_key_len] = '\0';
-      matched = (strstr(x_key, norm_y_raw) != NULL);
-    }
-
+    grn_string_get_normalized(ctx, norm_y,
+                              &norm_y_raw, &norm_y_raw_length_in_bytes,
+                              NULL);
+    matched = string_is_contained(ctx,
+                                  x_key, x_key_len,
+                                  norm_y_raw, norm_y_raw_length_in_bytes);
     grn_obj_close(ctx, norm_y);
   } else {
     matched = pseudo_query_scan_raw_text_raw_text(ctx,

  Modified: test/command/suite/select/query/match/no_index.expected (+2 -33)
===================================================================
--- test/command/suite/select/query/match/no_index.expected    2015-01-10 23:29:00 +0900 (a9a60dd)
+++ test/command/suite/select/query/match/no_index.expected    2015-01-12 22:51:06 +0900 (d6eaf02)
@@ -9,36 +9,5 @@ load --table Users
 {"name": "Carlos"}
 ]
 [[0,0.0,0.0],3]
-select Users --query name:@a
-[
-  [
-    0,
-    0.0,
-    0.0
-  ],
-  [
-    [
-      [
-        2
-      ],
-      [
-        [
-          "_id",
-          "UInt32"
-        ],
-        [
-          "name",
-          "ShortText"
-        ]
-      ],
-      [
-        1,
-        "Alice"
-      ],
-      [
-        3,
-        "Carlos"
-      ]
-    ]
-  ]
-]
+select Users --query name:@aRl
+[[0,0.0,0.0],[[[1],[["_id","UInt32"],["name","ShortText"]],[3,"Carlos"]]]]

  Modified: test/command/suite/select/query/match/no_index.test (+1 -1)
===================================================================
--- test/command/suite/select/query/match/no_index.test    2015-01-10 23:29:00 +0900 (ae50977)
+++ test/command/suite/select/query/match/no_index.test    2015-01-12 22:51:06 +0900 (de99679)
@@ -8,4 +8,4 @@ load --table Users
 {"name": "Carlos"}
 ]
 
-select Users --query name:@a
+select Users --query name:@aRl
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index