[Groonga-commit] groonga/groonga at 0743a8e [master] Add grn_operator_exec_match()

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Mar 10 16:07:34 JST 2015


Kouhei Sutou	2015-03-10 16:07:34 +0900 (Tue, 10 Mar 2015)

  New Revision: 0743a8ef7d62c7abe96235c61d1a2b4143bb99f9
  https://github.com/groonga/groonga/commit/0743a8ef7d62c7abe96235c61d1a2b4143bb99f9

  Message:
    Add grn_operator_exec_match()

  Modified files:
    include/groonga/groonga.h
    lib/expr.c
    lib/operator.c
    test/unit/core/test-operator.c

  Modified: include/groonga/groonga.h (+2 -0)
===================================================================
--- include/groonga/groonga.h    2015-03-10 15:50:23 +0900 (59d505b)
+++ include/groonga/groonga.h    2015-03-10 16:07:34 +0900 (915a89b)
@@ -724,6 +724,8 @@ GRN_API grn_bool grn_operator_exec_less_equal(grn_ctx *ctx,
                                               grn_obj *x, grn_obj *y);
 GRN_API grn_bool grn_operator_exec_greater_equal(grn_ctx *ctx,
                                                  grn_obj *x, grn_obj *y);
+GRN_API grn_bool grn_operator_exec_match(grn_ctx *ctx,
+                                         grn_obj *target, grn_obj *pattern);
 
 struct _grn_table_group_result {
   grn_obj *table;

  Modified: lib/expr.c (+1 -148)
===================================================================
--- lib/expr.c    2015-03-10 15:50:23 +0900 (c1d2f9d)
+++ lib/expr.c    2015-03-10 16:07:34 +0900 (1f627ce)
@@ -2223,153 +2223,6 @@ grn_proc_call(grn_ctx *ctx, grn_obj *proc, int nargs, grn_obj *caller)
 } while (0)
 
 static grn_bool
-string_is_contained(grn_ctx *ctx,
-                    const char *text, unsigned int text_len,
-                    const char *sub_text, unsigned int sub_text_len)
-{
-  /* TODO: Use more fast algorithm such as Boyer-Moore algorithm that
-   * is used in snip.c. */
-  const char *text_end = text + text_len;
-  unsigned int sub_text_current = 0;
-
-  for (; text < text_end; text++) {
-    if (text[0] == sub_text[sub_text_current]) {
-      sub_text_current++;
-      if (sub_text_current == sub_text_len) {
-        return GRN_TRUE;
-      }
-    } else {
-      sub_text_current = 0;
-    }
-  }
-
-  return GRN_FALSE;
-}
-
-static grn_bool
-pseudo_query_scan_raw_text_raw_text(grn_ctx *ctx,
-                                    const char *x, unsigned int x_len,
-                                    const char *y, unsigned int y_len)
-{
-  grn_obj *normalizer;
-  grn_obj *norm_x;
-  grn_obj *norm_y;
-  const char *norm_x_raw;
-  const char *norm_y_raw;
-  unsigned int norm_x_raw_length_in_bytes;
-  unsigned int norm_y_raw_length_in_bytes;
-  grn_bool matched = GRN_FALSE;
-
-  if (x_len == 0 || y_len == 0) {
-    return GRN_FALSE;
-  }
-
-  normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
-  norm_x = grn_string_open(ctx, x, x_len, normalizer, 0);
-  norm_y = grn_string_open(ctx, y, y_len, normalizer, 0);
-  grn_string_get_normalized(ctx, norm_x,
-                            &norm_x_raw, &norm_x_raw_length_in_bytes,
-                            NULL);
-  grn_string_get_normalized(ctx, norm_y,
-                            &norm_y_raw, &norm_y_raw_length_in_bytes,
-                            NULL);
-  matched = string_is_contained(ctx,
-                                norm_x_raw, norm_x_raw_length_in_bytes,
-                                norm_y_raw, norm_y_raw_length_in_bytes);
-
-  grn_obj_close(ctx, norm_x);
-  grn_obj_close(ctx, norm_y);
-  grn_obj_unlink(ctx, normalizer);
-
-  return matched;
-}
-
-static grn_bool
-pseudo_query_scan_record_text(grn_ctx *ctx, grn_obj *record, grn_obj *table,
-                              grn_obj *y)
-{
-  grn_obj *normalizer;
-  char x_key[GRN_TABLE_MAX_KEY_SIZE];
-  int x_key_len;
-  grn_bool matched = GRN_FALSE;
-
-  if (table->header.domain != GRN_DB_SHORT_TEXT) {
-    return GRN_FALSE;
-  }
-
-  x_key_len = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record),
-                                x_key, GRN_TABLE_MAX_KEY_SIZE);
-  grn_table_get_info(ctx, table, NULL, NULL, NULL, &normalizer, NULL);
-  if (normalizer) {
-    grn_obj *norm_y;
-    const char *norm_y_raw;
-    unsigned int norm_y_raw_length_in_bytes;
-    norm_y = grn_string_open(ctx, GRN_TEXT_VALUE(y), GRN_TEXT_LEN(y),
-                             normalizer, 0);
-    grn_string_get_normalized(ctx, norm_y,
-                              &norm_y_raw, &norm_y_raw_length_in_bytes,
-                              NULL);
-    matched = string_is_contained(ctx,
-                                  x_key, x_key_len,
-                                  norm_y_raw, norm_y_raw_length_in_bytes);
-    grn_obj_close(ctx, norm_y);
-  } else {
-    matched = pseudo_query_scan_raw_text_raw_text(ctx,
-                                                  x_key,
-                                                  x_key_len,
-                                                  GRN_TEXT_VALUE(y),
-                                                  GRN_TEXT_LEN(y));
-  }
-
-  return matched;
-}
-
-static grn_bool
-pseudo_query_scan_text_text(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
-  return pseudo_query_scan_raw_text_raw_text(ctx,
-                                             GRN_TEXT_VALUE(x),
-                                             GRN_TEXT_LEN(x),
-                                             GRN_TEXT_VALUE(y),
-                                             GRN_TEXT_LEN(y));
-}
-
-static grn_bool
-pseudo_query_scan(grn_ctx *ctx, grn_obj *x, grn_obj *y)
-{
-  switch (x->header.domain) {
-  case GRN_DB_SHORT_TEXT :
-  case GRN_DB_TEXT :
-  case GRN_DB_LONG_TEXT :
-    switch (y->header.domain) {
-    case GRN_DB_SHORT_TEXT :
-    case GRN_DB_TEXT :
-    case GRN_DB_LONG_TEXT :
-      return pseudo_query_scan_text_text(ctx, x, y);
-    default :
-      break;
-    }
-    return GRN_FALSE;
-  default:
-    {
-      grn_obj *domain;
-      domain = grn_ctx_at(ctx, x->header.domain);
-      if (GRN_OBJ_TABLEP(domain)) {
-        switch (y->header.domain) {
-        case GRN_DB_SHORT_TEXT :
-        case GRN_DB_TEXT :
-        case GRN_DB_LONG_TEXT :
-          return pseudo_query_scan_record_text(ctx, x, domain, y);
-        default :
-          break;
-        }
-      }
-    }
-    return GRN_FALSE;
-  }
-}
-
-static grn_bool
 pseudo_prefix_search_match(grn_ctx *ctx,
                            const char *x, unsigned int x_len,
                            const char *y, unsigned int y_len)
@@ -3202,7 +3055,7 @@ grn_expr_exec(grn_ctx *ctx, grn_obj *expr, int nargs)
           POP1(y);
           POP1(x);
           WITH_SPSAVE({
-            matched = pseudo_query_scan(ctx, x, y);
+            matched = grn_operator_exec_match(ctx, x, y);
           });
           ALLOC1(res);
           grn_obj_reinit(ctx, res, GRN_DB_INT32, 0);

  Modified: lib/operator.c (+175 -0)
===================================================================
--- lib/operator.c    2015-03-10 15:50:23 +0900 (27aa5df)
+++ lib/operator.c    2015-03-10 16:07:34 +0900 (11693a6)
@@ -19,6 +19,7 @@
 #include "grn.h"
 #include "grn_db.h"
 #include "grn_str.h"
+#include "grn_normalizer.h"
 
 #include <string.h>
 
@@ -594,3 +595,177 @@ grn_operator_exec_greater_equal(grn_ctx *ctx, grn_obj *x, grn_obj *y)
   DO_COMPARE(x, y, r, >=);
   GRN_API_RETURN(r);
 }
+
+/* Returns GRN_TRUE if the sub_text_len bytes at sub_text occur anywhere in
+ * the text_len bytes at text. Every start offset is tried, so "aab" is
+ * found in "aaab". TODO: Use a faster algorithm (e.g. Boyer-Moore, snip.c). */
+static grn_bool
+string_is_contained(grn_ctx *ctx,
+                    const char *text, unsigned int text_len,
+                    const char *sub_text, unsigned int sub_text_len)
+{
+  unsigned int offset;
+
+  /* Empty pattern: no match (as before). Longer than text: cannot match. */
+  if (sub_text_len == 0 || sub_text_len > text_len) {
+    return GRN_FALSE;
+  }
+
+  for (offset = 0; offset <= text_len - sub_text_len; offset++) {
+    if (memcmp(text + offset, sub_text, sub_text_len) == 0) {
+      return GRN_TRUE;
+    }
+  }
+
+  return GRN_FALSE;
+}
+
+/* Normalizes both byte strings with the normalizer looked up under
+ * GRN_NORMALIZER_AUTO_NAME and reports whether the normalized sub_text
+ * occurs in the normalized target. If either input is empty, GRN_FALSE is
+ * returned before anything is looked up or opened. */
+static grn_bool
+grn_operator_exec_match_raw_text_raw_text(grn_ctx *ctx,
+                                          const char *target,
+                                          unsigned int target_len,
+                                          const char *sub_text,
+                                          unsigned int sub_text_len)
+{
+  grn_obj *auto_normalizer;
+  grn_obj *target_string;
+  grn_obj *sub_text_string;
+  const char *haystack;
+  const char *needle;
+  unsigned int haystack_size;
+  unsigned int needle_size;
+  grn_bool found;
+
+  if (!target_len || !sub_text_len) {
+    return GRN_FALSE;
+  }
+
+  auto_normalizer = grn_ctx_get(ctx, GRN_NORMALIZER_AUTO_NAME, -1);
+  target_string = grn_string_open(ctx, target, target_len,
+                                  auto_normalizer, 0);
+  sub_text_string = grn_string_open(ctx, sub_text, sub_text_len,
+                                    auto_normalizer, 0);
+  grn_string_get_normalized(ctx, target_string,
+                            &haystack, &haystack_size, NULL);
+  grn_string_get_normalized(ctx, sub_text_string,
+                            &needle, &needle_size, NULL);
+  found = string_is_contained(ctx, haystack, haystack_size,
+                              needle, needle_size);
+
+  /* Close both strings, then unlink the normalizer from grn_ctx_get(). */
+  grn_obj_close(ctx, target_string);
+  grn_obj_close(ctx, sub_text_string);
+  grn_obj_unlink(ctx, auto_normalizer);
+
+  return found;
+}
+
+/* Matches the key of record, looked up in table, against sub_text.
+ * Returns GRN_FALSE unless table->header.domain is GRN_DB_SHORT_TEXT.
+ * When the table reports a normalizer, only sub_text is normalized with
+ * it and the key is compared as fetched (NOTE(review): presumably the
+ * stored key is already normalized -- confirm). Without a normalizer,
+ * both sides go through the raw-text matcher. */
+static grn_bool
+grn_operator_exec_match_record_text(grn_ctx *ctx,
+                                    grn_obj *record, grn_obj *table,
+                                    grn_obj *sub_text)
+{
+  char key[GRN_TABLE_MAX_KEY_SIZE];
+  int key_size;
+  grn_obj *table_normalizer;
+  grn_obj *needle_string;
+  const char *needle;
+  unsigned int needle_size;
+  grn_bool found;
+
+  if (table->header.domain != GRN_DB_SHORT_TEXT) {
+    return GRN_FALSE;
+  }
+
+  key_size = grn_table_get_key(ctx, table, GRN_RECORD_VALUE(record),
+                               key, GRN_TABLE_MAX_KEY_SIZE);
+  grn_table_get_info(ctx, table, NULL, NULL, NULL, &table_normalizer, NULL);
+
+  if (!table_normalizer) {
+    return grn_operator_exec_match_raw_text_raw_text(
+      ctx,
+      key, key_size,
+      GRN_TEXT_VALUE(sub_text), GRN_TEXT_LEN(sub_text));
+  }
+
+  needle_string = grn_string_open(ctx,
+                                  GRN_TEXT_VALUE(sub_text),
+                                  GRN_TEXT_LEN(sub_text),
+                                  table_normalizer,
+                                  0);
+  grn_string_get_normalized(ctx, needle_string,
+                            &needle, &needle_size, NULL);
+  found = string_is_contained(ctx, key, key_size, needle, needle_size);
+  grn_obj_close(ctx, needle_string);
+
+  return found;
+}
+
+/* Unpacks two text bulks into (pointer, length) pairs and delegates to the
+ * raw-text matcher. */
+static grn_bool
+grn_operator_exec_match_text_text(grn_ctx *ctx, grn_obj *target,
+                                  grn_obj *sub_text)
+{
+  return grn_operator_exec_match_raw_text_raw_text(
+    ctx,
+    GRN_TEXT_VALUE(target), GRN_TEXT_LEN(target),
+    GRN_TEXT_VALUE(sub_text), GRN_TEXT_LEN(sub_text));
+}
+
+/* Picks the matcher from the domains of the two bulks. A text target
+ * (SHORT_TEXT/TEXT/LONG_TEXT) goes to the text/text matcher; any other
+ * target domain is resolved with grn_ctx_at() and, if it is a table, goes
+ * to the record/text matcher. Every other combination, including any
+ * sub_text whose domain is not a text type, yields GRN_FALSE. */
+static grn_bool
+grn_operator_exec_match_bulk_bulk(grn_ctx *ctx,
+                                  grn_obj *target,
+                                  grn_obj *sub_text)
+{
+  grn_obj *domain;
+  grn_bool sub_text_is_text;
+
+  /* Both matchers below require sub_text to be one of the text types. */
+  sub_text_is_text =
+    (sub_text->header.domain == GRN_DB_SHORT_TEXT ||
+     sub_text->header.domain == GRN_DB_TEXT ||
+     sub_text->header.domain == GRN_DB_LONG_TEXT);
+
+  switch (target->header.domain) {
+  case GRN_DB_SHORT_TEXT :
+  case GRN_DB_TEXT :
+  case GRN_DB_LONG_TEXT :
+    if (!sub_text_is_text) {
+      return GRN_FALSE;
+    }
+    return grn_operator_exec_match_text_text(ctx, target, sub_text);
+  default :
+    /* The domain is resolved even when sub_text turns out not to be text. */
+    domain = grn_ctx_at(ctx, target->header.domain);
+    if (!GRN_OBJ_TABLEP(domain) || !sub_text_is_text) {
+      return GRN_FALSE;
+    }
+    return grn_operator_exec_match_record_text(ctx, target, domain,
+                                               sub_text);
+  }
+}
+/* Public API: wraps the bulk/bulk matcher in GRN_API_ENTER/GRN_API_RETURN. */
+grn_bool
+grn_operator_exec_match(grn_ctx *ctx, grn_obj *target, grn_obj *sub_text)
+{
+  grn_bool matched;
+  GRN_API_ENTER;
+  matched = grn_operator_exec_match_bulk_bulk(ctx, target, sub_text);
+  GRN_API_RETURN(matched);
+}

  Modified: test/unit/core/test-operator.c (+23 -25)
===================================================================
--- test/unit/core/test-operator.c    2015-03-10 15:50:23 +0900 (0b17d7b)
+++ test/unit/core/test-operator.c    2015-03-10 16:07:34 +0900 (ce3a137)
@@ -49,6 +49,10 @@ void data_exec_greater_equal_true(void);
 void test_exec_greater_equal_true(gconstpointer data);
 void data_exec_greater_equal_false(void);
 void test_exec_greater_equal_false(gconstpointer data);
+void data_exec_match_true(void);
+void test_exec_match_true(gconstpointer data);
+void data_exec_match_false(void);
+void test_exec_match_false(gconstpointer data);
 
 static gchar *tmp_directory;
 
@@ -110,11 +114,17 @@ cut_teardown(void)
 }
 
 static void
+set_text(grn_obj *bulk, const gchar *value)
+{
+  grn_obj_reinit(context, bulk, GRN_DB_TEXT, 0);
+  GRN_TEXT_SETS(context, bulk, value);
+}
+
+static void
 set_one(grn_obj *value, const gchar *type)
 {
   if (strcmp(type, "text") == 0) {
-    grn_obj_reinit(context, value, GRN_DB_TEXT, 0);
-    GRN_TEXT_SETS(context, value, "1");
+    set_text(value, "1");
   } else if (strcmp(type, "int32") == 0) {
     grn_obj_reinit(context, value, GRN_DB_INT32, 0);
     GRN_INT32_SET(context, value, 1);
@@ -125,8 +135,7 @@ static void
 set_two(grn_obj *value, const gchar *type)
 {
   if (strcmp(type, "text") == 0) {
-    grn_obj_reinit(context, value, GRN_DB_TEXT, 0);
-    GRN_TEXT_SETS(context, value, "2");
+    set_text(value, "2");
   } else if (strcmp(type, "int32") == 0) {
     grn_obj_reinit(context, value, GRN_DB_INT32, 0);
     GRN_INT32_SET(context, value, 2);
@@ -453,7 +462,7 @@ test_exec_less_equal_false(gconstpointer data)
 }
 
 void
-data_exec_greater_equal_true(void)
+data_exec_match_true(void)
 {
 #define ADD_DATA(lhs_type, rhs_type)                            \
   gcut_add_datum(lhs_type " >= " rhs_type,                      \
@@ -461,16 +470,13 @@ data_exec_greater_equal_true(void)
                  "rhs_type", G_TYPE_STRING, rhs_type,           \
                  NULL)
 
-  ADD_DATA("int32", "int32");
   ADD_DATA("text", "text");
-  ADD_DATA("text", "int32");
-  ADD_DATA("int32", "text");
 
 #undef ADD_DATA
 }
 
 void
-test_exec_greater_equal_true(gconstpointer data)
+test_exec_match_true(gconstpointer data)
 {
   const gchar *lhs_type;
   const gchar *rhs_type;
@@ -478,17 +484,13 @@ test_exec_greater_equal_true(gconstpointer data)
   lhs_type = gcut_data_get_string(data, "lhs_type");
   rhs_type = gcut_data_get_string(data, "rhs_type");
 
-  set_two(&lhs, lhs_type);
-
-  set_two(&rhs, rhs_type);
-  cut_assert_true(grn_operator_exec_greater_equal(context, &lhs, &rhs));
-
-  set_one(&rhs, rhs_type);
-  cut_assert_true(grn_operator_exec_greater_equal(context, &lhs, &rhs));
+  set_text(&lhs, "Hello");
+  set_text(&rhs, "ll");
+  cut_assert_true(grn_operator_exec_match(context, &lhs, &rhs));
 }
 
 void
-data_exec_greater_equal_false(void)
+data_exec_match_false(void)
 {
 #define ADD_DATA(lhs_type, rhs_type)                            \
   gcut_add_datum(lhs_type " >= " rhs_type,                      \
@@ -496,16 +498,13 @@ data_exec_greater_equal_false(void)
                  "rhs_type", G_TYPE_STRING, rhs_type,           \
                  NULL)
 
-  ADD_DATA("int32", "int32");
   ADD_DATA("text", "text");
-  ADD_DATA("text", "int32");
-  ADD_DATA("int32", "text");
 
 #undef ADD_DATA
 }
 
 void
-test_exec_greater_equal_false(gconstpointer data)
+test_exec_match_false(gconstpointer data)
 {
   const gchar *lhs_type;
   const gchar *rhs_type;
@@ -513,8 +512,7 @@ test_exec_greater_equal_false(gconstpointer data)
   lhs_type = gcut_data_get_string(data, "lhs_type");
   rhs_type = gcut_data_get_string(data, "rhs_type");
 
-  set_one(&lhs, lhs_type);
-  set_two(&rhs, rhs_type);
-
-  cut_assert_false(grn_operator_exec_greater_equal(context, &lhs, &rhs));
+  set_text(&lhs, "Hello");
+  set_text(&rhs, "lo!");
+  cut_assert_false(grn_operator_exec_match(context, &lhs, &rhs));
 }
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index