[Groonga-commit] groonga/groonga [master] added grn_dat_scan().

Back to archive index

null+****@clear***** null+****@clear*****
2012年 1月 10日 (火) 14:40:55 JST


Susumu Yata	2012-01-10 14:40:55 +0900 (Tue, 10 Jan 2012)

  New Revision: 3d9d38d9afe98f2562b1688a5c20381255092242

  Log:
    added grn_dat_scan().

  Modified files:
    lib/dat.cpp
    lib/dat.h

  Modified: lib/dat.cpp (+106 -0)
===================================================================
--- lib/dat.cpp    2012-01-10 13:18:50 +0900 (aac3586)
+++ lib/dat.cpp    2012-01-10 14:40:55 +0900 (4d87651)
@@ -612,6 +612,112 @@ grn_dat_update(grn_ctx *ctx, grn_dat *dat,
   return GRN_SUCCESS;
 }
 
+// Scans `str` from left to right and records each position where a key of
+// `dat` matches, using the trie's lcp_search() at every candidate position.
+//
+//   ctx, dat           context and table to search; the table must have
+//                      GRN_OBJ_KEY_VAR_SIZE set.
+//   str, str_size      text to scan and its length.
+//   scan_hits          output array; at most max_num_scan_hits entries are
+//                      written.
+//   max_num_scan_hits  scanning stops as soon as this many hits are stored.
+//   str_rest           optional. When the scan finishes without error it
+//                      receives the position at which scanning stopped.
+//
+// Returns the number of hits stored in scan_hits, or -1 if the trie cannot be
+// opened, `str` or `scan_hits` is null, the table is not variable-size keyed,
+// normalization fails, or the trie raises a grn::dat::Exception.
+// `*str_rest` is left untouched on every -1 path.
+int grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str,
+                 unsigned int str_size, grn_dat_scan_hit *scan_hits,
+                 unsigned int max_num_scan_hits, const char **str_rest) {
+  if (!grn_dat_open_trie_if_needed(ctx, dat) || !str ||
+      !(dat->obj.header.flags & GRN_OBJ_KEY_VAR_SIZE) || !scan_hits) {
+    return -1;
+  }
+
+  grn::dat::Trie * const trie = static_cast<grn::dat::Trie *>(dat->trie);
+  if (!trie) {
+    return -1;
+  }
+
+  // Nothing to scan, or no room for even one hit: zero hits, nothing consumed.
+  if (!max_num_scan_hits || !str_size) {
+    if (str_rest) {
+      *str_rest = str;
+    }
+    return 0;
+  }
+
+  // Unsigned so that the comparison with max_num_scan_hits is not mixed-sign.
+  unsigned int num_scan_hits = 0;
+  // Declared outside the try block so the catch handler can release it; the
+  // original code leaked it whenever the trie threw during the scan.
+  grn_str *normalized_str = NULL;
+  try {
+    if (dat->obj.header.flags & GRN_OBJ_KEY_NORMALIZE) {
+      normalized_str = grn_str_open(
+          ctx, str, str_size, GRN_STR_NORMALIZE | GRN_STR_WITH_CHECKS);
+      if (!normalized_str) {
+        // NOTE(review): writes straight to stderr instead of the ctx error
+        // channel used in the catch handler below -- kept as is.
+        fprintf(stderr, "error: grn_str_open() failed!\n");
+        return -1;
+      }
+      // From here on the scan runs over the normalized text. `checks` runs
+      // parallel to it; presumably a positive entry marks the start of a
+      // character and holds its length in the original text -- confirm
+      // against grn_str.
+      str = normalized_str->norm;
+      str_size = normalized_str->norm_blen;
+      const short *checks = normalized_str->checks;
+      // Position in the original text, accumulated from the check values.
+      unsigned int offset = 0;
+      while (str_size) {
+        if (*checks) {
+          grn::dat::UInt32 key_pos;
+          if (trie->lcp_search(str, str_size, &key_pos)) {
+            const grn::dat::Key &key = trie->get_key(key_pos);
+            const grn::dat::UInt32 key_length = key.length();
+            // Accept the hit only if it ends at the end of the text or at a
+            // position whose check value is non-zero. The short-circuit keeps
+            // checks[key_length] from being read past the end.
+            if ((key_length == str_size) || (checks[key_length])) {
+              unsigned int length = 0;
+              for (grn::dat::UInt32 i = 0; i < key_length; ++i) {
+                if (checks[i] > 0) {
+                  length += checks[i];
+                }
+              }
+              scan_hits[num_scan_hits].id = key.id();
+              scan_hits[num_scan_hits].offset = offset;
+              scan_hits[num_scan_hits].length = length;
+              offset += length;
+              str += key_length;
+              str_size -= key_length;
+              checks += key_length;
+              if (++num_scan_hits >= max_num_scan_hits) {
+                break;
+              }
+              continue;
+            }
+          }
+          // Only positive check values are added, exactly as in the
+          // hit-length loop above. Adding a negative short to the unsigned
+          // offset would wrap it around.
+          if (*checks > 0) {
+            offset += *checks;
+          }
+        }
+        ++str;
+        --str_size;
+        ++checks;
+      }
+      if (str_rest) {
+        *str_rest = normalized_str->orig + offset;
+      }
+      grn_str_close(ctx, normalized_str);
+      normalized_str = NULL;
+    } else {
+      const char * const begin = str;
+      while (str_size) {
+        grn::dat::UInt32 key_pos;
+        if (trie->lcp_search(str, str_size, &key_pos)) {
+          const grn::dat::Key &key = trie->get_key(key_pos);
+          scan_hits[num_scan_hits].id = key.id();
+          scan_hits[num_scan_hits].offset = str - begin;
+          scan_hits[num_scan_hits].length = key.length();
+          str += key.length();
+          str_size -= key.length();
+          if (++num_scan_hits >= max_num_scan_hits) {
+            break;
+          }
+        } else {
+          // No key starts here: step over one character, or over a single
+          // byte when grn_charlen() reports a length of 0.
+          const int char_length = grn_charlen(ctx, str, str + str_size);
+          if (char_length) {
+            str += char_length;
+            str_size -= char_length;
+          } else {
+            ++str;
+            --str_size;
+          }
+        }
+      }
+      if (str_rest) {
+        *str_rest = str;
+      }
+    }
+  } catch (const grn::dat::Exception &ex) {
+    // Release the normalized string before reporting, so that the error
+    // recorded by ERR() is the last thing written to ctx.
+    if (normalized_str) {
+      grn_str_close(ctx, normalized_str);
+    }
+    ERR(grn_dat_translate_error_code(ex.code()),
+        const_cast<char *>("grn::dat::lcp_search failed"));
+    return -1;
+  }
+  return static_cast<int>(num_scan_hits);
+}
+
 grn_id
 grn_dat_lcp_search(grn_ctx *ctx, grn_dat *dat,
                    const void *key, unsigned int key_size)

  Modified: lib/dat.h (+11 -0)
===================================================================
--- lib/dat.h    2012-01-10 13:18:50 +0900 (5d028cb)
+++ lib/dat.h    2012-01-10 14:40:55 +0900 (bb7656e)
@@ -54,6 +54,17 @@ struct _grn_dat_cursor {
   grn_id curr_rec;
 };
 
+typedef struct _grn_dat_scan_hit grn_dat_scan_hit;  /* one match reported by grn_dat_scan() */
+
+struct _grn_dat_scan_hit {
+  grn_id id;            /* id of the key that matched */
+  unsigned int offset;  /* where the match starts, counted from the start of the scanned string (presumably in bytes) */
+  unsigned int length;  /* extent of the match, in the same units as offset */
+};
+
+int grn_dat_scan(grn_ctx *ctx, grn_dat *dat, const char *str,            /* scans str for keys stored in dat */
+                 unsigned int str_size, grn_dat_scan_hit *scan_hits,     /* writes up to max_num_scan_hits hits to scan_hits */
+                 unsigned int max_num_scan_hits, const char **str_rest); /* returns the hit count or -1; str_rest may be NULL */
 grn_id grn_dat_lcp_search(grn_ctx *ctx, grn_dat *dat,
                           const void *key, unsigned int key_size);
 




Groonga-commit メーリングリストの案内
Back to archive index