[Groonga-commit] groonga/groonga [master] added da8.

Back to archive index

null+****@clear***** null+****@clear*****
2011年 6月 17日 (金) 23:46:20 JST


Susumu Yata	2011-06-17 14:46:20 +0000 (Fri, 17 Jun 2011)

  New Revision: edc2954041e274c04368bf28cb7ccf07d7c95b06

  Log:
    added da8.

  Added files:
    lib/dat/base.h
    lib/dat/block.h
    lib/dat/check.h
    lib/dat/common.h
    lib/dat/header.h
    lib/dat/key-info.h
    lib/dat/key.h
    lib/dat/node.h
    lib/dat/timer.h
    lib/dat/trie.cc
    lib/dat/trie.h
    lib/dat/usage.h

  Added: lib/dat/base.h (+54 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/base.h    2011-06-17 14:46:20 +0000 (7449524)
@@ -0,0 +1,54 @@
+#ifndef GRN_DAT_BASE_H
+#define GRN_DAT_BASE_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+// The most significant bit represents whether or not the node is a terminal
+// node. The BASE of a terminal node represents the ID of its associated key.
+// On the other hand, the BASE of a non-terminal node represents the offset to
+// its child nodes.
+class Base {
+ public:
+  Base()
+      : base_(0) {}
+
+  UInt32 base() const {
+    return base_;
+  }
+  bool is_terminal() const {
+    return (base_ & IS_TERMINAL_FLAG) == IS_TERMINAL_FLAG;
+  }
+  UInt32 offset() const {
+    DA_DEBUG_THROW_IF(is_terminal());
+    return base_;
+  }
+  UInt32 key_id() const {
+    DA_DEBUG_THROW_IF(!is_terminal());
+    return base_ & ~IS_TERMINAL_FLAG;
+  }
+
+  void set_base(UInt32 x) {
+    base_ = x;
+  }
+  void set_offset(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > MAX_OFFSET);
+    base_ = x;
+  }
+  void set_key_id(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > MAX_KEY_ID);
+    base_ = IS_TERMINAL_FLAG | x;
+  }
+
+ private:
+  UInt32 base_;
+
+  static const UInt32 IS_TERMINAL_FLAG = 0x80000000U;
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_BASE_H

  Added: lib/dat/block.h (+79 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/block.h    2011-06-17 14:46:20 +0000 (97a9098)
@@ -0,0 +1,79 @@
+#ifndef GRN_DAT_BLOCK_H
+#define GRN_DAT_BLOCK_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class Block {
+ public:
+  Block()
+      : next_(0),
+        prev_(0),
+        first_phantom_(0),
+        num_phantoms_(0) {}
+
+  UInt32 next() const {
+    return next_ / BLOCK_SIZE;
+  }
+  UInt32 prev() const {
+    return prev_ / BLOCK_SIZE;
+  }
+
+  UInt32 level() const {
+    return next_ & BLOCK_MASK;
+  }
+  UInt32 fail_count() const {
+    return prev_ & BLOCK_MASK;
+  }
+
+  UInt32 first_phantom() const {
+    return first_phantom_;
+  }
+  UInt32 num_phantoms() const {
+    return num_phantoms_;
+  }
+
+  void set_next(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > MAX_BLOCK_ID);
+    next_ = (next_ & BLOCK_MASK) | (x * BLOCK_SIZE);
+  }
+  void set_prev(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > MAX_BLOCK_ID);
+    prev_ = (prev_ & BLOCK_MASK) | (x * BLOCK_SIZE);
+  }
+
+  void set_level(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > MAX_BLOCK_LEVEL);
+    DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+    next_ = (next_ & ~BLOCK_MASK) | x;
+  }
+  void set_fail_count(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > MAX_FAIL_COUNT);
+    DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+    prev_ = (prev_ & ~BLOCK_MASK) | x;
+  }
+
+  void set_first_phantom(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+    first_phantom_ = (UInt16)x;
+  }
+  void set_num_phantoms(UInt32 x) {
+    DA_DEBUG_THROW_IF(x > BLOCK_SIZE);
+    num_phantoms_ = (UInt16)x;
+  }
+
+ private:
+  UInt32 next_;
+  UInt32 prev_;
+  UInt16 first_phantom_;
+  UInt16 num_phantoms_;
+
+  static const UInt32 SHIFT = 9;
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_BLOCK_H

  Added: lib/dat/check.h (+142 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/check.h    2011-06-17 14:46:20 +0000 (113cb7c)
@@ -0,0 +1,142 @@
+#ifndef GRN_DAT_CHECK_H
+#define GRN_DAT_CHECK_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+// The most significant bit (MSB) represents whether or not the node ID is used
+// as an offset. This bit is independent of the other bits.
+// The second most significant bit represents whether or not the node is a
+// phantom node. If it is true, next() and prev() are available. These
+// functions return the next/previous phantom node respectively.
+// A non-phantom node 
+class Check {
+ public:
+  Check()
+      : check_(0) {}
+
+  UInt32 check() const {
+    return check_;
+  }
+
+  // The most significant bit (MSB) represents whether or not the node ID is
+  // used as an offset. Note that the MSB is independent of the other bits.
+  bool is_offset() const {
+    return (check_ & IS_OFFSET_FLAG) == IS_OFFSET_FLAG;
+  }
+
+  UInt32 except_is_offset() const {
+    DA_DEBUG_THROW_IF(is_phantom());
+    return check_ & ~IS_OFFSET_FLAG;
+  }
+
+  // A phantom node is a node that has never been used, and such a node is also
+  // called an empty element. Phantom nodes form a doubly linked list in each
+  // block, and the linked list is represented by next() and prev().
+  bool is_phantom() const {
+    return (check_ & IS_PHANTOM_FLAG) == IS_PHANTOM_FLAG;
+  }
+
+  UInt32 next() const {
+    DA_DEBUG_THROW_IF(!is_phantom());
+    return (check_ >> NEXT_SHIFT) & BLOCK_MASK;
+  }
+  UInt32 prev() const {
+    DA_DEBUG_THROW_IF(!is_phantom());
+    return (check_ >> PREV_SHIFT) & BLOCK_MASK;
+  }
+
+  // A label is attached to each non-phantom node. A label is represented by
+  // a byte except for a terminal label '\256'. Note that a phantom node always
+  // returns an invalid label with its phantom flag bit so as to reject a
+  // transition to the phantom node.
+  UInt32 label() const {
+    return check_ & (IS_PHANTOM_FLAG | LABEL_MASK);
+  }
+
+  UInt32 child() const {
+    return (check_ >> CHILD_SHIFT) & LABEL_MASK;
+  }
+  UInt32 sibling() const {
+    return (check_ >> SIBLING_SHIFT) & LABEL_MASK;
+  }
+
+  void set_check(UInt32 x) {
+    check_ = x;
+  }
+
+  void set_is_offset(bool x) {
+    if (x) {
+      DA_DEBUG_THROW_IF(is_offset());
+      check_ |= IS_OFFSET_FLAG;
+    } else {
+      DA_DEBUG_THROW_IF(!is_offset());
+      check_ &= ~IS_OFFSET_FLAG;
+    }
+  }
+
+  void set_except_is_offset(UInt32 x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    DA_DEBUG_THROW_IF((x & IS_OFFSET_FLAG) == IS_OFFSET_FLAG);
+    check_ = (check_ & IS_OFFSET_FLAG) | x;
+  }
+
+  // To reject a transition to an incomplete node, its label is overwritten
+  // with an invalid label when it becomes a non-phantom node.
+  void set_is_phantom(bool x) {
+    if (x) {
+      DA_DEBUG_THROW_IF(is_phantom());
+      check_ |= IS_PHANTOM_FLAG;
+    } else {
+      DA_DEBUG_THROW_IF(!is_phantom());
+      check_ = (check_ & IS_OFFSET_FLAG) | (INVALID_LABEL << CHILD_SHIFT) |
+          (INVALID_LABEL << SIBLING_SHIFT) | INVALID_LABEL;
+    }
+  }
+
+  void set_next(UInt32 x) {
+    DA_DEBUG_THROW_IF(!is_phantom());
+    DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+    check_ = (check_ & ~(BLOCK_MASK << NEXT_SHIFT)) | (x << NEXT_SHIFT);
+  }
+  void set_prev(UInt32 x) {
+    DA_DEBUG_THROW_IF(!is_phantom());
+    DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+    check_ = (check_ & ~(BLOCK_MASK << PREV_SHIFT)) | (x << PREV_SHIFT);
+  }
+
+  void set_label(UInt32 x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    DA_DEBUG_THROW_IF(x > MAX_LABEL);
+    check_ = (check_ & ~LABEL_MASK) | x;
+  }
+
+  void set_child(UInt32 x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    DA_DEBUG_THROW_IF(x > MAX_LABEL);
+    check_ = (check_ & ~(LABEL_MASK << CHILD_SHIFT)) | (x << CHILD_SHIFT);
+  }
+  void set_sibling(UInt32 x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    DA_DEBUG_THROW_IF(label() > MAX_LABEL);
+    DA_DEBUG_THROW_IF((sibling() != INVALID_LABEL) && (x == INVALID_LABEL));
+    check_ = (check_ & ~(LABEL_MASK << SIBLING_SHIFT)) | (x << SIBLING_SHIFT);
+  }
+
+ private:
+  UInt32 check_;
+
+  static const UInt32 IS_OFFSET_FLAG = 1U << 31;
+  static const UInt32 IS_PHANTOM_FLAG = 1U << 30;
+  static const UInt32 NEXT_SHIFT = 9;
+  static const UInt32 PREV_SHIFT = 18;
+  static const UInt32 CHILD_SHIFT = 9;
+  static const UInt32 SIBLING_SHIFT = 18;
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_CHECK_H

  Added: lib/dat/common.h (+146 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/common.h    2011-06-17 14:46:20 +0000 (6b45e53)
@@ -0,0 +1,146 @@
+#ifndef GRN_DAT_COMMON_H
+#define GRN_DAT_COMMON_H
+
+#ifndef _MSC_VER
+ #include <stdint.h>
+#endif  // _MSC_VER
+
+#include <cstddef>
+#include <exception>
+
+#ifdef _DEBUG
+ #include <iostream>
+#endif  // _DEBUG
+
+namespace grn {
+namespace dat {
+
+#ifdef _MSC_VER
+typedef unsigned __int8  UInt8;
+typedef unsigned __int16 UInt16;
+typedef unsigned __int32 UInt32;
+typedef unsigned __int64 UInt64;
+#else  // _MSC_VER
+typedef ::uint8_t UInt8;
+typedef ::uint16_t UInt16;
+typedef ::uint32_t UInt32;
+typedef ::uint64_t UInt64;
+#endif  // _MSC_VER
+
+enum {
+  // The special label attached to a terminal node.
+  TERMINAL_LABEL  = 0x100,
+
+  LABEL_MASK      = 0x1FF,
+  BLOCK_MASK      = 0x1FF,
+
+  // The number of nodes in each block.
+  BLOCK_SIZE      = 0x200,
+
+  ROOT_NODE_ID    = 0,
+  KEY_ID_OFFSET   = 1,
+
+  MAX_LABEL       = TERMINAL_LABEL,
+  MAX_NODE_ID     = 0x7FFFFFFF,
+  MAX_BLOCK_ID    = MAX_NODE_ID / BLOCK_SIZE,
+  MAX_OFFSET      = MAX_NODE_ID,
+  MAX_KEY_ID      = MAX_NODE_ID,
+  MAX_KEY_LENGTH  = 0x7FFF,
+  // The level of a block is incremented when find_offset() has failed to find
+  // a good offset in that block MAX_FAIL_COUNT times.
+  MAX_FAIL_COUNT  = 4,
+  // The maximum number of blocks tested in each call of find_offset().
+  MAX_BLOCK_COUNT = 16,
+  // A higher level block has less phantom nodes.
+  MAX_BLOCK_LEVEL = 5,
+
+  INVALID_LABEL   = 0x1FF,
+  INVALID_OFFSET  = 0,
+  INVALID_ENTRY   = 0x7FFFFFFF,
+
+  SORT_THRESHOLD  = 10
+};
+
+class Exception : public std::exception {
+ public:
+  Exception() throw()
+      : what_("") {}
+  Exception(const char *what) throw()
+      : what_((what != NULL) ? what : "") {}
+  Exception(const Exception &ex) throw()
+      : what_(ex.what_) {}
+  virtual ~Exception() throw() {}
+
+  Exception &operator=(const Exception &ex) throw() {
+    what_ = ex.what_;
+    return *this;
+  }
+
+  virtual const char *what() const throw() {
+    return what_;
+  }
+
+ private:
+  const char *what_;
+};
+
+#define DA_DEFINE_ERROR(error_type) \
+    class error_type : public Exception { \
+     public: \
+      error_type() throw() \
+          : Exception() {} \
+      error_type(const char *what) throw() \
+          : Exception(what) {} \
+      error_type(const error_type &ex) throw() \
+          : Exception(ex) {} \
+      virtual ~error_type() throw() {} \
+     \
+      error_type &operator=(const error_type &ex) throw() { \
+        *static_cast<Exception *>(this) = ex; \
+        return *this; \
+      } \
+    }
+
+DA_DEFINE_ERROR(ParamError);
+DA_DEFINE_ERROR(IOError);
+DA_DEFINE_ERROR(MemoryError);
+DA_DEFINE_ERROR(SizeError);
+
+#undef DA_DEFINE_ERROR
+
+#define DA_INT_TO_STR(value) \
+    #value
+#define DA_LINE_TO_STR(line) \
+    DA_INT_TO_STR(line)
+#define DA_LINE_STR \
+    DA_LINE_TO_STR(__LINE__)
+
+#define DA_THROW(exception, what) \
+    (throw exception(__FILE__ ":" DA_LINE_STR ": " what))
+#define DA_THROW_IF(cond) \
+    (void)((!(cond)) || (DA_THROW(grn::dat::Exception, #cond), 0))
+
+#define DA_PARAM_ERROR_IF(cond) \
+    (void)((!(cond)) || (DA_THROW(grn::dat::ParamError, #cond), 0))
+#define DA_IO_ERROR_IF(cond) \
+    (void)((!(cond)) || (DA_THROW(grn::dat::IOError, #cond), 0))
+#define DA_MEMORY_ERROR_IF(cond) \
+    (void)((!(cond)) || (DA_THROW(grn::dat::MemoryError, #cond), 0))
+#define DA_SIZE_ERROR_IF(cond) \
+    (void)((!(cond)) || (DA_THROW(grn::dat::SizeError, #cond), 0))
+
+#ifdef _DEBUG
+ #define DA_DEBUG_THROW_IF(cond) \
+     DA_THROW_IF(cond)
+ #define DA_DEBUG_LOG(var) \
+     (std::clog << __FILE__ ":" DA_LINE_STR ": " #var ": " \
+                << (var) << std::endl)
+#else  // _DEBUG
+ #define DA_DEBUG_THROW_IF(cond)
+ #define DA_DEBUG_LOG(var)
+#endif  // _DEBUG
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_COMMON_H

  Added: lib/dat/header.h (+107 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/header.h    2011-06-17 14:46:20 +0000 (27abdb6)
@@ -0,0 +1,107 @@
+#ifndef GRN_DAT_HEADER_H
+#define GRN_DAT_HEADER_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class Header {
+ public:
+  Header()
+      : file_size_(0),
+        num_keys_(0),
+        max_num_keys_(0),
+        num_phantoms_(0),
+        num_zombies_(0),
+        num_blocks_(0),
+        max_num_blocks_(0),
+        key_buf_size_(0),
+        entries_() {
+    for (int i = 0; i <= MAX_BLOCK_LEVEL; ++i) {
+      entries_[i] = INVALID_ENTRY;
+    }
+  }
+
+  UInt64 file_size() const {
+    return file_size_;
+  }
+  UInt32 num_keys() const {
+    return num_keys_;
+  }
+  UInt32 max_num_keys() const {
+    return max_num_keys_;
+  }
+  UInt32 num_nodes() const {
+    return num_blocks() * BLOCK_SIZE;
+  }
+  UInt32 num_phantoms() const {
+    return num_phantoms_;
+  }
+  UInt32 num_zombies() const {
+    return num_zombies_;
+  }
+  UInt32 max_num_nodes() const {
+    return max_num_blocks() * BLOCK_SIZE;
+  }
+  UInt32 num_blocks() const {
+    return num_blocks_;
+  }
+  UInt32 max_num_blocks() const {
+    return max_num_blocks_;
+  }
+  UInt32 key_buf_size() const {
+    return key_buf_size_;
+  }
+  UInt32 ith_entry(UInt32 i) const {
+    DA_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
+    return entries_[i];
+  }
+
+  void set_file_size(UInt64 x) {
+    file_size_ = x;
+  }
+  void set_num_keys(UInt32 x) {
+    num_keys_ = x;
+  }
+  void set_max_num_keys(UInt32 x) {
+    max_num_keys_ = x;
+  }
+  void set_num_phantoms(UInt32 x) {
+    num_phantoms_ = x;
+  }
+  void set_num_zombies(UInt32 x) {
+    num_zombies_ = x;
+  }
+  void set_num_blocks(UInt32 x) {
+    num_blocks_ = x;
+  }
+  void set_max_num_blocks(UInt32 x) {
+    max_num_blocks_ = x;
+  }
+  void set_key_buf_size(UInt32 x) {
+    key_buf_size_ = x;
+  }
+  void set_ith_entry(UInt32 i,
+                     UInt32 x) {
+    DA_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
+    DA_DEBUG_THROW_IF((x != INVALID_ENTRY) && (x >= num_blocks()));
+    entries_[i] = x;
+  }
+
+ private:
+  UInt64 file_size_;
+  UInt32 num_keys_;
+  UInt32 max_num_keys_;
+  UInt32 num_phantoms_;
+  UInt32 num_zombies_;
+  UInt32 num_blocks_;
+  UInt32 max_num_blocks_;
+  UInt32 key_buf_size_;
+  UInt32 entries_[MAX_BLOCK_LEVEL + 1];
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_HEADER_H

  Added: lib/dat/key-info.h (+29 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/key-info.h    2011-06-17 14:46:20 +0000 (3640501)
@@ -0,0 +1,29 @@
+#ifndef GRN_DAT_KEY_INFO_H
+#define GRN_DAT_KEY_INFO_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class KeyInfo {
+ public:
+  KeyInfo()
+      : offset_(0) {}
+
+  UInt32 offset() const {
+    return offset_;
+  }
+
+  void set_offset(UInt32 x) {
+    offset_ = x;
+  }
+
+ private:
+  UInt32 offset_;
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_KEY_INFO_H

  Added: lib/dat/key.h (+45 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/key.h    2011-06-17 14:46:20 +0000 (3e7d242)
@@ -0,0 +1,45 @@
+#ifndef GRN_DAT_KEY_H
+#define GRN_DAT_KEY_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class Key {
+ public:
+  const char &operator[](UInt32 i) const {
+    DA_DEBUG_THROW_IF(i > length_);
+    return ptr_[i];
+  }
+
+  const char *ptr() const {
+    return ptr_;
+  }
+  UInt32 length() const {
+    return length_;
+  }
+  UInt32 id() const {
+    return id_;
+  }
+
+  void set_ptr(const char *x) {
+    ptr_ = x;
+  }
+  void set_length(UInt32 x) {
+    length_ = x;
+  }
+  void set_id(UInt32 x) {
+    id_ = x;
+  }
+
+ private:
+  const char *ptr_;
+  UInt32 length_;
+  UInt32 id_;
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_KEY_H

  Added: lib/dat/node.h (+111 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/node.h    2011-06-17 14:46:20 +0000 (0c9d1d6)
@@ -0,0 +1,111 @@
+#ifndef GRN_DAT_NODE_H
+#define GRN_DAT_NODE_H
+
+#include "./base.h"
+#include "./check.h"
+
+namespace grn {
+namespace dat {
+
+class Node {
+ public:
+  Node() : base_(), check_() {}
+
+  Base base() const {
+    return base_;
+  }
+  bool is_terminal() const {
+    DA_DEBUG_THROW_IF(is_phantom());
+    return base_.is_terminal();
+  }
+  UInt32 offset() const {
+    DA_DEBUG_THROW_IF(is_phantom());
+    return base_.offset();
+  }
+  UInt32 key_id() const {
+    DA_DEBUG_THROW_IF(is_phantom());
+    return base_.key_id();
+  }
+
+  Check check() const {
+    return check_;
+  }
+  bool is_offset() const {
+    return check_.is_offset();
+  }
+  UInt32 except_is_offset() const {
+    return check_.except_is_offset();
+  }
+  bool is_phantom() const {
+    return check_.is_phantom();
+  }
+  UInt32 next() const {
+    return check_.next();
+  }
+  UInt32 prev() const {
+    return check_.prev();
+  }
+  UInt32 label() const {
+    return check_.label();
+  }
+  UInt32 child() const {
+    return check_.child();
+  }
+  UInt32 sibling() const {
+    return check_.sibling();
+  }
+
+  void set_base(Base x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    base_ = x;
+  }
+  void set_offset(UInt32 x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    base_.set_offset(x);
+  }
+  void set_key_id(UInt32 x) {
+    DA_DEBUG_THROW_IF(is_phantom());
+    base_.set_key_id(x);
+  }
+
+  void set_check(Check x) {
+    check_ = x;
+  }
+  void set_is_offset(bool x) {
+    check_.set_is_offset(x);
+  }
+  void set_except_is_offset(UInt32 x) {
+    check_.set_except_is_offset(x);
+  }
+  void set_is_phantom(bool x) {
+    DA_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
+    check_.set_is_phantom(x);
+  }
+  void set_next(UInt32 x) {
+    DA_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
+    check_.set_next(x);
+  }
+  void set_prev(UInt32 x) {
+    DA_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
+    check_.set_prev(x);
+  }
+  void set_label(UInt32 x) {
+    DA_DEBUG_THROW_IF(offset() != INVALID_OFFSET);
+    check_.set_label(x);
+  }
+  void set_child(UInt32 x) {
+    check_.set_child(x);
+  }
+  void set_sibling(UInt32 x) {
+    check_.set_sibling(x);
+  }
+
+ private:
+  Base base_;
+  Check check_;
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_NODE_H

  Added: lib/dat/timer.h (+39 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/timer.h    2011-06-17 14:46:20 +0000 (c852070)
@@ -0,0 +1,39 @@
+#ifndef DA_TIMER_H_
+#define DA_TIMER_H_
+
+#include "./common.h"
+
+#include <sys/time.h>
+
+namespace da {
+
+class Timer {
+ public:
+  Timer()
+      : base_(get_time()) {}
+
+  double elapsed() const {
+    return get_time() - base_;
+  }
+
+  void reset() {
+    base_ = get_time();
+  }
+
+ private:
+  double base_;
+
+  static double get_time() {
+    struct timeval tv;
+    ::gettimeofday(&tv, NULL);
+    return tv.tv_sec + (tv.tv_usec * 0.000001);
+  }
+
+  // Disallows copy and assignment.
+  Timer(const Timer &);
+  Timer &operator=(const Timer &);
+};
+
+}  // namespace da
+
+#endif  // DA_TIMER_H_

  Added: lib/dat/trie.cc (+900 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/trie.cc    2011-06-17 14:46:20 +0000 (f5dc7d7)
@@ -0,0 +1,900 @@
+#include "./trie.h"
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cstring>
+#include <new>
+
+namespace grn {
+namespace dat {
+
+Trie::~Trie() {
+  if (header_ != NULL) {
+    ::munmap(header_, header_->file_size());
+  }
+}
+
+Trie *Trie::create(const char *file_name,
+                   UInt64 file_size,
+                   UInt32 max_num_keys,
+                   double num_nodes_per_key,
+                   double average_key_length) {
+  DA_PARAM_ERROR_IF((file_size != 0) && (max_num_keys != 0));
+
+  if (num_nodes_per_key == 0.0) {
+    num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY;
+  }
+  DA_PARAM_ERROR_IF(num_nodes_per_key < 1.0);
+
+  if (average_key_length == 0.0) {
+    average_key_length = DEFAULT_AVERAGE_KEY_LENGTH;
+  }
+  DA_PARAM_ERROR_IF(average_key_length < 1.0);
+  DA_PARAM_ERROR_IF(average_key_length > MAX_KEY_LENGTH);
+
+  if (max_num_keys == 0) {
+    if (file_size == 0) {
+      file_size = DEFAULT_FILE_SIZE;
+    }
+  } else {
+    DA_PARAM_ERROR_IF(max_num_keys > MAX_NUM_KEYS);
+  }
+
+  Trie *new_trie = new (std::nothrow) Trie;
+  DA_MEMORY_ERROR_IF(new_trie == NULL);
+  try {
+    new_trie->create_file(file_name, file_size, max_num_keys,
+                          num_nodes_per_key, average_key_length);
+    return new_trie;
+  } catch (...) {
+    delete new_trie;
+    throw;
+  }
+}
+
+Trie *Trie::create(const Trie &trie,
+                   const char *file_name,
+                   UInt64 file_size,
+                   UInt32 max_num_keys,
+                   double num_nodes_per_key,
+                   double average_key_length) {
+  DA_PARAM_ERROR_IF((file_size != 0) && (max_num_keys != 0));
+
+  if (num_nodes_per_key == 0.0) {
+    if (trie.num_keys() == 0) {
+      num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY;
+    } else {
+      num_nodes_per_key = 1.0 * trie.num_nodes() / trie.num_keys();
+    }
+  }
+  DA_PARAM_ERROR_IF(num_nodes_per_key < 1.0);
+
+  if (average_key_length == 0.0) {
+    if (trie.num_keys() == 0) {
+      average_key_length = DEFAULT_AVERAGE_KEY_LENGTH;
+    } else {
+      average_key_length = 1.0 * trie.total_key_length() / trie.num_keys();
+    }
+  }
+  DA_PARAM_ERROR_IF(average_key_length < 1.0);
+  DA_PARAM_ERROR_IF(average_key_length > MAX_KEY_LENGTH);
+
+  if (max_num_keys == 0) {
+    if (file_size == 0) {
+      file_size = trie.file_size();
+    }
+    DA_PARAM_ERROR_IF(file_size < trie.virtual_size());
+  } else {
+    DA_PARAM_ERROR_IF(max_num_keys < trie.num_keys());
+    DA_PARAM_ERROR_IF(max_num_keys > MAX_KEY_ID);
+  }
+
+  Trie *new_trie = new (std::nothrow) Trie;
+  DA_MEMORY_ERROR_IF(new_trie == NULL);
+  try {
+    new_trie->create_file(file_name, file_size, max_num_keys,
+                          num_nodes_per_key, average_key_length);
+    if (trie.num_keys() != 0) {
+      new_trie->build_from_trie(trie);
+    }
+    return new_trie;
+  } catch (...) {
+    delete new_trie;
+    throw;
+  }
+}
+
+Trie *Trie::open(const char *file_name) {
+  DA_PARAM_ERROR_IF(file_name == NULL);
+
+  Trie *new_trie = new Trie;
+  try {
+    new_trie->open_file(file_name);
+    return new_trie;
+  } catch (...) {
+    delete new_trie;
+    throw;
+  }
+}
+
+Trie::Trie()
+    : header_(NULL),
+      nodes_(NULL),
+      blocks_(NULL),
+      key_infos_(NULL),
+      key_buf_(NULL) {}
+
+void Trie::create_file(const char *file_name,
+                       UInt64 file_size,
+                       UInt32 max_num_keys,
+                       double num_nodes_per_key,
+                       double average_key_length) {
+  DA_PARAM_ERROR_IF((file_size == 0) && (max_num_keys == 0));
+  DA_PARAM_ERROR_IF((file_size != 0) && (max_num_keys != 0));
+
+  UInt32 max_num_blocks;
+  UInt32 key_buf_size;
+  if (max_num_keys == 0) {
+    const UInt64 avail = file_size - sizeof(Header);
+    const double num_bytes_per_key = (sizeof(Node) * num_nodes_per_key)
+        + (1.0 * sizeof(Block) / BLOCK_SIZE * num_nodes_per_key)
+        + sizeof(KeyInfo) + average_key_length;
+    DA_PARAM_ERROR_IF((avail / num_bytes_per_key) > MAX_NUM_KEYS);
+    max_num_keys = (UInt32)(avail / num_bytes_per_key);
+    DA_PARAM_ERROR_IF(max_num_keys == 0);
+  }
+
+  {
+    const double max_num_nodes = num_nodes_per_key * max_num_keys;
+    DA_PARAM_ERROR_IF(max_num_nodes > MAX_NUM_NODES);
+    max_num_blocks = ((UInt32)max_num_nodes + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    DA_PARAM_ERROR_IF(max_num_blocks == 0);
+    DA_PARAM_ERROR_IF(max_num_blocks > MAX_NUM_BLOCKS);
+  }
+
+  if (file_size == 0) {
+    const double total_key_length = average_key_length * max_num_keys;
+    DA_PARAM_ERROR_IF(total_key_length > MAX_KEY_BUF_SIZE);
+    key_buf_size = (UInt32)total_key_length;
+
+    file_size = sizeof(Header)
+        + (sizeof(Block) * max_num_blocks)
+        + (sizeof(Node) * BLOCK_SIZE * max_num_blocks)
+        + (sizeof(KeyInfo) * (max_num_keys + 1))
+        + key_buf_size;
+  } else {
+    const UInt64 avail = file_size - sizeof(Header)
+        - (sizeof(Block) * max_num_blocks)
+        - (sizeof(Node) * BLOCK_SIZE * max_num_blocks)
+        - (sizeof(KeyInfo) * (max_num_keys + 1));
+    DA_PARAM_ERROR_IF(avail == 0);
+    DA_PARAM_ERROR_IF(avail > MAX_KEY_BUF_SIZE);
+    key_buf_size = (UInt32)avail;
+  }
+
+  create_file(file_name, file_size, max_num_keys,
+              max_num_blocks, key_buf_size);
+}
+
+void Trie::create_file(const char *file_name,
+                       UInt64 file_size,
+                       UInt32 max_num_keys,
+                       UInt32 max_num_blocks,
+                       UInt32 key_buf_size) {
+  DA_PARAM_ERROR_IF(file_size != (sizeof(Header)
+      + (sizeof(Block) * max_num_blocks)
+      + (sizeof(Node) * BLOCK_SIZE * max_num_blocks)
+      + (sizeof(KeyInfo) * (max_num_keys + 1))
+      + key_buf_size));
+
+  int fd = -1;
+  if ((file_name != NULL) && (file_name[0] != '\0')) {
+    fd = ::open(file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+    DA_IO_ERROR_IF(fd == -1);
+    if (::ftruncate(fd, file_size) == -1) {
+      ::close(fd);
+      DA_THROW(IOError, "::ftruncate(fd, file_size) == -1");
+    }
+  }
+
+  const int flags = (fd == -1) ? (MAP_PRIVATE | MAP_ANONYMOUS) : MAP_SHARED;
+  void * const address = ::mmap(NULL, file_size, PROT_READ | PROT_WRITE,
+                                flags, fd, 0);
+  if (address == MAP_FAILED) {
+    if (fd != -1) {
+      ::close(fd);
+    }
+    DA_THROW(IOError, "address == MAP_FAILED");
+  } else if ((fd != -1) && (::close(fd) == -1)) {
+    ::munmap(address, file_size);
+    DA_THROW(IOError, "(fd != -1) && (::close(fd) == -1)");
+  }
+
+  Header * const header = static_cast<Header *>(address);
+  *header = Header();
+  header->set_file_size(file_size);
+  header->set_max_num_keys(max_num_keys);
+  header->set_max_num_blocks(max_num_blocks);
+  header->set_key_buf_size(key_buf_size);
+
+  map_address(address);
+
+  reserve_node(ROOT_NODE_ID);
+  ith_node(INVALID_OFFSET).set_is_offset(true);
+  ith_key_info(KEY_ID_OFFSET).set_offset(0);
+}
+
+void Trie::open_file(const char *file_name) {
+  DA_PARAM_ERROR_IF(file_name == NULL);
+
+  const int fd = ::open(file_name, O_RDWR);
+  DA_IO_ERROR_IF(fd == -1);
+
+  void *address = ::mmap(NULL, sizeof(Header), PROT_READ, MAP_PRIVATE, fd, 0);
+  if (address == MAP_FAILED) {
+    ::close(fd);
+    DA_THROW(IOError, "address == MAP_FAILED");
+  }
+
+  const UInt64 file_size = static_cast<const Header *>(address)->file_size();
+  if (::munmap(address, sizeof(Header)) == -1) {
+    ::close(fd);
+    DA_THROW(IOError, "::munmap(address, sizeof(Header)) == -1");
+  }
+
+  address = ::mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  if (address == MAP_FAILED) {
+    ::close(fd);
+    DA_THROW(IOError, "address == MAP_FAILED");
+  } else if (::close(fd) == -1) {
+    ::munmap(address, file_size);
+    DA_THROW(IOError, "::close(fd) == -1");
+  }
+
+  map_address(address);
+}
+
+void Trie::map_address(void *address) {
+  DA_PARAM_ERROR_IF(address == NULL);
+
+  header_ = static_cast<Header *>(address);
+  nodes_ = static_cast<Node *>(static_cast<void *>(header_ + 1));
+  blocks_ = static_cast<Block *>(static_cast<void *>(
+      nodes_ + max_num_nodes()));
+  key_infos_ = static_cast<KeyInfo *>(static_cast<void *>(
+      blocks_ + max_num_blocks()));
+  key_buf_ = static_cast<char *>(static_cast<void *>(
+      key_infos_ + (max_num_keys() + 1)));
+}
+
+void Trie::build_from_trie(const Trie &trie) {
+  DA_DEBUG_THROW_IF(trie.num_keys() == 0);
+
+  header_->set_num_keys(trie.num_keys());
+  std::memcpy(key_infos_, trie.key_infos_,
+              sizeof(KeyInfo) * (num_keys() + 1));
+  std::memcpy(key_buf_, trie.key_buf_,
+              ith_key_info(num_keys() + KEY_ID_OFFSET).offset());
+  build_from_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID);
+}
+
+void Trie::build_from_trie(const Trie &trie,
+                           UInt32 src,
+                           UInt32 dest) {
+  ith_node(dest).set_except_is_offset(trie.ith_node(src).except_is_offset());
+  if (trie.ith_node(src).is_terminal()) {
+    ith_node(dest).set_key_id(trie.ith_node(src).key_id());
+    return;
+  }
+
+  const UInt32 src_offset = trie.ith_node(src).offset();
+  UInt32 dest_offset;
+  {
+    UInt16 labels[MAX_LABEL + 1];
+    UInt32 num_labels = 0;
+
+    UInt32 label = trie.ith_node(src).child();
+    DA_DEBUG_THROW_IF(label == INVALID_LABEL);
+    while (label != INVALID_LABEL) {
+      DA_DEBUG_THROW_IF(label > MAX_LABEL);
+      labels[num_labels++] = label;
+      label = trie.ith_node(src_offset ^ label).sibling();
+    }
+    DA_DEBUG_THROW_IF(num_labels == 0);
+
+    dest_offset = find_offset(labels, num_labels);
+    for (UInt32 i = 0; i < num_labels; ++i) {
+      reserve_node(dest_offset ^ labels[i]);
+    }
+
+    DA_DEBUG_THROW_IF(ith_node(dest_offset).is_offset());
+    ith_node(dest_offset).set_is_offset(true);
+    ith_node(dest).set_offset(dest_offset);
+  }
+
+  UInt32 label = trie.ith_node(src).child();
+  while (label != INVALID_LABEL) {
+    build_from_trie(trie, src_offset ^ label, dest_offset ^ label);
+    label = trie.ith_node(src_offset ^ label).sibling();
+  }
+}
+
+bool Trie::search_from_root(const UInt8 *ptr,
+                            UInt32 length,
+                            Key *key) const {
+  DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+
+  UInt32 node_id = ROOT_NODE_ID;
+  for (UInt32 i = 0; i < length; ++i) {
+    const Base base = ith_node(node_id).base();
+    if (base.is_terminal()) {
+      return search_from_terminal(ptr, length, key, base.key_id(), i);
+    }
+
+    node_id = base.offset() ^ ptr[i];
+    if (ith_node(node_id).label() != ptr[i]) {
+      return false;
+    }
+  }
+
+  const Base base = ith_node(node_id).base();
+  if (base.is_terminal()) {
+    return search_from_terminal(ptr, length, key, base.key_id(), length);
+  }
+
+  node_id = base.offset() ^ TERMINAL_LABEL;
+  if (ith_node(node_id).label() != TERMINAL_LABEL) {
+    return false;
+  }
+  DA_DEBUG_THROW_IF(!ith_node(node_id).is_terminal());
+
+  if (key != NULL) {
+    const UInt32 key_id = ith_node(node_id).key_id();
+    key->set_ptr(key_buf_ + ith_key_info(key_id).offset());
+    key->set_length(ith_key_info(key_id + 1).offset()
+        - ith_key_info(key_id).offset());
+    key->set_id(key_id);
+  }
+  return true;
+}
+
+bool Trie::search_from_terminal(const UInt8 *ptr,
+                                UInt32 length,
+                                Key *key,
+                                UInt32 key_id,
+                                UInt32 i) const {
+  DA_DEBUG_THROW_IF(i > length);
+  DA_DEBUG_THROW_IF(key_id < KEY_ID_OFFSET);
+  DA_DEBUG_THROW_IF(key_id >= (num_keys() + KEY_ID_OFFSET));
+
+  const UInt32 key_length = ith_key_info(key_id + 1).offset()
+      - ith_key_info(key_id).offset();
+  if (key_length != length) {
+    return false;
+  }
+
+  const char * const key_ptr = key_buf_ + ith_key_info(key_id).offset();
+  for ( ; i < length; ++i) {
+    if (ptr[i] != (UInt8)key_ptr[i]) {
+      return false;
+    }
+  }
+
+  if (key != NULL) {
+    key->set_ptr(key_ptr);
+    key->set_length(key_length);
+    key->set_id(key_id);
+  }
+  return true;
+}
+
+bool Trie::insert_from_root(const UInt8 *ptr,
+                            UInt32 length,
+                            Key *key) {
+  DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+
+  UInt32 node_id = ROOT_NODE_ID;
+  for (UInt32 i = 0; i < length; ++i) {
+    const Base base = ith_node(node_id).base();
+    if (base.is_terminal()) {
+      return insert_from_terminal(ptr, length, key, node_id, i);
+    }
+
+    const UInt32 next = base.offset() ^ ptr[i];
+    if (ith_node(next).label() != ptr[i]) {
+      return insert_from_nonterminal(ptr, length, key, node_id, i);
+    }
+    node_id = next;
+  }
+
+  const Base base = ith_node(node_id).base();
+  if (base.is_terminal()) {
+    return insert_from_terminal(ptr, length, key, node_id, length);
+  }
+
+  const UInt32 next = base.offset() ^ TERMINAL_LABEL;
+  if (ith_node(next).label() != TERMINAL_LABEL) {
+    return insert_from_nonterminal(ptr, length, key, node_id, length);
+  }
+  DA_DEBUG_THROW_IF(!ith_node(next).is_terminal());
+
+  if (key != NULL) {
+    const UInt32 key_id = ith_node(next).key_id();
+    key->set_ptr(key_buf_ + ith_key_info(key_id).offset());
+    key->set_length(ith_key_info(key_id + 1).offset()
+        - ith_key_info(key_id).offset());
+    key->set_id(key_id);
+  }
+  return false;
+}
+
+bool Trie::insert_from_terminal(const UInt8 *ptr,
+                                UInt32 length,
+                                Key *key,
+                                UInt32 node_id,
+                                UInt32 i) {
+  DA_DEBUG_THROW_IF(node_id >= num_nodes());
+  DA_DEBUG_THROW_IF(!ith_node(node_id).is_terminal());
+  DA_DEBUG_THROW_IF(i > length);
+
+  UInt32 key_id = ith_node(node_id).key_id();
+  const char *key_ptr = key_buf_ + ith_key_info(key_id).offset();
+  UInt32 key_length = ith_key_info(key_id + 1).offset()
+      - ith_key_info(key_id).offset();
+
+  UInt32 j = i;
+  while ((j < length) && (j < key_length)) {
+    if (ptr[j] != (UInt8)key_ptr[j]) {
+      break;
+    }
+    ++j;
+  }
+
+  if ((j == length) && (j == key_length)) {
+    if (key != NULL) {
+      key->set_ptr(key_ptr);
+      key->set_length(key_length);
+      key->set_id(key_id);
+    }
+    return false;
+  }
+
+  DA_SIZE_ERROR_IF(num_keys() >= max_num_keys());
+  key_id = num_keys() + KEY_ID_OFFSET;
+
+  const UInt32 key_offset = ith_key_info(key_id).offset();
+  DA_SIZE_ERROR_IF(length > (key_buf_size() - key_offset));
+
+  for (UInt32 k = i; k < j; ++k) {
+    node_id = insert_node(node_id, ptr[k]);
+  }
+  node_id = separate(ptr, length, node_id, j);
+
+  ith_key_info(key_id + 1).set_offset(key_offset + length);
+  std::memcpy(key_buf_ + key_offset, ptr, length);
+  header_->set_num_keys(num_keys() + 1);
+
+  ith_node(node_id).set_key_id(key_id);
+
+  if (key != NULL) {
+    key->set_ptr(key_buf_ + key_offset);
+    key->set_length(length);
+    key->set_id(key_id);
+  }
+  return true;
+}
+
+bool Trie::insert_from_nonterminal(const UInt8 *ptr,
+                                   UInt32 length,
+                                   Key *key,
+                                   UInt32 node_id,
+                                   UInt32 i) {
+  DA_DEBUG_THROW_IF(node_id >= num_nodes());
+  DA_DEBUG_THROW_IF(ith_node(node_id).is_terminal());
+  DA_DEBUG_THROW_IF(i > length);
+
+  DA_SIZE_ERROR_IF(num_keys() >= max_num_keys());
+  const UInt32 key_id = num_keys() + KEY_ID_OFFSET;
+
+  const UInt32 key_offset = ith_key_info(key_id).offset();
+  DA_SIZE_ERROR_IF(length > (key_buf_size() - key_offset));
+
+  const UInt16 label = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL;
+  const Base base = ith_node(node_id).base();
+  if ((base.offset() == INVALID_OFFSET) ||
+      !ith_node(base.offset() ^ label).is_phantom()) {
+    resolve(node_id, label);
+  }
+  node_id = insert_node(node_id, label);
+
+  ith_key_info(key_id + 1).set_offset(key_offset + length);
+  std::memcpy(key_buf_ + key_offset, ptr, length);
+  header_->set_num_keys(num_keys() + 1);
+
+  ith_node(node_id).set_key_id(key_id);
+
+  if (key != NULL) {
+    key->set_ptr(key_buf_ + key_offset);
+    key->set_length(length);
+    key->set_id(key_id);
+  }
+  return true;
+}
+
+UInt32 Trie::insert_node(UInt32 node_id,
+                         UInt16 label) {
+  DA_DEBUG_THROW_IF(node_id >= num_nodes());
+  DA_DEBUG_THROW_IF(label > MAX_LABEL);
+
+  const Base base = ith_node(node_id).base();
+  UInt32 offset;
+  if (base.is_terminal() || (base.offset() == INVALID_OFFSET)) {
+    offset = find_offset(&label, 1);
+  } else {
+    offset = base.offset();
+  }
+
+  const UInt32 next = offset ^ label;
+  reserve_node(next);
+
+  ith_node(next).set_label(label);
+  if (base.is_terminal()) {
+    DA_DEBUG_THROW_IF(ith_node(offset).is_offset());
+    ith_node(offset).set_is_offset(true);
+    ith_node(next).set_key_id(base.key_id());
+  } else if (base.offset() == INVALID_OFFSET) {
+    DA_DEBUG_THROW_IF(ith_node(offset).is_offset());
+    ith_node(offset).set_is_offset(true);
+  } else {
+    DA_DEBUG_THROW_IF(!ith_node(offset).is_offset());
+  }
+  ith_node(node_id).set_offset(offset);
+
+  const UInt32 child_label = ith_node(node_id).child();
+  DA_DEBUG_THROW_IF(child_label == label);
+  if (child_label == INVALID_LABEL) {
+    ith_node(node_id).set_child(label);
+  } else if ((label == TERMINAL_LABEL) || (label < child_label)) {
+    DA_DEBUG_THROW_IF(ith_node(offset ^ child_label).is_phantom());
+    DA_DEBUG_THROW_IF(ith_node(offset ^ child_label).label() != child_label);
+    ith_node(next).set_sibling(child_label);
+    ith_node(node_id).set_child(label);
+  } else {
+    UInt32 prev = offset ^ child_label;
+    DA_DEBUG_THROW_IF(ith_node(prev).label() != child_label);
+    UInt32 sibling_label = ith_node(prev).sibling();
+    while (label > sibling_label) {
+      prev = offset ^ sibling_label;
+      DA_DEBUG_THROW_IF(ith_node(prev).label() != sibling_label);
+      sibling_label = ith_node(prev).sibling();
+    }
+    DA_DEBUG_THROW_IF(label == sibling_label);
+    ith_node(next).set_sibling(ith_node(prev).sibling());
+    ith_node(prev).set_sibling(label);
+  }
+  return next;
+}
+
+UInt32 Trie::separate(const UInt8 *ptr,
+                      UInt32 length,
+                      UInt32 node_id,
+                      UInt32 i) {
+  DA_DEBUG_THROW_IF(node_id >= num_nodes());
+  DA_DEBUG_THROW_IF(!ith_node(node_id).is_terminal());
+  DA_DEBUG_THROW_IF(i > length);
+
+  const UInt32 key_id = ith_node(node_id).key_id();
+  const UInt32 key_offset = ith_key_info(key_id).offset();
+  const char *key_ptr = key_buf_ + key_offset;
+  const UInt32 key_length = ith_key_info(key_id + 1).offset() - key_offset;
+
+  UInt16 labels[2];
+  labels[0] = (i < key_length) ?
+      (UInt16)(UInt8)key_ptr[i] : (UInt16)TERMINAL_LABEL;
+  labels[1] = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL;
+  DA_DEBUG_THROW_IF(labels[0] == labels[1]);
+
+  const UInt32 offset = find_offset(labels, 2);
+  DA_DEBUG_THROW_IF(ith_node(offset).is_offset());
+
+  UInt32 next = offset ^ labels[0];
+  reserve_node(next);
+
+  ith_node(next).set_label(labels[0]);
+  ith_node(next).set_key_id(key_id);
+
+  next = offset ^ labels[1];
+  reserve_node(next);
+
+  ith_node(next).set_label(labels[1]);
+
+  ith_node(offset).set_is_offset(true);
+  ith_node(node_id).set_offset(offset);
+
+  if ((labels[0] == TERMINAL_LABEL) || (labels[0] < labels[1])) {
+    ith_node(node_id).set_child(labels[0]);
+    ith_node(offset ^ labels[0]).set_sibling(labels[1]);
+  } else {
+    ith_node(node_id).set_child(labels[1]);
+    ith_node(offset ^ labels[1]).set_sibling(labels[0]);
+  }
+  return next;
+}
+
+void Trie::resolve(UInt32 node_id,
+                   UInt16 label) {
+  DA_DEBUG_THROW_IF(node_id >= num_nodes());
+  DA_DEBUG_THROW_IF(ith_node(node_id).is_terminal());
+  DA_DEBUG_THROW_IF(label > MAX_LABEL);
+
+  UInt32 offset = ith_node(node_id).offset();
+  if (offset != INVALID_OFFSET) {
+    UInt16 labels[MAX_LABEL + 1];
+    UInt32 num_labels = 0;
+
+    UInt32 next_label = ith_node(node_id).child();
+    DA_DEBUG_THROW_IF(next_label == INVALID_LABEL);
+    while (next_label != INVALID_LABEL) {
+      DA_DEBUG_THROW_IF(next_label > MAX_LABEL);
+      labels[num_labels++] = next_label;
+      next_label = ith_node(offset ^ next_label).sibling();
+    }
+    DA_DEBUG_THROW_IF(num_labels == 0);
+
+    labels[num_labels] = label;
+    offset = find_offset(labels, num_labels + 1);
+    move_nodes(node_id, offset, labels, num_labels);
+  } else {
+    offset = find_offset(&label, 1);
+    if (offset >= num_nodes()) {
+      DA_DEBUG_THROW_IF((offset / BLOCK_SIZE) != num_blocks());
+      reserve_block(num_blocks());
+    }
+    ith_node(offset).set_is_offset(true);
+    ith_node(node_id).set_offset(offset);
+  }
+}
+
+void Trie::move_nodes(UInt32 node_id,
+                      UInt32 dest_offset,
+                      const UInt16 *labels,
+                      UInt32 num_labels) {
+  DA_DEBUG_THROW_IF(node_id >= num_nodes());
+  DA_DEBUG_THROW_IF(ith_node(node_id).is_terminal());
+  DA_DEBUG_THROW_IF(labels == NULL);
+  DA_DEBUG_THROW_IF(num_labels == 0);
+  DA_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1));
+
+  const UInt32 src_offset = ith_node(node_id).offset();
+  DA_DEBUG_THROW_IF(src_offset == INVALID_OFFSET);
+  DA_DEBUG_THROW_IF(!ith_node(src_offset).is_offset());
+
+  for (UInt32 i = 0; i < num_labels; ++i) {
+    const UInt32 src_node_id = src_offset ^ labels[i];
+    const UInt32 dest_node_id = dest_offset ^ labels[i];
+    DA_DEBUG_THROW_IF(ith_node(src_node_id).is_phantom());
+    DA_DEBUG_THROW_IF(ith_node(src_node_id).label() != labels[i]);
+
+    reserve_node(dest_node_id);
+    ith_node(dest_node_id).set_except_is_offset(
+        ith_node(src_node_id).except_is_offset());
+    ith_node(dest_node_id).set_base(ith_node(src_node_id).base());
+  }
+  header_->set_num_zombies(num_zombies() + num_labels);
+
+  DA_DEBUG_THROW_IF(ith_node(dest_offset).is_offset());
+  ith_node(dest_offset).set_is_offset(true);
+  ith_node(node_id).set_offset(dest_offset);
+}
+
+UInt32 Trie::find_offset(const UInt16 *labels,
+                         UInt32 num_labels) {
+  DA_DEBUG_THROW_IF(labels == NULL);
+  DA_DEBUG_THROW_IF(num_labels == 0);
+  DA_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1));
+
+  // Blocks are tested in descending order of level, a lower level block
+  // contains more phantom nodes.
+  UInt32 level = 1;
+  while (num_labels >= (1U << level)) {
+    ++level;
+  }
+  level = (level < MAX_BLOCK_LEVEL) ? (MAX_BLOCK_LEVEL - level) : 0;
+
+  UInt32 block_count = 0;
+  do {
+    UInt32 entry = header_->ith_entry(level);
+    if (entry == INVALID_ENTRY) {
+      // This level has no blocks and it is thus skipped.
+      continue;
+    }
+
+    UInt32 block_id = entry;
+    do {
+      const Block &block = ith_block(block_id);
+      DA_DEBUG_THROW_IF(block.level() != level);
+
+      const UInt32 first = (block_id * BLOCK_SIZE) | block.first_phantom();
+      UInt32 node_id = first;
+      do {
+        DA_DEBUG_THROW_IF(!ith_node(node_id).is_phantom());
+        const UInt32 offset = node_id ^ labels[0];
+        if (!ith_node(offset).is_offset()) {
+          UInt32 i = 1;
+          for ( ; i < num_labels; ++i) {
+            if (!ith_node(offset ^ labels[i]).is_phantom()) {
+              break;
+            }
+          }
+          if (i >= num_labels) {
+            return offset;
+          }
+        }
+        node_id = (block_id * BLOCK_SIZE) | ith_node(node_id).next();
+      } while (node_id != first);
+
+      const UInt32 prev = block_id;
+      const UInt32 next = block.next();
+      block_id = next;
+      ith_block(prev).set_fail_count(ith_block(prev).fail_count() + 1);
+
+      // The level of a block is updated when this function fails many times,
+      // actually MAX_FAIL_COUNT times, in that block.
+      if (ith_block(prev).fail_count() == MAX_FAIL_COUNT) {
+        update_block_level(prev, level + 1);
+        if (next == entry) {
+          break;
+        } else {
+          // Note that the entry might be updated in the level update.
+          entry = header_->ith_entry(level);
+          continue;
+        }
+      }
+    } while ((++block_count < MAX_BLOCK_COUNT) && (block_id != entry));
+  } while ((block_count < MAX_BLOCK_COUNT) && (level-- != 0));
+
+  return num_nodes() ^ labels[0];
+}
+
+void Trie::reserve_node(UInt32 node_id) {
+  DA_DEBUG_THROW_IF(node_id > num_nodes());
+  if (node_id >= num_nodes()) {
+    reserve_block(node_id / BLOCK_SIZE);
+  }
+
+  Node &node = ith_node(node_id);
+  DA_DEBUG_THROW_IF(!node.is_phantom());
+
+  const UInt32 block_id = node_id / BLOCK_SIZE;
+  Block &block = ith_block(block_id);
+  DA_DEBUG_THROW_IF(block.num_phantoms() == 0);
+
+  const UInt32 next = (block_id * BLOCK_SIZE) | node.next();
+  const UInt32 prev = (block_id * BLOCK_SIZE) | node.prev();
+  DA_DEBUG_THROW_IF(next >= num_nodes());
+  DA_DEBUG_THROW_IF(prev >= num_nodes());
+
+  if ((node_id & BLOCK_MASK) == block.first_phantom()) {
+    // The first phantom node is removed from the block and the second phantom
+    // node comes first.
+    block.set_first_phantom(next & BLOCK_MASK);
+  }
+
+  ith_node(next).set_prev(prev & BLOCK_MASK);
+  ith_node(prev).set_next(next & BLOCK_MASK);
+
+  if (block.level() != MAX_BLOCK_LEVEL) {
+    const UInt32 threshold = 1U << ((MAX_BLOCK_LEVEL - block.level() - 1) * 2);
+    if (block.num_phantoms() == threshold) {
+      update_block_level(block_id, block.level() + 1);
+    }
+  }
+  block.set_num_phantoms(block.num_phantoms() - 1);
+
+  node.set_is_phantom(false);
+
+  DA_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET);
+  DA_DEBUG_THROW_IF(node.label() != INVALID_LABEL);
+
+  header_->set_num_phantoms(num_phantoms() - 1);
+}
+
+void Trie::reserve_block(UInt32 block_id) {
+  DA_DEBUG_THROW_IF(block_id != num_blocks());
+  DA_SIZE_ERROR_IF(block_id >= max_num_blocks());
+
+  header_->set_num_blocks(block_id + 1);
+  ith_block(block_id).set_fail_count(0);
+  ith_block(block_id).set_first_phantom(0);
+  ith_block(block_id).set_num_phantoms(BLOCK_SIZE);
+
+  const UInt32 begin = block_id * BLOCK_SIZE;
+  const UInt32 end = begin + BLOCK_SIZE;
+  DA_DEBUG_THROW_IF(end != num_nodes());
+
+  Base base;
+  base.set_offset(INVALID_OFFSET);
+
+  Check check;
+  check.set_is_phantom(true);
+
+  for (UInt32 i = begin; i < end; ++i) {
+    check.set_prev((i - 1) & BLOCK_MASK);
+    check.set_next((i + 1) & BLOCK_MASK);
+    ith_node(i).set_base(base);
+    ith_node(i).set_check(check);
+  }
+
+  // The leve of the new block is 0.
+  set_block_level(block_id, 0);
+  header_->set_num_phantoms(num_phantoms() + BLOCK_SIZE);
+}
+
+void Trie::update_block_level(UInt32 block_id,
+                              UInt32 level) {
+  DA_DEBUG_THROW_IF(block_id >= num_blocks());
+  DA_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL);
+
+  clear_block_level(block_id);
+  set_block_level(block_id, level);
+}
+
+void Trie::set_block_level(UInt32 block_id,
+                           UInt32 level) {
+  DA_DEBUG_THROW_IF(block_id >= num_blocks());
+  DA_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL);
+
+  const UInt32 entry = header_->ith_entry(level);
+  if (entry == INVALID_ENTRY) {
+    // The new block becomes the only one block of the linked list.
+    ith_block(block_id).set_next(block_id);
+    ith_block(block_id).set_prev(block_id);
+    header_->set_ith_entry(level, block_id);
+  } else {
+    // The new block is added to the end of the list.
+    const UInt32 next = entry;
+    const UInt32 prev = ith_block(entry).prev();
+    DA_DEBUG_THROW_IF(next >= num_blocks());
+    DA_DEBUG_THROW_IF(prev >= num_blocks());
+    ith_block(block_id).set_next(next);
+    ith_block(block_id).set_prev(prev);
+    ith_block(next).set_prev(block_id);
+    ith_block(prev).set_next(block_id);
+  }
+  ith_block(block_id).set_level(level);
+  ith_block(block_id).set_fail_count(0);
+}
+
+void Trie::clear_block_level(UInt32 block_id) {
+  DA_DEBUG_THROW_IF(block_id >= num_blocks());
+
+  const UInt32 level = ith_block(block_id).level();
+  DA_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL);
+
+  const UInt32 entry = header_->ith_entry(level);
+  DA_DEBUG_THROW_IF(entry == INVALID_ENTRY);
+
+  const UInt32 next = ith_block(block_id).next();
+  const UInt32 prev = ith_block(block_id).prev();
+  DA_DEBUG_THROW_IF(next >= num_blocks());
+  DA_DEBUG_THROW_IF(prev >= num_blocks());
+
+  if (next == block_id) {
+    // The linked list becomes empty.
+    header_->set_ith_entry(level, INVALID_ENTRY);
+  } else {
+    ith_block(next).set_prev(prev);
+    ith_block(prev).set_next(next);
+    if (block_id == entry) {
+      // The second block becomes the entry to the linked list.
+      header_->set_ith_entry(level, next);
+    }
+  }
+}
+
+}  // namespace dat
+}  // namespace grn

  Added: lib/dat/trie.h (+223 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/trie.h    2011-06-17 14:46:20 +0000 (f507de4)
@@ -0,0 +1,223 @@
+#ifndef GRN_DAT_TRIE_H
+#define GRN_DAT_TRIE_H
+
+#include "./header.h"
+#include "./node.h"
+#include "./block.h"
+#include "./key-info.h"
+#include "./key.h"
+
+namespace grn {
+namespace dat {
+
+class Trie {
+ public:
+  static const UInt32 DEFAULT_FILE_SIZE          = 1U << 20;
+  static const double DEFAULT_NUM_NODES_PER_KEY  = 4.0;
+  static const double DEFAULT_AVERAGE_KEY_LENGTH = 16.0;
+
+  static const UInt32 MAX_NUM_NODES    = (UInt32)MAX_NODE_ID + 1;
+  static const UInt32 MAX_NUM_BLOCKS   = (UInt32)MAX_BLOCK_ID + 1;
+  static const UInt32 MAX_NUM_KEYS     = (UInt32)MAX_KEY_ID + 1;
+  static const UInt32 MAX_KEY_BUF_SIZE = 0xFFFFFFFFU;
+
+  ~Trie();
+
+  static Trie *create(const char *file_name = NULL,
+                      UInt64 file_size = 0,
+                      UInt32 max_num_keys = 0,
+                      double num_nodes_per_key = 0.0,
+                      double average_key_length = 0.0);
+
+  static Trie *create(const Trie &src_trie,
+                      const char *file_name = NULL,
+                      UInt64 file_size = 0,
+                      UInt32 max_num_keys = 0,
+                      double num_nodes_per_key = 0.0,
+                      double average_key_length = 0.0);
+
+  static Trie *open(const char *file_name);
+
+//  void close();
+
+  void ith_key(UInt32 key_id, Key *key) const {
+    DA_DEBUG_THROW_IF(key_id < KEY_ID_OFFSET);
+    DA_DEBUG_THROW_IF(key_id >= (num_keys() + KEY_ID_OFFSET));
+    DA_DEBUG_THROW_IF(key == NULL);
+
+    key->set_ptr(key_buf_ + ith_key_info(key_id).offset());
+    key->set_length(ith_key_info(key_id + 1).offset()
+        - ith_key_info(key_id).offset());
+    key->set_id(key_id);
+  }
+
+  bool search(const void *ptr, UInt32 length, Key *key = NULL) const {
+    DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+    return search_from_root(static_cast<const UInt8 *>(ptr), length, key);
+  }
+  bool insert(const void *ptr, UInt32 length, Key *key = NULL) {
+    DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+    return insert_from_root(static_cast<const UInt8 *>(ptr), length, key);
+  }
+
+  const Header &header() const {
+    return *header_;
+  }
+
+  UInt64 file_size() const {
+    return header_->file_size();
+  }
+  UInt64 virtual_size() const {
+    return sizeof(Header)
+        + (sizeof(KeyInfo) * (num_keys() + 1))
+        + (sizeof(Block) * num_blocks())
+        + (sizeof(Node) * num_nodes())
+        + total_key_length();
+  }
+  UInt32 total_key_length() const {
+    return ith_key_info(num_keys() + 1).offset();
+  }
+  UInt32 num_keys() const {
+    return header_->num_keys();
+  }
+  UInt32 max_num_keys() const {
+    return header_->max_num_keys();
+  }
+  UInt32 num_nodes() const {
+    return header_->num_nodes();
+  }
+  UInt32 num_phantoms() const {
+    return header_->num_phantoms();
+  }
+  UInt32 num_zombies() const {
+    return header_->num_zombies();
+  }
+  UInt32 max_num_nodes() const {
+    return header_->max_num_nodes();
+  }
+  UInt32 num_blocks() const {
+    return header_->num_blocks();
+  }
+  UInt32 max_num_blocks() const {
+    return header_->max_num_blocks();
+  }
+  UInt32 key_buf_size() const {
+    return header_->key_buf_size();
+  }
+
+ private:
+  Header *header_;
+  Node *nodes_;
+  Block *blocks_;
+  KeyInfo *key_infos_;
+  char *key_buf_;
+
+  Trie();
+
+  void create_file(const char *file_name,
+                   UInt64 file_size,
+                   UInt32 max_num_keys,
+                   double num_nodes_per_key,
+                   double average_key_length);
+  void create_file(const char *file_name,
+                   UInt64 file_size,
+                   UInt32 max_num_keys,
+                   UInt32 max_num_blocks,
+                   UInt32 key_buf_size);
+  void create_root();
+
+  void open_file(const char *file_name);
+
+  void map_address(void *address);
+
+  void build_from_trie(const Trie &trie);
+  void build_from_trie(const Trie &trie,
+                       UInt32 src,
+                       UInt32 dest);
+
+  bool search_from_root(const UInt8 *ptr,
+                        UInt32 length,
+                        Key *key) const;
+  bool search_from_terminal(const UInt8 *ptr,
+                            UInt32 length,
+                            Key *key,
+                            UInt32 key_id,
+                            UInt32 i) const;
+
+  bool insert_from_root(const UInt8 *ptr,
+                        UInt32 length,
+                        Key *key);
+  bool insert_from_terminal(const UInt8 *ptr,
+                            UInt32 length,
+                            Key *key,
+                            UInt32 node_id,
+                            UInt32 i);
+  bool insert_from_nonterminal(const UInt8 *ptr,
+                               UInt32 length,
+                               Key *key,
+                               UInt32 node_id,
+                               UInt32 i);
+
+  UInt32 insert_node(UInt32 node_id,
+                     UInt16 label);
+
+  UInt32 separate(const UInt8 *ptr,
+                  UInt32 length,
+                  UInt32 node_id,
+                  UInt32 i);
+  void resolve(UInt32 node_id,
+               UInt16 label);
+  void move_nodes(UInt32 node_id,
+                  UInt32 dest_offset,
+                  const UInt16 *labels,
+                  UInt32 num_labels);
+
+  UInt32 find_offset(const UInt16 *labels,
+                     UInt32 num_labels);
+
+  void reserve_node(UInt32 node_id);
+  void reserve_block(UInt32 block_id);
+
+  void update_block_level(UInt32 block_id,
+                          UInt32 level);
+  void set_block_level(UInt32 block_id,
+                       UInt32 level);
+  void clear_block_level(UInt32 block_id);
+
+  const Node &ith_node(UInt32 i) const {
+    DA_DEBUG_THROW_IF(i >= num_nodes());
+    return nodes_[i];
+  }
+  const Block &ith_block(UInt32 i) const {
+    DA_DEBUG_THROW_IF(i >= num_blocks());
+    return blocks_[i];
+  }
+  const KeyInfo &ith_key_info(UInt32 i) const {
+    DA_DEBUG_THROW_IF(i < KEY_ID_OFFSET);
+    DA_DEBUG_THROW_IF(i > (num_keys() + KEY_ID_OFFSET));
+    return key_infos_[i - KEY_ID_OFFSET];
+  }
+
+  Node &ith_node(UInt32 i) {
+    DA_DEBUG_THROW_IF(i >= num_nodes());
+    return nodes_[i];
+  }
+  Block &ith_block(UInt32 i) {
+    DA_DEBUG_THROW_IF(i >= num_blocks());
+    return blocks_[i];
+  }
+  KeyInfo &ith_key_info(UInt32 i) {
+    DA_DEBUG_THROW_IF(i < KEY_ID_OFFSET);
+    DA_DEBUG_THROW_IF(i > (num_keys() + KEY_ID_OFFSET + 1));
+    return key_infos_[i - KEY_ID_OFFSET];
+  }
+
+  // Disallows copy and assignment.
+  Trie(const Trie &);
+  Trie &operator=(const Trie &);
+};
+
+}  // namespace dat
+}  // namespace grn
+
+#endif  // GRN_DAT_TRIE_H

  Added: lib/dat/usage.h (+46 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/usage.h    2011-06-17 14:46:20 +0000 (844ffac)
@@ -0,0 +1,46 @@
+#ifndef DA_USAGE_H_
+#define DA_USAGE_H_
+
+#include "./common.h"
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+namespace da {
+
+class Usage {
+ public:
+  Usage()
+      : usage_() {
+    ::getrusage(RUSAGE_SELF, &usage_);
+  }
+
+  double user_time() const {
+    return usage_.ru_utime.tv_sec + (usage_.ru_utime.tv_usec * 0.000001);
+  }
+  double system_time() const {
+    return usage_.ru_stime.tv_sec + (usage_.ru_stime.tv_usec * 0.000001);
+  }
+  long max_resident_set_size() const {
+    return usage_.ru_maxrss;
+  }
+  long minor_page_faults() const {
+    return usage_.ru_minflt;
+  }
+  long major_page_faults() const {
+    return usage_.ru_majflt;
+  }
+  long file_system_inputs() const {
+    return usage_.ru_inblock;
+  }
+  long file_system_outputs() const {
+    return usage_.ru_oublock;
+  }
+
+ private:
+  struct rusage usage_;
+};
+
+}  // namespace da
+
+#endif  // DA_USAGE_H_




Groonga-commit メーリングリストの案内
Back to archive index