null+****@clear*****
null+****@clear*****
2011年 6月 17日 (金) 23:46:20 JST
Susumu Yata 2011-06-17 14:46:20 +0000 (Fri, 17 Jun 2011)
New Revision: edc2954041e274c04368bf28cb7ccf07d7c95b06
Log:
added da8.
Added files:
lib/dat/base.h
lib/dat/block.h
lib/dat/check.h
lib/dat/common.h
lib/dat/header.h
lib/dat/key-info.h
lib/dat/key.h
lib/dat/node.h
lib/dat/timer.h
lib/dat/trie.cc
lib/dat/trie.h
lib/dat/usage.h
Added: lib/dat/base.h (+54 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/base.h 2011-06-17 14:46:20 +0000 (7449524)
@@ -0,0 +1,54 @@
+#ifndef GRN_DAT_BASE_H
+#define GRN_DAT_BASE_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+// The most significant bit represents whether or not the node is a terminal
+// node. The BASE of a terminal node represents the ID of its associated key.
+// On the other hand, the BASE of a non-terminal node represents the offset to
+// its child nodes.
+class Base {
+ public:
+ Base()
+ : base_(0) {}
+
+ UInt32 base() const {
+ return base_;
+ }
+ bool is_terminal() const {
+ return (base_ & IS_TERMINAL_FLAG) == IS_TERMINAL_FLAG;
+ }
+ UInt32 offset() const {
+ DA_DEBUG_THROW_IF(is_terminal());
+ return base_;
+ }
+ UInt32 key_id() const {
+ DA_DEBUG_THROW_IF(!is_terminal());
+ return base_ & ~IS_TERMINAL_FLAG;
+ }
+
+ void set_base(UInt32 x) {
+ base_ = x;
+ }
+ void set_offset(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > MAX_OFFSET);
+ base_ = x;
+ }
+ void set_key_id(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > MAX_KEY_ID);
+ base_ = IS_TERMINAL_FLAG | x;
+ }
+
+ private:
+ UInt32 base_;
+
+ static const UInt32 IS_TERMINAL_FLAG = 0x80000000U;
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_BASE_H
Added: lib/dat/block.h (+79 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/block.h 2011-06-17 14:46:20 +0000 (97a9098)
@@ -0,0 +1,79 @@
+#ifndef GRN_DAT_BLOCK_H
+#define GRN_DAT_BLOCK_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class Block {
+ public:
+ Block()
+ : next_(0),
+ prev_(0),
+ first_phantom_(0),
+ num_phantoms_(0) {}
+
+ UInt32 next() const {
+ return next_ / BLOCK_SIZE;
+ }
+ UInt32 prev() const {
+ return prev_ / BLOCK_SIZE;
+ }
+
+ UInt32 level() const {
+ return next_ & BLOCK_MASK;
+ }
+ UInt32 fail_count() const {
+ return prev_ & BLOCK_MASK;
+ }
+
+ UInt32 first_phantom() const {
+ return first_phantom_;
+ }
+ UInt32 num_phantoms() const {
+ return num_phantoms_;
+ }
+
+ void set_next(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > MAX_BLOCK_ID);
+ next_ = (next_ & BLOCK_MASK) | (x * BLOCK_SIZE);
+ }
+ void set_prev(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > MAX_BLOCK_ID);
+ prev_ = (prev_ & BLOCK_MASK) | (x * BLOCK_SIZE);
+ }
+
+ void set_level(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > MAX_BLOCK_LEVEL);
+ DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+ next_ = (next_ & ~BLOCK_MASK) | x;
+ }
+ void set_fail_count(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > MAX_FAIL_COUNT);
+ DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+ prev_ = (prev_ & ~BLOCK_MASK) | x;
+ }
+
+ void set_first_phantom(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+ first_phantom_ = (UInt16)x;
+ }
+ void set_num_phantoms(UInt32 x) {
+ DA_DEBUG_THROW_IF(x > BLOCK_SIZE);
+ num_phantoms_ = (UInt16)x;
+ }
+
+ private:
+ UInt32 next_;
+ UInt32 prev_;
+ UInt16 first_phantom_;
+ UInt16 num_phantoms_;
+
+ static const UInt32 SHIFT = 9;
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_BLOCK_H
Added: lib/dat/check.h (+142 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/check.h 2011-06-17 14:46:20 +0000 (113cb7c)
@@ -0,0 +1,142 @@
+#ifndef GRN_DAT_CHECK_H
+#define GRN_DAT_CHECK_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+// The most significant bit (MSB) represents whether or not the node ID is used
+// as an offset. This bit is independent of the other bits.
+// The second most significant bit represents whether or not the node is a
+// phantom node. If it is true, next() and prev() are available. These
+// functions return the next/previous phantom node respectively.
+// A non-phantom node
+class Check {
+ public:
+ Check()
+ : check_(0) {}
+
+ UInt32 check() const {
+ return check_;
+ }
+
+ // The most significant bit (MSB) represents whether or not the node ID is
+ // used as an offset. Note that the MSB is independent of the other bits.
+ bool is_offset() const {
+ return (check_ & IS_OFFSET_FLAG) == IS_OFFSET_FLAG;
+ }
+
+ UInt32 except_is_offset() const {
+ DA_DEBUG_THROW_IF(is_phantom());
+ return check_ & ~IS_OFFSET_FLAG;
+ }
+
+ // A phantom node is a node that has never been used, and such a node is also
+ // called an empty element. Phantom nodes form a doubly linked list in each
+ // block, and the linked list is represented by next() and prev().
+ bool is_phantom() const {
+ return (check_ & IS_PHANTOM_FLAG) == IS_PHANTOM_FLAG;
+ }
+
+ UInt32 next() const {
+ DA_DEBUG_THROW_IF(!is_phantom());
+ return (check_ >> NEXT_SHIFT) & BLOCK_MASK;
+ }
+ UInt32 prev() const {
+ DA_DEBUG_THROW_IF(!is_phantom());
+ return (check_ >> PREV_SHIFT) & BLOCK_MASK;
+ }
+
+ // A label is attached to each non-phantom node. A label is represented by
+ // a byte except for a terminal label '\256'. Note that a phantom node always
+ // returns an invalid label with its phantom flag bit so as to reject a
+ // transition to the phantom node.
+ UInt32 label() const {
+ return check_ & (IS_PHANTOM_FLAG | LABEL_MASK);
+ }
+
+ UInt32 child() const {
+ return (check_ >> CHILD_SHIFT) & LABEL_MASK;
+ }
+ UInt32 sibling() const {
+ return (check_ >> SIBLING_SHIFT) & LABEL_MASK;
+ }
+
+ void set_check(UInt32 x) {
+ check_ = x;
+ }
+
+ void set_is_offset(bool x) {
+ if (x) {
+ DA_DEBUG_THROW_IF(is_offset());
+ check_ |= IS_OFFSET_FLAG;
+ } else {
+ DA_DEBUG_THROW_IF(!is_offset());
+ check_ &= ~IS_OFFSET_FLAG;
+ }
+ }
+
+ void set_except_is_offset(UInt32 x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ DA_DEBUG_THROW_IF((x & IS_OFFSET_FLAG) == IS_OFFSET_FLAG);
+ check_ = (check_ & IS_OFFSET_FLAG) | x;
+ }
+
+ // To reject a transition to an incomplete node, its label is overwritten
+ // with an invalid label when it becomes a non-phantom node.
+ void set_is_phantom(bool x) {
+ if (x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ check_ |= IS_PHANTOM_FLAG;
+ } else {
+ DA_DEBUG_THROW_IF(!is_phantom());
+ check_ = (check_ & IS_OFFSET_FLAG) | (INVALID_LABEL << CHILD_SHIFT) |
+ (INVALID_LABEL << SIBLING_SHIFT) | INVALID_LABEL;
+ }
+ }
+
+ void set_next(UInt32 x) {
+ DA_DEBUG_THROW_IF(!is_phantom());
+ DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+ check_ = (check_ & ~(BLOCK_MASK << NEXT_SHIFT)) | (x << NEXT_SHIFT);
+ }
+ void set_prev(UInt32 x) {
+ DA_DEBUG_THROW_IF(!is_phantom());
+ DA_DEBUG_THROW_IF(x > BLOCK_MASK);
+ check_ = (check_ & ~(BLOCK_MASK << PREV_SHIFT)) | (x << PREV_SHIFT);
+ }
+
+ void set_label(UInt32 x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ DA_DEBUG_THROW_IF(x > MAX_LABEL);
+ check_ = (check_ & ~LABEL_MASK) | x;
+ }
+
+ void set_child(UInt32 x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ DA_DEBUG_THROW_IF(x > MAX_LABEL);
+ check_ = (check_ & ~(LABEL_MASK << CHILD_SHIFT)) | (x << CHILD_SHIFT);
+ }
+ void set_sibling(UInt32 x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ DA_DEBUG_THROW_IF(label() > MAX_LABEL);
+ DA_DEBUG_THROW_IF((sibling() != INVALID_LABEL) && (x == INVALID_LABEL));
+ check_ = (check_ & ~(LABEL_MASK << SIBLING_SHIFT)) | (x << SIBLING_SHIFT);
+ }
+
+ private:
+ UInt32 check_;
+
+ static const UInt32 IS_OFFSET_FLAG = 1U << 31;
+ static const UInt32 IS_PHANTOM_FLAG = 1U << 30;
+ static const UInt32 NEXT_SHIFT = 9;
+ static const UInt32 PREV_SHIFT = 18;
+ static const UInt32 CHILD_SHIFT = 9;
+ static const UInt32 SIBLING_SHIFT = 18;
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_CHECK_H
Added: lib/dat/common.h (+146 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/common.h 2011-06-17 14:46:20 +0000 (6b45e53)
@@ -0,0 +1,146 @@
+#ifndef GRN_DAT_COMMON_H
+#define GRN_DAT_COMMON_H
+
+#ifndef _MSC_VER
+ #include <stdint.h>
+#endif // _MSC_VER
+
+#include <cstddef>
+#include <exception>
+
+#ifdef _DEBUG
+ #include <iostream>
+#endif // _DEBUG
+
+namespace grn {
+namespace dat {
+
+#ifdef _MSC_VER
+typedef unsigned __int8 UInt8;
+typedef unsigned __int16 UInt16;
+typedef unsigned __int32 UInt32;
+typedef unsigned __int64 UInt64;
+#else // _MSC_VER
+typedef ::uint8_t UInt8;
+typedef ::uint16_t UInt16;
+typedef ::uint32_t UInt32;
+typedef ::uint64_t UInt64;
+#endif // _MSC_VER
+
+enum {
+ // The special label attached to a terminal node.
+ TERMINAL_LABEL = 0x100,
+
+ LABEL_MASK = 0x1FF,
+ BLOCK_MASK = 0x1FF,
+
+ // The number of nodes in each block.
+ BLOCK_SIZE = 0x200,
+
+ ROOT_NODE_ID = 0,
+ KEY_ID_OFFSET = 1,
+
+ MAX_LABEL = TERMINAL_LABEL,
+ MAX_NODE_ID = 0x7FFFFFFF,
+ MAX_BLOCK_ID = MAX_NODE_ID / BLOCK_SIZE,
+ MAX_OFFSET = MAX_NODE_ID,
+ MAX_KEY_ID = MAX_NODE_ID,
+ MAX_KEY_LENGTH = 0x7FFF,
+ // The level of a block is incremented when find_offset() has failed to find
+ // a good offset in that block MAX_FAIL_COUNT times.
+ MAX_FAIL_COUNT = 4,
+ // The maximum number of blocks tested in each call of find_offset().
+ MAX_BLOCK_COUNT = 16,
+ // A higher level block has less phantom nodes.
+ MAX_BLOCK_LEVEL = 5,
+
+ INVALID_LABEL = 0x1FF,
+ INVALID_OFFSET = 0,
+ INVALID_ENTRY = 0x7FFFFFFF,
+
+ SORT_THRESHOLD = 10
+};
+
+class Exception : public std::exception {
+ public:
+ Exception() throw()
+ : what_("") {}
+ Exception(const char *what) throw()
+ : what_((what != NULL) ? what : "") {}
+ Exception(const Exception &ex) throw()
+ : what_(ex.what_) {}
+ virtual ~Exception() throw() {}
+
+ Exception &operator=(const Exception &ex) throw() {
+ what_ = ex.what_;
+ return *this;
+ }
+
+ virtual const char *what() const throw() {
+ return what_;
+ }
+
+ private:
+ const char *what_;
+};
+
+#define DA_DEFINE_ERROR(error_type) \
+ class error_type : public Exception { \
+ public: \
+ error_type() throw() \
+ : Exception() {} \
+ error_type(const char *what) throw() \
+ : Exception(what) {} \
+ error_type(const error_type &ex) throw() \
+ : Exception(ex) {} \
+ virtual ~error_type() throw() {} \
+ \
+ error_type &operator=(const error_type &ex) throw() { \
+ *static_cast<Exception *>(this) = ex; \
+ return *this; \
+ } \
+ }
+
+DA_DEFINE_ERROR(ParamError);
+DA_DEFINE_ERROR(IOError);
+DA_DEFINE_ERROR(MemoryError);
+DA_DEFINE_ERROR(SizeError);
+
+#undef DA_DEFINE_ERROR
+
+#define DA_INT_TO_STR(value) \
+ #value
+#define DA_LINE_TO_STR(line) \
+ DA_INT_TO_STR(line)
+#define DA_LINE_STR \
+ DA_LINE_TO_STR(__LINE__)
+
+#define DA_THROW(exception, what) \
+ (throw exception(__FILE__ ":" DA_LINE_STR ": " what))
+#define DA_THROW_IF(cond) \
+ (void)((!(cond)) || (DA_THROW(grn::dat::Exception, #cond), 0))
+
+#define DA_PARAM_ERROR_IF(cond) \
+ (void)((!(cond)) || (DA_THROW(grn::dat::ParamError, #cond), 0))
+#define DA_IO_ERROR_IF(cond) \
+ (void)((!(cond)) || (DA_THROW(grn::dat::IOError, #cond), 0))
+#define DA_MEMORY_ERROR_IF(cond) \
+ (void)((!(cond)) || (DA_THROW(grn::dat::MemoryError, #cond), 0))
+#define DA_SIZE_ERROR_IF(cond) \
+ (void)((!(cond)) || (DA_THROW(grn::dat::SizeError, #cond), 0))
+
+#ifdef _DEBUG
+ #define DA_DEBUG_THROW_IF(cond) \
+ DA_THROW_IF(cond)
+ #define DA_DEBUG_LOG(var) \
+ (std::clog << __FILE__ ":" DA_LINE_STR ": " #var ": " \
+ << (var) << std::endl)
+#else // _DEBUG
+ #define DA_DEBUG_THROW_IF(cond)
+ #define DA_DEBUG_LOG(var)
+#endif // _DEBUG
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_COMMON_H
Added: lib/dat/header.h (+107 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/header.h 2011-06-17 14:46:20 +0000 (27abdb6)
@@ -0,0 +1,107 @@
+#ifndef GRN_DAT_HEADER_H
+#define GRN_DAT_HEADER_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class Header {
+ public:
+ Header()
+ : file_size_(0),
+ num_keys_(0),
+ max_num_keys_(0),
+ num_phantoms_(0),
+ num_zombies_(0),
+ num_blocks_(0),
+ max_num_blocks_(0),
+ key_buf_size_(0),
+ entries_() {
+ for (int i = 0; i <= MAX_BLOCK_LEVEL; ++i) {
+ entries_[i] = INVALID_ENTRY;
+ }
+ }
+
+ UInt64 file_size() const {
+ return file_size_;
+ }
+ UInt32 num_keys() const {
+ return num_keys_;
+ }
+ UInt32 max_num_keys() const {
+ return max_num_keys_;
+ }
+ UInt32 num_nodes() const {
+ return num_blocks() * BLOCK_SIZE;
+ }
+ UInt32 num_phantoms() const {
+ return num_phantoms_;
+ }
+ UInt32 num_zombies() const {
+ return num_zombies_;
+ }
+ UInt32 max_num_nodes() const {
+ return max_num_blocks() * BLOCK_SIZE;
+ }
+ UInt32 num_blocks() const {
+ return num_blocks_;
+ }
+ UInt32 max_num_blocks() const {
+ return max_num_blocks_;
+ }
+ UInt32 key_buf_size() const {
+ return key_buf_size_;
+ }
+ UInt32 ith_entry(UInt32 i) const {
+ DA_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
+ return entries_[i];
+ }
+
+ void set_file_size(UInt64 x) {
+ file_size_ = x;
+ }
+ void set_num_keys(UInt32 x) {
+ num_keys_ = x;
+ }
+ void set_max_num_keys(UInt32 x) {
+ max_num_keys_ = x;
+ }
+ void set_num_phantoms(UInt32 x) {
+ num_phantoms_ = x;
+ }
+ void set_num_zombies(UInt32 x) {
+ num_zombies_ = x;
+ }
+ void set_num_blocks(UInt32 x) {
+ num_blocks_ = x;
+ }
+ void set_max_num_blocks(UInt32 x) {
+ max_num_blocks_ = x;
+ }
+ void set_key_buf_size(UInt32 x) {
+ key_buf_size_ = x;
+ }
+ void set_ith_entry(UInt32 i,
+ UInt32 x) {
+ DA_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
+ DA_DEBUG_THROW_IF((x != INVALID_ENTRY) && (x >= num_blocks()));
+ entries_[i] = x;
+ }
+
+ private:
+ UInt64 file_size_;
+ UInt32 num_keys_;
+ UInt32 max_num_keys_;
+ UInt32 num_phantoms_;
+ UInt32 num_zombies_;
+ UInt32 num_blocks_;
+ UInt32 max_num_blocks_;
+ UInt32 key_buf_size_;
+ UInt32 entries_[MAX_BLOCK_LEVEL + 1];
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_HEADER_H
Added: lib/dat/key-info.h (+29 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/key-info.h 2011-06-17 14:46:20 +0000 (3640501)
@@ -0,0 +1,29 @@
+#ifndef GRN_DAT_KEY_INFO_H
+#define GRN_DAT_KEY_INFO_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class KeyInfo {
+ public:
+ KeyInfo()
+ : offset_(0) {}
+
+ UInt32 offset() const {
+ return offset_;
+ }
+
+ void set_offset(UInt32 x) {
+ offset_ = x;
+ }
+
+ private:
+ UInt32 offset_;
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_KEY_INFO_H
Added: lib/dat/key.h (+45 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/key.h 2011-06-17 14:46:20 +0000 (3e7d242)
@@ -0,0 +1,45 @@
+#ifndef GRN_DAT_KEY_H
+#define GRN_DAT_KEY_H
+
+#include "./common.h"
+
+namespace grn {
+namespace dat {
+
+class Key {
+ public:
+ const char &operator[](UInt32 i) const {
+ DA_DEBUG_THROW_IF(i > length_);
+ return ptr_[i];
+ }
+
+ const char *ptr() const {
+ return ptr_;
+ }
+ UInt32 length() const {
+ return length_;
+ }
+ UInt32 id() const {
+ return id_;
+ }
+
+ void set_ptr(const char *x) {
+ ptr_ = x;
+ }
+ void set_length(UInt32 x) {
+ length_ = x;
+ }
+ void set_id(UInt32 x) {
+ id_ = x;
+ }
+
+ private:
+ const char *ptr_;
+ UInt32 length_;
+ UInt32 id_;
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_KEY_H
Added: lib/dat/node.h (+111 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/node.h 2011-06-17 14:46:20 +0000 (0c9d1d6)
@@ -0,0 +1,111 @@
+#ifndef GRN_DAT_NODE_H
+#define GRN_DAT_NODE_H
+
+#include "./base.h"
+#include "./check.h"
+
+namespace grn {
+namespace dat {
+
+class Node {
+ public:
+ Node() : base_(), check_() {}
+
+ Base base() const {
+ return base_;
+ }
+ bool is_terminal() const {
+ DA_DEBUG_THROW_IF(is_phantom());
+ return base_.is_terminal();
+ }
+ UInt32 offset() const {
+ DA_DEBUG_THROW_IF(is_phantom());
+ return base_.offset();
+ }
+ UInt32 key_id() const {
+ DA_DEBUG_THROW_IF(is_phantom());
+ return base_.key_id();
+ }
+
+ Check check() const {
+ return check_;
+ }
+ bool is_offset() const {
+ return check_.is_offset();
+ }
+ UInt32 except_is_offset() const {
+ return check_.except_is_offset();
+ }
+ bool is_phantom() const {
+ return check_.is_phantom();
+ }
+ UInt32 next() const {
+ return check_.next();
+ }
+ UInt32 prev() const {
+ return check_.prev();
+ }
+ UInt32 label() const {
+ return check_.label();
+ }
+ UInt32 child() const {
+ return check_.child();
+ }
+ UInt32 sibling() const {
+ return check_.sibling();
+ }
+
+ void set_base(Base x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ base_ = x;
+ }
+ void set_offset(UInt32 x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ base_.set_offset(x);
+ }
+ void set_key_id(UInt32 x) {
+ DA_DEBUG_THROW_IF(is_phantom());
+ base_.set_key_id(x);
+ }
+
+ void set_check(Check x) {
+ check_ = x;
+ }
+ void set_is_offset(bool x) {
+ check_.set_is_offset(x);
+ }
+ void set_except_is_offset(UInt32 x) {
+ check_.set_except_is_offset(x);
+ }
+ void set_is_phantom(bool x) {
+ DA_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
+ check_.set_is_phantom(x);
+ }
+ void set_next(UInt32 x) {
+ DA_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
+ check_.set_next(x);
+ }
+ void set_prev(UInt32 x) {
+ DA_DEBUG_THROW_IF(base_.offset() != INVALID_OFFSET);
+ check_.set_prev(x);
+ }
+ void set_label(UInt32 x) {
+ DA_DEBUG_THROW_IF(offset() != INVALID_OFFSET);
+ check_.set_label(x);
+ }
+ void set_child(UInt32 x) {
+ check_.set_child(x);
+ }
+ void set_sibling(UInt32 x) {
+ check_.set_sibling(x);
+ }
+
+ private:
+ Base base_;
+ Check check_;
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_NODE_H
Added: lib/dat/timer.h (+39 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/timer.h 2011-06-17 14:46:20 +0000 (c852070)
@@ -0,0 +1,39 @@
+#ifndef DA_TIMER_H_
+#define DA_TIMER_H_
+
+#include "./common.h"
+
+#include <sys/time.h>
+
+namespace da {
+
+class Timer {
+ public:
+ Timer()
+ : base_(get_time()) {}
+
+ double elapsed() const {
+ return get_time() - base_;
+ }
+
+ void reset() {
+ base_ = get_time();
+ }
+
+ private:
+ double base_;
+
+ static double get_time() {
+ struct timeval tv;
+ ::gettimeofday(&tv, NULL);
+ return tv.tv_sec + (tv.tv_usec * 0.000001);
+ }
+
+ // Disallows copy and assignment.
+ Timer(const Timer &);
+ Timer &operator=(const Timer &);
+};
+
+} // namespace da
+
+#endif // DA_TIMER_H_
Added: lib/dat/trie.cc (+900 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/trie.cc 2011-06-17 14:46:20 +0000 (f5dc7d7)
@@ -0,0 +1,900 @@
+#include "./trie.h"
+
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <cstring>
+#include <new>
+
+namespace grn {
+namespace dat {
+
+Trie::~Trie() {
+ if (header_ != NULL) {
+ ::munmap(header_, header_->file_size());
+ }
+}
+
+Trie *Trie::create(const char *file_name,
+ UInt64 file_size,
+ UInt32 max_num_keys,
+ double num_nodes_per_key,
+ double average_key_length) {
+ DA_PARAM_ERROR_IF((file_size != 0) && (max_num_keys != 0));
+
+ if (num_nodes_per_key == 0.0) {
+ num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY;
+ }
+ DA_PARAM_ERROR_IF(num_nodes_per_key < 1.0);
+
+ if (average_key_length == 0.0) {
+ average_key_length = DEFAULT_AVERAGE_KEY_LENGTH;
+ }
+ DA_PARAM_ERROR_IF(average_key_length < 1.0);
+ DA_PARAM_ERROR_IF(average_key_length > MAX_KEY_LENGTH);
+
+ if (max_num_keys == 0) {
+ if (file_size == 0) {
+ file_size = DEFAULT_FILE_SIZE;
+ }
+ } else {
+ DA_PARAM_ERROR_IF(max_num_keys > MAX_NUM_KEYS);
+ }
+
+ Trie *new_trie = new (std::nothrow) Trie;
+ DA_MEMORY_ERROR_IF(new_trie == NULL);
+ try {
+ new_trie->create_file(file_name, file_size, max_num_keys,
+ num_nodes_per_key, average_key_length);
+ return new_trie;
+ } catch (...) {
+ delete new_trie;
+ throw;
+ }
+}
+
+Trie *Trie::create(const Trie &trie,
+ const char *file_name,
+ UInt64 file_size,
+ UInt32 max_num_keys,
+ double num_nodes_per_key,
+ double average_key_length) {
+ DA_PARAM_ERROR_IF((file_size != 0) && (max_num_keys != 0));
+
+ if (num_nodes_per_key == 0.0) {
+ if (trie.num_keys() == 0) {
+ num_nodes_per_key = DEFAULT_NUM_NODES_PER_KEY;
+ } else {
+ num_nodes_per_key = 1.0 * trie.num_nodes() / trie.num_keys();
+ }
+ }
+ DA_PARAM_ERROR_IF(num_nodes_per_key < 1.0);
+
+ if (average_key_length == 0.0) {
+ if (trie.num_keys() == 0) {
+ average_key_length = DEFAULT_AVERAGE_KEY_LENGTH;
+ } else {
+ average_key_length = 1.0 * trie.total_key_length() / trie.num_keys();
+ }
+ }
+ DA_PARAM_ERROR_IF(average_key_length < 1.0);
+ DA_PARAM_ERROR_IF(average_key_length > MAX_KEY_LENGTH);
+
+ if (max_num_keys == 0) {
+ if (file_size == 0) {
+ file_size = trie.file_size();
+ }
+ DA_PARAM_ERROR_IF(file_size < trie.virtual_size());
+ } else {
+ DA_PARAM_ERROR_IF(max_num_keys < trie.num_keys());
+ DA_PARAM_ERROR_IF(max_num_keys > MAX_KEY_ID);
+ }
+
+ Trie *new_trie = new (std::nothrow) Trie;
+ DA_MEMORY_ERROR_IF(new_trie == NULL);
+ try {
+ new_trie->create_file(file_name, file_size, max_num_keys,
+ num_nodes_per_key, average_key_length);
+ if (trie.num_keys() != 0) {
+ new_trie->build_from_trie(trie);
+ }
+ return new_trie;
+ } catch (...) {
+ delete new_trie;
+ throw;
+ }
+}
+
+Trie *Trie::open(const char *file_name) {
+ DA_PARAM_ERROR_IF(file_name == NULL);
+
+ Trie *new_trie = new Trie;
+ try {
+ new_trie->open_file(file_name);
+ return new_trie;
+ } catch (...) {
+ delete new_trie;
+ throw;
+ }
+}
+
+Trie::Trie()
+ : header_(NULL),
+ nodes_(NULL),
+ blocks_(NULL),
+ key_infos_(NULL),
+ key_buf_(NULL) {}
+
+void Trie::create_file(const char *file_name,
+ UInt64 file_size,
+ UInt32 max_num_keys,
+ double num_nodes_per_key,
+ double average_key_length) {
+ DA_PARAM_ERROR_IF((file_size == 0) && (max_num_keys == 0));
+ DA_PARAM_ERROR_IF((file_size != 0) && (max_num_keys != 0));
+
+ UInt32 max_num_blocks;
+ UInt32 key_buf_size;
+ if (max_num_keys == 0) {
+ const UInt64 avail = file_size - sizeof(Header);
+ const double num_bytes_per_key = (sizeof(Node) * num_nodes_per_key)
+ + (1.0 * sizeof(Block) / BLOCK_SIZE * num_nodes_per_key)
+ + sizeof(KeyInfo) + average_key_length;
+ DA_PARAM_ERROR_IF((avail / num_bytes_per_key) > MAX_NUM_KEYS);
+ max_num_keys = (UInt32)(avail / num_bytes_per_key);
+ DA_PARAM_ERROR_IF(max_num_keys == 0);
+ }
+
+ {
+ const double max_num_nodes = num_nodes_per_key * max_num_keys;
+ DA_PARAM_ERROR_IF(max_num_nodes > MAX_NUM_NODES);
+ max_num_blocks = ((UInt32)max_num_nodes + BLOCK_SIZE - 1) / BLOCK_SIZE;
+ DA_PARAM_ERROR_IF(max_num_blocks == 0);
+ DA_PARAM_ERROR_IF(max_num_blocks > MAX_NUM_BLOCKS);
+ }
+
+ if (file_size == 0) {
+ const double total_key_length = average_key_length * max_num_keys;
+ DA_PARAM_ERROR_IF(total_key_length > MAX_KEY_BUF_SIZE);
+ key_buf_size = (UInt32)total_key_length;
+
+ file_size = sizeof(Header)
+ + (sizeof(Block) * max_num_blocks)
+ + (sizeof(Node) * BLOCK_SIZE * max_num_blocks)
+ + (sizeof(KeyInfo) * (max_num_keys + 1))
+ + key_buf_size;
+ } else {
+ const UInt64 avail = file_size - sizeof(Header)
+ - (sizeof(Block) * max_num_blocks)
+ - (sizeof(Node) * BLOCK_SIZE * max_num_blocks)
+ - (sizeof(KeyInfo) * (max_num_keys + 1));
+ DA_PARAM_ERROR_IF(avail == 0);
+ DA_PARAM_ERROR_IF(avail > MAX_KEY_BUF_SIZE);
+ key_buf_size = (UInt32)avail;
+ }
+
+ create_file(file_name, file_size, max_num_keys,
+ max_num_blocks, key_buf_size);
+}
+
+void Trie::create_file(const char *file_name,
+ UInt64 file_size,
+ UInt32 max_num_keys,
+ UInt32 max_num_blocks,
+ UInt32 key_buf_size) {
+ DA_PARAM_ERROR_IF(file_size != (sizeof(Header)
+ + (sizeof(Block) * max_num_blocks)
+ + (sizeof(Node) * BLOCK_SIZE * max_num_blocks)
+ + (sizeof(KeyInfo) * (max_num_keys + 1))
+ + key_buf_size));
+
+ int fd = -1;
+ if ((file_name != NULL) && (file_name[0] != '\0')) {
+ fd = ::open(file_name, O_RDWR | O_CREAT | O_TRUNC, 0666);
+ DA_IO_ERROR_IF(fd == -1);
+ if (::ftruncate(fd, file_size) == -1) {
+ ::close(fd);
+ DA_THROW(IOError, "::ftruncate(fd, file_size) == -1");
+ }
+ }
+
+ const int flags = (fd == -1) ? (MAP_PRIVATE | MAP_ANONYMOUS) : MAP_SHARED;
+ void * const address = ::mmap(NULL, file_size, PROT_READ | PROT_WRITE,
+ flags, fd, 0);
+ if (address == MAP_FAILED) {
+ if (fd != -1) {
+ ::close(fd);
+ }
+ DA_THROW(IOError, "address == MAP_FAILED");
+ } else if ((fd != -1) && (::close(fd) == -1)) {
+ ::munmap(address, file_size);
+ DA_THROW(IOError, "(fd != -1) && (::close(fd) == -1)");
+ }
+
+ Header * const header = static_cast<Header *>(address);
+ *header = Header();
+ header->set_file_size(file_size);
+ header->set_max_num_keys(max_num_keys);
+ header->set_max_num_blocks(max_num_blocks);
+ header->set_key_buf_size(key_buf_size);
+
+ map_address(address);
+
+ reserve_node(ROOT_NODE_ID);
+ ith_node(INVALID_OFFSET).set_is_offset(true);
+ ith_key_info(KEY_ID_OFFSET).set_offset(0);
+}
+
+void Trie::open_file(const char *file_name) {
+ DA_PARAM_ERROR_IF(file_name == NULL);
+
+ const int fd = ::open(file_name, O_RDWR);
+ DA_IO_ERROR_IF(fd == -1);
+
+ void *address = ::mmap(NULL, sizeof(Header), PROT_READ, MAP_PRIVATE, fd, 0);
+ if (address == MAP_FAILED) {
+ ::close(fd);
+ DA_THROW(IOError, "address == MAP_FAILED");
+ }
+
+ const UInt64 file_size = static_cast<const Header *>(address)->file_size();
+ if (::munmap(address, sizeof(Header)) == -1) {
+ ::close(fd);
+ DA_THROW(IOError, "::munmap(address, sizeof(Header)) == -1");
+ }
+
+ address = ::mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (address == MAP_FAILED) {
+ ::close(fd);
+ DA_THROW(IOError, "address == MAP_FAILED");
+ } else if (::close(fd) == -1) {
+ ::munmap(address, file_size);
+ DA_THROW(IOError, "::close(fd) == -1");
+ }
+
+ map_address(address);
+}
+
+void Trie::map_address(void *address) {
+ DA_PARAM_ERROR_IF(address == NULL);
+
+ header_ = static_cast<Header *>(address);
+ nodes_ = static_cast<Node *>(static_cast<void *>(header_ + 1));
+ blocks_ = static_cast<Block *>(static_cast<void *>(
+ nodes_ + max_num_nodes()));
+ key_infos_ = static_cast<KeyInfo *>(static_cast<void *>(
+ blocks_ + max_num_blocks()));
+ key_buf_ = static_cast<char *>(static_cast<void *>(
+ key_infos_ + (max_num_keys() + 1)));
+}
+
+void Trie::build_from_trie(const Trie &trie) {
+ DA_DEBUG_THROW_IF(trie.num_keys() == 0);
+
+ header_->set_num_keys(trie.num_keys());
+ std::memcpy(key_infos_, trie.key_infos_,
+ sizeof(KeyInfo) * (num_keys() + 1));
+ std::memcpy(key_buf_, trie.key_buf_,
+ ith_key_info(num_keys() + KEY_ID_OFFSET).offset());
+ build_from_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID);
+}
+
+void Trie::build_from_trie(const Trie &trie,
+ UInt32 src,
+ UInt32 dest) {
+ ith_node(dest).set_except_is_offset(trie.ith_node(src).except_is_offset());
+ if (trie.ith_node(src).is_terminal()) {
+ ith_node(dest).set_key_id(trie.ith_node(src).key_id());
+ return;
+ }
+
+ const UInt32 src_offset = trie.ith_node(src).offset();
+ UInt32 dest_offset;
+ {
+ UInt16 labels[MAX_LABEL + 1];
+ UInt32 num_labels = 0;
+
+ UInt32 label = trie.ith_node(src).child();
+ DA_DEBUG_THROW_IF(label == INVALID_LABEL);
+ while (label != INVALID_LABEL) {
+ DA_DEBUG_THROW_IF(label > MAX_LABEL);
+ labels[num_labels++] = label;
+ label = trie.ith_node(src_offset ^ label).sibling();
+ }
+ DA_DEBUG_THROW_IF(num_labels == 0);
+
+ dest_offset = find_offset(labels, num_labels);
+ for (UInt32 i = 0; i < num_labels; ++i) {
+ reserve_node(dest_offset ^ labels[i]);
+ }
+
+ DA_DEBUG_THROW_IF(ith_node(dest_offset).is_offset());
+ ith_node(dest_offset).set_is_offset(true);
+ ith_node(dest).set_offset(dest_offset);
+ }
+
+ UInt32 label = trie.ith_node(src).child();
+ while (label != INVALID_LABEL) {
+ build_from_trie(trie, src_offset ^ label, dest_offset ^ label);
+ label = trie.ith_node(src_offset ^ label).sibling();
+ }
+}
+
+bool Trie::search_from_root(const UInt8 *ptr,
+ UInt32 length,
+ Key *key) const {
+ DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+
+ UInt32 node_id = ROOT_NODE_ID;
+ for (UInt32 i = 0; i < length; ++i) {
+ const Base base = ith_node(node_id).base();
+ if (base.is_terminal()) {
+ return search_from_terminal(ptr, length, key, base.key_id(), i);
+ }
+
+ node_id = base.offset() ^ ptr[i];
+ if (ith_node(node_id).label() != ptr[i]) {
+ return false;
+ }
+ }
+
+ const Base base = ith_node(node_id).base();
+ if (base.is_terminal()) {
+ return search_from_terminal(ptr, length, key, base.key_id(), length);
+ }
+
+ node_id = base.offset() ^ TERMINAL_LABEL;
+ if (ith_node(node_id).label() != TERMINAL_LABEL) {
+ return false;
+ }
+ DA_DEBUG_THROW_IF(!ith_node(node_id).is_terminal());
+
+ if (key != NULL) {
+ const UInt32 key_id = ith_node(node_id).key_id();
+ key->set_ptr(key_buf_ + ith_key_info(key_id).offset());
+ key->set_length(ith_key_info(key_id + 1).offset()
+ - ith_key_info(key_id).offset());
+ key->set_id(key_id);
+ }
+ return true;
+}
+
+bool Trie::search_from_terminal(const UInt8 *ptr,
+ UInt32 length,
+ Key *key,
+ UInt32 key_id,
+ UInt32 i) const {
+ DA_DEBUG_THROW_IF(i > length);
+ DA_DEBUG_THROW_IF(key_id < KEY_ID_OFFSET);
+ DA_DEBUG_THROW_IF(key_id >= (num_keys() + KEY_ID_OFFSET));
+
+ const UInt32 key_length = ith_key_info(key_id + 1).offset()
+ - ith_key_info(key_id).offset();
+ if (key_length != length) {
+ return false;
+ }
+
+ const char * const key_ptr = key_buf_ + ith_key_info(key_id).offset();
+ for ( ; i < length; ++i) {
+ if (ptr[i] != (UInt8)key_ptr[i]) {
+ return false;
+ }
+ }
+
+ if (key != NULL) {
+ key->set_ptr(key_ptr);
+ key->set_length(key_length);
+ key->set_id(key_id);
+ }
+ return true;
+}
+
+bool Trie::insert_from_root(const UInt8 *ptr,
+ UInt32 length,
+ Key *key) {
+ DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+
+ UInt32 node_id = ROOT_NODE_ID;
+ for (UInt32 i = 0; i < length; ++i) {
+ const Base base = ith_node(node_id).base();
+ if (base.is_terminal()) {
+ return insert_from_terminal(ptr, length, key, node_id, i);
+ }
+
+ const UInt32 next = base.offset() ^ ptr[i];
+ if (ith_node(next).label() != ptr[i]) {
+ return insert_from_nonterminal(ptr, length, key, node_id, i);
+ }
+ node_id = next;
+ }
+
+ const Base base = ith_node(node_id).base();
+ if (base.is_terminal()) {
+ return insert_from_terminal(ptr, length, key, node_id, length);
+ }
+
+ const UInt32 next = base.offset() ^ TERMINAL_LABEL;
+ if (ith_node(next).label() != TERMINAL_LABEL) {
+ return insert_from_nonterminal(ptr, length, key, node_id, length);
+ }
+ DA_DEBUG_THROW_IF(!ith_node(next).is_terminal());
+
+ if (key != NULL) {
+ const UInt32 key_id = ith_node(next).key_id();
+ key->set_ptr(key_buf_ + ith_key_info(key_id).offset());
+ key->set_length(ith_key_info(key_id + 1).offset()
+ - ith_key_info(key_id).offset());
+ key->set_id(key_id);
+ }
+ return false;
+}
+
+bool Trie::insert_from_terminal(const UInt8 *ptr,
+ UInt32 length,
+ Key *key,
+ UInt32 node_id,
+ UInt32 i) {
+ DA_DEBUG_THROW_IF(node_id >= num_nodes());
+ DA_DEBUG_THROW_IF(!ith_node(node_id).is_terminal());
+ DA_DEBUG_THROW_IF(i > length);
+
+ UInt32 key_id = ith_node(node_id).key_id();
+ const char *key_ptr = key_buf_ + ith_key_info(key_id).offset();
+ UInt32 key_length = ith_key_info(key_id + 1).offset()
+ - ith_key_info(key_id).offset();
+
+ UInt32 j = i;
+ while ((j < length) && (j < key_length)) {
+ if (ptr[j] != (UInt8)key_ptr[j]) {
+ break;
+ }
+ ++j;
+ }
+
+ if ((j == length) && (j == key_length)) {
+ if (key != NULL) {
+ key->set_ptr(key_ptr);
+ key->set_length(key_length);
+ key->set_id(key_id);
+ }
+ return false;
+ }
+
+ DA_SIZE_ERROR_IF(num_keys() >= max_num_keys());
+ key_id = num_keys() + KEY_ID_OFFSET;
+
+ const UInt32 key_offset = ith_key_info(key_id).offset();
+ DA_SIZE_ERROR_IF(length > (key_buf_size() - key_offset));
+
+ for (UInt32 k = i; k < j; ++k) {
+ node_id = insert_node(node_id, ptr[k]);
+ }
+ node_id = separate(ptr, length, node_id, j);
+
+ ith_key_info(key_id + 1).set_offset(key_offset + length);
+ std::memcpy(key_buf_ + key_offset, ptr, length);
+ header_->set_num_keys(num_keys() + 1);
+
+ ith_node(node_id).set_key_id(key_id);
+
+ if (key != NULL) {
+ key->set_ptr(key_buf_ + key_offset);
+ key->set_length(length);
+ key->set_id(key_id);
+ }
+ return true;
+}
+
+bool Trie::insert_from_nonterminal(const UInt8 *ptr,
+ UInt32 length,
+ Key *key,
+ UInt32 node_id,
+ UInt32 i) {
+ DA_DEBUG_THROW_IF(node_id >= num_nodes());
+ DA_DEBUG_THROW_IF(ith_node(node_id).is_terminal());
+ DA_DEBUG_THROW_IF(i > length);
+
+ DA_SIZE_ERROR_IF(num_keys() >= max_num_keys());
+ const UInt32 key_id = num_keys() + KEY_ID_OFFSET;
+
+ const UInt32 key_offset = ith_key_info(key_id).offset();
+ DA_SIZE_ERROR_IF(length > (key_buf_size() - key_offset));
+
+ const UInt16 label = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL;
+ const Base base = ith_node(node_id).base();
+ if ((base.offset() == INVALID_OFFSET) ||
+ !ith_node(base.offset() ^ label).is_phantom()) {
+ resolve(node_id, label);
+ }
+ node_id = insert_node(node_id, label);
+
+ ith_key_info(key_id + 1).set_offset(key_offset + length);
+ std::memcpy(key_buf_ + key_offset, ptr, length);
+ header_->set_num_keys(num_keys() + 1);
+
+ ith_node(node_id).set_key_id(key_id);
+
+ if (key != NULL) {
+ key->set_ptr(key_buf_ + key_offset);
+ key->set_length(length);
+ key->set_id(key_id);
+ }
+ return true;
+}
+
+UInt32 Trie::insert_node(UInt32 node_id,
+ UInt16 label) {
+ DA_DEBUG_THROW_IF(node_id >= num_nodes());
+ DA_DEBUG_THROW_IF(label > MAX_LABEL);
+
+ const Base base = ith_node(node_id).base();
+ UInt32 offset;
+ if (base.is_terminal() || (base.offset() == INVALID_OFFSET)) {
+ offset = find_offset(&label, 1);
+ } else {
+ offset = base.offset();
+ }
+
+ const UInt32 next = offset ^ label;
+ reserve_node(next);
+
+ ith_node(next).set_label(label);
+ if (base.is_terminal()) {
+ DA_DEBUG_THROW_IF(ith_node(offset).is_offset());
+ ith_node(offset).set_is_offset(true);
+ ith_node(next).set_key_id(base.key_id());
+ } else if (base.offset() == INVALID_OFFSET) {
+ DA_DEBUG_THROW_IF(ith_node(offset).is_offset());
+ ith_node(offset).set_is_offset(true);
+ } else {
+ DA_DEBUG_THROW_IF(!ith_node(offset).is_offset());
+ }
+ ith_node(node_id).set_offset(offset);
+
+ const UInt32 child_label = ith_node(node_id).child();
+ DA_DEBUG_THROW_IF(child_label == label);
+ if (child_label == INVALID_LABEL) {
+ ith_node(node_id).set_child(label);
+ } else if ((label == TERMINAL_LABEL) || (label < child_label)) {
+ DA_DEBUG_THROW_IF(ith_node(offset ^ child_label).is_phantom());
+ DA_DEBUG_THROW_IF(ith_node(offset ^ child_label).label() != child_label);
+ ith_node(next).set_sibling(child_label);
+ ith_node(node_id).set_child(label);
+ } else {
+ UInt32 prev = offset ^ child_label;
+ DA_DEBUG_THROW_IF(ith_node(prev).label() != child_label);
+ UInt32 sibling_label = ith_node(prev).sibling();
+ while (label > sibling_label) {
+ prev = offset ^ sibling_label;
+ DA_DEBUG_THROW_IF(ith_node(prev).label() != sibling_label);
+ sibling_label = ith_node(prev).sibling();
+ }
+ DA_DEBUG_THROW_IF(label == sibling_label);
+ ith_node(next).set_sibling(ith_node(prev).sibling());
+ ith_node(prev).set_sibling(label);
+ }
+ return next;
+}
+
+UInt32 Trie::separate(const UInt8 *ptr,
+ UInt32 length,
+ UInt32 node_id,
+ UInt32 i) {
+ DA_DEBUG_THROW_IF(node_id >= num_nodes());
+ DA_DEBUG_THROW_IF(!ith_node(node_id).is_terminal());
+ DA_DEBUG_THROW_IF(i > length);
+
+ const UInt32 key_id = ith_node(node_id).key_id();
+ const UInt32 key_offset = ith_key_info(key_id).offset();
+ const char *key_ptr = key_buf_ + key_offset;
+ const UInt32 key_length = ith_key_info(key_id + 1).offset() - key_offset;
+
+ UInt16 labels[2];
+ labels[0] = (i < key_length) ?
+ (UInt16)(UInt8)key_ptr[i] : (UInt16)TERMINAL_LABEL;
+ labels[1] = (i < length) ? (UInt16)ptr[i] : (UInt16)TERMINAL_LABEL;
+ DA_DEBUG_THROW_IF(labels[0] == labels[1]);
+
+ const UInt32 offset = find_offset(labels, 2);
+ DA_DEBUG_THROW_IF(ith_node(offset).is_offset());
+
+ UInt32 next = offset ^ labels[0];
+ reserve_node(next);
+
+ ith_node(next).set_label(labels[0]);
+ ith_node(next).set_key_id(key_id);
+
+ next = offset ^ labels[1];
+ reserve_node(next);
+
+ ith_node(next).set_label(labels[1]);
+
+ ith_node(offset).set_is_offset(true);
+ ith_node(node_id).set_offset(offset);
+
+ if ((labels[0] == TERMINAL_LABEL) || (labels[0] < labels[1])) {
+ ith_node(node_id).set_child(labels[0]);
+ ith_node(offset ^ labels[0]).set_sibling(labels[1]);
+ } else {
+ ith_node(node_id).set_child(labels[1]);
+ ith_node(offset ^ labels[1]).set_sibling(labels[0]);
+ }
+ return next;
+}
+
+void Trie::resolve(UInt32 node_id,
+ UInt16 label) {
+ DA_DEBUG_THROW_IF(node_id >= num_nodes());
+ DA_DEBUG_THROW_IF(ith_node(node_id).is_terminal());
+ DA_DEBUG_THROW_IF(label > MAX_LABEL);
+
+ UInt32 offset = ith_node(node_id).offset();
+ if (offset != INVALID_OFFSET) {
+ UInt16 labels[MAX_LABEL + 1];
+ UInt32 num_labels = 0;
+
+ UInt32 next_label = ith_node(node_id).child();
+ DA_DEBUG_THROW_IF(next_label == INVALID_LABEL);
+ while (next_label != INVALID_LABEL) {
+ DA_DEBUG_THROW_IF(next_label > MAX_LABEL);
+ labels[num_labels++] = next_label;
+ next_label = ith_node(offset ^ next_label).sibling();
+ }
+ DA_DEBUG_THROW_IF(num_labels == 0);
+
+ labels[num_labels] = label;
+ offset = find_offset(labels, num_labels + 1);
+ move_nodes(node_id, offset, labels, num_labels);
+ } else {
+ offset = find_offset(&label, 1);
+ if (offset >= num_nodes()) {
+ DA_DEBUG_THROW_IF((offset / BLOCK_SIZE) != num_blocks());
+ reserve_block(num_blocks());
+ }
+ ith_node(offset).set_is_offset(true);
+ ith_node(node_id).set_offset(offset);
+ }
+}
+
+void Trie::move_nodes(UInt32 node_id,
+ UInt32 dest_offset,
+ const UInt16 *labels,
+ UInt32 num_labels) {
+ DA_DEBUG_THROW_IF(node_id >= num_nodes());
+ DA_DEBUG_THROW_IF(ith_node(node_id).is_terminal());
+ DA_DEBUG_THROW_IF(labels == NULL);
+ DA_DEBUG_THROW_IF(num_labels == 0);
+ DA_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1));
+
+ const UInt32 src_offset = ith_node(node_id).offset();
+ DA_DEBUG_THROW_IF(src_offset == INVALID_OFFSET);
+ DA_DEBUG_THROW_IF(!ith_node(src_offset).is_offset());
+
+ for (UInt32 i = 0; i < num_labels; ++i) {
+ const UInt32 src_node_id = src_offset ^ labels[i];
+ const UInt32 dest_node_id = dest_offset ^ labels[i];
+ DA_DEBUG_THROW_IF(ith_node(src_node_id).is_phantom());
+ DA_DEBUG_THROW_IF(ith_node(src_node_id).label() != labels[i]);
+
+ reserve_node(dest_node_id);
+ ith_node(dest_node_id).set_except_is_offset(
+ ith_node(src_node_id).except_is_offset());
+ ith_node(dest_node_id).set_base(ith_node(src_node_id).base());
+ }
+ header_->set_num_zombies(num_zombies() + num_labels);
+
+ DA_DEBUG_THROW_IF(ith_node(dest_offset).is_offset());
+ ith_node(dest_offset).set_is_offset(true);
+ ith_node(node_id).set_offset(dest_offset);
+}
+
+UInt32 Trie::find_offset(const UInt16 *labels,
+ UInt32 num_labels) {
+ DA_DEBUG_THROW_IF(labels == NULL);
+ DA_DEBUG_THROW_IF(num_labels == 0);
+ DA_DEBUG_THROW_IF(num_labels > (MAX_LABEL + 1));
+
+ // Blocks are tested in descending order of level, a lower level block
+ // contains more phantom nodes.
+ UInt32 level = 1;
+ while (num_labels >= (1U << level)) {
+ ++level;
+ }
+ level = (level < MAX_BLOCK_LEVEL) ? (MAX_BLOCK_LEVEL - level) : 0;
+
+ UInt32 block_count = 0;
+ do {
+ UInt32 entry = header_->ith_entry(level);
+ if (entry == INVALID_ENTRY) {
+ // This level has no blocks and it is thus skipped.
+ continue;
+ }
+
+ UInt32 block_id = entry;
+ do {
+ const Block &block = ith_block(block_id);
+ DA_DEBUG_THROW_IF(block.level() != level);
+
+ const UInt32 first = (block_id * BLOCK_SIZE) | block.first_phantom();
+ UInt32 node_id = first;
+ do {
+ DA_DEBUG_THROW_IF(!ith_node(node_id).is_phantom());
+ const UInt32 offset = node_id ^ labels[0];
+ if (!ith_node(offset).is_offset()) {
+ UInt32 i = 1;
+ for ( ; i < num_labels; ++i) {
+ if (!ith_node(offset ^ labels[i]).is_phantom()) {
+ break;
+ }
+ }
+ if (i >= num_labels) {
+ return offset;
+ }
+ }
+ node_id = (block_id * BLOCK_SIZE) | ith_node(node_id).next();
+ } while (node_id != first);
+
+ const UInt32 prev = block_id;
+ const UInt32 next = block.next();
+ block_id = next;
+ ith_block(prev).set_fail_count(ith_block(prev).fail_count() + 1);
+
+ // The level of a block is updated when this function fails many times,
+ // actually MAX_FAIL_COUNT times, in that block.
+ if (ith_block(prev).fail_count() == MAX_FAIL_COUNT) {
+ update_block_level(prev, level + 1);
+ if (next == entry) {
+ break;
+ } else {
+ // Note that the entry might be updated in the level update.
+ entry = header_->ith_entry(level);
+ continue;
+ }
+ }
+ } while ((++block_count < MAX_BLOCK_COUNT) && (block_id != entry));
+ } while ((block_count < MAX_BLOCK_COUNT) && (level-- != 0));
+
+ return num_nodes() ^ labels[0];
+}
+
+void Trie::reserve_node(UInt32 node_id) {
+ DA_DEBUG_THROW_IF(node_id > num_nodes());
+ if (node_id >= num_nodes()) {
+ reserve_block(node_id / BLOCK_SIZE);
+ }
+
+ Node &node = ith_node(node_id);
+ DA_DEBUG_THROW_IF(!node.is_phantom());
+
+ const UInt32 block_id = node_id / BLOCK_SIZE;
+ Block &block = ith_block(block_id);
+ DA_DEBUG_THROW_IF(block.num_phantoms() == 0);
+
+ const UInt32 next = (block_id * BLOCK_SIZE) | node.next();
+ const UInt32 prev = (block_id * BLOCK_SIZE) | node.prev();
+ DA_DEBUG_THROW_IF(next >= num_nodes());
+ DA_DEBUG_THROW_IF(prev >= num_nodes());
+
+ if ((node_id & BLOCK_MASK) == block.first_phantom()) {
+ // The first phantom node is removed from the block and the second phantom
+ // node comes first.
+ block.set_first_phantom(next & BLOCK_MASK);
+ }
+
+ ith_node(next).set_prev(prev & BLOCK_MASK);
+ ith_node(prev).set_next(next & BLOCK_MASK);
+
+ if (block.level() != MAX_BLOCK_LEVEL) {
+ const UInt32 threshold = 1U << ((MAX_BLOCK_LEVEL - block.level() - 1) * 2);
+ if (block.num_phantoms() == threshold) {
+ update_block_level(block_id, block.level() + 1);
+ }
+ }
+ block.set_num_phantoms(block.num_phantoms() - 1);
+
+ node.set_is_phantom(false);
+
+ DA_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET);
+ DA_DEBUG_THROW_IF(node.label() != INVALID_LABEL);
+
+ header_->set_num_phantoms(num_phantoms() - 1);
+}
+
+void Trie::reserve_block(UInt32 block_id) {
+ DA_DEBUG_THROW_IF(block_id != num_blocks());
+ DA_SIZE_ERROR_IF(block_id >= max_num_blocks());
+
+ header_->set_num_blocks(block_id + 1);
+ ith_block(block_id).set_fail_count(0);
+ ith_block(block_id).set_first_phantom(0);
+ ith_block(block_id).set_num_phantoms(BLOCK_SIZE);
+
+ const UInt32 begin = block_id * BLOCK_SIZE;
+ const UInt32 end = begin + BLOCK_SIZE;
+ DA_DEBUG_THROW_IF(end != num_nodes());
+
+ Base base;
+ base.set_offset(INVALID_OFFSET);
+
+ Check check;
+ check.set_is_phantom(true);
+
+ for (UInt32 i = begin; i < end; ++i) {
+ check.set_prev((i - 1) & BLOCK_MASK);
+ check.set_next((i + 1) & BLOCK_MASK);
+ ith_node(i).set_base(base);
+ ith_node(i).set_check(check);
+ }
+
+ // The leve of the new block is 0.
+ set_block_level(block_id, 0);
+ header_->set_num_phantoms(num_phantoms() + BLOCK_SIZE);
+}
+
+void Trie::update_block_level(UInt32 block_id,
+ UInt32 level) {
+ DA_DEBUG_THROW_IF(block_id >= num_blocks());
+ DA_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL);
+
+ clear_block_level(block_id);
+ set_block_level(block_id, level);
+}
+
+void Trie::set_block_level(UInt32 block_id,
+ UInt32 level) {
+ DA_DEBUG_THROW_IF(block_id >= num_blocks());
+ DA_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL);
+
+ const UInt32 entry = header_->ith_entry(level);
+ if (entry == INVALID_ENTRY) {
+ // The new block becomes the only one block of the linked list.
+ ith_block(block_id).set_next(block_id);
+ ith_block(block_id).set_prev(block_id);
+ header_->set_ith_entry(level, block_id);
+ } else {
+ // The new block is added to the end of the list.
+ const UInt32 next = entry;
+ const UInt32 prev = ith_block(entry).prev();
+ DA_DEBUG_THROW_IF(next >= num_blocks());
+ DA_DEBUG_THROW_IF(prev >= num_blocks());
+ ith_block(block_id).set_next(next);
+ ith_block(block_id).set_prev(prev);
+ ith_block(next).set_prev(block_id);
+ ith_block(prev).set_next(block_id);
+ }
+ ith_block(block_id).set_level(level);
+ ith_block(block_id).set_fail_count(0);
+}
+
+void Trie::clear_block_level(UInt32 block_id) {
+ DA_DEBUG_THROW_IF(block_id >= num_blocks());
+
+ const UInt32 level = ith_block(block_id).level();
+ DA_DEBUG_THROW_IF(level > MAX_BLOCK_LEVEL);
+
+ const UInt32 entry = header_->ith_entry(level);
+ DA_DEBUG_THROW_IF(entry == INVALID_ENTRY);
+
+ const UInt32 next = ith_block(block_id).next();
+ const UInt32 prev = ith_block(block_id).prev();
+ DA_DEBUG_THROW_IF(next >= num_blocks());
+ DA_DEBUG_THROW_IF(prev >= num_blocks());
+
+ if (next == block_id) {
+ // The linked list becomes empty.
+ header_->set_ith_entry(level, INVALID_ENTRY);
+ } else {
+ ith_block(next).set_prev(prev);
+ ith_block(prev).set_next(next);
+ if (block_id == entry) {
+ // The second block becomes the entry to the linked list.
+ header_->set_ith_entry(level, next);
+ }
+ }
+}
+
+} // namespace dat
+} // namespace grn
Added: lib/dat/trie.h (+223 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/trie.h 2011-06-17 14:46:20 +0000 (f507de4)
@@ -0,0 +1,223 @@
+#ifndef GRN_DAT_TRIE_H
+#define GRN_DAT_TRIE_H
+
+#include "./header.h"
+#include "./node.h"
+#include "./block.h"
+#include "./key-info.h"
+#include "./key.h"
+
+namespace grn {
+namespace dat {
+
+class Trie {
+ public:
+ static const UInt32 DEFAULT_FILE_SIZE = 1U << 20;
+ static const double DEFAULT_NUM_NODES_PER_KEY = 4.0;
+ static const double DEFAULT_AVERAGE_KEY_LENGTH = 16.0;
+
+ static const UInt32 MAX_NUM_NODES = (UInt32)MAX_NODE_ID + 1;
+ static const UInt32 MAX_NUM_BLOCKS = (UInt32)MAX_BLOCK_ID + 1;
+ static const UInt32 MAX_NUM_KEYS = (UInt32)MAX_KEY_ID + 1;
+ static const UInt32 MAX_KEY_BUF_SIZE = 0xFFFFFFFFU;
+
+ ~Trie();
+
+ static Trie *create(const char *file_name = NULL,
+ UInt64 file_size = 0,
+ UInt32 max_num_keys = 0,
+ double num_nodes_per_key = 0.0,
+ double average_key_length = 0.0);
+
+ static Trie *create(const Trie &src_trie,
+ const char *file_name = NULL,
+ UInt64 file_size = 0,
+ UInt32 max_num_keys = 0,
+ double num_nodes_per_key = 0.0,
+ double average_key_length = 0.0);
+
+ static Trie *open(const char *file_name);
+
+// void close();
+
+ void ith_key(UInt32 key_id, Key *key) const {
+ DA_DEBUG_THROW_IF(key_id < KEY_ID_OFFSET);
+ DA_DEBUG_THROW_IF(key_id >= (num_keys() + KEY_ID_OFFSET));
+ DA_DEBUG_THROW_IF(key == NULL);
+
+ key->set_ptr(key_buf_ + ith_key_info(key_id).offset());
+ key->set_length(ith_key_info(key_id + 1).offset()
+ - ith_key_info(key_id).offset());
+ key->set_id(key_id);
+ }
+
+ bool search(const void *ptr, UInt32 length, Key *key = NULL) const {
+ DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+ return search_from_root(static_cast<const UInt8 *>(ptr), length, key);
+ }
+ bool insert(const void *ptr, UInt32 length, Key *key = NULL) {
+ DA_DEBUG_THROW_IF((ptr == NULL) && (length != 0));
+ return insert_from_root(static_cast<const UInt8 *>(ptr), length, key);
+ }
+
+ const Header &header() const {
+ return *header_;
+ }
+
+ UInt64 file_size() const {
+ return header_->file_size();
+ }
+ UInt64 virtual_size() const {
+ return sizeof(Header)
+ + (sizeof(KeyInfo) * (num_keys() + 1))
+ + (sizeof(Block) * num_blocks())
+ + (sizeof(Node) * num_nodes())
+ + total_key_length();
+ }
+ UInt32 total_key_length() const {
+ return ith_key_info(num_keys() + 1).offset();
+ }
+ UInt32 num_keys() const {
+ return header_->num_keys();
+ }
+ UInt32 max_num_keys() const {
+ return header_->max_num_keys();
+ }
+ UInt32 num_nodes() const {
+ return header_->num_nodes();
+ }
+ UInt32 num_phantoms() const {
+ return header_->num_phantoms();
+ }
+ UInt32 num_zombies() const {
+ return header_->num_zombies();
+ }
+ UInt32 max_num_nodes() const {
+ return header_->max_num_nodes();
+ }
+ UInt32 num_blocks() const {
+ return header_->num_blocks();
+ }
+ UInt32 max_num_blocks() const {
+ return header_->max_num_blocks();
+ }
+ UInt32 key_buf_size() const {
+ return header_->key_buf_size();
+ }
+
+ private:
+ Header *header_;
+ Node *nodes_;
+ Block *blocks_;
+ KeyInfo *key_infos_;
+ char *key_buf_;
+
+ Trie();
+
+ void create_file(const char *file_name,
+ UInt64 file_size,
+ UInt32 max_num_keys,
+ double num_nodes_per_key,
+ double average_key_length);
+ void create_file(const char *file_name,
+ UInt64 file_size,
+ UInt32 max_num_keys,
+ UInt32 max_num_blocks,
+ UInt32 key_buf_size);
+ void create_root();
+
+ void open_file(const char *file_name);
+
+ void map_address(void *address);
+
+ void build_from_trie(const Trie &trie);
+ void build_from_trie(const Trie &trie,
+ UInt32 src,
+ UInt32 dest);
+
+ bool search_from_root(const UInt8 *ptr,
+ UInt32 length,
+ Key *key) const;
+ bool search_from_terminal(const UInt8 *ptr,
+ UInt32 length,
+ Key *key,
+ UInt32 key_id,
+ UInt32 i) const;
+
+ bool insert_from_root(const UInt8 *ptr,
+ UInt32 length,
+ Key *key);
+ bool insert_from_terminal(const UInt8 *ptr,
+ UInt32 length,
+ Key *key,
+ UInt32 node_id,
+ UInt32 i);
+ bool insert_from_nonterminal(const UInt8 *ptr,
+ UInt32 length,
+ Key *key,
+ UInt32 node_id,
+ UInt32 i);
+
+ UInt32 insert_node(UInt32 node_id,
+ UInt16 label);
+
+ UInt32 separate(const UInt8 *ptr,
+ UInt32 length,
+ UInt32 node_id,
+ UInt32 i);
+ void resolve(UInt32 node_id,
+ UInt16 label);
+ void move_nodes(UInt32 node_id,
+ UInt32 dest_offset,
+ const UInt16 *labels,
+ UInt32 num_labels);
+
+ UInt32 find_offset(const UInt16 *labels,
+ UInt32 num_labels);
+
+ void reserve_node(UInt32 node_id);
+ void reserve_block(UInt32 block_id);
+
+ void update_block_level(UInt32 block_id,
+ UInt32 level);
+ void set_block_level(UInt32 block_id,
+ UInt32 level);
+ void clear_block_level(UInt32 block_id);
+
+ const Node &ith_node(UInt32 i) const {
+ DA_DEBUG_THROW_IF(i >= num_nodes());
+ return nodes_[i];
+ }
+ const Block &ith_block(UInt32 i) const {
+ DA_DEBUG_THROW_IF(i >= num_blocks());
+ return blocks_[i];
+ }
+ const KeyInfo &ith_key_info(UInt32 i) const {
+ DA_DEBUG_THROW_IF(i < KEY_ID_OFFSET);
+ DA_DEBUG_THROW_IF(i > (num_keys() + KEY_ID_OFFSET));
+ return key_infos_[i - KEY_ID_OFFSET];
+ }
+
+ Node &ith_node(UInt32 i) {
+ DA_DEBUG_THROW_IF(i >= num_nodes());
+ return nodes_[i];
+ }
+ Block &ith_block(UInt32 i) {
+ DA_DEBUG_THROW_IF(i >= num_blocks());
+ return blocks_[i];
+ }
+ KeyInfo &ith_key_info(UInt32 i) {
+ DA_DEBUG_THROW_IF(i < KEY_ID_OFFSET);
+ DA_DEBUG_THROW_IF(i > (num_keys() + KEY_ID_OFFSET + 1));
+ return key_infos_[i - KEY_ID_OFFSET];
+ }
+
+ // Disallows copy and assignment.
+ Trie(const Trie &);
+ Trie &operator=(const Trie &);
+};
+
+} // namespace dat
+} // namespace grn
+
+#endif // GRN_DAT_TRIE_H
Added: lib/dat/usage.h (+46 -0) 100644
===================================================================
--- /dev/null
+++ lib/dat/usage.h 2011-06-17 14:46:20 +0000 (844ffac)
@@ -0,0 +1,46 @@
+#ifndef DA_USAGE_H_
+#define DA_USAGE_H_
+
+#include "./common.h"
+
+#include <sys/time.h>
+#include <sys/resource.h>
+
+namespace da {
+
+class Usage {
+ public:
+ Usage()
+ : usage_() {
+ ::getrusage(RUSAGE_SELF, &usage_);
+ }
+
+ double user_time() const {
+ return usage_.ru_utime.tv_sec + (usage_.ru_utime.tv_usec * 0.000001);
+ }
+ double system_time() const {
+ return usage_.ru_stime.tv_sec + (usage_.ru_stime.tv_usec * 0.000001);
+ }
+ long max_resident_set_size() const {
+ return usage_.ru_maxrss;
+ }
+ long minor_page_faults() const {
+ return usage_.ru_minflt;
+ }
+ long major_page_faults() const {
+ return usage_.ru_majflt;
+ }
+ long file_system_inputs() const {
+ return usage_.ru_inblock;
+ }
+ long file_system_outputs() const {
+ return usage_.ru_oublock;
+ }
+
+ private:
+ struct rusage usage_;
+};
+
+} // namespace da
+
+#endif // DA_USAGE_H_