susumu.yata
null+****@clear*****
Thu Feb 14 15:34:45 JST 2013
susumu.yata 2013-02-14 15:34:45 +0900 (Thu, 14 Feb 2013) New Revision: a3bd274673871f771b7f08f7bb2a2dd0a931407e https://github.com/groonga/grnxx/commit/a3bd274673871f771b7f08f7bb2a2dd0a931407e Log: Implement defragmentation of grnxx::map::da::basic::Trie. Modified files: lib/map/da/basic_trie.cpp lib/map/da/basic_trie.hpp lib/map/da/trie.cpp lib/map/da/trie.hpp Modified: lib/map/da/basic_trie.cpp (+179 -30) =================================================================== --- lib/map/da/basic_trie.cpp 2013-02-14 12:56:55 +0900 (f5a8522) +++ lib/map/da/basic_trie.cpp 2013-02-14 15:34:45 +0900 (06ab83d) @@ -55,14 +55,13 @@ Trie::~Trie() { } } -// TODO: Currently, "options" is not used. -Trie *Trie::create(const TrieOptions &, io::Pool pool) { +Trie *Trie::create(const TrieOptions &options, io::Pool pool) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { GRNXX_ERROR() << "new grnxx::map::Trie failed"; GRNXX_THROW(); } - trie->create_double_array(pool); + trie->create_trie(options, pool); return trie.release(); } @@ -72,13 +71,19 @@ Trie *Trie::open(io::Pool pool, uint32_t block_id) { GRNXX_ERROR() << "new grnxx::map::Trie failed"; GRNXX_THROW(); } - trie->open_double_array(pool, block_id); + trie->open_trie(pool, block_id); return trie.release(); } -// TODO -//da::Trie *Trie::defrag() { -//} +Trie *Trie::defrag(const TrieOptions &options) { + std::unique_ptr<Trie> trie(new (std::nothrow) Trie); + if (!trie) { + GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_THROW(); + } + trie->defrag_trie(options, *this, pool_); + return trie.release(); +} uint32_t Trie::block_id() const { return block_info_->id(); @@ -253,7 +258,7 @@ Trie::Trie() keys_(nullptr), initialized_(false) {} -void Trie::create_double_array(io::Pool pool) { +void Trie::create_trie(const TrieOptions &options, io::Pool pool) { pool_ = pool; block_info_ = pool_.create_block(sizeof(Header)); @@ -262,27 +267,22 @@ void Trie::create_double_array(io::Pool pool) { header_ = static_cast<Header *>(block_address); *header_ = Header(); - const io::BlockInfo *block_info; - - block_info = pool_.create_block(sizeof(Node) * INITIAL_NODES_SIZE); - header_->nodes_block_id = block_info->id(); - header_->nodes_size = INITIAL_NODES_SIZE; - nodes_ = static_cast<Node *>(pool_.get_block_address(*block_info)); - - block_info = pool_.create_block(sizeof(Chunk) * INITIAL_CHUNKS_SIZE); - header_->chunks_block_id = block_info->id(); - header_->chunks_size = INITIAL_CHUNKS_SIZE; - chunks_ = static_cast<Chunk *>(pool_.get_block_address(*block_info)); - - block_info = pool_.create_block(sizeof(Entry) * INITIAL_ENTRIES_SIZE); - header_->entries_block_id = block_info->id(); - header_->entries_size = INITIAL_ENTRIES_SIZE; - entries_ = static_cast<Entry *>(pool_.get_block_address(*block_info)); + header_->nodes_size = static_cast<uint32_t>(options.nodes_size); + header_->nodes_size &= ~CHUNK_MASK; + if (header_->nodes_size == 0) { + header_->nodes_size = INITIAL_NODES_SIZE; + } + header_->chunks_size = header_->nodes_size / CHUNK_SIZE; + header_->entries_size = static_cast<uint32_t>(options.entries_size); + if (header_->entries_size == 0) { + header_->entries_size = INITIAL_ENTRIES_SIZE; + } + header_->keys_size = static_cast<uint32_t>(options.keys_size); + if (header_->keys_size == 0) { + header_->keys_size = INITIAL_KEYS_SIZE; + } - block_info = pool_.create_block(sizeof(uint32_t) * INITIAL_KEYS_SIZE); - header_->keys_block_id = block_info->id(); - header_->keys_size = INITIAL_KEYS_SIZE; - keys_ = static_cast<uint32_t *>(pool_.get_block_address(*block_info)); + create_arrays(); reserve_node(ROOT_NODE_ID); nodes_[INVALID_OFFSET].set_is_origin(true); @@ -290,7 +290,7 @@ void Trie::create_double_array(io::Pool pool) { initialized_ = true; } -void Trie::open_double_array(io::Pool pool, uint32_t block_id) { +void Trie::open_trie(io::Pool pool, uint32_t block_id) { pool_ = pool; initialized_ = true; @@ -311,6 +311,155 @@ void Trie::open_double_array(io::Pool pool, uint32_t block_id) { pool_.get_block_address(header_->keys_block_id)); } +void Trie::defrag_trie(const TrieOptions &options, const Trie &trie, + io::Pool pool) { + uint64_t nodes_size = options.nodes_size; + if (nodes_size == 0) { + nodes_size = trie.header_->num_chunks * CHUNK_SIZE; + nodes_size *= 2; + } + uint64_t entries_size = options.entries_size; + if (entries_size == 0) { + entries_size = trie.header_->max_key_id; + entries_size *= 2; + } + uint64_t keys_size = options.keys_size; + if (keys_size == 0) { + keys_size = trie.header_->next_key_pos; + keys_size *= 2; + } + + if (nodes_size > MAX_NODES_SIZE) { + GRNXX_ERROR() << "too large request: nodes_size = " << nodes_size + << ", MAX_NODES_SIZE = " << MAX_NODES_SIZE; + GRNXX_THROW(); + } + if (entries_size > MAX_ENTRIES_SIZE) { + GRNXX_ERROR() << "too large request: entries_size = " << entries_size + << ", MAX_ENTRIES_SIZE = " << MAX_ENTRIES_SIZE; + GRNXX_THROW(); + } + if (keys_size > MAX_KEYS_SIZE) { + GRNXX_ERROR() << "too large request: keys_size = " << keys_size + << ", MAX_KEYS_SIZE = " << MAX_KEYS_SIZE; + GRNXX_THROW(); + } + + pool_ = pool; + + block_info_ = pool_.create_block(sizeof(Header)); + + void * const block_address = pool_.get_block_address(*block_info_); + header_ = static_cast<Header *>(block_address); + *header_ = Header(); + + header_->nodes_size = static_cast<uint32_t>(nodes_size); + header_->nodes_size &= ~CHUNK_MASK; + if (header_->nodes_size == 0) { + header_->nodes_size = INITIAL_NODES_SIZE; + } + header_->chunks_size = header_->nodes_size / CHUNK_SIZE; + header_->entries_size = static_cast<uint32_t>(entries_size); + if (header_->entries_size == 0) { + header_->entries_size = INITIAL_ENTRIES_SIZE; + } + header_->keys_size = static_cast<uint32_t>(keys_size); + if (header_->keys_size == 0) { + header_->keys_size = INITIAL_KEYS_SIZE; + } + + create_arrays(); + + reserve_node(ROOT_NODE_ID); + nodes_[INVALID_OFFSET].set_is_origin(true); + + header_->total_key_length = trie.header_->total_key_length; + header_->num_keys = trie.header_->num_keys; + header_->max_key_id = trie.header_->max_key_id; + header_->next_key_id = trie.header_->next_key_id; + for (int32_t key_id = MIN_KEY_ID; key_id <= header_->max_key_id; ++key_id) { + entries_[key_id] = trie.entries_[key_id]; + } + + defrag_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID); + + initialized_ = true; +} + +void Trie::defrag_trie(const Trie &trie, uint32_t src, uint32_t dest) { + if (trie.nodes_[src].is_leaf()) { + const Key &key = trie.get_key(trie.nodes_[src].key_pos()); + const uint32_t key_pos = header_->next_key_pos; + new (&keys_[key_pos]) Key(key.id(), key.slice()); + nodes_[dest].set_key_pos(key_pos); + entries_[key.id()] = Entry::valid_entry(key_pos); + header_->next_key_pos += Key::estimate_size(key.size()); + return; + } + + const uint32_t src_offset = trie.nodes_[src].offset(); + uint32_t dest_offset; + { + uint16_t labels[MAX_LABEL + 1]; + uint16_t num_labels = 0; + + uint16_t label = trie.nodes_[src].child(); + while (label != INVALID_LABEL) { + GRNXX_DEBUG_THROW_IF(label > MAX_LABEL); + const uint32_t child = src_offset ^ label; + if (trie.nodes_[child].is_leaf() || + (trie.nodes_[child].child() != INVALID_LABEL)) { + labels[num_labels++] = label; + } + label = trie.nodes_[child].sibling(); + } + if (num_labels == 0) { + return; + } + + dest_offset = find_offset(labels, num_labels); + for (uint16_t i = 0; i < num_labels; ++i) { + const uint32_t child = dest_offset ^ labels[i]; + reserve_node(child); + nodes_[child].set_label(labels[i]); + if ((i + 1) < num_labels) { + nodes_[child].set_sibling(labels[i + 1]); + } + } + + GRNXX_DEBUG_THROW_IF(nodes_[dest_offset].is_origin()); + nodes_[dest_offset].set_is_origin(true); + nodes_[dest].set_offset(dest_offset); + nodes_[dest].set_child(labels[0]); + } + + uint16_t label = nodes_[dest].child(); + while (label != INVALID_LABEL) { + defrag_trie(trie, src_offset ^ label, dest_offset ^ label); + label = nodes_[dest_offset ^ label].sibling(); + } +} + +void Trie::create_arrays() { + const io::BlockInfo *block_info; + + block_info = pool_.create_block(sizeof(Node) * header_->nodes_size); + header_->nodes_block_id = block_info->id(); + nodes_ = static_cast<Node *>(pool_.get_block_address(*block_info)); + + block_info = pool_.create_block(sizeof(Chunk) * header_->chunks_size); + header_->chunks_block_id = block_info->id(); + chunks_ = static_cast<Chunk *>(pool_.get_block_address(*block_info)); + + block_info = pool_.create_block(sizeof(Entry) * header_->entries_size); + header_->entries_block_id = block_info->id(); + entries_ = static_cast<Entry *>(pool_.get_block_address(*block_info)); + + block_info = pool_.create_block(sizeof(uint32_t) * header_->keys_size); + header_->keys_block_id = block_info->id(); + keys_ = static_cast<uint32_t *>(pool_.get_block_address(*block_info)); +} + bool Trie::remove_key(const Slice &key) { uint32_t node_id = ROOT_NODE_ID; size_t query_pos = 0; @@ -501,7 +650,7 @@ uint32_t Trie::append_key(const Slice &key, int32_t key_id) { } const uint32_t key_pos = header_->next_key_pos; - const uint32_t key_size = Key::estimate_size(key); + const uint32_t key_size = Key::estimate_size(key.size()); if (key_size > (header_->keys_size - key_pos)) { GRNXX_ERROR() << "too many keys: key_size = " << key_size Modified: lib/map/da/basic_trie.hpp (+19 -10) =================================================================== --- lib/map/da/basic_trie.hpp 2013-02-14 12:56:55 +0900 (3921ef2) +++ lib/map/da/basic_trie.hpp 2013-02-14 15:34:45 +0900 (f22e2cb) @@ -20,7 +20,7 @@ #include "trie.hpp" -#if 0 +#if 1 # define GRNXX_DEBUG_THROW(msg)\ ({ GRNXX_ERROR() << msg; GRNXX_THROW(); }) # define GRNXX_DEBUG_THROW_IF(cond)\ @@ -36,7 +36,7 @@ namespace da { namespace basic { constexpr int32_t MIN_KEY_ID = 0; -constexpr int32_t MAX_KEY_ID = 0x7FFFFFFF; +constexpr int32_t MAX_KEY_ID = 0x7FFFFFFE; constexpr size_t MIN_KEY_SIZE = 1; constexpr size_t MAX_KEY_SIZE = 4095; @@ -55,10 +55,14 @@ constexpr uint32_t CHUNK_MASK = 0x1FF; // Assume that #nodes per key is 4 and #uint32_ts per key is 8. // Note that an entries is associated with a key. constexpr uint32_t INITIAL_NODES_SIZE = 1 << 16; -constexpr uint32_t INITIAL_CHUNKS_SIZE = INITIAL_NODES_SIZE / CHUNK_SIZE; constexpr uint32_t INITIAL_ENTRIES_SIZE = 1 << 14; constexpr uint32_t INITIAL_KEYS_SIZE = 1 << 17; +constexpr uint32_t MAX_NODES_SIZE = + std::numeric_limits<uint32_t>::max() & ~CHUNK_MASK; +constexpr uint32_t MAX_ENTRIES_SIZE = uint32_t(MAX_KEY_ID) + 1; +constexpr uint32_t MAX_KEYS_SIZE = uint32_t(1) << 31; + // Chunks are grouped by the level which indicates how easily update operations // can find a good offset in that chunk. The chunk level rises when // find_offset() fails in that chunk many times. MAX_FAILURE_COUNT @@ -395,8 +399,8 @@ class Key { return true; } - static uint32_t estimate_size(const Slice &key) { - return (9 + key.size()) / sizeof(uint32_t); + static uint32_t estimate_size(const size_t key_size) { + return (9 + key_size) / sizeof(uint32_t); } private: @@ -412,8 +416,7 @@ class Trie : public da::Trie { static Trie *create(const TrieOptions &options, io::Pool pool); static Trie *open(io::Pool pool, uint32_t block_id); - // TODO -// da::Trie *defrag(); + Trie *defrag(const TrieOptions &options); uint32_t block_id() const; @@ -441,10 +444,16 @@ class Trie : public da::Trie { Trie(); - void create_double_array(io::Pool pool); - void open_double_array(io::Pool pool, uint32_t block_id); + void create_trie(const TrieOptions &options, io::Pool pool); + void open_trie(io::Pool pool, uint32_t block_id); + + void defrag_trie(const TrieOptions &options, const Trie &trie, + io::Pool pool); + void defrag_trie(const Trie &trie, uint32_t src, uint32_t dest); + + void create_arrays(); - const Key &get_key(uint32_t key_pos) { + const Key &get_key(uint32_t key_pos) const { return *reinterpret_cast<const Key *>(&keys_[key_pos]); } Modified: lib/map/da/trie.cpp (+4 -1) =================================================================== --- lib/map/da/trie.cpp 2013-02-14 12:56:55 +0900 (a7cd495) +++ lib/map/da/trie.cpp 2013-02-14 15:34:45 +0900 (99da9cc) @@ -5,7 +5,10 @@ namespace grnxx { namespace map { namespace da { -TrieOptions::TrieOptions() {} +TrieOptions::TrieOptions() + : nodes_size(0), + entries_size(0), + keys_size(0) {} Trie::Trie() {} Trie::~Trie() {} Modified: lib/map/da/trie.hpp (+5 -2) =================================================================== --- lib/map/da/trie.hpp 2013-02-14 12:56:55 +0900 (6376a79) +++ lib/map/da/trie.hpp 2013-02-14 15:34:45 +0900 (993a862) @@ -25,6 +25,10 @@ namespace map { namespace da { struct TrieOptions { + uint64_t nodes_size; + uint64_t entries_size; + uint64_t keys_size; + TrieOptions(); }; @@ -36,8 +40,7 @@ class Trie { static Trie *create(const TrieOptions &options, io::Pool pool); static Trie *open(io::Pool pool, uint32_t block_id); - // TODO -// virtual Trie *defrag() = 0; + virtual Trie *defrag(const TrieOptions &options) = 0; virtual uint32_t block_id() const = 0; -------------- next part -------------- HTML����������������������������...Download