susumu.yata
null+****@clear*****
Wed Jul 31 10:37:51 JST 2013
susumu.yata 2013-07-31 10:37:51 +0900 (Wed, 31 Jul 2013) New Revision: b992eba25b808d915247323181308a1eb98fe057 https://github.com/groonga/grnxx/commit/b992eba25b808d915247323181308a1eb98fe057 Message: Implement reconstruction of Patricia. Modified files: lib/grnxx/map/patricia.cpp lib/grnxx/map/patricia.hpp Modified: lib/grnxx/map/patricia.cpp (+203 -47) =================================================================== --- lib/grnxx/map/patricia.cpp 2013-07-29 16:18:02 +0900 (9db63bc) +++ lib/grnxx/map/patricia.cpp 2013-07-31 10:37:51 +0900 (f455428) @@ -22,12 +22,14 @@ #include "grnxx/bytes.hpp" #include "grnxx/exception.hpp" #include "grnxx/geo_point.hpp" +#include "grnxx/lock.hpp" #include "grnxx/logger.hpp" #include "grnxx/map/common_header.hpp" #include "grnxx/map/hash.hpp" #include "grnxx/map/helper.hpp" #include "grnxx/map/key_pool.hpp" #include "grnxx/map/patricia/node.hpp" +#include "grnxx/mutex.hpp" #include "grnxx/storage.hpp" namespace grnxx { @@ -36,7 +38,14 @@ namespace { constexpr char FORMAT_STRING[] = "grnxx::map::Patricia"; -constexpr uint64_t ROOT_NODE_ID = 0; +constexpr uint64_t ROOT_NODE_ID = 0; +constexpr uint64_t DUMMY_NODE_ID = ROOT_NODE_ID + 1; + +constexpr uint64_t MIN_NODES_SIZE = 1ULL << 8; +constexpr uint64_t MIN_CACHE_SIZE = 1ULL << 8; +constexpr uint64_t MAX_CACHE_SIZE = 1ULL << 20; + +constexpr int64_t INVALID_CACHE_VALUE = -1; using patricia::NODE_INVALID_OFFSET; @@ -51,9 +60,11 @@ struct PatriciaHeader { CommonHeader common_header; MapType map_type; uint64_t next_node_id; + uint64_t nodes_id; uint32_t nodes_storage_node_id; uint32_t pool_storage_node_id; uint32_t cache_storage_node_id; + Mutex mutex; // Initialize the member variables. PatriciaHeader(); @@ -65,9 +76,11 @@ struct PatriciaHeader { PatriciaHeader::PatriciaHeader() : common_header(FORMAT_STRING, MAP_PATRICIA), next_node_id(2), + nodes_id(0), nodes_storage_node_id(STORAGE_INVALID_NODE_ID), pool_storage_node_id(STORAGE_INVALID_NODE_ID), - cache_storage_node_id(STORAGE_INVALID_NODE_ID) {} + cache_storage_node_id(STORAGE_INVALID_NODE_ID), + mutex() {} PatriciaHeader::operator bool() const { return common_header.format() == FORMAT_STRING; @@ -481,8 +494,8 @@ void Patricia<T>::create_map(Storage *storage, uint32_t storage_node_id, pool_.reset(KeyPool<T>::create(storage, storage_node_id_)); header_->nodes_storage_node_id = nodes_->storage_node_id(); header_->pool_storage_node_id = pool_->storage_node_id(); - Node * const root_node = &nodes_->get_value(ROOT_NODE_ID); - *root_node = Node::dead_node(); + nodes_->set(ROOT_NODE_ID, Node::dead_node()); + nodes_->set(DUMMY_NODE_ID, Node::dead_node()); } catch (...) { storage->unlink_node(storage_node_id_); throw; @@ -574,8 +587,11 @@ Patricia<Bytes>::Patricia() storage_node_id_(STORAGE_INVALID_NODE_ID), header_(nullptr), nodes_(), + old_nodes_(), pool_(), - cache_() {} + cache_(), + old_cache_(), + nodes_id_(0) {} Patricia<Bytes>::~Patricia() {} @@ -627,6 +643,7 @@ bool Patricia<Bytes>::get(int64_t key_id, Key *key) { } bool Patricia<Bytes>::unset(int64_t key_id) { + refresh_nodes(); Key key; if (!get(key_id, &key)) { // Not found. @@ -674,6 +691,7 @@ bool Patricia<Bytes>::unset(int64_t key_id) { } bool Patricia<Bytes>::reset(int64_t key_id, KeyArg dest_key) { + refresh_nodes(); // Find the source key. Key src_key; if (!get(key_id, &src_key)) { @@ -713,6 +731,9 @@ bool Patricia<Bytes>::reset(int64_t key_id, KeyArg dest_key) { } src_prev_node = src_node; } + if (header_->next_node_id >= nodes_->size()) { + resize_nodes(); + } // Add the destination key. constexpr std::size_t HISTORY_SIZE = 8; uint64_t dest_node_id = ROOT_NODE_ID; @@ -848,9 +869,11 @@ bool Patricia<Bytes>::reset(int64_t key_id, KeyArg dest_key) { } bool Patricia<Bytes>::find(KeyArg key, int64_t *key_id) { + refresh_nodes(); + NodeArray * const nodes = nodes_.get(); const uint64_t bit_size = key.size() * 8; uint64_t node_id = ROOT_NODE_ID; - Node node = nodes_->get(node_id); + Node node = nodes->get(node_id); if (node.status() == NODE_DEAD) { // Not found. return false; @@ -885,26 +908,28 @@ bool Patricia<Bytes>::find(KeyArg key, int64_t *key_id) { break; } } - node = nodes_->get(node_id); + node = nodes->get(node_id); } } bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) { - constexpr std::size_t HISTORY_SIZE = 8; - int64_t * const cache = nullptr; -// int64_t * const cache = -// &cache_->get_value(Hash<Key>()(key) % cache_->size()); -// if ((*cache >= 0) && (*cache < pool_->max_key_id())) { -// Key cached_key; -// if (pool_->get(*cache, &cached_key)) { -// if (key == cached_key) { -// if (key_id) { -// *key_id = *cache; -// } -// return false; -// } -// } -// } + refresh_nodes(); + int64_t * const cache = + &cache_->get_value(Hash<Key>()(key) & (cache_->size() - 1)); + if ((*cache >= 0) && (*cache <= pool_->max_key_id())) { + Key cached_key; + if (pool_->get(*cache, &cached_key)) { + if (key == cached_key) { + if (key_id) { + *key_id = *cache; + } + return false; + } + } + } + if (header_->next_node_id >= nodes_->size()) { + resize_nodes(); + } uint64_t node_id = ROOT_NODE_ID; Node *node = &nodes_->get_value(node_id); if (node->status() == NODE_DEAD) { @@ -914,12 +939,11 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) { if (key_id) { *key_id = next_key_id; } - if (cache) { - *cache = next_key_id; - } + *cache = next_key_id; return true; } const uint64_t bit_size = key.size() * 8; + constexpr std::size_t HISTORY_SIZE = 8; Node *history[HISTORY_SIZE]; int depth = 0; history[0] = node; @@ -959,9 +983,7 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) { if (key_id) { *key_id = node->key_id(); } - if (cache) { - *cache = node->key_id(); - } + *cache = node->key_id(); return false; } node = history[depth % HISTORY_SIZE]; @@ -971,20 +993,17 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) { // "key" is a prefix of "stored_key". next_nodes[0] = Node::leaf_node(next_key_id); next_nodes[1] = *node; - *node = Node::terminal_node(count * 8, header_->next_node_id); } else { // "stored_key" is a prefix of "key". next_nodes[0] = *node; next_nodes[1] = Node::leaf_node(next_key_id); - *node = Node::terminal_node(count * 8, header_->next_node_id); } + *node = Node::terminal_node(count * 8, header_->next_node_id); header_->next_node_id += 2; if (key_id) { *key_id = next_key_id; } - if (cache) { - *cache = next_key_id; - } + *cache = next_key_id; return true; } count = (count * 8) + 7 - bit_scan_reverse(key[count] ^ stored_key[count]); @@ -1037,13 +1056,12 @@ bool Patricia<Bytes>::add(KeyArg key, int64_t *key_id) { if (key_id) { *key_id = next_key_id; } - if (cache) { - *cache = next_key_id; - } + *cache = next_key_id; return true; } bool Patricia<Bytes>::remove(KeyArg key) { + refresh_nodes(); const uint64_t bit_size = key.size() * 8; uint64_t node_id = ROOT_NODE_ID; Node *node = &nodes_->get_value(node_id); @@ -1093,6 +1111,7 @@ bool Patricia<Bytes>::remove(KeyArg key) { bool Patricia<Bytes>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) { + refresh_nodes(); // Find the source key. const uint64_t src_bit_size = src_key.size() * 8; int64_t src_key_id; @@ -1137,6 +1156,9 @@ bool Patricia<Bytes>::replace(KeyArg src_key, KeyArg dest_key, src_prev_node = src_node; src_node = &nodes_->get_value(src_node_id); } + if (header_->next_node_id >= nodes_->size()) { + resize_nodes(); + } // Add the destination key. constexpr std::size_t HISTORY_SIZE = 8; uint64_t dest_node_id = ROOT_NODE_ID; @@ -1273,11 +1295,13 @@ bool Patricia<Bytes>::replace(KeyArg src_key, KeyArg dest_key, bool Patricia<Bytes>::find_longest_prefix_match(KeyArg query, int64_t *key_id, Key *key) { + refresh_nodes(); + NodeArray * const nodes = nodes_.get(); const uint64_t bit_size = query.size() * 8; bool found = false; uint64_t node_id = ROOT_NODE_ID; for ( ; ; ) { - Node node = nodes_->get(node_id); + Node node = nodes->get(node_id); switch (node.status()) { case NODE_DEAD: { // Not found. @@ -1307,7 +1331,7 @@ bool Patricia<Bytes>::find_longest_prefix_match(KeyArg query, int64_t *key_id, if (node.bit_size() > bit_size) { return found; } else if (node.bit_size() < bit_size) { - Node leaf_node = nodes_->get(node.offset()); + Node leaf_node = nodes->get(node.offset()); Key stored_key = pool_->get_key(leaf_node.key_id()); if (query.starts_with(stored_key)) { if (key_id) { @@ -1327,9 +1351,42 @@ bool Patricia<Bytes>::find_longest_prefix_match(KeyArg query, int64_t *key_id, } bool Patricia<Bytes>::truncate() { - Node * const root_node = &nodes_->get_value(ROOT_NODE_ID); - pool_->truncate(); - *root_node = Node::dead_node(); + refresh_nodes(); + if (max_key_id() == MAP_MIN_KEY_ID) { + // Nothing to do. + return true; + } + std::unique_ptr<NodeArray> new_nodes( + NodeArray::create(storage_, storage_node_id_, MIN_NODES_SIZE)); + std::unique_ptr<Cache> new_cache; + try { + new_cache.reset(Cache::create(storage_, storage_node_id_, + MIN_CACHE_SIZE, INVALID_CACHE_VALUE)); + try { + pool_->truncate(); + } catch (...) { + Cache::unlink(storage_, new_cache->storage_node_id()); + throw; + } + } catch (...) { + NodeArray::unlink(storage_, new_nodes->storage_node_id()); + throw; + } + { + // Validate a new nodes. + Lock lock(&header_->mutex); + header_->nodes_storage_node_id = new_nodes->storage_node_id(); + header_->cache_storage_node_id = new_cache->storage_node_id(); + header_->next_node_id = 2; + ++header_->nodes_id; + old_nodes_.swap(new_nodes); + nodes_.swap(old_nodes_); + old_cache_.swap(new_cache); + cache_.swap(old_cache_); + nodes_id_ = header_->nodes_id; + } + NodeArray::unlink(storage_, old_nodes_->storage_node_id()); + Cache::unlink(storage_, old_cache_->storage_node_id()); return true; } @@ -1342,16 +1399,15 @@ void Patricia<Bytes>::create_map(Storage *storage, uint32_t storage_node_id, try { header_ = static_cast<Header *>(storage_node.body()); *header_ = Header(); - // TODO: Remove a magic number. - nodes_.reset(NodeArray::create(storage, storage_node_id_, 1ULL << 41)); + nodes_.reset(NodeArray::create(storage, storage_node_id_, MIN_NODES_SIZE)); pool_.reset(KeyPool<Bytes>::create(storage, storage_node_id_)); - // TODO: Remove a magic number. - cache_.reset(Cache::create(storage, storage_node_id_, 1ULL << 20, -1)); + cache_.reset(Cache::create(storage, storage_node_id_, + MIN_CACHE_SIZE, INVALID_CACHE_VALUE)); header_->nodes_storage_node_id = nodes_->storage_node_id(); header_->pool_storage_node_id = pool_->storage_node_id(); header_->cache_storage_node_id = cache_->storage_node_id(); - Node * const root_node = &nodes_->get_value(ROOT_NODE_ID); - *root_node = Node::dead_node(); + nodes_->set(ROOT_NODE_ID, Node::dead_node()); + nodes_->set(DUMMY_NODE_ID, Node::dead_node()); } catch (...) { storage->unlink_node(storage_node_id_); throw; @@ -1373,9 +1429,109 @@ void Patricia<Bytes>::open_map(Storage *storage, uint32_t storage_node_id) { << ", actual = " << header_->common_header.format(); throw LogicError(); } + Lock lock(&header_->mutex); nodes_.reset(NodeArray::open(storage, header_->nodes_storage_node_id)); pool_.reset(KeyPool<Bytes>::open(storage, header_->pool_storage_node_id)); cache_.reset(Cache::open(storage, header_->cache_storage_node_id)); + nodes_id_ = header_->nodes_id; +} + +void Patricia<Bytes>::resize_nodes() { + // Create a new nodes. + uint64_t new_nodes_size = num_keys() * 4; + if (new_nodes_size < MIN_NODES_SIZE) { + new_nodes_size = MIN_NODES_SIZE; + } + new_nodes_size = 2ULL << bit_scan_reverse(new_nodes_size - 1); + uint64_t new_cache_size = new_nodes_size / 16; + if (new_cache_size < MIN_CACHE_SIZE) { + new_cache_size = MIN_CACHE_SIZE; + } else if (new_cache_size > MAX_CACHE_SIZE) { + new_cache_size = MAX_CACHE_SIZE; + } + std::unique_ptr<NodeArray> new_nodes( + NodeArray::create(storage_, storage_node_id_, new_nodes_size)); + std::unique_ptr<Cache> new_cache; + uint64_t next_node_id = 2; + try { + new_cache.reset(Cache::create(storage_, storage_node_id_, + new_cache_size, INVALID_CACHE_VALUE)); + try { + next_node_id = rearrange_nodes(ROOT_NODE_ID, ROOT_NODE_ID, + next_node_id, new_nodes.get()); + next_node_id = rearrange_nodes(DUMMY_NODE_ID, DUMMY_NODE_ID, + next_node_id, new_nodes.get()); + } catch (...) { + Cache::unlink(storage_, new_cache->storage_node_id()); + throw; + } + } catch (...) { + NodeArray::unlink(storage_, new_nodes->storage_node_id()); + throw; + } + { + // Validate a new nodes. + Lock lock(&header_->mutex); + header_->nodes_storage_node_id = new_nodes->storage_node_id(); + header_->cache_storage_node_id = new_cache->storage_node_id(); + header_->next_node_id = next_node_id; + ++header_->nodes_id; + old_nodes_.swap(new_nodes); + nodes_.swap(old_nodes_); + old_cache_.swap(new_cache); + cache_.swap(old_cache_); + nodes_id_ = header_->nodes_id; + } + NodeArray::unlink(storage_, old_nodes_->storage_node_id()); + Cache::unlink(storage_, old_cache_->storage_node_id()); +} + +uint64_t Patricia<Bytes>::rearrange_nodes(uint64_t src_node_id, + uint64_t dest_node_id, + uint64_t next_node_id, + NodeArray *dest_nodes) { + const Node src_node = nodes_->get(src_node_id); + Node &dest_node = dest_nodes->get_value(dest_node_id); + switch (src_node.status()) { + case NODE_DEAD: + case NODE_LEAF: { + dest_node = src_node; + return next_node_id; + } + case NODE_BRANCH: { + dest_node = Node::branch_node(src_node.bit_pos(), next_node_id); + break; + } + case NODE_TERMINAL: { + dest_node = Node::terminal_node(src_node.bit_size(), next_node_id); + break; + } + } + src_node_id = src_node.offset(); + dest_node_id = next_node_id; + next_node_id += 2; + next_node_id = rearrange_nodes(src_node_id, dest_node_id, + next_node_id, dest_nodes); + next_node_id = rearrange_nodes(src_node_id + 1, dest_node_id + 1, + next_node_id, dest_nodes); + return next_node_id; +} + +void Patricia<Bytes>::refresh_nodes() { + if (nodes_id_ != header_->nodes_id) { + Lock lock(&header_->mutex); + if (nodes_id_ != header_->nodes_id) { + std::unique_ptr<NodeArray> new_nodes( + NodeArray::open(storage_, header_->nodes_storage_node_id)); + std::unique_ptr<Cache> new_cache( + Cache::open(storage_, header_->cache_storage_node_id)); + old_nodes_.swap(new_nodes); + nodes_.swap(old_nodes_); + old_cache_.swap(new_cache); + cache_.swap(old_cache_); + nodes_id_ = header_->nodes_id; + } + } } uint64_t Patricia<Bytes>::get_ith_bit(KeyArg key, uint64_t bit_pos) { Modified: lib/grnxx/map/patricia.hpp (+13 -1) =================================================================== --- lib/grnxx/map/patricia.hpp 2013-07-29 16:18:02 +0900 (4611331) +++ lib/grnxx/map/patricia.hpp 2013-07-31 10:37:51 +0900 (19a8171) @@ -96,7 +96,7 @@ template <> class Patricia<Bytes> : public Map<Bytes> { using Header = PatriciaHeader; using Node = patricia::Node; - using NodeArray = Array<Node, 65536, 8192>; + using NodeArray = Array<Node>; using Cache = Array<int64_t>; public: @@ -136,13 +136,25 @@ class Patricia<Bytes> : public Map<Bytes> { uint32_t storage_node_id_; Header *header_; std::unique_ptr<NodeArray> nodes_; + std::unique_ptr<NodeArray> old_nodes_; std::unique_ptr<KeyPool<Bytes>> pool_; std::unique_ptr<Cache> cache_; + std::unique_ptr<Cache> old_cache_; + uint64_t nodes_id_; void create_map(Storage *storage, uint32_t storage_node_id, const MapOptions &options); void open_map(Storage *storage, uint32_t storage_node_id); + // Resize "nodes_" and "cache_". + void resize_nodes(); + // Recursively arrange nodes. + uint64_t rearrange_nodes(uint64_t src_node_id, uint64_t dest_node_id, + uint64_t next_node_id, NodeArray *dest_nodes); + + // Refresh "nodes_" and "cache_" if new ones are available. + void refresh_nodes(); + static uint64_t get_ith_bit(KeyArg key, uint64_t bit_pos); }; -------------- next part -------------- HTML����������������������������...Download