susumu.yata
null+****@clear*****
Tue Apr 30 14:59:17 JST 2013
susumu.yata 2013-04-30 14:59:17 +0900 (Tue, 30 Apr 2013) New Revision: 57003dcaf2c35e363674861e239d0632f992cfea https://github.com/groonga/grnxx/commit/57003dcaf2c35e363674861e239d0632f992cfea Message: Implement grnxx::Storage::sweep(). Modified files: lib/grnxx/storage.cpp lib/grnxx/storage.hpp lib/grnxx/storage/chunk_index.hpp lib/grnxx/storage/header.hpp lib/grnxx/storage/node_header.cpp lib/grnxx/storage/node_header.hpp lib/grnxx/storage/storage_impl.cpp lib/grnxx/storage/storage_impl.hpp Modified: lib/grnxx/storage.cpp (+7 -6) =================================================================== --- lib/grnxx/storage.cpp 2013-04-29 19:46:58 +0900 (beabc2d) +++ lib/grnxx/storage.cpp 2013-04-30 14:59:17 +0900 (d906d71) @@ -24,11 +24,13 @@ namespace grnxx { namespace { -constexpr uint64_t MAX_FILE_SIZE_LOWER_LIMIT = 1ULL << 30; -constexpr uint64_t MAX_FILE_SIZE_UPPER_LIMIT = 1ULL << 40; +constexpr uint64_t MAX_FILE_SIZE_LOWER_LIMIT = 1ULL << 30; // 1GB. +constexpr uint64_t MAX_FILE_SIZE_UPPER_LIMIT = 1ULL << 63; // 8EB. +constexpr uint64_t MAX_FILE_SIZE_DEFAULT = 1ULL << 40; // 1TB. constexpr uint16_t MAX_NUM_FILES_LOWER_LIMIT = 1; constexpr uint16_t MAX_NUM_FILES_UPPER_LIMIT = 1000; -constexpr uint64_t ROOT_SIZE_DEFAULT = 1ULL << 12; +constexpr uint16_t MAX_NUM_FILES_DEFAULT = MAX_NUM_FILES_UPPER_LIMIT; +constexpr uint64_t ROOT_SIZE_DEFAULT = 1ULL << 12; // 4KB. } // namespace @@ -63,7 +65,6 @@ StringBuilder &operator<<(StringBuilder &builder, StorageNodeStatus status) { switch (status) { GRNXX_STATUS_CASE(STORAGE_NODE_PHANTOM) GRNXX_STATUS_CASE(STORAGE_NODE_ACTIVE) - GRNXX_STATUS_CASE(STORAGE_NODE_MARKED) GRNXX_STATUS_CASE(STORAGE_NODE_UNLINKED) GRNXX_STATUS_CASE(STORAGE_NODE_IDLE) default: { @@ -73,8 +74,8 @@ StringBuilder &operator<<(StringBuilder &builder, StorageNodeStatus status) { } StorageOptions::StorageOptions() - : max_file_size(MAX_FILE_SIZE_UPPER_LIMIT), - max_num_files(MAX_NUM_FILES_UPPER_LIMIT), + : max_file_size(MAX_FILE_SIZE_DEFAULT), + max_num_files(MAX_NUM_FILES_DEFAULT), root_size(ROOT_SIZE_DEFAULT) {} bool StorageOptions::is_valid() const { Modified: lib/grnxx/storage.hpp (+7 -10) =================================================================== --- lib/grnxx/storage.hpp 2013-04-29 19:46:58 +0900 (3d30255) +++ lib/grnxx/storage.hpp 2013-04-30 14:59:17 +0900 (144c0f1) @@ -57,12 +57,10 @@ enum StorageNodeStatus : uint8_t { STORAGE_NODE_PHANTOM = 0, // An active node. STORAGE_NODE_ACTIVE = 1, - // A marked node to be unlinked. - STORAGE_NODE_MARKED = 2, // An unlinked node. - STORAGE_NODE_UNLINKED = 3, + STORAGE_NODE_UNLINKED = 2, // An unused node. - STORAGE_NODE_IDLE = 4 + STORAGE_NODE_IDLE = 3 }; StringBuilder &operator<<(StringBuilder &builder, StorageNodeStatus status); @@ -109,7 +107,7 @@ class StorageNode { uint64_t size() const; // Return the last modified time. Time modified_time() const; - // Return the address to the user data (16 bytes) in the header. + // Return the address to the user data (8 bytes) in the header. void *user_data() const; // Return the address to the body. void *body() const { @@ -155,13 +153,12 @@ class Storage { // Open a node. virtual StorageNode open_node(uint32_t node_id) = 0; - // Mark a node to be unlinked. Note that the marked node and its descendants - // will be unlinked by sweep(). + // Unlink a node and true on success. + // The unlinked node and its descendants will be removed by sweep(). virtual bool unlink_node(uint32_t node_id) = 0; - // Sweep marked nodes whose last modified time < (now - lifetime). - virtual bool sweep(Duration lifetime, - uint32_t root_node_id = STORAGE_ROOT_NODE_ID) = 0; + // Sweep unlinked nodes whose modified time < (now - lifetime). + virtual bool sweep(Duration lifetime) = 0; // Return the storage path. // Note that an anonymous or temporary storage may return nullptr. Modified: lib/grnxx/storage/chunk_index.hpp (+4 -0) =================================================================== --- lib/grnxx/storage/chunk_index.hpp 2013-04-29 19:46:58 +0900 (5856d51) +++ lib/grnxx/storage/chunk_index.hpp 2013-04-30 14:59:17 +0900 (e851e2d) @@ -26,10 +26,14 @@ namespace storage { constexpr size_t CHUNK_INDEX_SIZE = 32; struct ChunkIndex { + // The chunk ID. uint32_t id; + // The ID of the file to which the chunk belongs. uint16_t file_id; uint16_t reserved_0; + // The offset in file. uint64_t offset; + // The chunk size. uint64_t size; uint64_t reserved_1; Modified: lib/grnxx/storage/header.hpp (+3 -0) =================================================================== --- lib/grnxx/storage/header.hpp 2013-04-29 19:46:58 +0900 (257c86e) +++ lib/grnxx/storage/header.hpp 2013-04-30 14:59:17 +0900 (74848c0) @@ -54,10 +54,13 @@ struct Header { // This value is extended when a node header chunk is added. uint32_t max_num_nodes; // The ID of the latest phantom node. + // STORAGE_INVALID_NODE_ID indicates that there are no phantom nodes. uint32_t latest_phantom_node_id; // The ID of the latest unlinked node. + // STORAGE_INVALID_NODE_ID indicates that there are no unlinked nodes. uint32_t latest_unlinked_node_id; // The IDs of the oldest idle nodes. + // STORAGE_INVALID_NODE_ID indicates that the idle node list is empty. uint32_t oldest_idle_node_ids[NUM_IDLE_NODE_LISTS]; // A mutex object for exclusively updating data. Mutex data_mutex; Modified: lib/grnxx/storage/node_header.cpp (+5 -3) =================================================================== --- lib/grnxx/storage/node_header.cpp 2013-04-29 19:46:58 +0900 (99eb7f7) +++ lib/grnxx/storage/node_header.cpp 2013-04-30 14:59:17 +0900 (468d40c) @@ -20,15 +20,17 @@ namespace grnxx { namespace storage { -NodeHeader::NodeHeader() - : id(STORAGE_INVALID_NODE_ID), +NodeHeader::NodeHeader(uint32_t id) + : id(id), status(STORAGE_NODE_PHANTOM), - reserved_(0), + reserved_0(0), chunk_id(0), offset(0), size(0), next_node_id(STORAGE_INVALID_NODE_ID), prev_node_id(STORAGE_INVALID_NODE_ID), + from_node_id(STORAGE_INVALID_NODE_ID), + reserved_1(0), next_phantom_node_id(STORAGE_INVALID_NODE_ID), sibling_node_id(STORAGE_INVALID_NODE_ID), modified_time(0), Modified: lib/grnxx/storage/node_header.hpp (+14 -5) =================================================================== --- lib/grnxx/storage/node_header.hpp 2013-04-29 19:46:58 +0900 (0cab3df) +++ lib/grnxx/storage/node_header.hpp 2013-04-30 14:59:17 +0900 (394685a) @@ -30,7 +30,7 @@ struct NodeHeader { uint32_t id; // The node status. StorageNodeStatus status; - uint8_t reserved_; + uint8_t reserved_0; // (Non-phantom) // The ID of the chunk to which the node belongs. uint16_t chunk_id; @@ -48,20 +48,27 @@ struct NodeHeader { // The ID of the previous node in chunk. // STORAGE_INVALID_NODE_ID indicates that the node is the first node in chunk. uint32_t prev_node_id; + // (Active) + // The ID of the from node. + // STORAGE_INVALID_NODE_ID indicates that the node is the root node. + uint32_t from_node_id; + uint32_t reserved_1; union { // (Phantom) // The ID of the next phantom node. uint32_t next_phantom_node_id; - // (Active, marked, or unlinked) + // (Active or unlinked) // The ID of the latest child node. // STORAGE_INVALID_NODE_ID indicates that the node has no children. uint32_t child_node_id; // (Idle) // The ID of the next idle node. + // id == next_idle_node_id indicates that the node is the only member of + // an idle node list. uint32_t next_idle_node_id; }; union { - // (Active or marked) + // (Active) // The ID of the next sibling node. // STORAGE_INVALID_NODE_ID indicates that the node has no elder siblings. uint32_t sibling_node_id; @@ -72,15 +79,17 @@ struct NodeHeader { uint32_t next_unlinked_node_id; // (Idle) // The ID of the previous idle node. + // id == prev_idle_node_id indicates that the node is the only member of + // an idle node list. uint32_t prev_idle_node_id; }; // The last modified time. Time modified_time; // User data. - uint8_t user_data[16]; + uint8_t user_data[8]; // Initialize the members. - NodeHeader(); + explicit NodeHeader(uint32_t id); }; static_assert(sizeof(NodeHeader) == NODE_HEADER_SIZE, Modified: lib/grnxx/storage/storage_impl.cpp (+154 -24) =================================================================== --- lib/grnxx/storage/storage_impl.cpp 2013-04-29 19:46:58 +0900 (ce6486a) +++ lib/grnxx/storage/storage_impl.cpp 2013-04-30 14:59:17 +0900 (c4eeaa1) @@ -32,8 +32,8 @@ namespace grnxx { namespace storage { namespace { -constexpr uint64_t CHUNK_UNIT_SIZE = 1 << 16; -constexpr uint64_t NODE_UNIT_SIZE = 1 << 12; +constexpr uint64_t CHUNK_UNIT_SIZE = 1 << 16; // 64KB. +constexpr uint64_t NODE_UNIT_SIZE = 1 << 12; // 4KB. constexpr uint64_t HEADER_CHUNK_SIZE = CHUNK_UNIT_SIZE; constexpr uint64_t HEADER_INDEX_SIZE = HEADER_CHUNK_SIZE - HEADER_SIZE; @@ -51,8 +51,8 @@ constexpr uint16_t MAX_NUM_NODE_BODY_CHUNKS = static_assert(MAX_NUM_NODE_BODY_CHUNKS >= 2000, "MAX_NUM_NODE_BODY_CHUNKS < 2000"); -// A chunk for a remaining space can be smaller than this size. -constexpr uint64_t NODE_BODY_MIN_CHUNK_SIZE = 1 << 21; +// The size of an end-of-file chunk can be less than NODE_BODY_MIN_CHUNK_SIZE. +constexpr uint64_t NODE_BODY_MIN_CHUNK_SIZE = 1 << 21; // 2MB. constexpr double NODE_BODY_CHUNK_SIZE_RATIO = 1.0 / 64; } // namespace @@ -165,6 +165,7 @@ bool StorageImpl::unlink(const char *path) { } StorageNode StorageImpl::create_node(uint32_t parent_node_id, uint64_t size) { + Lock data_lock(&header_->data_mutex); if (parent_node_id >= header_->num_nodes) { GRNXX_ERROR() << "invalid argument: parent_node_id = " << parent_node_id << ", num_nodes = " << header_->num_nodes; @@ -178,13 +179,29 @@ StorageNode StorageImpl::create_node(uint32_t parent_node_id, uint64_t size) { if (!parent_node_header) { return StorageNode(nullptr); } - Lock lock(&header_->data_mutex); + if (parent_node_header->status != STORAGE_NODE_ACTIVE) { + // TODO: How about an unlinked node? + GRNXX_WARNING() << "invalid argument: status = " + << parent_node_header->status; + return StorageNode(nullptr); + } + NodeHeader *child_node_header = nullptr; + if (parent_node_header->child_node_id != STORAGE_INVALID_NODE_ID) { + child_node_header = get_node_header(parent_node_header->child_node_id); + if (!child_node_header) { + return StorageNode(nullptr); + } + } NodeHeader * const node_header = create_active_node(size); if (!node_header) { return StorageNode(nullptr); } node_header->sibling_node_id = parent_node_header->child_node_id; + node_header->from_node_id = parent_node_id; parent_node_header->child_node_id = node_header->id; + if (child_node_header) { + child_node_header->from_node_id = node_header->id; + } void * const body = get_node_body(node_header); if (!body) { return StorageNode(nullptr); @@ -197,6 +214,11 @@ StorageNode StorageImpl::open_node(uint32_t node_id) { if (!node_header) { return StorageNode(nullptr); } + if (node_header->status != STORAGE_NODE_ACTIVE) { + // TODO: How about an unlinked node? + GRNXX_WARNING() << "invalid argument: status = " << node_header->status; + return StorageNode(nullptr); + } void * const body = get_node_body(node_header); if (!body) { return StorageNode(nullptr); @@ -205,6 +227,7 @@ StorageNode StorageImpl::open_node(uint32_t node_id) { } bool StorageImpl::unlink_node(uint32_t node_id) { + Lock data_lock(&header_->data_mutex); if ((node_id == STORAGE_ROOT_NODE_ID) || (node_id >= header_->num_nodes)) { GRNXX_ERROR() << "invalid argument: node_id = " << node_id << ", num_nodes = " << header_->num_nodes; @@ -218,18 +241,83 @@ bool StorageImpl::unlink_node(uint32_t node_id) { GRNXX_WARNING() << "invalid argument: status = " << node_header->status; return false; } - node_header->status = STORAGE_NODE_MARKED; + NodeHeader * const from_node_header = + get_node_header(node_header->from_node_id); + if (!from_node_header) { + return false; + } + NodeHeader *latest_node_header = nullptr; + if (header_->latest_unlinked_node_id != STORAGE_INVALID_NODE_ID) { + latest_node_header = get_node_header(header_->latest_unlinked_node_id); + if (!latest_node_header) { + return false; + } + } + if (node_id == from_node_header->child_node_id) { + from_node_header->child_node_id = node_header->sibling_node_id; + } else if (node_id == from_node_header->sibling_node_id) { + from_node_header->sibling_node_id = node_header->sibling_node_id; + } else { + // This error must not occur. + GRNXX_ERROR() << "broken link: node_id = " << node_id + << ", from_node_id = " << from_node_header->id + << ", child_node_id = " << from_node_header->child_node_id + << ", sibling_node_id = " + << from_node_header->sibling_node_id; + return false; + } + node_header->status = STORAGE_NODE_UNLINKED; + if (latest_node_header) { + node_header->next_unlinked_node_id = + latest_node_header->next_unlinked_node_id; + latest_node_header->next_unlinked_node_id = node_id; + } else { + node_header->next_unlinked_node_id = node_id; + } + header_->latest_unlinked_node_id = node_id; node_header->modified_time = clock_.now(); return true; } -bool StorageImpl::sweep(Duration lifetime, uint32_t root_node_id) { - Lock lock(&header_->data_mutex); - NodeHeader * const node_header = get_node_header(root_node_id); - if (!node_header) { +bool StorageImpl::sweep(Duration lifetime) { + Lock data_lock(&header_->data_mutex); + if (header_->latest_unlinked_node_id == STORAGE_INVALID_NODE_ID) { + // Nothing to do. + return true; + } + NodeHeader * const latest_node_header = + get_node_header(header_->latest_unlinked_node_id); + if (!latest_node_header) { return false; } - return sweep_subtree(clock_.now() - lifetime, node_header); + const Time threshold = clock_.now() - lifetime; + do { + NodeHeader * const oldest_node_header = + get_node_header(latest_node_header->next_unlinked_node_id); + if (!oldest_node_header) { + return false; + } + if (oldest_node_header->status != STORAGE_NODE_UNLINKED) { + // This error must not occur. + GRNXX_ERROR() << "invalid argument: status = " + << oldest_node_header->status; + return false; + } + if (oldest_node_header->modified_time > threshold) { + // Remaining unlinked nodes are too early for reuse. + return true; + } + const uint32_t next_node_id = oldest_node_header->next_unlinked_node_id; + if (!sweep_subtree(threshold, oldest_node_header)) { + return false; + } + if (oldest_node_header != latest_node_header) { + latest_node_header->next_unlinked_node_id = next_node_id; + } else { + header_->latest_unlinked_node_id = STORAGE_INVALID_NODE_ID; + } + } while (header_->latest_unlinked_node_id != STORAGE_INVALID_NODE_ID); + return true; } const char *StorageImpl::path() const { @@ -464,7 +552,11 @@ void StorageImpl::prepare_indexes() { } NodeHeader *StorageImpl::create_active_node(uint64_t size) { - size = (size + NODE_UNIT_SIZE - 1) & ~(NODE_UNIT_SIZE - 1); + if (size == 0) { + size = NODE_UNIT_SIZE; + } else { + size = (size + NODE_UNIT_SIZE - 1) & ~(NODE_UNIT_SIZE - 1); + } NodeHeader *node_header = find_idle_node(size); if (!node_header) { node_header = create_idle_node(size); @@ -580,8 +672,7 @@ NodeHeader *StorageImpl::create_phantom_node() { if (!node_header) { return nullptr; } - *node_header = NodeHeader(); - node_header->id = node_id; + *node_header = NodeHeader(node_id); node_header->next_phantom_node_id = header_->latest_phantom_node_id; node_header->modified_time = clock_.now(); ++header_->num_nodes; @@ -627,22 +718,61 @@ bool StorageImpl::associate_node_with_chunk(NodeHeader *node_header, } bool StorageImpl::sweep_subtree(Time threshold, NodeHeader *node_header) { - if (node_header->status == STORAGE_NODE_MARKED) { - if (node_header->modified_time <= threshold) { - // TODO: Unlink the node. - return true; + uint32_t child_node_id = node_header->child_node_id; + while (child_node_id != STORAGE_INVALID_NODE_ID) { + NodeHeader * const child_node_header = get_node_header(child_node_id); + if (!child_node_header) { + return false; + } + child_node_id = child_node_header->sibling_node_id; + if (!sweep_subtree(threshold, child_node_header)) { + return false; } } - uint32_t node_id = node_header->child_node_id; - while (node_id != STORAGE_INVALID_NODE_ID) { - node_header = get_node_header(node_id); - if (!node_header) { + node_header->status = STORAGE_NODE_IDLE; + node_header->modified_time = clock_.now(); + register_idle_node(node_header); + if (node_header->next_node_id != STORAGE_INVALID_NODE_ID) { + NodeHeader * const next_node_header = + get_node_header(node_header->next_node_id); + if (!next_node_header) { return false; } - if (!sweep_subtree(threshold, node_header)) { + if (next_node_header->status == STORAGE_NODE_IDLE) { + if (!merge_idle_nodes(node_header, next_node_header)) { + return false; + } + } + } + if (node_header->prev_node_id != STORAGE_INVALID_NODE_ID) { + NodeHeader * const prev_node_header = + get_node_header(node_header->prev_node_id); + if (!prev_node_header) { return false; } - node_id = node_header->sibling_node_id; + if (prev_node_header->status == STORAGE_NODE_IDLE) { + if (!merge_idle_nodes(prev_node_header, node_header)) { + return false; + } + } + } + return true; +} + +bool StorageImpl::merge_idle_nodes(NodeHeader *node_header, + NodeHeader *next_node_header) { + if (!unregister_idle_node(node_header) || + !unregister_idle_node(next_node_header)) { + return false; + } + node_header->size += next_node_header->size; + node_header->next_node_id = next_node_header->next_node_id; + *next_node_header = NodeHeader(next_node_header->id); + next_node_header->next_phantom_node_id = header_->latest_phantom_node_id; + next_node_header->modified_time = clock_.now(); + header_->latest_phantom_node_id = next_node_header->id; + if (!register_idle_node(node_header)) { + return false; } return true; } Modified: lib/grnxx/storage/storage_impl.hpp (+2 -1) =================================================================== --- lib/grnxx/storage/storage_impl.hpp 2013-04-29 19:46:58 +0900 (d89819c) +++ lib/grnxx/storage/storage_impl.hpp 2013-04-30 14:59:17 +0900 (e89c0c4) @@ -52,7 +52,7 @@ class StorageImpl : public Storage { bool unlink_node(uint32_t node_id); - bool sweep(Duration lifetime, uint32_t root_node_id); + bool sweep(Duration lifetime); const char *path() const; StorageFlags flags() const; @@ -100,6 +100,7 @@ class StorageImpl : public Storage { ChunkIndex *create_node_body_chunk(uint64_t size); bool sweep_subtree(Time threshold, NodeHeader *node_header); + bool merge_idle_nodes(NodeHeader *node_header, NodeHeader *next_node_header); bool register_idle_node(NodeHeader *node_header); bool unregister_idle_node(NodeHeader *node_header); -------------- next part -------------- HTML����������������������������...Download