[Groonga-commit] groonga/grnxx at 57003dc [master] Implement grnxx::Storage::sweep().

Back to archive index

susumu.yata null+****@clear*****
Tue Apr 30 14:59:17 JST 2013


susumu.yata	2013-04-30 14:59:17 +0900 (Tue, 30 Apr 2013)

  New Revision: 57003dcaf2c35e363674861e239d0632f992cfea
  https://github.com/groonga/grnxx/commit/57003dcaf2c35e363674861e239d0632f992cfea

  Message:
    Implement grnxx::Storage::sweep().

  Modified files:
    lib/grnxx/storage.cpp
    lib/grnxx/storage.hpp
    lib/grnxx/storage/chunk_index.hpp
    lib/grnxx/storage/header.hpp
    lib/grnxx/storage/node_header.cpp
    lib/grnxx/storage/node_header.hpp
    lib/grnxx/storage/storage_impl.cpp
    lib/grnxx/storage/storage_impl.hpp

  Modified: lib/grnxx/storage.cpp (+7 -6)
===================================================================
--- lib/grnxx/storage.cpp    2013-04-29 19:46:58 +0900 (beabc2d)
+++ lib/grnxx/storage.cpp    2013-04-30 14:59:17 +0900 (d906d71)
@@ -24,11 +24,13 @@
 namespace grnxx {
 namespace {
 
-constexpr uint64_t MAX_FILE_SIZE_LOWER_LIMIT = 1ULL << 30;
-constexpr uint64_t MAX_FILE_SIZE_UPPER_LIMIT = 1ULL << 40;
+constexpr uint64_t MAX_FILE_SIZE_LOWER_LIMIT = 1ULL << 30;  // 1GB.
+constexpr uint64_t MAX_FILE_SIZE_UPPER_LIMIT = 1ULL << 63;  // 8EB.
+constexpr uint64_t MAX_FILE_SIZE_DEFAULT     = 1ULL << 40;  // 1TB.
 constexpr uint16_t MAX_NUM_FILES_LOWER_LIMIT = 1;
 constexpr uint16_t MAX_NUM_FILES_UPPER_LIMIT = 1000;
-constexpr uint64_t ROOT_SIZE_DEFAULT         = 1ULL << 12;
+constexpr uint16_t MAX_NUM_FILES_DEFAULT     = MAX_NUM_FILES_UPPER_LIMIT;
+constexpr uint64_t ROOT_SIZE_DEFAULT         = 1ULL << 12;  // 4KB.
 
 }  // namespace
 
@@ -63,7 +65,6 @@ StringBuilder &operator<<(StringBuilder &builder, StorageNodeStatus status) {
   switch (status) {
     GRNXX_STATUS_CASE(STORAGE_NODE_PHANTOM)
     GRNXX_STATUS_CASE(STORAGE_NODE_ACTIVE)
-    GRNXX_STATUS_CASE(STORAGE_NODE_MARKED)
     GRNXX_STATUS_CASE(STORAGE_NODE_UNLINKED)
     GRNXX_STATUS_CASE(STORAGE_NODE_IDLE)
     default: {
@@ -73,8 +74,8 @@ StringBuilder &operator<<(StringBuilder &builder, StorageNodeStatus status) {
 }
 
 StorageOptions::StorageOptions()
-    : max_file_size(MAX_FILE_SIZE_UPPER_LIMIT),
-      max_num_files(MAX_NUM_FILES_UPPER_LIMIT),
+    : max_file_size(MAX_FILE_SIZE_DEFAULT),
+      max_num_files(MAX_NUM_FILES_DEFAULT),
       root_size(ROOT_SIZE_DEFAULT) {}
 
 bool StorageOptions::is_valid() const {

  Modified: lib/grnxx/storage.hpp (+7 -10)
===================================================================
--- lib/grnxx/storage.hpp    2013-04-29 19:46:58 +0900 (3d30255)
+++ lib/grnxx/storage.hpp    2013-04-30 14:59:17 +0900 (144c0f1)
@@ -57,12 +57,10 @@ enum StorageNodeStatus : uint8_t {
   STORAGE_NODE_PHANTOM  = 0,
   // An active node.
   STORAGE_NODE_ACTIVE   = 1,
-  // A marked node to be unlinked.
-  STORAGE_NODE_MARKED   = 2,
   // An unlinked node.
-  STORAGE_NODE_UNLINKED = 3,
+  STORAGE_NODE_UNLINKED = 2,
   // An unused node.
-  STORAGE_NODE_IDLE     = 4
+  STORAGE_NODE_IDLE     = 3
 };
 
 StringBuilder &operator<<(StringBuilder &builder, StorageNodeStatus status);
@@ -109,7 +107,7 @@ class StorageNode {
   uint64_t size() const;
   // Return the last modified time.
   Time modified_time() const;
-  // Return the address to the user data (16 bytes) in the header.
+  // Return the address to the user data (8 bytes) in the header.
   void *user_data() const;
   // Return the address to the body.
   void *body() const {
@@ -155,13 +153,12 @@ class Storage {
   // Open a node.
   virtual StorageNode open_node(uint32_t node_id) = 0;
 
-  // Mark a node to be unlinked. Note that the marked node and its descendants
-  // will be unlinked by sweep().
+  // Unlink a node and true on success.
+  // The unlinked node and its descendants will be removed by sweep().
   virtual bool unlink_node(uint32_t node_id) = 0;
 
-  // Sweep marked nodes whose last modified time < (now - lifetime).
-  virtual bool sweep(Duration lifetime,
-                     uint32_t root_node_id = STORAGE_ROOT_NODE_ID) = 0;
+  // Sweep unlinked nodes whose modified time < (now - lifetime).
+  virtual bool sweep(Duration lifetime) = 0;
 
   // Return the storage path.
   // Note that an anonymous or temporary storage may return nullptr.

  Modified: lib/grnxx/storage/chunk_index.hpp (+4 -0)
===================================================================
--- lib/grnxx/storage/chunk_index.hpp    2013-04-29 19:46:58 +0900 (5856d51)
+++ lib/grnxx/storage/chunk_index.hpp    2013-04-30 14:59:17 +0900 (e851e2d)
@@ -26,10 +26,14 @@ namespace storage {
 constexpr size_t CHUNK_INDEX_SIZE = 32;
 
 struct ChunkIndex {
+  // The chunk ID.
   uint32_t id;
+  // The ID of the file to which the chunk belongs.
   uint16_t file_id;
   uint16_t reserved_0;
+  // The offset in file.
   uint64_t offset;
+  // The chunk size.
   uint64_t size;
   uint64_t reserved_1;
 

  Modified: lib/grnxx/storage/header.hpp (+3 -0)
===================================================================
--- lib/grnxx/storage/header.hpp    2013-04-29 19:46:58 +0900 (257c86e)
+++ lib/grnxx/storage/header.hpp    2013-04-30 14:59:17 +0900 (74848c0)
@@ -54,10 +54,13 @@ struct Header {
   // This value is extended when a node header chunk is added.
   uint32_t max_num_nodes;
   // The ID of the latest phantom node.
+  // STORAGE_INVALID_NODE_ID indicates that there are no phantom nodes.
   uint32_t latest_phantom_node_id;
   // The ID of the latest unlinked node.
+  // STORAGE_INVALID_NODE_ID indicates that there are no unlinked nodes.
   uint32_t latest_unlinked_node_id;
   // The IDs of the oldest idle nodes.
+  // STORAGE_INVALID_NODE_ID indicates that the idle node list is empty.
   uint32_t oldest_idle_node_ids[NUM_IDLE_NODE_LISTS];
   // A mutex object for exclusively updating data.
   Mutex data_mutex;

  Modified: lib/grnxx/storage/node_header.cpp (+5 -3)
===================================================================
--- lib/grnxx/storage/node_header.cpp    2013-04-29 19:46:58 +0900 (99eb7f7)
+++ lib/grnxx/storage/node_header.cpp    2013-04-30 14:59:17 +0900 (468d40c)
@@ -20,15 +20,17 @@
 namespace grnxx {
 namespace storage {
 
-NodeHeader::NodeHeader()
-    : id(STORAGE_INVALID_NODE_ID),
+NodeHeader::NodeHeader(uint32_t id)
+    : id(id),
       status(STORAGE_NODE_PHANTOM),
-      reserved_(0),
+      reserved_0(0),
       chunk_id(0),
       offset(0),
       size(0),
       next_node_id(STORAGE_INVALID_NODE_ID),
       prev_node_id(STORAGE_INVALID_NODE_ID),
+      from_node_id(STORAGE_INVALID_NODE_ID),
+      reserved_1(0),
       next_phantom_node_id(STORAGE_INVALID_NODE_ID),
       sibling_node_id(STORAGE_INVALID_NODE_ID),
       modified_time(0),

  Modified: lib/grnxx/storage/node_header.hpp (+14 -5)
===================================================================
--- lib/grnxx/storage/node_header.hpp    2013-04-29 19:46:58 +0900 (0cab3df)
+++ lib/grnxx/storage/node_header.hpp    2013-04-30 14:59:17 +0900 (394685a)
@@ -30,7 +30,7 @@ struct NodeHeader {
   uint32_t id;
   // The node status.
   StorageNodeStatus status;
-  uint8_t reserved_;
+  uint8_t reserved_0;
   // (Non-phantom)
   // The ID of the chunk to which the node belongs.
   uint16_t chunk_id;
@@ -48,20 +48,27 @@ struct NodeHeader {
   // The ID of the previous node in chunk.
   // STORAGE_INVALID_NODE_ID indicates that the node is the first node in chunk.
   uint32_t prev_node_id;
+  // (Active)
+  // The ID of the from node.
+  // STORAGE_INVALID_NODE_ID indicates that the node is the root node.
+  uint32_t from_node_id;
+  uint32_t reserved_1;
   union {
     // (Phantom)
     // The ID of the next phantom node.
     uint32_t next_phantom_node_id;
-    // (Active, marked, or unlinked)
+    // (Active or unlinked)
     // The ID of the latest child node.
     // STORAGE_INVALID_NODE_ID indicates that the node has no children.
     uint32_t child_node_id;
     // (Idle)
     // The ID of the next idle node.
+    // id == next_idle_node_id indicates that the node is the only member of
+    // an idle node list.
     uint32_t next_idle_node_id;
   };
   union {
-    // (Active or marked)
+    // (Active)
     // The ID of the next sibling node.
     // STORAGE_INVALID_NODE_ID indicates that the node has no elder siblings.
     uint32_t sibling_node_id;
@@ -72,15 +79,17 @@ struct NodeHeader {
     uint32_t next_unlinked_node_id;
     // (Idle)
     // The ID of the previous idle node.
+    // id == prev_idle_node_id indicates that the node is the only member of
+    // an idle node list.
     uint32_t prev_idle_node_id;
   };
   // The last modified time.
   Time modified_time;
   // User data.
-  uint8_t user_data[16];
+  uint8_t user_data[8];
 
   // Initialize the members.
-  NodeHeader();
+  explicit NodeHeader(uint32_t id);
 };
 
 static_assert(sizeof(NodeHeader) == NODE_HEADER_SIZE,

  Modified: lib/grnxx/storage/storage_impl.cpp (+154 -24)
===================================================================
--- lib/grnxx/storage/storage_impl.cpp    2013-04-29 19:46:58 +0900 (ce6486a)
+++ lib/grnxx/storage/storage_impl.cpp    2013-04-30 14:59:17 +0900 (c4eeaa1)
@@ -32,8 +32,8 @@ namespace grnxx {
 namespace storage {
 namespace {
 
-constexpr uint64_t CHUNK_UNIT_SIZE   = 1 << 16;
-constexpr uint64_t NODE_UNIT_SIZE    = 1 << 12;
+constexpr uint64_t CHUNK_UNIT_SIZE   = 1 << 16;  // 64KB.
+constexpr uint64_t NODE_UNIT_SIZE    = 1 << 12;  // 4KB.
 
 constexpr uint64_t HEADER_CHUNK_SIZE = CHUNK_UNIT_SIZE;
 constexpr uint64_t HEADER_INDEX_SIZE = HEADER_CHUNK_SIZE - HEADER_SIZE;
@@ -51,8 +51,8 @@ constexpr uint16_t MAX_NUM_NODE_BODY_CHUNKS   =
 static_assert(MAX_NUM_NODE_BODY_CHUNKS >= 2000,
               "MAX_NUM_NODE_BODY_CHUNKS < 2000");
 
-// A chunk for a remaining space can be smaller than this size.
-constexpr uint64_t NODE_BODY_MIN_CHUNK_SIZE = 1 << 21;
+// The size of an end-of-file chunk can be less than NODE_BODY_MIN_CHUNK_SIZE.
+constexpr uint64_t NODE_BODY_MIN_CHUNK_SIZE = 1 << 21;  // 2MB.
 constexpr double NODE_BODY_CHUNK_SIZE_RATIO = 1.0 / 64;
 
 }  // namespace
@@ -165,6 +165,7 @@ bool StorageImpl::unlink(const char *path) {
 }
 
 StorageNode StorageImpl::create_node(uint32_t parent_node_id, uint64_t size) {
+  Lock data_lock(&header_->data_mutex);
   if (parent_node_id >= header_->num_nodes) {
     GRNXX_ERROR() << "invalid argument: parent_node_id = " << parent_node_id
                   << ", num_nodes = " << header_->num_nodes;
@@ -178,13 +179,29 @@ StorageNode StorageImpl::create_node(uint32_t parent_node_id, uint64_t size) {
   if (!parent_node_header) {
     return StorageNode(nullptr);
   }
-  Lock lock(&header_->data_mutex);
+  if (parent_node_header->status != STORAGE_NODE_ACTIVE) {
+    // TODO: How about an unlinked node?
+    GRNXX_WARNING() << "invalid argument: status = "
+                    << parent_node_header->status;
+    return StorageNode(nullptr);
+  }
+  NodeHeader *child_node_header = nullptr;
+  if (parent_node_header->child_node_id != STORAGE_INVALID_NODE_ID) {
+    child_node_header = get_node_header(parent_node_header->child_node_id);
+    if (!child_node_header) {
+      return StorageNode(nullptr);
+    }
+  }
   NodeHeader * const node_header = create_active_node(size);
   if (!node_header) {
     return StorageNode(nullptr);
   }
   node_header->sibling_node_id = parent_node_header->child_node_id;
+  node_header->from_node_id = parent_node_id;
   parent_node_header->child_node_id = node_header->id;
+  if (child_node_header) {
+    child_node_header->from_node_id = node_header->id;
+  }
   void * const body = get_node_body(node_header);
   if (!body) {
     return StorageNode(nullptr);
@@ -197,6 +214,11 @@ StorageNode StorageImpl::open_node(uint32_t node_id) {
   if (!node_header) {
     return StorageNode(nullptr);
   }
+  if (node_header->status != STORAGE_NODE_ACTIVE) {
+    // TODO: How about an unlinked node?
+    GRNXX_WARNING() << "invalid argument: status = " << node_header->status;
+    return StorageNode(nullptr);
+  }
   void * const body = get_node_body(node_header);
   if (!body) {
     return StorageNode(nullptr);
@@ -205,6 +227,7 @@ StorageNode StorageImpl::open_node(uint32_t node_id) {
 }
 
 bool StorageImpl::unlink_node(uint32_t node_id) {
+  Lock data_lock(&header_->data_mutex);
   if ((node_id == STORAGE_ROOT_NODE_ID) || (node_id >= header_->num_nodes)) {
     GRNXX_ERROR() << "invalid argument: node_id = " << node_id
                   << ", num_nodes = " << header_->num_nodes;
@@ -218,18 +241,83 @@ bool StorageImpl::unlink_node(uint32_t node_id) {
     GRNXX_WARNING() << "invalid argument: status = " << node_header->status;
     return false;
   }
-  node_header->status = STORAGE_NODE_MARKED;
+  NodeHeader * const from_node_header =
+      get_node_header(node_header->from_node_id);
+  if (!from_node_header) {
+    return false;
+  }
+  NodeHeader *latest_node_header = nullptr;
+  if (header_->latest_unlinked_node_id != STORAGE_INVALID_NODE_ID) {
+    latest_node_header = get_node_header(header_->latest_unlinked_node_id);
+    if (!latest_node_header) {
+      return false;
+    }
+  }
+  if (node_id == from_node_header->child_node_id) {
+    from_node_header->child_node_id = node_header->sibling_node_id;
+  } else if (node_id == from_node_header->sibling_node_id) {
+    from_node_header->sibling_node_id = node_header->sibling_node_id;
+  } else {
+    // This error must not occur.
+    GRNXX_ERROR() << "broken link: node_id = " << node_id
+                  << ", from_node_id = " << from_node_header->id
+                  << ", child_node_id = " << from_node_header->child_node_id
+                  << ", sibling_node_id = "
+                  << from_node_header->sibling_node_id;
+    return false;
+  }
+  node_header->status = STORAGE_NODE_UNLINKED;
+  if (latest_node_header) {
+    node_header->next_unlinked_node_id =
+        latest_node_header->next_unlinked_node_id;
+    latest_node_header->next_unlinked_node_id = node_id;
+  } else {
+    node_header->next_unlinked_node_id = node_id;
+  }
+  header_->latest_unlinked_node_id = node_id;
   node_header->modified_time = clock_.now();
   return true;
 }
 
-bool StorageImpl::sweep(Duration lifetime, uint32_t root_node_id) {
-  Lock lock(&header_->data_mutex);
-  NodeHeader * const node_header = get_node_header(root_node_id);
-  if (!node_header) {
+bool StorageImpl::sweep(Duration lifetime) {
+  Lock data_lock(&header_->data_mutex);
+  if (header_->latest_unlinked_node_id == STORAGE_INVALID_NODE_ID) {
+    // Nothing to do.
+    return true;
+  }
+  NodeHeader * const latest_node_header =
+      get_node_header(header_->latest_unlinked_node_id);
+  if (!latest_node_header) {
     return false;
   }
-  return sweep_subtree(clock_.now() - lifetime, node_header);
+  const Time threshold = clock_.now() - lifetime;
+  do {
+    NodeHeader * const oldest_node_header =
+        get_node_header(latest_node_header->next_unlinked_node_id);
+    if (!oldest_node_header) {
+      return false;
+    }
+    if (oldest_node_header->status != STORAGE_NODE_UNLINKED) {
+      // This error must not occur.
+      GRNXX_ERROR() << "invalid argument: status = "
+                    << oldest_node_header->status;
+      return false;
+    }
+    if (oldest_node_header->modified_time > threshold) {
+      // Remaining unlinked nodes are too early for reuse.
+      return true;
+    }
+    const uint32_t next_node_id = oldest_node_header->next_unlinked_node_id;
+    if (!sweep_subtree(threshold, oldest_node_header)) {
+      return false;
+    }
+    if (oldest_node_header != latest_node_header) {
+      latest_node_header->next_unlinked_node_id = next_node_id;
+    } else {
+      header_->latest_unlinked_node_id = STORAGE_INVALID_NODE_ID;
+    }
+  } while (header_->latest_unlinked_node_id != STORAGE_INVALID_NODE_ID);
+  return true;
 }
 
 const char *StorageImpl::path() const {
@@ -464,7 +552,11 @@ void StorageImpl::prepare_indexes() {
 }
 
 NodeHeader *StorageImpl::create_active_node(uint64_t size) {
-  size = (size + NODE_UNIT_SIZE - 1) & ~(NODE_UNIT_SIZE - 1);
+  if (size == 0) {
+    size = NODE_UNIT_SIZE;
+  } else {
+    size = (size + NODE_UNIT_SIZE - 1) & ~(NODE_UNIT_SIZE - 1);
+  }
   NodeHeader *node_header = find_idle_node(size);
   if (!node_header) {
     node_header = create_idle_node(size);
@@ -580,8 +672,7 @@ NodeHeader *StorageImpl::create_phantom_node() {
   if (!node_header) {
     return nullptr;
   }
-  *node_header = NodeHeader();
-  node_header->id = node_id;
+  *node_header = NodeHeader(node_id);
   node_header->next_phantom_node_id = header_->latest_phantom_node_id;
   node_header->modified_time = clock_.now();
   ++header_->num_nodes;
@@ -627,22 +718,61 @@ bool StorageImpl::associate_node_with_chunk(NodeHeader *node_header,
 }
 
 bool StorageImpl::sweep_subtree(Time threshold, NodeHeader *node_header) {
-  if (node_header->status == STORAGE_NODE_MARKED) {
-    if (node_header->modified_time <= threshold) {
-      // TODO: Unlink the node.
-      return true;
+  uint32_t child_node_id = node_header->child_node_id;
+  while (child_node_id != STORAGE_INVALID_NODE_ID) {
+    NodeHeader * const child_node_header = get_node_header(child_node_id);
+    if (!child_node_header) {
+      return false;
+    }
+    child_node_id = child_node_header->sibling_node_id;
+    if (!sweep_subtree(threshold, child_node_header)) {
+      return false;
     }
   }
-  uint32_t node_id = node_header->child_node_id;
-  while (node_id != STORAGE_INVALID_NODE_ID) {
-    node_header = get_node_header(node_id);
-    if (!node_header) {
+  node_header->status = STORAGE_NODE_IDLE;
+  node_header->modified_time = clock_.now();
+  register_idle_node(node_header);
+  if (node_header->next_node_id != STORAGE_INVALID_NODE_ID) {
+    NodeHeader * const next_node_header =
+        get_node_header(node_header->next_node_id);
+    if (!next_node_header) {
       return false;
     }
-    if (!sweep_subtree(threshold, node_header)) {
+    if (next_node_header->status == STORAGE_NODE_IDLE) {
+      if (!merge_idle_nodes(node_header, next_node_header)) {
+        return false;
+      }
+    }
+  }
+  if (node_header->prev_node_id != STORAGE_INVALID_NODE_ID) {
+    NodeHeader * const prev_node_header =
+        get_node_header(node_header->prev_node_id);
+    if (!prev_node_header) {
       return false;
     }
-    node_id = node_header->sibling_node_id;
+    if (prev_node_header->status == STORAGE_NODE_IDLE) {
+      if (!merge_idle_nodes(prev_node_header, node_header)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool StorageImpl::merge_idle_nodes(NodeHeader *node_header,
+                                   NodeHeader *next_node_header) {
+  if (!unregister_idle_node(node_header) ||
+      !unregister_idle_node(next_node_header)) {
+    return false;
+  }
+  node_header->size += next_node_header->size;
+  node_header->next_node_id = next_node_header->next_node_id;
+  *next_node_header = NodeHeader(next_node_header->id);
+  next_node_header->next_phantom_node_id = header_->latest_phantom_node_id;
+  next_node_header->modified_time = clock_.now();
+  header_->latest_phantom_node_id = next_node_header->id;
+  if (!register_idle_node(node_header)) {
+    return false;
   }
   return true;
 }

  Modified: lib/grnxx/storage/storage_impl.hpp (+2 -1)
===================================================================
--- lib/grnxx/storage/storage_impl.hpp    2013-04-29 19:46:58 +0900 (d89819c)
+++ lib/grnxx/storage/storage_impl.hpp    2013-04-30 14:59:17 +0900 (e89c0c4)
@@ -52,7 +52,7 @@ class StorageImpl : public Storage {
 
   bool unlink_node(uint32_t node_id);
 
-  bool sweep(Duration lifetime, uint32_t root_node_id);
+  bool sweep(Duration lifetime);
 
   const char *path() const;
   StorageFlags flags() const;
@@ -100,6 +100,7 @@ class StorageImpl : public Storage {
   ChunkIndex *create_node_body_chunk(uint64_t size);
 
   bool sweep_subtree(Time threshold, NodeHeader *node_header);
+  bool merge_idle_nodes(NodeHeader *node_header, NodeHeader *next_node_header);
 
   bool register_idle_node(NodeHeader *node_header);
   bool unregister_idle_node(NodeHeader *node_header);
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index