[Groonga-commit] groonga/grnxx [master] Add components of insert(). (DoubleArray)

Back to archive index

susumu.yata null+****@clear*****
Sun Jan 20 15:18:02 JST 2013


susumu.yata	2013-01-20 15:18:02 +0900 (Sun, 20 Jan 2013)

  New Revision: 2ee513e9f8429149c7af559e9fa3a2d77856871a
  https://github.com/groonga/grnxx/commit/2ee513e9f8429149c7af559e9fa3a2d77856871a

  Log:
    Add components of insert(). (DoubleArray)

  Modified files:
    lib/alpha/double_array.cpp
    lib/alpha/double_array.hpp

  Modified: lib/alpha/double_array.cpp (+75 -9)
===================================================================
--- lib/alpha/double_array.cpp    2013-01-20 12:40:11 +0900 (ca6d4f5)
+++ lib/alpha/double_array.cpp    2013-01-20 15:18:02 +0900 (c68db51)
@@ -33,8 +33,13 @@ DoubleArrayHeader::DoubleArrayHeader()
     entries_block_id_(io::BLOCK_INVALID_ID),
     keys_block_id_(io::BLOCK_INVALID_ID),
     root_node_id_(0),
+    total_key_length_(0),
+    next_key_id_(0),
+    max_key_id_(0),
+    num_keys_(0),
     num_chunks_(0),
     num_phantoms_(0),
+    num_zombies_(0),
     leaders_(),
     inter_process_mutex_() {
   for (uint32_t i = 0; i < DOUBLE_ARRAY_MAX_CHUNK_LEVEL; ++i) {
@@ -42,7 +47,7 @@ DoubleArrayHeader::DoubleArrayHeader()
   }
 }
 
-DoubleArrayKey::DoubleArrayKey(uint64_t id, const char *address,
+DoubleArrayKey::DoubleArrayKey(uint64_t id, const void *address,
                                uint64_t length)
   : id_low_(static_cast<uint32_t>(id)),
     id_high_(static_cast<uint8_t>(id >> 32)),
@@ -93,7 +98,7 @@ std::unique_ptr<DoubleArrayImpl> DoubleArrayImpl::open(io::Pool pool,
 }
 
 bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length,
-                             uint64_t *key_offset) {
+                             uint64_t *key_pos) {
   uint64_t node_id = root_node_id();
   uint64_t query_pos = 0;
   if (!search_leaf(ptr, length, node_id, query_pos)) {
@@ -106,9 +111,9 @@ bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length,
   }
 
   if (node.key_length() == length) {
-    if (get_key(node.key_offset()).equals_to(ptr, length, query_pos)) {
-      if (key_offset) {
-        *key_offset = node.key_offset();
+    if (get_key(node.key_pos()).equals_to(ptr, length, query_pos)) {
+      if (key_pos) {
+        *key_pos = node.key_pos();
       }
       return true;
     }
@@ -116,6 +121,46 @@ bool DoubleArrayImpl::search(const uint8_t *ptr, uint64_t length,
   return false;
 }
 
+bool DoubleArrayImpl::insert(const uint8_t *ptr, uint64_t length,
+                             uint64_t *key_pos) {
+  // TODO
+//  GRN_DAT_THROW_IF(STATUS_ERROR, (status_flags() & CHANGING_MASK) != 0);
+//  StatusFlagManager status_flag_manager(header_, INSERTING_FLAG);
+
+//  GRN_DAT_DEBUG_THROW_IF(!ptr && (length != 0));
+
+  uint64_t node_id = root_node_id();
+  uint64_t query_pos = 0;
+
+  // TODO
+//  search_leaf(ptr, length, node_id, query_pos);
+//  if (!insert_leaf(ptr, length, node_id, query_pos)) {
+//    if (key_pos) {
+//      *key_pos = nodes_[node_id].key_pos();
+//    }
+//    return false;
+//  }
+
+  const uint64_t new_key_id = header_->next_key_id();
+  const uint64_t new_key_pos = append_key(ptr, length, new_key_id);
+
+  header_->set_total_key_length(header_->total_key_length() + length);
+  header_->set_num_keys(header_->num_keys() + 1);
+  if (new_key_id > header_->max_key_id()) {
+    header_->set_max_key_id(new_key_id);
+    header_->set_next_key_id(new_key_id + 1);
+  } else {
+    header_->set_next_key_id(entries_[new_key_id].next());
+  }
+
+  entries_[new_key_id].set_key(new_key_pos, length);
+  nodes_[node_id].set_key(new_key_pos, length);
+  if (key_pos) {
+    *key_pos = new_key_pos;
+  }
+  return true;
+}
+
 DoubleArrayImpl::DoubleArrayImpl()
   : pool_(),
     block_info_(nullptr),
@@ -201,9 +246,30 @@ bool DoubleArrayImpl::search_leaf(const uint8_t *ptr, uint64_t length,
   return nodes_[next].is_leaf();
 }
 
+uint64_t DoubleArrayImpl::append_key(const uint8_t *ptr, uint64_t length,
+                                     uint64_t key_id) {
+  // TODO
+//  GRN_DAT_THROW_IF(SIZE_ERROR, key_id > max_num_keys());
+
+  uint64_t key_pos = header_->next_key_pos();
+  const uint64_t key_size = DoubleArrayKey::estimate_size(length);
+
+  // TODO
+//  GRN_DAT_THROW_IF(SIZE_ERROR, key_size > (key_buf_size() - key_pos));
+  const uint64_t size_left_in_page =
+      (~key_pos + 1) % DOUBLE_ARRAY_KEYS_PAGE_SIZE;
+  if (size_left_in_page < key_size) {
+    key_pos += size_left_in_page;
+  }
+  new (&keys_[key_pos]) DoubleArrayKey(key_id, ptr, length);
+
+  header_->set_next_key_pos(key_pos + key_size);
+  return key_pos;
+}
+
 void DoubleArrayImpl::reserve_node(uint64_t node_id) {
   if (node_id >= header_->num_nodes()) {
-//    reserve_block(node_id / DOUBLE_ARRAY_BLOCK_SIZE);
+    reserve_chunk(node_id / DOUBLE_ARRAY_CHUNK_SIZE);
   }
 
   DoubleArrayNode &node = nodes_[node_id];
@@ -231,15 +297,15 @@ void DoubleArrayImpl::reserve_node(uint64_t node_id) {
     const uint64_t threshold =
         uint64_t(1) << ((DOUBLE_ARRAY_MAX_CHUNK_LEVEL - chunk.level() - 1) * 2);
     if (chunk.num_phantoms() == threshold) {
-//      update_chunk_level(chunk_id, chunk.level() + 1);
+      update_chunk_level(chunk_id, chunk.level() + 1);
     }
   }
   chunk.set_num_phantoms(chunk.num_phantoms() - 1);
 
   node.set_is_phantom(false);
 
-//  GRN_DAT_DEBUG_THROW_IF(node.offset() != INVALID_OFFSET);
-//  GRN_DAT_DEBUG_THROW_IF(node.label() != INVALID_LABEL);
+//  GRN_DAT_DEBUG_THROW_IF(node.offset() != DOUBLE_ARRAY_INVALID_OFFSET);
+//  GRN_DAT_DEBUG_THROW_IF(node.label() != DOUBLE_ARRAY_INVALID_LABEL);
 
   header_->set_num_phantoms(header_->num_phantoms() - 1);
 }

  Modified: lib/alpha/double_array.hpp (+78 -26)
===================================================================
--- lib/alpha/double_array.hpp    2013-01-20 12:40:11 +0900 (a8b5041)
+++ lib/alpha/double_array.hpp    2013-01-20 15:18:02 +0900 (aa7dc40)
@@ -23,6 +23,7 @@
 namespace grnxx {
 namespace alpha {
 
+// FIXME: To be removed when moved to grnxx::db.
 using namespace grnxx::db;
 
 extern struct DoubleArrayCreate {} DOUBLE_ARRAY_CREATE;
@@ -51,6 +52,9 @@ constexpr uint64_t DOUBLE_ARRAY_MAX_CHUNK_LEVEL    = 5;
 // the linked list is empty and there exists no leader.
 constexpr uint64_t DOUBLE_ARRAY_INVALID_LEADER     = 0x7FFFFFFF;
 
+constexpr uint64_t DOUBLE_ARRAY_KEYS_PAGE_SIZE     =
+    VECTOR_DEFAULT_PAGE_SIZE;
+
 class DoubleArrayString {
  public:
   DoubleArrayString() : ptr_(nullptr), length_(0) {}
@@ -160,6 +164,21 @@ class DoubleArrayHeader {
   uint64_t root_node_id() const {
     return root_node_id_;
   }
+  uint64_t total_key_length() const {
+    return total_key_length_;
+  }
+  uint64_t next_key_id() const {
+    return next_key_id_;
+  }
+  uint64_t next_key_pos() const {
+    return next_key_pos_;
+  }
+  uint64_t max_key_id() const {
+    return max_key_id_;
+  }
+  uint64_t num_keys() const {
+    return num_keys_;
+  }
   uint64_t num_chunks() const {
     return num_chunks_;
   }
@@ -169,8 +188,10 @@ class DoubleArrayHeader {
   uint64_t num_phantoms() const {
     return num_phantoms_;
   }
+  uint64_t num_zombies() const {
+    return num_zombies_;
+  }
   uint64_t ith_leader(uint64_t i) const {
-//    GRN_DAT_DEBUG_THROW_IF(i > DOUBLE_ARRAY_MAX_CHUNK_LEVEL);
     return leaders_[i];
   }
 
@@ -189,15 +210,31 @@ class DoubleArrayHeader {
   void set_root_node_id(uint64_t value) {
     root_node_id_ = value;
   }
+  void set_total_key_length(uint64_t value) {
+    total_key_length_ = value;
+  }
+  void set_next_key_id(uint64_t value) {
+    next_key_id_ = value;
+  }
+  void set_next_key_pos(uint64_t value) {
+    next_key_pos_ = value;
+  }
+  void set_max_key_id(uint64_t value) {
+    max_key_id_ = value;
+  }
+  void set_num_keys(uint64_t value) {
+    num_keys_ = value;
+  }
   void set_num_chunks(uint64_t value) {
     num_chunks_ = value;
   }
   void set_num_phantoms(uint64_t value) {
     num_phantoms_ = value;
   }
+  void set_num_zombies(uint64_t value) {
+    num_zombies_ = value;
+  }
   void set_ith_leader(uint64_t i, uint64_t x) {
-//    GRN_DAT_DEBUG_THROW_IF(i > MAX_BLOCK_LEVEL);
-//    GRN_DAT_DEBUG_THROW_IF((x != INVALID_LEADER) && (x >= num_blocks()));
     leaders_[i] = x;
   }
 
@@ -211,8 +248,14 @@ class DoubleArrayHeader {
   uint32_t entries_block_id_;
   uint32_t keys_block_id_;
   uint64_t root_node_id_;
+  uint64_t total_key_length_;
+  uint64_t next_key_id_;
+  uint64_t next_key_pos_;
+  uint64_t max_key_id_;
+  uint64_t num_keys_;
   uint64_t num_chunks_;
   uint64_t num_phantoms_;
+  uint64_t num_zombies_;
   uint64_t leaders_[DOUBLE_ARRAY_MAX_CHUNK_LEVEL + 1];
   Mutex inter_process_mutex_;
 };
@@ -300,24 +343,31 @@ class DoubleArrayNode {
     qword_ = (qword_ & ~LABEL_MASK) | value;
   }
 
-  // A leaf node stores the offset and the length of its associated key.
-  uint64_t key_offset() const {
+  // A leaf node stores the start position and the length of the associated
+  // key.
+  uint64_t key_pos() const {
     // 40 bits.
-    return (qword_ >> KEY_OFFSET_SHIFT) & KEY_OFFSET_MASK;
+    return (qword_ >> KEY_POS_SHIFT) & KEY_POS_MASK;
   }
   uint64_t key_length() const {
     // 12 bits.
     return (qword_ >> KEY_LENGTH_SHIFT) & KEY_LENGTH_MASK;
   }
 
-  void set_key_offset(uint64_t value) {
-    qword_ = (qword_ & ~(KEY_OFFSET_MASK << KEY_OFFSET_SHIFT)) |
-             (value << KEY_OFFSET_SHIFT);
-  }
-  void set_key_length(uint64_t value) {
-    qword_ = (qword_ & ~(KEY_LENGTH_MASK << KEY_LENGTH_SHIFT)) |
-             (value << KEY_LENGTH_SHIFT);
+  void set_key(uint64_t key_pos, uint64_t key_length) {
+    qword_ = (qword_ & ~((KEY_POS_MASK << KEY_POS_SHIFT) |
+                         (KEY_LENGTH_MASK << KEY_LENGTH_SHIFT))) |
+             (key_pos << KEY_POS_SHIFT) | (key_length << KEY_LENGTH_SHIFT);
   }
+  // FIXME: may not be required.
+//  void set_key_pos(uint64_t value) {
+//    qword_ = (qword_ & ~(KEY_POS_MASK << KEY_POS_SHIFT)) |
+//             (value << KEY_POS_SHIFT);
+//  }
+//  void set_key_length(uint64_t value) {
+//    qword_ = (qword_ & ~(KEY_LENGTH_MASK << KEY_LENGTH_SHIFT)) |
+//             (value << KEY_LENGTH_SHIFT);
+//  }
 
   // A non-phantom and non-leaf node stores the offset to its children,
   // the label of its next sibling, and the label of its first child.
@@ -362,8 +412,8 @@ class DoubleArrayNode {
 
   static constexpr uint64_t LABEL_MASK = 0xFF;
 
-  static constexpr uint64_t KEY_OFFSET_MASK  = (uint64_t(1) << 40) - 1;
-  static constexpr uint8_t  KEY_OFFSET_SHIFT = 8;
+  static constexpr uint64_t KEY_POS_MASK     = (uint64_t(1) << 40) - 1;
+  static constexpr uint8_t  KEY_POS_SHIFT    = 8;
   static constexpr uint64_t KEY_LENGTH_MASK  = (uint64_t(1) << 12) - 1;
   static constexpr uint8_t  KEY_LENGTH_SHIFT = 48;
 
@@ -452,15 +502,15 @@ class DoubleArrayEntry {
   }
 
   // A valid entry stores the offset and the length of its associated key.
-  uint64_t key_offset() const {
-    return qword_ & OFFSET_MASK;
+  uint64_t key_pos() const {
+    return qword_ & POS_MASK;
   }
   uint64_t key_length() const {
     return qword_ >> 48;
   }
 
-  void set_key(uint64_t offset, uint64_t length) {
-    qword_ = IS_VALID_FLAG | offset | (length << 48);
+  void set_key(uint64_t pos, uint64_t length) {
+    qword_ = IS_VALID_FLAG | pos | (length << 48);
   }
 
   // An invalid entry stores the index of the next invalid entry.
@@ -476,14 +526,14 @@ class DoubleArrayEntry {
   uint64_t qword_;
 
   // 11 (= 64 - (1 + 40 + 12)) bits are not used.
-  static constexpr uint64_t OFFSET_MASK = uint64_t(1) << 40;
+  static constexpr uint64_t POS_MASK = uint64_t(1) << 40;
   static constexpr uint64_t IS_VALID_FLAG = uint64_t(1) << 47;
 };
 
 // TODO
 class DoubleArrayKey {
  public:
-  DoubleArrayKey(uint64_t id, const char *address, uint64_t length);
+  DoubleArrayKey(uint64_t id, const void *address, uint64_t length);
 
   explicit operator bool() const {
     // FIXME: Magic number.
@@ -532,18 +582,18 @@ class DoubleArrayImpl {
                                                uint32_t block_id);
 
   bool search(const uint8_t *ptr, uint64_t length,
-              uint64_t *key_offset = nullptr);
+              uint64_t *key_pos = nullptr);
 
   // TODO
   bool insert(const uint8_t *ptr, uint64_t length,
-              uint64_t *key_offset = nullptr);
+              uint64_t *key_pos = nullptr);
 
-  const DoubleArrayKey &get_key(uint64_t key_offset) {
-    return *reinterpret_cast<const DoubleArrayKey *>(&keys_[key_offset]);
+  const DoubleArrayKey &get_key(uint64_t key_pos) {
+    return *reinterpret_cast<const DoubleArrayKey *>(&keys_[key_pos]);
   }
   const DoubleArrayKey &ith_key(uint64_t key_id) {
     if (entries_[key_id]) {
-      return get_key(entries_[key_id].key_offset());
+      return get_key(entries_[key_id].key_pos());
     }
     return DoubleArrayKey::invalid_key();
   }
@@ -576,6 +626,8 @@ class DoubleArrayImpl {
   bool search_leaf(const uint8_t *ptr, uint64_t length,
                    uint64_t &node_id, uint64_t &query_pos);
 
+  uint64_t append_key(const uint8_t *ptr, uint64_t length, uint64_t key_id);
+
   void reserve_node(uint64_t node_id);
   void reserve_chunk(uint64_t chunk_id);
 
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index