[Groonga-commit] groonga/grnxx at d351e20 [master] Refine grnxx::map::HashTable.

Back to archive index

susumu.yata null+****@clear*****
Thu Jul 25 18:36:50 JST 2013


susumu.yata	2013-07-25 18:36:50 +0900 (Thu, 25 Jul 2013)

  New Revision: d351e20a797eeaaee7cc111d98ed1d511d2b7027
  https://github.com/groonga/grnxx/commit/d351e20a797eeaaee7cc111d98ed1d511d2b7027

  Message:
    Refine grnxx::map::HashTable.

  Modified files:
    lib/grnxx/map/hash_table.cpp
    lib/grnxx/map/hash_table.hpp

  Modified: lib/grnxx/map/hash_table.cpp (+87 -90)
===================================================================
--- lib/grnxx/map/hash_table.cpp    2013-07-25 17:41:55 +0900 (56171c1)
+++ lib/grnxx/map/hash_table.cpp    2013-07-25 18:36:50 +0900 (101e614)
@@ -47,10 +47,10 @@ constexpr uint64_t MIN_TABLE_SIZE = 256;
 
 struct HashTableHeader {
   CommonHeader common_header;
-  uint32_t key_ids_storage_node_id;
-  uint32_t old_key_ids_storage_node_id;
+  uint32_t table_storage_node_id;
+  uint32_t old_table_storage_node_id;
   uint32_t pool_storage_node_id;
-  uint64_t num_key_ids;
+  uint64_t num_entries;
   Mutex mutex;
 
   // Initialize the member variables.
@@ -62,10 +62,10 @@ struct HashTableHeader {
 
 HashTableHeader::HashTableHeader()
     : common_header(FORMAT_STRING, MAP_HASH_TABLE),
-      key_ids_storage_node_id(STORAGE_INVALID_NODE_ID),
-      old_key_ids_storage_node_id(STORAGE_INVALID_NODE_ID),
+      table_storage_node_id(STORAGE_INVALID_NODE_ID),
+      old_table_storage_node_id(STORAGE_INVALID_NODE_ID),
       pool_storage_node_id(STORAGE_INVALID_NODE_ID),
-      num_key_ids(0),
+      num_entries(0),
       mutex() {}
 
 HashTableHeader::operator bool() const {
@@ -77,8 +77,8 @@ HashTable<T>::HashTable()
     : storage_(nullptr),
       storage_node_id_(STORAGE_INVALID_NODE_ID),
       header_(nullptr),
-      key_ids_(),
-      old_key_ids_(),
+      table_(),
+      old_table_(),
       pool_() {}
 
 template <typename T>
@@ -138,27 +138,27 @@ bool HashTable<T>::get(int64_t key_id, Key *key) {
 
 template <typename T>
 bool HashTable<T>::unset(int64_t key_id) {
-  refresh_key_ids();
-  int64_t * const stored_key_id = find_key_id(key_id);
-  if (!stored_key_id) {
+  refresh_table();
+  int64_t * const entry = find_key_id(key_id);
+  if (!entry) {
     // Not found.
     return false;
   }
   pool_->unset(key_id);
-  *stored_key_id = TABLE_ENTRY_REMOVED;
+  *entry = TABLE_ENTRY_REMOVED;
   return true;
 }
 
 template <typename T>
 bool HashTable<T>::reset(int64_t key_id, KeyArg dest_key) {
-  refresh_key_ids();
+  refresh_table();
   Key src_key;
   if (!get(key_id, &src_key)) {
     // Not found.
     return false;
   }
-  int64_t * const src_key_id = find_key_id(key_id);
-  if (!src_key_id) {
+  int64_t * const src_entry = find_key_id(key_id);
+  if (!src_entry) {
     // Not found.
     return false;
   }
@@ -170,16 +170,16 @@ bool HashTable<T>::reset(int64_t key_id, KeyArg dest_key) {
   }
   pool_->reset(key_id, dest_normalized_key);
   if (*dest_key_id == TABLE_ENTRY_UNUSED) {
-    ++header_->num_key_ids;
+    ++header_->num_entries;
   }
   *dest_key_id = key_id;
-  *src_key_id = TABLE_ENTRY_REMOVED;
+  *src_entry = TABLE_ENTRY_REMOVED;
   return true;
 }
 
 template <typename T>
 bool HashTable<T>::find(KeyArg key, int64_t *key_id) {
-  refresh_key_ids();
+  refresh_table();
   const Key normalized_key = Helper<T>::normalize(key);
   int64_t *stored_key_id;
   if (!find_key(normalized_key, &stored_key_id)) {
@@ -194,10 +194,10 @@ bool HashTable<T>::find(KeyArg key, int64_t *key_id) {
 
 template <typename T>
 bool HashTable<T>::add(KeyArg key, int64_t *key_id) {
-  refresh_key_ids();
+  refresh_table();
   // Rebuild the hash table if the filling rate is greater than 62.5%.
-  const uint64_t key_ids_size = key_ids_->size();
-  if (header_->num_key_ids > ((key_ids_size + (key_ids_size / 4)) / 2)) {
+  const uint64_t table_size = table_->size();
+  if (header_->num_entries > ((table_size + (table_size / 4)) / 2)) {
     rebuild();
   }
   const Key normalized_key = Helper<T>::normalize(key);
@@ -211,7 +211,7 @@ bool HashTable<T>::add(KeyArg key, int64_t *key_id) {
   }
   int64_t next_key_id = pool_->add(normalized_key);
   if (*stored_key_id == TABLE_ENTRY_UNUSED) {
-    ++header_->num_key_ids;
+    ++header_->num_entries;
   }
   *stored_key_id = next_key_id;
   if (key_id) {
@@ -222,7 +222,7 @@ bool HashTable<T>::add(KeyArg key, int64_t *key_id) {
 
 template <typename T>
 bool HashTable<T>::remove(KeyArg key) {
-  refresh_key_ids();
+  refresh_table();
   const Key normalized_key = Helper<T>::normalize(key);
   int64_t *stored_key_id;
   if (!find_key(normalized_key, &stored_key_id)) {
@@ -236,7 +236,7 @@ bool HashTable<T>::remove(KeyArg key) {
 
 template <typename T>
 bool HashTable<T>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) {
-  refresh_key_ids();
+  refresh_table();
   const Key src_normalized_key = Helper<T>::normalize(src_key);
   int64_t *src_key_id;
   if (!find_key(src_normalized_key, &src_key_id)) {
@@ -251,7 +251,7 @@ bool HashTable<T>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) {
   }
   pool_->reset(*src_key_id, dest_normalized_key);
   if (*dest_key_id == TABLE_ENTRY_UNUSED) {
-    ++header_->num_key_ids;
+    ++header_->num_entries;
   }
   *dest_key_id = *src_key_id;
   *src_key_id = TABLE_ENTRY_REMOVED;
@@ -263,28 +263,32 @@ bool HashTable<T>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) {
 
 template <typename T>
 bool HashTable<T>::truncate() {
-  refresh_key_ids();
+  refresh_table();
   if (max_key_id() == MAP_MIN_KEY_ID) {
     // Nothing to do.
     return true;
   }
-  std::unique_ptr<KeyIDArray> new_key_ids(
-      KeyIDArray::create(storage_, storage_node_id_, MIN_TABLE_SIZE,
-                         TABLE_ENTRY_UNUSED));
-  if (header_->old_key_ids_storage_node_id != STORAGE_INVALID_NODE_ID) try {
-    KeyIDArray::unlink(storage_, header_->old_key_ids_storage_node_id);
+  // Create an empty table.
+  std::unique_ptr<Table> new_table(
+      Table::create(storage_, storage_node_id_,
+                    MIN_TABLE_SIZE, TABLE_ENTRY_UNUSED));
+  try {
+    if (header_->old_table_storage_node_id != STORAGE_INVALID_NODE_ID) {
+      Table::unlink(storage_, header_->old_table_storage_node_id);
+      header_->old_table_storage_node_id = STORAGE_INVALID_NODE_ID;
+    }
+    pool_->truncate();
   } catch (...) {
-    KeyIDArray::unlink(storage_, new_key_ids->storage_node_id());
+    Table::unlink(storage_, new_table->storage_node_id());
     throw;
   }
-  pool_->truncate();
-  header_->num_key_ids = 0;
-  header_->old_key_ids_storage_node_id = header_->key_ids_storage_node_id;
-  header_->key_ids_storage_node_id = new_key_ids->storage_node_id();
+  header_->num_entries = 0;
+  header_->old_table_storage_node_id = header_->table_storage_node_id;
+  header_->table_storage_node_id = new_table->storage_node_id();
   Lock lock(&header_->mutex);
-  if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) {
-    old_key_ids_.swap(new_key_ids);
-    key_ids_.swap(old_key_ids_);
+  if (table_->storage_node_id() != header_->table_storage_node_id) {
+    old_table_.swap(new_table);
+    table_.swap(old_table_);
   }
   return true;
 }
@@ -299,10 +303,10 @@ void HashTable<T>::create_map(Storage *storage, uint32_t storage_node_id,
   try {
     header_ = static_cast<Header *>(storage_node.body());
     *header_ = Header();
-    key_ids_.reset(KeyIDArray::create(storage, storage_node_id_,
-                                      MIN_TABLE_SIZE, TABLE_ENTRY_UNUSED));
+    table_.reset(Table::create(storage, storage_node_id_,
+                               MIN_TABLE_SIZE, TABLE_ENTRY_UNUSED));
     pool_.reset(KeyPool<T>::create(storage, storage_node_id_));
-    header_->key_ids_storage_node_id = key_ids_->storage_node_id();
+    header_->table_storage_node_id = table_->storage_node_id();
     header_->pool_storage_node_id = pool_->storage_node_id();
   } catch (...) {
     storage->unlink_node(storage_node_id_);
@@ -326,24 +330,25 @@ void HashTable<T>::open_map(Storage *storage, uint32_t storage_node_id) {
                   << ", actual = " << header_->common_header.format();
     throw LogicError();
   }
-  key_ids_.reset(KeyIDArray::open(storage, header_->key_ids_storage_node_id));
+  table_.reset(Table::open(storage, header_->table_storage_node_id));
   pool_.reset(KeyPool<T>::open(storage, header_->pool_storage_node_id));
 }
 
 template <typename T>
 int64_t *HashTable<T>::find_key_id(int64_t key_id) {
-  KeyIDArray * const key_ids = key_ids_.get();
+  Table * const table = table_.get();
   Key stored_key;
   if (!get(key_id, &stored_key)) {
     // Not found.
     return nullptr;
   }
+  const uint64_t hash_mask = table->size() - 1;
   const uint64_t first_hash = Hash<T>()(stored_key);
   for (uint64_t hash = first_hash; ; ) {
-    int64_t &stored_key_id = key_ids->get_value(hash & (key_ids->size() - 1));
-    if (stored_key_id == key_id) {
+    int64_t &entry = table->get_value(hash & hash_mask);
+    if (entry == key_id) {
       // Found.
-      return &stored_key_id;
+      return &entry;
     }
     hash = rehash(hash);
     if (hash == first_hash) {
@@ -355,28 +360,29 @@ int64_t *HashTable<T>::find_key_id(int64_t key_id) {
 }
 
 template <typename T>
-bool HashTable<T>::find_key(KeyArg key, int64_t **stored_key_id) {
-  KeyIDArray * const key_ids = key_ids_.get();
-  *stored_key_id = nullptr;
+bool HashTable<T>::find_key(KeyArg key, int64_t **entry) {
+  Table * const table = table_.get();
+  *entry = nullptr;
+  const uint64_t hash_mask = table->size() - 1;
   const uint64_t first_hash = Hash<T>()(key);
   for (uint64_t hash = first_hash; ; ) {
-    int64_t * const key_id = &key_ids->get_value(hash & (key_ids->size() - 1));
-    if (*key_id == TABLE_ENTRY_UNUSED) {
+    int64_t &this_entry = table->get_value(hash & hash_mask);
+    if (this_entry == TABLE_ENTRY_UNUSED) {
       // Not found.
-      if (!*stored_key_id) {
-        *stored_key_id = key_id;
+      if (!*entry) {
+        *entry = &this_entry;
       }
       return false;
-    } else if (*key_id == TABLE_ENTRY_REMOVED) {
+    } else if (this_entry == TABLE_ENTRY_REMOVED) {
       // Save the first removed entry.
-      if (!*stored_key_id) {
-        *stored_key_id = key_id;
+      if (!*entry) {
+        *entry = &this_entry;
       }
     } else {
-      Key stored_key = pool_->get_key(*key_id);
+      Key stored_key = pool_->get_key(this_entry);
       if (Helper<T>::equal_to(stored_key, key)) {
         // Found.
-        *stored_key_id = key_id;
+        *entry = &this_entry;
         return true;
       }
     }
@@ -391,24 +397,16 @@ bool HashTable<T>::find_key(KeyArg key, int64_t **stored_key_id) {
 
 template <typename T>
 void HashTable<T>::rebuild() {
+  // Create a new table.
   uint64_t new_size = num_keys() * 2;
   if (new_size < MIN_TABLE_SIZE) {
     new_size = MIN_TABLE_SIZE;
   }
   new_size = 2ULL << bit_scan_reverse(new_size - 1);
-  // TODO: Size upper limit check.
-//  if (new_size > key_ids_->size()) {
-//    // Critical error.
-//    GRNXX_ERROR() << "too large table: size = " << new_size
-//                  << ", max_size = " << key_ids_->size();
-//    throw LogicError();
-//  }
-  // Create a new hash table.
-  std::unique_ptr<KeyIDArray> new_key_ids(
-      KeyIDArray::create(storage_, storage_node_id_, new_size,
-                         TABLE_ENTRY_UNUSED));
+  std::unique_ptr<Table> new_table(
+      Table::create(storage_, storage_node_id_, new_size, TABLE_ENTRY_UNUSED));
   try {
-    // Copy keys from the current hash table to the new one.
+    // Copy entries from the current table to the new table.
     const uint64_t new_mask = new_size - 1;
     int64_t key_id;
     for (key_id = MAP_MIN_KEY_ID; key_id <= max_key_id(); ++key_id) {
@@ -417,28 +415,27 @@ void HashTable<T>::rebuild() {
         continue;
       }
       const uint64_t first_hash = Hash<T>()(stored_key);
-      int64_t *stored_key_id;
       for (uint64_t hash = first_hash; ; hash = rehash(hash)) {
-        stored_key_id = &new_key_ids->get_value(hash & new_mask);
-        if (*stored_key_id == TABLE_ENTRY_UNUSED) {
-          *stored_key_id = key_id;
+        int64_t &entry = new_table->get_value(hash & new_mask);
+        if (entry == TABLE_ENTRY_UNUSED) {
+          entry = key_id;
           break;
         }
       }
     }
   } catch (...) {
-    KeyIDArray::unlink(storage_, new_key_ids->storage_node_id());
+    Table::unlink(storage_, new_table->storage_node_id());
     throw;
   }
-  if (header_->old_key_ids_storage_node_id != STORAGE_INVALID_NODE_ID) {
-    KeyIDArray::unlink(storage_, header_->old_key_ids_storage_node_id);
+  if (header_->old_table_storage_node_id != STORAGE_INVALID_NODE_ID) {
+    Table::unlink(storage_, header_->old_table_storage_node_id);
   }
-  header_->old_key_ids_storage_node_id = header_->key_ids_storage_node_id;
-  header_->key_ids_storage_node_id = new_key_ids->storage_node_id();
+  header_->old_table_storage_node_id = header_->table_storage_node_id;
+  header_->table_storage_node_id = new_table->storage_node_id();
   Lock lock(&header_->mutex);
-  if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) {
-    old_key_ids_.swap(new_key_ids);
-    key_ids_.swap(old_key_ids_);
+  if (table_->storage_node_id() != header_->table_storage_node_id) {
+    old_table_.swap(new_table);
+    table_.swap(old_table_);
   }
 }
 
@@ -448,14 +445,14 @@ uint64_t HashTable<T>::rehash(uint64_t hash) const {
 }
 
 template <typename T>
-void HashTable<T>::refresh_key_ids() {
-  if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) {
+void HashTable<T>::refresh_table() {
+  if (table_->storage_node_id() != header_->table_storage_node_id) {
     Lock lock(&header_->mutex);
-    if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) {
-      std::unique_ptr<KeyIDArray> new_key_ids(
-          KeyIDArray::open(storage_, header_->key_ids_storage_node_id));
-      old_key_ids_.swap(new_key_ids);
-      key_ids_.swap(old_key_ids_);
+    if (table_->storage_node_id() != header_->table_storage_node_id) {
+      std::unique_ptr<Table> new_table(
+          Table::open(storage_, header_->table_storage_node_id));
+      old_table_.swap(new_table);
+      table_.swap(old_table_);
     }
   }
 }

  Modified: lib/grnxx/map/hash_table.hpp (+13 -13)
===================================================================
--- lib/grnxx/map/hash_table.hpp    2013-07-25 17:41:55 +0900 (8f281b7)
+++ lib/grnxx/map/hash_table.hpp    2013-07-25 18:36:50 +0900 (1c089eb)
@@ -39,10 +39,10 @@ struct HashTableHeader;
 template <typename T>
 class HashTable : public Map<T> {
   using Header = HashTableHeader;
-  using KeyIDArray = Array<int64_t>;
+  using Table  = Array<int64_t>;
 
  public:
-  using Key = typename Map<T>::Key;
+  using Key    = typename Map<T>::Key;
   using KeyArg = typename Map<T>::KeyArg;
   using Cursor = typename Map<T>::Cursor;
 
@@ -74,8 +74,8 @@ class HashTable : public Map<T> {
   Storage *storage_;
   uint32_t storage_node_id_;
   Header *header_;
-  std::unique_ptr<KeyIDArray> key_ids_;
-  std::unique_ptr<KeyIDArray> old_key_ids_;
+  std::unique_ptr<Table> table_;
+  std::unique_ptr<Table> old_table_;
   std::unique_ptr<KeyPool<T>> pool_;
 
   void create_map(Storage *storage, uint32_t storage_node_id,
@@ -83,23 +83,23 @@ class HashTable : public Map<T> {
   void open_map(Storage *storage, uint32_t storage_node_id);
 
   // Search a hash table for a key ID.
-  // Return a pointer to the stored key ID on success.
-  // Return nullptr on failure.
+  // On success, return a pointer to a matched entry.
+  // On failure, return nullptr.
   int64_t *find_key_id(int64_t key_id);
   // Search a hash table for a key.
-  // Return true on success and assign the address of the stored key ID to
-  // "*stored_key_id".
-  // Return false on failure and assign the address of the first unused or
-  // removed entry to "*stored_key_id".
-  bool find_key(KeyArg key, int64_t **stored_key_id);
+  // On success, assign a pointer to a matched entry to "*entry" and return
+  // true.
+  // On failure, assign a pointer to the first unused or removed entry to
+  // "*entry" and return false.
+  bool find_key(KeyArg key, int64_t **entry);
 
   // Rebuild the hash table.
   void rebuild();
   // Move to the next entry.
   uint64_t rehash(uint64_t hash) const;
 
-  // Refresh "key_ids_" if it is old.
-  void refresh_key_ids();
+  // Refresh "table_" if it is old.
+  void refresh_table();
 };
 
 }  // namespace map
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Back to archive index