susumu.yata
null+****@clear*****
Tue Jun 4 18:23:12 JST 2013
susumu.yata 2013-06-04 18:23:12 +0900 (Tue, 04 Jun 2013) New Revision: 6e20323de6fdb981549d3de0635dd7e423359e58 https://github.com/groonga/grnxx/commit/6e20323de6fdb981549d3de0635dd7e423359e58 Message: Implement grnxx::map::HashTable. Added files: lib/grnxx/map/hash_table.cpp lib/grnxx/map/hash_table.hpp lib/grnxx/map/hash_table/Makefile.am lib/grnxx/map/hash_table/bit_array.hpp lib/grnxx/map/hash_table/dummy.cpp lib/grnxx/map/hash_table/key_array.hpp lib/grnxx/map/hash_table/key_id_array.hpp lib/grnxx/map/hash_table/link_array.hpp Modified files: configure.ac lib/grnxx/map.cpp lib/grnxx/map.hpp lib/grnxx/map/Makefile.am lib/grnxx/map/helper.hpp test/test_map.cpp Renamed files: lib/grnxx/map/hash_table/hash.hpp (from lib/grnxx/map/hash.hpp) Modified: configure.ac (+1 -0) =================================================================== --- configure.ac 2013-05-31 15:34:49 +0900 (40bb496) +++ configure.ac 2013-06-04 18:23:12 +0900 (6243117) @@ -66,6 +66,7 @@ AC_CONFIG_FILES([Makefile lib/grnxx/db/Makefile lib/grnxx/io/Makefile lib/grnxx/map/Makefile + lib/grnxx/map/hash_table/Makefile lib/grnxx/storage/Makefile src/Makefile test/Makefile]) Modified: lib/grnxx/map.cpp (+3 -2) =================================================================== --- lib/grnxx/map.cpp 2013-05-31 15:34:49 +0900 (baff7f6) +++ lib/grnxx/map.cpp 2013-06-04 18:23:12 +0900 (861a433) @@ -26,6 +26,7 @@ #include "grnxx/string_builder.hpp" #include "grnxx/map/array_map.hpp" #include "grnxx/map/cursor_impl.hpp" +#include "grnxx/map/hash_table.hpp" #include "grnxx/map/header.hpp" #include "grnxx/map/helper.hpp" #include "grnxx/map/scanner_impl.hpp" @@ -78,7 +79,7 @@ Map<T> *Map<T>::create(MapType type, Storage *storage, // TODO: Not supported yet. } case MAP_HASH_TABLE: { - // TODO: Not supported yet. + return map::HashTable<T>::create(storage, storage_node_id, options); } default: { GRNXX_ERROR() << "invalid argument: type = " << type; @@ -110,7 +111,7 @@ Map<T> *Map<T>::open(Storage *storage, uint32_t storage_node_id) { // TODO: Not supported yet. } case MAP_HASH_TABLE: { - // TODO: Not supported yet. + return map::HashTable<T>::open(storage, storage_node_id); } default: { GRNXX_ERROR() << "invalid format: type = " << header->type; Modified: lib/grnxx/map.hpp (+0 -1) =================================================================== --- lib/grnxx/map.hpp 2013-05-31 15:34:49 +0900 (0bdf87c) +++ lib/grnxx/map.hpp 2013-06-04 18:23:12 +0900 (03c3f3f) @@ -20,7 +20,6 @@ #include "grnxx/features.hpp" -#include "grnxx/flags_impl.hpp" #include "grnxx/map_cursor.hpp" #include "grnxx/map_cursor_query.hpp" #include "grnxx/map_scanner.hpp" Modified: lib/grnxx/map/Makefile.am (+20 -13) =================================================================== --- lib/grnxx/map/Makefile.am 2013-05-31 15:34:49 +0900 (7c7e0ad) +++ lib/grnxx/map/Makefile.am 2013-06-04 18:23:12 +0900 (d5066f0) @@ -1,21 +1,28 @@ +SUBDIRS = \ + hash_table + noinst_LTLIBRARIES = libgrnxx_map.la +libgrnxx_map_la_LIBADD = \ + hash_table/libgrnxx_map_hash_table.la + libgrnxx_map_la_LDFLAGS = @AM_LTLDFLAGS@ -libgrnxx_map_la_SOURCES = \ - array_map.cpp \ - bytes_array.cpp \ - bytes_store.cpp \ - cursor_impl.cpp \ +libgrnxx_map_la_SOURCES = \ + array_map.cpp \ + bytes_array.cpp \ + bytes_store.cpp \ + cursor_impl.cpp \ + hash_table.cpp \ scanner_impl.cpp libgrnxx_map_includedir = ${includedir}/grnxx/map -libgrnxx_map_include_HEADERS = \ - array_map.hpp \ - bytes_array.hpp \ - bytes_store.hpp \ - cursor_impl.hpp \ - hash.hpp \ - header.hpp \ - helper.hpp \ +libgrnxx_map_include_HEADERS = \ + array_map.hpp \ + bytes_array.hpp \ + bytes_store.hpp \ + cursor_impl.hpp \ + hash_table.hpp \ + header.hpp \ + helper.hpp \ scanner_impl.hpp Added: lib/grnxx/map/hash_table.cpp (+654 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table.cpp 2013-06-04 18:23:12 +0900 (0f2f283) @@ -0,0 +1,654 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#include "grnxx/map/hash_table.hpp" + +#include <new> + +#include "grnxx/bytes.hpp" +#include "grnxx/geo_point.hpp" +#include "grnxx/intrinsic.hpp" +#include "grnxx/lock.hpp" +#include "grnxx/logger.hpp" +#include "grnxx/map/hash_table/hash.hpp" +#include "grnxx/map/helper.hpp" +#include "grnxx/mutex.hpp" +#include "grnxx/storage.hpp" + +namespace grnxx { +namespace map { + +template <typename T> +using Hash = hash_table::Hash<T>; + +constexpr uint64_t INVALID_LINK = uint64_t(-1); + +struct HashTableHeader { + MapType map_type; + uint32_t key_ids_storage_node_id; + uint32_t old_key_ids_storage_node_id; + uint32_t keys_storage_node_id; + uint32_t bits_storage_node_id; + uint32_t links_storage_node_id; + int64_t max_key_id; + uint64_t num_keys; + uint64_t num_key_ids; + uint64_t latest_link; + Mutex mutex; + + HashTableHeader(); +}; + +HashTableHeader::HashTableHeader() + : map_type(MAP_HASH_TABLE), + key_ids_storage_node_id(STORAGE_INVALID_NODE_ID), + old_key_ids_storage_node_id(STORAGE_INVALID_NODE_ID), + keys_storage_node_id(STORAGE_INVALID_NODE_ID), + bits_storage_node_id(STORAGE_INVALID_NODE_ID), + links_storage_node_id(STORAGE_INVALID_NODE_ID), + max_key_id(MAP_MIN_KEY_ID - 1), + num_keys(0), + num_key_ids(0), + latest_link(INVALID_LINK), + mutex(MUTEX_UNLOCKED) {} + +template <typename T> +HashTable<T>::HashTable() + : storage_(nullptr), + storage_node_id_(STORAGE_INVALID_NODE_ID), + header_(nullptr), + key_ids_(), + old_key_ids_(), + keys_(), + bits_(), + links_() {} + +template <typename T> +HashTable<T>::~HashTable() {} + +template <typename T> +HashTable<T> *HashTable<T>::create(Storage *storage, uint32_t storage_node_id, + const MapOptions &options) { + std::unique_ptr<HashTable> map(new (std::nothrow) HashTable); + if (!map) { + GRNXX_ERROR() << "new grnxx::map::HashTable failed"; + return nullptr; + } + if (!map->create_map(storage, storage_node_id, options)) { + return nullptr; + } + return map.release(); +} + +template <typename T> +HashTable<T> *HashTable<T>::open(Storage *storage, uint32_t storage_node_id) { + std::unique_ptr<HashTable> map(new (std::nothrow) HashTable); + if (!map) { + GRNXX_ERROR() << "new grnxx::map::HashTable failed"; + return nullptr; + } + if (!map->open_map(storage, storage_node_id)) { + return nullptr; + } + return map.release(); +} + +template <typename T> +uint32_t HashTable<T>::storage_node_id() const { + return storage_node_id_; +} + +template <typename T> +MapType HashTable<T>::type() const { + return MAP_HASH_TABLE; +} + +template <typename T> +int64_t HashTable<T>::max_key_id() const { + return header_->max_key_id; +} + +template <typename T> +uint64_t HashTable<T>::num_keys() const { + return header_->num_keys; +} + +template <typename T> +bool HashTable<T>::get(int64_t key_id, Key *key) { + if ((key_id < MAP_MIN_KEY_ID) || (key_id > header_->max_key_id)) { + // Out of range. + return false; + } + bool bit; + if (!bits_->get(key_id, &bit)) { + // Error. + return false; + } + if (!bit) { + // Not found. + return false; + } + return keys_->get(key_id, key); +} + +template <typename T> +bool HashTable<T>::unset(int64_t key_id) { + if (!refresh_key_ids()) { + // Error. + return false; + } + int64_t *stored_key_id; + if (!find_key_id(key_id, &stored_key_id)) { + // Not found or error. + return false; + } + // Prepare to update "bits_" and "links_". + const uint64_t unit_id = key_id / bits_->unit_size(); + const uint64_t unit_bit = 1ULL << (key_id % bits_->unit_size()); + BitArrayUnit * const unit = bits_->get_unit(unit_id); + if (!unit) { + // Error. + return false; + } + // Set "link" if this operation creates the first 0 bit in the unit. + uint64_t *link = nullptr; + if (*unit == ~BitArrayUnit(0)) { + link = links_->get_pointer(unit_id); + if (!link) { + // Error. + return false; + } + } + *unit &= ~unit_bit; + *stored_key_id = -1; + if (link) { + *link = header_->latest_link; + header_->latest_link = *link; + } + --header_->num_keys; + return true; +} + +template <typename T> +bool HashTable<T>::reset(int64_t key_id, KeyArg dest_key) { + if (!refresh_key_ids()) { + // Error. + return false; + } + Key src_key; + if (!get(key_id, &src_key)) { + // Not found or error. + return false; + } + int64_t *src_key_id; + if (!find_key_id(key_id, &src_key_id)) { + // Not found or error. + return false; + } + const Key dest_normalized_key = Helper<T>::normalize(dest_key); + int64_t *dest_key_id; + if (find_key(dest_normalized_key, &dest_key_id)) { + // Found or error. + return false; + } + if (!keys_->set(key_id, dest_normalized_key)) { + // Error. + return false; + } + if (*dest_key_id == MAP_INVALID_KEY_ID) { + ++header_->num_key_ids; + } + *dest_key_id = key_id; + *src_key_id = -1; + return true; +} + +template <typename T> +bool HashTable<T>::find(KeyArg key, int64_t *key_id) { + if (!refresh_key_ids()) { + // Error. + return false; + } + const Key normalized_key = Helper<T>::normalize(key); + int64_t *stored_key_id; + if (!find_key(normalized_key, &stored_key_id)) { + // Not found or error. + return false; + } + if (key_id) { + *key_id = *stored_key_id; + } + return true; +} + +template <typename T> +bool HashTable<T>::add(KeyArg key, int64_t *key_id) { + if (!refresh_key_ids()) { + // Error. + return false; + } + // Rebuild the hash table if the filling rate is greater than 62.5%. + const uint64_t key_ids_size = key_ids_->mask() + 1; + if (header_->num_key_ids > ((key_ids_size + (key_ids_size / 4)) / 2)) { + if (!rebuild()) { + // Error. + return false; + } + } + const Key normalized_key = Helper<T>::normalize(key); + int64_t *stored_key_id; + if (find_key(normalized_key, &stored_key_id)) { + // Found. + if (key_id) { + *key_id = *stored_key_id; + } + return false; + } else if (!stored_key_id) { + // Error. + return false; + } + // Find an unused key ID. + const bool is_new_unit = (header_->latest_link == INVALID_LINK); + uint64_t unit_id; + if (is_new_unit) { + unit_id = (header_->max_key_id + 1) / bits_->unit_size(); + } else { + unit_id = header_->latest_link; + } + BitArrayUnit * const unit = bits_->get_unit(unit_id); + if (!unit) { + // Error. + return false; + } + const uint8_t unit_bit_id = bit_scan_forward(~*unit); + const uint64_t unit_bit = 1ULL << unit_bit_id; + const int64_t next_key_id = (unit_id * bits_->unit_size()) + unit_bit_id; + uint64_t *link = nullptr; + if (is_new_unit) { + if (!links_->set(unit_id, INVALID_LINK)) { + // Error. + return false; + } + *unit = 0; + header_->latest_link = (header_->max_key_id + 1) / bits_->unit_size(); + } else if ((*unit | unit_bit) == ~BitArrayUnit(0)) { + // The unit will be removed from "links_" if it becomes full. + link = links_->get_pointer(header_->latest_link); + if (!link) { + // Error. + return false; + } + } + if (!keys_->set(next_key_id, normalized_key)) { + // Error. + return false; + } + if (link) { + header_->latest_link = *link; + } + *unit |= unit_bit; + if (*stored_key_id == MAP_INVALID_KEY_ID) { + ++header_->num_key_ids; + } + *stored_key_id = next_key_id; + if (next_key_id > header_->max_key_id) { + header_->max_key_id = next_key_id; + } + ++header_->num_keys; + if (key_id) { + *key_id = next_key_id; + } + return true; +} + +template <typename T> +bool HashTable<T>::remove(KeyArg key) { + if (!refresh_key_ids()) { + // Error. + return false; + } + const Key normalized_key = Helper<T>::normalize(key); + int64_t *stored_key_id; + if (!find_key(normalized_key, &stored_key_id)) { + // Not found or error. + return false; + } + // Prepare to update "bits_" and "links_". + const uint64_t unit_id = *stored_key_id / bits_->unit_size(); + const uint64_t unit_bit = 1ULL << (*stored_key_id % bits_->unit_size()); + BitArrayUnit * const unit = bits_->get_unit(unit_id); + if (!unit) { + // Error. + return false; + } + // Set "link" if this operation creates the first 0 bit in the unit. + uint64_t *link = nullptr; + if (*unit == ~BitArrayUnit(0)) { + link = links_->get_pointer(unit_id); + if (!link) { + // Error. + return false; + } + } + *unit &= ~unit_bit; + *stored_key_id = -1; + if (link) { + *link = header_->latest_link; + header_->latest_link = *link; + } + --header_->num_keys; + return true; +} + +template <typename T> +bool HashTable<T>::replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id) { + if (!refresh_key_ids()) { + // Error. + return false; + } + const Key src_normalized_key = Helper<T>::normalize(src_key); + int64_t *src_key_id; + if (!find_key(src_normalized_key, &src_key_id)) { + // Not found or error. + return false; + } + const Key dest_normalized_key = Helper<T>::normalize(dest_key); + int64_t *dest_key_id; + if (find_key(dest_normalized_key, &dest_key_id)) { + // Found or error. + return false; + } + if (!keys_->set(*src_key_id, dest_normalized_key)) { + // Error. + return false; + } + if (*dest_key_id == MAP_INVALID_KEY_ID) { + ++header_->num_key_ids; + } + *dest_key_id = *src_key_id; + *src_key_id = -1; + if (key_id) { + *key_id = *dest_key_id; + } + return true; +} + +template <typename T> +bool HashTable<T>::truncate() { + if (!refresh_key_ids()) { + return false; + } + if (header_->max_key_id == MAP_MIN_KEY_ID) { + // Nothing to do. + return true; + } + header_->max_key_id = MAP_MIN_KEY_ID - 1; + header_->num_keys = 0; + header_->num_key_ids = 0; + header_->latest_link = INVALID_LINK; + std::unique_ptr<KeyIDArray> new_key_ids( + KeyIDArray::create(storage_, storage_node_id_, + KeyIDArray::page_size() - 1)); + if (!new_key_ids) { + // Error. + return false; + } + if (header_->old_key_ids_storage_node_id != STORAGE_INVALID_NODE_ID) { + storage_->unlink_node(header_->old_key_ids_storage_node_id); + } + header_->old_key_ids_storage_node_id = header_->key_ids_storage_node_id; + header_->key_ids_storage_node_id = new_key_ids->storage_node_id(); + Lock lock(&header_->mutex); + if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) { + old_key_ids_.swap(new_key_ids); + key_ids_.swap(old_key_ids_); + } + return true; +} + +template <typename T> +bool HashTable<T>::create_map(Storage *storage, uint32_t storage_node_id, + const MapOptions &) { + storage_ = storage; + StorageNode storage_node = + storage->create_node(storage_node_id, sizeof(HashTableHeader)); + if (!storage_node) { + return false; + } + storage_node_id_ = storage_node.id(); + header_ = static_cast<HashTableHeader *>(storage_node.body()); + *header_ = HashTableHeader(); + bits_.reset(BitArray::create(storage, storage_node_id_)); + keys_.reset(KeyArray::create(storage, storage_node_id_)); + links_.reset(LinkArray::create(storage, storage_node_id_)); + key_ids_.reset(KeyIDArray::create(storage, storage_node_id_, + KeyIDArray::page_size() - 1)); + if (!bits_ || !keys_ || !links_ || !key_ids_) { + storage->unlink_node(storage_node_id_); + return false; + } + header_->bits_storage_node_id = bits_->storage_node_id(); + header_->keys_storage_node_id = keys_->storage_node_id(); + header_->key_ids_storage_node_id = key_ids_->storage_node_id(); + header_->links_storage_node_id = links_->storage_node_id(); + return true; +} + +template <typename T> +bool HashTable<T>::open_map(Storage *storage, uint32_t storage_node_id) { + storage_ = storage; + StorageNode storage_node = storage->open_node(storage_node_id); + if (!storage_node) { + return false; + } + if (storage_node.size() < sizeof(HashTableHeader)) { + GRNXX_ERROR() << "invalid format: size = " << storage_node.size() + << ", header_size = " << sizeof(HashTableHeader); + return false; + } + storage_node_id_ = storage_node_id; + header_ = static_cast<HashTableHeader *>(storage_node.body()); + bits_.reset(BitArray::open(storage, header_->bits_storage_node_id)); + keys_.reset(KeyArray::open(storage, header_->keys_storage_node_id)); + links_.reset(LinkArray::open(storage, header_->links_storage_node_id)); + key_ids_.reset(KeyIDArray::open(storage, header_->key_ids_storage_node_id)); + if (!bits_ || !keys_ || !links_ || !key_ids_) { + return false; + } + return true; +} + +template <typename T> +bool HashTable<T>::find_key_id(int64_t key_id, int64_t **stored_key_id) { + KeyIDArray * const key_ids = key_ids_.get(); + Key stored_key; + if (!get(key_id, &stored_key)) { + // Not found or error. + return false; + } + const uint64_t first_hash = Hash<T>()(stored_key); + for (uint64_t hash = first_hash; ; ) { + *stored_key_id = key_ids->get_pointer(hash); + if (!*stored_key_id) { + // Error. + return false; + } + if (**stored_key_id == key_id) { + // Found. + return true; + } + hash = rehash(hash); + if (hash == first_hash) { + // Critical error. + GRNXX_ERROR() << "endless loop"; + return false; + } + } +} + +template <typename T> +bool HashTable<T>::find_key(KeyArg key, int64_t **stored_key_id) { + KeyIDArray * const key_ids = key_ids_.get(); + *stored_key_id = nullptr; + const uint64_t first_hash = Hash<T>()(key); + for (uint64_t hash = first_hash; ; ) { + int64_t * const key_id = key_ids->get_pointer(hash); + if (!key_id) { + // Error. + return false; + } + if (*key_id == MAP_INVALID_KEY_ID) { + // Not found. + if (!*stored_key_id) { + *stored_key_id = key_id; + } + return false; + } else if (*key_id < 0) { + // Save the first removed entry. + if (!*stored_key_id) { + *stored_key_id = key_id; + } + } else { + Key stored_key; + if (!keys_->get(*key_id, &stored_key)) { + // Error. + return false; + } + if (Helper<T>::equal_to(stored_key, key)) { + // Found. + *stored_key_id = key_id; + return true; + } + } + hash = rehash(hash); + if (hash == first_hash) { + // Critical error. + GRNXX_ERROR() << "endless loop"; + return false; + } + } +} + +template <typename T> +bool HashTable<T>::rebuild() { + uint64_t new_size = header_->num_keys * 2; + if (new_size < key_ids_->page_size()) { + new_size = key_ids_->page_size(); + } + new_size = 2ULL << bit_scan_reverse(new_size - 1); + if (new_size > key_ids_->size()) { + // Critical error. + GRNXX_ERROR() << "too large table: size = " << new_size + << ", max_size = " << key_ids_->size(); + return false; + } + const uint64_t new_mask = new_size - 1; + // Create a new hash table. + std::unique_ptr<KeyIDArray> new_key_ids( + KeyIDArray::create(storage_, storage_node_id_, new_mask)); + if (!new_key_ids) { + // Error. + return false; + } + // Copy keys from the current hash table to the new one. + int64_t key_id; + for (key_id = MAP_MIN_KEY_ID; key_id <= max_key_id(); ++key_id) { + bool bit; + if (!bits_->get(key_id, &bit)) { + // Error. + break; + } + if (!bit) { + continue; + } + Key stored_key; + if (!keys_->get(key_id, &stored_key)) { + // Error. + break; + } + const uint64_t first_hash = Hash<T>()(stored_key); + int64_t *stored_key_id; + for (uint64_t hash = first_hash; ; hash = rehash(hash)) { + stored_key_id = new_key_ids->get_pointer(hash); + if (!stored_key_id) { + // Error. + break; + } + if (*stored_key_id == MAP_INVALID_KEY_ID) { + *stored_key_id = key_id; + break; + } + } + if (!stored_key_id) { + // Error. + break; + } + } + if (key_id <= max_key_id()) { + // Error. + return false; + } + if (header_->old_key_ids_storage_node_id != STORAGE_INVALID_NODE_ID) { + storage_->unlink_node(header_->old_key_ids_storage_node_id); + } + header_->old_key_ids_storage_node_id = header_->key_ids_storage_node_id; + header_->key_ids_storage_node_id = new_key_ids->storage_node_id(); + Lock lock(&header_->mutex); + if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) { + old_key_ids_.swap(new_key_ids); + key_ids_.swap(old_key_ids_); + } + return true; +} + +template <typename T> +uint64_t HashTable<T>::rehash(uint64_t hash) const { + return hash + 1; +} + +template <typename T> +bool HashTable<T>::refresh_key_ids() { + if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) { + Lock lock(&header_->mutex); + if (key_ids_->storage_node_id() != header_->key_ids_storage_node_id) { + std::unique_ptr<KeyIDArray> new_key_ids( + KeyIDArray::open(storage_, header_->key_ids_storage_node_id)); + if (!new_key_ids) { + // Error. + return false; + } + old_key_ids_.swap(new_key_ids); + key_ids_.swap(old_key_ids_); + } + } + return true; +} + +template class HashTable<int8_t>; +template class HashTable<uint8_t>; +template class HashTable<int16_t>; +template class HashTable<uint16_t>; +template class HashTable<int32_t>; +template class HashTable<uint32_t>; +template class HashTable<int64_t>; +template class HashTable<uint64_t>; +template class HashTable<double>; +template class HashTable<GeoPoint>; +template class HashTable<Bytes>; + +} // namespace map +} // namespace grnxx Added: lib/grnxx/map/hash_table.hpp (+110 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table.hpp 2013-06-04 18:23:12 +0900 (ea6e20b) @@ -0,0 +1,110 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#ifndef GRNXX_MAP_HASH_TABLE_HPP +#define GRNXX_MAP_HASH_TABLE_HPP + +#include "grnxx/features.hpp" + +#include <memory> + +#include "grnxx/map.hpp" +#include "grnxx/map/hash_table/bit_array.hpp" +#include "grnxx/map/hash_table/key_array.hpp" +#include "grnxx/map/hash_table/key_id_array.hpp" +#include "grnxx/map/hash_table/link_array.hpp" +#include "grnxx/types.hpp" + +namespace grnxx { +namespace map { + +struct HashTableHeader; + +template <typename T> +class HashTable : public Map<T> { + using KeyIDArray = typename hash_table::KeyIDArray<T>; + using KeyArray = typename hash_table::KeyArray<T>::Type; + using BitArray = typename hash_table::BitArray<T>::Type; + using BitArrayUnit = typename BitArray::Unit; + using LinkArray = typename hash_table::LinkArray<T>::Type; + + public: + using Key = typename Map<T>::Key; + using KeyArg = typename Map<T>::KeyArg; + using Cursor = typename Map<T>::Cursor; + + HashTable(); + ~HashTable(); + + static HashTable *create(Storage *storage, uint32_t storage_node_id, + const MapOptions &options = MapOptions()); + static HashTable *open(Storage *storage, uint32_t storage_node_id); + + uint32_t storage_node_id() const; + MapType type() const; + + int64_t max_key_id() const; + uint64_t num_keys() const; + + bool get(int64_t key_id, Key *key = nullptr); + bool unset(int64_t key_id); + bool reset(int64_t key_id, KeyArg dest_key); + + bool find(KeyArg key, int64_t *key_id = nullptr); + bool add(KeyArg key, int64_t *key_id = nullptr); + bool remove(KeyArg key); + bool replace(KeyArg src_key, KeyArg dest_key, int64_t *key_id = nullptr); + + bool truncate(); + + private: + Storage *storage_; + uint32_t storage_node_id_; + HashTableHeader *header_; + std::unique_ptr<KeyIDArray> key_ids_; + std::unique_ptr<KeyIDArray> old_key_ids_; + std::unique_ptr<KeyArray> keys_; + std::unique_ptr<BitArray> bits_; + std::unique_ptr<LinkArray> links_; + + bool create_map(Storage *storage, uint32_t storage_node_id, + const MapOptions &options); + bool open_map(Storage *storage, uint32_t storage_node_id); + + // Find a key ID in the hash table. + // Return true on success and assign the address to "*stored_key_id". + // Return false on failure and don't modify "*stored_key_id". + bool find_key_id(int64_t key_id, int64_t **stored_key_id); + // Find a key in the hash table. + // Return true on success and assign the address to "*stored_key_id". + // Return false on failure and assign the address of first unused entry. + // Assign nullptr to "*stored_key_id" on error. + bool find_key(KeyArg key, int64_t **stored_key_id); + + // Rebuild the hash table. + bool rebuild(); + // Move to the next entry. + uint64_t rehash(uint64_t hash) const; + + // Refresh "key_ids_" if it is old. + bool refresh_key_ids(); +}; + +} // namespace map +} // namespace grnxx + +#endif // GRNXX_MAP_HASH_TABLE_HPP Added: lib/grnxx/map/hash_table/Makefile.am (+14 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table/Makefile.am 2013-06-04 18:23:12 +0900 (f0475af) @@ -0,0 +1,14 @@ +noinst_LTLIBRARIES = libgrnxx_map_hash_table.la + +libgrnxx_map_hash_table_la_LDFLAGS = @AM_LTLDFLAGS@ + +libgrnxx_map_hash_table_la_SOURCES = \ + dummy.cpp + +libgrnxx_map_hash_table_includedir = ${includedir}/grnxx/map/hash_table +libgrnxx_map_hash_table_include_HEADERS = \ + bit_array.hpp \ + hash.hpp \ + key_array.hpp \ + key_id_array.hpp \ + link_array.hpp Added: lib/grnxx/map/hash_table/bit_array.hpp (+62 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table/bit_array.hpp 2013-06-04 18:23:12 +0900 (a21dd6a) @@ -0,0 +1,62 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#ifndef GRNXX_MAP_HASH_TABLE_BIT_ARRAY_HPP +#define GRNXX_MAP_HASH_TABLE_BIT_ARRAY_HPP + +#include "grnxx/features.hpp" + +#include "grnxx/array.hpp" +#include "grnxx/types.hpp" + +namespace grnxx { +namespace map { +namespace hash_table { + +// Change the array size based on the size of "T". +template <typename T, size_t T_SIZE = sizeof(T)> +struct BitArray; + +// Map<T> has at most 2^40 different keys. +template <typename T, size_t T_SIZE> +struct BitArray { + using Type = Array<bool, 65536, 4096, 4096>; +}; + +// Map<T> has at most 2^8 different keys. +template <typename T> +struct BitArray<T, 1> { + using Type = Array<bool, 256, 1, 1>; +}; + +// Map<T> has at most 2^16 different keys. +template <typename T> +struct BitArray<T, 2> { + using Type = Array<bool, 256, 256, 1>; +}; + +// Map<T> has at most 2^32 different keys. +template <typename T> +struct BitArray<T, 4> { + using Type = Array<bool, 16384, 512, 512>; +}; + +} // namespace hash_table +} // namespace map +} // namespace grnxx + +#endif // GRNXX_MAP_HASH_TABLE_BIT_ARRAY_HPP Added: lib/grnxx/map/hash_table/dummy.cpp (+37 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table/dummy.cpp 2013-06-04 18:23:12 +0900 (43a92c7) @@ -0,0 +1,37 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#ifndef GRNXX_MAP_HASH_TABLE_DUMMY_HPP +#define GRNXX_MAP_HASH_TABLE_DUMMY_HPP + +#include "grnxx/features.hpp" + +#include "grnxx/map/hash_table/bit_array.hpp" +#include "grnxx/map/hash_table/hash.hpp" +#include "grnxx/map/hash_table/key_array.hpp" +#include "grnxx/map/hash_table/key_id_array.hpp" +#include "grnxx/map/hash_table/link_array.hpp" + +namespace grnxx { +namespace map { +namespace hash_table { + +} // namespace hash_table +} // namespace map +} // namespace grnxx + +#endif // GRNXX_MAP_HASH_TABLE_DUMMY_HPP Renamed: lib/grnxx/map/hash_table/hash.hpp (+3 -0) 98% =================================================================== --- lib/grnxx/map/hash.hpp 2013-05-31 15:34:49 +0900 (b4e8363) +++ lib/grnxx/map/hash_table/hash.hpp 2013-06-04 18:23:12 +0900 (9558df4) @@ -23,9 +23,11 @@ #include "grnxx/bytes.hpp" #include "grnxx/geo_point.hpp" #include "grnxx/traits.hpp" +#include "grnxx/types.hpp" namespace grnxx { namespace map { +namespace hash_table { // Calculate a hash value. template <typename T> @@ -159,6 +161,7 @@ struct Hash<Bytes> { } }; +} // namespace hash_table } // namespace map } // namespace grnxx Added: lib/grnxx/map/hash_table/key_array.hpp (+70 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table/key_array.hpp 2013-06-04 18:23:12 +0900 (4c96009) @@ -0,0 +1,70 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#ifndef GRNXX_MAP_HASH_TABLE_KEY_ARRAY_HPP +#define GRNXX_MAP_HASH_TABLE_KEY_ARRAY_HPP + +#include "grnxx/features.hpp" + +#include "grnxx/array.hpp" +#include "grnxx/bytes.hpp" +#include "grnxx/map/bytes_array.hpp" +#include "grnxx/types.hpp" + +namespace grnxx { +namespace map { +namespace hash_table { + +// Change the array size based on "T". +template <typename T, size_t T_SIZE = sizeof(T)> +struct KeyArray; + +// Map<T> has at most 2^40 different keys. +template <typename T, size_t T_SIZE> +struct KeyArray { + using Type = Array<T>; +}; + +// Map<T> has at most 2^8 different keys. +template <typename T> +struct KeyArray<T, 1> { + using Type = Array<T, 256, 1, 1>; +}; + +// Map<T> has at most 2^16 different keys. +template <typename T> +struct KeyArray<T, 2> { + using Type = Array<T, 256, 256, 1>; +}; + +// Map<T> has at most 2^32 different keys. +template <typename T> +struct KeyArray<T, 4> { + using Type = Array<T, 65536, 256, 256>; +}; + +// Map<T> has at most 2^40 different keys. +template <> +struct KeyArray<Bytes> { + using Type = BytesArray; +}; + +} // namespace hash_table +} // namespace map +} // namespace grnxx + +#endif // GRNXX_MAP_HASH_TABLE_KEY_ARRAY_HPP Added: lib/grnxx/map/hash_table/key_id_array.hpp (+222 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table/key_id_array.hpp 2013-06-04 18:23:12 +0900 (76a0038) @@ -0,0 +1,222 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#ifndef GRNXX_MAP_HASH_TABLE_KEY_ID_ARRAY_HPP +#define GRNXX_MAP_HASH_TABLE_KEY_ID_ARRAY_HPP + +#include "grnxx/features.hpp" + +#include "grnxx/array_impl.hpp" +#include "grnxx/map.hpp" +#include "grnxx/storage.hpp" +#include "grnxx/types.hpp" + +namespace grnxx { +namespace map { +namespace hash_table { + +// Change the array size based on the size of "T". +// Note that the size of link array is N/64 where N is the size of BitArray. +template <typename T, size_t T_SIZE = sizeof(T)> +struct KeyIDArrayHelper; + +// Map<T> has at most 2^40 different keys. +template <typename T, size_t T_SIZE> +struct KeyIDArrayHelper { + using ImplType = ArrayImpl<int64_t, 65536, 8192, 4096>; +}; + +// Map<T> has at most 2^8 different keys. +template <typename T> +struct KeyIDArrayHelper<T, 1> { + using ImplType = ArrayImpl<int64_t, 512, 1, 1>; +}; + +// Map<T> has at most 2^16 different keys. +template <typename T> +struct KeyIDArrayHelper<T, 2> { + using ImplType = ArrayImpl<int64_t, 512, 256, 1>; +}; + +// Map<T> has at most 2^32 different keys. +template <typename T> +struct KeyIDArrayHelper<T, 4> { + using ImplType = ArrayImpl<int64_t, 65536, 512, 256>; +}; + +struct KeyIDArrayHeader { + uint32_t impl_storage_node_id; + uint32_t reserved; + uint64_t mask; + + KeyIDArrayHeader() + : impl_storage_node_id(STORAGE_INVALID_NODE_ID), + reserved(0), + mask(0) {} +}; + +template <typename T> +class KeyIDArray { + using ArrayImpl = typename KeyIDArrayHelper<T>::ImplType; + + public: + using Value = typename ArrayImpl::Value; + using ValueArg = typename ArrayImpl::ValueArg; + + KeyIDArray() + : storage_(nullptr), + storage_node_id_(STORAGE_INVALID_NODE_ID), + header_(nullptr), + impl_(), + mask_(0) {} + ~KeyIDArray() {} + + // Create a table. + static KeyIDArray *create(Storage *storage, uint32_t storage_node_id, + uint64_t mask) { + std::unique_ptr<KeyIDArray> array(create_instance()); + if (!array) { + return nullptr; + } + if (!array->create_array(storage, storage_node_id, mask)) { + return nullptr; + } + return array.release(); + } + + // Open an array. + static KeyIDArray *open(Storage *storage, uint32_t storage_node_id) { + std::unique_ptr<KeyIDArray> array(create_instance()); + if (!array) { + return nullptr; + } + if (!array->open_array(storage, storage_node_id)) { + return nullptr; + } + return array.release(); + } + + // Unlink an array. + static bool unlink(Storage *storage, uint32_t storage_node_id) { + return ArrayImpl::unlink(storage, storage_node_id); + } + + // Return the number of values in each page. + static constexpr uint64_t page_size() { + return ArrayImpl::page_size(); + } + // Return the number of pages in each table. + static constexpr uint64_t table_size() { + return ArrayImpl::table_size(); + } + // Return the number of tables in each secondary table. + static constexpr uint64_t secondary_table_size() { + return ArrayImpl::secondary_table_size(); + } + // Return the number of values in Array. + static constexpr uint64_t size() { + return ArrayImpl::size(); + } + + // Return the storage node ID. + uint32_t storage_node_id() const { + return storage_node_id_; + } + // Return the mask. + uint64_t mask() const { + return mask_; + } + + // Get a value and return true on success. + // The value is assigned to "*value" iff "value" != nullptr. + bool get(uint64_t value_id, Value *value) { + return impl_.get(value_id & mask_, value); + } + // Set a value and return true on success. + bool set(uint64_t value_id, ValueArg value) { + return impl_.set(value_id & mask_, value); + } + // Get a value and return its address on success. + Value *get_pointer(uint64_t value_id) { + return impl_.get_pointer(value_id & mask_); + } + // Get a page and return its starting address on success. + Value *get_page(uint64_t page_id) { + return impl_.get_page(page_id & (mask_ / page_size())); + } + + private: + Storage *storage_; + uint32_t storage_node_id_; + KeyIDArrayHeader *header_; + ArrayImpl impl_; + uint64_t mask_; + + static KeyIDArray *create_instance() { + KeyIDArray * const array = new (std::nothrow) KeyIDArray; + if (!array) { + GRNXX_ERROR() << "new grnxx::Array failed: " + << "value_size = " << sizeof(Value) + << ", page_size = " << page_size() + << ", table_size = " << table_size() + << ", secondary_table_size = " << secondary_table_size(); + } + return array; + } + + bool create_array(Storage *storage, uint32_t storage_node_id, + uint64_t mask) { + storage_ = storage; + StorageNode storage_node = + storage_->create_node(storage_node_id, sizeof(KeyIDArrayHeader)); + if (!storage_node) { + return false; + } + storage_node_id_ = storage_node.id(); + header_ = static_cast<KeyIDArrayHeader *>(storage_node.body()); + *header_ = KeyIDArrayHeader(); + if (!impl_.create(storage, storage_node_id_, MAP_INVALID_KEY_ID)) { + storage_->unlink_node(storage_node_id_); + return false; + } + mask_ = mask; + header_->impl_storage_node_id = impl_.storage_node_id(); + header_->mask = mask_; + return true; + } + + bool open_array(Storage *storage, uint32_t storage_node_id) { + storage_ = storage; + StorageNode storage_node = storage_->open_node(storage_node_id); + if (!storage_node) { + return false; + } + storage_node_id_ = storage_node.id(); + header_ = static_cast<KeyIDArrayHeader *>(storage_node.body()); + if (!impl_.open(storage, header_->impl_storage_node_id)) { + return false; + } + mask_ = header_->mask; + return true; + } +}; + +} // namespace hash_table +} // namespace map +} // namespace grnxx + +#endif // GRNXX_MAP_HASH_TABLE_KEY_ID_ARRAY_HPP Added: lib/grnxx/map/hash_table/link_array.hpp (+63 -0) 100644 =================================================================== --- /dev/null +++ lib/grnxx/map/hash_table/link_array.hpp 2013-06-04 18:23:12 +0900 (40b98ac) @@ -0,0 +1,63 @@ +/* + Copyright (C) 2013 Brazil, Inc. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ +#ifndef GRNXX_MAP_HASH_TABLE_LINK_ARRAY_HPP +#define GRNXX_MAP_HASH_TABLE_LINK_ARRAY_HPP + +#include "grnxx/features.hpp" + +#include "grnxx/array.hpp" +#include "grnxx/types.hpp" + +namespace grnxx { +namespace map { +namespace hash_table { + +// Change the array size based on the size of "T". +// Note that the size of link array is N/64 where N is the size of BitArray. +template <typename T, size_t T_SIZE = sizeof(T)> +struct LinkArray; + +// Map<T> has at most 2^40 different keys. +template <typename T, size_t T_SIZE> +struct LinkArray { + using Type = Array<uint64_t, 16384, 1024, 1024>; +}; + +// Map<T> has at most 2^8 different keys. +template <typename T> +struct LinkArray<T, 1> { + using Type = Array<uint64_t, 4, 1, 1>; +}; + +// Map<T> has at most 2^16 different keys. +template <typename T> +struct LinkArray<T, 2> { + using Type = Array<uint64_t, 1024, 1, 1>; +}; + +// Map<T> has at most 2^32 different keys. +template <typename T> +struct LinkArray<T, 4> { + using Type = Array<uint64_t, 4096, 128, 128>; +}; + +} // namespace hash_table +} // namespace map +} // namespace grnxx + +#endif // GRNXX_MAP_HASH_TABLE_LINK_ARRAY_HPP Modified: lib/grnxx/map/helper.hpp (+0 -25) =================================================================== --- lib/grnxx/map/helper.hpp 2013-05-31 15:34:49 +0900 (cc5aee6) +++ lib/grnxx/map/helper.hpp 2013-06-04 18:23:12 +0900 (e6d9fb5) @@ -82,30 +82,6 @@ struct KeyArrayHelper<Bytes> { using Type = BytesArray; }; -// Change the settings based on the key type. -template <typename T, size_t T_SIZE = sizeof(T)> -struct HashArrayHelper { - // Map<T> has at most 2^40 different keys. - using Type = Array<uint64_t, 65536, 4096, 8192>; -}; -template <typename T> -struct HashArrayHelper<T, 1> { - // Map<T> has at most 2^8 different keys. - // Assume a perfect hash function. - using Type = Array<uint8_t, 256, 1, 1>; -}; -template <typename T> -struct HashArrayHelper<T, 2> { - // Map<T> has at most 2^16 different keys. - // Assume a perfect hash function. - using Type = Array<uint16_t, 65536, 1, 1>; -}; -template <typename T> -struct HashArrayHelper<T, 4> { - // Map<T> has at most 2^32 different keys. - using Type = Array<uint32_t, 65536, 256, 512>; -}; - // Normalize a key. template <typename T, bool IS_FLOATING_POINT = std::is_floating_point<T>::value> @@ -168,7 +144,6 @@ struct Helper { using Bitmap = typename BitmapHelper<T>::Type; using KeyArray = typename KeyArrayHelper<T>::Type; - using HashArray = typename HashArrayHelper<T>::Type; static Key normalize(KeyArg key) { return NormalizeHelper<T>::normalize(key); Modified: test/test_map.cpp (+6 -13) =================================================================== --- test/test_map.cpp 2013-05-31 15:34:49 +0900 (a381169) +++ test/test_map.cpp 2013-06-04 18:23:12 +0900 (1e660f9) @@ -31,6 +31,7 @@ #include "grnxx/map/bytes_array.hpp" #include "grnxx/map/bytes_store.hpp" #include "grnxx/map/helper.hpp" +#include "grnxx/map/hash_table/hash.hpp" #include "grnxx/periodic_clock.hpp" #include "grnxx/storage.hpp" @@ -51,18 +52,7 @@ std::mt19937_64 mersenne_twister(mersenne_twister_seed); // For std::unordered_set. template <typename T> -struct Hash { - uint64_t operator()(T x) const { - return std::hash<T>()(x); - } -}; -template <> -struct Hash<grnxx::GeoPoint> { - using ValueArg = typename grnxx::Traits<grnxx::GeoPoint>::ArgumentType; - uint64_t operator()(ValueArg x) const { - return std::hash<std::uint64_t>()(x.value()); - } -}; +using Hash = grnxx::map::hash_table::Hash<T>; // For std::random_shuffle(). struct RandomNumberGenerator { @@ -651,7 +641,9 @@ void test_map_replace(grnxx::MapType map_type) { for (std::uint64_t i = (MAP_NUM_KEYS / 2); i < MAP_NUM_KEYS; ++i) { const std::int64_t key_id = i - (MAP_NUM_KEYS / 2); assert(!map->replace(keys[key_id], keys[key_id])); - assert(map->replace(keys[key_id], keys[i])); + std::int64_t stored_key_id; + assert(map->replace(keys[key_id], keys[i], &stored_key_id)); + assert(stored_key_id == key_id); T key; assert(map->get(key_id, &key)); assert(grnxx::map::Helper<T>::equal_to(key, keys[i])); @@ -961,6 +953,7 @@ template <typename T> void test_map(T) { GRNXX_NOTICE() << __PRETTY_FUNCTION__; test_map<T>(grnxx::MAP_ARRAY); + test_map<T>(grnxx::MAP_HASH_TABLE); } template <typename T, typename... U> -------------- next part -------------- HTML����������������������������... Download