From 581442d44609876705a7ec399ab5b9b7843954fe Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Thu, 25 Sep 2014 13:53:27 -0700 Subject: [PATCH] option to choose modulo when calculating CuckooTable hash Summary: Using modulo to calculate the hash makes lookups ~8% slower. But it has its benefits: file size is more predictable and more space efficient. Test Plan: db_bench Reviewers: igor, yhchiang, sdong Reviewed By: sdong Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D23691 --- include/rocksdb/table.h | 10 ++++- table/cuckoo_table_builder.cc | 64 +++++++++++++++++++----------- table/cuckoo_table_builder.h | 3 +- table/cuckoo_table_builder_test.cc | 38 +++++++++--------- table/cuckoo_table_factory.cc | 5 ++- table/cuckoo_table_factory.h | 20 ++++++---- table/cuckoo_table_reader.cc | 23 +++++++---- table/cuckoo_table_reader.h | 3 +- table/cuckoo_table_reader_test.cc | 6 +-- 9 files changed, 107 insertions(+), 65 deletions(-) diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 2b0255a97..e8ac6bd62 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -253,6 +253,8 @@ struct CuckooTablePropertyNames { static const std::string kIsLastLevel; // Indicate if using identity function for the first hash function. static const std::string kIdentityAsFirstHash; + // Indicate if modulo or bitwise AND is used to calculate the hash value + static const std::string kUseModuleHash; }; struct CuckooTableOptions { @@ -271,11 +273,17 @@ struct CuckooTableOptions { // function. This makes lookups more cache friendly in case // of collisions. uint32_t cuckoo_block_size = 5; - // If this options is enabled, user key is treated as uint64_t and its value + // If this option is enabled, user key is treated as uint64_t and its value // is used as hash value directly. This option changes builder's behavior. // Reader ignore this option and behave according to what specified in table // property. bool identity_as_first_hash = false; + // If this option is set to true, modulo is used during hash calculation. + // This often yields better space efficiency at the cost of performance. + // If this option is set to false, # of entries in table is constrained to be + // a power of two, and bitwise AND is used to calculate hash, which is faster + // in general.
+ bool use_module_hash = true; }; // Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index 51c80d9df..17184ae2c 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -37,6 +37,8 @@ const std::string CuckooTablePropertyNames::kCuckooBlockSize = "rocksdb.cuckoo.hash.cuckooblocksize"; const std::string CuckooTablePropertyNames::kIdentityAsFirstHash = "rocksdb.cuckoo.hash.identityfirst"; +const std::string CuckooTablePropertyNames::kUseModuleHash = + "rocksdb.cuckoo.hash.usemodule"; // Obtained by running echo rocksdb.table.cuckoo | sha1sum extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; @@ -45,7 +47,7 @@ CuckooTableBuilder::CuckooTableBuilder( WritableFile* file, double max_hash_table_ratio, uint32_t max_num_hash_table, uint32_t max_search_depth, const Comparator* user_comparator, uint32_t cuckoo_block_size, - bool identity_as_first_hash, + bool use_module_hash, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) : num_hash_func_(2), file_(file), @@ -53,10 +55,11 @@ CuckooTableBuilder::CuckooTableBuilder( max_num_hash_func_(max_num_hash_table), max_search_depth_(max_search_depth), cuckoo_block_size_(std::max(1U, cuckoo_block_size)), - hash_table_size_(2), + hash_table_size_(use_module_hash ? 0 : 2), is_last_level_file_(false), has_seen_first_key_(false), ucomp_(user_comparator), + use_module_hash_(use_module_hash), identity_as_first_hash_(identity_as_first_hash), get_slice_hash_(get_slice_hash), closed_(false) { @@ -105,14 +108,15 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { } else if (ikey.user_key.compare(largest_user_key_) > 0) { largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); } - if (hash_table_size_ < kvs_.size() / max_hash_table_ratio_) { - hash_table_size_ *= 2; + if (!use_module_hash_) { + if (hash_table_size_ < kvs_.size() / max_hash_table_ratio_) { + hash_table_size_ *= 2; + } } } Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { - uint64_t hash_table_size_minus_one = hash_table_size_ - 1; - buckets->resize(hash_table_size_minus_one + cuckoo_block_size_); + buckets->resize(hash_table_size_ + cuckoo_block_size_ - 1); uint64_t make_space_for_key_call_id = 0; for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) { uint64_t bucket_id; @@ -122,8 +126,8 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { ExtractUserKey(kvs_[vector_idx].first); for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found; ++hash_cnt) { - uint64_t hash_val = CuckooHash(user_key, hash_cnt, - hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_); + uint64_t hash_val = CuckooHash(user_key, hash_cnt, use_module_hash_, + hash_table_size_, identity_as_first_hash_, get_slice_hash_); // If there is a collision, check next cuckoo_block_size_ locations for // empty locations. While checking, if we reach end of the hash table, // stop searching and proceed for next hash function. @@ -152,8 +156,8 @@ Status CuckooTableBuilder::MakeHashTable(std::vector* buckets) { } // We don't really need to rehash the entire table because old hashes are // still valid and we only increased the number of hash functions. 
- uint64_t hash_val = CuckooHash(user_key, num_hash_func_, - hash_table_size_minus_one, identity_as_first_hash_, get_slice_hash_); + uint64_t hash_val = CuckooHash(user_key, num_hash_func_, use_module_hash_, + hash_table_size_, identity_as_first_hash_, get_slice_hash_); ++num_hash_func_; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, ++hash_val) { @@ -178,6 +182,10 @@ Status CuckooTableBuilder::Finish() { Status s; std::string unused_bucket; if (!kvs_.empty()) { + // Calculate the real hash size if module hash is enabled. + if (use_module_hash_) { + hash_table_size_ = kvs_.size() / max_hash_table_ratio_; + } s = MakeHashTable(&buckets); if (!s.ok()) { return s; @@ -252,11 +260,10 @@ Status CuckooTableBuilder::Finish() { CuckooTablePropertyNames::kNumHashFunc].assign( reinterpret_cast(&num_hash_func_), sizeof(num_hash_func_)); - uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1; properties_.user_collected_properties[ CuckooTablePropertyNames::kHashTableSize].assign( - reinterpret_cast(&hash_table_size), - sizeof(hash_table_size)); + reinterpret_cast(&hash_table_size_), + sizeof(hash_table_size_)); properties_.user_collected_properties[ CuckooTablePropertyNames::kIsLastLevel].assign( reinterpret_cast(&is_last_level_file_), @@ -269,6 +276,10 @@ Status CuckooTableBuilder::Finish() { CuckooTablePropertyNames::kIdentityAsFirstHash].assign( reinterpret_cast(&identity_as_first_hash_), sizeof(identity_as_first_hash_)); + properties_.user_collected_properties[ + CuckooTablePropertyNames::kUseModuleHash].assign( + reinterpret_cast(&use_module_hash_), + sizeof(use_module_hash_)); // Write meta blocks. MetaIndexBuilder meta_index_builder; @@ -322,16 +333,22 @@ uint64_t CuckooTableBuilder::FileSize() const { return 0; } - // Account for buckets being a power of two. - // As elements are added, file size remains constant for a while and doubles - // its size. Since compaction algorithm stops adding elements only after it - // exceeds the file limit, we account for the extra element being added here. - uint64_t expected_hash_table_size = hash_table_size_; - if (expected_hash_table_size < (kvs_.size() + 1) / max_hash_table_ratio_) { - expected_hash_table_size *= 2; + if (use_module_hash_) { + return (kvs_[0].first.size() + kvs_[0].second.size()) * kvs_.size() / + max_hash_table_ratio_; + } else { + // Account for buckets being a power of two. + // As elements are added, file size remains constant for a while and + // doubles its size. Since compaction algorithm stops adding elements + // only after it exceeds the file limit, we account for the extra element + // being added here. + uint64_t expected_hash_table_size = hash_table_size_; + if (expected_hash_table_size < (kvs_.size() + 1) / max_hash_table_ratio_) { + expected_hash_table_size *= 2; + } + return (kvs_[0].first.size() + kvs_[0].second.size()) * + expected_hash_table_size - 1; } - return (kvs_[0].first.size() + kvs_[0].second.size()) * - expected_hash_table_size - 1; } // This method is invoked when there is no place to insert the target key. @@ -373,7 +390,6 @@ bool CuckooTableBuilder::MakeSpaceForKey( make_space_for_key_call_id; tree.push_back(CuckooNode(bucket_id, 0, 0)); } - uint64_t hash_table_size_minus_one = hash_table_size_ - 1; bool null_found = false; uint32_t curr_pos = 0; while (!null_found && curr_pos < tree.size()) { @@ -388,7 +404,7 @@ bool CuckooTableBuilder::MakeSpaceForKey( uint64_t child_bucket_id = CuckooHash( (is_last_level_file_ ? 
kvs_[curr_bucket.vector_idx].first : ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first))), - hash_cnt, hash_table_size_minus_one, identity_as_first_hash_, + hash_cnt, use_module_hash_, hash_table_size_, identity_as_first_hash_, get_slice_hash_); // Iterate inside Cuckoo Block. for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h index 45cf49315..d5fe3f5dc 100644 --- a/table/cuckoo_table_builder.h +++ b/table/cuckoo_table_builder.h @@ -24,7 +24,7 @@ class CuckooTableBuilder: public TableBuilder { WritableFile* file, double max_hash_table_ratio, uint32_t max_num_hash_func, uint32_t max_search_depth, const Comparator* user_comparator, uint32_t cuckoo_block_size, - bool identity_as_first_hash, + bool use_module_hash, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)); // REQUIRES: Either Finish() or Abandon() has been called. @@ -88,6 +88,7 @@ class CuckooTableBuilder: public TableBuilder { TableProperties properties_; bool has_seen_first_key_; const Comparator* ucomp_; + bool use_module_hash_; bool identity_as_first_hash_; uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t max_num_buckets); diff --git a/table/cuckoo_table_builder_test.cc b/table/cuckoo_table_builder_test.cc index d25950728..d3b3a713e 100644 --- a/table/cuckoo_table_builder_test.cc +++ b/table/cuckoo_table_builder_test.cc @@ -50,12 +50,6 @@ class CuckooBuilderTest { TableProperties* props = nullptr; ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size, kCuckooTableMagicNumber, env_, nullptr, &props)); - ASSERT_EQ(props->num_entries, keys.size()); - ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); - ASSERT_EQ(props->data_size, expected_unused_bucket.size() * - (expected_table_size + expected_cuckoo_block_size - 1)); - ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len); - // Check unused bucket. std::string unused_key = props->user_collected_properties[ CuckooTablePropertyNames::kEmptyKey]; @@ -83,6 +77,12 @@ class CuckooBuilderTest { *reinterpret_cast(props->user_collected_properties[ CuckooTablePropertyNames::kIsLastLevel].data()); ASSERT_EQ(expected_is_last_level, is_last_level_found); + + ASSERT_EQ(props->num_entries, keys.size()); + ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); + ASSERT_EQ(props->data_size, expected_unused_bucket.size() * + (expected_table_size + expected_cuckoo_block_size - 1)); + ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len); delete props; // Check contents of the bucket. 
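Editor's note on the builder changes above: with use_module_hash the table is sized exactly as num_entries / max_hash_table_ratio at Finish() time, while the bitwise-AND path starts at 2 buckets and keeps doubling as keys are added. The standalone sketch below illustrates the resulting bucket counts; it is not part of the patch, the function names PowerOfTwoTableSize and ModuloTableSize are illustrative, and the 0.9 ratio is just the value the tests happen to use.

#include <cstdint>
#include <cstdio>

// Sketch only: mirrors the two sizing strategies in cuckoo_table_builder.cc.
// The bitwise-AND path doubles whenever the load factor would exceed
// max_hash_table_ratio; the modulo path sizes the table exactly once all
// entries are known.
uint64_t PowerOfTwoTableSize(uint64_t num_entries, double max_hash_table_ratio) {
  uint64_t size = 2;
  while (size < num_entries / max_hash_table_ratio) {
    size *= 2;  // may over-allocate by close to 2x
  }
  return size;
}

uint64_t ModuloTableSize(uint64_t num_entries, double max_hash_table_ratio) {
  return static_cast<uint64_t>(num_entries / max_hash_table_ratio);
}

int main() {
  for (uint64_t n : {1000ULL, 5000ULL, 100000ULL}) {
    std::printf("%llu entries -> pow2: %llu buckets, modulo: %llu buckets\n",
                static_cast<unsigned long long>(n),
                static_cast<unsigned long long>(PowerOfTwoTableSize(n, 0.9)),
                static_cast<unsigned long long>(ModuloTableSize(n, 0.9)));
  }
  return 0;
}

For 100000 entries at a 0.9 ratio, the modulo path allocates 111111 buckets while the power-of-two path rounds up to 131072 (about 18% more), which is where the space saving described in the summary comes from.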
@@ -133,12 +133,12 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) { fname = test::TmpDir() + "/EmptyFile"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - 4, 100, BytewiseComparator(), 1, false, GetSliceHash); + 4, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); ASSERT_EQ(0UL, builder.FileSize()); ASSERT_OK(builder.Finish()); ASSERT_OK(writable_file->Close()); - CheckFileContents({}, {}, {}, "", 0, 2, false); + CheckFileContents({}, {}, {}, "", 2, 2, false); } TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { @@ -162,7 +162,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) { fname = test::TmpDir() + "/NoCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -202,7 +202,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { fname = test::TmpDir() + "/WithCollisionFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -243,8 +243,8 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) { fname = test::TmpDir() + "/WithCollisionFullKey2"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, false, - GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, + false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -289,7 +289,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) { fname = test::TmpDir() + "/WithCollisionPathFullKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -331,7 +331,7 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) { fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 2, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 2, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(keys[i]), Slice(values[i])); @@ -367,7 +367,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { fname = test::TmpDir() + "/NoCollisionUserKey"; ASSERT_OK(env_->NewWritableFile(fname, 
&writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -403,7 +403,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { fname = test::TmpDir() + "/WithCollisionUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -441,7 +441,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) { fname = test::TmpDir() + "/WithCollisionPathUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); @@ -479,7 +479,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) { fname = test::TmpDir() + "/WithCollisionPathUserKey"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 2, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t i = 0; i < user_keys.size(); i++) { builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); @@ -499,7 +499,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) { fname = test::TmpDir() + "/FailWhenSameKeyInserted"; ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, - num_hash_fun, 100, BytewiseComparator(), 1, false, GetSliceHash); + num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); ASSERT_OK(builder.status()); builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); diff --git a/table/cuckoo_table_factory.cc b/table/cuckoo_table_factory.cc index 18db54ed7..4afc9fc2e 100644 --- a/table/cuckoo_table_factory.cc +++ b/table/cuckoo_table_factory.cc @@ -30,10 +30,11 @@ TableBuilder* CuckooTableFactory::NewTableBuilder( const InternalKeyComparator& internal_comparator, WritableFile* file, const CompressionType, const CompressionOptions&) const { + // TODO: change builder to take the option struct return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64, table_options_.max_search_depth, internal_comparator.user_comparator(), - table_options_.cuckoo_block_size, table_options_.identity_as_first_hash, - nullptr); + table_options_.cuckoo_block_size, table_options_.use_module_hash, + table_options_.identity_as_first_hash, nullptr); } std::string CuckooTableFactory::GetPrintableTableOptions() const { diff --git a/table/cuckoo_table_factory.h b/table/cuckoo_table_factory.h index 7b2f32ce3..599908678 100644 --- 
a/table/cuckoo_table_factory.h +++ b/table/cuckoo_table_factory.h @@ -15,21 +15,27 @@ namespace rocksdb { const uint32_t kCuckooMurmurSeedMultiplier = 816922183; static inline uint64_t CuckooHash( - const Slice& user_key, uint32_t hash_cnt, uint64_t table_size_minus_one, - bool identity_as_first_hash, + const Slice& user_key, uint32_t hash_cnt, bool use_module_hash, + uint64_t table_size_, bool identity_as_first_hash, uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) { #ifndef NDEBUG // This part is used only in unit tests. if (get_slice_hash != nullptr) { - return get_slice_hash(user_key, hash_cnt, table_size_minus_one + 1); + return get_slice_hash(user_key, hash_cnt, table_size_); } #endif + uint64_t value = 0; if (hash_cnt == 0 && identity_as_first_hash) { - return (*reinterpret_cast(user_key.data())) & - table_size_minus_one; + value = (*reinterpret_cast(user_key.data())); + } else { + value = MurmurHash(user_key.data(), user_key.size(), + kCuckooMurmurSeedMultiplier * hash_cnt); + } + if (use_module_hash) { + return value % table_size_; + } else { + return value & (table_size_ - 1); } - return MurmurHash(user_key.data(), user_key.size(), - kCuckooMurmurSeedMultiplier * hash_cnt) & table_size_minus_one; } // Cuckoo Table is designed for applications that require fast point lookups diff --git a/table/cuckoo_table_reader.cc b/table/cuckoo_table_reader.cc index b8ac5a47e..30a8d8079 100644 --- a/table/cuckoo_table_reader.cc +++ b/table/cuckoo_table_reader.cc @@ -77,8 +77,9 @@ CuckooTableReader::CuckooTableReader( status_ = Status::Corruption("Hash table size not found"); return; } - table_size_minus_one_ = *reinterpret_cast( - hash_table_size->second.data()) - 1; + table_size_ = *reinterpret_cast( + hash_table_size->second.data()); + auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel); if (is_last_level == user_props.end()) { status_ = Status::Corruption("Is last level not found"); @@ -95,6 +96,15 @@ CuckooTableReader::CuckooTableReader( identity_as_first_hash_ = *reinterpret_cast( identity_as_first_hash->second.data()); + auto use_module_hash = user_props.find( + CuckooTablePropertyNames::kUseModuleHash); + if (use_module_hash == user_props.end()) { + status_ = Status::Corruption("hash type is not found"); + return; + } + use_module_hash_ = *reinterpret_cast( + use_module_hash->second.data()); + fprintf(stderr, "use_module_hash %d\n", use_module_hash_); auto cuckoo_block_size = user_props.find( CuckooTablePropertyNames::kCuckooBlockSize); if (cuckoo_block_size == user_props.end()) { @@ -116,8 +126,8 @@ Status CuckooTableReader::Get( Slice user_key = ExtractUserKey(key); for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) { uint64_t offset = bucket_length_ * CuckooHash( - user_key, hash_cnt, table_size_minus_one_, identity_as_first_hash_, - get_slice_hash_); + user_key, hash_cnt, use_module_hash_, table_size_, + identity_as_first_hash_, get_slice_hash_); const char* bucket = &file_data_.data()[offset]; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_; ++block_idx, bucket += bucket_length_) { @@ -151,7 +161,7 @@ void CuckooTableReader::Prepare(const Slice& key) { // Prefetch the first Cuckoo Block. 
Slice user_key = ExtractUserKey(key); uint64_t addr = reinterpret_cast(file_data_.data()) + - bucket_length_ * CuckooHash(user_key, 0, table_size_minus_one_, + bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_, identity_as_first_hash_, nullptr); uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_; for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) { @@ -219,8 +229,7 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader) void CuckooTableIterator::LoadKeysFromReader() { key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries); - uint64_t num_buckets = reader_->table_size_minus_one_ + - reader_->cuckoo_block_size_; + uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1; for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) { Slice read_key; status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_, diff --git a/table/cuckoo_table_reader.h b/table/cuckoo_table_reader.h index f9e93abf4..8b3ad4b91 100644 --- a/table/cuckoo_table_reader.h +++ b/table/cuckoo_table_reader.h @@ -65,6 +65,7 @@ class CuckooTableReader: public TableReader { Slice file_data_; bool is_last_level_; bool identity_as_first_hash_; + bool use_module_hash_; std::shared_ptr table_props_; Status status_; uint32_t num_hash_func_; @@ -74,7 +75,7 @@ class CuckooTableReader: public TableReader { uint32_t bucket_length_; uint32_t cuckoo_block_size_; uint32_t cuckoo_block_bytes_minus_one_; - uint64_t table_size_minus_one_; + uint64_t table_size_; const Comparator* ucomp_; uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t max_num_buckets); diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index 6dd5e5525..6566b7a29 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -110,8 +110,8 @@ class CuckooReaderTest { std::unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); CuckooTableBuilder builder( - writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, false, - GetSliceHash); + writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, + false, false, GetSliceHash); ASSERT_OK(builder.status()); for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) { builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); @@ -434,7 +434,7 @@ void WriteFile(const std::vector& keys, CuckooTableBuilder builder( writable_file.get(), hash_ratio, 64, 1000, test::Uint64Comparator(), 5, - FLAGS_identity_as_first_hash, nullptr); + false, FLAGS_identity_as_first_hash, nullptr); ASSERT_OK(builder.status()); for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { // Value is just a part of key.
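The core of the change is the tail of the patched CuckooHash in cuckoo_table_factory.h: the same hash value is reduced to a bucket index either by a modulo (works for any table size but costs an integer division, hence the ~8% slower lookups noted in the summary) or by a mask (requires a power-of-two table size). Below is a self-contained sketch of just that reduction; it is not the patch's code, std::hash stands in for rocksdb::MurmurHash, BucketIndex is an illustrative name, and the identity-as-first-hash and unit-test hooks are omitted.

#include <cstdint>
#include <functional>
#include <string>

// Sketch of the bucket-index reduction the patch makes configurable.
// Seed mixing per hash function is approximated by appending hash_cnt to the
// key before hashing.
uint64_t BucketIndex(const std::string& user_key, uint32_t hash_cnt,
                     bool use_module_hash, uint64_t table_size) {
  uint64_t value = std::hash<std::string>{}(user_key + std::to_string(hash_cnt));
  if (use_module_hash) {
    return value % table_size;      // any table_size; one integer division
  }
  return value & (table_size - 1);  // table_size must be a power of two
}

Because the builder writes the kUseModuleHash table property and the reader branches on it, files built with either setting stay readable regardless of the options a reader is opened with.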