Implementing a cache friendly version of Cuckoo Hash

Summary: This implements a cache-friendly version of Cuckoo Hash in which, in case of a collision, we try to insert into the next few locations. The size of the neighborhood to check is taken as an input parameter in the builder and stored in the table.

Test Plan:
make check all
cuckoo_table_{db,reader,builder}_test

Reviewers: sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D22455
main
Radheshyam Balasundaram 10 years ago
parent d977e55596
commit 7f71448388
  1. 37
      include/rocksdb/table.h
  2. 139
      table/cuckoo_table_builder.cc
  3. 9
      table/cuckoo_table_builder.h
  4. 145
      table/cuckoo_table_builder_test.cc
  5. 7
      table/cuckoo_table_factory.cc
  6. 7
      table/cuckoo_table_factory.h
  7. 89
      table/cuckoo_table_reader.cc
  8. 6
      table/cuckoo_table_reader.h
  9. 8
      table/cuckoo_table_reader_test.cc

@ -227,15 +227,46 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions());
// Names of the user-collected table properties that the Cuckoo table builder
// writes into a file and the Cuckoo table reader looks up when opening it.
struct CuckooTablePropertyNames {
// The key that is used to fill empty buckets.
static const std::string kEmptyKey;
// Fixed length of value.
static const std::string kValueLength;
// NOTE(review): kNumHashTable and kMaxNumBuckets look like the earlier names
// of kNumHashFunc and kHashTableSize below -- confirm whether they are still
// meant to be declared here.
static const std::string kNumHashTable;
static const std::string kMaxNumBuckets;
// Number of hash functions used in Cuckoo Hash.
static const std::string kNumHashFunc;
// It denotes the number of buckets in a Cuckoo Block. Given a key and a
// particular hash function, a Cuckoo Block is a set of consecutive buckets,
// where starting bucket id is given by the hash function on the key. In case
// of a collision during inserting the key, the builder tries to insert the
// key in other locations of the cuckoo block before using the next hash
// function. This reduces cache miss during read operation in case of
// collision.
static const std::string kCuckooBlockSize;
// Size of the hash table. Use this number to compute the modulo of hash
// function. The actual number of buckets will be kHashTableSize +
// kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to
// accommodate the Cuckoo Block from end of hash table, due to cache friendly
// implementation.
static const std::string kHashTableSize;
// Denotes whether the key stored in the file is an Internal Key (if false)
// or a User Key only (if true).
static const std::string kIsLastLevel;
};
// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
// result in larger hash tables with fewer collisions.
// @max_search_depth: A property used by builder to determine the depth to go to
// to search for a path to displace elements in case of
// collision. See Builder.MakeSpaceForKey method. Higher
// values result in more efficient hash tables with fewer
// lookups but take more time to build.
// @cuckoo_block_size: In case of collision while inserting, the builder
// attempts to insert in the next cuckoo_block_size
// locations before skipping over to the next Cuckoo hash
// function. This makes lookups more cache friendly in case
// of collisions.
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
uint32_t max_search_depth = 100);
uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);
#endif // ROCKSDB_LITE

@ -24,14 +24,16 @@
namespace rocksdb {
const std::string CuckooTablePropertyNames::kEmptyKey =
"rocksdb.cuckoo.bucket.empty.key";
const std::string CuckooTablePropertyNames::kNumHashTable =
const std::string CuckooTablePropertyNames::kNumHashFunc =
"rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
"rocksdb.cuckoo.bucket.maxnum";
const std::string CuckooTablePropertyNames::kHashTableSize =
"rocksdb.cuckoo.hash.size";
const std::string CuckooTablePropertyNames::kValueLength =
"rocksdb.cuckoo.value.length";
const std::string CuckooTablePropertyNames::kIsLastLevel =
"rocksdb.cuckoo.file.islastlevel";
const std::string CuckooTablePropertyNames::kCuckooBlockSize =
"rocksdb.cuckoo.hash.cuckooblocksize";
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
@ -39,13 +41,14 @@ extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, double hash_table_ratio,
uint32_t max_num_hash_table, uint32_t max_search_depth,
const Comparator* user_comparator,
const Comparator* user_comparator, uint32_t cuckoo_block_size,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: num_hash_table_(2),
: num_hash_func_(2),
file_(file),
hash_table_ratio_(hash_table_ratio),
max_num_hash_table_(max_num_hash_table),
max_num_hash_func_(max_num_hash_table),
max_search_depth_(max_search_depth),
cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
is_last_level_file_(false),
has_seen_first_key_(false),
ucomp_(user_comparator),
@ -101,8 +104,8 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
}
Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
uint64_t num_buckets = kvs_.size() / hash_table_ratio_;
buckets->resize(num_buckets);
uint64_t hash_table_size = kvs_.size() / hash_table_ratio_;
buckets->resize(hash_table_size + cuckoo_block_size_ - 1);
uint64_t make_space_for_key_call_id = 0;
for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
uint64_t bucket_id;
@ -110,39 +113,49 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
autovector<uint64_t> hash_vals;
Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
ExtractUserKey(kvs_[vector_idx].first);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets);
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_id = hash_val;
bucket_found = true;
break;
} else {
if (ucomp_->Compare(user_key, is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
return Status::NotSupported("Same key is being inserted again.");
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size);
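// If there is a collision, check next cuckoo_block_size_ locations for
// empty locations. The bucket vector holds cuckoo_block_size_ - 1 extra
// trailing buckets beyond hash_table_size, so a block that starts near the
// end of the hash table still fits (assuming the hash function returns a
// value below hash_table_size).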
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++hash_val) {
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_id = hash_val;
bucket_found = true;
break;
} else {
if (ucomp_->Compare(user_key, is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
return Status::NotSupported("Same key is being inserted again.");
}
hash_vals.push_back(hash_val);
}
hash_vals.push_back(hash_val);
}
}
while (!bucket_found && !MakeSpaceForKey(hash_vals,
++make_space_for_key_call_id, buckets, &bucket_id)) {
hash_table_size, ++make_space_for_key_call_id, buckets, &bucket_id)) {
// Rehash by increasing number of hash functions.
if (num_hash_table_ >= max_num_hash_table_) {
return Status::NotSupported("Too many collissions. Unable to hash.");
if (num_hash_func_ >= max_num_hash_func_) {
return Status::NotSupported("Too many collisions. Unable to hash.");
}
// We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions.
uint64_t hash_val = get_slice_hash_(user_key,
num_hash_table_, num_buckets);
++num_hash_table_;
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
num_hash_func_, hash_table_size);
++num_hash_func_;
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++hash_val) {
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
}
}
}
(*buckets)[bucket_id].vector_idx = vector_idx;
@ -226,16 +239,22 @@ Status CuckooTableBuilder::Finish() {
properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
properties_.user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].assign(
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
uint64_t num_buckets = buckets.size();
CuckooTablePropertyNames::kNumHashFunc].assign(
reinterpret_cast<char*>(&num_hash_func_), sizeof(num_hash_func_));
uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1;
properties_.user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].assign(
reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets));
CuckooTablePropertyNames::kHashTableSize].assign(
reinterpret_cast<const char*>(&hash_table_size),
sizeof(hash_table_size));
properties_.user_collected_properties[
CuckooTablePropertyNames::kIsLastLevel].assign(
reinterpret_cast<const char*>(&is_last_level_file_),
sizeof(is_last_level_file_));
properties_.user_collected_properties[
CuckooTablePropertyNames::kCuckooBlockSize].assign(
reinterpret_cast<const char*>(&cuckoo_block_size_),
sizeof(cuckoo_block_size_));
// Write meta blocks.
MetaIndexBuilder meta_index_builder;
@ -307,6 +326,7 @@ uint64_t CuckooTableBuilder::FileSize() const {
// If tree depth exceeds max depth, we return false indicating failure.
bool CuckooTableBuilder::MakeSpaceForKey(
const autovector<uint64_t>& hash_vals,
const uint64_t hash_table_size,
const uint64_t make_space_for_key_call_id,
std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id) {
@ -322,12 +342,13 @@ bool CuckooTableBuilder::MakeSpaceForKey(
std::vector<CuckooNode> tree;
// We want to identify already visited buckets in the current method call so
// that we don't add same buckets again for exploration in the tree.
// We do this by maintaining a count of current method call, which acts as a
// unique id for this invocation of the method. We store this number into
// the nodes that we explore in current method call.
// We do this by maintaining a count of current method call in
// make_space_for_key_call_id, which acts as a unique id for this invocation
// of the method. We store this number into the nodes that we explore in
// current method call.
// It is unlikely for the increment operation to overflow because the maximum
// no. of times this will be called is <= max_num_hash_table_ + kvs_.size().
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
// no. of times this will be called is <= max_num_hash_func_ + kvs_.size().
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
uint64_t bucket_id = hash_vals[hash_cnt];
(*buckets)[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
@ -342,22 +363,26 @@ bool CuckooTableBuilder::MakeSpaceForKey(
break;
}
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
for (uint32_t hash_cnt = 0;
hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) {
uint64_t child_bucket_id = get_slice_hash_(
is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
: ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
hash_cnt, buckets->size());
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id) {
continue;
}
(*buckets)[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
curr_pos));
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
null_found = true;
break;
hash_cnt, hash_table_size);
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++child_bucket_id) {
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id) {
continue;
}
(*buckets)[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
curr_pos));
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
null_found = true;
break;
}
}
}
++curr_pos;
@ -367,10 +392,10 @@ bool CuckooTableBuilder::MakeSpaceForKey(
// There is an empty node in tree.back(). Now, traverse the path from this
// empty node to top of the tree and at every node in the path, replace
// child with the parent. Stop when first level is reached in the tree
// (happens when 0 <= bucket_to_replace_pos < num_hash_table_) and return
// (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return
// this location in first level for target key to be inserted.
uint32_t bucket_to_replace_pos = tree.size()-1;
while (bucket_to_replace_pos >= num_hash_table_) {
while (bucket_to_replace_pos >= num_hash_func_) {
CuckooNode& curr_node = tree[bucket_to_replace_pos];
(*buckets)[curr_node.bucket_id] =
(*buckets)[tree[curr_node.parent_pos].bucket_id];

@ -23,6 +23,7 @@ class CuckooTableBuilder: public TableBuilder {
CuckooTableBuilder(
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
uint32_t max_search_depth, const Comparator* user_comparator,
uint32_t cuckoo_block_size,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
// REQUIRES: Either Finish() or Abandon() has been called.
@ -60,7 +61,7 @@ class CuckooTableBuilder: public TableBuilder {
CuckooBucket()
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
uint32_t vector_idx;
// This number will not exceed kvs_.size() + max_num_hash_table_.
// This number will not exceed kvs_.size() + max_num_hash_func_.
// We assume number of items is <= 2^32.
uint32_t make_space_for_key_call_id;
};
@ -68,16 +69,18 @@ class CuckooTableBuilder: public TableBuilder {
bool MakeSpaceForKey(
const autovector<uint64_t>& hash_vals,
const uint64_t hash_table_size,
const uint64_t call_id,
std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id);
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
uint32_t num_hash_table_;
uint32_t num_hash_func_;
WritableFile* file_;
const double hash_table_ratio_;
const uint32_t max_num_hash_table_;
const uint32_t max_num_hash_func_;
const uint32_t max_search_depth_;
const uint32_t cuckoo_block_size_;
bool is_last_level_file_;
Status status_;
std::vector<std::pair<std::string, std::string>> kvs_;

@ -37,8 +37,9 @@ class CuckooBuilderTest {
void CheckFileContents(const std::vector<std::string>& keys,
const std::vector<std::string>& values,
const std::vector<uint64_t>& expected_locations,
std::string expected_unused_bucket, uint64_t expected_max_buckets,
uint32_t expected_num_hash_fun, bool expected_is_last_level) {
std::string expected_unused_bucket, uint64_t expected_table_size,
uint32_t expected_num_hash_func, bool expected_is_last_level,
uint32_t expected_cuckoo_block_size = 1) {
// Read file
unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
@ -51,7 +52,8 @@ class CuckooBuilderTest {
kCuckooTableMagicNumber, env_, nullptr, &props));
ASSERT_EQ(props->num_entries, keys.size());
ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
ASSERT_EQ(props->data_size, keys.size()*expected_unused_bucket.size());
ASSERT_EQ(props->data_size, expected_unused_bucket.size() *
(expected_table_size + expected_cuckoo_block_size - 1));
ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len);
// Check unused bucket.
@ -65,14 +67,18 @@ class CuckooBuilderTest {
CuckooTablePropertyNames::kValueLength].data());
ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
ASSERT_EQ(props->raw_value_size, values.size()*value_len_found);
const uint64_t max_buckets =
const uint64_t table_size =
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].data());
ASSERT_EQ(expected_max_buckets, max_buckets);
const uint32_t num_hash_fun_found =
CuckooTablePropertyNames::kHashTableSize].data());
ASSERT_EQ(expected_table_size, table_size);
const uint32_t num_hash_func_found =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].data());
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found);
CuckooTablePropertyNames::kNumHashFunc].data());
ASSERT_EQ(expected_num_hash_func, num_hash_func_found);
const uint32_t cuckoo_block_size =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kCuckooBlockSize].data());
ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size);
const bool is_last_level_found =
*reinterpret_cast<const bool*>(props->user_collected_properties[
CuckooTablePropertyNames::kIsLastLevel].data());
@ -82,7 +88,7 @@ class CuckooBuilderTest {
// Check contents of the bucket.
std::vector<bool> keys_found(keys.size(), false);
uint32_t bucket_size = expected_unused_bucket.size();
for (uint32_t i = 0; i < max_buckets; ++i) {
for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) {
Slice read_slice;
ASSERT_OK(read_file->Read(i*bucket_size, bucket_size,
&read_slice, nullptr));
@ -119,7 +125,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
4, 100, BytewiseComparator(), GetSliceHash);
4, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
@ -146,7 +152,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
@ -156,11 +162,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, false);
expected_unused_bucket, expected_table_size, 2, false);
}
TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
@ -183,7 +189,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
fname = test::TmpDir() + "/WithCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
@ -193,11 +199,49 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 4, false);
expected_unused_bucket, expected_table_size, 4, false);
}
// Builds a table from four keys whose configured hash values are all
// {0, 1, 2, 3}, using a cuckoo block size of 2, then checks the written file:
// expected bucket locations 0..3, 3 hash functions recorded, and the
// configured cuckoo block size recorded.
TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
// Passed to the builder as the maximum number of hash functions it may use.
uint32_t num_hash_fun = 4;
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
// Every key gets the same list of hash values, so each insertion after the
// first collides. Presumably hash_map is what the GetSliceHash test helper
// consults (neither is visible in this view) -- confirm.
hash_map = {
{user_keys[0], {0, 1, 2, 3}},
{user_keys[1], {0, 1, 2, 3}},
{user_keys[2], {0, 1, 2, 3}},
{user_keys[3], {0, 1, 2, 3}},
};
// Presumably the expected bucket index of each key, in key order -- confirm
// against CheckFileContents.
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
std::vector<std::string> keys;
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
unique_ptr<WritableFile> writable_file;
uint32_t cuckoo_block_size = 2;
fname = test::TmpDir() + "/WithCollisionFullKey2";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, GetSliceHash);
ASSERT_OK(builder.status());
// Add the keys one at a time, checking the entry count and builder status
// after each Add.
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
// Same formula the builder uses for its hash table size
// (number of entries / hash table ratio).
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
// Trailing arguments: 3 = expected number of hash functions, false =
// expected is-last-level flag, then the expected cuckoo block size.
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_table_size, 3, false, cuckoo_block_size);
}
TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
@ -225,7 +269,46 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
fname = test::TmpDir() + "/WithCollisionPathFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_table_size, 2, false);
}
TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
uint32_t num_hash_fun = 2;
std::vector<std::string> user_keys = {"key01", "key02", "key03",
"key04", "key05"};
std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
hash_map = {
{user_keys[0], {0, 1}},
{user_keys[1], {1, 2}},
{user_keys[2], {3, 4}},
{user_keys[3], {4, 5}},
{user_keys[4], {0, 3}},
};
std::vector<uint64_t> expected_locations = {2, 1, 3, 4, 0};
std::vector<std::string> keys;
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), 2, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
@ -235,11 +318,11 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = keys.size() / kHashTableRatio;
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, false);
expected_unused_bucket, expected_table_size, 2, false, 2);
}
TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
@ -258,7 +341,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
fname = test::TmpDir() + "/NoCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -268,11 +351,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
uint32_t expected_table_size = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, true);
expected_unused_bucket, expected_table_size, 2, true);
}
TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
@ -291,7 +374,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
fname = test::TmpDir() + "/WithCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -301,11 +384,11 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
uint32_t expected_table_size = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 4, true);
expected_unused_bucket, expected_table_size, 4, true);
}
TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
@ -326,7 +409,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 2, BytewiseComparator(), GetSliceHash);
num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -336,11 +419,11 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio;
uint32_t expected_table_size = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, true);
expected_unused_bucket, expected_table_size, 2, true);
}
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
@ -362,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 2, BytewiseComparator(), GetSliceHash);
num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
@ -382,7 +465,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
fname = test::TmpDir() + "/FailWhenSameKeyInserted";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash);
num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));

@ -49,7 +49,7 @@ TableBuilder* CuckooTableFactory::NewTableBuilder(
WritableFile* file, CompressionType compression_type) const {
return new CuckooTableBuilder(file, hash_table_ratio_, kMaxNumHashTable,
max_search_depth_, internal_comparator.user_comparator(),
GetSliceMurmurHash);
cuckoo_block_size_, GetSliceMurmurHash);
}
std::string CuckooTableFactory::GetPrintableTableOptions() const {
@ -68,8 +68,9 @@ std::string CuckooTableFactory::GetPrintableTableOptions() const {
}
TableFactory* NewCuckooTableFactory(double hash_table_ratio,
uint32_t max_search_depth) {
return new CuckooTableFactory(hash_table_ratio, max_search_depth);
uint32_t max_search_depth, uint32_t cuckoo_block_size) {
return new CuckooTableFactory(
hash_table_ratio, max_search_depth, cuckoo_block_size);
}
} // namespace rocksdb

@ -23,9 +23,11 @@ extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
// - Does not support Merge operations.
class CuckooTableFactory : public TableFactory {
public:
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth)
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth,
uint32_t cuckoo_block_size)
: hash_table_ratio_(hash_table_ratio),
max_search_depth_(max_search_depth) {}
max_search_depth_(max_search_depth),
cuckoo_block_size_(cuckoo_block_size) {}
~CuckooTableFactory() {}
const char* Name() const override { return "CuckooTable"; }
@ -50,6 +52,7 @@ class CuckooTableFactory : public TableFactory {
private:
const double hash_table_ratio_;
const uint32_t max_search_depth_;
const uint32_t cuckoo_block_size_;
};
} // namespace rocksdb

@ -21,6 +21,9 @@
#include "util/coding.h"
namespace rocksdb {
namespace {
// Bitwise complement of (CACHE_LINE_SIZE - 1). Presumably used to round an
// address down to a cache-line boundary, which assumes CACHE_LINE_SIZE is a
// power of two -- TODO confirm against its definition and this mask's users.
static const uint64_t CACHE_LINE_MASK = ~(CACHE_LINE_SIZE - 1);
}
extern const uint64_t kCuckooTableMagicNumber;
@ -44,12 +47,12 @@ CuckooTableReader::CuckooTableReader(
}
table_props_.reset(props);
auto& user_props = props->user_collected_properties;
auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashTable);
auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
if (hash_funs == user_props.end()) {
status_ = Status::InvalidArgument("Number of hash functions not found");
return;
}
num_hash_fun_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
num_hash_func_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
if (unused_key == user_props.end()) {
status_ = Status::InvalidArgument("Empty bucket value not found");
@ -67,18 +70,29 @@ CuckooTableReader::CuckooTableReader(
value_length->second.data());
bucket_length_ = key_length_ + value_length_;
auto num_buckets = user_props.find(CuckooTablePropertyNames::kMaxNumBuckets);
if (num_buckets == user_props.end()) {
status_ = Status::InvalidArgument("Num buckets not found");
auto hash_table_size = user_props.find(
CuckooTablePropertyNames::kHashTableSize);
if (hash_table_size == user_props.end()) {
status_ = Status::InvalidArgument("Hash table size not found");
return;
}
num_buckets_ = *reinterpret_cast<const uint64_t*>(num_buckets->second.data());
hash_table_size_ = *reinterpret_cast<const uint64_t*>(
hash_table_size->second.data());
auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
if (is_last_level == user_props.end()) {
status_ = Status::InvalidArgument("Is last level not found");
return;
}
is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data());
auto cuckoo_block_size = user_props.find(
CuckooTablePropertyNames::kCuckooBlockSize);
if (cuckoo_block_size == user_props.end()) {
status_ = Status::InvalidArgument("Cuckoo block size not found");
return;
}
cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
cuckoo_block_size->second.data());
cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
status_ = file_->Read(0, file_size, &file_data_, nullptr);
}
@ -89,40 +103,45 @@ Status CuckooTableReader::Get(
void (*mark_key_may_exist_handler)(void* handle_context)) {
assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
Slice user_key = ExtractUserKey(key);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_);
assert(hash_val < num_buckets_);
const char* bucket = &file_data_.data()[hash_val * bucket_length_];
if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()),
Slice(bucket, user_key.size())) == 0) {
return Status::OK();
}
// Here, we compare only the user key part as we support only one entry
// per user key and we don't support snapshots.
if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) {
Slice value = Slice(&bucket[key_length_], value_length_);
if (is_last_level_) {
ParsedInternalKey found_ikey(Slice(bucket, key_length_), 0, kTypeValue);
result_handler(handle_context, found_ikey, value);
} else {
Slice full_key(bucket, key_length_);
ParsedInternalKey found_ikey;
ParseInternalKey(full_key, &found_ikey);
result_handler(handle_context, found_ikey, value);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size_);
assert(hash_val < hash_table_size_);
for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
++block_idx, ++hash_val) {
const char* bucket = &file_data_.data()[hash_val * bucket_length_];
if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()),
Slice(bucket, user_key.size())) == 0) {
return Status::OK();
}
// Here, we compare only the user key part as we support only one entry
// per user key and we don't support snapshots.
if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) {
Slice value = Slice(&bucket[key_length_], value_length_);
if (is_last_level_) {
ParsedInternalKey found_ikey(
Slice(bucket, key_length_), 0, kTypeValue);
result_handler(handle_context, found_ikey, value);
} else {
Slice full_key(bucket, key_length_);
ParsedInternalKey found_ikey;
ParseInternalKey(full_key, &found_ikey);
result_handler(handle_context, found_ikey, value);
}
// We don't support merge operations. So, we return here.
return Status::OK();
}
// We don't support merge operations. So, we return here.
return Status::OK();
}
}
return Status::OK();
}
void CuckooTableReader::Prepare(const Slice& key) {
Slice user_key = ExtractUserKey(key);
// Prefetching first location also helps improve Get performance.
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_);
PREFETCH(&file_data_.data()[hash_val * bucket_length_], 0, 3);
// Prefetch the first Cuckoo Block.
uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) + bucket_length_
* get_slice_hash_(ExtractUserKey(key), 0, hash_table_size_);
uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
}
}
@ -186,7 +205,9 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
void CuckooTableIterator::LoadKeysFromReader() {
key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries);
for (uint32_t bucket_id = 0; bucket_id < reader_->num_buckets_; bucket_id++) {
uint64_t num_buckets = reader_->hash_table_size_ +
reader_->cuckoo_block_size_ - 1;
for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) {
Slice read_key;
status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_,
reader_->key_length_, &read_key, nullptr);

@ -65,12 +65,14 @@ class CuckooTableReader: public TableReader {
bool is_last_level_;
std::shared_ptr<const TableProperties> table_props_;
Status status_;
uint32_t num_hash_fun_;
uint32_t num_hash_func_;
std::string unused_key_;
uint32_t key_length_;
uint32_t value_length_;
uint32_t bucket_length_;
uint64_t num_buckets_;
uint32_t cuckoo_block_size_;
uint32_t cuckoo_block_bytes_minus_one_;
uint64_t hash_table_size_;
const Comparator* ucomp_;
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
uint64_t max_num_buckets);

@ -109,7 +109,7 @@ class CuckooReaderTest {
std::unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder(
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, GetSliceHash);
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
@ -420,7 +420,7 @@ void WriteFile(const std::vector<std::string>& keys,
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder(
writable_file.get(), hash_ratio,
kMaxNumHashTable, 1000, test::Uint64Comparator(), GetSliceMurmurHash);
kMaxNumHashTable, 1000, test::Uint64Comparator(), 5, GetSliceMurmurHash);
ASSERT_OK(builder.status());
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
// Value is just a part of key.
@ -446,7 +446,7 @@ void WriteFile(const std::vector<std::string>& keys,
int cnt = 0;
ASSERT_OK(reader.Get(r_options, Slice(key), &cnt, CheckValue, nullptr));
if (cnt != 1) {
fprintf(stderr, "%" PRIx64 " not found.\n",
fprintf(stderr, "%" PRIu64 " not found.\n",
*reinterpret_cast<const uint64_t*>(key.data()));
ASSERT_EQ(1, cnt);
}
@ -473,7 +473,7 @@ void ReadKeys(const std::vector<std::string>& keys, uint64_t num,
const UserCollectedProperties user_props =
reader.GetTableProperties()->user_collected_properties;
const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>(
user_props.at(CuckooTablePropertyNames::kNumHashTable).data());
user_props.at(CuckooTablePropertyNames::kNumHashFunc).data());
fprintf(stderr, "With %" PRIu64 " items and hash table ratio %f, number of"
" hash functions used: %u.\n", num, hash_ratio, num_hash_fun);
ReadOptions r_options;

Loading…
Cancel
Save