Implementing a cache friendly version of Cuckoo Hash

Summary: This implements a cache-friendly version of Cuckoo Hash in which, in case of a collision, we try to insert into the next few locations. The size of the neighborhood to check is taken as an input parameter in the builder and stored in the table.

Test Plan:
make check all
cuckoo_table_{db,reader,builder}_test

Reviewers: sdong, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D22455
main
Radheshyam Balasundaram 11 years ago
parent d977e55596
commit 7f71448388
  1. 37
      include/rocksdb/table.h
  2. 139
      table/cuckoo_table_builder.cc
  3. 9
      table/cuckoo_table_builder.h
  4. 145
      table/cuckoo_table_builder_test.cc
  5. 7
      table/cuckoo_table_factory.cc
  6. 7
      table/cuckoo_table_factory.h
  7. 89
      table/cuckoo_table_reader.cc
  8. 6
      table/cuckoo_table_reader.h
  9. 8
      table/cuckoo_table_reader_test.cc

@ -227,15 +227,46 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions()); PlainTableOptions());
struct CuckooTablePropertyNames { struct CuckooTablePropertyNames {
// The key that is used to fill empty buckets.
static const std::string kEmptyKey; static const std::string kEmptyKey;
// Fixed length of value.
static const std::string kValueLength; static const std::string kValueLength;
static const std::string kNumHashTable; // Number of hash functions used in Cuckoo Hash.
static const std::string kMaxNumBuckets; static const std::string kNumHashFunc;
// It denotes the number of buckets in a Cuckoo Block. Given a key and a
// particular hash function, a Cuckoo Block is a set of consecutive buckets,
// where starting bucket id is given by the hash function on the key. In case
// of a collision during inserting the key, the builder tries to insert the
// key in other locations of the cuckoo block before using the next hash
// function. This reduces cache miss during read operation in case of
// collision.
static const std::string kCuckooBlockSize;
// Size of the hash table. Use this number to compute the modulo of hash
// function. The actual number of buckets will be kHashTableSize +
// kCuckooBlockSize - 1. The last kCuckooBlockSize-1 buckets are used to
// accommodate the Cuckoo Block from end of hash table, due to cache friendly
// implementation.
static const std::string kHashTableSize;
// Denotes if the key sorted in the file is Internal Key (if false)
// or User Key only (if true).
static const std::string kIsLastLevel; static const std::string kIsLastLevel;
}; };
// Cuckoo Table Factory for SST table format using Cache Friendly Cuckoo Hashing
// @hash_table_ratio: Determines the utilization of hash tables. Smaller values
// result in larger hash tables with fewer collisions.
// @max_search_depth: A property used by builder to determine the depth to go to
// to search for a path to displace elements in case of
// collision. See Builder.MakeSpaceForKey method. Higher
// values result in more efficient hash tables with fewer
// lookups but take more time to build.
// @cuckoo_block_size: In case of collision while inserting, the builder
// attempts to insert in the next cuckoo_block_size
// locations before skipping over to the next Cuckoo hash
// function. This makes lookups more cache friendly in case
// of collisions.
extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9, extern TableFactory* NewCuckooTableFactory(double hash_table_ratio = 0.9,
uint32_t max_search_depth = 100); uint32_t max_search_depth = 100, uint32_t cuckoo_block_size = 5);
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -24,14 +24,16 @@
namespace rocksdb { namespace rocksdb {
const std::string CuckooTablePropertyNames::kEmptyKey = const std::string CuckooTablePropertyNames::kEmptyKey =
"rocksdb.cuckoo.bucket.empty.key"; "rocksdb.cuckoo.bucket.empty.key";
const std::string CuckooTablePropertyNames::kNumHashTable = const std::string CuckooTablePropertyNames::kNumHashFunc =
"rocksdb.cuckoo.hash.num"; "rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets = const std::string CuckooTablePropertyNames::kHashTableSize =
"rocksdb.cuckoo.bucket.maxnum"; "rocksdb.cuckoo.hash.size";
const std::string CuckooTablePropertyNames::kValueLength = const std::string CuckooTablePropertyNames::kValueLength =
"rocksdb.cuckoo.value.length"; "rocksdb.cuckoo.value.length";
const std::string CuckooTablePropertyNames::kIsLastLevel = const std::string CuckooTablePropertyNames::kIsLastLevel =
"rocksdb.cuckoo.file.islastlevel"; "rocksdb.cuckoo.file.islastlevel";
const std::string CuckooTablePropertyNames::kCuckooBlockSize =
"rocksdb.cuckoo.hash.cuckooblocksize";
// Obtained by running echo rocksdb.table.cuckoo | sha1sum // Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
@ -39,13 +41,14 @@ extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder( CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, double hash_table_ratio, WritableFile* file, double hash_table_ratio,
uint32_t max_num_hash_table, uint32_t max_search_depth, uint32_t max_num_hash_table, uint32_t max_search_depth,
const Comparator* user_comparator, const Comparator* user_comparator, uint32_t cuckoo_block_size,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: num_hash_table_(2), : num_hash_func_(2),
file_(file), file_(file),
hash_table_ratio_(hash_table_ratio), hash_table_ratio_(hash_table_ratio),
max_num_hash_table_(max_num_hash_table), max_num_hash_func_(max_num_hash_table),
max_search_depth_(max_search_depth), max_search_depth_(max_search_depth),
cuckoo_block_size_(std::max(1U, cuckoo_block_size)),
is_last_level_file_(false), is_last_level_file_(false),
has_seen_first_key_(false), has_seen_first_key_(false),
ucomp_(user_comparator), ucomp_(user_comparator),
@ -101,8 +104,8 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
} }
Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) { Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
uint64_t num_buckets = kvs_.size() / hash_table_ratio_; uint64_t hash_table_size = kvs_.size() / hash_table_ratio_;
buckets->resize(num_buckets); buckets->resize(hash_table_size + cuckoo_block_size_ - 1);
uint64_t make_space_for_key_call_id = 0; uint64_t make_space_for_key_call_id = 0;
for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) { for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
uint64_t bucket_id; uint64_t bucket_id;
@ -110,39 +113,49 @@ Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
autovector<uint64_t> hash_vals; autovector<uint64_t> hash_vals;
Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first : Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
ExtractUserKey(kvs_[vector_idx].first); ExtractUserKey(kvs_[vector_idx].first);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_ && !bucket_found;
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets); ++hash_cnt) {
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size);
bucket_id = hash_val; // If there is a collision, check next cuckoo_block_size_ locations for
bucket_found = true; // empty locations. While checking, if we reach end of the hash table,
break; // stop searching and proceed for next hash function.
} else { for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
if (ucomp_->Compare(user_key, is_last_level_file_ ++block_idx, ++hash_val) {
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first) if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
: ExtractUserKey( bucket_id = hash_val;
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) { bucket_found = true;
return Status::NotSupported("Same key is being inserted again."); break;
} else {
if (ucomp_->Compare(user_key, is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
return Status::NotSupported("Same key is being inserted again.");
}
hash_vals.push_back(hash_val);
} }
hash_vals.push_back(hash_val);
} }
} }
while (!bucket_found && !MakeSpaceForKey(hash_vals, while (!bucket_found && !MakeSpaceForKey(hash_vals,
++make_space_for_key_call_id, buckets, &bucket_id)) { hash_table_size, ++make_space_for_key_call_id, buckets, &bucket_id)) {
// Rehash by increashing number of hash tables. // Rehash by increashing number of hash tables.
if (num_hash_table_ >= max_num_hash_table_) { if (num_hash_func_ >= max_num_hash_func_) {
return Status::NotSupported("Too many collissions. Unable to hash."); return Status::NotSupported("Too many collisions. Unable to hash.");
} }
// We don't really need to rehash the entire table because old hashes are // We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions. // still valid and we only increased the number of hash functions.
uint64_t hash_val = get_slice_hash_(user_key, uint64_t hash_val = get_slice_hash_(user_key,
num_hash_table_, num_buckets); num_hash_func_, hash_table_size);
++num_hash_table_; ++num_hash_func_;
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) { for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
bucket_found = true; ++block_idx, ++hash_val) {
bucket_id = hash_val; if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
break; bucket_found = true;
} else { bucket_id = hash_val;
hash_vals.push_back(hash_val); break;
} else {
hash_vals.push_back(hash_val);
}
} }
} }
(*buckets)[bucket_id].vector_idx = vector_idx; (*buckets)[bucket_id].vector_idx = vector_idx;
@ -226,16 +239,22 @@ Status CuckooTableBuilder::Finish() {
properties_.user_collected_properties[ properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyKey] = unused_bucket; CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
properties_.user_collected_properties[ properties_.user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].assign( CuckooTablePropertyNames::kNumHashFunc].assign(
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_)); reinterpret_cast<char*>(&num_hash_func_), sizeof(num_hash_func_));
uint64_t num_buckets = buckets.size();
uint64_t hash_table_size = buckets.size() - cuckoo_block_size_ + 1;
properties_.user_collected_properties[ properties_.user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].assign( CuckooTablePropertyNames::kHashTableSize].assign(
reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets)); reinterpret_cast<const char*>(&hash_table_size),
sizeof(hash_table_size));
properties_.user_collected_properties[ properties_.user_collected_properties[
CuckooTablePropertyNames::kIsLastLevel].assign( CuckooTablePropertyNames::kIsLastLevel].assign(
reinterpret_cast<const char*>(&is_last_level_file_), reinterpret_cast<const char*>(&is_last_level_file_),
sizeof(is_last_level_file_)); sizeof(is_last_level_file_));
properties_.user_collected_properties[
CuckooTablePropertyNames::kCuckooBlockSize].assign(
reinterpret_cast<const char*>(&cuckoo_block_size_),
sizeof(cuckoo_block_size_));
// Write meta blocks. // Write meta blocks.
MetaIndexBuilder meta_index_builder; MetaIndexBuilder meta_index_builder;
@ -307,6 +326,7 @@ uint64_t CuckooTableBuilder::FileSize() const {
// If tree depth exceedes max depth, we return false indicating failure. // If tree depth exceedes max depth, we return false indicating failure.
bool CuckooTableBuilder::MakeSpaceForKey( bool CuckooTableBuilder::MakeSpaceForKey(
const autovector<uint64_t>& hash_vals, const autovector<uint64_t>& hash_vals,
const uint64_t hash_table_size,
const uint64_t make_space_for_key_call_id, const uint64_t make_space_for_key_call_id,
std::vector<CuckooBucket>* buckets, std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id) { uint64_t* bucket_id) {
@ -322,12 +342,13 @@ bool CuckooTableBuilder::MakeSpaceForKey(
std::vector<CuckooNode> tree; std::vector<CuckooNode> tree;
// We want to identify already visited buckets in the current method call so // We want to identify already visited buckets in the current method call so
// that we don't add same buckets again for exploration in the tree. // that we don't add same buckets again for exploration in the tree.
// We do this by maintaining a count of current method call, which acts as a // We do this by maintaining a count of current method call in
// unique id for this invocation of the method. We store this number into // make_space_for_key_call_id, which acts as a unique id for this invocation
// the nodes that we explore in current method call. // of the method. We store this number into the nodes that we explore in
// current method call.
// It is unlikely for the increment operation to overflow because the maximum // It is unlikely for the increment operation to overflow because the maximum
// no. of times this will be called is <= max_num_hash_table_ + kvs_.size(). // no. of times this will be called is <= max_num_hash_func_ + kvs_.size().
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
uint64_t bucket_id = hash_vals[hash_cnt]; uint64_t bucket_id = hash_vals[hash_cnt];
(*buckets)[bucket_id].make_space_for_key_call_id = (*buckets)[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id; make_space_for_key_call_id;
@ -342,22 +363,26 @@ bool CuckooTableBuilder::MakeSpaceForKey(
break; break;
} }
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id]; CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { for (uint32_t hash_cnt = 0;
hash_cnt < num_hash_func_ && !null_found; ++hash_cnt) {
uint64_t child_bucket_id = get_slice_hash_( uint64_t child_bucket_id = get_slice_hash_(
is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
: ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)), : ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
hash_cnt, buckets->size()); hash_cnt, hash_table_size);
if ((*buckets)[child_bucket_id].make_space_for_key_call_id == for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
make_space_for_key_call_id) { ++block_idx, ++child_bucket_id) {
continue; if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
} make_space_for_key_call_id) {
(*buckets)[child_bucket_id].make_space_for_key_call_id = continue;
make_space_for_key_call_id; }
tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1, (*buckets)[child_bucket_id].make_space_for_key_call_id =
curr_pos)); make_space_for_key_call_id;
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) { tree.push_back(CuckooNode(child_bucket_id, curr_depth + 1,
null_found = true; curr_pos));
break; if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
null_found = true;
break;
}
} }
} }
++curr_pos; ++curr_pos;
@ -367,10 +392,10 @@ bool CuckooTableBuilder::MakeSpaceForKey(
// There is an empty node in tree.back(). Now, traverse the path from this // There is an empty node in tree.back(). Now, traverse the path from this
// empty node to top of the tree and at every node in the path, replace // empty node to top of the tree and at every node in the path, replace
// child with the parent. Stop when first level is reached in the tree // child with the parent. Stop when first level is reached in the tree
// (happens when 0 <= bucket_to_replace_pos < num_hash_table_) and return // (happens when 0 <= bucket_to_replace_pos < num_hash_func_) and return
// this location in first level for target key to be inserted. // this location in first level for target key to be inserted.
uint32_t bucket_to_replace_pos = tree.size()-1; uint32_t bucket_to_replace_pos = tree.size()-1;
while (bucket_to_replace_pos >= num_hash_table_) { while (bucket_to_replace_pos >= num_hash_func_) {
CuckooNode& curr_node = tree[bucket_to_replace_pos]; CuckooNode& curr_node = tree[bucket_to_replace_pos];
(*buckets)[curr_node.bucket_id] = (*buckets)[curr_node.bucket_id] =
(*buckets)[tree[curr_node.parent_pos].bucket_id]; (*buckets)[tree[curr_node.parent_pos].bucket_id];

@ -23,6 +23,7 @@ class CuckooTableBuilder: public TableBuilder {
CuckooTableBuilder( CuckooTableBuilder(
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table, WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
uint32_t max_search_depth, const Comparator* user_comparator, uint32_t max_search_depth, const Comparator* user_comparator,
uint32_t cuckoo_block_size,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)); uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
@ -60,7 +61,7 @@ class CuckooTableBuilder: public TableBuilder {
CuckooBucket() CuckooBucket()
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {} : vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
uint32_t vector_idx; uint32_t vector_idx;
// This number will not exceed kvs_.size() + max_num_hash_table_. // This number will not exceed kvs_.size() + max_num_hash_func_.
// We assume number of items is <= 2^32. // We assume number of items is <= 2^32.
uint32_t make_space_for_key_call_id; uint32_t make_space_for_key_call_id;
}; };
@ -68,16 +69,18 @@ class CuckooTableBuilder: public TableBuilder {
bool MakeSpaceForKey( bool MakeSpaceForKey(
const autovector<uint64_t>& hash_vals, const autovector<uint64_t>& hash_vals,
const uint64_t hash_table_size,
const uint64_t call_id, const uint64_t call_id,
std::vector<CuckooBucket>* buckets, std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id); uint64_t* bucket_id);
Status MakeHashTable(std::vector<CuckooBucket>* buckets); Status MakeHashTable(std::vector<CuckooBucket>* buckets);
uint32_t num_hash_table_; uint32_t num_hash_func_;
WritableFile* file_; WritableFile* file_;
const double hash_table_ratio_; const double hash_table_ratio_;
const uint32_t max_num_hash_table_; const uint32_t max_num_hash_func_;
const uint32_t max_search_depth_; const uint32_t max_search_depth_;
const uint32_t cuckoo_block_size_;
bool is_last_level_file_; bool is_last_level_file_;
Status status_; Status status_;
std::vector<std::pair<std::string, std::string>> kvs_; std::vector<std::pair<std::string, std::string>> kvs_;

@ -37,8 +37,9 @@ class CuckooBuilderTest {
void CheckFileContents(const std::vector<std::string>& keys, void CheckFileContents(const std::vector<std::string>& keys,
const std::vector<std::string>& values, const std::vector<std::string>& values,
const std::vector<uint64_t>& expected_locations, const std::vector<uint64_t>& expected_locations,
std::string expected_unused_bucket, uint64_t expected_max_buckets, std::string expected_unused_bucket, uint64_t expected_table_size,
uint32_t expected_num_hash_fun, bool expected_is_last_level) { uint32_t expected_num_hash_func, bool expected_is_last_level,
uint32_t expected_cuckoo_block_size = 1) {
// Read file // Read file
unique_ptr<RandomAccessFile> read_file; unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
@ -51,7 +52,8 @@ class CuckooBuilderTest {
kCuckooTableMagicNumber, env_, nullptr, &props)); kCuckooTableMagicNumber, env_, nullptr, &props));
ASSERT_EQ(props->num_entries, keys.size()); ASSERT_EQ(props->num_entries, keys.size());
ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size()); ASSERT_EQ(props->fixed_key_len, keys.empty() ? 0 : keys[0].size());
ASSERT_EQ(props->data_size, keys.size()*expected_unused_bucket.size()); ASSERT_EQ(props->data_size, expected_unused_bucket.size() *
(expected_table_size + expected_cuckoo_block_size - 1));
ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len); ASSERT_EQ(props->raw_key_size, keys.size()*props->fixed_key_len);
// Check unused bucket. // Check unused bucket.
@ -65,14 +67,18 @@ class CuckooBuilderTest {
CuckooTablePropertyNames::kValueLength].data()); CuckooTablePropertyNames::kValueLength].data());
ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found); ASSERT_EQ(values.empty() ? 0 : values[0].size(), value_len_found);
ASSERT_EQ(props->raw_value_size, values.size()*value_len_found); ASSERT_EQ(props->raw_value_size, values.size()*value_len_found);
const uint64_t max_buckets = const uint64_t table_size =
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[ *reinterpret_cast<const uint64_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].data()); CuckooTablePropertyNames::kHashTableSize].data());
ASSERT_EQ(expected_max_buckets, max_buckets); ASSERT_EQ(expected_table_size, table_size);
const uint32_t num_hash_fun_found = const uint32_t num_hash_func_found =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[ *reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].data()); CuckooTablePropertyNames::kNumHashFunc].data());
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found); ASSERT_EQ(expected_num_hash_func, num_hash_func_found);
const uint32_t cuckoo_block_size =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kCuckooBlockSize].data());
ASSERT_EQ(expected_cuckoo_block_size, cuckoo_block_size);
const bool is_last_level_found = const bool is_last_level_found =
*reinterpret_cast<const bool*>(props->user_collected_properties[ *reinterpret_cast<const bool*>(props->user_collected_properties[
CuckooTablePropertyNames::kIsLastLevel].data()); CuckooTablePropertyNames::kIsLastLevel].data());
@ -82,7 +88,7 @@ class CuckooBuilderTest {
// Check contents of the bucket. // Check contents of the bucket.
std::vector<bool> keys_found(keys.size(), false); std::vector<bool> keys_found(keys.size(), false);
uint32_t bucket_size = expected_unused_bucket.size(); uint32_t bucket_size = expected_unused_bucket.size();
for (uint32_t i = 0; i < max_buckets; ++i) { for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) {
Slice read_slice; Slice read_slice;
ASSERT_OK(read_file->Read(i*bucket_size, bucket_size, ASSERT_OK(read_file->Read(i*bucket_size, bucket_size,
&read_slice, nullptr)); &read_slice, nullptr));
@ -119,7 +125,7 @@ TEST(CuckooBuilderTest, SuccessWithEmptyFile) {
fname = test::TmpDir() + "/NoCollisionFullKey"; fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
4, 100, BytewiseComparator(), GetSliceHash); 4, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
@ -146,7 +152,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
fname = test::TmpDir() + "/NoCollisionFullKey"; fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash); num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -156,11 +162,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = keys.size() / kHashTableRatio; uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, false); expected_unused_bucket, expected_table_size, 2, false);
} }
TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) { TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
@ -183,7 +189,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
fname = test::TmpDir() + "/WithCollisionFullKey"; fname = test::TmpDir() + "/WithCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash); num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -193,11 +199,49 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = keys.size() / kHashTableRatio; uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 4, false); expected_unused_bucket, expected_table_size, 4, false);
}
TEST(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
uint32_t num_hash_fun = 4;
std::vector<std::string> user_keys = {"key01", "key02", "key03", "key04"};
std::vector<std::string> values = {"v01", "v02", "v03", "v04"};
hash_map = {
{user_keys[0], {0, 1, 2, 3}},
{user_keys[1], {0, 1, 2, 3}},
{user_keys[2], {0, 1, 2, 3}},
{user_keys[3], {0, 1, 2, 3}},
};
std::vector<uint64_t> expected_locations = {0, 1, 2, 3};
std::vector<std::string> keys;
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
unique_ptr<WritableFile> writable_file;
uint32_t cuckoo_block_size = 2;
fname = test::TmpDir() + "/WithCollisionFullKey2";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_table_size, 3, false, cuckoo_block_size);
} }
TEST(CuckooBuilderTest, WithCollisionPathFullKey) { TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
@ -225,7 +269,46 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
fname = test::TmpDir() + "/WithCollisionPathFullKey"; fname = test::TmpDir() + "/WithCollisionPathFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash); num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i]));
ASSERT_EQ(builder.NumEntries(), i + 1);
ASSERT_OK(builder.status());
}
ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close());
uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_table_size, 2, false);
}
TEST(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
uint32_t num_hash_fun = 2;
std::vector<std::string> user_keys = {"key01", "key02", "key03",
"key04", "key05"};
std::vector<std::string> values = {"v01", "v02", "v03", "v04", "v05"};
hash_map = {
{user_keys[0], {0, 1}},
{user_keys[1], {1, 2}},
{user_keys[2], {3, 4}},
{user_keys[3], {4, 5}},
{user_keys[4], {0, 3}},
};
std::vector<uint64_t> expected_locations = {2, 1, 3, 4, 0};
std::vector<std::string> keys;
for (auto& user_key : user_keys) {
keys.push_back(GetInternalKey(user_key, false));
}
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), 2, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -235,11 +318,11 @@ TEST(CuckooBuilderTest, WithCollisionPathFullKey) {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = keys.size() / kHashTableRatio; uint32_t expected_table_size = keys.size() / kHashTableRatio;
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(keys, values, expected_locations, CheckFileContents(keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, false); expected_unused_bucket, expected_table_size, 2, false, 2);
} }
TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) { TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
@ -258,7 +341,7 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
fname = test::TmpDir() + "/NoCollisionUserKey"; fname = test::TmpDir() + "/NoCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash); num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -268,11 +351,11 @@ TEST(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio; uint32_t expected_table_size = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations, CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, true); expected_unused_bucket, expected_table_size, 2, true);
} }
TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) { TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
@ -291,7 +374,7 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
fname = test::TmpDir() + "/WithCollisionUserKey"; fname = test::TmpDir() + "/WithCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash); num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -301,11 +384,11 @@ TEST(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio; uint32_t expected_table_size = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations, CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 4, true); expected_unused_bucket, expected_table_size, 4, true);
} }
TEST(CuckooBuilderTest, WithCollisionPathUserKey) { TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
@ -326,7 +409,7 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
fname = test::TmpDir() + "/WithCollisionPathUserKey"; fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 2, BytewiseComparator(), GetSliceHash); num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -336,11 +419,11 @@ TEST(CuckooBuilderTest, WithCollisionPathUserKey) {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(writable_file->Close());
uint32_t expected_max_buckets = user_keys.size() / kHashTableRatio; uint32_t expected_table_size = user_keys.size() / kHashTableRatio;
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
expected_unused_bucket += std::string(values[0].size(), 'a'); expected_unused_bucket += std::string(values[0].size(), 'a');
CheckFileContents(user_keys, values, expected_locations, CheckFileContents(user_keys, values, expected_locations,
expected_unused_bucket, expected_max_buckets, 2, true); expected_unused_bucket, expected_table_size, 2, true);
} }
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) { TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
@ -362,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
fname = test::TmpDir() + "/WithCollisionPathUserKey"; fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 2, BytewiseComparator(), GetSliceHash); num_hash_fun, 2, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
@ -382,7 +465,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
fname = test::TmpDir() + "/FailWhenSameKeyInserted"; fname = test::TmpDir() + "/FailWhenSameKeyInserted";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, CuckooTableBuilder builder(writable_file.get(), kHashTableRatio,
num_hash_fun, 100, BytewiseComparator(), GetSliceHash); num_hash_fun, 100, BytewiseComparator(), 1, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));

@ -49,7 +49,7 @@ TableBuilder* CuckooTableFactory::NewTableBuilder(
WritableFile* file, CompressionType compression_type) const { WritableFile* file, CompressionType compression_type) const {
return new CuckooTableBuilder(file, hash_table_ratio_, kMaxNumHashTable, return new CuckooTableBuilder(file, hash_table_ratio_, kMaxNumHashTable,
max_search_depth_, internal_comparator.user_comparator(), max_search_depth_, internal_comparator.user_comparator(),
GetSliceMurmurHash); cuckoo_block_size_, GetSliceMurmurHash);
} }
std::string CuckooTableFactory::GetPrintableTableOptions() const { std::string CuckooTableFactory::GetPrintableTableOptions() const {
@ -68,8 +68,9 @@ std::string CuckooTableFactory::GetPrintableTableOptions() const {
} }
TableFactory* NewCuckooTableFactory(double hash_table_ratio, TableFactory* NewCuckooTableFactory(double hash_table_ratio,
uint32_t max_search_depth) { uint32_t max_search_depth, uint32_t cuckoo_block_size) {
return new CuckooTableFactory(hash_table_ratio, max_search_depth); return new CuckooTableFactory(
hash_table_ratio, max_search_depth, cuckoo_block_size);
} }
} // namespace rocksdb } // namespace rocksdb

@ -23,9 +23,11 @@ extern uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
// - Does not support Merge operations. // - Does not support Merge operations.
class CuckooTableFactory : public TableFactory { class CuckooTableFactory : public TableFactory {
public: public:
CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth) CuckooTableFactory(double hash_table_ratio, uint32_t max_search_depth,
uint32_t cuckoo_block_size)
: hash_table_ratio_(hash_table_ratio), : hash_table_ratio_(hash_table_ratio),
max_search_depth_(max_search_depth) {} max_search_depth_(max_search_depth),
cuckoo_block_size_(cuckoo_block_size) {}
~CuckooTableFactory() {} ~CuckooTableFactory() {}
const char* Name() const override { return "CuckooTable"; } const char* Name() const override { return "CuckooTable"; }
@ -50,6 +52,7 @@ class CuckooTableFactory : public TableFactory {
private: private:
const double hash_table_ratio_; const double hash_table_ratio_;
const uint32_t max_search_depth_; const uint32_t max_search_depth_;
const uint32_t cuckoo_block_size_;
}; };
} // namespace rocksdb } // namespace rocksdb

@ -21,6 +21,9 @@
#include "util/coding.h" #include "util/coding.h"
namespace rocksdb { namespace rocksdb {
namespace {
// Mask that clears the low bits of an address, rounding it down to a
// CACHE_LINE_SIZE boundary. Assumes CACHE_LINE_SIZE is a power of two
// (TODO confirm against the port header that defines it).
//
// The operand is widened to uint64_t *before* taking the complement. If
// CACHE_LINE_SIZE is an unsigned 32-bit constant, `~(CACHE_LINE_SIZE - 1)`
// would be evaluated in 32 bits and then zero-extended, producing a mask
// whose upper 32 bits are clear and which would therefore truncate 64-bit
// addresses when applied with `&=`.
static const uint64_t CACHE_LINE_MASK =
    ~(static_cast<uint64_t>(CACHE_LINE_SIZE) - 1);
}  // namespace
extern const uint64_t kCuckooTableMagicNumber; extern const uint64_t kCuckooTableMagicNumber;
@ -44,12 +47,12 @@ CuckooTableReader::CuckooTableReader(
} }
table_props_.reset(props); table_props_.reset(props);
auto& user_props = props->user_collected_properties; auto& user_props = props->user_collected_properties;
auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashTable); auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
if (hash_funs == user_props.end()) { if (hash_funs == user_props.end()) {
status_ = Status::InvalidArgument("Number of hash functions not found"); status_ = Status::InvalidArgument("Number of hash functions not found");
return; return;
} }
num_hash_fun_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data()); num_hash_func_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey); auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
if (unused_key == user_props.end()) { if (unused_key == user_props.end()) {
status_ = Status::InvalidArgument("Empty bucket value not found"); status_ = Status::InvalidArgument("Empty bucket value not found");
@ -67,18 +70,29 @@ CuckooTableReader::CuckooTableReader(
value_length->second.data()); value_length->second.data());
bucket_length_ = key_length_ + value_length_; bucket_length_ = key_length_ + value_length_;
auto num_buckets = user_props.find(CuckooTablePropertyNames::kMaxNumBuckets); auto hash_table_size = user_props.find(
if (num_buckets == user_props.end()) { CuckooTablePropertyNames::kHashTableSize);
status_ = Status::InvalidArgument("Num buckets not found"); if (hash_table_size == user_props.end()) {
status_ = Status::InvalidArgument("Hash table size not found");
return; return;
} }
num_buckets_ = *reinterpret_cast<const uint64_t*>(num_buckets->second.data()); hash_table_size_ = *reinterpret_cast<const uint64_t*>(
hash_table_size->second.data());
auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel); auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
if (is_last_level == user_props.end()) { if (is_last_level == user_props.end()) {
status_ = Status::InvalidArgument("Is last level not found"); status_ = Status::InvalidArgument("Is last level not found");
return; return;
} }
is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data()); is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data());
auto cuckoo_block_size = user_props.find(
CuckooTablePropertyNames::kCuckooBlockSize);
if (cuckoo_block_size == user_props.end()) {
status_ = Status::InvalidArgument("Cuckoo block size not found");
return;
}
cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
cuckoo_block_size->second.data());
cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
status_ = file_->Read(0, file_size, &file_data_, nullptr); status_ = file_->Read(0, file_size, &file_data_, nullptr);
} }
@ -89,40 +103,45 @@ Status CuckooTableReader::Get(
void (*mark_key_may_exist_handler)(void* handle_context)) { void (*mark_key_may_exist_handler)(void* handle_context)) {
assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0)); assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
Slice user_key = ExtractUserKey(key); Slice user_key = ExtractUserKey(key);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_); uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, hash_table_size_);
assert(hash_val < num_buckets_); assert(hash_val < hash_table_size_);
const char* bucket = &file_data_.data()[hash_val * bucket_length_]; for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()), ++block_idx, ++hash_val) {
Slice(bucket, user_key.size())) == 0) { const char* bucket = &file_data_.data()[hash_val * bucket_length_];
return Status::OK(); if (ucomp_->Compare(Slice(unused_key_.data(), user_key.size()),
} Slice(bucket, user_key.size())) == 0) {
// Here, we compare only the user key part as we support only one entry return Status::OK();
// per user key and we don't support snapshot. }
if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) { // Here, we compare only the user key part as we support only one entry
Slice value = Slice(&bucket[key_length_], value_length_); // per user key and we don't support snapshot.
if (is_last_level_) { if (ucomp_->Compare(user_key, Slice(bucket, user_key.size())) == 0) {
ParsedInternalKey found_ikey(Slice(bucket, key_length_), 0, kTypeValue); Slice value = Slice(&bucket[key_length_], value_length_);
result_handler(handle_context, found_ikey, value); if (is_last_level_) {
} else { ParsedInternalKey found_ikey(
Slice full_key(bucket, key_length_); Slice(bucket, key_length_), 0, kTypeValue);
ParsedInternalKey found_ikey; result_handler(handle_context, found_ikey, value);
ParseInternalKey(full_key, &found_ikey); } else {
result_handler(handle_context, found_ikey, value); Slice full_key(bucket, key_length_);
ParsedInternalKey found_ikey;
ParseInternalKey(full_key, &found_ikey);
result_handler(handle_context, found_ikey, value);
}
// We don't support merge operations. So, we return here.
return Status::OK();
} }
// We don't support merge operations. So, we return here.
return Status::OK();
} }
} }
return Status::OK(); return Status::OK();
} }
void CuckooTableReader::Prepare(const Slice& key) { void CuckooTableReader::Prepare(const Slice& key) {
Slice user_key = ExtractUserKey(key); // Prefetch the first Cuckoo Block.
// Prefetching first location also helps improve Get performance. uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) + bucket_length_
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) { * get_slice_hash_(ExtractUserKey(key), 0, hash_table_size_);
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets_); uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
PREFETCH(&file_data_.data()[hash_val * bucket_length_], 0, 3); for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
} }
} }
@ -186,7 +205,9 @@ CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
void CuckooTableIterator::LoadKeysFromReader() { void CuckooTableIterator::LoadKeysFromReader() {
key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries); key_to_bucket_id_.reserve(reader_->GetTableProperties()->num_entries);
for (uint32_t bucket_id = 0; bucket_id < reader_->num_buckets_; bucket_id++) { uint64_t num_buckets = reader_->hash_table_size_ +
reader_->cuckoo_block_size_ - 1;
for (uint32_t bucket_id = 0; bucket_id < num_buckets; bucket_id++) {
Slice read_key; Slice read_key;
status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_, status_ = reader_->file_->Read(bucket_id * reader_->bucket_length_,
reader_->key_length_, &read_key, nullptr); reader_->key_length_, &read_key, nullptr);

@ -65,12 +65,14 @@ class CuckooTableReader: public TableReader {
bool is_last_level_; bool is_last_level_;
std::shared_ptr<const TableProperties> table_props_; std::shared_ptr<const TableProperties> table_props_;
Status status_; Status status_;
uint32_t num_hash_fun_; uint32_t num_hash_func_;
std::string unused_key_; std::string unused_key_;
uint32_t key_length_; uint32_t key_length_;
uint32_t value_length_; uint32_t value_length_;
uint32_t bucket_length_; uint32_t bucket_length_;
uint64_t num_buckets_; uint32_t cuckoo_block_size_;
uint32_t cuckoo_block_bytes_minus_one_;
uint64_t hash_table_size_;
const Comparator* ucomp_; const Comparator* ucomp_;
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index, uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
uint64_t max_num_buckets); uint64_t max_num_buckets);

@ -109,7 +109,7 @@ class CuckooReaderTest {
std::unique_ptr<WritableFile> writable_file; std::unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder( CuckooTableBuilder builder(
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, GetSliceHash); writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) { for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
@ -420,7 +420,7 @@ void WriteFile(const std::vector<std::string>& keys,
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder( CuckooTableBuilder builder(
writable_file.get(), hash_ratio, writable_file.get(), hash_ratio,
kMaxNumHashTable, 1000, test::Uint64Comparator(), GetSliceMurmurHash); kMaxNumHashTable, 1000, test::Uint64Comparator(), 5, GetSliceMurmurHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
// Value is just a part of key. // Value is just a part of key.
@ -446,7 +446,7 @@ void WriteFile(const std::vector<std::string>& keys,
int cnt = 0; int cnt = 0;
ASSERT_OK(reader.Get(r_options, Slice(key), &cnt, CheckValue, nullptr)); ASSERT_OK(reader.Get(r_options, Slice(key), &cnt, CheckValue, nullptr));
if (cnt != 1) { if (cnt != 1) {
fprintf(stderr, "%" PRIx64 " not found.\n", fprintf(stderr, "%" PRIu64 " not found.\n",
*reinterpret_cast<const uint64_t*>(key.data())); *reinterpret_cast<const uint64_t*>(key.data()));
ASSERT_EQ(1, cnt); ASSERT_EQ(1, cnt);
} }
@ -473,7 +473,7 @@ void ReadKeys(const std::vector<std::string>& keys, uint64_t num,
const UserCollectedProperties user_props = const UserCollectedProperties user_props =
reader.GetTableProperties()->user_collected_properties; reader.GetTableProperties()->user_collected_properties;
const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>( const uint32_t num_hash_fun = *reinterpret_cast<const uint32_t*>(
user_props.at(CuckooTablePropertyNames::kNumHashTable).data()); user_props.at(CuckooTablePropertyNames::kNumHashFunc).data());
fprintf(stderr, "With %" PRIu64 " items and hash table ratio %f, number of" fprintf(stderr, "With %" PRIu64 " items and hash table ratio %f, number of"
" hash functions used: %u.\n", num, hash_ratio, num_hash_fun); " hash functions used: %u.\n", num, hash_ratio, num_hash_fun);
ReadOptions r_options; ReadOptions r_options;

Loading…
Cancel
Save