Addressing TODOs in CuckooTableBuilder

Summary:
Contains the following changes in CuckooTableBuilder:
- Take an extra constructor parameter (is_last_level) that identifies whether the file being built is a last-level file.
- Implement a better way to tell whether a bucket has already been inserted into the tree during the BFS search.
- Fix minor typos.

Test Plan:
make cuckoo_table_builder
./cuckoo_table_builder
make valgrind_check

Reviewers: sdong, igor, yhchiang, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20445
main
Radheshyam Balasundaram 10 years ago
parent 4b61a3d67d
commit 07a7d870b8
  1. 3
      include/rocksdb/table.h
  2. 130
      table/cuckoo_table_builder.cc
  3. 53
      table/cuckoo_table_builder.h
  4. 156
      table/cuckoo_table_builder_test.cc
  5. 10
      table/cuckoo_table_factory.h

@ -185,7 +185,8 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions());
struct CuckooTablePropertyNames {
static const std::string kEmptyBucket;
static const std::string kEmptyKey;
static const std::string kValueLength;
static const std::string kNumHashTable;
static const std::string kMaxNumBuckets;
};

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -21,37 +21,38 @@
#include "util/random.h"
namespace rocksdb {
const std::string CuckooTablePropertyNames::kEmptyBucket =
"rocksdb.cuckoo.bucket.empty.bucket";
const std::string CuckooTablePropertyNames::kEmptyKey =
"rocksdb.cuckoo.bucket.empty.key";
const std::string CuckooTablePropertyNames::kNumHashTable =
"rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
"rocksdb.cuckoo.bucket.maxnum";
const std::string CuckooTablePropertyNames::kValueLength =
"rocksdb.cuckoo.value.length";
// Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, unsigned int fixed_key_length,
unsigned int fixed_value_length, double hash_table_ratio,
unsigned int file_size, unsigned int max_num_hash_table,
unsigned int max_search_depth,
unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int,
unsigned int))
: num_hash_table_(std::min((unsigned int) 4, max_num_hash_table)),
WritableFile* file, uint32_t fixed_key_length,
uint32_t fixed_value_length, double hash_table_ratio,
uint64_t file_size, uint32_t max_num_hash_table,
uint32_t max_search_depth, bool is_last_level,
uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
: num_hash_table_(2),
file_(file),
value_length_(fixed_value_length),
bucket_size_(fixed_key_length + fixed_value_length),
// 8 is the difference between sizes of user key and InternalKey.
bucket_size_(fixed_key_length +
fixed_value_length - (is_last_level ? 8 : 0)),
hash_table_ratio_(hash_table_ratio),
max_num_buckets_(file_size / bucket_size_),
max_num_hash_table_(max_num_hash_table),
max_search_depth_(max_search_depth),
is_last_level_file_(is_last_level),
buckets_(max_num_buckets_),
make_space_for_key_call_id_(0),
GetSliceHash(GetSliceHashPtr) {
// The bucket_size is currently not optimized for last level.
// In last level, the bucket will not contain full key.
// TODO(rbs): Find how we can determine if last level or not
// before we start adding entries into the table.
properties_.num_entries = 0;
// Data is in a huge block.
properties_.num_data_blocks = 1;
@ -67,17 +68,17 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
status_ = Status::Corruption("Hash Table is full.");
return;
}
unsigned int bucket_id;
uint64_t bucket_id;
bool bucket_found = false;
autovector<unsigned int> hash_vals;
autovector<uint64_t> hash_vals;
ParsedInternalKey ikey;
if (!ParseInternalKey(key, &ikey)) {
status_ = Status::Corruption("Unable to parse key into inernal key.");
return;
}
Slice user_key = ikey.user_key;
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
if (buckets_[hash_val].is_empty) {
bucket_id = hash_val;
bucket_found = true;
@ -98,27 +99,21 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
}
// We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions.
unsigned int old_num_hash = num_hash_table_;
num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_);
for (unsigned int i = old_num_hash; i < num_hash_table_; i++) {
unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
if (buckets_[hash_val].is_empty) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
}
uint64_t hash_val = GetSliceHash(user_key,
num_hash_table_, max_num_buckets_);
++num_hash_table_;
if (buckets_[hash_val].is_empty) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
}
}
buckets_[bucket_id].key = key;
buckets_[bucket_id].value = value;
buckets_[bucket_id].is_empty = false;
if (ikey.sequence != 0) {
// This is not a last level file.
is_last_level_file_ = false;
}
properties_.num_entries++;
// We assume that the keys are inserted in sorted order. To identify an
@ -169,10 +164,11 @@ Status CuckooTableBuilder::Finish() {
AppendInternalKey(&unused_bucket, ikey);
}
properties_.fixed_key_len = unused_bucket.size();
unsigned int bucket_size = unused_bucket.size() + value_length_;
// Resize to bucket size.
unused_bucket.resize(bucket_size, 'a');
properties_.user_collected_properties[
CuckooTablePropertyNames::kValueLength].assign(
reinterpret_cast<const char*>(&value_length_), sizeof(value_length_));
unused_bucket.resize(bucket_size_, 'a');
// Write the table.
for (auto& bucket : buckets_) {
Status s;
@ -197,16 +193,20 @@ Status CuckooTableBuilder::Finish() {
}
}
unsigned int offset = buckets_.size() * bucket_size;
uint64_t offset = buckets_.size() * bucket_size_;
unused_bucket.resize(properties_.fixed_key_len);
properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyBucket] = unused_bucket;
CuckooTablePropertyNames::kNumHashTable].assign(
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
properties_.user_collected_properties[
CuckooTablePropertyNames::kNumHashTable] = std::to_string(num_hash_table_);
PutVarint32(&properties_.user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_);
CuckooTablePropertyNames::kMaxNumBuckets].assign(
reinterpret_cast<const char*>(&max_num_buckets_),
sizeof(max_num_buckets_));
// Write meta blocks.
MetaIndexBuilder meta_index_builer;
MetaIndexBuilder meta_index_builder;
PropertyBlockBuilder property_block_builder;
property_block_builder.AddTableProperty(properties_);
@ -221,8 +221,8 @@ Status CuckooTableBuilder::Finish() {
return s;
}
meta_index_builer.Add(kPropertiesBlock, property_block_handle);
Slice meta_index_block = meta_index_builer.Finish();
meta_index_builder.Add(kPropertiesBlock, property_block_handle);
Slice meta_index_block = meta_index_builder.Finish();
BlockHandle meta_index_block_handle;
meta_index_block_handle.set_offset(offset);
@ -262,44 +262,52 @@ uint64_t CuckooTableBuilder::FileSize() const {
}
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
unsigned int *bucket_id, autovector<unsigned int> hash_vals) {
uint64_t *bucket_id, autovector<uint64_t> hash_vals) {
struct CuckooNode {
unsigned int bucket_id;
unsigned int depth;
uint64_t bucket_id;
uint32_t depth;
int parent_pos;
CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos)
CuckooNode(uint64_t bucket_id, uint32_t depth, int parent_pos)
: bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
};
// This is BFS search tree that is stored simply as a vector.
// Each node stores the index of parent node in the vector.
std::vector<CuckooNode> tree;
// This is a very bad way to keep track of visited nodes.
// TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket
// and use it to track visited nodes.
std::vector<bool> buckets_visited(max_num_buckets_, false);
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
unsigned int bucket_id = hash_vals[hash_cnt];
buckets_visited[bucket_id] = true;
// We want to identify already visited buckets in the current method call so
// that we don't add same buckets again for exploration in the tree.
// We do this by maintaining a count of current method call, which acts as a
// unique id for this invocation of the method. We store this number into
// the nodes that we explore in current method call.
// It is unlikely for the increment operation to overflow because the maximum
// number of times this will be called is <= max_num_hash_table_ +
// max_num_buckets_.
++make_space_for_key_call_id_;
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t bucket_id = hash_vals[hash_cnt];
buckets_[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id_;
tree.push_back(CuckooNode(bucket_id, 0, -1));
}
bool null_found = false;
unsigned int curr_pos = 0;
uint32_t curr_pos = 0;
while (!null_found && curr_pos < tree.size()) {
CuckooNode& curr_node = tree[curr_pos];
if (curr_node.depth >= max_search_depth_) {
break;
}
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
unsigned int child_bucket_id = GetSliceHash(
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t child_bucket_id = GetSliceHash(
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
if (child_bucket_id == curr_node.bucket_id) {
continue;
}
if (buckets_visited[child_bucket_id]) {
if (buckets_[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id_) {
continue;
}
buckets_visited[child_bucket_id] = true;
buckets_[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id_;
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
curr_pos));
if (buckets_[child_bucket_id].is_empty) {

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -16,22 +16,14 @@
namespace rocksdb {
struct CuckooBucket {
CuckooBucket(): is_empty(true) {}
Slice key;
Slice value;
bool is_empty;
};
class CuckooTableBuilder: public TableBuilder {
public:
CuckooTableBuilder(
WritableFile* file, unsigned int fixed_key_length,
unsigned int fixed_value_length, double hash_table_ratio,
unsigned int file_size, unsigned int max_num_hash_table,
unsigned int max_search_depth,
unsigned int (*GetSliceHash)(const Slice&, unsigned int,
unsigned int));
WritableFile* file, uint32_t fixed_key_length,
uint32_t fixed_value_length, double hash_table_ratio,
uint64_t file_size, uint32_t max_num_hash_table,
uint32_t max_search_depth, bool is_last_level,
uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
// REQUIRES: Either Finish() or Abandon() has been called.
~CuckooTableBuilder();
@ -64,23 +56,32 @@ class CuckooTableBuilder: public TableBuilder {
uint64_t FileSize() const override;
private:
bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id,
autovector<unsigned int> hash_vals);
unsigned int num_hash_table_;
// One slot of the in-memory bucket array (buckets_) that the builder fills
// before writing the table out.
struct CuckooBucket {
// A freshly constructed bucket is empty and carries call id 0.
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
// Key placed in this bucket by Add().
Slice key;
// Value placed in this bucket by Add().
Slice value;
// True until Add() stores a key/value pair here.
bool is_empty;
// Copy of the builder's make_space_for_key_call_id_ counter taken when
// MakeSpaceForKey() added this bucket to its BFS tree. If it equals the
// counter of the current call, the bucket was already visited in that call.
uint64_t make_space_for_key_call_id;
};
bool MakeSpaceForKey(const Slice& key, uint64_t* bucket_id,
autovector<uint64_t> hash_vals);
uint32_t num_hash_table_;
WritableFile* file_;
const unsigned int value_length_;
const unsigned int bucket_size_;
const uint32_t value_length_;
const uint32_t bucket_size_;
const double hash_table_ratio_;
const unsigned int max_num_buckets_;
const unsigned int max_num_hash_table_;
const unsigned int max_search_depth_;
const uint64_t max_num_buckets_;
const uint32_t max_num_hash_table_;
const uint32_t max_search_depth_;
const bool is_last_level_file_;
Status status_;
std::vector<CuckooBucket> buckets_;
bool is_last_level_file_ = true;
TableProperties properties_;
unsigned int (*GetSliceHash)(const Slice& s, unsigned int index,
unsigned int max_num_buckets);
uint64_t make_space_for_key_call_id_;
uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
uint64_t max_num_buckets);
std::string unused_user_key_ = "";
std::string prev_key_;

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -19,20 +19,19 @@ namespace rocksdb {
extern const uint64_t kCuckooTableMagicNumber;
namespace {
std::unordered_map<std::string, std::vector<unsigned int>> hash_map;
std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
void AddHashLookups(const std::string& s, unsigned int bucket_id,
unsigned int num_hash_fun) {
std::vector<unsigned int> v;
for (unsigned int i = 0; i < num_hash_fun; i++) {
void AddHashLookups(const std::string& s, uint64_t bucket_id,
uint32_t num_hash_fun) {
std::vector<uint64_t> v;
for (uint32_t i = 0; i < num_hash_fun; i++) {
v.push_back(bucket_id + i);
}
hash_map[s] = v;
return;
}
unsigned int GetSliceHash(const Slice& s, unsigned int index,
unsigned int max_num_buckets) {
uint64_t GetSliceHash(const Slice& s, uint32_t index,
uint64_t max_num_buckets) {
return hash_map[s.ToString()][index];
}
} // namespace
@ -43,7 +42,9 @@ class CuckooBuilderTest {
env_ = Env::Default();
}
void CheckFileContents(const std::string& expected_data) {
void CheckFileContents(const std::string& expected_data,
std::string expected_unused_bucket, uint64_t expected_max_buckets,
uint32_t expected_num_hash_fun) {
// Read file
unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
@ -58,15 +59,22 @@ class CuckooBuilderTest {
ASSERT_EQ(props->fixed_key_len, key_length);
// Check unused bucket.
std::string unused_bucket = props->user_collected_properties[
CuckooTablePropertyNames::kEmptyBucket];
ASSERT_EQ(expected_unused_bucket, unused_bucket);
std::string unused_key = props->user_collected_properties[
CuckooTablePropertyNames::kEmptyKey];
ASSERT_EQ(expected_unused_bucket.substr(0, key_length), unused_key);
unsigned int max_buckets;
Slice max_buckets_slice = Slice(props->user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets]);
GetVarint32(&max_buckets_slice, &max_buckets);
uint32_t value_len_found =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kValueLength].data());
ASSERT_EQ(value_length, value_len_found);
const uint64_t max_buckets =
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].data());
ASSERT_EQ(expected_max_buckets, max_buckets);
const uint32_t num_hash_fun_found =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].data());
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found);
delete props;
// Check contents of the bucket.
std::string read_data;
@ -80,28 +88,25 @@ class CuckooBuilderTest {
Env* env_;
const EnvOptions env_options_;
std::string fname;
std::string expected_unused_bucket;
unsigned int file_size = 100000;
unsigned int num_items = 20;
unsigned int num_hash_fun = 64;
uint64_t file_size = 100000;
uint32_t num_items = 20;
uint32_t num_hash_fun = 64;
double hash_table_ratio = 0.9;
unsigned int ikey_length;
unsigned int user_key_length;
unsigned int key_length;
unsigned int value_length;
unsigned int bucket_length;
unsigned int expected_max_buckets;
uint32_t ikey_length;
uint32_t user_key_length;
uint32_t key_length;
uint32_t value_length;
uint32_t bucket_length;
};
TEST(CuckooBuilderTest, NoCollision) {
hash_map.clear();
num_items = 20;
num_hash_fun = 64;
uint32_t expected_num_hash_fun = 2;
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
unsigned int bucket_ids = 0;
for (unsigned int i = 0; i < num_items; i++) {
uint64_t bucket_ids = 0;
for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey);
@ -114,9 +119,10 @@ TEST(CuckooBuilderTest, NoCollision) {
key_length = ikey_length;
value_length = values[0].size();
bucket_length = ikey_length + value_length;
expected_max_buckets = file_size / bucket_length;
uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_user_key = "keys10:";
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
std::string expected_unused_bucket;
AppendInternalKey(&expected_unused_bucket, ikey);
expected_unused_bucket.resize(bucket_length, 'a');
unique_ptr<WritableFile> writable_file;
@ -125,11 +131,11 @@ TEST(CuckooBuilderTest, NoCollision) {
CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length,
value_length, hash_table_ratio,
file_size, num_hash_fun, 100, GetSliceHash);
file_size, num_hash_fun, 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
unsigned int key_idx = 0;
uint32_t key_idx = 0;
std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) {
for (uint32_t i = 0; i < expected_max_buckets; i++) {
if (key_idx * num_hash_fun == i && key_idx < num_items) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -142,16 +148,18 @@ TEST(CuckooBuilderTest, NoCollision) {
}
ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close();
CheckFileContents(expected_file_data);
CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
}
TEST(CuckooBuilderTest, NoCollisionLastLevel) {
hash_map.clear();
uint32_t expected_num_hash_fun = 2;
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
unsigned int bucket_ids = 0;
for (unsigned int i = 0; i < num_items; i++) {
uint64_t bucket_ids = 0;
for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100);
// Set zero sequence number in all keys.
ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
@ -165,20 +173,20 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
key_length = user_key_length;
value_length = values[0].size();
bucket_length = key_length + value_length;
expected_max_buckets = file_size / bucket_length;
expected_unused_bucket = "keys10:";
uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_bucket = "keys10:";
expected_unused_bucket.resize(bucket_length, 'a');
unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder cuckoo_builder(
writable_file.get(), key_length,
writable_file.get(), ikey_length,
value_length, hash_table_ratio,
file_size, num_hash_fun, 100, GetSliceHash);
file_size, num_hash_fun, 100, true, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
unsigned int key_idx = 0;
uint32_t key_idx = 0;
std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) {
for (uint32_t i = 0; i < expected_max_buckets; i++) {
if (key_idx * num_hash_fun == i && key_idx < num_items) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -191,7 +199,8 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
}
ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close();
CheckFileContents(expected_file_data);
CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
}
TEST(CuckooBuilderTest, WithCollision) {
@ -199,10 +208,11 @@ TEST(CuckooBuilderTest, WithCollision) {
hash_map.clear();
num_hash_fun = 20;
num_items = num_hash_fun;
uint32_t expected_num_hash_fun = num_hash_fun;
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items; i++) {
for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey);
@ -214,9 +224,10 @@ TEST(CuckooBuilderTest, WithCollision) {
value_length = values[0].size();
key_length = ikey_length;
bucket_length = key_length + value_length;
expected_max_buckets = file_size / bucket_length;
uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_user_key = "keys10:";
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
std::string expected_unused_bucket;
AppendInternalKey(&expected_unused_bucket, ikey);
expected_unused_bucket.resize(bucket_length, 'a');
unique_ptr<WritableFile> writable_file;
@ -224,11 +235,11 @@ TEST(CuckooBuilderTest, WithCollision) {
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder cuckoo_builder(
writable_file.get(), key_length, value_length, hash_table_ratio,
file_size, num_hash_fun, 100, GetSliceHash);
file_size, num_hash_fun, 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
unsigned int key_idx = 0;
uint32_t key_idx = 0;
std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) {
for (uint32_t i = 0; i < expected_max_buckets; i++) {
if (key_idx == i && key_idx < num_items) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -241,7 +252,8 @@ TEST(CuckooBuilderTest, WithCollision) {
}
ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close();
CheckFileContents(expected_file_data);
CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
}
TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
@ -253,7 +265,7 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items; i++) {
for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey);
@ -269,9 +281,9 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun,
100, GetSliceHash);
100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -300,7 +312,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun,
100, GetSliceHash);
100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value));
ASSERT_OK(cuckoo_builder.status());
@ -316,14 +328,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
// Have two hash functions. Insert elements with overlapping hashes.
// Finally insert an element which will displace all the current elements.
num_hash_fun = 2;
unsigned int max_search_depth = 100;
uint32_t expected_num_hash_fun = num_hash_fun;
uint32_t max_search_depth = 100;
num_items = max_search_depth + 2;
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
std::vector<unsigned int> expected_bucket_id(num_items);
for (unsigned int i = 0; i < num_items - 1; i++) {
std::vector<uint64_t> expected_bucket_id(num_items);
for (uint32_t i = 0; i < num_items - 1; i++) {
user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey);
@ -346,13 +358,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
key_length = ikey_length;
bucket_length = key_length + value_length;
expected_max_buckets = file_size / bucket_length;
uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_user_key = "keys10:";
ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
std::string expected_unused_bucket;
AppendInternalKey(&expected_unused_bucket, ikey);
expected_unused_bucket.resize(bucket_length, 'a');
std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) {
for (uint32_t i = 0; i < expected_max_buckets; i++) {
expected_file_data += expected_unused_bucket;
}
@ -362,9 +375,9 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
CuckooTableBuilder cuckoo_builder(
writable_file.get(), key_length,
value_length, hash_table_ratio, file_size,
num_hash_fun, max_search_depth, GetSliceHash);
num_hash_fun, max_search_depth, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) {
for (uint32_t key_idx = 0; key_idx < num_items; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -373,7 +386,8 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
}
ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close();
CheckFileContents(expected_file_data);
CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
}
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
@ -382,12 +396,12 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
// Finally insert an element which will displace all the current elements.
num_hash_fun = 2;
unsigned int max_search_depth = 100;
uint32_t max_search_depth = 100;
num_items = max_search_depth + 3;
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items - 1; i++) {
for (uint32_t i = 0; i < num_items - 1; i++) {
user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey);
@ -410,9 +424,9 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun,
max_search_depth, GetSliceHash);
max_search_depth, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -431,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items; i++) {
for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+1000);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey);
@ -451,9 +465,9 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun,
100, GetSliceHash);
100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
@ -10,11 +10,11 @@
namespace rocksdb {
static const unsigned int kMaxNumHashTable = 64;
static const uint32_t kMaxNumHashTable = 64;
unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index,
unsigned int max_num_buckets) {
static constexpr unsigned int seeds[kMaxNumHashTable] = {
uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
uint64_t max_num_buckets) {
static constexpr uint32_t seeds[kMaxNumHashTable] = {
816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
193618736, 428277388, 770956674, 819994962, 755946528, 40807421,

Loading…
Cancel
Save