Addressing TODOs in CuckooTableBuilder

Summary:
Contains the following changes in CuckooTableBuilder:
- Take an extra parameter in the constructor to identify whether the file is a last-level file.
- Implement a better way to identify whether a bucket has already been inserted into the tree during the BFS search.
- Fix minor typos.

Test Plan:
make cuckoo_table_builder
./cuckoo_table_builder
make valgrind_check

Reviewers: sdong, igor, yhchiang, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20445
main
Radheshyam Balasundaram 10 years ago
parent 4b61a3d67d
commit 07a7d870b8
  1. 3
      include/rocksdb/table.h
  2. 118
      table/cuckoo_table_builder.cc
  3. 51
      table/cuckoo_table_builder.h
  4. 156
      table/cuckoo_table_builder_test.cc
  5. 10
      table/cuckoo_table_factory.h

@ -185,7 +185,8 @@ extern TableFactory* NewPlainTableFactory(const PlainTableOptions& options =
PlainTableOptions()); PlainTableOptions());
struct CuckooTablePropertyNames { struct CuckooTablePropertyNames {
static const std::string kEmptyBucket; static const std::string kEmptyKey;
static const std::string kValueLength;
static const std::string kNumHashTable; static const std::string kNumHashTable;
static const std::string kMaxNumBuckets; static const std::string kMaxNumBuckets;
}; };

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved. // Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the // This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
@ -21,37 +21,38 @@
#include "util/random.h" #include "util/random.h"
namespace rocksdb { namespace rocksdb {
const std::string CuckooTablePropertyNames::kEmptyBucket = const std::string CuckooTablePropertyNames::kEmptyKey =
"rocksdb.cuckoo.bucket.empty.bucket"; "rocksdb.cuckoo.bucket.empty.key";
const std::string CuckooTablePropertyNames::kNumHashTable = const std::string CuckooTablePropertyNames::kNumHashTable =
"rocksdb.cuckoo.hash.num"; "rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets = const std::string CuckooTablePropertyNames::kMaxNumBuckets =
"rocksdb.cuckoo.bucket.maxnum"; "rocksdb.cuckoo.bucket.maxnum";
const std::string CuckooTablePropertyNames::kValueLength =
"rocksdb.cuckoo.value.length";
// Obtained by running echo rocksdb.table.cuckoo | sha1sum // Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder( CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, unsigned int fixed_key_length, WritableFile* file, uint32_t fixed_key_length,
unsigned int fixed_value_length, double hash_table_ratio, uint32_t fixed_value_length, double hash_table_ratio,
unsigned int file_size, unsigned int max_num_hash_table, uint64_t file_size, uint32_t max_num_hash_table,
unsigned int max_search_depth, uint32_t max_search_depth, bool is_last_level,
unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int, uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
unsigned int)) : num_hash_table_(2),
: num_hash_table_(std::min((unsigned int) 4, max_num_hash_table)),
file_(file), file_(file),
value_length_(fixed_value_length), value_length_(fixed_value_length),
bucket_size_(fixed_key_length + fixed_value_length), // 8 is the difference between sizes of user key and InternalKey.
bucket_size_(fixed_key_length +
fixed_value_length - (is_last_level ? 8 : 0)),
hash_table_ratio_(hash_table_ratio), hash_table_ratio_(hash_table_ratio),
max_num_buckets_(file_size / bucket_size_), max_num_buckets_(file_size / bucket_size_),
max_num_hash_table_(max_num_hash_table), max_num_hash_table_(max_num_hash_table),
max_search_depth_(max_search_depth), max_search_depth_(max_search_depth),
is_last_level_file_(is_last_level),
buckets_(max_num_buckets_), buckets_(max_num_buckets_),
make_space_for_key_call_id_(0),
GetSliceHash(GetSliceHashPtr) { GetSliceHash(GetSliceHashPtr) {
// The bucket_size is currently not optimized for last level.
// In last level, the bucket will not contain full key.
// TODO(rbs): Find how we can determine if last level or not
// before we start adding entries into the table.
properties_.num_entries = 0; properties_.num_entries = 0;
// Data is in a huge block. // Data is in a huge block.
properties_.num_data_blocks = 1; properties_.num_data_blocks = 1;
@ -67,17 +68,17 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
status_ = Status::Corruption("Hash Table is full."); status_ = Status::Corruption("Hash Table is full.");
return; return;
} }
unsigned int bucket_id; uint64_t bucket_id;
bool bucket_found = false; bool bucket_found = false;
autovector<unsigned int> hash_vals; autovector<uint64_t> hash_vals;
ParsedInternalKey ikey; ParsedInternalKey ikey;
if (!ParseInternalKey(key, &ikey)) { if (!ParseInternalKey(key, &ikey)) {
status_ = Status::Corruption("Unable to parse key into inernal key."); status_ = Status::Corruption("Unable to parse key into inernal key.");
return; return;
} }
Slice user_key = ikey.user_key; Slice user_key = ikey.user_key;
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_); uint64_t hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
if (buckets_[hash_val].is_empty) { if (buckets_[hash_val].is_empty) {
bucket_id = hash_val; bucket_id = hash_val;
bucket_found = true; bucket_found = true;
@ -98,10 +99,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
} }
// We don't really need to rehash the entire table because old hashes are // We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions. // still valid and we only increased the number of hash functions.
unsigned int old_num_hash = num_hash_table_; uint64_t hash_val = GetSliceHash(user_key,
num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_); num_hash_table_, max_num_buckets_);
for (unsigned int i = old_num_hash; i < num_hash_table_; i++) { ++num_hash_table_;
unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
if (buckets_[hash_val].is_empty) { if (buckets_[hash_val].is_empty) {
bucket_found = true; bucket_found = true;
bucket_id = hash_val; bucket_id = hash_val;
@ -110,15 +110,10 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
hash_vals.push_back(hash_val); hash_vals.push_back(hash_val);
} }
} }
}
buckets_[bucket_id].key = key; buckets_[bucket_id].key = key;
buckets_[bucket_id].value = value; buckets_[bucket_id].value = value;
buckets_[bucket_id].is_empty = false; buckets_[bucket_id].is_empty = false;
if (ikey.sequence != 0) {
// This is not a last level file.
is_last_level_file_ = false;
}
properties_.num_entries++; properties_.num_entries++;
// We assume that the keys are inserted in sorted order. To identify an // We assume that the keys are inserted in sorted order. To identify an
@ -169,10 +164,11 @@ Status CuckooTableBuilder::Finish() {
AppendInternalKey(&unused_bucket, ikey); AppendInternalKey(&unused_bucket, ikey);
} }
properties_.fixed_key_len = unused_bucket.size(); properties_.fixed_key_len = unused_bucket.size();
unsigned int bucket_size = unused_bucket.size() + value_length_; properties_.user_collected_properties[
// Resize to bucket size. CuckooTablePropertyNames::kValueLength].assign(
unused_bucket.resize(bucket_size, 'a'); reinterpret_cast<const char*>(&value_length_), sizeof(value_length_));
unused_bucket.resize(bucket_size_, 'a');
// Write the table. // Write the table.
for (auto& bucket : buckets_) { for (auto& bucket : buckets_) {
Status s; Status s;
@ -197,16 +193,20 @@ Status CuckooTableBuilder::Finish() {
} }
} }
unsigned int offset = buckets_.size() * bucket_size; uint64_t offset = buckets_.size() * bucket_size_;
unused_bucket.resize(properties_.fixed_key_len);
properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
properties_.user_collected_properties[ properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyBucket] = unused_bucket; CuckooTablePropertyNames::kNumHashTable].assign(
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
properties_.user_collected_properties[ properties_.user_collected_properties[
CuckooTablePropertyNames::kNumHashTable] = std::to_string(num_hash_table_); CuckooTablePropertyNames::kMaxNumBuckets].assign(
PutVarint32(&properties_.user_collected_properties[ reinterpret_cast<const char*>(&max_num_buckets_),
CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_); sizeof(max_num_buckets_));
// Write meta blocks. // Write meta blocks.
MetaIndexBuilder meta_index_builer; MetaIndexBuilder meta_index_builder;
PropertyBlockBuilder property_block_builder; PropertyBlockBuilder property_block_builder;
property_block_builder.AddTableProperty(properties_); property_block_builder.AddTableProperty(properties_);
@ -221,8 +221,8 @@ Status CuckooTableBuilder::Finish() {
return s; return s;
} }
meta_index_builer.Add(kPropertiesBlock, property_block_handle); meta_index_builder.Add(kPropertiesBlock, property_block_handle);
Slice meta_index_block = meta_index_builer.Finish(); Slice meta_index_block = meta_index_builder.Finish();
BlockHandle meta_index_block_handle; BlockHandle meta_index_block_handle;
meta_index_block_handle.set_offset(offset); meta_index_block_handle.set_offset(offset);
@ -262,44 +262,52 @@ uint64_t CuckooTableBuilder::FileSize() const {
} }
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key, bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
unsigned int *bucket_id, autovector<unsigned int> hash_vals) { uint64_t *bucket_id, autovector<uint64_t> hash_vals) {
struct CuckooNode { struct CuckooNode {
unsigned int bucket_id; uint64_t bucket_id;
unsigned int depth; uint32_t depth;
int parent_pos; int parent_pos;
CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos) CuckooNode(uint64_t bucket_id, uint32_t depth, int parent_pos)
: bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {} : bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
}; };
// This is BFS search tree that is stored simply as a vector. // This is BFS search tree that is stored simply as a vector.
// Each node stores the index of parent node in the vector. // Each node stores the index of parent node in the vector.
std::vector<CuckooNode> tree; std::vector<CuckooNode> tree;
// This is a very bad way to keep track of visited nodes. // We want to identify already visited buckets in the current method call so
// TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket // that we don't add same buckets again for exploration in the tree.
// and use it to track visited nodes. // We do this by maintaining a count of current method call, which acts as a
std::vector<bool> buckets_visited(max_num_buckets_, false); // unique id for this invocation of the method. We store this number into
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { // the nodes that we explore in current method call.
unsigned int bucket_id = hash_vals[hash_cnt]; // It is unlikely for the increment operation to overflow because the maximum
buckets_visited[bucket_id] = true; // number of times this will be called is <= max_num_hash_table_ +
// max_num_buckets_.
++make_space_for_key_call_id_;
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t bucket_id = hash_vals[hash_cnt];
buckets_[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id_;
tree.push_back(CuckooNode(bucket_id, 0, -1)); tree.push_back(CuckooNode(bucket_id, 0, -1));
} }
bool null_found = false; bool null_found = false;
unsigned int curr_pos = 0; uint32_t curr_pos = 0;
while (!null_found && curr_pos < tree.size()) { while (!null_found && curr_pos < tree.size()) {
CuckooNode& curr_node = tree[curr_pos]; CuckooNode& curr_node = tree[curr_pos];
if (curr_node.depth >= max_search_depth_) { if (curr_node.depth >= max_search_depth_) {
break; break;
} }
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id]; CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
unsigned int child_bucket_id = GetSliceHash( uint64_t child_bucket_id = GetSliceHash(
ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_); ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
if (child_bucket_id == curr_node.bucket_id) { if (child_bucket_id == curr_node.bucket_id) {
continue; continue;
} }
if (buckets_visited[child_bucket_id]) { if (buckets_[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id_) {
continue; continue;
} }
buckets_visited[child_bucket_id] = true; buckets_[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id_;
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1, tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
curr_pos)); curr_pos));
if (buckets_[child_bucket_id].is_empty) { if (buckets_[child_bucket_id].is_empty) {

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved. // Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the // This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
@ -16,22 +16,14 @@
namespace rocksdb { namespace rocksdb {
struct CuckooBucket {
CuckooBucket(): is_empty(true) {}
Slice key;
Slice value;
bool is_empty;
};
class CuckooTableBuilder: public TableBuilder { class CuckooTableBuilder: public TableBuilder {
public: public:
CuckooTableBuilder( CuckooTableBuilder(
WritableFile* file, unsigned int fixed_key_length, WritableFile* file, uint32_t fixed_key_length,
unsigned int fixed_value_length, double hash_table_ratio, uint32_t fixed_value_length, double hash_table_ratio,
unsigned int file_size, unsigned int max_num_hash_table, uint64_t file_size, uint32_t max_num_hash_table,
unsigned int max_search_depth, uint32_t max_search_depth, bool is_last_level,
unsigned int (*GetSliceHash)(const Slice&, unsigned int, uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
unsigned int));
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~CuckooTableBuilder(); ~CuckooTableBuilder();
@ -64,23 +56,32 @@ class CuckooTableBuilder: public TableBuilder {
uint64_t FileSize() const override; uint64_t FileSize() const override;
private: private:
bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id, struct CuckooBucket {
autovector<unsigned int> hash_vals); CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
Slice key;
Slice value;
bool is_empty;
uint64_t make_space_for_key_call_id;
};
bool MakeSpaceForKey(const Slice& key, uint64_t* bucket_id,
autovector<uint64_t> hash_vals);
unsigned int num_hash_table_; uint32_t num_hash_table_;
WritableFile* file_; WritableFile* file_;
const unsigned int value_length_; const uint32_t value_length_;
const unsigned int bucket_size_; const uint32_t bucket_size_;
const double hash_table_ratio_; const double hash_table_ratio_;
const unsigned int max_num_buckets_; const uint64_t max_num_buckets_;
const unsigned int max_num_hash_table_; const uint32_t max_num_hash_table_;
const unsigned int max_search_depth_; const uint32_t max_search_depth_;
const bool is_last_level_file_;
Status status_; Status status_;
std::vector<CuckooBucket> buckets_; std::vector<CuckooBucket> buckets_;
bool is_last_level_file_ = true;
TableProperties properties_; TableProperties properties_;
unsigned int (*GetSliceHash)(const Slice& s, unsigned int index, uint64_t make_space_for_key_call_id_;
unsigned int max_num_buckets); uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
uint64_t max_num_buckets);
std::string unused_user_key_ = ""; std::string unused_user_key_ = "";
std::string prev_key_; std::string prev_key_;

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved. // Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the // This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
@ -19,20 +19,19 @@ namespace rocksdb {
extern const uint64_t kCuckooTableMagicNumber; extern const uint64_t kCuckooTableMagicNumber;
namespace { namespace {
std::unordered_map<std::string, std::vector<unsigned int>> hash_map; std::unordered_map<std::string, std::vector<uint64_t>> hash_map;
void AddHashLookups(const std::string& s, unsigned int bucket_id, void AddHashLookups(const std::string& s, uint64_t bucket_id,
unsigned int num_hash_fun) { uint32_t num_hash_fun) {
std::vector<unsigned int> v; std::vector<uint64_t> v;
for (unsigned int i = 0; i < num_hash_fun; i++) { for (uint32_t i = 0; i < num_hash_fun; i++) {
v.push_back(bucket_id + i); v.push_back(bucket_id + i);
} }
hash_map[s] = v; hash_map[s] = v;
return;
} }
unsigned int GetSliceHash(const Slice& s, unsigned int index, uint64_t GetSliceHash(const Slice& s, uint32_t index,
unsigned int max_num_buckets) { uint64_t max_num_buckets) {
return hash_map[s.ToString()][index]; return hash_map[s.ToString()][index];
} }
} // namespace } // namespace
@ -43,7 +42,9 @@ class CuckooBuilderTest {
env_ = Env::Default(); env_ = Env::Default();
} }
void CheckFileContents(const std::string& expected_data) { void CheckFileContents(const std::string& expected_data,
std::string expected_unused_bucket, uint64_t expected_max_buckets,
uint32_t expected_num_hash_fun) {
// Read file // Read file
unique_ptr<RandomAccessFile> read_file; unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_)); ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
@ -58,15 +59,22 @@ class CuckooBuilderTest {
ASSERT_EQ(props->fixed_key_len, key_length); ASSERT_EQ(props->fixed_key_len, key_length);
// Check unused bucket. // Check unused bucket.
std::string unused_bucket = props->user_collected_properties[ std::string unused_key = props->user_collected_properties[
CuckooTablePropertyNames::kEmptyBucket]; CuckooTablePropertyNames::kEmptyKey];
ASSERT_EQ(expected_unused_bucket, unused_bucket); ASSERT_EQ(expected_unused_bucket.substr(0, key_length), unused_key);
unsigned int max_buckets; uint32_t value_len_found =
Slice max_buckets_slice = Slice(props->user_collected_properties[ *reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets]); CuckooTablePropertyNames::kValueLength].data());
GetVarint32(&max_buckets_slice, &max_buckets); ASSERT_EQ(value_length, value_len_found);
const uint64_t max_buckets =
*reinterpret_cast<const uint64_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].data());
ASSERT_EQ(expected_max_buckets, max_buckets); ASSERT_EQ(expected_max_buckets, max_buckets);
const uint32_t num_hash_fun_found =
*reinterpret_cast<const uint32_t*>(props->user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].data());
ASSERT_EQ(expected_num_hash_fun, num_hash_fun_found);
delete props; delete props;
// Check contents of the bucket. // Check contents of the bucket.
std::string read_data; std::string read_data;
@ -80,28 +88,25 @@ class CuckooBuilderTest {
Env* env_; Env* env_;
const EnvOptions env_options_; const EnvOptions env_options_;
std::string fname; std::string fname;
std::string expected_unused_bucket; uint64_t file_size = 100000;
unsigned int file_size = 100000; uint32_t num_items = 20;
unsigned int num_items = 20; uint32_t num_hash_fun = 64;
unsigned int num_hash_fun = 64;
double hash_table_ratio = 0.9; double hash_table_ratio = 0.9;
unsigned int ikey_length; uint32_t ikey_length;
unsigned int user_key_length; uint32_t user_key_length;
unsigned int key_length; uint32_t key_length;
unsigned int value_length; uint32_t value_length;
unsigned int bucket_length; uint32_t bucket_length;
unsigned int expected_max_buckets;
}; };
TEST(CuckooBuilderTest, NoCollision) { TEST(CuckooBuilderTest, NoCollision) {
hash_map.clear(); hash_map.clear();
num_items = 20; uint32_t expected_num_hash_fun = 2;
num_hash_fun = 64;
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
unsigned int bucket_ids = 0; uint64_t bucket_ids = 0;
for (unsigned int i = 0; i < num_items; i++) { for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100); user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey); AppendInternalKey(&keys[i], ikey);
@ -114,9 +119,10 @@ TEST(CuckooBuilderTest, NoCollision) {
key_length = ikey_length; key_length = ikey_length;
value_length = values[0].size(); value_length = values[0].size();
bucket_length = ikey_length + value_length; bucket_length = ikey_length + value_length;
expected_max_buckets = file_size / bucket_length; uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_user_key = "keys10:"; std::string expected_unused_user_key = "keys10:";
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue); ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
std::string expected_unused_bucket;
AppendInternalKey(&expected_unused_bucket, ikey); AppendInternalKey(&expected_unused_bucket, ikey);
expected_unused_bucket.resize(bucket_length, 'a'); expected_unused_bucket.resize(bucket_length, 'a');
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
@ -125,11 +131,11 @@ TEST(CuckooBuilderTest, NoCollision) {
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length, writable_file.get(), ikey_length,
value_length, hash_table_ratio, value_length, hash_table_ratio,
file_size, num_hash_fun, 100, GetSliceHash); file_size, num_hash_fun, 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
unsigned int key_idx = 0; uint32_t key_idx = 0;
std::string expected_file_data = ""; std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) { for (uint32_t i = 0; i < expected_max_buckets; i++) {
if (key_idx * num_hash_fun == i && key_idx < num_items) { if (key_idx * num_hash_fun == i && key_idx < num_items) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -142,16 +148,18 @@ TEST(CuckooBuilderTest, NoCollision) {
} }
ASSERT_OK(cuckoo_builder.Finish()); ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close(); writable_file->Close();
CheckFileContents(expected_file_data); CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
} }
TEST(CuckooBuilderTest, NoCollisionLastLevel) { TEST(CuckooBuilderTest, NoCollisionLastLevel) {
hash_map.clear(); hash_map.clear();
uint32_t expected_num_hash_fun = 2;
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
unsigned int bucket_ids = 0; uint64_t bucket_ids = 0;
for (unsigned int i = 0; i < num_items; i++) { for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100); user_keys[i] = "keys" + std::to_string(i+100);
// Set zero sequence number in all keys. // Set zero sequence number in all keys.
ParsedInternalKey ikey(user_keys[i], 0, kTypeValue); ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
@ -165,20 +173,20 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
key_length = user_key_length; key_length = user_key_length;
value_length = values[0].size(); value_length = values[0].size();
bucket_length = key_length + value_length; bucket_length = key_length + value_length;
expected_max_buckets = file_size / bucket_length; uint64_t expected_max_buckets = file_size / bucket_length;
expected_unused_bucket = "keys10:"; std::string expected_unused_bucket = "keys10:";
expected_unused_bucket.resize(bucket_length, 'a'); expected_unused_bucket.resize(bucket_length, 'a');
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file"; fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), key_length, writable_file.get(), ikey_length,
value_length, hash_table_ratio, value_length, hash_table_ratio,
file_size, num_hash_fun, 100, GetSliceHash); file_size, num_hash_fun, 100, true, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
unsigned int key_idx = 0; uint32_t key_idx = 0;
std::string expected_file_data = ""; std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) { for (uint32_t i = 0; i < expected_max_buckets; i++) {
if (key_idx * num_hash_fun == i && key_idx < num_items) { if (key_idx * num_hash_fun == i && key_idx < num_items) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -191,7 +199,8 @@ TEST(CuckooBuilderTest, NoCollisionLastLevel) {
} }
ASSERT_OK(cuckoo_builder.Finish()); ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close(); writable_file->Close();
CheckFileContents(expected_file_data); CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
} }
TEST(CuckooBuilderTest, WithCollision) { TEST(CuckooBuilderTest, WithCollision) {
@ -199,10 +208,11 @@ TEST(CuckooBuilderTest, WithCollision) {
hash_map.clear(); hash_map.clear();
num_hash_fun = 20; num_hash_fun = 20;
num_items = num_hash_fun; num_items = num_hash_fun;
uint32_t expected_num_hash_fun = num_hash_fun;
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items; i++) { for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100); user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey); AppendInternalKey(&keys[i], ikey);
@ -214,9 +224,10 @@ TEST(CuckooBuilderTest, WithCollision) {
value_length = values[0].size(); value_length = values[0].size();
key_length = ikey_length; key_length = ikey_length;
bucket_length = key_length + value_length; bucket_length = key_length + value_length;
expected_max_buckets = file_size / bucket_length; uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_user_key = "keys10:"; std::string expected_unused_user_key = "keys10:";
ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue); ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
std::string expected_unused_bucket;
AppendInternalKey(&expected_unused_bucket, ikey); AppendInternalKey(&expected_unused_bucket, ikey);
expected_unused_bucket.resize(bucket_length, 'a'); expected_unused_bucket.resize(bucket_length, 'a');
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
@ -224,11 +235,11 @@ TEST(CuckooBuilderTest, WithCollision) {
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), key_length, value_length, hash_table_ratio, writable_file.get(), key_length, value_length, hash_table_ratio,
file_size, num_hash_fun, 100, GetSliceHash); file_size, num_hash_fun, 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
unsigned int key_idx = 0; uint32_t key_idx = 0;
std::string expected_file_data = ""; std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) { for (uint32_t i = 0; i < expected_max_buckets; i++) {
if (key_idx == i && key_idx < num_items) { if (key_idx == i && key_idx < num_items) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -241,7 +252,8 @@ TEST(CuckooBuilderTest, WithCollision) {
} }
ASSERT_OK(cuckoo_builder.Finish()); ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close(); writable_file->Close();
CheckFileContents(expected_file_data); CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
} }
TEST(CuckooBuilderTest, FailWithTooManyCollisions) { TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
@ -253,7 +265,7 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items; i++) { for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+100); user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey); AppendInternalKey(&keys[i], ikey);
@ -269,9 +281,9 @@ TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length, writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun, value_length, hash_table_ratio, file_size, num_hash_fun,
100, GetSliceHash); 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) { for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -300,7 +312,7 @@ TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length, writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun, value_length, hash_table_ratio, file_size, num_hash_fun,
100, GetSliceHash); 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value)); cuckoo_builder.Add(Slice(key_to_reuse1), Slice(value));
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
@ -316,14 +328,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
// Have two hash functions. Insert elements with overlapping hashes. // Have two hash functions. Insert elements with overlapping hashes.
// Finally insert an element which will displace all the current elements. // Finally insert an element which will displace all the current elements.
num_hash_fun = 2; num_hash_fun = 2;
uint32_t expected_num_hash_fun = num_hash_fun;
unsigned int max_search_depth = 100; uint32_t max_search_depth = 100;
num_items = max_search_depth + 2; num_items = max_search_depth + 2;
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
std::vector<unsigned int> expected_bucket_id(num_items); std::vector<uint64_t> expected_bucket_id(num_items);
for (unsigned int i = 0; i < num_items - 1; i++) { for (uint32_t i = 0; i < num_items - 1; i++) {
user_keys[i] = "keys" + std::to_string(i+100); user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey); AppendInternalKey(&keys[i], ikey);
@ -346,13 +358,14 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
key_length = ikey_length; key_length = ikey_length;
bucket_length = key_length + value_length; bucket_length = key_length + value_length;
expected_max_buckets = file_size / bucket_length; uint64_t expected_max_buckets = file_size / bucket_length;
std::string expected_unused_user_key = "keys10:"; std::string expected_unused_user_key = "keys10:";
ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue); ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
std::string expected_unused_bucket;
AppendInternalKey(&expected_unused_bucket, ikey); AppendInternalKey(&expected_unused_bucket, ikey);
expected_unused_bucket.resize(bucket_length, 'a'); expected_unused_bucket.resize(bucket_length, 'a');
std::string expected_file_data = ""; std::string expected_file_data = "";
for (unsigned int i = 0; i < expected_max_buckets; i++) { for (uint32_t i = 0; i < expected_max_buckets; i++) {
expected_file_data += expected_unused_bucket; expected_file_data += expected_unused_bucket;
} }
@ -362,9 +375,9 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), key_length, writable_file.get(), key_length,
value_length, hash_table_ratio, file_size, value_length, hash_table_ratio, file_size,
num_hash_fun, max_search_depth, GetSliceHash); num_hash_fun, max_search_depth, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) { for (uint32_t key_idx = 0; key_idx < num_items; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -373,7 +386,8 @@ TEST(CuckooBuilderTest, WithACollisionPath) {
} }
ASSERT_OK(cuckoo_builder.Finish()); ASSERT_OK(cuckoo_builder.Finish());
writable_file->Close(); writable_file->Close();
CheckFileContents(expected_file_data); CheckFileContents(expected_file_data, expected_unused_bucket,
expected_max_buckets, expected_num_hash_fun);
} }
TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) { TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
@ -382,12 +396,12 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
// Finally insert an element which will displace all the current elements. // Finally insert an element which will displace all the current elements.
num_hash_fun = 2; num_hash_fun = 2;
unsigned int max_search_depth = 100; uint32_t max_search_depth = 100;
num_items = max_search_depth + 3; num_items = max_search_depth + 3;
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items - 1; i++) { for (uint32_t i = 0; i < num_items - 1; i++) {
user_keys[i] = "keys" + std::to_string(i+100); user_keys[i] = "keys" + std::to_string(i+100);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey); AppendInternalKey(&keys[i], ikey);
@ -410,9 +424,9 @@ TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length, writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun, value_length, hash_table_ratio, file_size, num_hash_fun,
max_search_depth, GetSliceHash); max_search_depth, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) { for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);
@ -431,7 +445,7 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
std::vector<std::string> user_keys(num_items); std::vector<std::string> user_keys(num_items);
std::vector<std::string> keys(num_items); std::vector<std::string> keys(num_items);
std::vector<std::string> values(num_items); std::vector<std::string> values(num_items);
for (unsigned int i = 0; i < num_items; i++) { for (uint32_t i = 0; i < num_items; i++) {
user_keys[i] = "keys" + std::to_string(i+1000); user_keys[i] = "keys" + std::to_string(i+1000);
ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue); ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
AppendInternalKey(&keys[i], ikey); AppendInternalKey(&keys[i], ikey);
@ -451,9 +465,9 @@ TEST(CuckooBuilderTest, FailWhenTableIsFull) {
CuckooTableBuilder cuckoo_builder( CuckooTableBuilder cuckoo_builder(
writable_file.get(), ikey_length, writable_file.get(), ikey_length,
value_length, hash_table_ratio, file_size, num_hash_fun, value_length, hash_table_ratio, file_size, num_hash_fun,
100, GetSliceHash); 100, false, GetSliceHash);
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) { for (uint32_t key_idx = 0; key_idx < num_items-1; key_idx++) {
cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); cuckoo_builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
ASSERT_OK(cuckoo_builder.status()); ASSERT_OK(cuckoo_builder.status());
ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1); ASSERT_EQ(cuckoo_builder.NumEntries(), key_idx + 1);

@ -1,4 +1,4 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved. // Copyright (c) 2014, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the // This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant // LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
@ -10,11 +10,11 @@
namespace rocksdb { namespace rocksdb {
static const unsigned int kMaxNumHashTable = 64; static const uint32_t kMaxNumHashTable = 64;
unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index, uint64_t GetSliceMurmurHash(const Slice& s, uint32_t index,
unsigned int max_num_buckets) { uint64_t max_num_buckets) {
static constexpr unsigned int seeds[kMaxNumHashTable] = { static constexpr uint32_t seeds[kMaxNumHashTable] = {
816922183, 506425713, 949485004, 22513986, 421427259, 500437285, 816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
888981693, 847587269, 511007211, 722295391, 934013645, 566947683, 888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
193618736, 428277388, 770956674, 819994962, 755946528, 40807421, 193618736, 428277388, 770956674, 819994962, 755946528, 40807421,

Loading…
Cancel
Save