Summary: Cuckoo Hashing based SST table builder. Contains:
- Cuckoo Hashing logic and file storage logic.
- Unit tests for logic

Test Plan:
make cuckoo_table_builder_test
./cuckoo_table_builder_test
make check all

Reviewers: yhchiang, igor, sdong, ljin
Reviewed By: ljin
Subscribers: dhruba, leveldb
Differential Revision: https://reviews.facebook.net/D19545

branch: main
parent f6f1533c6f
commit cf3da899b0
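
For orientation, here is a minimal driver sketch showing how the new builder is meant to be used, modeled on the unit tests in this change. It is illustrative only: BuildExampleTable, the sample keys/values, and the parameter choices (0.9 load ratio, 100000-byte file budget, 64 hash functions, search depth 100) are made up for the example; GetSliceMurmurHash is the hash function added in the last file of this diff.

// Illustrative sketch, not part of this change.
#include <string>
#include <vector>

#include "db/dbformat.h"            // ParsedInternalKey, AppendInternalKey
#include "rocksdb/env.h"
#include "rocksdb/slice.h"
#include "table/cuckoo_table_builder.h"
// GetSliceMurmurHash comes from the header added at the end of this change.

rocksdb::Status BuildExampleTable(const std::string& fname) {
  using namespace rocksdb;
  Env* env = Env::Default();
  EnvOptions env_options;
  std::unique_ptr<WritableFile> file;
  Status s = env->NewWritableFile(fname, &file, env_options);
  if (!s.ok()) return s;

  // Keys must be internal keys, added in increasing user-key order.
  std::vector<std::string> keys, values;
  uint64_t seq = 1000;
  for (const std::string& ukey : {"key100", "key200", "key300"}) {
    std::string ikey;
    AppendInternalKey(&ikey, ParsedInternalKey(ukey, seq++, kTypeValue));
    keys.push_back(ikey);
    values.push_back("value_" + ukey);
  }

  CuckooTableBuilder builder(
      file.get(), keys[0].size() /* fixed_key_length */,
      values[0].size() /* fixed_value_length */, 0.9 /* hash_table_ratio */,
      100000 /* file_size */, 64 /* max_num_hash_table */,
      100 /* max_search_depth */, GetSliceMurmurHash);
  for (size_t i = 0; i < keys.size(); i++) {
    builder.Add(Slice(keys[i]), Slice(values[i]));
    if (!builder.status().ok()) return builder.status();
  }
  // Finish() writes the buckets, the properties block, the metaindex block
  // and the footer to the file.
  s = builder.Finish();
  file->Close();
  return s;
}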
@@ -0,0 +1,333 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#ifndef ROCKSDB_LITE
#include "table/cuckoo_table_builder.h"

#include <assert.h>
#include <algorithm>
#include <string>
#include <vector>

#include "db/dbformat.h"
#include "rocksdb/env.h"
#include "rocksdb/table.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "table/meta_blocks.h"
#include "util/autovector.h"
#include "util/random.h"

namespace rocksdb {
const std::string CuckooTablePropertyNames::kEmptyBucket =
    "rocksdb.cuckoo.bucket.empty.bucket";
const std::string CuckooTablePropertyNames::kNumHashTable =
    "rocksdb.cuckoo.hash.num";
const std::string CuckooTablePropertyNames::kMaxNumBuckets =
    "rocksdb.cuckoo.bucket.maxnum";

// Obtained by running echo rocksdb.table.cuckoo | sha1sum
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;

CuckooTableBuilder::CuckooTableBuilder(
    WritableFile* file, unsigned int fixed_key_length,
    unsigned int fixed_value_length, double hash_table_ratio,
    unsigned int file_size, unsigned int max_num_hash_table,
    unsigned int max_search_depth,
    unsigned int (*GetSliceHashPtr)(const Slice&, unsigned int,
      unsigned int))
    : num_hash_table_(std::min((unsigned int) 4, max_num_hash_table)),
      file_(file),
      key_length_(fixed_key_length),
      value_length_(fixed_value_length),
      bucket_size_(fixed_key_length + fixed_value_length),
      hash_table_ratio_(hash_table_ratio),
      max_num_buckets_(file_size / bucket_size_),
      max_num_hash_table_(max_num_hash_table),
      max_search_depth_(max_search_depth),
      buckets_(max_num_buckets_),
      GetSliceHash(GetSliceHashPtr) {
  // The bucket_size is currently not optimized for last level.
  // In last level, the bucket will not contain full key.
  // TODO(rbs): Find how we can determine if last level or not
  // before we start adding entries into the table.
  properties_.num_entries = 0;
  // Data is in a huge block.
  properties_.num_data_blocks = 1;
  properties_.index_size = 0;
  properties_.filter_size = 0;
}

CuckooTableBuilder::~CuckooTableBuilder() {
}

void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
  if (NumEntries() == max_num_buckets_) {
    status_ = Status::Corruption("Hash Table is full.");
    return;
  }
  unsigned int bucket_id;
  bool bucket_found = false;
  autovector<unsigned int> hash_vals;
  ParsedInternalKey ikey;
  if (!ParseInternalKey(key, &ikey)) {
    status_ = Status::Corruption("Unable to parse key into internal key.");
    return;
  }
  Slice user_key = ikey.user_key;
  for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
    unsigned int hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
    if (buckets_[hash_val].is_empty) {
      bucket_id = hash_val;
      bucket_found = true;
      break;
    } else {
      if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) {
        status_ = Status::Corruption("Same key is being inserted again.");
        return;
      }
      hash_vals.push_back(hash_val);
    }
  }
  while (!bucket_found && !MakeSpaceForKey(key, &bucket_id, hash_vals)) {
    // Rehash by increasing the number of hash tables.
    if (num_hash_table_ >= max_num_hash_table_) {
      status_ = Status::Corruption("Too many collisions. Unable to hash.");
      return;
    }
    // We don't really need to rehash the entire table because old hashes are
    // still valid and we only increased the number of hash functions.
    unsigned int old_num_hash = num_hash_table_;
    num_hash_table_ = std::min(num_hash_table_ + 1, max_num_hash_table_);
    for (unsigned int i = old_num_hash; i < num_hash_table_; i++) {
      unsigned int hash_val = GetSliceHash(user_key, i, max_num_buckets_);
      if (buckets_[hash_val].is_empty) {
        bucket_found = true;
        bucket_id = hash_val;
        break;
      } else {
        hash_vals.push_back(hash_val);
      }
    }
  }
  buckets_[bucket_id].key = key;
  buckets_[bucket_id].value = value;
  buckets_[bucket_id].is_empty = false;

  if (ikey.sequence != 0) {
    // This is not a last level file.
    is_last_level_file_ = false;
  }
  properties_.num_entries++;

  // We assume that the keys are inserted in sorted order. To identify an
  // unused key, which will be used in filling empty buckets in the table,
  // we try to find gaps between successive keys inserted. This is done by
  // maintaining the previous key and comparing it with the next key.
  if (unused_user_key_.empty()) {
    if (prev_key_.empty()) {
      prev_key_ = user_key.ToString();
      return;
    }
    std::string new_user_key = prev_key_;
    new_user_key.back()++;
    // We ignore carry-overs and check that it is larger than previous key.
    if ((new_user_key > prev_key_) &&
        (new_user_key < user_key.ToString())) {
      unused_user_key_ = new_user_key;
    } else {
      prev_key_ = user_key.ToString();
    }
  }
}

Status CuckooTableBuilder::status() const { return status_; }

Status CuckooTableBuilder::Finish() {
  assert(!closed_);
  closed_ = true;

  if (unused_user_key_.empty()) {
    if (prev_key_.empty()) {
      return Status::Corruption("Unable to find unused key");
    }
    std::string new_user_key = prev_key_;
    new_user_key.back()++;
    // We ignore carry-overs and check that it is larger than previous key.
    if (new_user_key > prev_key_) {
      unused_user_key_ = new_user_key;
    } else {
      return Status::Corruption("Unable to find unused key");
    }
  }
  std::string unused_bucket;
  if (is_last_level_file_) {
    unused_bucket = unused_user_key_;
  } else {
    ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
    AppendInternalKey(&unused_bucket, ikey);
  }
  properties_.fixed_key_len = unused_bucket.size();
  unsigned int bucket_size = unused_bucket.size() + value_length_;
  // Resize to bucket size.
  unused_bucket.resize(bucket_size, 'a');

  // Write the table.
  for (auto& bucket : buckets_) {
    Status s;
    if (bucket.is_empty) {
      s = file_->Append(Slice(unused_bucket));
    } else {
      if (is_last_level_file_) {
        Slice user_key = ExtractUserKey(bucket.key);
        s = file_->Append(user_key);
        if (s.ok()) {
          s = file_->Append(bucket.value);
        }
      } else {
        s = file_->Append(bucket.key);
        if (s.ok()) {
          s = file_->Append(bucket.value);
        }
      }
    }
    if (!s.ok()) {
      return s;
    }
  }

  unsigned int offset = buckets_.size() * bucket_size;
  properties_.user_collected_properties[
    CuckooTablePropertyNames::kEmptyBucket] = unused_bucket;
  properties_.user_collected_properties[
    CuckooTablePropertyNames::kNumHashTable] = std::to_string(num_hash_table_);
  PutVarint32(&properties_.user_collected_properties[
    CuckooTablePropertyNames::kMaxNumBuckets], max_num_buckets_);

  // Write meta blocks.
  MetaIndexBuilder meta_index_builder;
  PropertyBlockBuilder property_block_builder;

  property_block_builder.AddTableProperty(properties_);
  property_block_builder.Add(properties_.user_collected_properties);
  Slice property_block = property_block_builder.Finish();
  BlockHandle property_block_handle;
  property_block_handle.set_offset(offset);
  property_block_handle.set_size(property_block.size());
  Status s = file_->Append(property_block);
  offset += property_block.size();
  if (!s.ok()) {
    return s;
  }

  meta_index_builder.Add(kPropertiesBlock, property_block_handle);
  Slice meta_index_block = meta_index_builder.Finish();

  BlockHandle meta_index_block_handle;
  meta_index_block_handle.set_offset(offset);
  meta_index_block_handle.set_size(meta_index_block.size());
  s = file_->Append(meta_index_block);
  if (!s.ok()) {
    return s;
  }

  Footer footer(kCuckooTableMagicNumber);
  footer.set_metaindex_handle(meta_index_block_handle);
  footer.set_index_handle(BlockHandle::NullBlockHandle());
  std::string footer_encoding;
  footer.EncodeTo(&footer_encoding);
  s = file_->Append(footer_encoding);
  return s;
}

void CuckooTableBuilder::Abandon() {
  assert(!closed_);
  closed_ = true;
}

uint64_t CuckooTableBuilder::NumEntries() const {
  return properties_.num_entries;
}

uint64_t CuckooTableBuilder::FileSize() const {
  if (closed_) {
    return file_->GetFileSize();
  } else {
    // This is not the actual size of the file as we need to account for
    // hash table ratio. This returns the size of filled buckets in the table
    // scaled up by a factor of 1/hash table ratio.
    return (properties_.num_entries * bucket_size_) / hash_table_ratio_;
  }
}

bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
    unsigned int *bucket_id, autovector<unsigned int> hash_vals) {
  struct CuckooNode {
    unsigned int bucket_id;
    unsigned int depth;
    int parent_pos;
    CuckooNode(unsigned int bucket_id, unsigned int depth, int parent_pos)
      : bucket_id(bucket_id), depth(depth), parent_pos(parent_pos) {}
  };
  // This is a BFS search tree that is stored simply as a vector.
  // Each node stores the index of its parent node in the vector.
  std::vector<CuckooNode> tree;
  // This is a very bad way to keep track of visited nodes.
  // TODO(rbs): Change this by adding a 'GetKeyPathId' field to the bucket
  // and use it to track visited nodes.
  std::vector<bool> buckets_visited(max_num_buckets_, false);
  for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
    unsigned int bucket_id = hash_vals[hash_cnt];
    buckets_visited[bucket_id] = true;
    tree.push_back(CuckooNode(bucket_id, 0, -1));
  }
  bool null_found = false;
  unsigned int curr_pos = 0;
  while (!null_found && curr_pos < tree.size()) {
    CuckooNode& curr_node = tree[curr_pos];
    if (curr_node.depth >= max_search_depth_) {
      break;
    }
    CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
    for (unsigned int hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
      unsigned int child_bucket_id = GetSliceHash(
          ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_);
      if (child_bucket_id == curr_node.bucket_id) {
        continue;
      }
      if (buckets_visited[child_bucket_id]) {
        continue;
      }
      buckets_visited[child_bucket_id] = true;
      tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
            curr_pos));
      if (buckets_[child_bucket_id].is_empty) {
        null_found = true;
        break;
      }
    }
    ++curr_pos;
  }

  if (null_found) {
    int bucket_to_replace_pos = tree.size()-1;
    while (bucket_to_replace_pos >= 0) {
      CuckooNode& curr_node = tree[bucket_to_replace_pos];
      if (curr_node.parent_pos != -1) {
        buckets_[curr_node.bucket_id] =
          buckets_[tree[curr_node.parent_pos].bucket_id];
        bucket_to_replace_pos = curr_node.parent_pos;
      } else {
        *bucket_id = curr_node.bucket_id;
        return true;
      }
    }
    return true;
  } else {
    return false;
  }
}

}  // namespace rocksdb
#endif  // ROCKSDB_LITE
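
MakeSpaceForKey above runs a breadth-first search from the new key's candidate buckets and, once an empty bucket is reached, shifts stored entries one step back along the discovered path so that one of the candidate buckets frees up. The following stand-alone toy program sketches the same displacement idea; ToyHash, the bucket layout, and the keys are invented for illustration and are not the RocksDB code (which hashes user keys through the GetSliceHash callback).

// Toy illustration of BFS-based cuckoo displacement (C++17), not RocksDB code.
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

struct Node { int bucket; int parent; };  // parent = index into the BFS tree

// Hypothetical hash: std::hash salted with the hash-function index.
static size_t ToyHash(const std::string& key, int fn, size_t num_buckets) {
  return std::hash<std::string>{}(key + char('A' + fn)) % num_buckets;
}

// Insert `key`, displacing existing keys along a BFS path if necessary.
bool CuckooInsert(std::vector<std::optional<std::string>>* buckets,
                  const std::string& key, int num_hash_fn, int max_depth) {
  const size_t n = buckets->size();
  std::vector<Node> tree;
  std::vector<int> depth;
  std::vector<bool> visited(n, false);

  for (int fn = 0; fn < num_hash_fn; fn++) {
    size_t b = ToyHash(key, fn, n);
    if (!(*buckets)[b]) { (*buckets)[b] = key; return true; }  // easy case
    if (!visited[b]) {
      visited[b] = true;
      tree.push_back({static_cast<int>(b), -1});
      depth.push_back(0);
    }
  }

  // BFS over buckets reachable by displacing already-stored keys.
  int empty_pos = -1;
  for (size_t pos = 0; pos < tree.size() && empty_pos < 0; pos++) {
    if (depth[pos] >= max_depth) break;
    const std::string& displaced = *(*buckets)[tree[pos].bucket];
    for (int fn = 0; fn < num_hash_fn; fn++) {
      size_t b = ToyHash(displaced, fn, n);
      if (visited[b]) continue;
      visited[b] = true;
      tree.push_back({static_cast<int>(b), static_cast<int>(pos)});
      depth.push_back(depth[pos] + 1);
      if (!(*buckets)[b]) { empty_pos = static_cast<int>(tree.size()) - 1; break; }
    }
  }
  if (empty_pos < 0) return false;  // caller would need more hash functions

  // Walk from the empty bucket back to the root, shifting each parent's
  // entry forward so the root bucket becomes free for the new key.
  int pos = empty_pos;
  while (tree[pos].parent != -1) {
    (*buckets)[tree[pos].bucket] = (*buckets)[tree[tree[pos].parent].bucket];
    pos = tree[pos].parent;
  }
  (*buckets)[tree[pos].bucket] = key;
  return true;
}

int main() {
  std::vector<std::optional<std::string>> buckets(8);
  for (const std::string& k : {"a", "b", "c", "d", "e"}) {
    std::cout << k << (CuckooInsert(&buckets, k, 2, 10) ? " inserted\n" : " failed\n");
  }
}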
@@ -0,0 +1,97 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#pragma once
#ifndef ROCKSDB_LITE
#include <stdint.h>
#include <string>
#include <vector>
#include "rocksdb/status.h"
#include "table/table_builder.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "util/autovector.h"

namespace rocksdb {

struct CuckooBucket {
  CuckooBucket(): is_empty(true) {}
  Slice key;
  Slice value;
  bool is_empty;
};

class CuckooTableBuilder: public TableBuilder {
 public:
  CuckooTableBuilder(
      WritableFile* file, unsigned int fixed_key_length,
      unsigned int fixed_value_length, double hash_table_ratio,
      unsigned int file_size, unsigned int max_num_hash_table,
      unsigned int max_search_depth,
      unsigned int (*GetSliceHash)(const Slice&, unsigned int,
        unsigned int));

  // REQUIRES: Either Finish() or Abandon() has been called.
  ~CuckooTableBuilder();

  // Add key,value to the table being constructed.
  // REQUIRES: key is after any previously added key according to comparator.
  // REQUIRES: Finish(), Abandon() have not been called
  void Add(const Slice& key, const Slice& value) override;

  // Return non-ok iff some error has been detected.
  Status status() const override;

  // Finish building the table. Stops using the file passed to the
  // constructor after this function returns.
  // REQUIRES: Finish(), Abandon() have not been called
  Status Finish() override;

  // Indicate that the contents of this builder should be abandoned. Stops
  // using the file passed to the constructor after this function returns.
  // If the caller is not going to call Finish(), it must call Abandon()
  // before destroying this builder.
  // REQUIRES: Finish(), Abandon() have not been called
  void Abandon() override;

  // Number of calls to Add() so far.
  uint64_t NumEntries() const override;

  // Size of the file generated so far. If invoked after a successful
  // Finish() call, returns the size of the final generated file.
  uint64_t FileSize() const override;

 private:
  bool MakeSpaceForKey(const Slice& key, unsigned int* bucket_id,
      autovector<unsigned int> hash_vals);

  unsigned int num_hash_table_;
  WritableFile* file_;
  const unsigned int key_length_;
  const unsigned int value_length_;
  const unsigned int bucket_size_;
  const double hash_table_ratio_;
  const unsigned int max_num_buckets_;
  const unsigned int max_num_hash_table_;
  const unsigned int max_search_depth_;
  Status status_;
  std::vector<CuckooBucket> buckets_;
  bool is_last_level_file_ = true;
  TableProperties properties_;
  unsigned int (*GetSliceHash)(const Slice& s, unsigned int index,
      unsigned int max_num_buckets);
  std::string unused_user_key_ = "";
  std::string prev_key_;

  bool closed_ = false;  // Either Finish() or Abandon() has been called.

  // No copying allowed
  CuckooTableBuilder(const CuckooTableBuilder&) = delete;
  void operator=(const CuckooTableBuilder&) = delete;
};

}  // namespace rocksdb

#endif  // ROCKSDB_LITE
@@ -0,0 +1,468 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#include <vector>
#include <string>
#include <map>
#include <unordered_map>
#include <utility>

#include "table/meta_blocks.h"
#include "table/cuckoo_table_builder.h"
#include "util/random.h"
#include "util/testharness.h"
#include "util/testutil.h"

namespace rocksdb {

extern const uint64_t kCuckooTableMagicNumber;

namespace {
std::unordered_map<std::string, std::vector<unsigned int>> hash_map;

void AddHashLookups(const std::string& s, unsigned int bucket_id,
    unsigned int num_hash_fun) {
  std::vector<unsigned int> v;
  for (unsigned int i = 0; i < num_hash_fun; i++) {
    v.push_back(bucket_id + i);
  }
  hash_map[s] = v;
  return;
}

unsigned int GetSliceHash(const Slice& s, unsigned int index,
    unsigned int max_num_buckets) {
  return hash_map[s.ToString()][index];
}
}  // namespace

class CuckooBuilderTest {
 public:
  CuckooBuilderTest() {
    env_ = Env::Default();
  }

  void CheckFileContents(const std::string& expected_data) {
    // Read file
    unique_ptr<RandomAccessFile> read_file;
    ASSERT_OK(env_->NewRandomAccessFile(fname, &read_file, env_options_));
    uint64_t read_file_size;
    ASSERT_OK(env_->GetFileSize(fname, &read_file_size));

    // Assert Table Properties.
    TableProperties* props = nullptr;
    ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size,
          kCuckooTableMagicNumber, env_, nullptr, &props));
    ASSERT_EQ(props->num_entries, num_items);
    ASSERT_EQ(props->fixed_key_len, key_length);

    // Check unused bucket.
    std::string unused_bucket = props->user_collected_properties[
      CuckooTablePropertyNames::kEmptyBucket];
    ASSERT_EQ(expected_unused_bucket, unused_bucket);

    unsigned int max_buckets;
    Slice max_buckets_slice = Slice(props->user_collected_properties[
        CuckooTablePropertyNames::kMaxNumBuckets]);
    GetVarint32(&max_buckets_slice, &max_buckets);
    ASSERT_EQ(expected_max_buckets, max_buckets);
    // Check contents of the bucket.
    std::string read_data;
    read_data.resize(expected_data.size());
    Slice read_slice;
    ASSERT_OK(read_file->Read(0, expected_data.size(),
          &read_slice, &read_data[0]));
    ASSERT_EQ(expected_data, read_data);
  }

  Env* env_;
  const EnvOptions env_options_;
  std::string fname;
  std::string expected_unused_bucket;
  unsigned int file_size = 100000;
  unsigned int num_items = 20;
  unsigned int num_hash_fun = 64;
  double hash_table_ratio = 0.9;
  unsigned int ikey_length;
  unsigned int user_key_length;
  unsigned int key_length;
  unsigned int value_length;
  unsigned int bucket_length;
  unsigned int expected_max_buckets;
};


TEST(CuckooBuilderTest, NoCollision) {
  hash_map.clear();
  num_items = 20;
  num_hash_fun = 64;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  unsigned int bucket_ids = 0;
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
    bucket_ids += num_hash_fun;
  }

  ikey_length = keys[0].size();
  key_length = ikey_length;
  value_length = values[0].size();
  bucket_length = ikey_length + value_length;
  expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/BasicTest_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  unsigned int key_idx = 0;
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    if (key_idx * num_hash_fun == i && key_idx < num_items) {
      cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder->status());
      expected_file_data.append(keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder->Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data);
}

TEST(CuckooBuilderTest, NoCollisionLastLevel) {
  hash_map.clear();
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  unsigned int bucket_ids = 0;
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    // Set zero sequence number in all keys.
    ParsedInternalKey ikey(user_keys[i], 0, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], bucket_ids, num_hash_fun);
    bucket_ids += num_hash_fun;
  }
  ikey_length = keys[0].size();
  user_key_length = user_keys[0].size();
  key_length = user_key_length;
  value_length = values[0].size();
  bucket_length = key_length + value_length;
  expected_max_buckets = file_size / bucket_length;
  expected_unused_bucket = "keys10:";
  expected_unused_bucket.resize(bucket_length, 'a');
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/NoCollisionLastLevel_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), key_length,
      value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  unsigned int key_idx = 0;
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    if (key_idx * num_hash_fun == i && key_idx < num_items) {
      cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder->status());
      expected_file_data.append(user_keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder->Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data);
}

TEST(CuckooBuilderTest, WithCollision) {
  // Take keys with colliding hash function values.
  hash_map.clear();
  num_hash_fun = 20;
  num_items = num_hash_fun;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  key_length = ikey_length;
  bucket_length = key_length + value_length;
  expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ParsedInternalKey ikey(expected_unused_user_key, 0, kTypeValue);
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollision_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), key_length, value_length, hash_table_ratio,
      file_size, num_hash_fun, 100, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  unsigned int key_idx = 0;
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    if (key_idx == i && key_idx < num_items) {
      cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
      ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
      ASSERT_OK(cuckoo_builder->status());
      expected_file_data.append(keys[key_idx] + values[key_idx]);
      ++key_idx;
    } else {
      expected_file_data.append(expected_unused_bucket);
    }
  }
  ASSERT_OK(cuckoo_builder->Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data);
}

TEST(CuckooBuilderTest, FailWithTooManyCollisions) {
  // Take keys with colliding hash function values.
  // Take more keys than the number of hash functions.
  hash_map.clear();
  num_hash_fun = 20;
  num_items = num_hash_fun + 1;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide.
    AddHashLookups(user_keys[i], 0, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWithTooManyCollisions_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
  }
  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  writable_file->Close();
}

TEST(CuckooBuilderTest, FailWhenSameKeyInserted) {
  hash_map.clear();
  std::string user_key = "repeatedkey";
  AddHashLookups(user_key, 0, num_hash_fun);
  std::string key_to_reuse1, key_to_reuse2;
  ParsedInternalKey ikey1(user_key, 1000, kTypeValue);
  ParsedInternalKey ikey2(user_key, 1001, kTypeValue);
  AppendInternalKey(&key_to_reuse1, ikey1);
  AppendInternalKey(&key_to_reuse2, ikey2);
  std::string value = "value";
  ikey_length = key_to_reuse1.size();
  value_length = value.size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenSameKeyInserted_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  cuckoo_builder->Add(Slice(key_to_reuse1), Slice(value));
  ASSERT_OK(cuckoo_builder->status());
  ASSERT_EQ(cuckoo_builder->NumEntries(), 1);
  cuckoo_builder->Add(Slice(key_to_reuse2), Slice(value));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  writable_file->Close();
}

TEST(CuckooBuilderTest, WithACollisionPath) {
  hash_map.clear();
  // Have two hash functions. Insert elements with overlapping hashes.
  // Finally insert an element which will displace all the current elements.
  num_hash_fun = 2;

  unsigned int max_search_depth = 100;
  num_items = max_search_depth + 2;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  std::vector<unsigned int> expected_bucket_id(num_items);
  for (unsigned int i = 0; i < num_items - 1; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide with the next element.
    AddHashLookups(user_keys[i], i, num_hash_fun);
    expected_bucket_id[i] = i+1;
  }
  expected_bucket_id[0] = 0;
  user_keys.back() = "keys" + std::to_string(num_items + 99);
  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
  AppendInternalKey(&keys.back(), ikey);
  values.back() = "value" + std::to_string(num_items+100);
  // Make both hash values collide with first element.
  AddHashLookups(user_keys.back(), 0, num_hash_fun);
  expected_bucket_id.back() = 1;

  ikey_length = keys[0].size();
  value_length = values[0].size();
  key_length = ikey_length;
  bucket_length = key_length + value_length;

  expected_max_buckets = file_size / bucket_length;
  std::string expected_unused_user_key = "keys10:";
  ikey = ParsedInternalKey(expected_unused_user_key, 0, kTypeValue);
  AppendInternalKey(&expected_unused_bucket, ikey);
  expected_unused_bucket.resize(bucket_length, 'a');
  std::string expected_file_data = "";
  for (unsigned int i = 0; i < expected_max_buckets; i++) {
    expected_file_data += expected_unused_bucket;
  }

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/WithCollisionPath_writable_file";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), key_length,
      value_length, hash_table_ratio, file_size,
      num_hash_fun, max_search_depth, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  for (unsigned int key_idx = 0; key_idx < num_items; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
    expected_file_data.replace(expected_bucket_id[key_idx]*bucket_length,
        bucket_length, keys[key_idx] + values[key_idx]);
  }
  ASSERT_OK(cuckoo_builder->Finish());
  writable_file->Close();
  CheckFileContents(expected_file_data);
}

TEST(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
  hash_map.clear();
  // Have two hash functions. Insert elements with overlapping hashes.
  // Finally insert an element which will displace all the current elements.
  num_hash_fun = 2;

  unsigned int max_search_depth = 100;
  num_items = max_search_depth + 3;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items - 1; i++) {
    user_keys[i] = "keys" + std::to_string(i+100);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    // Make all hash values collide with the next element.
    AddHashLookups(user_keys[i], i, num_hash_fun);
  }
  user_keys.back() = "keys" + std::to_string(num_items + 99);
  ParsedInternalKey ikey(user_keys.back(), num_items + 1000, kTypeValue);
  AppendInternalKey(&keys.back(), ikey);
  values.back() = "value" + std::to_string(num_items+100);
  // Make both hash values collide with first element.
  AddHashLookups(user_keys.back(), 0, num_hash_fun);

  ikey_length = keys[0].size();
  value_length = values[0].size();
  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenCollisionPathTooLong_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      max_search_depth, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
  }
  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  writable_file->Close();
}

TEST(CuckooBuilderTest, FailWhenTableIsFull) {
  hash_map.clear();
  file_size = 160;

  num_items = 7;
  std::vector<std::string> user_keys(num_items);
  std::vector<std::string> keys(num_items);
  std::vector<std::string> values(num_items);
  for (unsigned int i = 0; i < num_items; i++) {
    user_keys[i] = "keys" + std::to_string(i+1000);
    ParsedInternalKey ikey(user_keys[i], i + 1000, kTypeValue);
    AppendInternalKey(&keys[i], ikey);
    values[i] = "value" + std::to_string(i+100);
    AddHashLookups(user_keys[i], i, num_hash_fun);
  }
  ikey_length = keys[0].size();
  value_length = values[0].size();
  bucket_length = ikey_length + value_length;
  // Check that number of items is tight.
  ASSERT_GT(bucket_length * num_items, file_size);
  ASSERT_LE(bucket_length * (num_items-1), file_size);

  unique_ptr<WritableFile> writable_file;
  fname = test::TmpDir() + "/FailWhenTableIsFull_writable";
  ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
  CuckooTableBuilder* cuckoo_builder = new CuckooTableBuilder(
      writable_file.get(), ikey_length,
      value_length, hash_table_ratio, file_size, num_hash_fun,
      100, GetSliceHash);
  ASSERT_OK(cuckoo_builder->status());
  for (unsigned int key_idx = 0; key_idx < num_items-1; key_idx++) {
    cuckoo_builder->Add(Slice(keys[key_idx]), Slice(values[key_idx]));
    ASSERT_OK(cuckoo_builder->status());
    ASSERT_EQ(cuckoo_builder->NumEntries(), key_idx + 1);
  }
  cuckoo_builder->Add(Slice(keys.back()), Slice(values.back()));
  ASSERT_TRUE(cuckoo_builder->status().IsCorruption());
  cuckoo_builder->Abandon();
  writable_file->Close();
}
}  // namespace rocksdb

int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); }
@@ -0,0 +1,32 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.

#pragma once
#ifndef ROCKSDB_LITE

#include "util/murmurhash.h"

namespace rocksdb {

static const unsigned int kMaxNumHashTable = 64;

unsigned int GetSliceMurmurHash(const Slice& s, unsigned int index,
    unsigned int max_num_buckets) {
  static constexpr unsigned int seeds[kMaxNumHashTable] = {
    816922183, 506425713, 949485004, 22513986, 421427259, 500437285,
    888981693, 847587269, 511007211, 722295391, 934013645, 566947683,
    193618736, 428277388, 770956674, 819994962, 755946528, 40807421,
    263144466, 241420041, 444294464, 731606396, 304158902, 563235655,
    968740453, 336996831, 462831574, 407970157, 985877240, 637708754,
    736932700, 205026023, 755371467, 729648411, 807744117, 46482135,
    847092855, 620960699, 102476362, 314094354, 625838942, 550889395,
    639071379, 834567510, 397667304, 151945969, 443634243, 196618243,
    421986347, 407218337, 964502417, 327741231, 493359459, 452453139,
    692216398, 108161624, 816246924, 234779764, 618949448, 496133787,
    156374056, 316589799, 982915425, 553105889 };
  return MurmurHash(s.data(), s.size(), seeds[index]) % max_num_buckets;
}
}  // namespace rocksdb

#endif  // ROCKSDB_LITE
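
GetSliceMurmurHash simply salts MurmurHash with one of kMaxNumHashTable fixed seeds, selected by the hash-function index, and reduces the result modulo the bucket count, so it can be passed directly as the GetSliceHash callback of CuckooTableBuilder. A hypothetical snippet for illustration (the include of the new header is left as a comment since its path is not shown in this diff):

// Hypothetical usage, not part of this change.
#include <iostream>
#include "rocksdb/slice.h"
// #include the header from this change that defines GetSliceMurmurHash.

int main() {
  rocksdb::Slice user_key("sample_user_key");
  const unsigned int num_buckets = 1000;
  // Each index picks a different seed, so the same key gets up to
  // kMaxNumHashTable independent candidate buckets.
  for (unsigned int idx = 0; idx < 4; idx++) {
    std::cout << "bucket[" << idx << "] = "
              << rocksdb::GetSliceMurmurHash(user_key, idx, num_buckets)
              << std::endl;
  }
  return 0;
}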