From 91c01485d1c0a4675e24b39ad2adb950a3a468e6 Mon Sep 17 00:00:00 2001 From: Radheshyam Balasundaram Date: Mon, 28 Jul 2014 17:14:25 -0700 Subject: [PATCH] Minor changes to CuckooTableBuilder Summary: - Copy the key and value to in-memory hash table during Add operation. Also modified cuckoo_table_reader_test to use this. - Store only the user_key in in-memory hash table if it is last level file. - Handle Carryover while chosing unused key in Finish() method in case unused key was never found before Finish() call. Test Plan: cuckoo_table_reader_test --enable_perf cuckoo_table_builder_test valgrind_check asan_check Reviewers: sdong, yhchiang, igor, ljin Reviewed By: ljin Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D20715 --- table/cuckoo_table_builder.cc | 46 +++++++++++++++++-------------- table/cuckoo_table_builder.h | 4 +-- table/cuckoo_table_reader_test.cc | 26 +++++++---------- 3 files changed, 38 insertions(+), 38 deletions(-) diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index c1cb2e4c2..0fe243665 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -86,7 +86,9 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { bucket_found = true; break; } else { - if (user_key.compare(ExtractUserKey(buckets_[hash_val].key)) == 0) { + if (user_key.compare( + is_last_level_file_ ? Slice(buckets_[hash_val].key) + : ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) { status_ = Status::Corruption("Same key is being inserted again."); return; } @@ -112,8 +114,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { hash_vals.push_back(hash_val); } } - buckets_[bucket_id].key = key; - buckets_[bucket_id].value = value; + if (is_last_level_file_) { + buckets_[bucket_id].key.assign(user_key.data(), user_key.size()); + } else { + buckets_[bucket_id].key.assign(key.data(), key.size()); + } + buckets_[bucket_id].value.assign(value.data(), value.size()); buckets_[bucket_id].is_empty = false; properties_.num_entries++; @@ -149,12 +155,18 @@ Status CuckooTableBuilder::Finish() { if (prev_key_.empty()) { return Status::Corruption("Unable to find unused key"); } + // Try to find the key next to prev_key_ by handling carryovers. std::string new_user_key = prev_key_; - new_user_key.back()++; - // We ignore carry-overs and check that it is larger than previous key. - if (new_user_key > prev_key_) { - unused_user_key_ = new_user_key; - } else { + int curr_pos = new_user_key.size() - 1; + while (curr_pos >= 0) { + ++new_user_key[curr_pos]; + if (new_user_key > prev_key_) { + unused_user_key_ = new_user_key; + break; + } + --curr_pos; + } + if (curr_pos < 0) { return Status::Corruption("Unable to find unused key"); } } @@ -179,17 +191,9 @@ Status CuckooTableBuilder::Finish() { s = file_->Append(Slice(unused_bucket)); } else { ++num_added; - if (is_last_level_file_) { - Slice user_key = ExtractUserKey(bucket.key); - s = file_->Append(user_key); - if (s.ok()) { - s = file_->Append(bucket.value); - } - } else { - s = file_->Append(bucket.key); - if (s.ok()) { - s = file_->Append(bucket.value); - } + s = file_->Append(Slice(bucket.key)); + if (s.ok()) { + s = file_->Append(Slice(bucket.value)); } } if (!s.ok()) { @@ -307,7 +311,9 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key, CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id]; for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) { uint64_t child_bucket_id = GetSliceHash( - ExtractUserKey(curr_bucket.key), hash_cnt, max_num_buckets_); + is_last_level_file_ ? curr_bucket.key + : ExtractUserKey(Slice(curr_bucket.key)), + hash_cnt, max_num_buckets_); if (buckets_[child_bucket_id].make_space_for_key_call_id == make_space_for_key_call_id_) { continue; diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h index a3c5a6e3b..d40b1f7ff 100644 --- a/table/cuckoo_table_builder.h +++ b/table/cuckoo_table_builder.h @@ -58,8 +58,8 @@ class CuckooTableBuilder: public TableBuilder { private: struct CuckooBucket { CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {} - Slice key; - Slice value; + std::string key; + std::string value; bool is_empty; uint64_t make_space_for_key_call_id; }; diff --git a/table/cuckoo_table_reader_test.cc b/table/cuckoo_table_reader_test.cc index 6006cc7e7..f2fe9567b 100644 --- a/table/cuckoo_table_reader_test.cc +++ b/table/cuckoo_table_reader_test.cc @@ -236,18 +236,6 @@ TEST(CuckooReaderTest, WhenKeyNotFound) { // Performance tests namespace { -void GenerateKeys(uint64_t num, std::vector* keys, - uint32_t user_key_length) { - for (uint64_t i = 0; i < num; ++i) { - std::string new_key(reinterpret_cast(&i), sizeof(i)); - new_key = std::string(user_key_length - new_key.size(), 'k') + new_key; - ParsedInternalKey ikey(new_key, num, kTypeValue); - std::string full_key; - AppendInternalKey(&full_key, ikey); - keys->push_back(full_key); - } -} - bool DoNothing(void* arg, const ParsedInternalKey& k, const Slice& v) { // Deliberately empty. return false; @@ -266,6 +254,7 @@ bool CheckValue(void* cnt_ptr, const ParsedInternalKey& k, const Slice& v) { void BM_CuckooRead(uint64_t num, uint32_t key_length, uint32_t value_length, uint64_t num_reads, double hash_ratio) { assert(value_length <= key_length); + assert(8 <= key_length); std::vector keys; Options options; options.allow_mmap_reads = true; @@ -277,21 +266,26 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length, } std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark"; - GenerateKeys(num, &keys, key_length); uint64_t predicted_file_size = num * (key_length + value_length) / hash_ratio + 1024; unique_ptr writable_file; ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); CuckooTableBuilder builder( - writable_file.get(), keys[0].size(), value_length, hash_ratio, + writable_file.get(), key_length + 8, value_length, hash_ratio, predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash); ASSERT_OK(builder.status()); - for (uint32_t key_idx = 0; key_idx < num; ++key_idx) { + for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { // Value is just a part of key. - builder.Add(Slice(keys[key_idx]), Slice(&keys[key_idx][0], value_length)); + std::string new_key(reinterpret_cast(&key_idx), sizeof(key_idx)); + new_key = std::string(key_length - new_key.size(), 'k') + new_key; + ParsedInternalKey ikey(new_key, num, kTypeValue); + std::string full_key; + AppendInternalKey(&full_key, ikey); + builder.Add(Slice(full_key), Slice(&full_key[0], value_length)); ASSERT_EQ(builder.NumEntries(), key_idx + 1); ASSERT_OK(builder.status()); + keys.push_back(full_key); } ASSERT_OK(builder.Finish()); ASSERT_EQ(num, builder.NumEntries());