From 2dc6f62bb9205d5b071757d72bd28bbb77ab0745 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Mon, 29 Sep 2014 10:25:21 -0700 Subject: [PATCH] handle kDelete type in cuckoo builder Summary: when I changed std::vector to std::string to store key/value pairs in builder, I missed the handling for kDeletion type. As a result, value_size_ can be wrong if the first add key is for deletion. The is captured by ./cuckoo_table_db_test Test Plan: ./cuckoo_table_db_test ./cuckoo_table_reader_test ./cuckoo_table_builder_test Reviewers: sdong, yhchiang, igor Reviewed By: igor Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D24045 --- table/cuckoo_table_builder.cc | 59 +++++++++++++++++++++++++++++++---- table/cuckoo_table_builder.h | 7 ++++- 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/table/cuckoo_table_builder.cc b/table/cuckoo_table_builder.cc index a66e9899e..6ff1fa0cf 100644 --- a/table/cuckoo_table_builder.cc +++ b/table/cuckoo_table_builder.cc @@ -60,9 +60,11 @@ CuckooTableBuilder::CuckooTableBuilder( hash_table_size_(use_module_hash ? 0 : 2), is_last_level_file_(false), has_seen_first_key_(false), + has_seen_first_value_(false), key_size_(0), value_size_(0), num_entries_(0), + num_values_(0), ucomp_(user_comparator), use_module_hash_(use_module_hash), identity_as_first_hash_(identity_as_first_hash), @@ -84,6 +86,12 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { status_ = Status::Corruption("Unable to parse key into inernal key."); return; } + if (ikey.type != kTypeDeletion && ikey.type != kTypeValue) { + status_ = Status::NotSupported("Unsupported key type " + + std::to_string(ikey.type)); + return; + } + // Determine if we can ignore the sequence number and value type from // internal keys by looking at sequence number from first key. We assume // that if first key has a zero sequence number, then all the remaining @@ -94,16 +102,38 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { smallest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); largest_user_key_.assign(ikey.user_key.data(), ikey.user_key.size()); key_size_ = is_last_level_file_ ? ikey.user_key.size() : key.size(); - value_size_ = value.size(); + } + if (key_size_ != (is_last_level_file_ ? ikey.user_key.size() : key.size())) { + status_ = Status::NotSupported("all keys have to be the same size"); + return; } // Even if one sequence number is non-zero, then it is not last level. assert(!is_last_level_file_ || ikey.sequence == 0); - if (is_last_level_file_) { - kvs_.append(ikey.user_key.data(), ikey.user_key.size()); + + if (ikey.type == kTypeValue) { + if (!has_seen_first_value_) { + has_seen_first_value_ = true; + value_size_ = value.size(); + } + if (value_size_ != value.size()) { + status_ = Status::NotSupported("all values have to be the same size"); + return; + } + + if (is_last_level_file_) { + kvs_.append(ikey.user_key.data(), ikey.user_key.size()); + } else { + kvs_.append(key.data(), key.size()); + } + kvs_.append(value.data(), value.size()); + ++num_values_; } else { - kvs_.append(key.data(), key.size()); + if (is_last_level_file_) { + deleted_keys_.append(ikey.user_key.data(), ikey.user_key.size()); + } else { + deleted_keys_.append(key.data(), key.size()); + } } - kvs_.append(value.data(), value.size()); ++num_entries_; // In order to fill the empty buckets in the hash table, we identify a @@ -123,15 +153,30 @@ void CuckooTableBuilder::Add(const Slice& key, const Slice& value) { } } +bool CuckooTableBuilder::IsDeletedKey(uint64_t idx) const { + assert(closed_); + return idx >= num_values_; +} + Slice CuckooTableBuilder::GetKey(uint64_t idx) const { + assert(closed_); + if (IsDeletedKey(idx)) { + return Slice(&deleted_keys_[(idx - num_values_) * key_size_], key_size_); + } return Slice(&kvs_[idx * (key_size_ + value_size_)], key_size_); } Slice CuckooTableBuilder::GetUserKey(uint64_t idx) const { + assert(closed_); return is_last_level_file_ ? GetKey(idx) : ExtractUserKey(GetKey(idx)); } Slice CuckooTableBuilder::GetValue(uint64_t idx) const { + assert(closed_); + if (IsDeletedKey(idx)) { + static std::string empty_value(value_size_, 'a'); + return Slice(empty_value); + } return Slice(&kvs_[idx * (key_size_ + value_size_) + key_size_], value_size_); } @@ -256,7 +301,9 @@ Status CuckooTableBuilder::Finish() { ++num_added; s = file_->Append(GetKey(bucket.vector_idx)); if (s.ok()) { - s = file_->Append(GetValue(bucket.vector_idx)); + if (value_size_ > 0) { + s = file_->Append(GetValue(bucket.vector_idx)); + } } } if (!s.ok()) { diff --git a/table/cuckoo_table_builder.h b/table/cuckoo_table_builder.h index b1d7e649c..6898c1ef6 100644 --- a/table/cuckoo_table_builder.h +++ b/table/cuckoo_table_builder.h @@ -75,6 +75,7 @@ class CuckooTableBuilder: public TableBuilder { uint64_t* bucket_id); Status MakeHashTable(std::vector* buckets); + inline bool IsDeletedKey(uint64_t idx) const; inline Slice GetKey(uint64_t idx) const; inline Slice GetUserKey(uint64_t idx) const; inline Slice GetValue(uint64_t idx) const; @@ -88,14 +89,18 @@ class CuckooTableBuilder: public TableBuilder { uint64_t hash_table_size_; bool is_last_level_file_; bool has_seen_first_key_; + bool has_seen_first_value_; uint64_t key_size_; uint64_t value_size_; // A list of fixed-size key-value pairs concatenating into a string. // Use GetKey(), GetUserKey(), and GetValue() to retrieve a specific // key / value given an index std::string kvs_; - // Number of key-value pairs stored in kvs_ + std::string deleted_keys_; + // Number of key-value pairs stored in kvs_ + number of deleted keys uint64_t num_entries_; + // Number of keys that contain value (non-deletion op) + uint64_t num_values_; Status status_; TableProperties properties_; const Comparator* ucomp_;