From b20486f29469b005ac0b07c7fb1723faadf6c877 Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Fri, 24 Jan 2014 17:50:59 -0800 Subject: [PATCH] [Performance Branch] HashLinkList to avoid to convert length prefixed string back to internal keys Summary: Converting from length prefixed buffer back to internal key costs some CPU but it is not necessary. In this patch, internal keys are pass though the functions so that we don't need to convert back to it. Test Plan: make all check Reviewers: haobo, kailiu Reviewed By: kailiu CC: igor, leveldb Differential Revision: https://reviews.facebook.net/D15393 --- db/memtable.cc | 24 ++++++++++++------- db/memtable.h | 5 +++- include/rocksdb/memtablerep.h | 6 ++++- util/hash_linklist_rep.cc | 45 +++++++++++++++++++++++------------ util/hash_skiplist_rep.cc | 5 ++-- 5 files changed, 58 insertions(+), 27 deletions(-) diff --git a/db/memtable.cc b/db/memtable.cc index e0e2a5c2f..deb3f7ad7 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -65,12 +65,20 @@ size_t MemTable::ApproximateMemoryUsage() { table_->ApproximateMemoryUsage(); } -int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr) +int MemTable::KeyComparator::operator()(const char* prefix_len_key1, + const char* prefix_len_key2) const { + // Internal keys are encoded as length-prefixed strings. + Slice k1 = GetLengthPrefixedSlice(prefix_len_key1); + Slice k2 = GetLengthPrefixedSlice(prefix_len_key2); + return comparator.Compare(k1, k2); +} + +int MemTable::KeyComparator::operator()(const char* prefix_len_key, + const Slice& key) const { // Internal keys are encoded as length-prefixed strings. - Slice a = GetLengthPrefixedSlice(aptr); - Slice b = GetLengthPrefixedSlice(bptr); - return comparator.Compare(a, b); + Slice a = GetLengthPrefixedSlice(prefix_len_key); + return comparator.Compare(a, key); } Slice MemTableRep::UserKey(const char* key) const { @@ -213,7 +221,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, // iter is null if prefix bloom says the key does not exist } else { iter.reset(table_->GetIterator(user_key)); - iter->Seek(user_key, mem_key.data()); + iter->Seek(key.internal_key(), mem_key.data()); } bool merge_in_progress = s->IsMergeInProgress(); @@ -325,7 +333,7 @@ void MemTable::Update(SequenceNumber seq, std::unique_ptr iter( table_->GetIterator(lkey.user_key())); - iter->Seek(key, mem_key.data()); + iter->Seek(lkey.internal_key(), mem_key.data()); if (iter->Valid()) { // entry format is: @@ -389,7 +397,7 @@ bool MemTable::UpdateCallback(SequenceNumber seq, std::shared_ptr iter( table_->GetIterator(lkey.user_key())); - iter->Seek(key, memkey.data()); + iter->Seek(lkey.internal_key(), memkey.data()); if (iter->Valid()) { // entry format is: @@ -461,7 +469,7 @@ size_t MemTable::CountSuccessiveMergeEntries(const LookupKey& key) { // The iterator only needs to be ordered within the same user key. std::unique_ptr iter( table_->GetIterator(key.user_key())); - iter->Seek(key.user_key(), memkey.data()); + iter->Seek(key.internal_key(), memkey.data()); size_t num_successive_merges = 0; diff --git a/db/memtable.h b/db/memtable.h index 5e7eeb4a1..aca4aaf16 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -30,7 +30,10 @@ class MemTable { struct KeyComparator : public MemTableRep::KeyComparator { const InternalKeyComparator comparator; explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { } - virtual int operator()(const char* a, const char* b) const; + virtual int operator()(const char* prefix_len_key1, + const char* prefix_len_key2) const; + virtual int operator()(const char* prefix_len_key, + const Slice& key) const override; }; // MemTables are reference counted. The initial reference count diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 1a16a30cc..3cb03c7fc 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -51,7 +51,11 @@ class MemTableRep { public: // Compare a and b. Return a negative value if a is less than b, 0 if they // are equal, and a positive value if a is greater than b - virtual int operator()(const char* a, const char* b) const = 0; + virtual int operator()(const char* prefix_len_key1, + const char* prefix_len_key2) const = 0; + + virtual int operator()(const char* prefix_len_key, + const Slice& key) const = 0; virtual ~KeyComparator() { } }; diff --git a/util/hash_linklist_rep.cc b/util/hash_linklist_rep.cc index 9c2d8f52c..844907a28 100644 --- a/util/hash_linklist_rep.cc +++ b/util/hash_linklist_rep.cc @@ -92,7 +92,11 @@ class HashLinkListRep : public MemTableRep { // immutable after construction Arena* const arena_; - bool BucketContains(Node* head, const Key& key) const; + bool BucketContains(Node* head, const Slice& key) const; + + Slice GetPrefix(const Slice& internal_key) const { + return transform_->Transform(ExtractUserKey(internal_key)); + } size_t GetHash(const Slice& slice) const { return MurmurHash(slice.data(), slice.size(), 0) % bucket_size_; @@ -111,14 +115,25 @@ class HashLinkListRep : public MemTableRep { return new (mem) Node(key); } + bool Equal(const Slice& a, const Key& b) const { + return (compare_(b, a) == 0); + } + + bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); } + bool KeyIsAfterNode(const Slice& internal_key, const Node* n) const { + // nullptr n is considered infinite + return (n != nullptr) && (compare_(n->key, internal_key) < 0); + } + bool KeyIsAfterNode(const Key& key, const Node* n) const { // nullptr n is considered infinite return (n != nullptr) && (compare_(n->key, key) < 0); } - Node* FindGreaterOrEqualInBucket(Node* head, const Key& key) const; + + Node* FindGreaterOrEqualInBucket(Node* head, const Slice& key) const; class FullListIterator : public MemTableRep::Iterator { public: @@ -219,11 +234,8 @@ class HashLinkListRep : public MemTableRep { // Advance to the first entry with a key >= target virtual void Seek(const Slice& internal_key, const char* memtable_key) { - const char* encoded_key = - (memtable_key != nullptr) ? - memtable_key : EncodeKey(&tmp_, internal_key); node_ = hash_link_list_rep_->FindGreaterOrEqualInBucket(head_, - encoded_key); + internal_key); } // Position at the first entry in collection. @@ -267,7 +279,7 @@ class HashLinkListRep : public MemTableRep { // Advance to the first entry with a key >= target virtual void Seek(const Slice& k, const char* memtable_key) { - auto transformed = memtable_rep_.transform_->Transform(k); + auto transformed = memtable_rep_.GetPrefix(k); Reset(memtable_rep_.GetBucket(transformed)); HashLinkListRep::Iterator::Seek(k, memtable_key); } @@ -320,7 +332,8 @@ HashLinkListRep::~HashLinkListRep() { void HashLinkListRep::Insert(const char* key) { assert(!Contains(key)); - auto transformed = transform_->Transform(UserKey(key)); + Slice internal_key = GetLengthPrefixedSlice(key); + auto transformed = GetPrefix(internal_key); auto& bucket = buckets_[GetHash(transformed)]; Node* head = static_cast(bucket.Acquire_Load()); @@ -344,7 +357,7 @@ void HashLinkListRep::Insert(const char* key) { // If x points to head_ or next points nullptr, it is trivially satisfied. assert((cur == head) || (next == nullptr) || KeyIsAfterNode(next->key, cur)); - if (KeyIsAfterNode(key, cur)) { + if (KeyIsAfterNode(internal_key, cur)) { // Keep searching in this list prev = cur; cur = next; @@ -370,12 +383,14 @@ void HashLinkListRep::Insert(const char* key) { } bool HashLinkListRep::Contains(const char* key) const { - auto transformed = transform_->Transform(UserKey(key)); + Slice internal_key = GetLengthPrefixedSlice(key); + + auto transformed = GetPrefix(internal_key); auto bucket = GetBucket(transformed); if (bucket == nullptr) { return false; } - return BucketContains(bucket, key); + return BucketContains(bucket, internal_key); } size_t HashLinkListRep::ApproximateMemoryUsage() { @@ -414,13 +429,13 @@ MemTableRep::Iterator* HashLinkListRep::GetDynamicPrefixIterator() { return new DynamicIterator(*this); } -bool HashLinkListRep::BucketContains(Node* head, const Key& key) const { - Node* x = FindGreaterOrEqualInBucket(head, key); - return (x != nullptr && Equal(key, x->key)); +bool HashLinkListRep::BucketContains(Node* head, const Slice& user_key) const { + Node* x = FindGreaterOrEqualInBucket(head, user_key); + return (x != nullptr && Equal(user_key, x->key)); } Node* HashLinkListRep::FindGreaterOrEqualInBucket(Node* head, - const Key& key) const { + const Slice& key) const { Node* x = head; while (true) { if (x == nullptr) { diff --git a/util/hash_skiplist_rep.cc b/util/hash_skiplist_rep.cc index 906f83030..845137a4c 100644 --- a/util/hash_skiplist_rep.cc +++ b/util/hash_skiplist_rep.cc @@ -170,7 +170,7 @@ class HashSkipListRep : public MemTableRep { // Advance to the first entry with a key >= target virtual void Seek(const Slice& k, const char* memtable_key) { - auto transformed = memtable_rep_.transform_->Transform(k); + auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k)); Reset(memtable_rep_.GetBucket(transformed)); HashSkipListRep::Iterator::Seek(k, memtable_key); } @@ -209,7 +209,8 @@ class HashSkipListRep : public MemTableRep { } virtual void Next() { } virtual void Prev() { } - virtual void Seek(const Slice& user_key, const char* memtable_key) { } + virtual void Seek(const Slice& internal_key, + const char* memtable_key) { } virtual void SeekToFirst() { } virtual void SeekToLast() { } private: