From 58e1956d50224175299dc50b6bea9b22cbda884f Mon Sep 17 00:00:00 2001 From: Siying Dong Date: Wed, 20 Nov 2013 19:49:27 -0800 Subject: [PATCH] [Only for Performance Branch] A Hacky patch to lazily generate memtable key for prefix-hashed memtables. Summary: For prefix memtables, encoding the memtable key may be unnecessary if the prefix doesn't have any keys. This patch is a little bit hacky, but I want to try out the performance gain of this lazy initialization. In the longer term, we might want to revisit the way we abstract memtable implementations. Test Plan: make all check Reviewers: haobo, igor, kailiu Reviewed By: igor CC: leveldb Differential Revision: https://reviews.facebook.net/D14265 --- db/memtable.cc | 9 ++++----- db/memtable.h | 2 ++ include/rocksdb/memtablerep.h | 2 +- util/hash_skiplist_rep.cc | 18 +++++++++++------- util/skiplistrep.cc | 11 +++++++++-- util/transformrep.cc | 20 ++++++++++++-------- util/vectorrep.cc | 11 ++++++++--- 7 files changed, 47 insertions(+), 26 deletions(-) diff --git a/db/memtable.cc b/db/memtable.cc index 291899c21..44eb160e7 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -75,7 +75,7 @@ Slice MemTableRep::UserKey(const char* key) const { // Encode a suitable internal key target for "target" and return it. // Uses *scratch as scratch space, and the returned pointer will point // into this scratch space. 
-static const char* EncodeKey(std::string* scratch, const Slice& target) { +const char* EncodeKey(std::string* scratch, const Slice& target) { scratch->clear(); PutVarint32(scratch, target.size()); scratch->append(target.data(), target.size()); @@ -96,7 +96,7 @@ class MemTableIterator: public Iterator { } virtual bool Valid() const { return iter_->Valid(); } - virtual void Seek(const Slice& k) { iter_->Seek(EncodeKey(&tmp_, k)); } + virtual void Seek(const Slice& k) { iter_->Seek(k, nullptr); } virtual void SeekToFirst() { iter_->SeekToFirst(); } virtual void SeekToLast() { iter_->SeekToLast(); } virtual void Next() { iter_->Next(); } @@ -113,7 +113,6 @@ class MemTableIterator: public Iterator { private: std::shared_ptr iter_; - std::string tmp_; // For passing to EncodeKey // No copying allowed MemTableIterator(const MemTableIterator&); @@ -165,7 +164,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, Slice memkey = key.memtable_key(); std::shared_ptr iter( table_->GetIterator(key.user_key())); - iter->Seek(memkey.data()); + iter->Seek(key.user_key(), memkey.data()); // It is the caller's responsibility to allocate/delete operands list assert(operands != nullptr); @@ -274,7 +273,7 @@ bool MemTable::Update(SequenceNumber seq, ValueType type, std::shared_ptr iter( table_.get()->GetIterator(lkey.user_key())); - iter->Seek(memkey.data()); + iter->Seek(key, memkey.data()); if (iter->Valid()) { // entry format is: diff --git a/db/memtable.h b/db/memtable.h index 93b9b7e2c..9efb16431 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -169,4 +169,6 @@ class MemTable { port::RWMutex* GetLock(const Slice& key); }; +extern const char* EncodeKey(std::string* scratch, const Slice& target); + } // namespace rocksdb diff --git a/include/rocksdb/memtablerep.h b/include/rocksdb/memtablerep.h index 4147e5f3a..83b15d256 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -107,7 +107,7 @@ class MemTableRep { virtual void Prev() = 
0; // Advance to the first entry with a key >= target - virtual void Seek(const char* target) = 0; + virtual void Seek(const Slice& user_key, const char* memtable_key) = 0; // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. diff --git a/util/hash_skiplist_rep.cc b/util/hash_skiplist_rep.cc index b67911f93..290ce9d63 100644 --- a/util/hash_skiplist_rep.cc +++ b/util/hash_skiplist_rep.cc @@ -11,6 +11,7 @@ #include "port/port.h" #include "port/atomic_pointer.h" #include "util/murmurhash.h" +#include "db/memtable.h" #include "db/skiplist.h" namespace rocksdb { @@ -112,9 +113,12 @@ class HashSkipListRep : public MemTableRep { } // Advance to the first entry with a key >= target - virtual void Seek(const char* target) { + virtual void Seek(const Slice& user_key, const char* memtable_key) { if (list_ != nullptr) { - iter_.Seek(target); + const char* encoded_key = + (memtable_key != nullptr) ? + memtable_key : EncodeKey(&tmp_, user_key); + iter_.Seek(encoded_key); } } @@ -151,6 +155,7 @@ class HashSkipListRep : public MemTableRep { // here we track if we own list_. If we own it, we are also // responsible for it's cleaning. This is a poor man's shared_ptr bool own_list_; + std::string tmp_; // For passing to EncodeKey }; class DynamicIterator : public HashSkipListRep::Iterator { @@ -160,11 +165,10 @@ class HashSkipListRep : public MemTableRep { memtable_rep_(memtable_rep) {} // Advance to the first entry with a key >= target - virtual void Seek(const char* target) { - auto transformed = memtable_rep_.transform_->Transform( - memtable_rep_.UserKey(target)); + virtual void Seek(const Slice& k, const char* memtable_key) { + auto transformed = memtable_rep_.transform_->Transform(k); Reset(memtable_rep_.GetBucket(transformed)); - HashSkipListRep::Iterator::Seek(target); + HashSkipListRep::Iterator::Seek(k, memtable_key); } // Position at the first entry in collection. 
@@ -201,7 +205,7 @@ class HashSkipListRep : public MemTableRep { } virtual void Next() { } virtual void Prev() { } - virtual void Seek(const char* target) { } + virtual void Seek(const Slice& user_key, const char* memtable_key) { } virtual void SeekToFirst() { } virtual void SeekToLast() { } private: diff --git a/util/skiplistrep.cc b/util/skiplistrep.cc index 955d754b1..f4c6e0c93 100644 --- a/util/skiplistrep.cc +++ b/util/skiplistrep.cc @@ -70,8 +70,13 @@ public: } // Advance to the first entry with a key >= target - virtual void Seek(const char* target) override { - iter_.Seek(target); + virtual void Seek(const Slice& user_key, const char* memtable_key) + override { + if (memtable_key != nullptr) { + iter_.Seek(memtable_key); + } else { + iter_.Seek(EncodeKey(&tmp_, user_key)); + } } // Position at the first entry in list. @@ -85,6 +90,8 @@ public: virtual void SeekToLast() override { iter_.SeekToLast(); } + protected: + std::string tmp_; // For passing to EncodeKey }; // Unhide default implementations of GetIterator diff --git a/util/transformrep.cc b/util/transformrep.cc index 4c7df1321..ef1205570 100644 --- a/util/transformrep.cc +++ b/util/transformrep.cc @@ -13,6 +13,7 @@ #include "rocksdb/arena.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" +#include "db/memtable.h" #include "port/port.h" #include "util/mutexlock.h" #include "util/murmurhash.h" @@ -110,7 +111,7 @@ class TransformRep : public MemTableRep { virtual void Prev(); // Advance to the first entry with a key >= target - virtual void Seek(const char* target); + virtual void Seek(const Slice& user_key, const char* memtable_key); // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. 
@@ -122,6 +123,7 @@ class TransformRep : public MemTableRep { private: std::shared_ptr items_; Bucket::const_iterator cit_; + std::string tmp_; // For passing to EncodeKey }; class EmptyIterator : public MemTableRep::Iterator { @@ -137,7 +139,7 @@ class TransformRep : public MemTableRep { } virtual void Next() { } virtual void Prev() { } - virtual void Seek(const char* target) { } + virtual void Seek(const Slice& user_key, const char* memtable_key) { } virtual void SeekToFirst() { } virtual void SeekToLast() { } static std::shared_ptr GetInstance(); @@ -197,9 +199,8 @@ class TransformRep : public MemTableRep { // Advance to the first entry with a key >= target within the // same bucket as target - virtual void Seek(const char* target) { - Slice prefix = memtable_rep_.transform_->Transform( - memtable_rep_.UserKey(target)); + virtual void Seek(const Slice& user_key, const char* memtable_key) { + Slice prefix = memtable_rep_.transform_->Transform(user_key); ReadLock l(&memtable_rep_.rwlock_); auto bucket = memtable_rep_.buckets_.find(prefix); @@ -208,7 +209,7 @@ class TransformRep : public MemTableRep { } else { bucket_iterator_.reset( new TransformIterator(bucket->second, memtable_rep_.GetLock(prefix))); - bucket_iterator_->Seek(target); + bucket_iterator_->Seek(user_key, memtable_key); } } @@ -343,8 +344,11 @@ void TransformRep::Iterator::Prev() { } // Advance to the first entry with a key >= target -void TransformRep::Iterator::Seek(const char* target) { - cit_ = items_->lower_bound(target); +void TransformRep::Iterator::Seek(const Slice& user_key, + const char* memtable_key) { + const char* encoded_key = + (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key); + cit_ = items_->lower_bound(encoded_key); } // Position at the first entry in collection. 
diff --git a/util/vectorrep.cc b/util/vectorrep.cc index 8d3ccc9df..3887f356d 100644 --- a/util/vectorrep.cc +++ b/util/vectorrep.cc @@ -12,6 +12,7 @@ #include #include "rocksdb/arena.h" +#include "db/memtable.h" #include "port/port.h" #include "util/mutexlock.h" #include "util/stl_wrappers.h" @@ -45,6 +46,7 @@ class VectorRep : public MemTableRep { std::shared_ptr> bucket_; typename std::vector::const_iterator mutable cit_; const KeyComparator& compare_; + std::string tmp_; // For passing to EncodeKey bool mutable sorted_; void DoSort() const; public: @@ -73,7 +75,7 @@ class VectorRep : public MemTableRep { virtual void Prev() override; // Advance to the first entry with a key >= target - virtual void Seek(const char* target) override; + virtual void Seek(const Slice& user_key, const char* memtable_key) override; // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. @@ -200,12 +202,15 @@ void VectorRep::Iterator::Prev() { } // Advance to the first entry with a key >= target -void VectorRep::Iterator::Seek(const char* target) { +void VectorRep::Iterator::Seek(const Slice& user_key, + const char* memtable_key) { DoSort(); // Do binary search to find first value not less than the target + const char* encoded_key = + (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, user_key); cit_ = std::equal_range(bucket_->begin(), bucket_->end(), - target, + encoded_key, [this] (const char* a, const char* b) { return compare_(a, b) < 0; }).first;