// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under the BSD-style license found in the // LICENSE file in the root directory of this source tree. An additional grant // of patent rights can be found in the PATENTS file in the same directory. // #ifndef ROCKSDB_LITE #include "memtable/hash_skiplist_rep.h" #include #include "rocksdb/memtablerep.h" #include "util/arena.h" #include "rocksdb/slice.h" #include "rocksdb/slice_transform.h" #include "port/port.h" #include "util/murmurhash.h" #include "db/memtable.h" #include "memtable/skiplist.h" namespace rocksdb { namespace { class HashSkipListRep : public MemTableRep { public: HashSkipListRep(const MemTableRep::KeyComparator& compare, MemTableAllocator* allocator, const SliceTransform* transform, size_t bucket_size, int32_t skiplist_height, int32_t skiplist_branching_factor); virtual void Insert(KeyHandle handle) override; virtual bool Contains(const char* key) const override; virtual size_t ApproximateMemoryUsage() override; virtual void Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) override; virtual ~HashSkipListRep(); virtual MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override; virtual MemTableRep::Iterator* GetDynamicPrefixIterator( Arena* arena = nullptr) override; private: friend class DynamicIterator; typedef SkipList Bucket; size_t bucket_size_; const int32_t skiplist_height_; const int32_t skiplist_branching_factor_; // Maps slices (which are transformed user keys) to buckets of keys sharing // the same transform. std::atomic* buckets_; // The user-supplied transform whose domain is the user keys. const SliceTransform* transform_; const MemTableRep::KeyComparator& compare_; // immutable after construction MemTableAllocator* const allocator_; inline size_t GetHash(const Slice& slice) const { return MurmurHash(slice.data(), static_cast(slice.size()), 0) % bucket_size_; } inline Bucket* GetBucket(size_t i) const { return buckets_[i].load(std::memory_order_acquire); } inline Bucket* GetBucket(const Slice& slice) const { return GetBucket(GetHash(slice)); } // Get a bucket from buckets_. If the bucket hasn't been initialized yet, // initialize it before returning. Bucket* GetInitializedBucket(const Slice& transformed); class Iterator : public MemTableRep::Iterator { public: explicit Iterator(Bucket* list, bool own_list = true, Arena* arena = nullptr) : list_(list), iter_(list), own_list_(own_list), arena_(arena) {} virtual ~Iterator() { // if we own the list, we should also delete it if (own_list_) { assert(list_ != nullptr); delete list_; } } // Returns true iff the iterator is positioned at a valid node. virtual bool Valid() const override { return list_ != nullptr && iter_.Valid(); } // Returns the key at the current position. // REQUIRES: Valid() virtual const char* key() const override { assert(Valid()); return iter_.key(); } // Advances to the next position. // REQUIRES: Valid() virtual void Next() override { assert(Valid()); iter_.Next(); } // Advances to the previous position. // REQUIRES: Valid() virtual void Prev() override { assert(Valid()); iter_.Prev(); } // Advance to the first entry with a key >= target virtual void Seek(const Slice& internal_key, const char* memtable_key) override { if (list_ != nullptr) { const char* encoded_key = (memtable_key != nullptr) ? memtable_key : EncodeKey(&tmp_, internal_key); iter_.Seek(encoded_key); } } // Retreat to the last entry with a key <= target virtual void SeekForPrev(const Slice& internal_key, const char* memtable_key) override { // not supported assert(false); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. virtual void SeekToFirst() override { if (list_ != nullptr) { iter_.SeekToFirst(); } } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. virtual void SeekToLast() override { if (list_ != nullptr) { iter_.SeekToLast(); } } protected: void Reset(Bucket* list) { if (own_list_) { assert(list_ != nullptr); delete list_; } list_ = list; iter_.SetList(list); own_list_ = false; } private: // if list_ is nullptr, we should NEVER call any methods on iter_ // if list_ is nullptr, this Iterator is not Valid() Bucket* list_; Bucket::Iterator iter_; // here we track if we own list_. If we own it, we are also // responsible for it's cleaning. This is a poor man's shared_ptr bool own_list_; std::unique_ptr arena_; std::string tmp_; // For passing to EncodeKey }; class DynamicIterator : public HashSkipListRep::Iterator { public: explicit DynamicIterator(const HashSkipListRep& memtable_rep) : HashSkipListRep::Iterator(nullptr, false), memtable_rep_(memtable_rep) {} // Advance to the first entry with a key >= target virtual void Seek(const Slice& k, const char* memtable_key) override { auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k)); Reset(memtable_rep_.GetBucket(transformed)); HashSkipListRep::Iterator::Seek(k, memtable_key); } // Position at the first entry in collection. // Final state of iterator is Valid() iff collection is not empty. virtual void SeekToFirst() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } // Position at the last entry in collection. // Final state of iterator is Valid() iff collection is not empty. virtual void SeekToLast() override { // Prefix iterator does not support total order. // We simply set the iterator to invalid state Reset(nullptr); } private: // the underlying memtable const HashSkipListRep& memtable_rep_; }; class EmptyIterator : public MemTableRep::Iterator { // This is used when there wasn't a bucket. It is cheaper than // instantiating an empty bucket over which to iterate. public: EmptyIterator() { } virtual bool Valid() const override { return false; } virtual const char* key() const override { assert(false); return nullptr; } virtual void Next() override {} virtual void Prev() override {} virtual void Seek(const Slice& internal_key, const char* memtable_key) override {} virtual void SeekForPrev(const Slice& internal_key, const char* memtable_key) override {} virtual void SeekToFirst() override {} virtual void SeekToLast() override {} private: }; }; HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare, MemTableAllocator* allocator, const SliceTransform* transform, size_t bucket_size, int32_t skiplist_height, int32_t skiplist_branching_factor) : MemTableRep(allocator), bucket_size_(bucket_size), skiplist_height_(skiplist_height), skiplist_branching_factor_(skiplist_branching_factor), transform_(transform), compare_(compare), allocator_(allocator) { auto mem = allocator->AllocateAligned( sizeof(std::atomic) * bucket_size); buckets_ = new (mem) std::atomic[bucket_size]; for (size_t i = 0; i < bucket_size_; ++i) { buckets_[i].store(nullptr, std::memory_order_relaxed); } } HashSkipListRep::~HashSkipListRep() { } HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket( const Slice& transformed) { size_t hash = GetHash(transformed); auto bucket = GetBucket(hash); if (bucket == nullptr) { auto addr = allocator_->AllocateAligned(sizeof(Bucket)); bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_, skiplist_branching_factor_); buckets_[hash].store(bucket, std::memory_order_release); } return bucket; } void HashSkipListRep::Insert(KeyHandle handle) { auto* key = static_cast(handle); assert(!Contains(key)); auto transformed = transform_->Transform(UserKey(key)); auto bucket = GetInitializedBucket(transformed); bucket->Insert(key); } bool HashSkipListRep::Contains(const char* key) const { auto transformed = transform_->Transform(UserKey(key)); auto bucket = GetBucket(transformed); if (bucket == nullptr) { return false; } return bucket->Contains(key); } size_t HashSkipListRep::ApproximateMemoryUsage() { return 0; } void HashSkipListRep::Get(const LookupKey& k, void* callback_args, bool (*callback_func)(void* arg, const char* entry)) { auto transformed = transform_->Transform(k.user_key()); auto bucket = GetBucket(transformed); if (bucket != nullptr) { Bucket::Iterator iter(bucket); for (iter.Seek(k.memtable_key().data()); iter.Valid() && callback_func(callback_args, iter.key()); iter.Next()) { } } } MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) { // allocate a new arena of similar size to the one currently in use Arena* new_arena = new Arena(allocator_->BlockSize()); auto list = new Bucket(compare_, new_arena); for (size_t i = 0; i < bucket_size_; ++i) { auto bucket = GetBucket(i); if (bucket != nullptr) { Bucket::Iterator itr(bucket); for (itr.SeekToFirst(); itr.Valid(); itr.Next()) { list->Insert(itr.key()); } } } if (arena == nullptr) { return new Iterator(list, true, new_arena); } else { auto mem = arena->AllocateAligned(sizeof(Iterator)); return new (mem) Iterator(list, true, new_arena); } } MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) { if (arena == nullptr) { return new DynamicIterator(*this); } else { auto mem = arena->AllocateAligned(sizeof(DynamicIterator)); return new (mem) DynamicIterator(*this); } } } // anon namespace MemTableRep* HashSkipListRepFactory::CreateMemTableRep( const MemTableRep::KeyComparator& compare, MemTableAllocator* allocator, const SliceTransform* transform, Logger* logger) { return new HashSkipListRep(compare, allocator, transform, bucket_count_, skiplist_height_, skiplist_branching_factor_); } MemTableRepFactory* NewHashSkipListRepFactory( size_t bucket_count, int32_t skiplist_height, int32_t skiplist_branching_factor) { return new HashSkipListRepFactory(bucket_count, skiplist_height, skiplist_branching_factor); } } // namespace rocksdb #endif // ROCKSDB_LITE