TransformRep - use array instead of unordered_map

Summary:
I'm sending this diff together with https://reviews.facebook.net/D13881 because it didn't allow me to send only the array one.

Here I also replaced unordered_map with just an array of shared_ptrs. This eliminated all the locks.

I will run the new benchmark and post the results here.

Test Plan: db_test

Reviewers: dhruba, haobo

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13893
main
Igor Canadi 11 years ago
parent fe4a449472
commit be96f2498e
  1. 153
      db/db_test.cc
  2. 16
      db/prefix_test.cc
  3. 70
      include/rocksdb/memtablerep.h
  4. 262
      util/transformrepnolock.cc

@ -4649,85 +4649,90 @@ void PrefixScanInit(DBTest *dbtest) {
} }
TEST(DBTest, PrefixScan) { TEST(DBTest, PrefixScan) {
ReadOptions ro = ReadOptions(); for (int it = 0; it < 2; ++it) {
int count; ReadOptions ro = ReadOptions();
Slice prefix; int count;
Slice key; Slice prefix;
char buf[100]; Slice key;
Iterator* iter; char buf[100];
snprintf(buf, sizeof(buf), "03______:"); Iterator* iter;
prefix = Slice(buf, 8); snprintf(buf, sizeof(buf), "03______:");
key = Slice(buf, 9); prefix = Slice(buf, 8);
auto prefix_extractor = NewFixedPrefixTransform(8); key = Slice(buf, 9);
auto memtable_factory = auto prefix_extractor = NewFixedPrefixTransform(8);
std::make_shared<PrefixHashRepFactory>(prefix_extractor); // db configs
env_->count_random_reads_ = true;
// db configs Options options = CurrentOptions();
env_->count_random_reads_ = true; options.env = env_;
Options options = CurrentOptions(); options.block_cache = NewLRUCache(0); // Prevent cache hits
options.env = env_; options.filter_policy = NewBloomFilterPolicy(10);
options.block_cache = NewLRUCache(0); // Prevent cache hits options.prefix_extractor = prefix_extractor;
options.filter_policy = NewBloomFilterPolicy(10); options.whole_key_filtering = false;
options.prefix_extractor = prefix_extractor; options.disable_auto_compactions = true;
options.whole_key_filtering = false; options.max_background_compactions = 2;
options.disable_auto_compactions = true; options.create_if_missing = true;
options.max_background_compactions = 2; options.disable_seek_compaction = true;
options.create_if_missing = true; if (it == 0) {
options.disable_seek_compaction = true; options.memtable_factory = std::make_shared<PrefixHashRepNoLockFactory>(
options.memtable_factory = memtable_factory; prefix_extractor);
} else {
options.memtable_factory = std::make_shared<PrefixHashRepFactory>(
prefix_extractor);
}
// prefix specified, with blooms: 2 RAND I/Os // prefix specified, with blooms: 2 RAND I/Os
// SeekToFirst // SeekToFirst
DestroyAndReopen(&options); DestroyAndReopen(&options);
PrefixScanInit(this); PrefixScanInit(this);
count = 0; count = 0;
env_->random_read_counter_.Reset(); env_->random_read_counter_.Reset();
ro.prefix = &prefix; ro.prefix = &prefix;
iter = db_->NewIterator(ro); iter = db_->NewIterator(ro);
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
assert(iter->key().starts_with(prefix)); assert(iter->key().starts_with(prefix));
count++; count++;
} }
ASSERT_OK(iter->status()); ASSERT_OK(iter->status());
delete iter; delete iter;
ASSERT_EQ(count, 2); ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 2); ASSERT_EQ(env_->random_read_counter_.Read(), 2);
// prefix specified, with blooms: 2 RAND I/Os // prefix specified, with blooms: 2 RAND I/Os
// Seek // Seek
DestroyAndReopen(&options); DestroyAndReopen(&options);
PrefixScanInit(this); PrefixScanInit(this);
count = 0; count = 0;
env_->random_read_counter_.Reset(); env_->random_read_counter_.Reset();
ro.prefix = &prefix; ro.prefix = &prefix;
iter = db_->NewIterator(ro); iter = db_->NewIterator(ro);
for (iter->Seek(key); iter->Valid(); iter->Next()) { for (iter->Seek(key); iter->Valid(); iter->Next()) {
assert(iter->key().starts_with(prefix)); assert(iter->key().starts_with(prefix));
count++; count++;
} }
ASSERT_OK(iter->status()); ASSERT_OK(iter->status());
delete iter; delete iter;
ASSERT_EQ(count, 2); ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 2); ASSERT_EQ(env_->random_read_counter_.Read(), 2);
// no prefix specified: 11 RAND I/Os // no prefix specified: 11 RAND I/Os
DestroyAndReopen(&options); DestroyAndReopen(&options);
PrefixScanInit(this); PrefixScanInit(this);
count = 0; count = 0;
env_->random_read_counter_.Reset(); env_->random_read_counter_.Reset();
iter = db_->NewIterator(ReadOptions()); iter = db_->NewIterator(ReadOptions());
for (iter->Seek(prefix); iter->Valid(); iter->Next()) { for (iter->Seek(prefix); iter->Valid(); iter->Next()) {
if (! iter->key().starts_with(prefix)) { if (! iter->key().starts_with(prefix)) {
break; break;
}
count++;
} }
count++; ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
Close();
delete options.filter_policy;
} }
ASSERT_OK(iter->status());
delete iter;
ASSERT_EQ(count, 2);
ASSERT_EQ(env_->random_read_counter_.Read(), 11);
Close();
delete options.filter_policy;
} }
std::string MakeKey(unsigned int num) { std::string MakeKey(unsigned int num) {

@ -11,6 +11,7 @@
#include "util/testharness.h" #include "util/testharness.h"
DEFINE_bool(use_prefix_hash_memtable, true, ""); DEFINE_bool(use_prefix_hash_memtable, true, "");
DEFINE_bool(use_nolock_version, true, "");
DEFINE_bool(trigger_deadlock, false, DEFINE_bool(trigger_deadlock, false,
"issue delete in range scan to trigger PrefixHashMap deadlock"); "issue delete in range scan to trigger PrefixHashMap deadlock");
DEFINE_uint64(bucket_count, 100000, "number of buckets"); DEFINE_uint64(bucket_count, 100000, "number of buckets");
@ -93,15 +94,24 @@ class PrefixTest {
if (FLAGS_use_prefix_hash_memtable) { if (FLAGS_use_prefix_hash_memtable) {
auto prefix_extractor = NewFixedPrefixTransform(8); auto prefix_extractor = NewFixedPrefixTransform(8);
options.prefix_extractor = prefix_extractor; options.prefix_extractor = prefix_extractor;
options.memtable_factory = if (FLAGS_use_nolock_version) {
std::make_shared<rocksdb::PrefixHashRepFactory>( options.memtable_factory =
prefix_extractor, FLAGS_bucket_count, FLAGS_num_locks); std::make_shared<rocksdb::PrefixHashRepNoLockFactory>(
prefix_extractor, FLAGS_bucket_count);
} else {
options.memtable_factory =
std::make_shared<rocksdb::PrefixHashRepFactory>(
prefix_extractor, FLAGS_bucket_count, FLAGS_num_locks);
}
} }
Status s = DB::Open(options, kDbName, &db); Status s = DB::Open(options, kDbName, &db);
ASSERT_OK(s); ASSERT_OK(s);
return std::shared_ptr<DB>(db); return std::shared_ptr<DB>(db);
} }
~PrefixTest() {
delete options.comparator;
}
protected: protected:
Options options; Options options;
}; };

@ -15,11 +15,11 @@
// Users can implement their own memtable representations. We include four // Users can implement their own memtable representations. We include four
// types built in: // types built in:
// - SkipListRep: This is the default; it is backed by a skip list. // - SkipListRep: This is the default; it is backed by a skip list.
// - TransformRep: This is backed by an std::unordered_map<Slice, // - TransformRep: This is backed by a custom hash map.
// std::set>. On construction, they are given a SliceTransform object. This // On construction, they are given a SliceTransform object. This
// object is applied to the user key of stored items which indexes into the // object is applied to the user key of stored items which indexes into the
// unordered map to yield a set containing all records that share the same user // hash map to yield a skiplist containing all records that share the same
// key under the transform function. // user key under the transform function.
// - UnsortedRep: A subclass of TransformRep where the transform function is // - UnsortedRep: A subclass of TransformRep where the transform function is
// the identity function. Optimized for point lookups. // the identity function. Optimized for point lookups.
// - PrefixHashRep: A subclass of TransformRep where the transform function is // - PrefixHashRep: A subclass of TransformRep where the transform function is
@ -254,6 +254,68 @@ public:
} }
}; };
// NO LOCKS VERSION
// The same as TransformRepFactory except it doesn't use locks.
// Experimental, will replace TransformRepFactory once we are sure
// it performs better
class TransformRepNoLockFactory : public MemTableRepFactory {
public:
explicit TransformRepNoLockFactory(const SliceTransform* transform,
size_t bucket_count)
: transform_(transform),
bucket_count_(bucket_count) { }
virtual ~TransformRepNoLockFactory() { delete transform_; }
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
MemTableRep::KeyComparator&, Arena*) override;
virtual const char* Name() const override {
return "TransformRepNoLockFactory";
}
const SliceTransform* GetTransform() { return transform_; }
protected:
const SliceTransform* transform_;
const size_t bucket_count_;
};
// UnsortedReps bin user keys based on an identity function transform -- that
// is, transform(key) = key. This optimizes for point look-ups.
//
// Parameters: See TransformRepNoLockFactory.
class UnsortedRepNoLockFactory : public TransformRepNoLockFactory {
public:
explicit UnsortedRepNoLockFactory(size_t bucket_count = 1000000)
: TransformRepNoLockFactory(NewNoopTransform(),
bucket_count) { }
virtual const char* Name() const override {
return "UnsortedRepNoLockFactory";
}
};
// PrefixHashReps bin user keys based on a fixed-size prefix. This optimizes for
// short ranged scans over a given prefix.
//
// Parameters: See TransformRepNoLockFactory.
class PrefixHashRepNoLockFactory : public TransformRepNoLockFactory {
public:
explicit PrefixHashRepNoLockFactory(const SliceTransform* prefix_extractor,
size_t bucket_count = 1000000)
: TransformRepNoLockFactory(prefix_extractor, bucket_count)
{ }
virtual std::shared_ptr<MemTableRep> CreateMemTableRep(
MemTableRep::KeyComparator&, Arena*) override;
virtual const char* Name() const override {
return "PrefixHashRepNoLockFactory";
}
};
} }
#endif // STORAGE_ROCKSDB_DB_MEMTABLEREP_H_ #endif // STORAGE_ROCKSDB_DB_MEMTABLEREP_H_

@ -0,0 +1,262 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
#include "rocksdb/memtablerep.h"
#include "rocksdb/arena.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "port/port.h"
#include "port/atomic_pointer.h"
#include "util/murmurhash.h"
#include "db/skiplist.h"
namespace rocksdb {
namespace {
// MemTableRep that hashes each transformed user key into a fixed-size array
// of slots. Each non-null slot points at a skip list ("bucket") holding the
// keys that hashed there. Slots are read with Acquire_Load and published with
// Release_Store on port::AtomicPointer instead of being guarded by a lock.
class TransformRepNoLock : public MemTableRep {
 public:
  // compare     - comparator handed to every bucket.
  // arena       - arena used for bucket storage.
  // transform   - maps a user key to the slice that selects its bucket.
  // bucket_size - number of slots in the bucket array. Must be non-zero,
  //               because GetHash() reduces modulo this value.
  TransformRepNoLock(MemTableRep::KeyComparator& compare, Arena* arena,
                     const SliceTransform* transform, size_t bucket_size);

  virtual void Insert(const char* key) override;

  virtual bool Contains(const char* key) const override;

  virtual size_t ApproximateMemoryUsage() override;

  virtual ~TransformRepNoLock();

  // Iterator over every key in the rep.
  virtual std::shared_ptr<MemTableRep::Iterator> GetIterator() override;

  // Iterator over the bucket selected by transform(slice).
  virtual std::shared_ptr<MemTableRep::Iterator> GetIterator(
      const Slice& slice) override;

  // Iterator over the bucket selected by an already-transformed slice.
  std::shared_ptr<MemTableRep::Iterator> GetTransformIterator(
      const Slice& transformed);

 private:
  typedef SkipList<const char*, MemTableRep::KeyComparator&> Bucket;

  // Number of slots in buckets_.
  size_t bucket_size_;

  // Maps slices (which are transformed user keys) to buckets of keys sharing
  // the same transform. A slot stays null until its bucket is first created.
  port::AtomicPointer* buckets_;

  // The user-supplied transform whose domain is the user keys.
  const SliceTransform* transform_;

  MemTableRep::KeyComparator& compare_;

  // immutable after construction
  Arena* const arena_;

  // Slot index for a transformed slice: MurmurHash (seed 0) modulo the slot
  // count.
  inline size_t GetHash(const Slice& slice) const {
    return MurmurHash(slice.data(), slice.size(), 0) % bucket_size_;
  }

  // Bucket stored in slot i, or nullptr if none has been published yet.
  inline Bucket* GetBucket(size_t i) const {
    return static_cast<Bucket*>(buckets_[i].Acquire_Load());
  }

  // Bucket for a transformed slice, or nullptr if none has been published.
  inline Bucket* GetBucket(const Slice& slice) const {
    return GetBucket(GetHash(slice));
  }

  // Get a bucket from buckets_. If the bucket hasn't been initialized yet,
  // initialize it before returning.
  Bucket* GetInitializedBucket(const Slice& transformed);

  // Forwards every iterator operation to a skip-list iterator over `list`.
  class Iterator : public MemTableRep::Iterator {
   public:
    // own_list == true means this iterator deletes `list` on destruction.
    explicit Iterator(Bucket* list, bool own_list = true)
        : list_(list), iter_(list), own_list_(own_list) {}

    // Not copyable: a copy of an owning iterator would delete list_ twice.
    Iterator(const Iterator&) = delete;
    Iterator& operator=(const Iterator&) = delete;

    virtual ~Iterator() {
      // if we own the list, we should also delete it
      if (own_list_) {
        delete list_;
      }
    }

    // Returns true iff the iterator is positioned at a valid node.
    virtual bool Valid() const override { return iter_.Valid(); }

    // Returns the key at the current position.
    // REQUIRES: Valid()
    virtual const char* key() const override { return iter_.key(); }

    // Advances to the next position.
    // REQUIRES: Valid()
    virtual void Next() override { iter_.Next(); }

    // Advances to the previous position.
    // REQUIRES: Valid()
    virtual void Prev() override { iter_.Prev(); }

    // Advance to the first entry with a key >= target
    virtual void Seek(const char* target) override { iter_.Seek(target); }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    virtual void SeekToFirst() override { iter_.SeekToFirst(); }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    virtual void SeekToLast() override { iter_.SeekToLast(); }

   private:
    Bucket* list_;
    Bucket::Iterator iter_;
    // here we track if we own list_. If we own it, we are also
    // responsible for its cleanup. This is a poor man's shared_ptr
    bool own_list_;
  };

  // This is used when there wasn't a bucket. It is cheaper than
  // instantiating an empty bucket over which to iterate.
  class EmptyIterator : public MemTableRep::Iterator {
   public:
    EmptyIterator() {}

    // Never valid.
    virtual bool Valid() const override { return false; }

    // Must not be called: the iterator is never valid.
    virtual const char* key() const override {
      assert(false);
      return nullptr;
    }

    // Positioning operations are no-ops.
    virtual void Next() override {}
    virtual void Prev() override {}
    virtual void Seek(const char* target) override {}
    virtual void SeekToFirst() override {}
    virtual void SeekToLast() override {}
  };

  // Single EmptyIterator instance returned whenever a lookup finds no bucket.
  std::shared_ptr<EmptyIterator> empty_iterator_;
};
class PrefixHashRepNoLock : public TransformRepNoLock {
public:
PrefixHashRepNoLock(MemTableRep::KeyComparator& compare, Arena* arena,
const SliceTransform* transform, size_t bucket_size)
: TransformRepNoLock(compare, arena, transform, bucket_size) { }
virtual std::shared_ptr<MemTableRep::Iterator> GetPrefixIterator(
const Slice& prefix) override;
};
// Stores the collaborators, creates the shared empty iterator, then allocates
// the slot array on the heap and marks every slot as "no bucket yet".
// bucket_size should be non-zero: GetHash() takes a remainder by it.
TransformRepNoLock::TransformRepNoLock(MemTableRep::KeyComparator& compare,
                                       Arena* arena,
                                       const SliceTransform* transform,
                                       size_t bucket_size)
    : bucket_size_(bucket_size),
      transform_(transform),
      compare_(compare),
      arena_(arena),
      empty_iterator_(std::make_shared<EmptyIterator>()) {
  buckets_ = new port::AtomicPointer[bucket_size_];
  port::AtomicPointer* const slots_end = buckets_ + bucket_size_;
  for (port::AtomicPointer* slot = buckets_; slot != slots_end; ++slot) {
    // No barrier needed for this store: the object is still being built.
    slot->NoBarrier_Store(nullptr);
  }
}
// Releases the heap-allocated slot array. The buckets the slots point to are
// placement-constructed in arena memory by GetInitializedBucket() and are not
// destroyed or freed here.
TransformRepNoLock::~TransformRepNoLock() {
delete[] buckets_;
}
// Returns the bucket for `transformed`, creating and publishing it first if
// its slot is still empty. A new bucket is placement-constructed in memory
// obtained from arena_ and stored into the slot with Release_Store.
// NOTE(review): there is no compare-and-swap here, so this presumably relies
// on a single writer at a time - confirm against the callers.
TransformRepNoLock::Bucket* TransformRepNoLock::GetInitializedBucket(
    const Slice& transformed) {
  const size_t slot = GetHash(transformed);
  Bucket* const existing = GetBucket(slot);
  if (existing != nullptr) {
    return existing;
  }

  auto storage = arena_->AllocateAligned(sizeof(Bucket));
  Bucket* const created = new (storage) Bucket(compare_, arena_);
  buckets_[slot].Release_Store(static_cast<void*>(created));
  return created;
}
void TransformRepNoLock::Insert(const char* key) {
assert(!Contains(key));
auto transformed = transform_->Transform(UserKey(key));
auto bucket = GetInitializedBucket(transformed);
bucket->Insert(key);
}
bool TransformRepNoLock::Contains(const char* key) const {
auto transformed = transform_->Transform(UserKey(key));
auto bucket = GetBucket(transformed);
if (bucket == nullptr) {
return false;
}
return bucket->Contains(key);
}
// Reports the size of the buckets_ member itself.
// NOTE(review): buckets_ is a pointer, so this is the size of one pointer and
// not bucket_size_ * sizeof(port::AtomicPointer), which is what the
// constructor actually allocates. Confirm whether under-reporting the slot
// array is intended before changing it; callers may use this value for
// memory accounting.
size_t TransformRepNoLock::ApproximateMemoryUsage() {
  const size_t reported = sizeof(buckets_);
  return reported;
}
std::shared_ptr<MemTableRep::Iterator> TransformRepNoLock::GetIterator() {
auto list = new Bucket(compare_, arena_);
for (size_t i = 0; i < bucket_size_; ++i) {
auto bucket = GetBucket(i);
if (bucket != nullptr) {
Bucket::Iterator itr(bucket);
for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
list->Insert(itr.key());
}
}
}
return std::make_shared<Iterator>(list);
}
std::shared_ptr<MemTableRep::Iterator> TransformRepNoLock::GetTransformIterator(
const Slice& transformed) {
auto bucket = GetBucket(transformed);
if (bucket == nullptr) {
return empty_iterator_;
}
return std::make_shared<Iterator>(bucket, false);
}
std::shared_ptr<MemTableRep::Iterator> TransformRepNoLock::GetIterator(
const Slice& slice) {
auto transformed = transform_->Transform(slice);
return GetTransformIterator(transformed);
}
} // anon namespace
// Creates a TransformRepNoLock that uses this factory's transform and bucket
// count together with the supplied comparator and arena.
std::shared_ptr<MemTableRep> TransformRepNoLockFactory::CreateMemTableRep(
    MemTableRep::KeyComparator& compare, Arena* arena) {
  std::shared_ptr<MemTableRep> rep = std::make_shared<TransformRepNoLock>(
      compare, arena, transform_, bucket_count_);
  return rep;
}
// Creates a PrefixHashRepNoLock that uses this factory's transform and bucket
// count together with the supplied comparator and arena.
std::shared_ptr<MemTableRep> PrefixHashRepNoLockFactory::CreateMemTableRep(
    MemTableRep::KeyComparator& compare, Arena* arena) {
  std::shared_ptr<MemTableRep> rep = std::make_shared<PrefixHashRepNoLock>(
      compare, arena, transform_, bucket_count_);
  return rep;
}
std::shared_ptr<MemTableRep::Iterator> PrefixHashRepNoLock::GetPrefixIterator(
const Slice& prefix) {
return TransformRepNoLock::GetTransformIterator(prefix);
}
} // namespace rocksdb
Loading…
Cancel
Save