From eb12e47e0e38ddf18890451f536c14ae7b1aa188 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Tue, 3 Dec 2013 12:42:15 -0800 Subject: [PATCH] Killing Transform Rep Summary: Let's get rid of TransformRep and its children. We have confirmed that HashSkipListRep works better with multifeed, so there is no benefit to keeping this around. This diff is mostly just deleting references to obsoleted functions. I also have a diff for fbcode that we'll need to push when we switch to new release. I had to expose HashSkipListRepFactory in the client header files because db_impl.cc needs access to GetTransform() function for SanitizeOptions. Test Plan: make check Reviewers: dhruba, haobo, kailiu, sdong Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D14397 --- db/db_bench.cc | 18 +- db/db_impl.cc | 5 +- db/db_test.cc | 164 ++++++------- db/perf_context_test.cc | 4 +- db/prefix_test.cc | 11 +- include/rocksdb/memtablerep.h | 109 ++------- tools/db_stress.cc | 28 +-- util/hash_skiplist_rep.cc | 33 +-- util/hash_skiplist_rep.h | 38 +++ util/stl_wrappers.h | 19 -- util/transformrep.cc | 422 ---------------------------------- 11 files changed, 158 insertions(+), 693 deletions(-) create mode 100644 util/hash_skiplist_rep.h delete mode 100644 util/transformrep.cc diff --git a/db/db_bench.cc b/db/db_bench.cc index 63cc906e7..d7c0223e4 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -431,12 +431,11 @@ static bool ValidatePrefixSize(const char* flagname, int32_t value) { } return true; } -DEFINE_int32(prefix_size, 0, "Control the prefix size for PrefixHashRep"); +DEFINE_int32(prefix_size, 0, "Control the prefix size for HashSkipList"); enum RepFactory { kSkipList, kPrefixHash, - kUnsorted, kVectorRep }; enum RepFactory StringToRepFactory(const char* ctype) { @@ -446,8 +445,6 @@ enum RepFactory StringToRepFactory(const char* ctype) { return kSkipList; else if (!strcasecmp(ctype, "prefix_hash")) return kPrefixHash; - else if (!strcasecmp(ctype, 
"unsorted")) - return kUnsorted; else if (!strcasecmp(ctype, "vector")) return kVectorRep; @@ -803,9 +800,6 @@ class Benchmark { case kSkipList: fprintf(stdout, "Memtablerep: skip_list\n"); break; - case kUnsorted: - fprintf(stdout, "Memtablerep: unsorted\n"); - break; case kVectorRep: fprintf(stdout, "Memtablerep: vector\n"); break; @@ -1328,14 +1322,8 @@ class Benchmark { } switch (FLAGS_rep_factory) { case kPrefixHash: - options.memtable_factory.reset( - new PrefixHashRepFactory(NewFixedPrefixTransform(FLAGS_prefix_size)) - ); - break; - case kUnsorted: - options.memtable_factory.reset( - new UnsortedRepFactory - ); + options.memtable_factory.reset(NewHashSkipListRepFactory( + NewFixedPrefixTransform(FLAGS_prefix_size))); break; case kSkipList: // no need to do anything diff --git a/db/db_impl.cc b/db/db_impl.cc index 61f03327c..5ac6956b1 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -50,6 +50,7 @@ #include "util/auto_roll_logger.h" #include "util/build_version.h" #include "util/coding.h" +#include "util/hash_skiplist_rep.h" #include "util/logging.h" #include "util/mutexlock.h" #include "util/perf_context_imp.h" @@ -162,10 +163,10 @@ Options SanitizeOptions(const std::string& dbname, Log(result.info_log, "Compaction filter specified, ignore factory"); } if (result.prefix_extractor) { - // If a prefix extractor has been supplied and a PrefixHashRepFactory is + // If a prefix extractor has been supplied and a HashSkipListRepFactory is // being used, make sure that the latter uses the former as its transform // function. 
- auto factory = dynamic_cast( + auto factory = dynamic_cast( result.memtable_factory.get()); if (factory && factory->GetTransform() != result.prefix_extractor) { diff --git a/db/db_test.cc b/db/db_test.cc index c698d4bde..069ab679f 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -244,7 +244,6 @@ class DBTest { enum OptionConfig { kDefault, kVectorRep, - kUnsortedRep, kMergePut, kFilter, kUncompressed, @@ -255,7 +254,7 @@ class DBTest { kCompactOnFlush, kPerfOptions, kDeletesFilterFirst, - kPrefixHashRep, + kHashSkipList, kUniversalCompaction, kCompressedBlockCache, kEnd @@ -339,9 +338,9 @@ class DBTest { Options CurrentOptions() { Options options; switch (option_config_) { - case kPrefixHashRep: - options.memtable_factory.reset(new - PrefixHashRepFactory(NewFixedPrefixTransform(1))); + case kHashSkipList: + options.memtable_factory.reset( + NewHashSkipListRepFactory(NewFixedPrefixTransform(1))); break; case kMergePut: options.merge_operator = MergeOperators::CreatePutOperator(); @@ -375,9 +374,6 @@ class DBTest { case kDeletesFilterFirst: options.filter_deletes = true; break; - case kUnsortedRep: - options.memtable_factory.reset(new UnsortedRepFactory); - break; case kVectorRep: options.memtable_factory.reset(new VectorRepFactory(100)); break; @@ -4600,7 +4596,7 @@ TEST(DBTest, Randomized) { // TODO(sanjay): Test Get() works int p = rnd.Uniform(100); int minimum = 0; - if (option_config_ == kPrefixHashRep) { + if (option_config_ == kHashSkipList) { minimum = 1; } if (p < 45) { // Put @@ -4770,90 +4766,82 @@ void PrefixScanInit(DBTest *dbtest) { } TEST(DBTest, PrefixScan) { - for (int it = 0; it < 2; ++it) { - ReadOptions ro = ReadOptions(); - int count; - Slice prefix; - Slice key; - char buf[100]; - Iterator* iter; - snprintf(buf, sizeof(buf), "03______:"); - prefix = Slice(buf, 8); - key = Slice(buf, 9); - auto prefix_extractor = NewFixedPrefixTransform(8); - // db configs - env_->count_random_reads_ = true; - Options options = CurrentOptions(); - options.env 
= env_; - options.no_block_cache = true; - options.filter_policy = NewBloomFilterPolicy(10); - options.prefix_extractor = prefix_extractor; - options.whole_key_filtering = false; - options.disable_auto_compactions = true; - options.max_background_compactions = 2; - options.create_if_missing = true; - options.disable_seek_compaction = true; - if (it == 0) { - options.memtable_factory.reset(NewHashSkipListRepFactory( - prefix_extractor)); - } else { - options.memtable_factory = std::make_shared( - prefix_extractor); - } + ReadOptions ro = ReadOptions(); + int count; + Slice prefix; + Slice key; + char buf[100]; + Iterator* iter; + snprintf(buf, sizeof(buf), "03______:"); + prefix = Slice(buf, 8); + key = Slice(buf, 9); + auto prefix_extractor = NewFixedPrefixTransform(8); + // db configs + env_->count_random_reads_ = true; + Options options = CurrentOptions(); + options.env = env_; + options.no_block_cache = true; + options.filter_policy = NewBloomFilterPolicy(10); + options.prefix_extractor = prefix_extractor; + options.whole_key_filtering = false; + options.disable_auto_compactions = true; + options.max_background_compactions = 2; + options.create_if_missing = true; + options.disable_seek_compaction = true; + options.memtable_factory.reset(NewHashSkipListRepFactory(prefix_extractor)); - // prefix specified, with blooms: 2 RAND I/Os - // SeekToFirst - DestroyAndReopen(&options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - ro.prefix = &prefix; - iter = db_->NewIterator(ro); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - assert(iter->key().starts_with(prefix)); - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); + // prefix specified, with blooms: 2 RAND I/Os + // SeekToFirst + DestroyAndReopen(&options); + PrefixScanInit(this); + count = 0; + env_->random_read_counter_.Reset(); + ro.prefix = &prefix; + iter = db_->NewIterator(ro); + for 
(iter->SeekToFirst(); iter->Valid(); iter->Next()) { + assert(iter->key().starts_with(prefix)); + count++; + } + ASSERT_OK(iter->status()); + delete iter; + ASSERT_EQ(count, 2); + ASSERT_EQ(env_->random_read_counter_.Read(), 2); - // prefix specified, with blooms: 2 RAND I/Os - // Seek - DestroyAndReopen(&options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - ro.prefix = &prefix; - iter = db_->NewIterator(ro); - for (iter->Seek(key); iter->Valid(); iter->Next()) { - assert(iter->key().starts_with(prefix)); - count++; - } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 2); + // prefix specified, with blooms: 2 RAND I/Os + // Seek + DestroyAndReopen(&options); + PrefixScanInit(this); + count = 0; + env_->random_read_counter_.Reset(); + ro.prefix = &prefix; + iter = db_->NewIterator(ro); + for (iter->Seek(key); iter->Valid(); iter->Next()) { + assert(iter->key().starts_with(prefix)); + count++; + } + ASSERT_OK(iter->status()); + delete iter; + ASSERT_EQ(count, 2); + ASSERT_EQ(env_->random_read_counter_.Read(), 2); - // no prefix specified: 11 RAND I/Os - DestroyAndReopen(&options); - PrefixScanInit(this); - count = 0; - env_->random_read_counter_.Reset(); - iter = db_->NewIterator(ReadOptions()); - for (iter->Seek(prefix); iter->Valid(); iter->Next()) { - if (! iter->key().starts_with(prefix)) { - break; - } - count++; + // no prefix specified: 11 RAND I/Os + DestroyAndReopen(&options); + PrefixScanInit(this); + count = 0; + env_->random_read_counter_.Reset(); + iter = db_->NewIterator(ReadOptions()); + for (iter->Seek(prefix); iter->Valid(); iter->Next()) { + if (! 
iter->key().starts_with(prefix)) { + break; } - ASSERT_OK(iter->status()); - delete iter; - ASSERT_EQ(count, 2); - ASSERT_EQ(env_->random_read_counter_.Read(), 11); - Close(); - delete options.filter_policy; + count++; } + ASSERT_OK(iter->status()); + delete iter; + ASSERT_EQ(count, 2); + ASSERT_EQ(env_->random_read_counter_.Read(), 11); + Close(); + delete options.filter_policy; } std::string MakeKey(unsigned int num) { diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 05416748d..0934de0cd 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -38,8 +38,8 @@ std::shared_ptr OpenDb() { if (FLAGS_use_set_based_memetable) { auto prefix_extractor = rocksdb::NewFixedPrefixTransform(0); - options.memtable_factory = - std::make_shared(prefix_extractor); + options.memtable_factory.reset( + NewHashSkipListRepFactory(prefix_extractor)); } Status s = DB::Open(options, kDbName, &db); diff --git a/db/prefix_test.cc b/db/prefix_test.cc index 6c7fc1697..7e5e9cc0e 100644 --- a/db/prefix_test.cc +++ b/db/prefix_test.cc @@ -11,7 +11,6 @@ #include "util/testharness.h" DEFINE_bool(use_prefix_hash_memtable, true, ""); -DEFINE_bool(use_nolock_version, true, ""); DEFINE_bool(trigger_deadlock, false, "issue delete in range scan to trigger PrefixHashMap deadlock"); DEFINE_uint64(bucket_count, 100000, "number of buckets"); @@ -109,14 +108,8 @@ class PrefixTest { if (FLAGS_use_prefix_hash_memtable) { auto prefix_extractor = NewFixedPrefixTransform(8); options.prefix_extractor = prefix_extractor; - if (FLAGS_use_nolock_version) { - options.memtable_factory.reset(NewHashSkipListRepFactory( - prefix_extractor, FLAGS_bucket_count)); - } else { - options.memtable_factory = - std::make_shared( - prefix_extractor, FLAGS_bucket_count, FLAGS_num_locks); - } + options.memtable_factory.reset(NewHashSkipListRepFactory( + prefix_extractor, FLAGS_bucket_count)); } Status s = DB::Open(options, kDbName, &db); diff --git a/include/rocksdb/memtablerep.h 
b/include/rocksdb/memtablerep.h index 4147e5f3a..fcb782d41 100644 --- a/include/rocksdb/memtablerep.h +++ b/include/rocksdb/memtablerep.h @@ -17,21 +17,13 @@ // The factory will be passed an Arena object when a new MemTableRep is // requested. The API for this object is in rocksdb/arena.h. // -// Users can implement their own memtable representations. We include four +// Users can implement their own memtable representations. We include three // types built in: // - SkipListRep: This is the default; it is backed by a skip list. -// - TransformRep: This is backed by an custom hash map. -// On construction, they are given a SliceTransform object. This -// object is applied to the user key of stored items which indexes into the -// hash map to yield a skiplist containing all records that share the same -// user key under the transform function. -// - UnsortedRep: A subclass of TransformRep where the transform function is -// the identity function. Optimized for point lookups. -// - PrefixHashRep: A subclass of TransformRep where the transform function is -// a fixed-size prefix extractor. If you use PrefixHashRepFactory, the transform -// must be identical to options.prefix_extractor, otherwise it will be discarded -// and the default will be used. It is optimized for ranged scans over a -// prefix. +// - HashSkipListRep: The memtable rep that is best used for keys that are +// structured like "prefix:suffix" where iteration within a prefix is +// common and iteration across different prefixes is rare. It is backed by +// a hash map where each bucket is a skip list. // - VectorRep: This is backed by an unordered std::vector. On iteration, the // vector is sorted. It is intelligent about sorting; once the MarkReadOnly() // has been called, the vector will only be sorted once. It is optimized for @@ -186,88 +178,23 @@ public: } }; -// TransformReps are backed by an unordered map of buffers to buckets. 
When -// looking up a key, the user key is extracted and a user-supplied transform -// function (see rocksdb/slice_transform.h) is applied to get the key into the -// unordered map. This allows the user to bin user keys based on arbitrary -// criteria. Two example implementations are UnsortedRepFactory and -// PrefixHashRepFactory. +// HashSkipListRep is backed by a hash map of buckets. Each bucket is a skip +// list. All the keys with the same prefix will be in the same bucket. +// The prefix is determined using a user-supplied SliceTransform. It has +// to match options.prefix_extractor. // // Iteration over the entire collection is implemented by dumping all the keys -// into an std::set. Thus, these data structures are best used when iteration -// over the entire collection is rare. +// into a separate skip list. Thus, these data structures are best used when +// iteration over the entire collection is rare. // // Parameters: -// transform: The SliceTransform to bucket user keys on. TransformRepFactory -// owns the pointer. -// bucket_count: Passed to the constructor of the underlying -// std::unordered_map of each TransformRep. On initialization, the -// underlying array will be at least bucket_count size. -// num_locks: Number of read-write locks to have for the rep. Each bucket is -// hashed onto a read-write lock which controls access to that lock. More -// locks means finer-grained concurrency but more memory overhead. 
-class TransformRepFactory : public MemTableRepFactory { - public: - explicit TransformRepFactory(const SliceTransform* transform, - size_t bucket_count, size_t num_locks = 1000) - : transform_(transform), - bucket_count_(bucket_count), - num_locks_(num_locks) { } - - virtual ~TransformRepFactory() { delete transform_; } - - virtual std::shared_ptr CreateMemTableRep( - MemTableRep::KeyComparator&, Arena*) override; - - virtual const char* Name() const override { - return "TransformRepFactory"; - } - - const SliceTransform* GetTransform() { return transform_; } - - protected: - const SliceTransform* transform_; - const size_t bucket_count_; - const size_t num_locks_; -}; - -// UnsortedReps bin user keys based on an identity function transform -- that -// is, transform(key) = key. This optimizes for point look-ups. -// -// Parameters: See TransformRepFactory. -class UnsortedRepFactory : public TransformRepFactory { -public: - explicit UnsortedRepFactory(size_t bucket_count = 0, size_t num_locks = 1000) - : TransformRepFactory(NewNoopTransform(), - bucket_count, - num_locks) { } - virtual const char* Name() const override { - return "UnsortedRepFactory"; - } -}; - -// PrefixHashReps bin user keys based on a fixed-size prefix. This optimizes for -// short ranged scans over a given prefix. -// -// Parameters: See TransformRepFactory. -class PrefixHashRepFactory : public TransformRepFactory { -public: - explicit PrefixHashRepFactory(const SliceTransform* prefix_extractor, - size_t bucket_count = 0, size_t num_locks = 1000) - : TransformRepFactory(prefix_extractor, bucket_count, num_locks) - { } - - virtual std::shared_ptr CreateMemTableRep( - MemTableRep::KeyComparator&, Arena*) override; - - virtual const char* Name() const override { - return "PrefixHashRepFactory"; - } -}; - -// The same as TransformRepFactory except it doesn't use locks. 
-// Experimental, will replace TransformRepFactory once we are sure -// it performs better +// transform: The prefix extractor that returns prefix when supplied a user +// key. Has to match options.prefix_extractor +// bucket_count: Number of buckets in a hash_map. Each bucket needs +// 8 bytes. By default, we set buckets to one million, which +// will take 8MB of memory. If you know the number of keys you'll +// keep in hash map, set bucket count to be approximately twice +// the number of keys extern MemTableRepFactory* NewHashSkipListRepFactory( const SliceTransform* transform, size_t bucket_count = 1000000); diff --git a/tools/db_stress.cc b/tools/db_stress.cc index 71e36e901..966f007e8 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -305,8 +305,7 @@ DEFINE_bool(filter_deletes, false, "On true, deletes use KeyMayExist to drop" enum RepFactory { kSkipList, - kPrefixHash, - kUnsorted, + kHashSkipList, kVectorRep }; enum RepFactory StringToRepFactory(const char* ctype) { @@ -315,9 +314,7 @@ enum RepFactory StringToRepFactory(const char* ctype) { if (!strcasecmp(ctype, "skip_list")) return kSkipList; else if (!strcasecmp(ctype, "prefix_hash")) - return kPrefixHash; - else if (!strcasecmp(ctype, "unsorted")) - return kUnsorted; + return kHashSkipList; else if (!strcasecmp(ctype, "vector")) return kVectorRep; @@ -335,7 +332,7 @@ static bool ValidatePrefixSize(const char* flagname, int32_t value) { } return true; } -DEFINE_int32(prefix_size, 0, "Control the prefix size for PrefixHashRep"); +DEFINE_int32(prefix_size, 0, "Control the prefix size for HashSkipListRep"); static const bool FLAGS_prefix_size_dummy = google::RegisterFlagValidator(&FLAGS_prefix_size, &ValidatePrefixSize); @@ -1338,12 +1335,9 @@ class StressTest { case kSkipList: memtablerep = "skip_list"; break; - case kPrefixHash: + case kHashSkipList: memtablerep = "prefix_hash"; break; - case kUnsorted: - memtablerep = "unsorted"; - break; case kVectorRep: memtablerep = "vector"; break; @@ 
-1393,21 +1387,15 @@ class StressTest { FLAGS_delete_obsolete_files_period_micros; options.max_manifest_file_size = 1024; options.filter_deletes = FLAGS_filter_deletes; - if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kPrefixHash)) { + if ((FLAGS_prefix_size == 0) == (FLAGS_rep_factory == kHashSkipList)) { fprintf(stderr, "prefix_size should be non-zero iff memtablerep == prefix_hash\n"); exit(1); } switch (FLAGS_rep_factory) { - case kPrefixHash: - options.memtable_factory.reset( - new PrefixHashRepFactory(NewFixedPrefixTransform(FLAGS_prefix_size)) - ); - break; - case kUnsorted: - options.memtable_factory.reset( - new UnsortedRepFactory() - ); + case kHashSkipList: + options.memtable_factory.reset(NewHashSkipListRepFactory( + NewFixedPrefixTransform(FLAGS_prefix_size))); break; case kSkipList: // no need to do anything diff --git a/util/hash_skiplist_rep.cc b/util/hash_skiplist_rep.cc index b67911f93..c669769e0 100644 --- a/util/hash_skiplist_rep.cc +++ b/util/hash_skiplist_rep.cc @@ -4,6 +4,8 @@ // of patent rights can be found in the PATENTS file in the same directory. 
// +#include "util/hash_skiplist_rep.h" + #include "rocksdb/memtablerep.h" #include "rocksdb/arena.h" #include "rocksdb/slice.h" @@ -296,31 +298,12 @@ std::shared_ptr } // anon namespace -class HashSkipListRepFactory : public MemTableRepFactory { - public: - explicit HashSkipListRepFactory(const SliceTransform* transform, - size_t bucket_count = 1000000) - : transform_(transform), - bucket_count_(bucket_count) { } - - virtual ~HashSkipListRepFactory() { delete transform_; } - - virtual std::shared_ptr CreateMemTableRep( - MemTableRep::KeyComparator& compare, Arena* arena) override { - return std::make_shared(compare, arena, transform_, - bucket_count_); - } - - virtual const char* Name() const override { - return "HashSkipListRepFactory"; - } - - const SliceTransform* GetTransform() { return transform_; } - - private: - const SliceTransform* transform_; - const size_t bucket_count_; -}; +std::shared_ptr +HashSkipListRepFactory::CreateMemTableRep(MemTableRep::KeyComparator &compare, + Arena *arena) { + return std::make_shared(compare, arena, transform_, + bucket_count_); +} MemTableRepFactory* NewHashSkipListRepFactory( const SliceTransform* transform, size_t bucket_count) { diff --git a/util/hash_skiplist_rep.h b/util/hash_skiplist_rep.h new file mode 100644 index 000000000..b946cf05e --- /dev/null +++ b/util/hash_skiplist_rep.h @@ -0,0 +1,38 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#pragma once +#include "rocksdb/slice_transform.h" +#include "rocksdb/memtablerep.h" + +namespace rocksdb { + +class HashSkipListRepFactory : public MemTableRepFactory { + public: + explicit HashSkipListRepFactory(const SliceTransform* transform, + size_t bucket_count = 1000000) + : transform_(transform), + bucket_count_(bucket_count) { } + + virtual ~HashSkipListRepFactory() { delete transform_; } + + virtual std::shared_ptr CreateMemTableRep( + MemTableRep::KeyComparator& compare, Arena* arena) override; + + virtual const char* Name() const override { + return "HashSkipListRepFactory"; + } + + const SliceTransform* GetTransform() { return transform_; } + + private: + const SliceTransform* transform_; + const size_t bucket_count_; +}; + +} diff --git a/util/stl_wrappers.h b/util/stl_wrappers.h index b42a58427..b4c14b4ba 100644 --- a/util/stl_wrappers.h +++ b/util/stl_wrappers.h @@ -28,24 +28,5 @@ namespace stl_wrappers { } }; - struct Hash { - inline size_t operator()(const char* buf) const { - Slice internal_key = GetLengthPrefixedSlice(buf); - Slice value = - GetLengthPrefixedSlice(internal_key.data() + internal_key.size()); - unsigned int hval = MurmurHash(internal_key.data(), internal_key.size(), - 0); - hval = MurmurHash(value.data(), value.size(), hval); - return hval; - } - }; - - struct KeyEqual : private Base { - explicit KeyEqual(const MemTableRep::KeyComparator& compare) - : Base(compare) { } - inline bool operator()(const char* a, const char* b) const { - return this->compare_(a, b) == 0; - } - }; } } diff --git a/util/transformrep.cc b/util/transformrep.cc deleted file mode 100644 index 4c7df1321..000000000 --- a/util/transformrep.cc +++ /dev/null @@ -1,422 +0,0 @@ -// Copyright (c) 2013, Facebook, Inc. All rights reserved. -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. 
An additional grant -// of patent rights can be found in the PATENTS file in the same directory. -// -#include -#include -#include -#include -#include - -#include "rocksdb/memtablerep.h" -#include "rocksdb/arena.h" -#include "rocksdb/slice.h" -#include "rocksdb/slice_transform.h" -#include "port/port.h" -#include "util/mutexlock.h" -#include "util/murmurhash.h" -#include "util/stl_wrappers.h" - -namespace std { -template <> -struct hash { - size_t operator()(const rocksdb::Slice& slice) const { - return MurmurHash(slice.data(), slice.size(), 0); - } -}; -} - -namespace rocksdb { -namespace { - -using namespace stl_wrappers; - -class TransformRep : public MemTableRep { - public: - TransformRep(const KeyComparator& compare, Arena* arena, - const SliceTransform* transform, size_t bucket_size, - size_t num_locks); - - virtual void Insert(const char* key) override; - - virtual bool Contains(const char* key) const override; - - virtual size_t ApproximateMemoryUsage() override; - - virtual ~TransformRep() { } - - virtual std::shared_ptr GetIterator() override; - - virtual std::shared_ptr GetIterator( - const Slice& slice) override; - - virtual std::shared_ptr GetDynamicPrefixIterator() - override { - return std::make_shared(*this); - } - - std::shared_ptr GetTransformIterator( - const Slice& transformed); - - private: - friend class DynamicPrefixIterator; - typedef std::set Bucket; - typedef std::unordered_map> BucketMap; - - // Maps slices (which are transformed user keys) to buckets of keys sharing - // the same transform. - BucketMap buckets_; - - // rwlock_ protects access to the buckets_ data structure itself. Each bucket - // has its own read-write lock as well. - mutable port::RWMutex rwlock_; - - // Keep track of approximately how much memory is being used. - size_t memory_usage_ = 0; - - // The user-supplied transform whose domain is the user keys. - const SliceTransform* transform_; - - // Get a bucket from buckets_. 
If the bucket hasn't been initialized yet, - // initialize it before returning. Must be externally synchronized. - std::shared_ptr& GetBucket(const Slice& transformed); - - port::RWMutex* GetLock(const Slice& transformed) const; - - mutable std::vector locks_; - - const KeyComparator& compare_; - - class Iterator : public MemTableRep::Iterator { - public: - explicit Iterator(std::shared_ptr items); - - virtual ~Iterator() { }; - - // Returns true iff the iterator is positioned at a valid node. - virtual bool Valid() const; - - // Returns the key at the current position. - // REQUIRES: Valid() - virtual const char* key() const; - - // Advances to the next position. - // REQUIRES: Valid() - virtual void Next(); - - // Advances to the previous position. - // REQUIRES: Valid() - virtual void Prev(); - - // Advance to the first entry with a key >= target - virtual void Seek(const char* target); - - // Position at the first entry in collection. - // Final state of iterator is Valid() iff collection is not empty. - virtual void SeekToFirst(); - - // Position at the last entry in collection. - // Final state of iterator is Valid() iff collection is not empty. - virtual void SeekToLast(); - private: - std::shared_ptr items_; - Bucket::const_iterator cit_; - }; - - class EmptyIterator : public MemTableRep::Iterator { - // This is used when there wasn't a bucket. It is cheaper than - // instantiating an empty bucket over which to iterate. 
- public: - virtual bool Valid() const { - return false; - } - virtual const char* key() const { - assert(false); - return nullptr; - } - virtual void Next() { } - virtual void Prev() { } - virtual void Seek(const char* target) { } - virtual void SeekToFirst() { } - virtual void SeekToLast() { } - static std::shared_ptr GetInstance(); - private: - static std::shared_ptr instance; - EmptyIterator() { } - }; - - class TransformIterator : public Iterator { - public: - explicit TransformIterator(std::shared_ptr items, - port::RWMutex* rwlock); - virtual ~TransformIterator() { } - private: - const ReadLock l_; - }; - - - class DynamicPrefixIterator : public MemTableRep::Iterator { - private: - // the underlying memtable rep - const TransformRep& memtable_rep_; - // the result of a prefix seek - std::unique_ptr bucket_iterator_; - - public: - explicit DynamicPrefixIterator(const TransformRep& memtable_rep) - : memtable_rep_(memtable_rep) {} - - virtual ~DynamicPrefixIterator() { }; - - // Returns true iff the iterator is positioned at a valid node. - virtual bool Valid() const { - return bucket_iterator_ && bucket_iterator_->Valid(); - } - - // Returns the key at the current position. - // REQUIRES: Valid() - virtual const char* key() const { - assert(Valid()); - return bucket_iterator_->key(); - } - - // Advances to the next position. - // REQUIRES: Valid() - virtual void Next() { - assert(Valid()); - bucket_iterator_->Next(); - } - - // Advances to the previous position. 
- // REQUIRES: Valid() - virtual void Prev() { - assert(Valid()); - bucket_iterator_->Prev(); - } - - // Advance to the first entry with a key >= target within the - // same bucket as target - virtual void Seek(const char* target) { - Slice prefix = memtable_rep_.transform_->Transform( - memtable_rep_.UserKey(target)); - - ReadLock l(&memtable_rep_.rwlock_); - auto bucket = memtable_rep_.buckets_.find(prefix); - if (bucket == memtable_rep_.buckets_.end()) { - bucket_iterator_.reset(nullptr); - } else { - bucket_iterator_.reset( - new TransformIterator(bucket->second, memtable_rep_.GetLock(prefix))); - bucket_iterator_->Seek(target); - } - } - - // Position at the first entry in collection. - // Final state of iterator is Valid() iff collection is not empty. - virtual void SeekToFirst() { - // Prefix iterator does not support total order. - // We simply set the iterator to invalid state - bucket_iterator_.reset(nullptr); - } - - // Position at the last entry in collection. - // Final state of iterator is Valid() iff collection is not empty. - virtual void SeekToLast() { - // Prefix iterator does not support total order. 
- // We simply set the iterator to invalid state - bucket_iterator_.reset(nullptr); - } - }; -}; - -class PrefixHashRep : public TransformRep { - public: - PrefixHashRep(const KeyComparator& compare, Arena* arena, - const SliceTransform* transform, size_t bucket_size, - size_t num_locks) - : TransformRep(compare, arena, transform, - bucket_size, num_locks) { } - - virtual std::shared_ptr GetPrefixIterator( - const Slice& prefix) override; -}; - -std::shared_ptr& TransformRep::GetBucket( - const Slice& transformed) { - WriteLock l(&rwlock_); - auto& bucket = buckets_[transformed]; - if (!bucket) { - bucket.reset( - new decltype(buckets_)::mapped_type::element_type(Compare(compare_))); - // To memory_usage_ we add the size of the std::set and the size of the - // std::pair (decltype(buckets_)::value_type) which includes the - // Slice and the std::shared_ptr - memory_usage_ += sizeof(*bucket) + - sizeof(decltype(buckets_)::value_type); - } - return bucket; -} - -port::RWMutex* TransformRep::GetLock(const Slice& transformed) const { - return &locks_[std::hash()(transformed) % locks_.size()]; -} - -TransformRep::TransformRep(const KeyComparator& compare, Arena* arena, - const SliceTransform* transform, size_t bucket_size, - size_t num_locks) - : buckets_(bucket_size), - transform_(transform), - locks_(num_locks), - compare_(compare) { } - -void TransformRep::Insert(const char* key) { - assert(!Contains(key)); - auto transformed = transform_->Transform(UserKey(key)); - auto& bucket = GetBucket(transformed); - WriteLock bl(GetLock(transformed)); - bucket->insert(key); - memory_usage_ += sizeof(key); -} - -bool TransformRep::Contains(const char* key) const { - ReadLock l(&rwlock_); - auto transformed = transform_->Transform(UserKey(key)); - auto bucket = buckets_.find(transformed); - if (bucket == buckets_.end()) { - return false; - } - ReadLock bl(GetLock(transformed)); - return bucket->second->count(key) != 0; -} - -size_t TransformRep::ApproximateMemoryUsage() { - 
return memory_usage_; -} - -std::shared_ptr - TransformRep::EmptyIterator::GetInstance() { - if (!instance) { - instance.reset(new TransformRep::EmptyIterator); - } - return instance; -} - -TransformRep::Iterator::Iterator(std::shared_ptr items) - : items_(items), - cit_(items_->begin()) { } - -// Returns true iff the iterator is positioned at a valid node. -bool TransformRep::Iterator::Valid() const { - return cit_ != items_->end(); -} - -// Returns the key at the current position. -// REQUIRES: Valid() -const char* TransformRep::Iterator::key() const { - assert(Valid()); - return *cit_; -} - -// Advances to the next position. -// REQUIRES: Valid() -void TransformRep::Iterator::Next() { - assert(Valid()); - if (cit_ == items_->end()) { - return; - } - ++cit_; -} - -// Advances to the previous position. -// REQUIRES: Valid() -void TransformRep::Iterator::Prev() { - assert(Valid()); - if (cit_ == items_->begin()) { - // If you try to go back from the first element, the iterator should be - // invalidated. So we set it to past-the-end. This means that you can - // treat the container circularly. - cit_ = items_->end(); - } else { - --cit_; - } -} - -// Advance to the first entry with a key >= target -void TransformRep::Iterator::Seek(const char* target) { - cit_ = items_->lower_bound(target); -} - -// Position at the first entry in collection. -// Final state of iterator is Valid() iff collection is not empty. 
-void TransformRep::Iterator::SeekToFirst() { - cit_ = items_->begin(); -} - -void TransformRep::Iterator::SeekToLast() { - cit_ = items_->end(); - if (items_->size() != 0) { - --cit_; - } -} - -TransformRep::TransformIterator::TransformIterator( - std::shared_ptr items, port::RWMutex* rwlock) - : Iterator(items), l_(rwlock) { } - -std::shared_ptr TransformRep::GetIterator() { - auto items = std::make_shared(Compare(compare_)); - // Hold read locks on all locks - ReadLock l(&rwlock_); - std::for_each(locks_.begin(), locks_.end(), [] (port::RWMutex& lock) { - lock.ReadLock(); - }); - for (auto& bucket : buckets_) { - items->insert(bucket.second->begin(), bucket.second->end()); - } - std::for_each(locks_.begin(), locks_.end(), [] (port::RWMutex& lock) { - lock.Unlock(); - }); - return std::make_shared(std::move(items)); -} - -std::shared_ptr TransformRep::GetTransformIterator( - const Slice& transformed) { - ReadLock l(&rwlock_); - auto bucket = buckets_.find(transformed); - if (bucket == buckets_.end()) { - return EmptyIterator::GetInstance(); - } - return std::make_shared(bucket->second, - GetLock(transformed)); -} - -std::shared_ptr TransformRep::GetIterator( - const Slice& slice) { - auto transformed = transform_->Transform(slice); - return GetTransformIterator(transformed); -} - -std::shared_ptr - TransformRep::EmptyIterator::instance; - -} // anon namespace - -std::shared_ptr TransformRepFactory::CreateMemTableRep( - MemTableRep::KeyComparator& compare, Arena* arena) { - return std::make_shared(compare, arena, transform_, - bucket_count_, num_locks_); -} - -std::shared_ptr PrefixHashRepFactory::CreateMemTableRep( - MemTableRep::KeyComparator& compare, Arena* arena) { - return std::make_shared(compare, arena, transform_, - bucket_count_, num_locks_); -} - -std::shared_ptr PrefixHashRep::GetPrefixIterator( - const Slice& prefix) { - return TransformRep::GetTransformIterator(prefix); -} - -} // namespace rocksdb