From 94cf218720907acb84e2072b9dde701d4860b456 Mon Sep 17 00:00:00 2001 From: Tyler Harter Date: Thu, 22 Aug 2013 18:01:11 -0700 Subject: [PATCH] Revert "Prefix scan: db_bench and bug fixes" This reverts commit c2bd8f4824bda98db8699f1e08d6969cf21ef86f. --- db/db_bench.cc | 61 ++---------------------------------- db/table_cache.cc | 18 ----------- db/table_cache.h | 6 ---- db/version_set.cc | 48 +++------------------------- db/version_set.h | 2 -- include/leveldb/statistics.h | 10 +----- include/leveldb/status.h | 2 +- table/filter_block.cc | 11 ++----- table/table.cc | 20 +++--------- table/table.h | 2 +- 10 files changed, 16 insertions(+), 164 deletions(-) diff --git a/db/db_bench.cc b/db/db_bench.cc index 99f1549a2..56ddd2356 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -41,9 +41,8 @@ // readrandom -- read N times in random order // readmissing -- read N missing keys in random order // readhot -- read N times in random order from 1% section of DB -// readwhilewriting -- 1 writer, N threads doing random reads -// readrandomwriterandom -- N threads doing random-read, random-write -// prefixscanrandom -- prefix scan N times in random order +// readwhilewriting -- 1 writer, N threads doing random reads +// readrandomwriterandom - N threads doing random-read, random-write // updaterandom -- N threads doing read-modify-write for random keys // appendrandom -- N threads doing read-modify-write with growing values // mergerandom -- same as updaterandom/appendrandom using merge operator @@ -96,13 +95,6 @@ static long FLAGS_reads = -1; // When ==1 reads use ::Get, when >1 reads use an iterator static long FLAGS_read_range = 1; -// Whether to place prefixes in blooms -static bool FLAGS_use_prefix_blooms = false; - -// Whether to set ReadOptions.prefix for prefixscanrandom. If this -// true, use_prefix_blooms must also be true. -static bool FLAGS_use_prefix_api = false; - // Seed base for random number generators. When 0 it is deterministic. static long FLAGS_seed = 0; @@ -639,7 +631,6 @@ class Benchmark { private: shared_ptr cache_; const FilterPolicy* filter_policy_; - const SliceTransform* prefix_extractor_; DB* db_; long num_; int value_size_; @@ -782,7 +773,6 @@ class Benchmark { filter_policy_(FLAGS_bloom_bits >= 0 ? NewBloomFilterPolicy(FLAGS_bloom_bits) : nullptr), - prefix_extractor_(NewFixedPrefixTransform(FLAGS_key_size-1)), db_(nullptr), num_(FLAGS_num), value_size_(FLAGS_value_size), @@ -809,7 +799,6 @@ class Benchmark { ~Benchmark() { delete db_; delete filter_policy_; - delete prefix_extractor_; } //this function will construct string format for key. e.g "%016d" @@ -905,8 +894,6 @@ class Benchmark { } else if (name == Slice("readrandomsmall")) { reads_ /= 1000; method = &Benchmark::ReadRandom; - } else if (name == Slice("prefixscanrandom")) { - method = &Benchmark::PrefixScanRandom; } else if (name == Slice("deleteseq")) { method = &Benchmark::DeleteSeq; } else if (name == Slice("deleterandom")) { @@ -1159,8 +1146,6 @@ class Benchmark { FLAGS_compaction_universal_min_merge_width; options.block_size = FLAGS_block_size; options.filter_policy = filter_policy_; - options.prefix_extractor = FLAGS_use_prefix_blooms ? prefix_extractor_ - : nullptr; options.max_open_files = FLAGS_open_files; options.statistics = dbstats; options.env = FLAGS_env; @@ -1482,41 +1467,6 @@ class Benchmark { thread->stats.AddMessage(msg); } - void PrefixScanRandom(ThreadState* thread) { - if (FLAGS_use_prefix_api) { - assert(FLAGS_use_prefix_blooms); - assert(FLAGS_bloom_bits >= 1); - } - - ReadOptions options(FLAGS_verify_checksum, true); - Duration duration(FLAGS_duration, reads_); - - long found = 0; - - while (!duration.Done(1)) { - std::string value; - const int k = thread->rand.Next() % FLAGS_num; - unique_ptr key = GenerateKeyFromInt(k); - Slice skey(key.get()); - Slice prefix = prefix_extractor_->Transform(skey); - options.prefix = FLAGS_use_prefix_api ? &prefix : nullptr; - - Iterator* iter = db_->NewIterator(options); - for (iter->Seek(skey); - iter->Valid() && iter->key().starts_with(prefix); - iter->Next()) { - found++; - } - delete iter; - - thread->stats.FinishedSingleOp(db_); - } - - char msg[100]; - snprintf(msg, sizeof(msg), "(%ld of %ld found)", found, reads_); - thread->stats.AddMessage(msg); - } - void ReadMissing(ThreadState* thread) { FLAGS_warn_missing_keys = false; // Never warn about missing keys @@ -2220,13 +2170,6 @@ int main(int argc, char** argv) { FLAGS_reads = n; } else if (sscanf(argv[i], "--read_range=%d%c", &n, &junk) == 1) { FLAGS_read_range = n; - - } else if (sscanf(argv[i], "--use_prefix_blooms=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_use_prefix_blooms = n; - } else if (sscanf(argv[i], "--use_prefix_api=%d%c", &n, &junk) == 1 && - (n == 0 || n == 1)) { - FLAGS_use_prefix_api = n; } else if (sscanf(argv[i], "--duration=%d%c", &n, &junk) == 1) { FLAGS_duration = n; } else if (sscanf(argv[i], "--seed=%ld%c", &l, &junk) == 1) { diff --git a/db/table_cache.cc b/db/table_cache.cc index 2d1a4bf39..02408d95c 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -135,24 +135,6 @@ Status TableCache::Get(const ReadOptions& options, return s; } -bool TableCache::PrefixMayMatch(const ReadOptions& options, - uint64_t file_number, - uint64_t file_size, - const Slice& internal_prefix, - bool* table_io) { - Cache::Handle* handle = nullptr; - Status s = FindTable(storage_options_, file_number, - file_size, &handle, table_io); - bool may_match = true; - if (s.ok()) { - Table* t = - reinterpret_cast(cache_->Value(handle)); - may_match = t->PrefixMayMatch(internal_prefix); - cache_->Release(handle); - } - return may_match; -} - void TableCache::Evict(uint64_t file_number) { char buf[sizeof(file_number)]; EncodeFixed64(buf, file_number); diff --git a/db/table_cache.h b/db/table_cache.h index 6b7b4b53a..2f3787609 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -52,12 +52,6 @@ class TableCache { void (*mark_key_may_exist)(void*) = nullptr, const bool no_io = false); - // Determine whether the table may contain the specified prefix. If - // the table index of blooms are not in memory, this may cause an I/O - bool PrefixMayMatch(const ReadOptions& options, uint64_t file_number, - uint64_t file_size, const Slice& internal_prefix, - bool* table_io); - // Evict any entry for the specified file number void Evict(uint64_t file_number); diff --git a/db/version_set.cc b/db/version_set.cc index bf9376e84..29741ba14 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -189,14 +189,7 @@ static Iterator* GetFileIterator(void* arg, return NewErrorIterator( Status::Corruption("FileReader invoked with unexpected value")); } else { - ReadOptions options_copy; - if (options.prefix) { - // suppress prefix filtering since we have already checked the - // filters once at this point - options_copy = options; - options_copy.prefix = nullptr; - } - return cache->NewIterator(options.prefix ? options_copy : options, + return cache->NewIterator(options, soptions, DecodeFixed64(file_value.data()), DecodeFixed64(file_value.data() + 8), @@ -205,45 +198,12 @@ static Iterator* GetFileIterator(void* arg, } } -bool Version::PrefixMayMatch(const ReadOptions& options, - const EnvOptions& soptions, - const Slice& internal_prefix, - Iterator* level_iter) const { - bool may_match = true; - level_iter->Seek(internal_prefix); - if (!level_iter->Valid()) { - // we're past end of level - may_match = false; - } else if (ExtractUserKey(level_iter->key()).starts_with( - ExtractUserKey(internal_prefix))) { - // TODO(tylerharter): do we need this case? Or are we guaranteed - // key() will always be the biggest value for this SST? - may_match = true; - } else { - may_match = vset_->table_cache_->PrefixMayMatch( - options, - DecodeFixed64(level_iter->value().data()), - DecodeFixed64(level_iter->value().data() + 8), - internal_prefix, nullptr); - } - return may_match; -} - Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, const EnvOptions& soptions, int level) const { - Iterator* level_iter = new LevelFileNumIterator(vset_->icmp_, &files_[level]); - if (options.prefix) { - InternalKey internal_prefix(*options.prefix, 0, kTypeValue); - if (!PrefixMayMatch(options, soptions, - internal_prefix.Encode(), level_iter)) { - delete level_iter; - // nothing in this level can match the prefix - return NewEmptyIterator(); - } - } - return NewTwoLevelIterator(level_iter, &GetFileIterator, - vset_->table_cache_, options, soptions); + return NewTwoLevelIterator( + new LevelFileNumIterator(vset_->icmp_, &files_[level]), + &GetFileIterator, vset_->table_cache_, options, soptions); } void Version::AddIterators(const ReadOptions& options, diff --git a/db/version_set.h b/db/version_set.h index 9a7aeb20b..ddfd17cdc 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -152,8 +152,6 @@ class Version { Iterator* NewConcatenatingIterator(const ReadOptions&, const EnvOptions& soptions, int level) const; - bool PrefixMayMatch(const ReadOptions& options, const EnvOptions& soptions, - const Slice& internal_prefix, Iterator* level_iter) const; VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list diff --git a/include/leveldb/statistics.h b/include/leveldb/statistics.h index 78643ff67..9a36ffccf 100644 --- a/include/leveldb/statistics.h +++ b/include/leveldb/statistics.h @@ -62,21 +62,13 @@ enum Tickers { NUMBER_MERGE_FAILURES = 22, SEQUENCE_NUMBER = 23, - // number of times bloom was checked before creating iterator on a - // file, and the number of times the check was useful in avoiding - // iterator creation (and thus likely IOPs). - BLOOM_FILTER_PREFIX_CHECKED = 24, - BLOOM_FILTER_PREFIX_USEFUL = 25, - - TICKER_ENUM_MAX = 26 + TICKER_ENUM_MAX = 24 }; const std::vector> TickersNameMap = { { BLOCK_CACHE_MISS, "rocksdb.block.cache.miss" }, { BLOCK_CACHE_HIT, "rocksdb.block.cache.hit" }, { BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful" }, - { BLOOM_FILTER_PREFIX_CHECKED, "rocksdb.bloom.filter.prefix.checked" }, - { BLOOM_FILTER_PREFIX_USEFUL, "rocksdb.bloom.filter.prefix.useful" }, { COMPACTION_KEY_DROP_NEWER_ENTRY, "rocksdb.compaction.key.drop.new" }, { COMPACTION_KEY_DROP_OBSOLETE, "rocksdb.compaction.key.drop.obsolete" }, { COMPACTION_KEY_DROP_USER, "rocksdb.compaction.key.drop.user" }, diff --git a/include/leveldb/status.h b/include/leveldb/status.h index e41adb39f..a20351f32 100644 --- a/include/leveldb/status.h +++ b/include/leveldb/status.h @@ -91,7 +91,7 @@ class Status { kNotSupported = 3, kInvalidArgument = 4, kIOError = 5, - kMergeInProgress = 6, + kMergeInProgress = 6 }; Code code() const { diff --git a/table/filter_block.cc b/table/filter_block.cc index 337cbacd8..5f687ebc7 100644 --- a/table/filter_block.cc +++ b/table/filter_block.cc @@ -45,7 +45,6 @@ bool FilterBlockBuilder::SamePrefix(const Slice &key1, } void FilterBlockBuilder::AddKey(const Slice& key) { - // get slice for most recently added entry Slice prev; if (start_.size() > 0) { size_t prev_start = start_[start_.size() - 1]; @@ -54,21 +53,17 @@ void FilterBlockBuilder::AddKey(const Slice& key) { prev = Slice(base, length); } - // add key to filter if needed if (whole_key_filtering_) { start_.push_back(entries_.size()); entries_.append(key.data(), key.size()); } - // add prefix to filter if needed - Slice user_key = ExtractUserKey(key); - if (prefix_extractor_ && prefix_extractor_->InDomain(user_key)) { + if (prefix_extractor_ && prefix_extractor_->InDomain(key)) { // this assumes prefix(prefix(key)) == prefix(key), as the last // entry in entries_ may be either a key or prefix, and we use // prefix(last entry) to get the prefix of the last key. - if (prev.size() == 0 || - !SamePrefix(user_key, ExtractUserKey(prev))) { - Slice prefix = prefix_extractor_->Transform(user_key); + if (prev.size() == 0 || ! SamePrefix(key, prev)) { + Slice prefix = prefix_extractor_->Transform(key); InternalKey internal_prefix_tmp(prefix, 0, kTypeValue); Slice internal_prefix = internal_prefix_tmp.Encode(); assert(comparator_->Compare(internal_prefix, key) <= 0); diff --git a/table/table.cc b/table/table.cc index 31c2d2679..9a5f1b761 100644 --- a/table/table.cc +++ b/table/table.cc @@ -328,11 +328,6 @@ Iterator* Table::BlockReader(void* arg, // 1) key.starts_with(prefix(key)) // 2) Compare(prefix(key), key) <= 0. // 3) If Compare(key1, key2) <= 0, then Compare(prefix(key1), prefix(key2)) <= 0 -// -// TODO(tylerharter): right now, this won't cause I/O since blooms are -// in memory. When blooms may need to be paged in, we should refactor so that -// this is only ever called lazily. In particular, this shouldn't be called -// while the DB lock is held like it is now. bool Table::PrefixMayMatch(const Slice& internal_prefix) const { FilterBlockReader* filter = rep_->filter; bool may_match = true; @@ -342,14 +337,12 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const { return true; } - std::unique_ptr iiter(rep_->index_block->NewIterator( - rep_->options.comparator)); + Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator); iiter->Seek(internal_prefix); - if (!iiter->Valid()) { + if (! iiter->Valid()) { // we're past end of file may_match = false; - } else if (ExtractUserKey(iiter->key()).starts_with( - ExtractUserKey(internal_prefix))) { + } else if (iiter->key().starts_with(internal_prefix)) { // we need to check for this subtle case because our only // guarantee is that "the key is a string >= last key in that data // block" according to the doc/table_format.txt spec. @@ -373,12 +366,7 @@ bool Table::PrefixMayMatch(const Slice& internal_prefix) const { assert(s.ok()); may_match = filter->PrefixMayMatch(handle.offset(), internal_prefix); } - - RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_CHECKED); - if (!may_match) { - RecordTick(rep_->options.statistics, BLOOM_FILTER_PREFIX_USEFUL); - } - + delete iiter; return may_match; } diff --git a/table/table.h b/table/table.h index b1b2dd57c..c2cb5a32b 100644 --- a/table/table.h +++ b/table/table.h @@ -47,7 +47,7 @@ class Table { ~Table(); - bool PrefixMayMatch(const Slice& internal_prefix) const; + bool PrefixMayMatch(const Slice& prefix) const; // Returns a new iterator over the table contents. // The result of NewIterator() is initially invalid (caller must