From 59d0b02f8b7135051001d2ef377621cb6bfe7556 Mon Sep 17 00:00:00 2001 From: Mayank Agarwal Date: Fri, 26 Jul 2013 12:57:01 -0700 Subject: [PATCH] Expand KeyMayExist to return the proper value if it can be found in memory and also check block_cache Summary: Removed KeyMayExistImpl because KeyMayExist demanded Get like semantics now. Removed no_io from memtable and imm because we need the proper value now and shouldn't just stop when we see Merge in memtable. Added checks to block_cache. Updated documentation and unit-test Test Plan: make all check;db_stress for 1 hour Reviewers: dhruba, haobo Reviewed By: dhruba CC: leveldb Differential Revision: https://reviews.facebook.net/D11853 --- db/db_impl.cc | 33 +++++++++++++-------------------- db/db_impl.h | 26 ++++++++++++-------------- db/db_test.cc | 37 +++++++++++++++++++++++++------------ db/memtable.cc | 6 +----- db/memtable.h | 5 ++--- db/memtablelist.cc | 4 ++-- db/memtablelist.h | 2 +- db/version_set.cc | 17 ++++++++++++----- db/version_set.h | 2 +- db/write_batch.cc | 15 ++++++++++++--- include/leveldb/db.h | 21 +++++++++++++++------ include/leveldb/options.h | 10 ++++------ table/table.cc | 22 ++++++++++++++-------- table/table.h | 3 ++- tools/db_stress.cc | 2 +- utilities/ttl/db_ttl.cc | 7 +++++-- utilities/ttl/db_ttl.h | 5 ++++- 17 files changed, 126 insertions(+), 91 deletions(-) diff --git a/db/db_impl.cc b/db/db_impl.cc index 9cff6991d..8be77d57e 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2093,13 +2093,11 @@ Status DBImpl::Get(const ReadOptions& options, return GetImpl(options, key, value); } -// If no_io is true, then returns Status::NotFound if key is not in memtable, -// immutable-memtable and bloom-filters can guarantee that key is not in db, -// "value" is garbage string if no_io is true Status DBImpl::GetImpl(const ReadOptions& options, const Slice& key, std::string* value, - const bool no_io) { + const bool no_io, + bool* value_found) { Status s; StopWatch sw(env_, options_.statistics, DB_GET); @@ -2128,12 +2126,12 @@ Status DBImpl::GetImpl(const ReadOptions& options, // s is both in/out. When in, s could either be OK or MergeInProgress. // value will contain the current merge operand in the latter case. LookupKey lkey(key, snapshot); - if (mem->Get(lkey, value, &s, options_, no_io)) { + if (mem->Get(lkey, value, &s, options_)) { // Done - } else if (imm.Get(lkey, value, &s, options_, no_io)) { + } else if (imm.Get(lkey, value, &s, options_)) { // Done } else { - current->Get(options, lkey, value, &s, &stats, options_, no_io); + current->Get(options, lkey, value, &s, &stats, options_, no_io,value_found); have_stat_update = true; } mutex_.Lock(); @@ -2223,19 +2221,14 @@ std::vector DBImpl::MultiGet(const ReadOptions& options, return statList; } -bool DBImpl::KeyMayExist(const Slice& key) { - return KeyMayExistImpl(key, versions_->LastSequence()); -} - -bool DBImpl::KeyMayExistImpl(const Slice& key, - const SequenceNumber read_from_seq) { - std::string value; - SnapshotImpl read_from_snapshot; - read_from_snapshot.number_ = read_from_seq; - ReadOptions ropts; - ropts.snapshot = &read_from_snapshot; - const Status s = GetImpl(ropts, key, &value, true); - return !s.IsNotFound(); +bool DBImpl::KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found) { + if (value_found != nullptr) { + *value_found = true; // falsify later if key-may-exist but can't fetch value + } + return GetImpl(options, key, value, true, value_found).ok(); } Iterator* DBImpl::NewIterator(const ReadOptions& options) { diff --git a/db/db_impl.h b/db/db_impl.h index dedfd9d7e..333a86867 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -50,9 +50,14 @@ class DBImpl : public DB { const std::vector& keys, std::vector* values); - // Returns false if key can't exist- based on memtable, immutable-memtable and - // bloom-filters; true otherwise. No IO is performed - virtual bool KeyMayExist(const Slice& key); + // Returns false if key doesn't exist in the database and true if it may. + // If value_found is not passed in as null, then return the value if found in + // memory. On return, if value was found, then value_found will be set to true + // , otherwise false. + virtual bool KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr); virtual Iterator* NewIterator(const ReadOptions&); virtual const Snapshot* GetSnapshot(); virtual void ReleaseSnapshot(const Snapshot* snapshot); @@ -104,15 +109,6 @@ class DBImpl : public DB { // Trigger's a background call for testing. void TEST_PurgeObsoleteteWAL(); - // KeyMayExist's internal function, but can be called internally from rocksdb - // to check memtable from sequence_number=read_from_seq. This is useful to - // check presence of key in db when key's existence is to be also checked in - // an incompletely written WriteBatch in memtable. eg. Database doesn't have - // key A and WriteBatch=[PutA,B; DelA]. A KeyMayExist called from DelA also - // needs to check itself for any PutA to be sure to not drop the delete. - bool KeyMayExistImpl(const Slice& key, - const SequenceNumber read_from_seq); - protected: Env* const env_; const std::string dbname_; @@ -415,11 +411,13 @@ class DBImpl : public DB { std::vector& snapshots, SequenceNumber* prev_snapshot); - // Function that Get and KeyMayExistImpl call with no_io true or false + // Function that Get and KeyMayExist call with no_io true or false + // Note: 'value_found' from KeyMayExist propagates here Status GetImpl(const ReadOptions& options, const Slice& key, std::string* value, - const bool no_io = false); + const bool no_io = false, + bool* value_found = nullptr); }; // Sanitize db options. The caller should delete result.info_log if diff --git a/db/db_test.cc b/db/db_test.cc index d25fe8b3e..9a7bb2eb0 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -772,34 +772,41 @@ TEST(DBTest, GetEncountersEmptyLevel) { } while (ChangeOptions()); } -// KeyMayExist-API returns false if memtable(s) and in-memory bloom-filters can -// guarantee that the key doesn't exist in the db, else true. This can lead to -// a few false positives, but not false negatives. To make test deterministic, -// use a much larger number of bits per key-20 than bits in the key, so -// that false positives are eliminated +// KeyMayExist can lead to a few false positives, but not false negatives. +// To make test deterministic, use a much larger number of bits per key-20 than +// bits in the key, so that false positives are eliminated TEST(DBTest, KeyMayExist) { do { + ReadOptions ropts; + std::string value; Options options = CurrentOptions(); options.filter_policy = NewBloomFilterPolicy(20); Reopen(&options); - ASSERT_TRUE(!db_->KeyMayExist("a")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value)); ASSERT_OK(db_->Put(WriteOptions(), "a", "b")); - ASSERT_TRUE(db_->KeyMayExist("a")); + bool value_found = false; + ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found)); + ASSERT_TRUE(value_found); + ASSERT_EQ("b", value); dbfull()->Flush(FlushOptions()); - ASSERT_TRUE(db_->KeyMayExist("a")); + value.clear(); + value_found = false; + ASSERT_TRUE(db_->KeyMayExist(ropts, "a", &value, &value_found)); + ASSERT_TRUE(value_found); + ASSERT_EQ("b", value); ASSERT_OK(db_->Delete(WriteOptions(), "a")); - ASSERT_TRUE(!db_->KeyMayExist("a")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value)); dbfull()->Flush(FlushOptions()); dbfull()->CompactRange(nullptr, nullptr); - ASSERT_TRUE(!db_->KeyMayExist("a")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "a", &value)); ASSERT_OK(db_->Delete(WriteOptions(), "c")); - ASSERT_TRUE(!db_->KeyMayExist("c")); + ASSERT_TRUE(!db_->KeyMayExist(ropts, "c", &value)); delete options.filter_policy; } while (ChangeOptions()); @@ -3045,7 +3052,13 @@ class ModelDB: public DB { Status::NotSupported("Not implemented.")); return s; } - virtual bool KeyMayExist(const Slice& key) { + virtual bool KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr) { + if (value_found != nullptr) { + *value_found = false; + } return true; // Not Supported directly } virtual Iterator* NewIterator(const ReadOptions& options) { diff --git a/db/memtable.cc b/db/memtable.cc index fb3f4f1f7..622058804 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -130,7 +130,7 @@ void MemTable::Add(SequenceNumber s, ValueType type, } bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only) { + const Options& options) { Slice memkey = key.memtable_key(); std::shared_ptr iter(table_.get()->GetIterator()); iter->Seek(memkey.data()); @@ -174,10 +174,6 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s, return true; } case kTypeMerge: { - if (check_presence_only) { - *s = Status::OK(); - return true; - } Slice v = GetLengthPrefixedSlice(key_ptr + key_length); if (merge_in_progress) { merge_operator->Merge(key.user_key(), &v, operand, diff --git a/db/memtable.h b/db/memtable.h index 6a3c7fcfd..73d32fc4c 100644 --- a/db/memtable.h +++ b/db/memtable.h @@ -73,13 +73,12 @@ class MemTable { // If memtable contains a deletion for key, store a NotFound() error // in *status and return true. // If memtable contains Merge operation as the most recent entry for a key, - // and if check_presence_only is set, return true with Status::OK, - // else if the merge process does not stop (not reaching a value or delete), + // and the merge process does not stop (not reaching a value or delete), // store the current merged result in value and MergeInProgress in s. // return false // Else, return false. bool Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only = false); + const Options& options); // Returns the edits area that is needed for flushing the memtable VersionEdit* GetEdits() { return &edit_; } diff --git a/db/memtablelist.cc b/db/memtablelist.cc index ac89d1043..9d8b675bf 100644 --- a/db/memtablelist.cc +++ b/db/memtablelist.cc @@ -194,10 +194,10 @@ size_t MemTableList::ApproximateMemoryUsage() { // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only) { + const Options& options) { for (list::iterator it = memlist_.begin(); it != memlist_.end(); ++it) { - if ((*it)->Get(key, value, s, options, check_presence_only)) { + if ((*it)->Get(key, value, s, options)) { return true; } } diff --git a/db/memtablelist.h b/db/memtablelist.h index d741a6630..b30089cf6 100644 --- a/db/memtablelist.h +++ b/db/memtablelist.h @@ -70,7 +70,7 @@ class MemTableList { // Search all the memtables starting from the most recent one. // Return the most recent value found, if any. bool Get(const LookupKey& key, std::string* value, Status* s, - const Options& options, const bool check_presence_only = false); + const Options& options); // Returns the list of underlying memtables. void GetMemTables(std::vector* list); diff --git a/db/version_set.cc b/db/version_set.cc index 15ff1330f..48588c712 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -238,6 +238,7 @@ struct Saver { SaverState state; const Comparator* ucmp; Slice user_key; + bool* value_found; // Is value set correctly? Used by KeyMayExist std::string* value; const MergeOperator* merge_operator; Logger* logger; @@ -245,13 +246,17 @@ struct Saver { }; } -// Called from TableCache::Get when bloom-filters can't guarantee that key does -// not exist and Get is not permitted to do IO to read the data-block and be -// certain. -// Set the key as Found and let the caller know that key-may-exist +// Called from TableCache::Get and InternalGet when file/block in which key may +// exist are not there in TableCache/BlockCache respectively. In this case we +// can't guarantee that key does not exist and are not permitted to do IO to be +// certain.Set the status=kFound and value_found=false to let the caller know +// that key may exist but is not there in memory static void MarkKeyMayExist(void* arg) { Saver* s = reinterpret_cast(arg); s->state = kFound; + if (s->value_found != nullptr) { + *(s->value_found) = false; + } } static bool SaveValue(void* arg, const Slice& ikey, const Slice& v, bool didIO){ @@ -339,7 +344,8 @@ void Version::Get(const ReadOptions& options, Status *status, GetStats* stats, const Options& db_options, - const bool no_io) { + const bool no_io, + bool* value_found) { Slice ikey = k.internal_key(); Slice user_key = k.user_key(); const Comparator* ucmp = vset_->icmp_.user_comparator(); @@ -355,6 +361,7 @@ void Version::Get(const ReadOptions& options, saver.state = status->ok()? kNotFound : kMerge; saver.ucmp = ucmp; saver.user_key = user_key; + saver.value_found = value_found; saver.value = value; saver.merge_operator = merge_operator; saver.logger = logger.get(); diff --git a/db/version_set.h b/db/version_set.h index 11bfb9961..320ff8e68 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -75,7 +75,7 @@ class Version { }; void Get(const ReadOptions&, const LookupKey& key, std::string* val, Status* status, GetStats* stats, const Options& db_option, - const bool no_io = false); + const bool no_io = false, bool* value_found = nullptr); // Adds "stats" into the current state. Returns true if a new // compaction may need to be triggered, false otherwise. diff --git a/db/write_batch.cc b/db/write_batch.cc index 9845f49ad..a4213cd97 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -16,10 +16,12 @@ #include "leveldb/write_batch.h" +#include "leveldb/options.h" #include "leveldb/statistics.h" #include "db/dbformat.h" #include "db/db_impl.h" #include "db/memtable.h" +#include "db/snapshot.h" #include "db/write_batch_internal.h" #include "util/coding.h" #include @@ -167,9 +169,16 @@ class MemTableInserter : public WriteBatch::Handler { sequence_++; } virtual void Delete(const Slice& key) { - if (filter_deletes_ && !db_->KeyMayExistImpl(key, sequence_)) { - RecordTick(options_->statistics, NUMBER_FILTERED_DELETES); - return; + if (filter_deletes_) { + SnapshotImpl read_from_snapshot; + read_from_snapshot.number_ = sequence_; + ReadOptions ropts; + ropts.snapshot = &read_from_snapshot; + std::string value; + if (!db_->KeyMayExist(ropts, key, &value)) { + RecordTick(options_->statistics, NUMBER_FILTERED_DELETES); + return; + } } mem_->Add(sequence_, kTypeDeletion, key, Slice()); sequence_++; diff --git a/include/leveldb/db.h b/include/leveldb/db.h index e331e7f67..0c056c362 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -104,7 +104,8 @@ class DB { // // May return some other Status on an error. virtual Status Get(const ReadOptions& options, - const Slice& key, std::string* value) = 0; + const Slice& key, + std::string* value) = 0; // If keys[i] does not exist in the database, then the i'th returned // status will be one for which Status::IsNotFound() is true, and @@ -121,11 +122,19 @@ class DB { std::vector* values) = 0; // If the key definitely does not exist in the database, then this method - // returns false. Otherwise return true. This check is potentially - // lighter-weight than invoking DB::Get(). One way to make this lighter weight - // is to avoid doing any IOs - // Default implementation here returns true - virtual bool KeyMayExist(const Slice& key) { + // returns false, else true. If the caller wants to obtain value when the key + // is found in memory, a bool for 'value_found' must be passed. 'value_found' + // will be true on return if value has been set properly. + // This check is potentially lighter-weight than invoking DB::Get(). One way + // to make this lighter weight is to avoid doing any IOs. + // Default implementation here returns true and sets 'value_found' to false + virtual bool KeyMayExist(const ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr) { + if (value_found != nullptr) { + *value_found = false; + } return true; } diff --git a/include/leveldb/options.h b/include/leveldb/options.h index c591ad515..1bf0f6dda 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -473,12 +473,10 @@ struct Options { // Default: 0 uint64_t bytes_per_sync; - // Use bloom-filter for deletes when this is true. - // db->Delete first calls KeyMayExist which checks memtable,immutable-memtable - // and bloom-filters to determine if the key does not exist in the database. - // If the key definitely does not exist, then the delete is a noop.KeyMayExist - // only incurs in-memory look up. This optimization avoids writing the delete - // to storage when appropriate. + // Use KeyMayExist API to filter deletes when this is true. + // If KeyMayExist returns false, i.e. the key definitely does not exist, then + // the delete is a noop. KeyMayExist only incurs in-memory look up. + // This optimization avoids writing the delete to storage when appropriate. // Default: false bool filter_deletes; diff --git a/table/table.cc b/table/table.cc index c51a0aa44..80c5ef491 100644 --- a/table/table.cc +++ b/table/table.cc @@ -235,7 +235,8 @@ Iterator* Table::BlockReader(void* arg, const ReadOptions& options, const Slice& index_value, bool* didIO, - bool for_compaction) { + bool for_compaction, + const bool no_io) { Table* table = reinterpret_cast(arg); Cache* block_cache = table->rep_->options.block_cache.get(); std::shared_ptr statistics = table->rep_->options.statistics; @@ -264,6 +265,8 @@ Iterator* Table::BlockReader(void* arg, block = reinterpret_cast(block_cache->Value(cache_handle)); RecordTick(statistics, BLOCK_CACHE_HIT); + } else if (no_io) { + return nullptr; // Did not find in block_cache and can't do IO } else { Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS : READ_BLOCK_GET_MICROS; @@ -286,7 +289,9 @@ Iterator* Table::BlockReader(void* arg, RecordTick(statistics, BLOCK_CACHE_MISS); } - } else { + } else if (no_io) { + return nullptr; // Could not read from block_cache and can't do IO + }else { s = ReadBlock(table->rep_->file.get(), options, handle, &block, didIO); } } @@ -340,16 +345,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k, // cross one data block, we should be fine. RecordTick(rep_->options.statistics, BLOOM_FILTER_USEFUL); break; - } else if (no_io) { - // Update Saver.state to Found because we are only looking for whether - // bloom-filter can guarantee the key is not there when "no_io" - (*mark_key_may_exist)(arg); - done = true; } else { bool didIO = false; Iterator* block_iter = BlockReader(this, options, iiter->value(), - &didIO); + &didIO, no_io); + if (no_io && !block_iter) { // couldn't get block from block_cache + // Update Saver.state to Found because we are only looking for whether + // we can guarantee the key is not there when "no_io" is set + (*mark_key_may_exist)(arg); + break; + } for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) { if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) { done = true; diff --git a/table/table.h b/table/table.h index 0be95f368..b39a5c186 100644 --- a/table/table.h +++ b/table/table.h @@ -77,7 +77,8 @@ class Table { const EnvOptions& soptions, const Slice&, bool for_compaction); static Iterator* BlockReader(void*, const ReadOptions&, const Slice&, - bool* didIO, bool for_compaction = false); + bool* didIO, bool for_compaction = false, + const bool no_io = false); // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found // after a call to Seek(key), until handle_result returns false. diff --git a/tools/db_stress.cc b/tools/db_stress.cc index fcd5dc269..ca6602373 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -180,7 +180,7 @@ static uint32_t FLAGS_log2_keys_per_lock = 2; // implies 2^2 keys per lock // Percentage of times we want to purge redundant keys in memory before flushing static uint32_t FLAGS_purge_redundant_percent = 50; -// On true, deletes use bloom-filter and drop the delete if key not present +// On true, deletes use KeyMayExist to drop the delete if key not present static bool FLAGS_filter_deletes = false; // Level0 compaction start trigger diff --git a/utilities/ttl/db_ttl.cc b/utilities/ttl/db_ttl.cc index 7dfda7207..90cc1ec4c 100644 --- a/utilities/ttl/db_ttl.cc +++ b/utilities/ttl/db_ttl.cc @@ -158,8 +158,11 @@ std::vector DBWithTTL::MultiGet(const ReadOptions& options, supported with TTL")); } -bool DBWithTTL::KeyMayExist(const Slice& key) { - return db_->KeyMayExist(key); +bool DBWithTTL::KeyMayExist(ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found) { + return db_->KeyMayExist(options, key, value, value_found); } Status DBWithTTL::Delete(const WriteOptions& wopts, const Slice& key) { diff --git a/utilities/ttl/db_ttl.h b/utilities/ttl/db_ttl.h index d66e396ca..078a069ba 100644 --- a/utilities/ttl/db_ttl.h +++ b/utilities/ttl/db_ttl.h @@ -33,7 +33,10 @@ class DBWithTTL : public DB, CompactionFilter { const std::vector& keys, std::vector* values); - virtual bool KeyMayExist(const Slice& key); + virtual bool KeyMayExist(ReadOptions& options, + const Slice& key, + std::string* value, + bool* value_found = nullptr); virtual Status Delete(const WriteOptions& wopts, const Slice& key);