Stats for false positive rate of full filters

Summary:
Adds two stats that allow us to measure the false positive rate of full filters:
- The total count of positives: rocksdb.bloom.filter.full.positive
- The total count of true positives: rocksdb.bloom.filter.full.true.positive
Note the term "full" in the stat names, which indicates that they are meaningful for full filters. Block-based filters are to be deprecated soon, and supporting them is not worth the additional cost of if-then-else branches.

Closes #3680

Tested by:
$ ./db_bench -benchmarks=fillrandom  -db /dev/shm/rocksdb-tmpdb --num=1000000 -bloom_bits=10
$ ./db_bench -benchmarks="readwhilewriting"  -db /dev/shm/rocksdb-tmpdb --statistics -bloom_bits=10 --duration=60 --num=2000000 --use_existing_db 2>&1 > /tmp/full.log
$ grep filter.full /tmp/full.log
rocksdb.bloom.filter.full.positive COUNT : 3628593
rocksdb.bloom.filter.full.true.positive COUNT : 3536026
which gives a false positive rate of roughly 2.5%, i.e., (positive - true positive) / positive = (3628593 - 3536026) / 3628593 ≈ 0.0255
Closes https://github.com/facebook/rocksdb/pull/3681

Differential Revision: D7517570

Pulled By: maysamyabandeh

fbshipit-source-id: 630ab1a473afdce404916d297035b6318de4c052
main
Maysam Yabandeh 7 years ago committed by Facebook Github Bot
parent 685912d07f
commit 67182678a5
  1. 13
      include/rocksdb/statistics.h
  2. 33
      table/block_based_table_reader.cc
  3. 3
      table/cuckoo_table_reader.cc
  4. 8
      table/get_context.cc
  5. 5
      table/get_context.h
  6. 3
      table/mock_table.cc
  7. 3
      table/plain_table_reader.cc

@ -71,8 +71,13 @@ enum Tickers : uint32_t {
// # of bytes written into cache. // # of bytes written into cache.
BLOCK_CACHE_BYTES_WRITE, BLOCK_CACHE_BYTES_WRITE,
// # of times bloom filter has avoided file reads. // # of times bloom filter has avoided file reads, i.e., negatives.
BLOOM_FILTER_USEFUL, BLOOM_FILTER_USEFUL,
// # of times bloom FullFilter has not avoided the reads.
BLOOM_FILTER_FULL_POSITIVE,
// # of times bloom FullFilter has not avoided the reads and data actually
// exist.
BLOOM_FILTER_FULL_TRUE_POSITIVE,
// # persistent cache hit // # persistent cache hit
PERSISTENT_CACHE_HIT, PERSISTENT_CACHE_HIT,
@ -332,6 +337,9 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"}, {BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"},
{BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"}, {BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"},
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"}, {BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
{BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"},
{BLOOM_FILTER_FULL_TRUE_POSITIVE,
"rocksdb.bloom.filter.full.true.positive"},
{PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"}, {PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"},
{PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"}, {PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"},
{SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"}, {SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"},
@ -349,8 +357,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
"rocksdb.compaction.range_del.drop.obsolete"}, "rocksdb.compaction.range_del.drop.obsolete"},
{COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE, {COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
"rocksdb.compaction.optimized.del.drop.obsolete"}, "rocksdb.compaction.optimized.del.drop.obsolete"},
{COMPACTION_CANCELLED, {COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"},
"rocksdb.compaction.cancelled"},
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"}, {NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"}, {NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"}, {NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},

@ -2030,19 +2030,23 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
} }
Slice user_key = ExtractUserKey(internal_key); Slice user_key = ExtractUserKey(internal_key);
const Slice* const const_ikey_ptr = &internal_key; const Slice* const const_ikey_ptr = &internal_key;
bool may_match = true;
if (filter->whole_key_filtering()) { if (filter->whole_key_filtering()) {
return filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr); may_match = filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
} } else if (!read_options.total_order_seek &&
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor && rep_->ioptions.prefix_extractor &&
rep_->table_properties->prefix_extractor_name.compare( rep_->table_properties->prefix_extractor_name.compare(
rep_->ioptions.prefix_extractor->Name()) == 0 && rep_->ioptions.prefix_extractor->Name()) == 0 &&
rep_->ioptions.prefix_extractor->InDomain(user_key) && rep_->ioptions.prefix_extractor->InDomain(user_key) &&
!filter->PrefixMayMatch( !filter->PrefixMayMatch(
rep_->ioptions.prefix_extractor->Transform(user_key), kNotValid, rep_->ioptions.prefix_extractor->Transform(user_key),
false, const_ikey_ptr)) { kNotValid, false, const_ikey_ptr)) {
return false; may_match = false;
}
if (may_match) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
} }
return true; return may_match;
} }
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key, Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
@ -2070,6 +2074,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
iiter_unique_ptr.reset(iiter); iiter_unique_ptr.reset(iiter);
} }
bool matched = false; // if such user key mathced a key in SST
bool done = false; bool done = false;
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
Slice handle_value = iiter->value(); Slice handle_value = iiter->value();
@ -2111,7 +2116,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
s = Status::Corruption(Slice()); s = Status::Corruption(Slice());
} }
if (!get_context->SaveValue(parsed_key, biter.value(), &biter)) { if (!get_context->SaveValue(parsed_key, biter.value(), &matched,
&biter)) {
done = true; done = true;
break; break;
} }
@ -2123,6 +2129,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
break; break;
} }
} }
if (matched && filter != nullptr && !filter->IsBlockBased()) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
}
if (s.ok()) { if (s.ok()) {
s = iiter->status(); s = iiter->status();
} }

@ -170,7 +170,8 @@ Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
Slice full_key(bucket, key_length_); Slice full_key(bucket, key_length_);
ParsedInternalKey found_ikey; ParsedInternalKey found_ikey;
ParseInternalKey(full_key, &found_ikey); ParseInternalKey(full_key, &found_ikey);
get_context->SaveValue(found_ikey, value); bool dont_care __attribute__((__unused__));
get_context->SaveValue(found_ikey, value, &dont_care);
} }
// We don't support merge operations. So, we return here. // We don't support merge operations. So, we return here.
return Status::OK(); return Status::OK();

@ -95,10 +95,13 @@ void GetContext::RecordCounters(Tickers ticker, size_t val) {
} }
bool GetContext::SaveValue(const ParsedInternalKey& parsed_key, bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
const Slice& value, Cleanable* value_pinner) { const Slice& value, bool* matched,
Cleanable* value_pinner) {
assert(matched);
assert((state_ != kMerge && parsed_key.type != kTypeMerge) || assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
merge_context_ != nullptr); merge_context_ != nullptr);
if (ucmp_->Equal(parsed_key.user_key, user_key_)) { if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
*matched = true;
// If the value is not in the snapshot, skip it // If the value is not in the snapshot, skip it
if (!CheckCallback(parsed_key.sequence)) { if (!CheckCallback(parsed_key.sequence)) {
return true; // to continue to the next seq return true; // to continue to the next seq
@ -231,11 +234,12 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
assert(ret); assert(ret);
(void)ret; (void)ret;
bool dont_care __attribute__((__unused__));
// Since SequenceNumber is not stored and unknown, we will use // Since SequenceNumber is not stored and unknown, we will use
// kMaxSequenceNumber. // kMaxSequenceNumber.
get_context->SaveValue( get_context->SaveValue(
ParsedInternalKey(user_key, kMaxSequenceNumber, type), value, ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
value_pinner); &dont_care, value_pinner);
} }
#else // ROCKSDB_LITE #else // ROCKSDB_LITE
assert(false); assert(false);

@ -42,10 +42,13 @@ class GetContext {
// Records this key, value, and any meta-data (such as sequence number and // Records this key, value, and any meta-data (such as sequence number and
// state) into this GetContext. // state) into this GetContext.
// //
// If the parsed_key matches the user key that we are looking for, sets
// mathced to true.
//
// Returns True if more keys need to be read (due to merges) or // Returns True if more keys need to be read (due to merges) or
// False if the complete value has been found. // False if the complete value has been found.
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value, bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
Cleanable* value_pinner = nullptr); bool* matched, Cleanable* value_pinner = nullptr);
// Simplified version of the previous function. Should only be used when we // Simplified version of the previous function. Should only be used when we
// know that the operation is a Put. // know that the operation is a Put.

@ -41,7 +41,8 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key,
return Status::Corruption(Slice()); return Status::Corruption(Slice());
} }
if (!get_context->SaveValue(parsed_key, iter->value())) { bool dont_care __attribute__((__unused__));
if (!get_context->SaveValue(parsed_key, iter->value(), &dont_care)) {
break; break;
} }
} }

@ -594,7 +594,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
// TODO(ljin): since we know the key comparison result here, // TODO(ljin): since we know the key comparison result here,
// can we enable the fast path? // can we enable the fast path?
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) { if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
if (!get_context->SaveValue(found_key, found_value)) { bool dont_care __attribute__((__unused__));
if (!get_context->SaveValue(found_key, found_value, &dont_care)) {
break; break;
} }
} }

Loading…
Cancel
Save