Stats for false positive rate of full filtesr

Summary:
Adds two stats to allow us measuring the false positive rate of full filters:
- The total count of positives: rocksdb.bloom.filter.full.positive
- The total count of true positives: rocksdb.bloom.filter.full.true.positive
Not the term "full" in the stat name to indicate that they are meaningful in full filters. block-based filters are to be deprecated soon and supporting it is not worth the the additional cost of if-then-else branches.

Closes #3680

Tested by:
$ ./db_bench -benchmarks=fillrandom  -db /dev/shm/rocksdb-tmpdb --num=1000000 -bloom_bits=10
$ ./db_bench -benchmarks="readwhilewriting"  -db /dev/shm/rocksdb-tmpdb --statistics -bloom_bits=10 --duration=60 --num=2000000 --use_existing_db 2>&1 > /tmp/full.log
$ grep filter.full /tmp/full.log
rocksdb.bloom.filter.full.positive COUNT : 3628593
rocksdb.bloom.filter.full.true.positive COUNT : 3536026
which gives the false positive rate of 2.5%
Closes https://github.com/facebook/rocksdb/pull/3681

Differential Revision: D7517570

Pulled By: maysamyabandeh

fbshipit-source-id: 630ab1a473afdce404916d297035b6318de4c052
main
Maysam Yabandeh 7 years ago committed by Facebook Github Bot
parent 685912d07f
commit 67182678a5
  1. 13
      include/rocksdb/statistics.h
  2. 25
      table/block_based_table_reader.cc
  3. 3
      table/cuckoo_table_reader.cc
  4. 8
      table/get_context.cc
  5. 5
      table/get_context.h
  6. 3
      table/mock_table.cc
  7. 3
      table/plain_table_reader.cc

@ -71,8 +71,13 @@ enum Tickers : uint32_t {
// # of bytes written into cache.
BLOCK_CACHE_BYTES_WRITE,
// # of times bloom filter has avoided file reads.
// # of times bloom filter has avoided file reads, i.e., negatives.
BLOOM_FILTER_USEFUL,
// # of times bloom FullFilter has not avoided the reads.
BLOOM_FILTER_FULL_POSITIVE,
// # of times bloom FullFilter has not avoided the reads and data actually
// exist.
BLOOM_FILTER_FULL_TRUE_POSITIVE,
// # persistent cache hit
PERSISTENT_CACHE_HIT,
@ -332,6 +337,9 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
{BLOCK_CACHE_BYTES_READ, "rocksdb.block.cache.bytes.read"},
{BLOCK_CACHE_BYTES_WRITE, "rocksdb.block.cache.bytes.write"},
{BLOOM_FILTER_USEFUL, "rocksdb.bloom.filter.useful"},
{BLOOM_FILTER_FULL_POSITIVE, "rocksdb.bloom.filter.full.positive"},
{BLOOM_FILTER_FULL_TRUE_POSITIVE,
"rocksdb.bloom.filter.full.true.positive"},
{PERSISTENT_CACHE_HIT, "rocksdb.persistent.cache.hit"},
{PERSISTENT_CACHE_MISS, "rocksdb.persistent.cache.miss"},
{SIM_BLOCK_CACHE_HIT, "rocksdb.sim.block.cache.hit"},
@ -349,8 +357,7 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
"rocksdb.compaction.range_del.drop.obsolete"},
{COMPACTION_OPTIMIZED_DEL_DROP_OBSOLETE,
"rocksdb.compaction.optimized.del.drop.obsolete"},
{COMPACTION_CANCELLED,
"rocksdb.compaction.cancelled"},
{COMPACTION_CANCELLED, "rocksdb.compaction.cancelled"},
{NUMBER_KEYS_WRITTEN, "rocksdb.number.keys.written"},
{NUMBER_KEYS_READ, "rocksdb.number.keys.read"},
{NUMBER_KEYS_UPDATED, "rocksdb.number.keys.updated"},

@ -2030,19 +2030,23 @@ bool BlockBasedTable::FullFilterKeyMayMatch(const ReadOptions& read_options,
}
Slice user_key = ExtractUserKey(internal_key);
const Slice* const const_ikey_ptr = &internal_key;
bool may_match = true;
if (filter->whole_key_filtering()) {
return filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
}
if (!read_options.total_order_seek && rep_->ioptions.prefix_extractor &&
may_match = filter->KeyMayMatch(user_key, kNotValid, no_io, const_ikey_ptr);
} else if (!read_options.total_order_seek &&
rep_->ioptions.prefix_extractor &&
rep_->table_properties->prefix_extractor_name.compare(
rep_->ioptions.prefix_extractor->Name()) == 0 &&
rep_->ioptions.prefix_extractor->InDomain(user_key) &&
!filter->PrefixMayMatch(
rep_->ioptions.prefix_extractor->Transform(user_key), kNotValid,
false, const_ikey_ptr)) {
return false;
rep_->ioptions.prefix_extractor->Transform(user_key),
kNotValid, false, const_ikey_ptr)) {
may_match = false;
}
return true;
if (may_match) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_POSITIVE);
}
return may_match;
}
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
@ -2070,6 +2074,7 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
iiter_unique_ptr.reset(iiter);
}
bool matched = false; // if such user key mathced a key in SST
bool done = false;
for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) {
Slice handle_value = iiter->value();
@ -2111,7 +2116,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
s = Status::Corruption(Slice());
}
if (!get_context->SaveValue(parsed_key, biter.value(), &biter)) {
if (!get_context->SaveValue(parsed_key, biter.value(), &matched,
&biter)) {
done = true;
break;
}
@ -2123,6 +2129,9 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
break;
}
}
if (matched && filter != nullptr && !filter->IsBlockBased()) {
RecordTick(rep_->ioptions.statistics, BLOOM_FILTER_FULL_TRUE_POSITIVE);
}
if (s.ok()) {
s = iiter->status();
}

@ -170,7 +170,8 @@ Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
Slice full_key(bucket, key_length_);
ParsedInternalKey found_ikey;
ParseInternalKey(full_key, &found_ikey);
get_context->SaveValue(found_ikey, value);
bool dont_care __attribute__((__unused__));
get_context->SaveValue(found_ikey, value, &dont_care);
}
// We don't support merge operations. So, we return here.
return Status::OK();

@ -95,10 +95,13 @@ void GetContext::RecordCounters(Tickers ticker, size_t val) {
}
bool GetContext::SaveValue(const ParsedInternalKey& parsed_key,
const Slice& value, Cleanable* value_pinner) {
const Slice& value, bool* matched,
Cleanable* value_pinner) {
assert(matched);
assert((state_ != kMerge && parsed_key.type != kTypeMerge) ||
merge_context_ != nullptr);
if (ucmp_->Equal(parsed_key.user_key, user_key_)) {
*matched = true;
// If the value is not in the snapshot, skip it
if (!CheckCallback(parsed_key.sequence)) {
return true; // to continue to the next seq
@ -231,11 +234,12 @@ void replayGetContextLog(const Slice& replay_log, const Slice& user_key,
assert(ret);
(void)ret;
bool dont_care __attribute__((__unused__));
// Since SequenceNumber is not stored and unknown, we will use
// kMaxSequenceNumber.
get_context->SaveValue(
ParsedInternalKey(user_key, kMaxSequenceNumber, type), value,
value_pinner);
&dont_care, value_pinner);
}
#else // ROCKSDB_LITE
assert(false);

@ -42,10 +42,13 @@ class GetContext {
// Records this key, value, and any meta-data (such as sequence number and
// state) into this GetContext.
//
// If the parsed_key matches the user key that we are looking for, sets
// mathced to true.
//
// Returns True if more keys need to be read (due to merges) or
// False if the complete value has been found.
bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
Cleanable* value_pinner = nullptr);
bool* matched, Cleanable* value_pinner = nullptr);
// Simplified version of the previous function. Should only be used when we
// know that the operation is a Put.

@ -41,7 +41,8 @@ Status MockTableReader::Get(const ReadOptions&, const Slice& key,
return Status::Corruption(Slice());
}
if (!get_context->SaveValue(parsed_key, iter->value())) {
bool dont_care __attribute__((__unused__));
if (!get_context->SaveValue(parsed_key, iter->value(), &dont_care)) {
break;
}
}

@ -594,7 +594,8 @@ Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
// TODO(ljin): since we know the key comparison result here,
// can we enable the fast path?
if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
if (!get_context->SaveValue(found_key, found_value)) {
bool dont_care __attribute__((__unused__));
if (!get_context->SaveValue(found_key, found_value, &dont_care)) {
break;
}
}

Loading…
Cancel
Save