bloom hit/miss stats for SST and memtable

Summary:
	hit and miss bloom filter stats for memtable and SST
	stats added to perf_context struct
	key matches and prefix matches combined into one stat

Test Plan: unit test verifying the added functionality; see BloomStatsTest in db_test.cc for details

Reviewers: yhchiang, igor, sdong

Reviewed By: sdong

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D47859
main
dyniusz 9 years ago
parent 40cdf797d2
commit a065cdb388
  1. 153
      db/db_test.cc
  2. 25
      db/memtable.cc
  3. 8
      include/rocksdb/perf_context.h
  4. 12
      table/block_based_filter_block.cc
  5. 9
      table/full_filter_block.cc
  6. 12
      table/plain_table_reader.cc
  7. 8
      util/perf_context.cc

@ -131,6 +131,46 @@ class DBTestWithParam : public DBTest,
uint32_t max_subcompactions_; uint32_t max_subcompactions_;
}; };
// Fixture for the bloom filter statistics tests.
//
// Parameterized on a tuple of two booleans:
//   <0> use_block_table_         - use the block based table factory,
//                                  otherwise the plain table factory
//   <1> use_block_based_builder_ - forwarded as the second argument of
//                                  NewBloomFilterPolicy(); only read when
//                                  the block based table is selected
class BloomStatsTestWithParam
    : public DBTest,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  BloomStatsTestWithParam() {
    const auto& params = GetParam();
    use_block_table_ = std::get<0>(params);
    use_block_based_builder_ = std::get<1>(params);

    // Options shared by every parameterization: a prefix extractor and a
    // memtable prefix bloom, so both memtable and SST lookups go through
    // a bloom filter.
    options_.create_if_missing = true;
    options_.prefix_extractor.reset(rocksdb::NewFixedPrefixTransform(4));
    options_.memtable_prefix_bloom_bits = 8 * 1024;

    // Select the SST format under test.
    if (!use_block_table_) {
      PlainTableOptions plain_opts;
      options_.table_factory.reset(NewPlainTableFactory(plain_opts));
    } else {
      BlockBasedTableOptions block_opts;
      block_opts.hash_index_allow_collision = false;
      block_opts.filter_policy.reset(
          NewBloomFilterPolicy(10, use_block_based_builder_));
      options_.table_factory.reset(NewBlockBasedTableFactory(block_opts));
    }

    // Start every test from zeroed counters and a fresh database.
    perf_context.Reset();
    DestroyAndReopen(options_);
  }

  ~BloomStatsTestWithParam() {
    // Leave no counters or database behind for the next test.
    perf_context.Reset();
    Destroy(options_);
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  bool use_block_table_;          // first tuple element
  bool use_block_based_builder_;  // second tuple element
  Options options_;               // options the database is opened with
};
TEST_F(DBTest, Empty) { TEST_F(DBTest, Empty) {
do { do {
Options options; Options options;
@ -9709,6 +9749,119 @@ TEST_F(DBTest, PauseBackgroundWorkTest) {
ASSERT_EQ(true, done.load()); ASSERT_EQ(true, done.load());
} }
// Verifies the bloom hit/miss perf counters for point lookups via Get().
// 1 Insert 2 K-V pairs into DB
// 2 Call Get() for both keys - expect memtable bloom hit stat to be 2
// 3 Call Get() for nonexisting key - expect memtable bloom miss stat to be 1
// 4 Call Flush() to create SST
// 5 Call Get() for both keys - expect SST bloom hits to grow by
//   hits_per_get on each call
// 6 Call Get() for nonexisting key - expect SST bloom miss stat to be 1
// Test both: block and plain SST
// The counters are cumulative; the fixture constructor resets them before
// the test body runs.
TEST_P(BloomStatsTestWithParam, BloomStatsTest) {
std::string key1("AAAA");
std::string key2("RXDB"); // not in DB
std::string key3("ZBRA");
std::string value1("Value1");
std::string value3("Value3");
// Only key1 and key3 are written; key2 is never inserted.
ASSERT_OK(Put(key1, value1, WriteOptions()));
ASSERT_OK(Put(key3, value3, WriteOptions()));
// check memtable bloom stats
// Each successful Get() is expected to add exactly one memtable hit.
ASSERT_EQ(value1, Get(key1));
ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(value3, Get(key3));
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
// The lookup of the absent key is expected to add one miss and no hit.
ASSERT_EQ("NOT_FOUND", Get(key2));
ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
// sanity checks
// No SST counter may have moved before the flush.
ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
Flush();
// sanity checks
// Flushing by itself must not touch the SST bloom counters.
ASSERT_EQ(0, perf_context.bloom_sst_hit_count);
ASSERT_EQ(0, perf_context.bloom_sst_miss_count);
// check SST bloom stats
// NOTE: hits per get differs because of code paths differences
// in BlockBasedTable::Get()
// Two hits per Get() are expected when the block based table is used
// without the block based filter builder, one hit in every other case.
int hits_per_get = use_block_table_ && !use_block_based_builder_ ? 2 : 1;
ASSERT_EQ(value1, Get(key1));
ASSERT_EQ(hits_per_get, perf_context.bloom_sst_hit_count);
ASSERT_EQ(value3, Get(key3));
ASSERT_EQ(2 * hits_per_get, perf_context.bloom_sst_hit_count);
// The absent key is expected to produce exactly one SST miss.
ASSERT_EQ("NOT_FOUND", Get(key2));
ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
}
// Same scenario as in BloomStatsTest but using an iterator:
// Seek() to two existing keys and one missing key, first while the data is
// in the memtable and then again, with a new iterator, after Flush().
// The counters are cumulative; the fixture constructor resets them before
// the test body runs.
TEST_P(BloomStatsTestWithParam, BloomStatsTestWithIter) {
std::string key1("AAAA");
std::string key2("RXDB"); // not in DB
std::string key3("ZBRA");
std::string value1("Value1");
std::string value3("Value3");
// Only key1 and key3 are written; key2 is never inserted.
ASSERT_OK(Put(key1, value1, WriteOptions()));
ASSERT_OK(Put(key3, value3, WriteOptions()));
unique_ptr<Iterator> iter(dbfull()->NewIterator(ReadOptions()));
// check memtable bloom stats
// Each Seek() to an existing key is expected to add one memtable hit.
iter->Seek(key1);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value1, iter->value().ToString());
ASSERT_EQ(1, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
iter->Seek(key3);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value3, iter->value().ToString());
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
ASSERT_EQ(0, perf_context.bloom_memtable_miss_count);
// Seeking to the absent key must leave the iterator invalid (without an
// error status) and add one miss while the hit count stays unchanged.
iter->Seek(key2);
ASSERT_OK(iter->status());
ASSERT_TRUE(!iter->Valid());
ASSERT_EQ(1, perf_context.bloom_memtable_miss_count);
ASSERT_EQ(2, perf_context.bloom_memtable_hit_count);
Flush();
// A new iterator is created after the flush for the SST checks.
iter.reset(dbfull()->NewIterator(ReadOptions()));
// check SST bloom stats
// Unlike the Get() based test, one hit per Seek() is expected for every
// parameterization.
iter->Seek(key1);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value1, iter->value().ToString());
ASSERT_EQ(1, perf_context.bloom_sst_hit_count);
iter->Seek(key3);
ASSERT_OK(iter->status());
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(value3, iter->value().ToString());
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
// Seeking to the absent key must add one SST miss and no further hit.
iter->Seek(key2);
ASSERT_OK(iter->status());
ASSERT_TRUE(!iter->Valid());
ASSERT_EQ(1, perf_context.bloom_sst_miss_count);
ASSERT_EQ(2, perf_context.bloom_sst_hit_count);
}
// Runs every BloomStatsTestWithParam test once per tuple of
// (use_block_table_, use_block_based_builder_):
//   (true,  true)  - block based table, block based filter builder
//   (true,  false) - block based table, filter policy created with
//                    use_block_based_builder == false
//   (false, false) - plain table; the second flag is not consulted
INSTANTIATE_TEST_CASE_P(BloomStatsTestWithParam, BloomStatsTestWithParam,
::testing::Values(std::make_tuple(true, true),
std::make_tuple(true, false),
std::make_tuple(false, false)));
} // namespace rocksdb } // namespace rocksdb
#endif #endif

@ -230,10 +230,15 @@ class MemTableIterator: public Iterator {
virtual void Seek(const Slice& k) override { virtual void Seek(const Slice& k) override {
PERF_TIMER_GUARD(seek_on_memtable_time); PERF_TIMER_GUARD(seek_on_memtable_time);
PERF_COUNTER_ADD(seek_on_memtable_count, 1); PERF_COUNTER_ADD(seek_on_memtable_count, 1);
if (bloom_ != nullptr && if (bloom_ != nullptr) {
!bloom_->MayContain(prefix_extractor_->Transform(ExtractUserKey(k)))) { if (!bloom_->MayContain(
valid_ = false; prefix_extractor_->Transform(ExtractUserKey(k)))) {
return; PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
valid_ = false;
return;
} else {
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
}
} }
iter_->Seek(k, nullptr); iter_->Seek(k, nullptr);
valid_ = iter_->Valid(); valid_ = iter_->Valid();
@ -508,12 +513,18 @@ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s,
Slice user_key = key.user_key(); Slice user_key = key.user_key();
bool found_final_value = false; bool found_final_value = false;
bool merge_in_progress = s->IsMergeInProgress(); bool merge_in_progress = s->IsMergeInProgress();
bool const may_contain =
if (prefix_bloom_ && nullptr == prefix_bloom_
!prefix_bloom_->MayContain(prefix_extractor_->Transform(user_key))) { ? false
: prefix_bloom_->MayContain(prefix_extractor_->Transform(user_key));
if (prefix_bloom_ && !may_contain) {
// iter is null if prefix bloom says the key does not exist // iter is null if prefix bloom says the key does not exist
PERF_COUNTER_ADD(bloom_memtable_miss_count, 1);
*seq = kMaxSequenceNumber; *seq = kMaxSequenceNumber;
} else { } else {
if (prefix_bloom_) {
PERF_COUNTER_ADD(bloom_memtable_hit_count, 1);
}
Saver saver; Saver saver;
saver.status = s; saver.status = s;
saver.found_final_value = &found_final_value; saver.found_final_value = &found_final_value;

@ -83,6 +83,14 @@ struct PerfContext {
uint64_t block_seek_nanos; uint64_t block_seek_nanos;
// Time spent on finding or creating a table reader // Time spent on finding or creating a table reader
uint64_t find_table_nanos; uint64_t find_table_nanos;
// total number of mem table bloom hits
uint64_t bloom_memtable_hit_count;
// total number of mem table bloom misses
uint64_t bloom_memtable_miss_count;
// total number of SST table bloom hits
uint64_t bloom_sst_hit_count;
// total number of SST table bloom misses
uint64_t bloom_sst_miss_count;
}; };
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE)

@ -7,12 +7,13 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <algorithm>
#include "table/block_based_filter_block.h" #include "table/block_based_filter_block.h"
#include <algorithm>
#include "db/dbformat.h" #include "db/dbformat.h"
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/perf_context_imp.h"
#include "util/string_util.h" #include "util/string_util.h"
namespace rocksdb { namespace rocksdb {
@ -219,7 +220,14 @@ bool BlockBasedFilterBlockReader::MayMatch(const Slice& entry,
uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4); uint32_t limit = DecodeFixed32(offset_ + index * 4 + 4);
if (start <= limit && limit <= (uint32_t)(offset_ - data_)) { if (start <= limit && limit <= (uint32_t)(offset_ - data_)) {
Slice filter = Slice(data_ + start, limit - start); Slice filter = Slice(data_ + start, limit - start);
return policy_->KeyMayMatch(entry, filter); bool const may_match = policy_->KeyMayMatch(entry, filter);
if (may_match) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
} else if (start == limit) { } else if (start == limit) {
// Empty filters do not match any entries // Empty filters do not match any entries
return false; return false;

@ -8,6 +8,7 @@
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "port/port.h" #include "port/port.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/perf_context_imp.h"
namespace rocksdb { namespace rocksdb {
@ -89,7 +90,13 @@ bool FullFilterBlockReader::PrefixMayMatch(const Slice& prefix,
bool FullFilterBlockReader::MayMatch(const Slice& entry) { bool FullFilterBlockReader::MayMatch(const Slice& entry) {
if (contents_.size() != 0) { if (contents_.size() != 0) {
return filter_bits_reader_->MayMatch(entry); if (filter_bits_reader_->MayMatch(entry)) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
} }
return true; // remain the same with block_based filter return true; // remain the same with block_based filter
} }

@ -488,7 +488,17 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix,
} }
bool PlainTableReader::MatchBloom(uint32_t hash) const { bool PlainTableReader::MatchBloom(uint32_t hash) const {
return !enable_bloom_ || bloom_.MayContainHash(hash); if (!enable_bloom_) {
return true;
}
if (bloom_.MayContainHash(hash)) {
PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
return true;
} else {
PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
return false;
}
} }
Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset, Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset,

@ -54,6 +54,10 @@ void PerfContext::Reset() {
new_table_iterator_nanos = 0; new_table_iterator_nanos = 0;
block_seek_nanos = 0; block_seek_nanos = 0;
find_table_nanos = 0; find_table_nanos = 0;
bloom_memtable_hit_count = 0;
bloom_memtable_miss_count = 0;
bloom_sst_hit_count = 0;
bloom_sst_miss_count = 0;
#endif #endif
} }
@ -80,7 +84,9 @@ std::string PerfContext::ToString() const {
<< OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time) << OUTPUT(merge_operator_time_nanos) << OUTPUT(write_delay_time)
<< OUTPUT(read_index_block_nanos) << OUTPUT(read_filter_block_nanos) << OUTPUT(read_index_block_nanos) << OUTPUT(read_filter_block_nanos)
<< OUTPUT(new_table_block_iter_nanos) << OUTPUT(new_table_iterator_nanos) << OUTPUT(new_table_block_iter_nanos) << OUTPUT(new_table_iterator_nanos)
<< OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos); << OUTPUT(block_seek_nanos) << OUTPUT(find_table_nanos)
<< OUTPUT(bloom_memtable_hit_count) << OUTPUT(bloom_memtable_miss_count)
<< OUTPUT(bloom_sst_hit_count) << OUTPUT(bloom_sst_miss_count);
return ss.str(); return ss.str();
#endif #endif
} }

Loading…
Cancel
Save