Log warning for high bits/key in legacy Bloom filter (#6312)

Summary:
Help users who would benefit most from the new Bloom filter
implementation by logging a warning that recommends using
format_version >= 5.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6312

Test Plan:
$ (for BPK in 10 13 14 19 20 50; do ./filter_bench -quick -impl=0 -bits_per_key=$BPK -m_queries=1 2>&1; done) | grep 'its/key'
    Bits/key actual: 10.0647
    Bits/key actual: 13.0593
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (14) bits/key. Significant filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 14.0581
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (19) bits/key. Significant filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 19.0542
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (20) bits/key. Dramatic filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 20.0584
    [WARN] [/block_based/filter_policy.cc:546] Using legacy Bloom filter with high (50) bits/key. Dramatic filter space and/or accuracy improvement is available with format_version>=5.
    Bits/key actual: 50.0577

Differential Revision: D19457191

Pulled By: pdillinger

fbshipit-source-id: 073d94cde5c70e03a160f953e1100c15ea83eda4
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent 931876e86e
commit 4b86fe1123
  1. BIN
      docs/static/images/bloom_fp_vs_bpk.png
  2. 3
      include/rocksdb/filter_policy.h
  3. 1
      table/block_based/block_based_table_builder.cc
  4. 20
      table/block_based/filter_policy.cc
  5. 5
      table/block_based/filter_policy_internal.h
  6. 1
      table/block_based/mock_block_based_table.h
  7. 4
      util/filter_bench.cc

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

@ -101,6 +101,9 @@ struct FilterBuildingContext {
// The table level at time of constructing the SST file, or -1 if unknown. // The table level at time of constructing the SST file, or -1 if unknown.
// (The table file could later be used at a different level.) // (The table file could later be used at a different level.)
int level_at_creation = -1; int level_at_creation = -1;
// An optional logger for reporting errors, warnings, etc.
Logger* info_log = nullptr;
}; };
// We add a new format of filter block called full filter block // We add a new format of filter block called full filter block

@ -425,6 +425,7 @@ struct BlockBasedTableBuilder::Rep {
context.column_family_name = column_family_name; context.column_family_name = column_family_name;
context.compaction_style = ioptions.compaction_style; context.compaction_style = ioptions.compaction_style;
context.level_at_creation = level_at_creation; context.level_at_creation = level_at_creation;
context.info_log = ioptions.info_log;
filter_builder.reset(CreateFilterBlockBuilder( filter_builder.reset(CreateFilterBlockBuilder(
ioptions, moptions, context, use_delta_encoding_for_index_values, ioptions, moptions, context, use_delta_encoding_for_index_values,
p_index_builder_)); p_index_builder_));

@ -419,7 +419,7 @@ const std::vector<BloomFilterPolicy::Mode> BloomFilterPolicy::kAllUserModes = {
}; };
BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode) BloomFilterPolicy::BloomFilterPolicy(double bits_per_key, Mode mode)
: mode_(mode) { : mode_(mode), warned_(false) {
// Sanitize bits_per_key // Sanitize bits_per_key
if (bits_per_key < 1.0) { if (bits_per_key < 1.0) {
bits_per_key = 1.0; bits_per_key = 1.0;
@ -527,6 +527,24 @@ FilterBitsBuilder* BloomFilterPolicy::GetBuilderWithContext(
case kFastLocalBloom: case kFastLocalBloom:
return new FastLocalBloomBitsBuilder(millibits_per_key_); return new FastLocalBloomBitsBuilder(millibits_per_key_);
case kLegacyBloom: case kLegacyBloom:
if (whole_bits_per_key_ >= 14 && context.info_log &&
!warned_.load(std::memory_order_relaxed)) {
warned_ = true;
const char* adjective;
if (whole_bits_per_key_ >= 20) {
adjective = "Dramatic";
} else {
adjective = "Significant";
}
// For more details, see
// https://github.com/facebook/rocksdb/wiki/RocksDB-Bloom-Filter
ROCKS_LOG_WARN(
context.info_log,
"Using legacy Bloom filter with high (%d) bits/key. "
"%s filter space and/or accuracy improvement is available "
"with format_version>=5.",
whole_bits_per_key_, adjective);
}
return new LegacyBloomBitsBuilder(whole_bits_per_key_); return new LegacyBloomBitsBuilder(whole_bits_per_key_);
} }
} }

@ -8,6 +8,7 @@
#pragma once #pragma once
#include <atomic>
#include <memory> #include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
@ -125,6 +126,10 @@ class BloomFilterPolicy : public FilterPolicy {
// implementation) for building new SST filters. // implementation) for building new SST filters.
Mode mode_; Mode mode_;
// Whether relevant warnings have been logged already. (Remember so we
// only report once per BloomFilterPolicy instance, to keep the noise down.)
mutable std::atomic<bool> warned_;
// For newer Bloom filter implementation(s) // For newer Bloom filter implementation(s)
FilterBitsReader* GetBloomBitsReader(const Slice& contents) const; FilterBitsReader* GetBloomBitsReader(const Slice& contents) const;
}; };

@ -47,6 +47,7 @@ class MockBlockBasedTableTester {
context.column_family_name = "mock_cf"; context.column_family_name = "mock_cf";
context.compaction_style = ioptions_.compaction_style; context.compaction_style = ioptions_.compaction_style;
context.level_at_creation = kMockLevel; context.level_at_creation = kMockLevel;
context.info_log = ioptions_.info_log;
return BloomFilterPolicy::GetBuilderFromContext(context); return BloomFilterPolicy::GetBuilderFromContext(context);
} }
}; };

@ -26,6 +26,7 @@ int main() {
#include "util/gflags_compat.h" #include "util/gflags_compat.h"
#include "util/hash.h" #include "util/hash.h"
#include "util/random.h" #include "util/random.h"
#include "util/stderr_logger.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::ParseCommandLineFlags;
@ -112,6 +113,7 @@ using rocksdb::ParsedFullFilterBlock;
using rocksdb::PlainTableBloomV1; using rocksdb::PlainTableBloomV1;
using rocksdb::Random32; using rocksdb::Random32;
using rocksdb::Slice; using rocksdb::Slice;
using rocksdb::StderrLogger;
using rocksdb::mock::MockBlockBasedTableTester; using rocksdb::mock::MockBlockBasedTableTester;
struct KeyMaker { struct KeyMaker {
@ -243,6 +245,7 @@ struct FilterBench : public MockBlockBasedTableTester {
Random32 random_; Random32 random_;
std::ostringstream fp_rate_report_; std::ostringstream fp_rate_report_;
Arena arena_; Arena arena_;
StderrLogger stderr_logger_;
FilterBench() FilterBench()
: MockBlockBasedTableTester(new BloomFilterPolicy( : MockBlockBasedTableTester(new BloomFilterPolicy(
@ -252,6 +255,7 @@ struct FilterBench : public MockBlockBasedTableTester {
for (uint32_t i = 0; i < FLAGS_batch_size; ++i) { for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {
kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size); kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size);
} }
ioptions_.info_log = &stderr_logger_;
} }
void Go(); void Go();

Loading…
Cancel
Save