Misc filter_bench improvements (#6444)

Summary:
These changes are useful for validating and testing internal fragmentation changes (https://github.com/facebook/rocksdb/issues/6427).
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6444

Test Plan: manual (no changes to production code)

Differential Revision: D20040076

Pulled By: pdillinger

fbshipit-source-id: 32d26f363d2a9ab9f5bebd281dcebd9915ae340e
main
Peter Dillinger 5 years ago committed by Facebook Github Bot
parent fcec56e86c
commit ab65278b1f
  1. 71
      util/filter_bench.cc

@ -36,11 +36,16 @@ using GFLAGS_NAMESPACE::SetUsageMessage;
DEFINE_uint32(seed, 0, "Seed for random number generators"); DEFINE_uint32(seed, 0, "Seed for random number generators");
DEFINE_double(working_mem_size_mb, 200, DEFINE_double(working_mem_size_mb, 200,
"MB of memory to get up to among all filters"); "MB of memory to get up to among all filters, unless "
"m_keys_total_max is specified.");
DEFINE_uint32(average_keys_per_filter, 10000, DEFINE_uint32(average_keys_per_filter, 10000,
"Average number of keys per filter"); "Average number of keys per filter");
DEFINE_double(vary_key_count_ratio, 0.4,
"Vary number of keys by up to +/- vary_key_count_ratio * "
"average_keys_per_filter.");
DEFINE_uint32(key_size, 24, "Average number of bytes for each key"); DEFINE_uint32(key_size, 24, "Average number of bytes for each key");
DEFINE_bool(vary_key_alignment, true, DEFINE_bool(vary_key_alignment, true,
@ -57,6 +62,11 @@ DEFINE_double(bits_per_key, 10.0, "Bits per key setting for filters");
DEFINE_double(m_queries, 200, "Millions of queries for each test mode"); DEFINE_double(m_queries, 200, "Millions of queries for each test mode");
DEFINE_double(m_keys_total_max, 0,
"Maximum total keys added to filters, in millions. "
"0 (default) disables. Non-zero overrides working_mem_size_mb "
"option.");
DEFINE_bool(use_full_block_reader, false, DEFINE_bool(use_full_block_reader, false,
"Use FullFilterBlockReader interface rather than FilterBitsReader"); "Use FullFilterBlockReader interface rather than FilterBitsReader");
@ -87,6 +97,8 @@ DEFINE_bool(legend, false,
"Print more information about interpreting results instead of " "Print more information about interpreting results instead of "
"running tests"); "running tests");
DEFINE_uint32(runs, 1, "Number of times to rebuild and run benchmark tests");
void _always_assert_fail(int line, const char *file, const char *expr) { void _always_assert_fail(int line, const char *file, const char *expr) {
fprintf(stderr, "%s: %d: Assertion %s failed\n", file, line, expr); fprintf(stderr, "%s: %d: Assertion %s failed\n", file, line, expr);
abort(); abort();
@ -252,12 +264,14 @@ struct FilterBench : public MockBlockBasedTableTester {
std::ostringstream fp_rate_report_; std::ostringstream fp_rate_report_;
Arena arena_; Arena arena_;
StderrLogger stderr_logger_; StderrLogger stderr_logger_;
double m_queries_;
FilterBench() FilterBench()
: MockBlockBasedTableTester(new BloomFilterPolicy( : MockBlockBasedTableTester(new BloomFilterPolicy(
FLAGS_bits_per_key, FLAGS_bits_per_key,
static_cast<BloomFilterPolicy::Mode>(FLAGS_impl))), static_cast<BloomFilterPolicy::Mode>(FLAGS_impl))),
random_(FLAGS_seed) { random_(FLAGS_seed),
m_queries_(0) {
for (uint32_t i = 0; i < FLAGS_batch_size; ++i) { for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {
kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size); kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size);
} }
@ -291,19 +305,29 @@ void FilterBench::Go() {
} }
} }
uint32_t variance_mask = 1; if (FLAGS_vary_key_count_ratio < 0.0 || FLAGS_vary_key_count_ratio > 1.0) {
while (variance_mask * variance_mask * 4 < FLAGS_average_keys_per_filter) { throw std::runtime_error("-vary_key_count_ratio must be >= 0.0 and <= 1.0");
variance_mask = variance_mask * 2 + 1;
} }
// For example, average_keys_per_filter = 100, vary_key_count_ratio = 0.1.
// Varies by up to +/- 10 keys. variance_range = 21 (generating value 0..20).
// variance_offset = 10, so the average of (value - offset) is always 0.
const uint32_t variance_range =
1 + 2 * static_cast<uint32_t>(FLAGS_vary_key_count_ratio *
FLAGS_average_keys_per_filter);
const uint32_t variance_offset = variance_range / 2;
const std::vector<TestMode> &testModes = const std::vector<TestMode> &testModes =
FLAGS_best_case ? bestCaseTestModes FLAGS_best_case ? bestCaseTestModes
: FLAGS_quick ? quickTestModes : allTestModes; : FLAGS_quick ? quickTestModes : allTestModes;
m_queries_ = FLAGS_m_queries;
double working_mem_size_mb = FLAGS_working_mem_size_mb;
if (FLAGS_quick) { if (FLAGS_quick) {
FLAGS_m_queries /= 7.0; m_queries_ /= 7.0;
} else if (FLAGS_best_case) { } else if (FLAGS_best_case) {
FLAGS_m_queries /= 3.0; m_queries_ /= 3.0;
FLAGS_working_mem_size_mb /= 10.0; working_mem_size_mb /= 10.0;
} }
std::cout << "Building..." << std::endl; std::cout << "Building..." << std::endl;
@ -315,15 +339,29 @@ void FilterBench::Go() {
#ifdef PREDICT_FP_RATE #ifdef PREDICT_FP_RATE
double weighted_predicted_fp_rate = 0.0; double weighted_predicted_fp_rate = 0.0;
#endif #endif
size_t max_total_keys;
size_t max_mem;
if (FLAGS_m_keys_total_max > 0) {
max_total_keys = static_cast<size_t>(1000000 * FLAGS_m_keys_total_max);
max_mem = SIZE_MAX;
} else {
max_total_keys = SIZE_MAX;
max_mem = static_cast<size_t>(1024 * 1024 * working_mem_size_mb);
}
ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(), ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(),
true); true);
while (total_memory_used < 1024 * 1024 * FLAGS_working_mem_size_mb) { infos_.clear();
while ((working_mem_size_mb == 0 || total_memory_used < max_mem) &&
total_keys_added < max_total_keys) {
uint32_t filter_id = random_.Next(); uint32_t filter_id = random_.Next();
uint32_t keys_to_add = FLAGS_average_keys_per_filter + uint32_t keys_to_add = FLAGS_average_keys_per_filter +
(random_.Next() & variance_mask) - fastrange32(random_.Next(), variance_range) -
(variance_mask / 2); variance_offset;
if (max_total_keys - total_keys_added < keys_to_add) {
keys_to_add = static_cast<uint32_t>(max_total_keys - total_keys_added);
}
infos_.emplace_back(); infos_.emplace_back();
FilterInfo &info = infos_.back(); FilterInfo &info = infos_.back();
info.filter_id_ = filter_id; info.filter_id_ = filter_id;
@ -392,7 +430,7 @@ void FilterBench::Go() {
std::cout << "Verifying..." << std::endl; std::cout << "Verifying..." << std::endl;
uint32_t outside_q_per_f = uint32_t outside_q_per_f =
static_cast<uint32_t>(FLAGS_m_queries * 1000000 / infos_.size()); static_cast<uint32_t>(m_queries_ * 1000000 / infos_.size());
uint64_t fps = 0; uint64_t fps = 0;
for (uint32_t i = 0; i < infos_.size(); ++i) { for (uint32_t i = 0; i < infos_.size(); ++i) {
FilterInfo &info = infos_[i]; FilterInfo &info = infos_[i];
@ -491,8 +529,7 @@ double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
uint32_t num_infos = static_cast<uint32_t>(infos_.size()); uint32_t num_infos = static_cast<uint32_t>(infos_.size());
uint32_t dry_run_hash = 0; uint32_t dry_run_hash = 0;
uint64_t max_queries = uint64_t max_queries = static_cast<uint64_t>(m_queries_ * 1000000 + 0.50);
static_cast<uint64_t>(FLAGS_m_queries * 1000000 + 0.50);
// Some filters may be considered secondary in order to implement skewed // Some filters may be considered secondary in order to implement skewed
// queries. num_primary_filters is the number that are to be treated as // queries. num_primary_filters is the number that are to be treated as
// equal, and any remainder will be treated as secondary. // equal, and any remainder will be treated as secondary.
@ -701,7 +738,11 @@ int main(int argc, char **argv) {
<< "\n of queries." << std::endl; << "\n of queries." << std::endl;
} else { } else {
FilterBench b; FilterBench b;
b.Go(); for (uint32_t i = 0; i < FLAGS_runs; ++i) {
b.Go();
FLAGS_seed += 100;
b.random_.Seed(FLAGS_seed);
}
} }
return 0; return 0;

Loading…
Cancel
Save