Add columnfamily option optimize_filters_for_hits to optimize for key hits only

Summary:
    Summary:
    Added a new option to ColumnFamilyOptions - optimize_filters_for_hits. This option can be used in the case where most
    accesses to the store are key hits and we don't need to optimize performance for key misses.
    This is useful when you have a very large database and most of your lookups succeed.  The option allows the store to
     not store and use filters in the last level (the largest level which contains data). These filters can take a large amount of
     space for large databases (in memory and on-disk). For the last level, these filters are only useful for key misses and not
     for key hits. If we are not optimizing for key misses, we can choose to not store these filters for that level.

    This option is only provided for BlockBasedTable. We skip building the filters when a compaction writes its output to the bottommost level.

Test Plan:
1. Modified db_test to also run tests with an additional option (optimize_filters_for_hits)
2. Added another unit test to db_test which specifically tests that filters are being skipped

Reviewers: rven, igor, sdong

Reviewed By: sdong

Subscribers: lgalanis, yoshinorim, MarkCallaghan, rven, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D33717
main
Sameet Agarwal 10 years ago
parent ba9d1737a8
commit e7c434c364
  1. 9
      db/builder.cc
  2. 11
      db/builder.h
  3. 12
      db/compaction_job.cc
  4. 61
      db/db_test.cc
  5. 2
      include/rocksdb/immutable_options.h
  6. 16
      include/rocksdb/options.h
  7. 7
      include/rocksdb/table.h
  8. 10
      table/adaptive_table_factory.cc
  9. 12
      table/adaptive_table_factory.h
  10. 10
      table/block_based_table_builder.cc
  11. 3
      table/block_based_table_builder.h
  12. 11
      table/block_based_table_factory.cc
  13. 7
      table/block_based_table_factory.h
  14. 9
      table/cuckoo_table_factory.cc
  15. 6
      table/cuckoo_table_factory.h
  16. 2
      table/mock_table.cc
  17. 20
      table/mock_table.h
  18. 11
      table/plain_table_factory.cc
  19. 11
      table/plain_table_factory.h
  20. 18
      util/options.cc
  21. 2
      util/options_helper.cc
  22. 13
      util/options_test.cc

@ -30,9 +30,12 @@ TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) {
return ioptions.table_factory->NewTableBuilder(
ioptions, internal_comparator, file, compression_type, compression_opts);
const CompressionOptions& compression_opts,
const bool skip_filters) {
return ioptions.table_factory->NewTableBuilder(ioptions, internal_comparator,
file, compression_type,
compression_opts,
skip_filters);
}
Status BuildTable(const std::string& dbname, Env* env,

@ -26,11 +26,12 @@ class VersionEdit;
class TableBuilder;
class WritableFile;
extern TableBuilder* NewTableBuilder(
const ImmutableCFOptions& options,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts);
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& internal_comparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false);
// Build a Table file from the contents of *iter. The generated file
// will be named according to number specified in meta. On success, the rest of

@ -1062,10 +1062,20 @@ Status CompactionJob::OpenCompactionOutputFile() {
compact_->compaction->OutputFilePreallocationSize(mutable_cf_options_)));
ColumnFamilyData* cfd = compact_->compaction->column_family_data();
bool skip_filters = false;
// If the Column family flag is to only optimize filters for hits,
// we can skip creating filters if this is the bottommost_level where
// data is going to be found
//
if (cfd->ioptions()->optimize_filters_for_hits && bottommost_level_) {
skip_filters = true;
}
compact_->builder.reset(NewTableBuilder(
*cfd->ioptions(), cfd->internal_comparator(), compact_->outfile.get(),
compact_->compaction->OutputCompressionType(),
cfd->ioptions()->compression_opts));
cfd->ioptions()->compression_opts, skip_filters));
LogFlush(db_options_.info_log);
return s;
}

@ -419,7 +419,8 @@ class DBTest {
kInfiniteMaxOpenFiles = 23,
kxxHashChecksum = 24,
kFIFOCompaction = 25,
kEnd = 26
kOptimizeFiltersForHits = 26,
kEnd = 27
};
int option_config_;
@ -682,6 +683,12 @@ class DBTest {
options.prefix_extractor.reset(NewNoopTransform());
break;
}
case kOptimizeFiltersForHits: {
options.optimize_filters_for_hits = true;
set_block_based_table_factory = true;
break;
}
default:
break;
}
@ -10797,6 +10804,58 @@ TEST(DBTest, DeleteMovedFileAfterCompaction) {
}
}
// Checks that, with optimize_filters_for_hits enabled on a block-based table
// with bloom filters, lookups of absent keys make only limited use of bloom
// filters (BLOOM_FILTER_USEFUL stays below 90000) while lookups of present
// keys still return their values.
TEST(DBTest, OptimizeFiltersForHits) {
Options options = CurrentOptions();
// Small write buffer / file / level sizes and low level-0 triggers;
// presumably chosen so the data written below is spread over several files
// and levels -- TODO confirm.
options.write_buffer_size = 32 * 1024;
options.target_file_size_base = 32 * 1024;
options.level0_file_num_compaction_trigger = 2;
options.level0_slowdown_writes_trigger = 2;
options.level0_stop_writes_trigger = 4;
options.max_bytes_for_level_base = 64 * 1024;
options.max_write_buffer_number = 2;
options.max_background_compactions = 8;
options.max_background_flushes = 8;
options.compaction_style = kCompactionStyleLevel;
// Use a block-based table with a whole-key bloom filter policy.
BlockBasedTableOptions bbto;
bbto.filter_policy.reset(NewBloomFilterPolicy(10, true));
bbto.whole_key_filtering = true;
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
// The option under test.
options.optimize_filters_for_hits = true;
// Statistics are required for the ticker checks below.
options.statistics = rocksdb::CreateDBStatistics();
CreateAndReopenWithCF({"mypikachu"}, options);
int numkeys = 200000;
// Write every even key in [0, numkeys) to column family 1. The outer loop
// picks the even residues 0, 2, ..., 18 modulo 20 and the inner loop strides
// by 20, so keys are inserted in an interleaved rather than ascending order.
for (int i = 0; i < 20; i += 2) {
for (int j = i; j < numkeys; j += 20) {
ASSERT_OK(Put(1, Key(j), "val"));
}
}
ASSERT_OK(Flush(1));
dbfull()->TEST_WaitForCompact();
// Odd keys were never written, so each of these lookups must miss.
for (int i = 1; i < numkeys; i += 2) {
ASSERT_EQ(Get(1, Key(i)), "NOT_FOUND");
}
// Only misses have been issued so far, so no hit ticker may have advanced.
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L0));
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L1));
ASSERT_EQ(0, TestGetTickerCount(options, GET_HIT_L2_AND_UP));
// When optimize_filters_for_hits is ON, the last level, which has
// most of the keys, does not use bloom filters. We end up using
// bloom filters in a very small number of cases. Without the flag,
// this number would be close to 150000 (all the keys at the last level) +
// some use in the upper levels.
// NOTE(review): the 150000 figure is the original author's estimate and is
// not derivable from this test alone.
//
// ASSERT_GT(a, b) checks a > b, i.e. the ticker must be below 90000.
ASSERT_GT(90000, TestGetTickerCount(options, BLOOM_FILTER_USEFUL));
// Every even key was written above and must still be readable.
for (int i = 0; i < numkeys; i += 2) {
ASSERT_EQ(Get(1, Key(i)), "val");
}
}
TEST(DBTest, L0L1L2AndUpHitCounter) {
Options options = CurrentOptions();
options.write_buffer_size = 32 * 1024;

@ -91,6 +91,8 @@ struct ImmutableCFOptions {
int num_levels;
bool optimize_filters_for_hits;
#ifndef ROCKSDB_LITE
// A vector of EventListeners which call-back functions will be called
// when specific RocksDB event happens.

@ -610,6 +610,22 @@ struct ColumnFamilyOptions {
// Default: 2
uint32_t min_partial_merge_operands;
// This flag specifies that the implementation should optimize the filters
// mainly for cases where keys are found rather than also optimize for keys
// missed. This would be used in cases where the application knows that
// there are very few misses or the performance in the case of misses is not
// important.
//
// For now, this flag allows us to not store filters for the last level, i.e.,
// the largest level of the LSM store which contains data. For keys which
// are hits, the filters in this level are not useful because we will search
// for the data anyway. NOTE: the filters in other levels are still useful
// even for key hits because they tell us whether to look in that level or go
// to the higher level.
//
// Default: false
bool optimize_filters_for_hits;
#ifndef ROCKSDB_LITE
// A vector of EventListeners which call-back functions will be called
// when specific RocksDB event happens.

@ -371,9 +371,10 @@ class TableFactory {
// to use in this table.
virtual TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const = 0;
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skipFilters = false) const = 0;
// Sanitizes the specified DB Options and ColumnFamilyOptions.
//

@ -65,11 +65,13 @@ Status AdaptiveTableFactory::NewTableReader(
TableBuilder* AdaptiveTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false) const {
return table_factory_to_write_->NewTableBuilder(
ioptions, internal_comparator, file, compression_type, compression_opts);
ioptions, internal_comparator, file, compression_type, compression_opts,
skip_filters);
}
std::string AdaptiveTableFactory::GetPrintableTableOptions() const {

@ -39,12 +39,12 @@ class AdaptiveTableFactory : public TableFactory {
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,

@ -462,7 +462,7 @@ struct BlockBasedTableBuilder::Rep {
const BlockBasedTableOptions& table_opt,
const InternalKeyComparator& icomparator, WritableFile* f,
const CompressionType _compression_type,
const CompressionOptions& _compression_opts)
const CompressionOptions& _compression_opts, const bool skip_filters)
: ioptions(_ioptions),
table_options(table_opt),
internal_comparator(icomparator),
@ -474,7 +474,8 @@ struct BlockBasedTableBuilder::Rep {
&this->internal_prefix_transform)),
compression_type(_compression_type),
compression_opts(_compression_opts),
filter_block(CreateFilterBlockBuilder(_ioptions, table_options)),
filter_block(skip_filters ? nullptr : CreateFilterBlockBuilder(
_ioptions, table_options)),
flush_block_policy(
table_options.flush_block_policy_factory->NewFlushBlockPolicy(
table_options, data_block)) {
@ -495,7 +496,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) {
const CompressionOptions& compression_opts, const bool skip_filters) {
BlockBasedTableOptions sanitized_table_options(table_options);
if (sanitized_table_options.format_version == 0 &&
sanitized_table_options.checksum != kCRC32c) {
@ -508,7 +509,8 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
}
rep_ = new Rep(ioptions, sanitized_table_options, internal_comparator, file,
compression_type, compression_opts);
compression_type, compression_opts, skip_filters);
if (rep_->filter_block != nullptr) {
rep_->filter_block->StartBlock(0);
}

@ -33,7 +33,8 @@ class BlockBasedTableBuilder : public TableBuilder {
const InternalKeyComparator& internal_comparator,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts);
const CompressionOptions& compression_opts,
const bool skip_filters);
// REQUIRES: Either Finish() or Abandon() has been called.
~BlockBasedTableBuilder();

@ -53,13 +53,12 @@ Status BlockBasedTableFactory::NewTableReader(
TableBuilder* BlockBasedTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters) const {
auto table_builder = new BlockBasedTableBuilder(
ioptions, table_options_, internal_comparator, file,
compression_type, compression_opts);
ioptions, table_options_, internal_comparator, file, compression_type,
compression_opts, skip_filters);
return table_builder;
}

@ -54,9 +54,10 @@ class BlockBasedTableFactory : public TableFactory {
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,

@ -27,9 +27,12 @@ Status CuckooTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
TableBuilder* CuckooTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType,
const CompressionOptions&) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType, const CompressionOptions&,
const bool skip_filters) const {
// Ignore the skip_filters flag; it does not apply to this file format.
//
// TODO: change builder to take the option struct
return new CuckooTableBuilder(file, table_options_.hash_table_ratio, 64,
table_options_.max_search_depth, internal_comparator.user_comparator(),

@ -60,8 +60,10 @@ class CuckooTableFactory : public TableFactory {
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator, WritableFile* file,
const CompressionType, const CompressionOptions&) const override;
const InternalKeyComparator& icomparator,
WritableFile* file, const CompressionType,
const CompressionOptions&,
const bool skip_filters = false) const override;
// Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts,

@ -65,7 +65,7 @@ TableBuilder* MockTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const {
const CompressionOptions& compression_opts, const bool skip_filters) const {
uint32_t id = GetAndWriteNextID(file);
return new MockTableBuilder(id, &file_system_);

@ -136,16 +136,16 @@ class MockTableFactory : public TableFactory {
MockTableFactory();
const char* Name() const override { return "MockTable"; }
Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options,
const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key, WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts) const override;
const EnvOptions& env_options,
const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_key,
WritableFile* file,
const CompressionType compression_type,
const CompressionOptions& compression_opts,
const bool skip_filters = false) const override;
// This function will directly create mock table instead of going through
// MockTableBuilder. MockFileContents has to have a format of <internal_key,

@ -28,9 +28,14 @@ Status PlainTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
TableBuilder* PlainTableFactory::NewTableBuilder(
const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator,
WritableFile* file, const CompressionType,
const CompressionOptions&) const {
const InternalKeyComparator& internal_comparator, WritableFile* file,
const CompressionType, const CompressionOptions&,
const bool skip_filters) const {
// Ignore the skip_filters flag. PlainTable format is optimized for small
// in-memory dbs. The skip_filters optimization is not useful for plain
// tables
//
return new PlainTableBuilder(ioptions, file, user_key_len_, encoding_type_,
index_sparseness_, bloom_bits_per_key_, 6,
huge_page_tlb_size_, hash_table_ratio_,

@ -158,12 +158,11 @@ class PlainTableFactory : public TableFactory {
const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder(
const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator,
WritableFile* file,
const CompressionType,
const CompressionOptions&) const override;
TableBuilder* NewTableBuilder(const ImmutableCFOptions& options,
const InternalKeyComparator& icomparator,
WritableFile* file, const CompressionType,
const CompressionOptions&,
const bool skip_filters = false) const override;
std::string GetPrintableTableOptions() const override;

@ -66,7 +66,8 @@ ImmutableCFOptions::ImmutableCFOptions(const Options& options)
compression_per_level(options.compression_per_level),
compression_opts(options.compression_opts),
access_hint_on_compaction_start(options.access_hint_on_compaction_start),
num_levels(options.num_levels)
num_levels(options.num_levels),
optimize_filters_for_hits(options.optimize_filters_for_hits)
#ifndef ROCKSDB_LITE
, listeners(options.listeners) {}
#else // ROCKSDB_LITE
@ -119,7 +120,8 @@ ColumnFamilyOptions::ColumnFamilyOptions()
memtable_prefix_bloom_huge_page_tlb_size(0),
bloom_locality(0),
max_successive_merges(0),
min_partial_merge_operands(2)
min_partial_merge_operands(2),
optimize_filters_for_hits(false)
#ifndef ROCKSDB_LITE
, listeners() {
#else // ROCKSDB_LITE
@ -184,7 +186,8 @@ ColumnFamilyOptions::ColumnFamilyOptions(const Options& options)
options.memtable_prefix_bloom_huge_page_tlb_size),
bloom_locality(options.bloom_locality),
max_successive_merges(options.max_successive_merges),
min_partial_merge_operands(options.min_partial_merge_operands)
min_partial_merge_operands(options.min_partial_merge_operands),
optimize_filters_for_hits(options.optimize_filters_for_hits)
#ifndef ROCKSDB_LITE
, listeners(options.listeners) {
#else // ROCKSDB_LITE
@ -240,7 +243,8 @@ DBOptions::DBOptions()
access_hint_on_compaction_start(NORMAL),
use_adaptive_mutex(false),
bytes_per_sync(0),
enable_thread_tracking(false) {}
enable_thread_tracking(false) {
}
DBOptions::DBOptions(const Options& options)
: create_if_missing(options.create_if_missing),
@ -342,7 +346,7 @@ void DBOptions::Dump(Logger* log) const {
stats_dump_period_sec);
Log(log, " Options.advise_random_on_open: %d",
advise_random_on_open);
Log(log, " Options.db_write_buffer_size: %zd",
Log(log, " Options.db_write_buffer_size: %zd",
db_write_buffer_size);
Log(log, " Options.access_hint_on_compaction_start: %s",
access_hints[access_hint_on_compaction_start]);
@ -352,7 +356,7 @@ void DBOptions::Dump(Logger* log) const {
rate_limiter.get());
Log(log, " Options.bytes_per_sync: %" PRIu64,
bytes_per_sync);
Log(log, " enable_thread_tracking: %d",
Log(log, " Options.enable_thread_tracking: %d",
enable_thread_tracking);
} // DBOptions::Dump
@ -477,6 +481,8 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
bloom_locality);
Log(log, " Options.max_successive_merges: %zd",
max_successive_merges);
// Report the optimize_filters_for_hits setting; the bool is printed via %d.
// The label must spell the option name exactly so log searches for
// "optimize_filters_for_hits" find this line.
Log(log, " Options.optimize_filters_for_hits: %d",
    optimize_filters_for_hits);
} // ColumnFamilyOptions::Dump
void Options::Dump(Logger* log) const {

@ -447,6 +447,8 @@ bool ParseColumnFamilyOption(const std::string& name, const std::string& value,
} else {
return false;
}
} else if (name == "optimize_filters_for_hits") {
new_options->optimize_filters_for_hits = ParseBoolean(name, value);
} else {
return false;
}

@ -134,7 +134,8 @@ TEST(OptionsTest, GetOptionsFromMapTest) {
{"bloom_locality", "29"},
{"max_successive_merges", "30"},
{"min_partial_merge_operands", "31"},
{"prefix_extractor", "fixed:31"}
{"prefix_extractor", "fixed:31"},
{"optimize_filters_for_hits", "true"},
};
std::unordered_map<std::string, std::string> db_options_map = {
@ -226,6 +227,7 @@ TEST(OptionsTest, GetOptionsFromMapTest) {
ASSERT_EQ(new_cf_opt.max_successive_merges, 30U);
ASSERT_EQ(new_cf_opt.min_partial_merge_operands, 31U);
ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr);
ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true);
ASSERT_EQ(std::string(new_cf_opt.prefix_extractor->Name()),
"rocksdb.FixedPrefix.31");
@ -395,6 +397,15 @@ TEST(OptionsTest, GetColumnFamilyOptionsFromStringTest) {
"write_buffer_size=10;max_write_buffer_number=16;"
"block_based_table_factory={xx_block_size=4;}",
&new_cf_opt));
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt,
"optimize_filters_for_hits=true",
&new_cf_opt));
ASSERT_OK(GetColumnFamilyOptionsFromString(base_cf_opt,
"optimize_filters_for_hits=false",
&new_cf_opt));
ASSERT_NOK(GetColumnFamilyOptionsFromString(base_cf_opt,
"optimize_filters_for_hits=junk",
&new_cf_opt));
}
#endif // !ROCKSDB_LITE

Loading…
Cancel
Save