From c615689bb54d80b744ea90a62e11dd2b5592b34a Mon Sep 17 00:00:00 2001 From: Islam AbdelRahman Date: Mon, 22 Jan 2018 14:37:37 -0800 Subject: [PATCH] Support skipping bloom filters for SstFileWriter Summary: Add an option for SstFileWriter to skip building bloom filters Closes https://github.com/facebook/rocksdb/pull/3360 Differential Revision: D6709120 Pulled By: IslamAbdelRahman fbshipit-source-id: 964d4bce38822a048691792f447bcfbb4b6bd809 --- db/external_sst_file_test.cc | 49 +++++++++++++++++++++++++++++++ include/rocksdb/sst_file_writer.h | 8 +++-- table/sst_file_writer.cc | 14 +++++---- 3 files changed, 62 insertions(+), 9 deletions(-) diff --git a/db/external_sst_file_test.cc b/db/external_sst_file_test.cc index 4a4e82e79..89e949ff0 100644 --- a/db/external_sst_file_test.cc +++ b/db/external_sst_file_test.cc @@ -1940,6 +1940,55 @@ TEST_F(ExternalSSTFileTest, IngestBehind) { size_t kcnt = 0; VerifyDBFromMap(true_data, &kcnt, false); } + +TEST_F(ExternalSSTFileTest, SkipBloomFilter) { + Options options = CurrentOptions(); + + BlockBasedTableOptions table_options; + table_options.filter_policy.reset(NewBloomFilterPolicy(10)); + table_options.cache_index_and_filter_blocks = true; + options.table_factory.reset(NewBlockBasedTableFactory(table_options)); + + + // Create external SST file and include bloom filters + options.statistics = rocksdb::CreateDBStatistics(); + DestroyAndReopen(options); + { + std::string file_path = sst_files_dir_ + "sst_with_bloom.sst"; + SstFileWriter sst_file_writer(EnvOptions(), options); + ASSERT_OK(sst_file_writer.Open(file_path)); + ASSERT_OK(sst_file_writer.Put("Key1", "Value1")); + ASSERT_OK(sst_file_writer.Finish()); + + ASSERT_OK( + db_->IngestExternalFile({file_path}, IngestExternalFileOptions())); + + ASSERT_EQ(Get("Key1"), "Value1"); + ASSERT_GE( + options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD), 1); + } + + // Create external SST file but skip bloom filters + options.statistics = 
rocksdb::CreateDBStatistics(); + DestroyAndReopen(options); + { + std::string file_path = sst_files_dir_ + "sst_with_no_bloom.sst"; + SstFileWriter sst_file_writer(EnvOptions(), options, nullptr, true, + Env::IOPriority::IO_TOTAL, + true /* skip_filters */); + ASSERT_OK(sst_file_writer.Open(file_path)); + ASSERT_OK(sst_file_writer.Put("Key1", "Value1")); + ASSERT_OK(sst_file_writer.Finish()); + + ASSERT_OK( + db_->IngestExternalFile({file_path}, IngestExternalFileOptions())); + + ASSERT_EQ(Get("Key1"), "Value1"); + ASSERT_EQ( + options.statistics->getTickerCount(Tickers::BLOCK_CACHE_FILTER_ADD), 0); + } +} + } // namespace rocksdb int main(int argc, char** argv) { diff --git a/include/rocksdb/sst_file_writer.h b/include/rocksdb/sst_file_writer.h index 15e89cd67..c7a874ab1 100644 --- a/include/rocksdb/sst_file_writer.h +++ b/include/rocksdb/sst_file_writer.h @@ -72,16 +72,18 @@ class SstFileWriter { SstFileWriter(const EnvOptions& env_options, const Options& options, ColumnFamilyHandle* column_family = nullptr, bool invalidate_page_cache = true, - Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL) + Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL, + bool skip_filters = false) : SstFileWriter(env_options, options, options.comparator, column_family, - invalidate_page_cache, io_priority) {} + invalidate_page_cache, io_priority, skip_filters) {} // Deprecated API SstFileWriter(const EnvOptions& env_options, const Options& options, const Comparator* user_comparator, ColumnFamilyHandle* column_family = nullptr, bool invalidate_page_cache = true, - Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL); + Env::IOPriority io_priority = Env::IOPriority::IO_TOTAL, + bool skip_filters = false); ~SstFileWriter(); diff --git a/table/sst_file_writer.cc b/table/sst_file_writer.cc index adcd91f92..31b408892 100644 --- a/table/sst_file_writer.cc +++ b/table/sst_file_writer.cc @@ -27,7 +27,7 @@ const size_t kFadviseTrigger = 1024 * 1024; // 1MB struct 
SstFileWriter::Rep { Rep(const EnvOptions& _env_options, const Options& options, Env::IOPriority _io_priority, const Comparator* _user_comparator, - ColumnFamilyHandle* _cfh, bool _invalidate_page_cache) + ColumnFamilyHandle* _cfh, bool _invalidate_page_cache, bool _skip_filters) : env_options(_env_options), ioptions(options), mutable_cf_options(options), @@ -35,7 +35,8 @@ struct SstFileWriter::Rep { internal_comparator(_user_comparator), cfh(_cfh), invalidate_page_cache(_invalidate_page_cache), - last_fadvise_size(0) {} + last_fadvise_size(0), + skip_filters(_skip_filters) {} std::unique_ptr<WritableFileWriter> file_writer; std::unique_ptr<TableBuilder> builder; @@ -54,6 +55,7 @@ struct SstFileWriter::Rep { // The size of the file during the last time we called Fadvise to remove // cached pages from page cache. uint64_t last_fadvise_size; + bool skip_filters; Status Add(const Slice& user_key, const Slice& value, const ValueType value_type) { if (!builder) { @@ -122,9 +124,9 @@ SstFileWriter::SstFileWriter(const EnvOptions& env_options, const Comparator* user_comparator, ColumnFamilyHandle* column_family, bool invalidate_page_cache, - Env::IOPriority io_priority) + Env::IOPriority io_priority, bool skip_filters) : rep_(new Rep(env_options, options, io_priority, user_comparator, - column_family, invalidate_page_cache)) { + column_family, invalidate_page_cache, skip_filters)) { rep_->file_info.file_size = 0; } @@ -189,8 +191,8 @@ Status SstFileWriter::Open(const std::string& file_path) { TableBuilderOptions table_builder_options( r->ioptions, r->internal_comparator, &int_tbl_prop_collector_factories, compression_type, r->ioptions.compression_opts, - nullptr /* compression_dict */, false /* skip_filters */, - r->column_family_name, unknown_level); + nullptr /* compression_dict */, r->skip_filters, r->column_family_name, + unknown_level); r->file_writer.reset( new WritableFileWriter(std::move(sst_file), r->env_options));