From d9f4875e52423d141a086a7361911c85de811d99 Mon Sep 17 00:00:00 2001 From: krad Date: Wed, 25 Feb 2015 16:34:26 -0800 Subject: [PATCH] Disable pre-fetching of index and filter blocks for sst_dump_tool. Summary: BlockBasedTable pre-fetches the filter and index blocks on Open call. This is an optimistic optimization targeted for runtime scenario. The optimization is unnecessary for sst_dump_tool - Added a provision to disable pre-fetching of index and filter blocks in BlockBasedTable - Disabled pre-fetching for the sst_dump tool Stack for reference : #01 0x00000000005ed944 in snappy::InternalUncompress () from /home/engshare/third-party2/snappy/1.0.3/src/snappy-1.0.3/snappy.cc:148 #02 0x00000000005edeee in snappy::RawUncompress () from /home/engshare/third-party2/snappy/1.0.3/src/snappy-1.0.3/snappy.cc:947 #03 0x00000000004e0b4d in rocksdb::UncompressBlockContents () from /data/users/paultuckfield/rocksdb/./util/compression.h:69 #04 0x00000000004e145c in rocksdb::ReadBlockContents () from /data/users/paultuckfield/rocksdb/table/format.cc:334 #05 0x00000000004ca424 in rocksdb::(anonymous namespace)::ReadBlockFromFile () from /data/users/paultuckfield/rocksdb/table/block_based_table_reader.cc:70 #06 0x00000000004cccad in rocksdb::BlockBasedTable::CreateIndexReader () from /data/users/paultuckfield/rocksdb/table/block_based_table_reader.cc:173 #07 0x00000000004d17e5 in rocksdb::BlockBasedTable::Open () from /data/users/paultuckfield/rocksdb/table/block_based_table_reader.cc:553 #08 0x00000000004c8184 in rocksdb::BlockBasedTableFactory::NewTableReader () from /data/users/paultuckfield/rocksdb/table/block_based_table_factory.cc:51 #09 0x0000000000598463 in rocksdb::SstFileReader::NewTableReader () from /data/users/paultuckfield/rocksdb/util/sst_dump_tool.cc:69 #10 0x00000000005986c2 in rocksdb::SstFileReader::SstFileReader () from /data/users/paultuckfield/rocksdb/util/sst_dump_tool.cc:26 #11 0x0000000000599047 in rocksdb::SSTDumpTool::Run () from /data/users/paultuckfield/rocksdb/util/sst_dump_tool.cc:332 #12 0x0000000000409b06 in main () from /data/users/paultuckfield/rocksdb/tools/sst_dump.cc:12 Test Plan: - Added a unit test to trigger the code. - Also did some manual verification. - Passed all unit tests task #6296048 Reviewers: igor, rven, sdong Reviewed By: sdong Subscribers: dhruba, leveldb Differential Revision: https://reviews.facebook.net/D34041 --- table/block_based_table_factory.cc | 4 +-- table/block_based_table_factory.h | 23 ++++++++++--- table/block_based_table_reader.cc | 55 ++++++++++++++++-------------- table/block_based_table_reader.h | 5 ++- util/sst_dump_test.cc | 22 ++++++++++++ util/sst_dump_tool.cc | 35 ++++++++++++++++--- util/sst_dump_tool_imp.h | 11 +++++- 7 files changed, 116 insertions(+), 39 deletions(-) diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 6a80559e2..f4b6214a1 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -45,10 +45,10 @@ Status BlockBasedTableFactory::NewTableReader( const ImmutableCFOptions& ioptions, const EnvOptions& soptions, const InternalKeyComparator& internal_comparator, unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader) const { + unique_ptr* table_reader, const bool prefetch_enabled) const { return BlockBasedTable::Open(ioptions, soptions, table_options_, internal_comparator, std::move(file), file_size, - table_reader); + table_reader, prefetch_enabled); } TableBuilder* BlockBasedTableFactory::NewTableBuilder( diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index aad9053c6..54eaa5a99 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -33,11 +33,24 @@ class BlockBasedTableFactory : public TableFactory { const char* Name() const override { return "BlockBasedTable"; } - Status NewTableReader( - const ImmutableCFOptions& ioptions, const EnvOptions& soptions, - const InternalKeyComparator& internal_comparator, - unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader) const override; + Status NewTableReader(const ImmutableCFOptions& ioptions, + const EnvOptions& soptions, + const InternalKeyComparator& internal_comparator, + unique_ptr&& file, uint64_t file_size, + unique_ptr* table_reader) const override { + return NewTableReader(ioptions, soptions, internal_comparator, + std::move(file), file_size, table_reader, + /*prefetch_index_and_filter=*/true); + } + + // This is a variant of virtual member function NewTableReader function with + // added capability to disable pre-fetching of blocks on BlockBasedTable::Open + Status NewTableReader(const ImmutableCFOptions& ioptions, + const EnvOptions& soptions, + const InternalKeyComparator& internal_comparator, + unique_ptr&& file, uint64_t file_size, + unique_ptr* table_reader, + bool prefetch_index_and_filter) const; TableBuilder* NewTableBuilder( const ImmutableCFOptions& ioptions, diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 01243a4fc..89a497821 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -463,7 +463,8 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, const InternalKeyComparator& internal_comparator, unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader) { + unique_ptr* table_reader, + const bool prefetch_index_and_filter) { table_reader->reset(); Footer footer; @@ -537,34 +538,38 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions, BlockBasedTablePropertyNames::kPrefixFiltering, rep->ioptions.info_log); } - // Will use block cache for index/filter blocks access? - if (table_options.cache_index_and_filter_blocks) { - assert(table_options.block_cache != nullptr); - // Hack: Call NewIndexIterator() to implicitly add index to the block_cache - unique_ptr iter(new_table->NewIndexIterator(ReadOptions())); - s = iter->status(); + if (prefetch_index_and_filter) { + // pre-fetching of blocks is turned on + // Will use block cache for index/filter blocks access? + if (table_options.cache_index_and_filter_blocks) { + assert(table_options.block_cache != nullptr); + // Hack: Call NewIndexIterator() to implicitly add index to the + // block_cache + unique_ptr iter(new_table->NewIndexIterator(ReadOptions())); + s = iter->status(); - if (s.ok()) { - // Hack: Call GetFilter() to implicitly add filter to the block_cache - auto filter_entry = new_table->GetFilter(); - filter_entry.Release(table_options.block_cache.get()); - } - } else { - // If we don't use block cache for index/filter blocks access, we'll - // pre-load these blocks, which will kept in member variables in Rep - // and with a same life-time as this table object. - IndexReader* index_reader = nullptr; - s = new_table->CreateIndexReader(&index_reader, meta_iter.get()); + if (s.ok()) { + // Hack: Call GetFilter() to implicitly add filter to the block_cache + auto filter_entry = new_table->GetFilter(); + filter_entry.Release(table_options.block_cache.get()); + } + } else { + // If we don't use block cache for index/filter blocks access, we'll + // pre-load these blocks, which will kept in member variables in Rep + // and with a same life-time as this table object. + IndexReader* index_reader = nullptr; + s = new_table->CreateIndexReader(&index_reader, meta_iter.get()); - if (s.ok()) { - rep->index_reader.reset(index_reader); + if (s.ok()) { + rep->index_reader.reset(index_reader); - // Set filter block - if (rep->filter_policy) { - rep->filter.reset(ReadFilter(rep, meta_iter.get(), nullptr)); + // Set filter block + if (rep->filter_policy) { + rep->filter.reset(ReadFilter(rep, meta_iter.get(), nullptr)); + } + } else { + delete index_reader; } - } else { - delete index_reader; } } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index e3594cf7c..0e9b5690b 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -63,12 +63,15 @@ class BlockBasedTable : public TableReader { // to nullptr and returns a non-ok status. // // *file must remain live while this Table is in use. + // *prefetch_blocks can be used to disable prefetching of index and filter + // blocks at statup static Status Open(const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const BlockBasedTableOptions& table_options, const InternalKeyComparator& internal_key_comparator, unique_ptr&& file, uint64_t file_size, - unique_ptr* table_reader); + unique_ptr* table_reader, + bool prefetch_index_and_filter = true); bool PrefixMayMatch(const Slice& internal_key); diff --git a/util/sst_dump_test.cc b/util/sst_dump_test.cc index f3fa1664d..33c78c7a4 100644 --- a/util/sst_dump_test.cc +++ b/util/sst_dump_test.cc @@ -147,6 +147,28 @@ TEST(SSTDumpToolTest, FullFilterBlock) { delete[] usage[i]; } } + +TEST(SSTDumpToolTest, GetProperties) { + table_options_.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false)); + std::string file_name = "rocksdb_sst_test.sst"; + createSST(file_name, table_options_); + + char* usage[3]; + for (int i = 0; i < 3; i++) { + usage[i] = new char[optLength]; + } + snprintf(usage[0], optLength, "./sst_dump"); + snprintf(usage[1], optLength, "--show_properties"); + snprintf(usage[2], optLength, "--file=rocksdb_sst_test.sst"); + + rocksdb::SSTDumpTool tool; + ASSERT_TRUE(!tool.Run(3, usage)); + + cleanup(file_name); + for (int i = 0; i < 3; i++) { + delete[] usage[i]; + } +} } // namespace rocksdb int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); } diff --git a/util/sst_dump_tool.cc b/util/sst_dump_tool.cc index 1d0270c72..0af2b5b8d 100644 --- a/util/sst_dump_tool.cc +++ b/util/sst_dump_tool.cc @@ -15,6 +15,8 @@ namespace rocksdb { +using std::dynamic_pointer_cast; + SstFileReader::SstFileReader(const std::string& file_path, bool verify_checksum, bool output_hex) @@ -23,7 +25,7 @@ SstFileReader::SstFileReader(const std::string& file_path, internal_comparator_(BytewiseComparator()) { fprintf(stdout, "Process %s\n", file_path.c_str()); - init_result_ = NewTableReader(file_name_); + init_result_ = GetTableReader(file_name_); } extern const uint64_t kBlockBasedTableMagicNumber; @@ -31,7 +33,7 @@ extern const uint64_t kLegacyBlockBasedTableMagicNumber; extern const uint64_t kPlainTableMagicNumber; extern const uint64_t kLegacyPlainTableMagicNumber; -Status SstFileReader::NewTableReader(const std::string& file_path) { +Status SstFileReader::GetTableReader(const std::string& file_path) { uint64_t magic_number; // read table magic number @@ -66,13 +68,36 @@ Status SstFileReader::NewTableReader(const std::string& file_path) { } if (s.ok()) { - s = options_.table_factory->NewTableReader( - ioptions_, soptions_, internal_comparator_, std::move(file_), file_size, - &table_reader_); + s = NewTableReader(ioptions_, soptions_, internal_comparator_, + std::move(file_), file_size, &table_reader_); } return s; } +Status SstFileReader::NewTableReader( + const ImmutableCFOptions& ioptions, const EnvOptions& soptions, + const InternalKeyComparator& internal_comparator, + unique_ptr&& file, uint64_t file_size, + unique_ptr* table_reader) { + // We need to turn off pre-fetching of index and filter nodes for + // BlockBasedTable + shared_ptr block_table_factory = + dynamic_pointer_cast(options_.table_factory); + + if (block_table_factory) { + return block_table_factory->NewTableReader( + ioptions_, soptions_, internal_comparator_, std::move(file_), file_size, + &table_reader_, /*enable_prefetch=*/false); + } + + assert(!block_table_factory); + + // For all other factory implementation + return options_.table_factory->NewTableReader( + ioptions_, soptions_, internal_comparator_, std::move(file_), file_size, + &table_reader_); +} + Status SstFileReader::DumpTable(const std::string& out_filename) { unique_ptr out_file; Env* env = Env::Default(); diff --git a/util/sst_dump_tool_imp.h b/util/sst_dump_tool_imp.h index 7e975a534..a5f22679e 100644 --- a/util/sst_dump_tool_imp.h +++ b/util/sst_dump_tool_imp.h @@ -53,12 +53,21 @@ class SstFileReader { Status getStatus() { return init_result_; } private: - Status NewTableReader(const std::string& file_path); + // Get the TableReader implementation for the sst file + Status GetTableReader(const std::string& file_path); Status ReadTableProperties(uint64_t table_magic_number, RandomAccessFile* file, uint64_t file_size); Status SetTableOptionsByMagicNumber(uint64_t table_magic_number); Status SetOldTableOptions(); + // Helper function to call the factory with settings specific to the + // factory implementation + Status NewTableReader(const ImmutableCFOptions& ioptions, + const EnvOptions& soptions, + const InternalKeyComparator& internal_comparator, + unique_ptr&& file, uint64_t file_size, + unique_ptr* table_reader); + std::string file_name_; uint64_t read_num_; bool verify_checksum_;