From 66dc033af33254ff7df85ffae0c6a628615bd7d1 Mon Sep 17 00:00:00 2001 From: kailiu Date: Fri, 24 Jan 2014 10:57:15 -0800 Subject: [PATCH] Temporarily disable caching index/filter blocks Summary: Mixing index/filter blocks with data blocks resulted in some known issues. To make sure in next release our users won't be affected, we added a new option in BlockBasedTableFactory::TableOption to conceal this functionality for now. This patch also introduced a BlockBasedTableReader::OpenOptions, which avoids the "infinite" growth of parameters in BlockBasedTableReader::Open(). Test Plan: make check Reviewers: haobo, sdong, igor, dhruba Reviewed By: igor CC: leveldb, tnovak Differential Revision: https://reviews.facebook.net/D15327 --- db/db_test.cc | 4 +++ table/block_based_table_factory.cc | 6 ++-- table/block_based_table_factory.h | 36 +++++++-------------- table/block_based_table_options.h | 31 ++++++++++++++++++ table/block_based_table_reader.cc | 52 +++++++++++++++--------------- table/block_based_table_reader.h | 8 ++--- table/table_test.cc | 14 ++++---- 7 files changed, 87 insertions(+), 64 deletions(-) create mode 100644 table/block_based_table_options.h diff --git a/db/db_test.cc b/db/db_test.cc index 1161deddb..8828ed1b9 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -17,6 +17,7 @@ #include "db/filename.h" #include "db/version_set.h" #include "db/write_batch_internal.h" +#include "table/block_based_table_factory.h" #include "rocksdb/cache.h" #include "rocksdb/compaction_filter.h" #include "rocksdb/env.h" @@ -732,6 +733,9 @@ TEST(DBTest, IndexAndFilterBlocksOfNewTableAddedToCache) { options.filter_policy = filter_policy.get(); options.create_if_missing = true; options.statistics = rocksdb::CreateDBStatistics(); + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); DestroyAndReopen(&options); ASSERT_OK(db_->Put(WriteOptions(), "key", "val")); diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index 836f6edf6..a9cd35a68 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -20,10 +20,10 @@ namespace rocksdb { Status BlockBasedTableFactory::GetTableReader( const Options& options, const EnvOptions& soptions, - unique_ptr && file, uint64_t file_size, + unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader) const { - return BlockBasedTable::Open(options, soptions, std::move(file), file_size, - table_reader); + return BlockBasedTable::Open(options, soptions, table_options_, + std::move(file), file_size, table_reader); } TableBuilder* BlockBasedTableFactory::GetTableBuilder( diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index ee525816f..5a4d1bd6e 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -14,6 +14,7 @@ #include "rocksdb/flush_block_policy.h" #include "rocksdb/options.h" #include "rocksdb/table.h" +#include "table/block_based_table_options.h" namespace rocksdb { @@ -30,40 +31,25 @@ class BlockBasedTable; class BlockBasedTableBuilder; class BlockBasedTableFactory: public TableFactory { -public: - struct TableOptions { - // @flush_block_policy_factory creates the instances of flush block policy. - // which provides a configurable way to determine when to flush a block in - // the block based tables. If not set, table builder will use the default - // block flush policy, which cut blocks by block size (please refer to - // `FlushBlockBySizePolicy`). - std::shared_ptr flush_block_policy_factory; - }; + public: + BlockBasedTableFactory() : BlockBasedTableFactory(BlockBasedTableOptions()) {} + explicit BlockBasedTableFactory(const BlockBasedTableOptions& table_options) + : table_options_(table_options) {} - BlockBasedTableFactory() : BlockBasedTableFactory(TableOptions()) { } - BlockBasedTableFactory(const TableOptions& table_options): - table_options_(table_options) { - } + ~BlockBasedTableFactory() {} - ~BlockBasedTableFactory() { - } - - const char* Name() const override { - return "BlockBasedTable"; - } + const char* Name() const override { return "BlockBasedTable"; } Status GetTableReader(const Options& options, const EnvOptions& soptions, - unique_ptr && file, - uint64_t file_size, + unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader) const override; TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, - CompressionType compression_type) const - override; + CompressionType compression_type) + const override; private: - TableOptions table_options_; + BlockBasedTableOptions table_options_; }; - } // namespace rocksdb diff --git a/table/block_based_table_options.h b/table/block_based_table_options.h new file mode 100644 index 000000000..f5774e2bf --- /dev/null +++ b/table/block_based_table_options.h @@ -0,0 +1,31 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once +#include + +namespace rocksdb { + +class FlushBlockPolicyFactory; + +struct BlockBasedTableOptions { + // @flush_block_policy_factory creates the instances of flush block policy. + // which provides a configurable way to determine when to flush a block in + // the block based tables. If not set, table builder will use the default + // block flush policy, which cut blocks by block size (please refer to + // `FlushBlockBySizePolicy`). + std::shared_ptr flush_block_policy_factory; + + // TODO(kailiu) Temporarily disable this feature by making the default value + // to be false. Also in master branch, this file is non-public so no user + // will be able to change the value of `cache_index_and_filter_blocks`. + // + // Indicating if we'd put index/filter blocks to the block cache. + // If not specified, each "table reader" object will pre-load index/filter + // block during table initialization. + bool cache_index_and_filter_blocks = false; +}; + +} // namespace rocksdb diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index dcb55fc36..b08ea1934 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -26,6 +26,7 @@ #include "util/coding.h" #include "util/perf_context_imp.h" #include "util/stop_watch.h" +#include "table/block_based_table_options.h" namespace rocksdb { @@ -45,9 +46,9 @@ struct BlockBasedTable::Rep { Status status; unique_ptr file; char cache_key_prefix[kMaxCacheKeyPrefixSize]; - size_t cache_key_prefix_size; + size_t cache_key_prefix_size = 0; char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize]; - size_t compressed_cache_key_prefix_size; + size_t compressed_cache_key_prefix_size = 0; // Handle to metaindex_block: saved from footer BlockHandle metaindex_handle; @@ -220,20 +221,21 @@ Cache::Handle* GetFromBlockCache( } // end of anonymous namespace -Status BlockBasedTable::Open(const Options& options, - const EnvOptions& soptions, - unique_ptr && file, - uint64_t size, +Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, + const BlockBasedTableOptions& table_options, + unique_ptr&& file, + uint64_t file_size, unique_ptr* table_reader) { table_reader->reset(); - if (size < Footer::kEncodedLength) { + + if (file_size < Footer::kEncodedLength) { return Status::InvalidArgument("file is too short to be an sstable"); } char footer_space[Footer::kEncodedLength]; Slice footer_input; - Status s = file->Read(size - Footer::kEncodedLength, Footer::kEncodedLength, - &footer_input, footer_space); + Status s = file->Read(file_size - Footer::kEncodedLength, + Footer::kEncodedLength, &footer_input, footer_space); if (!s.ok()) return s; // Check that we actually read the whole footer from the file. It may be @@ -277,11 +279,21 @@ Status BlockBasedTable::Open(const Options& options, } } - // Initialize index/filter blocks. If block cache is not specified, - // these blocks will be kept in member variables in Rep, which will - // reside in the memory as long as this table object is alive; otherwise - // they will be added to block cache. - if (!options.block_cache) { + // Will use block cache for index/filter blocks access? + if (options.block_cache && table_options.cache_index_and_filter_blocks) { + // Call IndexBlockReader() to implicitly add index to the block_cache + unique_ptr iter(new_table->IndexBlockReader(ReadOptions())); + s = iter->status(); + + if (s.ok()) { + // Call GetFilter() to implicitly add filter to the block_cache + auto filter_entry = new_table->GetFilter(); + filter_entry.Release(options.block_cache.get()); + } + } else { + // If we don't use block cache for index/filter blocks access, we'll + // pre-load these blocks, which will kept in member variables in Rep + // and with a same life-time as this table object. Block* index_block = nullptr; // TODO: we never really verify check sum for index block s = ReadBlockFromFile( @@ -309,18 +321,7 @@ Status BlockBasedTable::Open(const Options& options, } else { delete index_block; } - } else { - // Call IndexBlockReader() to implicitly add index to the block_cache - unique_ptr iter( - new_table->IndexBlockReader(ReadOptions()) - ); - s = iter->status(); - if (s.ok()) { - // Call GetFilter() to implicitly add filter to the block_cache - auto filter_entry = new_table->GetFilter(); - filter_entry.Release(options.block_cache.get()); - } } if (s.ok()) { @@ -836,7 +837,6 @@ BlockBasedTable::GetFilter(bool no_io) const { // Get the iterator from the index block. Iterator* BlockBasedTable::IndexBlockReader(const ReadOptions& options) const { if (rep_->index_block) { - assert (!rep_->options.block_cache); return rep_->index_block->NewIterator(rep_->options.comparator); } diff --git a/table/block_based_table_reader.h b/table/block_based_table_reader.h index 66f63fc59..52ece7441 100644 --- a/table/block_based_table_reader.h +++ b/table/block_based_table_reader.h @@ -29,6 +29,7 @@ struct ReadOptions; class TableCache; class TableReader; class FilterBlockReader; +struct BlockBasedTableOptions; using std::unique_ptr; @@ -50,10 +51,9 @@ class BlockBasedTable : public TableReader { // to nullptr and returns a non-ok status. // // *file must remain live while this Table is in use. - static Status Open(const Options& options, - const EnvOptions& soptions, - unique_ptr&& file, - uint64_t file_size, + static Status Open(const Options& db_options, const EnvOptions& env_options, + const BlockBasedTableOptions& table_options, + unique_ptr&& file, uint64_t file_size, unique_ptr* table_reader); bool PrefixMayMatch(const Slice& internal_prefix) override; diff --git a/table/table_test.cc b/table/table_test.cc index 9907550ce..af794fd13 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -243,13 +243,12 @@ class BlockConstructor: public Constructor { class BlockBasedTableConstructor: public Constructor { public: - explicit BlockBasedTableConstructor( - const Comparator* cmp) - : Constructor(cmp) { - } + explicit BlockBasedTableConstructor(const Comparator* cmp) + : Constructor(cmp) {} ~BlockBasedTableConstructor() { Reset(); } + virtual Status FinishImpl(const Options& options, const KVMap& data) { Reset(); sink_.reset(new StringSink()); @@ -277,7 +276,6 @@ class BlockBasedTableConstructor: public Constructor { // Open the table uniq_id_ = cur_uniq_id_++; source_.reset(new StringSource(sink_->contents(), uniq_id_)); - unique_ptr table_factory; return options.table_factory->GetTableReader(options, soptions, std::move(source_), sink_->contents().size(), @@ -979,6 +977,11 @@ TEST(TableTest, BlockCacheTest) { options.create_if_missing = true; options.statistics = CreateDBStatistics(); options.block_cache = NewLRUCache(1024); + + // Enable the cache for index/filter blocks + BlockBasedTableOptions table_options; + table_options.cache_index_and_filter_blocks = true; + options.table_factory.reset(new BlockBasedTableFactory(table_options)); std::vector keys; KVMap kvmap; @@ -1292,7 +1295,6 @@ TEST(MemTableTest, Simple) { delete memtable->Unref(); } - } // namespace rocksdb int main(int argc, char** argv) {