diff --git a/HISTORY.md b/HISTORY.md
index 1b6a7186d..010e2635a 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -16,6 +16,7 @@
 ### Bug Fixes
 * Fix an assertion failure in MultiGet() when BlockBasedTableOptions::no_block_cache is true and there is no compressed block cache
 * If a call to BackupEngine::PurgeOldBackups or BackupEngine::DeleteBackup suffered a crash, power failure, or I/O error, files could be left over from old backups that could only be purged with a call to GarbageCollect. Any call to PurgeOldBackups, DeleteBackup, or GarbageCollect should now suffice to purge such files.
+* Fix a buffer overrun problem in BlockBasedTable::MultiGet() when compression is enabled and no compressed block cache is configured.
 
 ## 6.5.1 (10/16/2019)
 ### Bug Fixes
diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc
index 0f6d106f1..c316e3dc8 100644
--- a/db/db_basic_test.cc
+++ b/db/db_basic_test.cc
@@ -11,6 +11,7 @@
 #include "port/stack_trace.h"
 #include "rocksdb/perf_context.h"
 #include "rocksdb/utilities/debug.h"
+#include "table/block_based/block_based_table_reader.h"
 #include "table/block_based/block_builder.h"
 #include "test_util/fault_injection_test_env.h"
 #if !defined(ROCKSDB_LITE)
@@ -1553,6 +1554,45 @@ TEST_F(DBBasicTest, GetAllKeyVersions) {
 }
 #endif  // !ROCKSDB_LITE
 
+TEST_F(DBBasicTest, MultiGetIOBufferOverrun) {
+  Options options = CurrentOptions();
+  Random rnd(301);
+  BlockBasedTableOptions table_options;
+  table_options.pin_l0_filter_and_index_blocks_in_cache = true;
+  table_options.block_size = 16 * 1024;
+  assert(table_options.block_size >
+         BlockBasedTable::kMultiGetReadStackBufSize);
+  options.table_factory.reset(new BlockBasedTableFactory(table_options));
+  Reopen(options);
+
+  std::string zero_str(128, '\0');
+  for (int i = 0; i < 100; ++i) {
+    // Make the value compressible. A purely random string doesn't compress
+    // and the resultant data block will not be compressed
+    std::string value(RandomString(&rnd, 128) + zero_str);
+    assert(Put(Key(i), value) == Status::OK());
+  }
+  Flush();
+
+  std::vector<std::string> key_data(10);
+  std::vector<Slice> keys;
+  // We cannot resize a PinnableSlice vector, so just set initial size to
+  // largest we think we will need
+  std::vector<PinnableSlice> values(10);
+  std::vector<Status> statuses;
+  ReadOptions ro;
+
+  // Warm up the cache first
+  key_data.emplace_back(Key(0));
+  keys.emplace_back(Slice(key_data.back()));
+  key_data.emplace_back(Key(50));
+  keys.emplace_back(Slice(key_data.back()));
+  statuses.resize(keys.size());
+
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data(), true);
+}
+
 class DBBasicTestWithParallelIO
     : public DBTestBase,
       public testing::WithParamInterface<std::tuple<bool, bool, bool, bool>> {
diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc
index c3a2e1293..ffdbdb1bd 100644
--- a/table/block_based/block_based_table_reader.cc
+++ b/table/block_based/block_based_table_reader.cc
@@ -496,7 +496,7 @@ class PartitionIndexReader : public BlockBasedTable::IndexReaderCommon<Block> {
       return;
     }
     handle = biter.value().handle;
-    uint64_t last_off = handle.offset() + handle.size() + kBlockTrailerSize;
+    uint64_t last_off = handle.offset() + block_size(handle);
     uint64_t prefetch_len = last_off - prefetch_off;
     std::unique_ptr<FilePrefetchBuffer> prefetch_buffer;
     auto& file = rep->file;
@@ -2327,7 +2327,7 @@ void BlockBasedTable::RetrieveMultipleBlocks(
     }
 
     ReadRequest req;
-    req.len = handle.size() + kBlockTrailerSize;
+    req.len = block_size(handle);
     if (scratch == nullptr) {
       req.scratch = new char[req.len];
     } else {
@@ -2354,11 +2354,11 @@ void BlockBasedTable::RetrieveMultipleBlocks(
     ReadRequest& req = read_reqs[read_req_idx++];
     Status s = req.status;
     if (s.ok()) {
-      if (req.result.size() != handle.size() + kBlockTrailerSize) {
+      if (req.result.size() != req.len) {
         s = Status::Corruption("truncated block read from " +
                                rep_->file->file_name() + " offset " +
                                ToString(handle.offset()) + ", expected " +
-                               ToString(handle.size() + kBlockTrailerSize) +
+                               ToString(req.len) +
                                " bytes, got " + ToString(req.result.size()));
       }
     }
@@ -2920,8 +2920,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
         BlockBasedTable::kMinNumFileReadsToStartAutoReadahead) {
       if (!rep->file->use_direct_io() &&
           (data_block_handle.offset() +
-               static_cast<size_t>(data_block_handle.size()) +
-               kBlockTrailerSize >
+               static_cast<size_t>(block_size(data_block_handle)) >
            readahead_limit_)) {
         // Buffered I/O
         // Discarding the return status of Prefetch calls intentionally, as
@@ -3422,7 +3421,6 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
     autovector<BlockHandle, MultiGetContext::MAX_BATCH_SIZE> block_handles;
     autovector<CachableEntry<Block>, MultiGetContext::MAX_BATCH_SIZE> results;
     autovector<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
-    static const size_t kMultiGetReadStackBufSize = 8192;
     char stack_buf[kMultiGetReadStackBufSize];
     std::unique_ptr<char[]> block_buf;
     {
@@ -3504,7 +3502,7 @@ void BlockBasedTable::MultiGet(const ReadOptions& read_options,
           block_handles.emplace_back(BlockHandle::NullBlockHandle());
         } else {
           block_handles.emplace_back(handle);
-          total_len += handle.size();
+          total_len += block_size(handle);
         }
       }
 
diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h
index 68c76ddb7..bcc7fd1a3 100644
--- a/table/block_based/block_based_table_reader.h
+++ b/table/block_based/block_based_table_reader.h
@@ -461,8 +461,13 @@ class BlockBasedTable : public TableReader {
   void DumpKeyValue(const Slice& key, const Slice& value,
                     WritableFile* out_file);
 
+  // A cumulative data block file read in MultiGet lower than this size will
+  // use a stack buffer
+  static constexpr size_t kMultiGetReadStackBufSize = 8192;
+
   friend class PartitionedFilterBlockReader;
   friend class PartitionedFilterBlockTest;
+  friend class DBBasicTest_MultiGetIOBufferOverrun_Test;
 };
 
 // Maintaining state of a two-level iteration on a partitioned index structure.
diff --git a/table/format.h b/table/format.h
index 0722d97b8..235c6e894 100644
--- a/table/format.h
+++ b/table/format.h
@@ -214,6 +214,11 @@ Status ReadFooterFromFile(RandomAccessFileReader* file,
 // 1-byte type + 32-bit crc
 static const size_t kBlockTrailerSize = 5;
 
+// Make block size calculation for IO less error prone
+inline uint64_t block_size(const BlockHandle& handle) {
+  return handle.size() + kBlockTrailerSize;
+}
+
 inline CompressionType get_block_compression_type(const char* block_data,
                                                   size_t block_size) {
   return static_cast<CompressionType>(block_data[block_size]);
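
Reviewer note: below is a minimal standalone sketch of the arithmetic this patch fixes, not RocksDB code. The type FakeBlockHandle and the hard-coded handle values are hypothetical stand-ins; only kBlockTrailerSize and the block_size() helper mirror the patch. The bug: MultiGet sized its shared scratch buffer by summing handle.size(), while each ReadRequest actually wrote handle.size() + kBlockTrailerSize bytes, overrunning the buffer by the 5-byte trailer per block.

    // Standalone sketch (hypothetical names): why summing handle.size()
    // under-allocates the MultiGet scratch buffer.
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    static const size_t kBlockTrailerSize = 5;  // 1-byte type + 32-bit crc

    struct FakeBlockHandle {  // stand-in for rocksdb::BlockHandle
      uint64_t offset;
      uint64_t size;  // block payload only, trailer excluded
    };

    // Mirrors the helper added in table/format.h: every file read of a
    // block must cover the payload plus its trailer.
    inline uint64_t block_size(const FakeBlockHandle& h) {
      return h.size + kBlockTrailerSize;
    }

    int main() {
      std::vector<FakeBlockHandle> handles = {{0, 4000}, {4005, 4000}};

      uint64_t buggy_len = 0;   // old sizing: payload bytes only
      uint64_t actual_len = 0;  // bytes the reads actually write
      for (const auto& h : handles) {
        buggy_len += h.size;
        actual_len += block_size(h);
      }

      // A buffer sized with buggy_len is overrun by kBlockTrailerSize
      // bytes per block (10 bytes in this example).
      std::cout << "allocated " << buggy_len << ", written " << actual_len
                << ", overrun " << (actual_len - buggy_len) << " bytes\n";
      return 0;
    }

Centralizing the "+ kBlockTrailerSize" in one block_size() helper (and comparing read results against req.len rather than recomputing the sum) keeps the allocation, read, and verification sites from drifting apart again.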