Refactor to avoid confusing "raw block" (#10408)

Summary:
We have a lot of confusing code because of mixed, sometimes
completely opposite uses of of the term "raw block" or "raw contents",
sometimes within the same source file. For example, in `BlockBasedTableBuilder`,
`raw_block_contents` and `raw_size` generally referred to uncompressed block
contents and size, while `WriteRawBlock` referred to writing a block that
is already compressed if it is going to be. Meanwhile, in
`BlockBasedTable`, `raw_block_contents` either referred to a (maybe
compressed) block with trailer, or a maybe compressed block maybe
without trailer. (Note: left as follow-up work to use C++ typing to
better sort out the various kinds of BlockContents.)

This change primarily tries to apply some consistent terminology around
the kinds of block representations, avoiding the unclear "raw". (Any
meaning of "raw" assumes some bias toward the storage layer or toward
the logical data layer.) Preferred terminology:

* **Serialized block** - bytes that go into storage. For block-based table
(usually the case) this includes the block trailer. WART: block `size` may or
may not include the trailer; need to be clear about whether it does or not.
* **Maybe compressed block** - like a serialized block, but without the
trailer (or no promise of including a trailer). Must be accompanied by a
CompressionType.
* **Uncompressed block** - "payload" bytes that are either stored with no
compression, used as input to compression function, or result of
decompression function.
* **Parsed block** - an in-memory form of a block in block cache, as it is
used by the table reader. Different C++ types are used depending on the
block type (see block_like_traits.h).

Other refactorings:
* Misc corrections/improvements of internal API comments
* Remove a few misleading / unhelpful / redundant comments.
* Use move semantics in some places to simplify contracts
* Use better parameter names to indicate which parameters are used for
outputs
* Remove some extraneous `extern`
* Various clean-ups to `CacheDumperImpl` (mostly unnecessary code)

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10408

Test Plan: existing tests

Reviewed By: akankshamahajan15

Differential Revision: D38172617

Pulled By: pdillinger

fbshipit-source-id: ccb99299f324ac5ca46996d34c5089621a4f260c
main
Peter Dillinger 2 years ago committed by Facebook GitHub Bot
parent 12f5a1e35c
commit ef443cead4
  1. 2
      db/db_properties_test.cc
  2. 7
      db/db_test2.cc
  3. 2
      db/external_sst_file_basic_test.cc
  4. 4
      db/table_properties_collector.cc
  5. 6
      db/table_properties_collector.h
  6. 2
      db/table_properties_collector_test.cc
  7. 6
      include/rocksdb/persistent_cache.h
  8. 6
      include/rocksdb/table_properties.h
  9. 17
      table/block_based/block.h
  10. 201
      table/block_based/block_based_table_builder.cc
  11. 22
      table/block_based/block_based_table_builder.h
  12. 159
      table/block_based/block_based_table_reader.cc
  13. 22
      table/block_based/block_based_table_reader.h
  14. 61
      table/block_based/block_based_table_reader_sync_and_async.h
  15. 8
      table/block_based/cachable_entry.h
  16. 32
      table/block_fetcher.cc
  17. 2
      table/block_fetcher.h
  18. 45
      table/format.cc
  19. 64
      table/format.h
  20. 5
      table/meta_blocks.cc
  21. 2
      table/meta_blocks.h
  22. 19
      table/persistent_cache_helper.cc
  23. 26
      table/persistent_cache_helper.h
  24. 2
      table/sst_file_writer_collectors.h
  25. 2
      utilities/blob_db/blob_db_impl.cc
  26. 2
      utilities/blob_db/blob_dump_tool.cc
  27. 87
      utilities/cache_dump_load_impl.cc
  28. 23
      utilities/cache_dump_load_impl.h

@ -1225,7 +1225,7 @@ class BlockCountingTablePropertiesCollector : public TablePropertiesCollector {
return Status::OK(); return Status::OK();
} }
void BlockAdd(uint64_t /* block_raw_bytes */, void BlockAdd(uint64_t /* block_uncomp_bytes */,
uint64_t block_compressed_bytes_fast, uint64_t block_compressed_bytes_fast,
uint64_t block_compressed_bytes_slow) override { uint64_t block_compressed_bytes_slow) override {
if (block_compressed_bytes_fast > 0 || block_compressed_bytes_slow > 0) { if (block_compressed_bytes_fast > 0 || block_compressed_bytes_slow > 0) {

@ -1796,15 +1796,14 @@ TEST_P(CompressionFailuresTest, CompressionFailures) {
}); });
} else if (compression_failure_type_ == kTestDecompressionFail) { } else if (compression_failure_type_ == kTestDecompressionFail) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"UncompressBlockContentsForCompressionType:TamperWithReturnValue", "UncompressBlockData:TamperWithReturnValue", [](void* arg) {
[](void* arg) {
Status* ret = static_cast<Status*>(arg); Status* ret = static_cast<Status*>(arg);
ASSERT_OK(*ret); ASSERT_OK(*ret);
*ret = Status::Corruption("kTestDecompressionFail"); *ret = Status::Corruption("kTestDecompressionFail");
}); });
} else if (compression_failure_type_ == kTestDecompressionCorruption) { } else if (compression_failure_type_ == kTestDecompressionCorruption) {
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack( ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
"UncompressBlockContentsForCompressionType:" "UncompressBlockData:"
"TamperWithDecompressionOutput", "TamperWithDecompressionOutput",
[](void* arg) { [](void* arg) {
BlockContents* contents = static_cast<BlockContents*>(arg); BlockContents* contents = static_cast<BlockContents*>(arg);
@ -1872,7 +1871,7 @@ TEST_P(CompressionFailuresTest, CompressionFailures) {
"Could not decompress: kTestDecompressionFail"); "Could not decompress: kTestDecompressionFail");
} else if (compression_failure_type_ == kTestDecompressionCorruption) { } else if (compression_failure_type_ == kTestDecompressionCorruption) {
ASSERT_EQ(std::string(s.getState()), ASSERT_EQ(std::string(s.getState()),
"Decompressed block did not match raw block"); "Decompressed block did not match pre-compression block");
} }
} }

@ -1472,7 +1472,7 @@ TEST_P(ExternalSSTFileBasicTest, IngestFileWithBadBlockChecksum) {
SyncPoint::GetInstance()->DisableProcessing(); SyncPoint::GetInstance()->DisableProcessing();
SyncPoint::GetInstance()->ClearAllCallBacks(); SyncPoint::GetInstance()->ClearAllCallBacks();
SyncPoint::GetInstance()->SetCallBack( SyncPoint::GetInstance()->SetCallBack(
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum",
change_checksum); change_checksum);
SyncPoint::GetInstance()->EnableProcessing(); SyncPoint::GetInstance()->EnableProcessing();
int file_id = 0; int file_id = 0;

@ -43,9 +43,9 @@ Status UserKeyTablePropertiesCollector::InternalAdd(const Slice& key,
} }
void UserKeyTablePropertiesCollector::BlockAdd( void UserKeyTablePropertiesCollector::BlockAdd(
uint64_t block_raw_bytes, uint64_t block_compressed_bytes_fast, uint64_t block_uncomp_bytes, uint64_t block_compressed_bytes_fast,
uint64_t block_compressed_bytes_slow) { uint64_t block_compressed_bytes_slow) {
return collector_->BlockAdd(block_raw_bytes, block_compressed_bytes_fast, return collector_->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast,
block_compressed_bytes_slow); block_compressed_bytes_slow);
} }

@ -29,7 +29,7 @@ class IntTblPropCollector {
virtual Status InternalAdd(const Slice& key, const Slice& value, virtual Status InternalAdd(const Slice& key, const Slice& value,
uint64_t file_size) = 0; uint64_t file_size) = 0;
virtual void BlockAdd(uint64_t block_raw_bytes, virtual void BlockAdd(uint64_t block_uncomp_bytes,
uint64_t block_compressed_bytes_fast, uint64_t block_compressed_bytes_fast,
uint64_t block_compressed_bytes_slow) = 0; uint64_t block_compressed_bytes_slow) = 0;
@ -69,7 +69,7 @@ class UserKeyTablePropertiesCollector : public IntTblPropCollector {
virtual Status InternalAdd(const Slice& key, const Slice& value, virtual Status InternalAdd(const Slice& key, const Slice& value,
uint64_t file_size) override; uint64_t file_size) override;
virtual void BlockAdd(uint64_t block_raw_bytes, virtual void BlockAdd(uint64_t block_uncomp_bytes,
uint64_t block_compressed_bytes_fast, uint64_t block_compressed_bytes_fast,
uint64_t block_compressed_bytes_slow) override; uint64_t block_compressed_bytes_slow) override;
@ -143,7 +143,7 @@ class TimestampTablePropertiesCollector : public IntTblPropCollector {
return Status::OK(); return Status::OK();
} }
void BlockAdd(uint64_t /* block_raw_bytes */, void BlockAdd(uint64_t /* block_uncomp_bytes */,
uint64_t /* block_compressed_bytes_fast */, uint64_t /* block_compressed_bytes_fast */,
uint64_t /* block_compressed_bytes_slow */) override { uint64_t /* block_compressed_bytes_slow */) override {
return; return;

@ -176,7 +176,7 @@ class RegularKeysStartWithAInternal : public IntTblPropCollector {
return Status::OK(); return Status::OK();
} }
void BlockAdd(uint64_t /* block_raw_bytes */, void BlockAdd(uint64_t /* block_uncomp_bytes */,
uint64_t /* block_compressed_bytes_fast */, uint64_t /* block_compressed_bytes_fast */,
uint64_t /* block_compressed_bytes_slow */) override { uint64_t /* block_compressed_bytes_slow */) override {
// Nothing to do. // Nothing to do.

@ -44,9 +44,9 @@ class PersistentCache {
virtual Status Lookup(const Slice& key, std::unique_ptr<char[]>* data, virtual Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
size_t* size) = 0; size_t* size) = 0;
// Is cache storing uncompressed data ? // True if the cache is configured to store serialized blocks, which are
// // potentially compressed and include a trailer (when SST format calls for
// True if the cache is configured to store uncompressed data else false // one). False if the cache stores uncompressed blocks (no trailer).
virtual bool IsCompressed() = 0; virtual bool IsCompressed() = 0;
// Return stats as map of {string, double} per-tier // Return stats as map of {string, double} per-tier

@ -111,7 +111,7 @@ class TablePropertiesCollector {
} }
// Called after each new block is cut // Called after each new block is cut
virtual void BlockAdd(uint64_t /* block_raw_bytes */, virtual void BlockAdd(uint64_t /* block_uncomp_bytes */,
uint64_t /* block_compressed_bytes_fast */, uint64_t /* block_compressed_bytes_fast */,
uint64_t /* block_compressed_bytes_slow */) { uint64_t /* block_compressed_bytes_slow */) {
// Nothing to do here. Callback registers can override. // Nothing to do here. Callback registers can override.
@ -193,9 +193,9 @@ struct TableProperties {
uint64_t index_value_is_delta_encoded = 0; uint64_t index_value_is_delta_encoded = 0;
// the size of filter block. // the size of filter block.
uint64_t filter_size = 0; uint64_t filter_size = 0;
// total raw key size // total raw (uncompressed, undelineated) key size
uint64_t raw_key_size = 0; uint64_t raw_key_size = 0;
// total raw value size // total raw (uncompressed, undelineated) value size
uint64_t raw_value_size = 0; uint64_t raw_value_size = 0;
// the number of blocks in this table // the number of blocks in this table
uint64_t num_data_blocks = 0; uint64_t num_data_blocks = 0;

@ -137,18 +137,19 @@ class BlockReadAmpBitmap {
uint32_t rnd_; uint32_t rnd_;
}; };
// This Block class is not for any old block: it is designed to hold only // class Block is the uncompressed and "parsed" form for blocks containing
// uncompressed blocks containing sorted key-value pairs. It is thus // key-value pairs. (See BlockContents comments for more on terminology.)
// suitable for storing uncompressed data blocks, index blocks (including // This includes the in-memory representation of data blocks, index blocks
// partitions), range deletion blocks, properties blocks, metaindex blocks, // (including partitions), range deletion blocks, properties blocks, metaindex
// as well as the top level of the partitioned filter structure (which is // blocks, as well as the top level of the partitioned filter structure (which
// actually an index of the filter partitions). It is NOT suitable for // is actually an index of the filter partitions). It is NOT suitable for
// compressed blocks in general, filter blocks/partitions, or compression // compressed blocks in general, filter blocks/partitions, or compression
// dictionaries (since the latter do not contain sorted key-value pairs). // dictionaries.
// Use BlockContents directly for those.
// //
// See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format // See https://github.com/facebook/rocksdb/wiki/Rocksdb-BlockBasedTable-Format
// for details of the format and the various block types. // for details of the format and the various block types.
//
// TODO: Rename to ParsedKvBlock?
class Block { class Block {
public: public:
// Initialize the block with the specified contents. // Initialize the block with the specified contents.

@ -104,15 +104,15 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
} }
} }
bool GoodCompressionRatio(size_t compressed_size, size_t raw_size) { bool GoodCompressionRatio(size_t compressed_size, size_t uncomp_size) {
// Check to see if compressed less than 12.5% // Check to see if compressed less than 12.5%
return compressed_size < raw_size - (raw_size / 8u); return compressed_size < uncomp_size - (uncomp_size / 8u);
} }
} // namespace } // namespace
// format_version is the block format as defined in include/rocksdb/table.h // format_version is the block format as defined in include/rocksdb/table.h
Slice CompressBlock(const Slice& raw, const CompressionInfo& info, Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
CompressionType* type, uint32_t format_version, CompressionType* type, uint32_t format_version,
bool do_sample, std::string* compressed_output, bool do_sample, std::string* compressed_output,
std::string* sampled_output_fast, std::string* sampled_output_fast,
@ -139,7 +139,8 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info,
CompressionDict::GetEmptyDict(), c, CompressionDict::GetEmptyDict(), c,
info.SampleForCompression()); info.SampleForCompression());
CompressData(raw, info_tmp, GetCompressFormatForVersion(format_version), CompressData(uncompressed_data, info_tmp,
GetCompressFormatForVersion(format_version),
sampled_output_fast); sampled_output_fast);
} }
@ -152,29 +153,32 @@ Slice CompressBlock(const Slice& raw, const CompressionInfo& info,
CompressionDict::GetEmptyDict(), c, CompressionDict::GetEmptyDict(), c,
info.SampleForCompression()); info.SampleForCompression());
CompressData(raw, info_tmp, GetCompressFormatForVersion(format_version), CompressData(uncompressed_data, info_tmp,
GetCompressFormatForVersion(format_version),
sampled_output_slow); sampled_output_slow);
} }
} }
if (info.type() == kNoCompression) { if (info.type() == kNoCompression) {
*type = kNoCompression; *type = kNoCompression;
return raw; return uncompressed_data;
} }
// Actually compress the data; if the compression method is not supported, // Actually compress the data; if the compression method is not supported,
// or the compression fails etc., just fall back to uncompressed // or the compression fails etc., just fall back to uncompressed
if (!CompressData(raw, info, GetCompressFormatForVersion(format_version), if (!CompressData(uncompressed_data, info,
GetCompressFormatForVersion(format_version),
compressed_output)) { compressed_output)) {
*type = kNoCompression; *type = kNoCompression;
return raw; return uncompressed_data;
} }
// Check the compression ratio; if it's not good enough, just fall back to // Check the compression ratio; if it's not good enough, just fall back to
// uncompressed // uncompressed
if (!GoodCompressionRatio(compressed_output->size(), raw.size())) { if (!GoodCompressionRatio(compressed_output->size(),
uncompressed_data.size())) {
*type = kNoCompression; *type = kNoCompression;
return raw; return uncompressed_data;
} }
*type = info.type(); *type = info.type();
@ -216,7 +220,7 @@ class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector
return Status::OK(); return Status::OK();
} }
virtual void BlockAdd(uint64_t /* block_raw_bytes */, virtual void BlockAdd(uint64_t /* block_uncomp_bytes */,
uint64_t /* block_compressed_bytes_fast */, uint64_t /* block_compressed_bytes_fast */,
uint64_t /* block_compressed_bytes_slow */) override { uint64_t /* block_compressed_bytes_slow */) override {
// Intentionally left blank. No interest in collecting stats for // Intentionally left blank. No interest in collecting stats for
@ -665,21 +669,21 @@ struct BlockBasedTableBuilder::ParallelCompressionRep {
class FileSizeEstimator { class FileSizeEstimator {
public: public:
explicit FileSizeEstimator() explicit FileSizeEstimator()
: raw_bytes_compressed(0), : uncomp_bytes_compressed(0),
raw_bytes_curr_block(0), uncomp_bytes_curr_block(0),
raw_bytes_curr_block_set(false), uncomp_bytes_curr_block_set(false),
raw_bytes_inflight(0), uncomp_bytes_inflight(0),
blocks_inflight(0), blocks_inflight(0),
curr_compression_ratio(0), curr_compression_ratio(0),
estimated_file_size(0) {} estimated_file_size(0) {}
// Estimate file size when a block is about to be emitted to // Estimate file size when a block is about to be emitted to
// compression thread // compression thread
void EmitBlock(uint64_t raw_block_size, uint64_t curr_file_size) { void EmitBlock(uint64_t uncomp_block_size, uint64_t curr_file_size) {
uint64_t new_raw_bytes_inflight = uint64_t new_uncomp_bytes_inflight =
raw_bytes_inflight.fetch_add(raw_block_size, uncomp_bytes_inflight.fetch_add(uncomp_block_size,
std::memory_order_relaxed) + std::memory_order_relaxed) +
raw_block_size; uncomp_block_size;
uint64_t new_blocks_inflight = uint64_t new_blocks_inflight =
blocks_inflight.fetch_add(1, std::memory_order_relaxed) + 1; blocks_inflight.fetch_add(1, std::memory_order_relaxed) + 1;
@ -687,7 +691,7 @@ struct BlockBasedTableBuilder::ParallelCompressionRep {
estimated_file_size.store( estimated_file_size.store(
curr_file_size + curr_file_size +
static_cast<uint64_t>( static_cast<uint64_t>(
static_cast<double>(new_raw_bytes_inflight) * static_cast<double>(new_uncomp_bytes_inflight) *
curr_compression_ratio.load(std::memory_order_relaxed)) + curr_compression_ratio.load(std::memory_order_relaxed)) +
new_blocks_inflight * kBlockTrailerSize, new_blocks_inflight * kBlockTrailerSize,
std::memory_order_relaxed); std::memory_order_relaxed);
@ -696,24 +700,24 @@ struct BlockBasedTableBuilder::ParallelCompressionRep {
// Estimate file size when a block is already reaped from // Estimate file size when a block is already reaped from
// compression thread // compression thread
void ReapBlock(uint64_t compressed_block_size, uint64_t curr_file_size) { void ReapBlock(uint64_t compressed_block_size, uint64_t curr_file_size) {
assert(raw_bytes_curr_block_set); assert(uncomp_bytes_curr_block_set);
uint64_t new_raw_bytes_compressed = uint64_t new_uncomp_bytes_compressed =
raw_bytes_compressed + raw_bytes_curr_block; uncomp_bytes_compressed + uncomp_bytes_curr_block;
assert(new_raw_bytes_compressed > 0); assert(new_uncomp_bytes_compressed > 0);
curr_compression_ratio.store( curr_compression_ratio.store(
(curr_compression_ratio.load(std::memory_order_relaxed) * (curr_compression_ratio.load(std::memory_order_relaxed) *
raw_bytes_compressed + uncomp_bytes_compressed +
compressed_block_size) / compressed_block_size) /
static_cast<double>(new_raw_bytes_compressed), static_cast<double>(new_uncomp_bytes_compressed),
std::memory_order_relaxed); std::memory_order_relaxed);
raw_bytes_compressed = new_raw_bytes_compressed; uncomp_bytes_compressed = new_uncomp_bytes_compressed;
uint64_t new_raw_bytes_inflight = uint64_t new_uncomp_bytes_inflight =
raw_bytes_inflight.fetch_sub(raw_bytes_curr_block, uncomp_bytes_inflight.fetch_sub(uncomp_bytes_curr_block,
std::memory_order_relaxed) - std::memory_order_relaxed) -
raw_bytes_curr_block; uncomp_bytes_curr_block;
uint64_t new_blocks_inflight = uint64_t new_blocks_inflight =
blocks_inflight.fetch_sub(1, std::memory_order_relaxed) - 1; blocks_inflight.fetch_sub(1, std::memory_order_relaxed) - 1;
@ -721,12 +725,12 @@ struct BlockBasedTableBuilder::ParallelCompressionRep {
estimated_file_size.store( estimated_file_size.store(
curr_file_size + curr_file_size +
static_cast<uint64_t>( static_cast<uint64_t>(
static_cast<double>(new_raw_bytes_inflight) * static_cast<double>(new_uncomp_bytes_inflight) *
curr_compression_ratio.load(std::memory_order_relaxed)) + curr_compression_ratio.load(std::memory_order_relaxed)) +
new_blocks_inflight * kBlockTrailerSize, new_blocks_inflight * kBlockTrailerSize,
std::memory_order_relaxed); std::memory_order_relaxed);
raw_bytes_curr_block_set = false; uncomp_bytes_curr_block_set = false;
} }
void SetEstimatedFileSize(uint64_t size) { void SetEstimatedFileSize(uint64_t size) {
@ -737,24 +741,24 @@ struct BlockBasedTableBuilder::ParallelCompressionRep {
return estimated_file_size.load(std::memory_order_relaxed); return estimated_file_size.load(std::memory_order_relaxed);
} }
void SetCurrBlockRawSize(uint64_t size) { void SetCurrBlockUncompSize(uint64_t size) {
raw_bytes_curr_block = size; uncomp_bytes_curr_block = size;
raw_bytes_curr_block_set = true; uncomp_bytes_curr_block_set = true;
} }
private: private:
// Raw bytes compressed so far. // Input bytes compressed so far.
uint64_t raw_bytes_compressed; uint64_t uncomp_bytes_compressed;
// Size of current block being appended. // Size of current block being appended.
uint64_t raw_bytes_curr_block; uint64_t uncomp_bytes_curr_block;
// Whether raw_bytes_curr_block has been set for next // Whether uncomp_bytes_curr_block has been set for next
// ReapBlock call. // ReapBlock call.
bool raw_bytes_curr_block_set; bool uncomp_bytes_curr_block_set;
// Raw bytes under compression and not appended yet. // Input bytes under compression and not appended yet.
std::atomic<uint64_t> raw_bytes_inflight; std::atomic<uint64_t> uncomp_bytes_inflight;
// Number of blocks under compression and not appended yet. // Number of blocks under compression and not appended yet.
std::atomic<uint64_t> blocks_inflight; std::atomic<uint64_t> blocks_inflight;
// Current compression ratio, maintained by BGWorkWriteRawBlock. // Current compression ratio, maintained by BGWorkWriteMaybeCompressedBlock.
std::atomic<double> curr_compression_ratio; std::atomic<double> curr_compression_ratio;
// Estimated SST file size. // Estimated SST file size.
std::atomic<uint64_t> estimated_file_size; std::atomic<uint64_t> estimated_file_size;
@ -1040,19 +1044,19 @@ void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block,
BlockHandle* handle, BlockHandle* handle,
BlockType block_type) { BlockType block_type) {
block->Finish(); block->Finish();
std::string raw_block_contents; std::string uncompressed_block_data;
raw_block_contents.reserve(rep_->table_options.block_size); uncompressed_block_data.reserve(rep_->table_options.block_size);
block->SwapAndReset(raw_block_contents); block->SwapAndReset(uncompressed_block_data);
if (rep_->state == Rep::State::kBuffered) { if (rep_->state == Rep::State::kBuffered) {
assert(block_type == BlockType::kData); assert(block_type == BlockType::kData);
rep_->data_block_buffers.emplace_back(std::move(raw_block_contents)); rep_->data_block_buffers.emplace_back(std::move(uncompressed_block_data));
rep_->data_begin_offset += rep_->data_block_buffers.back().size(); rep_->data_begin_offset += rep_->data_block_buffers.back().size();
return; return;
} }
WriteBlock(raw_block_contents, handle, block_type); WriteBlock(uncompressed_block_data, handle, block_type);
} }
void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents, void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data,
BlockHandle* handle, BlockHandle* handle,
BlockType block_type) { BlockType block_type) {
Rep* r = rep_; Rep* r = rep_;
@ -1061,7 +1065,7 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
CompressionType type; CompressionType type;
Status compress_status; Status compress_status;
bool is_data_block = block_type == BlockType::kData; bool is_data_block = block_type == BlockType::kData;
CompressAndVerifyBlock(raw_block_contents, is_data_block, CompressAndVerifyBlock(uncompressed_block_data, is_data_block,
*(r->compression_ctxs[0]), r->verify_ctxs[0].get(), *(r->compression_ctxs[0]), r->verify_ctxs[0].get(),
&(r->compressed_output), &(block_contents), &type, &(r->compressed_output), &(block_contents), &type,
&compress_status); &compress_status);
@ -1070,7 +1074,8 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& raw_block_contents,
return; return;
} }
WriteRawBlock(block_contents, type, handle, block_type, &raw_block_contents); WriteMaybeCompressedBlock(block_contents, type, handle, block_type,
&uncompressed_block_data);
r->compressed_output.clear(); r->compressed_output.clear();
if (is_data_block) { if (is_data_block) {
r->props.data_size = r->get_offset(); r->props.data_size = r->get_offset();
@ -1094,7 +1099,7 @@ void BlockBasedTableBuilder::BGWorkCompression(
} }
void BlockBasedTableBuilder::CompressAndVerifyBlock( void BlockBasedTableBuilder::CompressAndVerifyBlock(
const Slice& raw_block_contents, bool is_data_block, const Slice& uncompressed_block_data, bool is_data_block,
const CompressionContext& compression_ctx, UncompressionContext* verify_ctx, const CompressionContext& compression_ctx, UncompressionContext* verify_ctx,
std::string* compressed_output, Slice* block_contents, std::string* compressed_output, Slice* block_contents,
CompressionType* type, Status* out_status) { CompressionType* type, Status* out_status) {
@ -1116,9 +1121,9 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
r->ioptions.clock, r->ioptions.clock,
ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats)); ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats));
if (is_status_ok && raw_block_contents.size() < kCompressionSizeLimit) { if (is_status_ok && uncompressed_block_data.size() < kCompressionSizeLimit) {
if (is_data_block) { if (is_data_block) {
r->compressible_input_data_bytes.fetch_add(raw_block_contents.size(), r->compressible_input_data_bytes.fetch_add(uncompressed_block_data.size(),
std::memory_order_relaxed); std::memory_order_relaxed);
} }
const CompressionDict* compression_dict; const CompressionDict* compression_dict;
@ -1135,14 +1140,14 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
std::string sampled_output_fast; std::string sampled_output_fast;
std::string sampled_output_slow; std::string sampled_output_slow;
*block_contents = CompressBlock( *block_contents = CompressBlock(
raw_block_contents, compression_info, type, uncompressed_block_data, compression_info, type,
r->table_options.format_version, is_data_block /* do_sample */, r->table_options.format_version, is_data_block /* do_sample */,
compressed_output, &sampled_output_fast, &sampled_output_slow); compressed_output, &sampled_output_fast, &sampled_output_slow);
if (sampled_output_slow.size() > 0 || sampled_output_fast.size() > 0) { if (sampled_output_slow.size() > 0 || sampled_output_fast.size() > 0) {
// Currently compression sampling is only enabled for data block. // Currently compression sampling is only enabled for data block.
assert(is_data_block); assert(is_data_block);
r->sampled_input_data_bytes.fetch_add(raw_block_contents.size(), r->sampled_input_data_bytes.fetch_add(uncompressed_block_data.size(),
std::memory_order_relaxed); std::memory_order_relaxed);
r->sampled_output_slow_data_bytes.fetch_add(sampled_output_slow.size(), r->sampled_output_slow_data_bytes.fetch_add(sampled_output_slow.size(),
std::memory_order_relaxed); std::memory_order_relaxed);
@ -1151,7 +1156,7 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
} }
// notify collectors on block add // notify collectors on block add
NotifyCollectTableCollectorsOnBlockAdd( NotifyCollectTableCollectorsOnBlockAdd(
r->table_properties_collectors, raw_block_contents.size(), r->table_properties_collectors, uncompressed_block_data.size(),
sampled_output_fast.size(), sampled_output_slow.size()); sampled_output_fast.size(), sampled_output_slow.size());
// Some of the compression algorithms are known to be unreliable. If // Some of the compression algorithms are known to be unreliable. If
@ -1169,19 +1174,20 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
BlockContents contents; BlockContents contents;
UncompressionInfo uncompression_info(*verify_ctx, *verify_dict, UncompressionInfo uncompression_info(*verify_ctx, *verify_dict,
r->compression_type); r->compression_type);
Status stat = UncompressBlockContentsForCompressionType( Status stat = UncompressBlockData(
uncompression_info, block_contents->data(), block_contents->size(), uncompression_info, block_contents->data(), block_contents->size(),
&contents, r->table_options.format_version, r->ioptions); &contents, r->table_options.format_version, r->ioptions);
if (stat.ok()) { if (stat.ok()) {
bool compressed_ok = contents.data.compare(raw_block_contents) == 0; bool compressed_ok =
contents.data.compare(uncompressed_block_data) == 0;
if (!compressed_ok) { if (!compressed_ok) {
// The result of the compression was invalid. abort. // The result of the compression was invalid. abort.
abort_compression = true; abort_compression = true;
ROCKS_LOG_ERROR(r->ioptions.logger, const char* const msg =
"Decompressed block did not match raw block"); "Decompressed block did not match pre-compression block";
*out_status = ROCKS_LOG_ERROR(r->ioptions.logger, "%s", msg);
Status::Corruption("Decompressed block did not match raw block"); *out_status = Status::Corruption(msg);
} }
} else { } else {
// Decompression reported an error. abort. // Decompression reported an error. abort.
@ -1193,8 +1199,8 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
} else { } else {
// Block is too big to be compressed. // Block is too big to be compressed.
if (is_data_block) { if (is_data_block) {
r->uncompressible_input_data_bytes.fetch_add(raw_block_contents.size(), r->uncompressible_input_data_bytes.fetch_add(
std::memory_order_relaxed); uncompressed_block_data.size(), std::memory_order_relaxed);
} }
abort_compression = true; abort_compression = true;
} }
@ -1208,27 +1214,26 @@ void BlockBasedTableBuilder::CompressAndVerifyBlock(
if (abort_compression) { if (abort_compression) {
RecordTick(r->ioptions.stats, NUMBER_BLOCK_NOT_COMPRESSED); RecordTick(r->ioptions.stats, NUMBER_BLOCK_NOT_COMPRESSED);
*type = kNoCompression; *type = kNoCompression;
*block_contents = raw_block_contents; *block_contents = uncompressed_block_data;
} else if (*type != kNoCompression) { } else if (*type != kNoCompression) {
if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats)) { if (ShouldReportDetailedTime(r->ioptions.env, r->ioptions.stats)) {
RecordTimeToHistogram(r->ioptions.stats, COMPRESSION_TIMES_NANOS, RecordTimeToHistogram(r->ioptions.stats, COMPRESSION_TIMES_NANOS,
timer.ElapsedNanos()); timer.ElapsedNanos());
} }
RecordInHistogram(r->ioptions.stats, BYTES_COMPRESSED, RecordInHistogram(r->ioptions.stats, BYTES_COMPRESSED,
raw_block_contents.size()); uncompressed_block_data.size());
RecordTick(r->ioptions.stats, NUMBER_BLOCK_COMPRESSED); RecordTick(r->ioptions.stats, NUMBER_BLOCK_COMPRESSED);
} else if (*type != r->compression_type) { } else if (*type != r->compression_type) {
RecordTick(r->ioptions.stats, NUMBER_BLOCK_NOT_COMPRESSED); RecordTick(r->ioptions.stats, NUMBER_BLOCK_NOT_COMPRESSED);
} }
} }
void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents, void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
CompressionType type, const Slice& block_contents, CompressionType type, BlockHandle* handle,
BlockHandle* handle, BlockType block_type, const Slice* uncompressed_block_data) {
BlockType block_type,
const Slice* raw_block_contents) {
Rep* r = rep_; Rep* r = rep_;
bool is_data_block = block_type == BlockType::kData; bool is_data_block = block_type == BlockType::kData;
// Old, misleading name of this function: WriteRawBlock
StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS); StopWatch sw(r->ioptions.clock, r->ioptions.stats, WRITE_RAW_BLOCK_MICROS);
handle->set_offset(r->get_offset()); handle->set_offset(r->get_offset());
handle->set_size(block_contents.size()); handle->set_size(block_contents.size());
@ -1259,7 +1264,7 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
EncodeFixed32(trailer.data() + 1, checksum); EncodeFixed32(trailer.data() + 1, checksum);
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::WriteRawBlock:TamperWithChecksum", "BlockBasedTableBuilder::WriteMaybeCompressedBlock:TamperWithChecksum",
trailer.data()); trailer.data());
{ {
IOStatus io_s = r->file->Append(Slice(trailer.data(), trailer.size())); IOStatus io_s = r->file->Append(Slice(trailer.data(), trailer.size()));
@ -1287,8 +1292,9 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
if (warm_cache) { if (warm_cache) {
if (type == kNoCompression) { if (type == kNoCompression) {
s = InsertBlockInCacheHelper(block_contents, handle, block_type); s = InsertBlockInCacheHelper(block_contents, handle, block_type);
} else if (raw_block_contents != nullptr) { } else if (uncompressed_block_data != nullptr) {
s = InsertBlockInCacheHelper(*raw_block_contents, handle, block_type); s = InsertBlockInCacheHelper(*uncompressed_block_data, handle,
block_type);
} }
if (!s.ok()) { if (!s.ok()) {
r->SetStatus(s); r->SetStatus(s);
@ -1327,7 +1333,7 @@ void BlockBasedTableBuilder::WriteRawBlock(const Slice& block_contents,
} }
} }
void BlockBasedTableBuilder::BGWorkWriteRawBlock() { void BlockBasedTableBuilder::BGWorkWriteMaybeCompressedBlock() {
Rep* r = rep_; Rep* r = rep_;
ParallelCompressionRep::BlockRepSlot* slot = nullptr; ParallelCompressionRep::BlockRepSlot* slot = nullptr;
ParallelCompressionRep::BlockRep* block_rep = nullptr; ParallelCompressionRep::BlockRep* block_rep = nullptr;
@ -1354,9 +1360,11 @@ void BlockBasedTableBuilder::BGWorkWriteRawBlock() {
r->index_builder->OnKeyAdded(key); r->index_builder->OnKeyAdded(key);
} }
r->pc_rep->file_size_estimator.SetCurrBlockRawSize(block_rep->data->size()); r->pc_rep->file_size_estimator.SetCurrBlockUncompSize(
WriteRawBlock(block_rep->compressed_contents, block_rep->compression_type, block_rep->data->size());
&r->pending_handle, BlockType::kData, &block_rep->contents); WriteMaybeCompressedBlock(block_rep->compressed_contents,
block_rep->compression_type, &r->pending_handle,
BlockType::kData, &block_rep->contents);
if (!ok()) { if (!ok()) {
break; break;
} }
@ -1391,7 +1399,7 @@ void BlockBasedTableBuilder::StartParallelCompression() {
}); });
} }
rep_->pc_rep->write_thread.reset( rep_->pc_rep->write_thread.reset(
new port::Thread([this] { BGWorkWriteRawBlock(); })); new port::Thread([this] { BGWorkWriteMaybeCompressedBlock(); }));
} }
void BlockBasedTableBuilder::StopParallelCompression() { void BlockBasedTableBuilder::StopParallelCompression() {
@ -1438,7 +1446,7 @@ Status BlockBasedTableBuilder::InsertBlockInCompressedCache(
BlockContents* block_contents_to_cache = BlockContents* block_contents_to_cache =
new BlockContents(std::move(ubuf), size); new BlockContents(std::move(ubuf), size);
#ifndef NDEBUG #ifndef NDEBUG
block_contents_to_cache->is_raw_block = true; block_contents_to_cache->has_trailer = true;
#endif // NDEBUG #endif // NDEBUG
CacheKey key = BlockBasedTable::GetCacheKey(rep_->base_cache_key, *handle); CacheKey key = BlockBasedTable::GetCacheKey(rep_->base_cache_key, *handle);
@ -1567,8 +1575,8 @@ void BlockBasedTableBuilder::WriteFilterBlock(
BlockType btype = is_partitioned_filter && /* last */ s.ok() BlockType btype = is_partitioned_filter && /* last */ s.ok()
? BlockType::kFilterPartitionIndex ? BlockType::kFilterPartitionIndex
: BlockType::kFilter; : BlockType::kFilter;
WriteRawBlock(filter_content, kNoCompression, &filter_block_handle, btype, WriteMaybeCompressedBlock(filter_content, kNoCompression,
nullptr /*raw_contents*/); &filter_block_handle, btype);
} }
rep_->filter_builder->ResetFilterBitsBuilder(); rep_->filter_builder->ResetFilterBitsBuilder();
} }
@ -1613,8 +1621,9 @@ void BlockBasedTableBuilder::WriteIndexBlock(
WriteBlock(index_blocks.index_block_contents, index_block_handle, WriteBlock(index_blocks.index_block_contents, index_block_handle,
BlockType::kIndex); BlockType::kIndex);
} else { } else {
WriteRawBlock(index_blocks.index_block_contents, kNoCompression, WriteMaybeCompressedBlock(index_blocks.index_block_contents,
index_block_handle, BlockType::kIndex); kNoCompression, index_block_handle,
BlockType::kIndex);
} }
} }
// If there are more index partitions, finish them and write them out // If there are more index partitions, finish them and write them out
@ -1638,8 +1647,9 @@ void BlockBasedTableBuilder::WriteIndexBlock(
WriteBlock(index_blocks.index_block_contents, index_block_handle, WriteBlock(index_blocks.index_block_contents, index_block_handle,
BlockType::kIndex); BlockType::kIndex);
} else { } else {
WriteRawBlock(index_blocks.index_block_contents, kNoCompression, WriteMaybeCompressedBlock(index_blocks.index_block_contents,
index_block_handle, BlockType::kIndex); kNoCompression, index_block_handle,
BlockType::kIndex);
} }
// The last index_block_handle will be for the partition index block // The last index_block_handle will be for the partition index block
} }
@ -1727,8 +1737,8 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
Slice block_data = property_block_builder.Finish(); Slice block_data = property_block_builder.Finish();
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
"BlockBasedTableBuilder::WritePropertiesBlock:BlockData", &block_data); "BlockBasedTableBuilder::WritePropertiesBlock:BlockData", &block_data);
WriteRawBlock(block_data, kNoCompression, &properties_block_handle, WriteMaybeCompressedBlock(block_data, kNoCompression,
BlockType::kProperties); &properties_block_handle, BlockType::kProperties);
} }
if (ok()) { if (ok()) {
#ifndef NDEBUG #ifndef NDEBUG
@ -1758,8 +1768,8 @@ void BlockBasedTableBuilder::WriteCompressionDictBlock(
rep_->compression_dict->GetRawDict().size()) { rep_->compression_dict->GetRawDict().size()) {
BlockHandle compression_dict_block_handle; BlockHandle compression_dict_block_handle;
if (ok()) { if (ok()) {
WriteRawBlock(rep_->compression_dict->GetRawDict(), kNoCompression, WriteMaybeCompressedBlock(rep_->compression_dict->GetRawDict(),
&compression_dict_block_handle, kNoCompression, &compression_dict_block_handle,
BlockType::kCompressionDictionary); BlockType::kCompressionDictionary);
#ifndef NDEBUG #ifndef NDEBUG
Slice compression_dict = rep_->compression_dict->GetRawDict(); Slice compression_dict = rep_->compression_dict->GetRawDict();
@ -1779,8 +1789,9 @@ void BlockBasedTableBuilder::WriteRangeDelBlock(
MetaIndexBuilder* meta_index_builder) { MetaIndexBuilder* meta_index_builder) {
if (ok() && !rep_->range_del_block.empty()) { if (ok() && !rep_->range_del_block.empty()) {
BlockHandle range_del_block_handle; BlockHandle range_del_block_handle;
WriteRawBlock(rep_->range_del_block.Finish(), kNoCompression, WriteMaybeCompressedBlock(rep_->range_del_block.Finish(), kNoCompression,
&range_del_block_handle, BlockType::kRangeDeletion); &range_del_block_handle,
BlockType::kRangeDeletion);
meta_index_builder->Add(kRangeDelBlockName, range_del_block_handle); meta_index_builder->Add(kRangeDelBlockName, range_del_block_handle);
} }
} }
@ -2001,7 +2012,7 @@ Status BlockBasedTableBuilder::Finish() {
WritePropertiesBlock(&meta_index_builder); WritePropertiesBlock(&meta_index_builder);
if (ok()) { if (ok()) {
// flush the meta index block // flush the meta index block
WriteRawBlock(meta_index_builder.Finish(), kNoCompression, WriteMaybeCompressedBlock(meta_index_builder.Finish(), kNoCompression,
&metaindex_block_handle, BlockType::kMetaIndex); &metaindex_block_handle, BlockType::kMetaIndex);
} }
if (ok()) { if (ok()) {
@ -2036,7 +2047,7 @@ uint64_t BlockBasedTableBuilder::FileSize() const { return rep_->offset; }
uint64_t BlockBasedTableBuilder::EstimatedFileSize() const { uint64_t BlockBasedTableBuilder::EstimatedFileSize() const {
if (rep_->IsParallelCompressionEnabled()) { if (rep_->IsParallelCompressionEnabled()) {
// Use compression ratio so far and inflight raw bytes to estimate // Use compression ratio so far and inflight uncompressed bytes to estimate
// final SST size. // final SST size.
return rep_->pc_rep->file_size_estimator.GetEstimatedFileSize(); return rep_->pc_rep->file_size_estimator.GetEstimatedFileSize();
} else { } else {

@ -122,8 +122,9 @@ class BlockBasedTableBuilder : public TableBuilder {
void WriteBlock(const Slice& block_contents, BlockHandle* handle, void WriteBlock(const Slice& block_contents, BlockHandle* handle,
BlockType block_type); BlockType block_type);
// Directly write data to the file. // Directly write data to the file.
void WriteRawBlock(const Slice& data, CompressionType, BlockHandle* handle, void WriteMaybeCompressedBlock(const Slice& data, CompressionType,
BlockType block_type, const Slice* raw_data = nullptr); BlockHandle* handle, BlockType block_type,
const Slice* raw_data = nullptr);
void SetupCacheKeyPrefix(const TableBuilderOptions& tbo); void SetupCacheKeyPrefix(const TableBuilderOptions& tbo);
@ -161,8 +162,9 @@ class BlockBasedTableBuilder : public TableBuilder {
// REQUIRES: Finish(), Abandon() have not been called // REQUIRES: Finish(), Abandon() have not been called
void Flush(); void Flush();
// Some compression libraries fail when the raw size is bigger than int. If // Some compression libraries fail when the uncompressed size is bigger than
// uncompressed size is bigger than kCompressionSizeLimit, don't compress it // int. If uncompressed size is bigger than kCompressionSizeLimit, don't
// compress it
const uint64_t kCompressionSizeLimit = std::numeric_limits<int>::max(); const uint64_t kCompressionSizeLimit = std::numeric_limits<int>::max();
// Get blocks from mem-table walking thread, compress them and // Get blocks from mem-table walking thread, compress them and
@ -170,9 +172,9 @@ class BlockBasedTableBuilder : public TableBuilder {
void BGWorkCompression(const CompressionContext& compression_ctx, void BGWorkCompression(const CompressionContext& compression_ctx,
UncompressionContext* verify_ctx); UncompressionContext* verify_ctx);
// Given raw block content, try to compress it and return result and // Given uncompressed block content, try to compress it and return result and
// compression type // compression type
void CompressAndVerifyBlock(const Slice& raw_block_contents, void CompressAndVerifyBlock(const Slice& uncompressed_block_data,
bool is_data_block, bool is_data_block,
const CompressionContext& compression_ctx, const CompressionContext& compression_ctx,
UncompressionContext* verify_ctx, UncompressionContext* verify_ctx,
@ -182,17 +184,17 @@ class BlockBasedTableBuilder : public TableBuilder {
Status* out_status); Status* out_status);
// Get compressed blocks from BGWorkCompression and write them into SST // Get compressed blocks from BGWorkCompression and write them into SST
void BGWorkWriteRawBlock(); void BGWorkWriteMaybeCompressedBlock();
// Initialize parallel compression context and // Initialize parallel compression context and
// start BGWorkCompression and BGWorkWriteRawBlock threads // start BGWorkCompression and BGWorkWriteMaybeCompressedBlock threads
void StartParallelCompression(); void StartParallelCompression();
// Stop BGWorkCompression and BGWorkWriteRawBlock threads // Stop BGWorkCompression and BGWorkWriteMaybeCompressedBlock threads
void StopParallelCompression(); void StopParallelCompression();
}; };
Slice CompressBlock(const Slice& raw, const CompressionInfo& info, Slice CompressBlock(const Slice& uncompressed_data, const CompressionInfo& info,
CompressionType* type, uint32_t format_version, CompressionType* type, uint32_t format_version,
bool do_sample, std::string* compressed_output, bool do_sample, std::string* compressed_output,
std::string* sampled_output_fast, std::string* sampled_output_fast,

@ -412,7 +412,7 @@ template <typename TBlocklike>
Status BlockBasedTable::InsertEntryToCache( Status BlockBasedTable::InsertEntryToCache(
const CacheTier& cache_tier, Cache* block_cache, const Slice& key, const CacheTier& cache_tier, Cache* block_cache, const Slice& key,
const Cache::CacheItemHelper* cache_helper, const Cache::CacheItemHelper* cache_helper,
std::unique_ptr<TBlocklike>& block_holder, size_t charge, std::unique_ptr<TBlocklike>&& block_holder, size_t charge,
Cache::Handle** cache_handle, Cache::Priority priority) const { Cache::Handle** cache_handle, Cache::Priority priority) const {
Status s = Status::OK(); Status s = Status::OK();
if (cache_tier == CacheTier::kNonVolatileBlockTier) { if (cache_tier == CacheTier::kNonVolatileBlockTier) {
@ -422,6 +422,11 @@ Status BlockBasedTable::InsertEntryToCache(
s = block_cache->Insert(key, block_holder.get(), charge, s = block_cache->Insert(key, block_holder.get(), charge,
cache_helper->del_cb, cache_handle, priority); cache_helper->del_cb, cache_handle, priority);
} }
if (s.ok()) {
// Cache took ownership
block_holder.release();
}
s.MustCheck();
return s; return s;
} }
@ -1271,15 +1276,16 @@ Status BlockBasedTable::ReadMetaIndexBlock(
template <typename TBlocklike> template <typename TBlocklike>
Status BlockBasedTable::GetDataBlockFromCache( Status BlockBasedTable::GetDataBlockFromCache(
const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed, const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed,
const ReadOptions& read_options, CachableEntry<TBlocklike>* block, const ReadOptions& read_options,
CachableEntry<TBlocklike>* out_parsed_block,
const UncompressionDict& uncompression_dict, BlockType block_type, const UncompressionDict& uncompression_dict, BlockType block_type,
const bool wait, GetContext* get_context) const { const bool wait, GetContext* get_context) const {
const size_t read_amp_bytes_per_bit = const size_t read_amp_bytes_per_bit =
block_type == BlockType::kData block_type == BlockType::kData
? rep_->table_options.read_amp_bytes_per_bit ? rep_->table_options.read_amp_bytes_per_bit
: 0; : 0;
assert(block); assert(out_parsed_block);
assert(block->IsEmpty()); assert(out_parsed_block->IsEmpty());
// Here we treat the legacy name "...index_and_filter_blocks..." to mean all // Here we treat the legacy name "...index_and_filter_blocks..." to mean all
// metadata blocks that might go into block cache, EXCEPT only those needed // metadata blocks that might go into block cache, EXCEPT only those needed
// for the read path (Get, etc.). TableProperties should not be needed on the // for the read path (Get, etc.). TableProperties should not be needed on the
@ -1312,7 +1318,7 @@ Status BlockBasedTable::GetDataBlockFromCache(
BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type), create_cb, BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type), create_cb,
priority); priority);
if (cache_handle != nullptr) { if (cache_handle != nullptr) {
block->SetCachedValue( out_parsed_block->SetCachedValue(
reinterpret_cast<TBlocklike*>(block_cache->Value(cache_handle)), reinterpret_cast<TBlocklike*>(block_cache->Value(cache_handle)),
block_cache, cache_handle); block_cache, cache_handle);
return s; return s;
@ -1320,7 +1326,7 @@ Status BlockBasedTable::GetDataBlockFromCache(
} }
// If not found, search from the compressed block cache. // If not found, search from the compressed block cache.
assert(block->IsEmpty()); assert(out_parsed_block->IsEmpty());
if (block_cache_compressed == nullptr) { if (block_cache_compressed == nullptr) {
return s; return s;
@ -1358,31 +1364,32 @@ Status BlockBasedTable::GetDataBlockFromCache(
// Retrieve the uncompressed contents into a new buffer // Retrieve the uncompressed contents into a new buffer
UncompressionContext context(compression_type); UncompressionContext context(compression_type);
UncompressionInfo info(context, uncompression_dict, compression_type); UncompressionInfo info(context, uncompression_dict, compression_type);
s = UncompressBlockContents( s = UncompressSerializedBlock(
info, compressed_block->data.data(), compressed_block->data.size(), info, compressed_block->data.data(), compressed_block->data.size(),
&contents, rep_->table_options.format_version, rep_->ioptions, &contents, rep_->table_options.format_version, rep_->ioptions,
GetMemoryAllocator(rep_->table_options)); GetMemoryAllocator(rep_->table_options));
// Insert uncompressed block into block cache, the priority is based on the // Insert parsed block into block cache, the priority is based on the
// data block type. // data block type.
if (s.ok()) { if (s.ok()) {
std::unique_ptr<TBlocklike> block_holder( std::unique_ptr<TBlocklike> block_holder(
BlocklikeTraits<TBlocklike>::Create( BlocklikeTraits<TBlocklike>::Create(
std::move(contents), read_amp_bytes_per_bit, statistics, std::move(contents), read_amp_bytes_per_bit, statistics,
rep_->blocks_definitely_zstd_compressed, rep_->blocks_definitely_zstd_compressed,
rep_->table_options.filter_policy.get())); // uncompressed block rep_->table_options.filter_policy.get()));
if (block_cache != nullptr && block_holder->own_bytes() && if (block_cache != nullptr && block_holder->own_bytes() &&
read_options.fill_cache) { read_options.fill_cache) {
size_t charge = block_holder->ApproximateMemoryUsage(); size_t charge = block_holder->ApproximateMemoryUsage();
Cache::Handle* cache_handle = nullptr; Cache::Handle* cache_handle = nullptr;
auto block_holder_raw_ptr = block_holder.get();
s = InsertEntryToCache( s = InsertEntryToCache(
rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key, rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type), BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type),
block_holder, charge, &cache_handle, priority); std::move(block_holder), charge, &cache_handle, priority);
if (s.ok()) { if (s.ok()) {
assert(cache_handle != nullptr); assert(cache_handle != nullptr);
block->SetCachedValue(block_holder.release(), block_cache, out_parsed_block->SetCachedValue(block_holder_raw_ptr, block_cache,
cache_handle); cache_handle);
UpdateCacheInsertionMetrics(block_type, get_context, charge, UpdateCacheInsertionMetrics(block_type, get_context, charge,
@ -1391,7 +1398,7 @@ Status BlockBasedTable::GetDataBlockFromCache(
RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
} }
} else { } else {
block->SetOwnedValue(block_holder.release()); out_parsed_block->SetOwnedValue(std::move(block_holder));
} }
} }
@ -1403,8 +1410,8 @@ Status BlockBasedTable::GetDataBlockFromCache(
template <typename TBlocklike> template <typename TBlocklike>
Status BlockBasedTable::PutDataBlockToCache( Status BlockBasedTable::PutDataBlockToCache(
const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed, const Slice& cache_key, Cache* block_cache, Cache* block_cache_compressed,
CachableEntry<TBlocklike>* cached_block, BlockContents* raw_block_contents, CachableEntry<TBlocklike>* out_parsed_block, BlockContents&& block_contents,
CompressionType raw_block_comp_type, CompressionType block_comp_type,
const UncompressionDict& uncompression_dict, const UncompressionDict& uncompression_dict,
MemoryAllocator* memory_allocator, BlockType block_type, MemoryAllocator* memory_allocator, BlockType block_type,
GetContext* get_context) const { GetContext* get_context) const {
@ -1419,20 +1426,20 @@ Status BlockBasedTable::PutDataBlockToCache(
block_type != BlockType::kData block_type != BlockType::kData
? Cache::Priority::HIGH ? Cache::Priority::HIGH
: Cache::Priority::LOW; : Cache::Priority::LOW;
assert(cached_block); assert(out_parsed_block);
assert(cached_block->IsEmpty()); assert(out_parsed_block->IsEmpty());
Status s; Status s;
Statistics* statistics = ioptions.stats; Statistics* statistics = ioptions.stats;
std::unique_ptr<TBlocklike> block_holder; std::unique_ptr<TBlocklike> block_holder;
if (raw_block_comp_type != kNoCompression) { if (block_comp_type != kNoCompression) {
// Retrieve the uncompressed contents into a new buffer // Retrieve the uncompressed contents into a new buffer
BlockContents uncompressed_block_contents; BlockContents uncompressed_block_contents;
UncompressionContext context(raw_block_comp_type); UncompressionContext context(block_comp_type);
UncompressionInfo info(context, uncompression_dict, raw_block_comp_type); UncompressionInfo info(context, uncompression_dict, block_comp_type);
s = UncompressBlockContents(info, raw_block_contents->data.data(), s = UncompressBlockData(info, block_contents.data.data(),
raw_block_contents->data.size(), block_contents.data.size(),
&uncompressed_block_contents, format_version, &uncompressed_block_contents, format_version,
ioptions, memory_allocator); ioptions, memory_allocator);
if (!s.ok()) { if (!s.ok()) {
@ -1445,52 +1452,50 @@ Status BlockBasedTable::PutDataBlockToCache(
rep_->table_options.filter_policy.get())); rep_->table_options.filter_policy.get()));
} else { } else {
block_holder.reset(BlocklikeTraits<TBlocklike>::Create( block_holder.reset(BlocklikeTraits<TBlocklike>::Create(
std::move(*raw_block_contents), read_amp_bytes_per_bit, statistics, std::move(block_contents), read_amp_bytes_per_bit, statistics,
rep_->blocks_definitely_zstd_compressed, rep_->blocks_definitely_zstd_compressed,
rep_->table_options.filter_policy.get())); rep_->table_options.filter_policy.get()));
} }
// Insert compressed block into compressed block cache. // Insert compressed block into compressed block cache.
// Release the hold on the compressed cache entry immediately. // Release the hold on the compressed cache entry immediately.
if (block_cache_compressed != nullptr && if (block_cache_compressed != nullptr && block_comp_type != kNoCompression &&
raw_block_comp_type != kNoCompression && raw_block_contents != nullptr && block_contents.own_bytes()) {
raw_block_contents->own_bytes()) { assert(block_contents.has_trailer);
assert(raw_block_contents->is_raw_block);
assert(!cache_key.empty()); assert(!cache_key.empty());
// We cannot directly put raw_block_contents because this could point to // We cannot directly put block_contents because this could point to
// an object in the stack. // an object in the stack.
std::unique_ptr<BlockContents> block_cont_for_comp_cache( auto block_cont_for_comp_cache =
new BlockContents(std::move(*raw_block_contents))); std::make_unique<BlockContents>(std::move(block_contents));
size_t charge = block_cont_for_comp_cache->ApproximateMemoryUsage();
s = InsertEntryToCache( s = InsertEntryToCache(
rep_->ioptions.lowest_used_cache_tier, block_cache_compressed, rep_->ioptions.lowest_used_cache_tier, block_cache_compressed,
cache_key, cache_key,
BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type), BlocklikeTraits<BlockContents>::GetCacheItemHelper(block_type),
block_cont_for_comp_cache, std::move(block_cont_for_comp_cache), charge, nullptr,
block_cont_for_comp_cache->ApproximateMemoryUsage(), nullptr,
Cache::Priority::LOW); Cache::Priority::LOW);
BlockContents* block_cont_raw_ptr = block_cont_for_comp_cache.release();
if (s.ok()) { if (s.ok()) {
// Avoid the following code to delete this cached block. // Avoid the following code to delete this cached block.
RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD); RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD);
} else { } else {
RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES); RecordTick(statistics, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
delete block_cont_raw_ptr;
} }
} }
// insert into uncompressed block cache // insert into uncompressed block cache
if (block_cache != nullptr && block_holder->own_bytes()) { if (block_cache != nullptr && block_holder->own_bytes()) {
size_t charge = block_holder->ApproximateMemoryUsage(); size_t charge = block_holder->ApproximateMemoryUsage();
auto block_holder_raw_ptr = block_holder.get();
Cache::Handle* cache_handle = nullptr; Cache::Handle* cache_handle = nullptr;
s = InsertEntryToCache( s = InsertEntryToCache(
rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key, rep_->ioptions.lowest_used_cache_tier, block_cache, cache_key,
BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type), BlocklikeTraits<TBlocklike>::GetCacheItemHelper(block_type),
block_holder, charge, &cache_handle, priority); std::move(block_holder), charge, &cache_handle, priority);
if (s.ok()) { if (s.ok()) {
assert(cache_handle != nullptr); assert(cache_handle != nullptr);
cached_block->SetCachedValue(block_holder.release(), block_cache, out_parsed_block->SetCachedValue(block_holder_raw_ptr, block_cache,
cache_handle); cache_handle);
UpdateCacheInsertionMetrics(block_type, get_context, charge, UpdateCacheInsertionMetrics(block_type, get_context, charge,
@ -1499,7 +1504,7 @@ Status BlockBasedTable::PutDataBlockToCache(
RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES); RecordTick(statistics, BLOCK_CACHE_ADD_FAILURES);
} }
} else { } else {
cached_block->SetOwnedValue(block_holder.release()); out_parsed_block->SetOwnedValue(std::move(block_holder));
} }
return s; return s;
@ -1580,10 +1585,10 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
const bool wait, const bool for_compaction, const bool wait, const bool for_compaction,
CachableEntry<TBlocklike>* block_entry, BlockType block_type, CachableEntry<TBlocklike>* out_parsed_block, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
BlockContents* contents, bool async_read) const { BlockContents* contents, bool async_read) const {
assert(block_entry != nullptr); assert(out_parsed_block != nullptr);
const bool no_io = (ro.read_tier == kBlockCacheTier); const bool no_io = (ro.read_tier == kBlockCacheTier);
Cache* block_cache = rep_->table_options.block_cache.get(); Cache* block_cache = rep_->table_options.block_cache.get();
Cache* block_cache_compressed = Cache* block_cache_compressed =
@ -1603,11 +1608,11 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
if (!contents) { if (!contents) {
s = GetDataBlockFromCache(key, block_cache, block_cache_compressed, ro, s = GetDataBlockFromCache(key, block_cache, block_cache_compressed, ro,
block_entry, uncompression_dict, block_type, out_parsed_block, uncompression_dict,
wait, get_context); block_type, wait, get_context);
// Value could still be null at this point, so check the cache handle // Value could still be null at this point, so check the cache handle
// and update the read pattern for prefetching // and update the read pattern for prefetching
if (block_entry->GetValue() || block_entry->GetCacheHandle()) { if (out_parsed_block->GetValue() || out_parsed_block->GetCacheHandle()) {
// TODO(haoyu): Differentiate cache hit on uncompressed block cache and // TODO(haoyu): Differentiate cache hit on uncompressed block cache and
// compressed block cache. // compressed block cache.
is_cache_hit = true; is_cache_hit = true;
@ -1624,25 +1629,26 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
// Can't find the block from the cache. If I/O is allowed, read from the // Can't find the block from the cache. If I/O is allowed, read from the
// file. // file.
if (block_entry->GetValue() == nullptr && if (out_parsed_block->GetValue() == nullptr &&
block_entry->GetCacheHandle() == nullptr && !no_io && ro.fill_cache) { out_parsed_block->GetCacheHandle() == nullptr && !no_io &&
ro.fill_cache) {
Statistics* statistics = rep_->ioptions.stats; Statistics* statistics = rep_->ioptions.stats;
const bool maybe_compressed = const bool maybe_compressed =
block_type != BlockType::kFilter && block_type != BlockType::kFilter &&
block_type != BlockType::kCompressionDictionary && block_type != BlockType::kCompressionDictionary &&
rep_->blocks_maybe_compressed; rep_->blocks_maybe_compressed;
const bool do_uncompress = maybe_compressed && !block_cache_compressed; const bool do_uncompress = maybe_compressed && !block_cache_compressed;
CompressionType raw_block_comp_type; CompressionType contents_comp_type;
BlockContents raw_block_contents; // Maybe serialized or uncompressed
BlockContents tmp_contents;
if (!contents) { if (!contents) {
Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS Histograms histogram = for_compaction ? READ_BLOCK_COMPACTION_MICROS
: READ_BLOCK_GET_MICROS; : READ_BLOCK_GET_MICROS;
StopWatch sw(rep_->ioptions.clock, statistics, histogram); StopWatch sw(rep_->ioptions.clock, statistics, histogram);
BlockFetcher block_fetcher( BlockFetcher block_fetcher(
rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle, rep_->file.get(), prefetch_buffer, rep_->footer, ro, handle,
&raw_block_contents, rep_->ioptions, do_uncompress, &tmp_contents, rep_->ioptions, do_uncompress, maybe_compressed,
maybe_compressed, block_type, uncompression_dict, block_type, uncompression_dict, rep_->persistent_cache_options,
rep_->persistent_cache_options,
GetMemoryAllocator(rep_->table_options), GetMemoryAllocator(rep_->table_options),
GetMemoryAllocatorForCompressedBlock(rep_->table_options)); GetMemoryAllocatorForCompressedBlock(rep_->table_options));
@ -1657,8 +1663,8 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
s = block_fetcher.ReadBlockContents(); s = block_fetcher.ReadBlockContents();
} }
raw_block_comp_type = block_fetcher.get_compression_type(); contents_comp_type = block_fetcher.get_compression_type();
contents = &raw_block_contents; contents = &tmp_contents;
if (get_context) { if (get_context) {
switch (block_type) { switch (block_type) {
case BlockType::kIndex: case BlockType::kIndex:
@ -1673,15 +1679,15 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
} }
} }
} else { } else {
raw_block_comp_type = GetBlockCompressionType(*contents); contents_comp_type = GetBlockCompressionType(*contents);
} }
if (s.ok()) { if (s.ok()) {
// If filling cache is allowed and a cache is configured, try to put the // If filling cache is allowed and a cache is configured, try to put the
// block to the cache. // block to the cache.
s = PutDataBlockToCache( s = PutDataBlockToCache(
key, block_cache, block_cache_compressed, block_entry, contents, key, block_cache, block_cache_compressed, out_parsed_block,
raw_block_comp_type, uncompression_dict, std::move(*contents), contents_comp_type, uncompression_dict,
GetMemoryAllocator(rep_->table_options), block_type, get_context); GetMemoryAllocator(rep_->table_options), block_type, get_context);
} }
} }
@ -1692,12 +1698,12 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
lookup_context) { lookup_context) {
size_t usage = 0; size_t usage = 0;
uint64_t nkeys = 0; uint64_t nkeys = 0;
if (block_entry->GetValue()) { if (out_parsed_block->GetValue()) {
// Approximate the number of keys in the block using restarts. // Approximate the number of keys in the block using restarts.
nkeys = nkeys = rep_->table_options.block_restart_interval *
rep_->table_options.block_restart_interval * BlocklikeTraits<TBlocklike>::GetNumRestarts(
BlocklikeTraits<TBlocklike>::GetNumRestarts(*block_entry->GetValue()); *out_parsed_block->GetValue());
usage = block_entry->GetValue()->ApproximateMemoryUsage(); usage = out_parsed_block->GetValue()->ApproximateMemoryUsage();
} }
TraceType trace_block_type = TraceType::kTraceMax; TraceType trace_block_type = TraceType::kTraceMax;
switch (block_type) { switch (block_type) {
@ -1752,7 +1758,7 @@ Status BlockBasedTable::MaybeReadBlockAndLoadToCache(
} }
} }
assert(s.ok() || block_entry->GetValue() == nullptr); assert(s.ok() || out_parsed_block->GetValue() == nullptr);
return s; return s;
} }
@ -1760,32 +1766,33 @@ template <typename TBlocklike>
Status BlockBasedTable::RetrieveBlock( Status BlockBasedTable::RetrieveBlock(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<TBlocklike>* block_entry, BlockType block_type, CachableEntry<TBlocklike>* out_parsed_block, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction, bool use_cache, bool wait_for_cache, bool for_compaction, bool use_cache, bool wait_for_cache,
bool async_read) const { bool async_read) const {
assert(block_entry); assert(out_parsed_block);
assert(block_entry->IsEmpty()); assert(out_parsed_block->IsEmpty());
Status s; Status s;
if (use_cache) { if (use_cache) {
s = MaybeReadBlockAndLoadToCache( s = MaybeReadBlockAndLoadToCache(prefetch_buffer, ro, handle,
prefetch_buffer, ro, handle, uncompression_dict, wait_for_cache, uncompression_dict, wait_for_cache,
for_compaction, block_entry, block_type, get_context, lookup_context, for_compaction, out_parsed_block,
block_type, get_context, lookup_context,
/*contents=*/nullptr, async_read); /*contents=*/nullptr, async_read);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
if (block_entry->GetValue() != nullptr || if (out_parsed_block->GetValue() != nullptr ||
block_entry->GetCacheHandle() != nullptr) { out_parsed_block->GetCacheHandle() != nullptr) {
assert(s.ok()); assert(s.ok());
return s; return s;
} }
} }
assert(block_entry->IsEmpty()); assert(out_parsed_block->IsEmpty());
const bool no_io = ro.read_tier == kBlockCacheTier; const bool no_io = ro.read_tier == kBlockCacheTier;
if (no_io) { if (no_io) {
@ -1833,7 +1840,7 @@ Status BlockBasedTable::RetrieveBlock(
return s; return s;
} }
block_entry->SetOwnedValue(block.release()); out_parsed_block->SetOwnedValue(std::move(block));
assert(s.ok()); assert(s.ok());
return s; return s;
@ -1844,7 +1851,7 @@ Status BlockBasedTable::RetrieveBlock(
template Status BlockBasedTable::RetrieveBlock<BlockContents>( template Status BlockBasedTable::RetrieveBlock<BlockContents>(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<BlockContents>* block_entry, BlockType block_type, CachableEntry<BlockContents>* out_parsed_block, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction, bool use_cache, bool wait_for_cache, bool for_compaction, bool use_cache, bool wait_for_cache,
bool async_read) const; bool async_read) const;
@ -1852,15 +1859,15 @@ template Status BlockBasedTable::RetrieveBlock<BlockContents>(
template Status BlockBasedTable::RetrieveBlock<ParsedFullFilterBlock>( template Status BlockBasedTable::RetrieveBlock<ParsedFullFilterBlock>(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<ParsedFullFilterBlock>* block_entry, BlockType block_type, CachableEntry<ParsedFullFilterBlock>* out_parsed_block,
GetContext* get_context, BlockCacheLookupContext* lookup_context, BlockType block_type, GetContext* get_context,
bool for_compaction, bool use_cache, bool wait_for_cache, BlockCacheLookupContext* lookup_context, bool for_compaction,
bool async_read) const; bool use_cache, bool wait_for_cache, bool async_read) const;
template Status BlockBasedTable::RetrieveBlock<Block>( template Status BlockBasedTable::RetrieveBlock<Block>(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<Block>* block_entry, BlockType block_type, CachableEntry<Block>* out_parsed_block, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction, bool use_cache, bool wait_for_cache, bool for_compaction, bool use_cache, bool wait_for_cache,
bool async_read) const; bool async_read) const;
@ -1868,7 +1875,7 @@ template Status BlockBasedTable::RetrieveBlock<Block>(
template Status BlockBasedTable::RetrieveBlock<UncompressionDict>( template Status BlockBasedTable::RetrieveBlock<UncompressionDict>(
FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro, FilePrefetchBuffer* prefetch_buffer, const ReadOptions& ro,
const BlockHandle& handle, const UncompressionDict& uncompression_dict, const BlockHandle& handle, const UncompressionDict& uncompression_dict,
CachableEntry<UncompressionDict>* block_entry, BlockType block_type, CachableEntry<UncompressionDict>* out_parsed_block, BlockType block_type,
GetContext* get_context, BlockCacheLookupContext* lookup_context, GetContext* get_context, BlockCacheLookupContext* lookup_context,
bool for_compaction, bool use_cache, bool wait_for_cache, bool for_compaction, bool use_cache, bool wait_for_cache,
bool async_read) const; bool async_read) const;

@ -249,16 +249,16 @@ class BlockBasedTable : public TableReader {
return static_cast<size_t>(handle.size() + kBlockTrailerSize); return static_cast<size_t>(handle.size() + kBlockTrailerSize);
} }
// It's the caller's responsibility to make sure that this is // It is the caller's responsibility to make sure that this is called with
// for raw block contents, which contains the compression // block-based table serialized block contents, which contains the compression
// byte in the end. // byte in the trailer after `block_size`.
static inline CompressionType GetBlockCompressionType(const char* block_data, static inline CompressionType GetBlockCompressionType(const char* block_data,
size_t block_size) { size_t block_size) {
return static_cast<CompressionType>(block_data[block_size]); return static_cast<CompressionType>(block_data[block_size]);
} }
static inline CompressionType GetBlockCompressionType( static inline CompressionType GetBlockCompressionType(
const BlockContents& contents) { const BlockContents& contents) {
assert(contents.is_raw_block); assert(contents.has_trailer);
return GetBlockCompressionType(contents.data.data(), contents.data.size()); return GetBlockCompressionType(contents.data.data(), contents.data.size());
} }
@ -327,7 +327,7 @@ class BlockBasedTable : public TableReader {
Status InsertEntryToCache(const CacheTier& cache_tier, Cache* block_cache, Status InsertEntryToCache(const CacheTier& cache_tier, Cache* block_cache,
const Slice& key, const Slice& key,
const Cache::CacheItemHelper* cache_helper, const Cache::CacheItemHelper* cache_helper,
std::unique_ptr<TBlocklike>& block_holder, std::unique_ptr<TBlocklike>&& block_holder,
size_t charge, Cache::Handle** cache_handle, size_t charge, Cache::Handle** cache_handle,
Cache::Priority priority) const; Cache::Priority priority) const;
@ -411,13 +411,13 @@ class BlockBasedTable : public TableReader {
BlockType block_type, const bool wait, BlockType block_type, const bool wait,
GetContext* get_context) const; GetContext* get_context) const;
// Put a raw block (maybe compressed) to the corresponding block caches. // Put a maybe compressed block to the corresponding block caches.
// This method will perform decompression against raw_block if needed and then // This method will perform decompression against block_contents if needed
// populate the block caches. // and then populate the block caches.
// On success, Status::OK will be returned; also @block will be populated with // On success, Status::OK will be returned; also @block will be populated with
// uncompressed block and its cache handle. // uncompressed block and its cache handle.
// //
// Allocated memory managed by raw_block_contents will be transferred to // Allocated memory managed by block_contents will be transferred to
// PutDataBlockToCache(). After the call, the object will be invalid. // PutDataBlockToCache(). After the call, the object will be invalid.
// @param uncompression_dict Data for presetting the compression library's // @param uncompression_dict Data for presetting the compression library's
// dictionary. // dictionary.
@ -425,8 +425,8 @@ class BlockBasedTable : public TableReader {
Status PutDataBlockToCache(const Slice& cache_key, Cache* block_cache, Status PutDataBlockToCache(const Slice& cache_key, Cache* block_cache,
Cache* block_cache_compressed, Cache* block_cache_compressed,
CachableEntry<TBlocklike>* cached_block, CachableEntry<TBlocklike>* cached_block,
BlockContents* raw_block_contents, BlockContents&& block_contents,
CompressionType raw_block_comp_type, CompressionType block_comp_type,
const UncompressionDict& uncompression_dict, const UncompressionDict& uncompression_dict,
MemoryAllocator* memory_allocator, MemoryAllocator* memory_allocator,
BlockType block_type, BlockType block_type,

@ -191,7 +191,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
} }
} }
BlockContents raw_block_contents; BlockContents serialized_block;
if (s.ok()) { if (s.ok()) {
if (!use_shared_buffer) { if (!use_shared_buffer) {
// We allocated a buffer for this block. Give ownership of it to // We allocated a buffer for this block. Give ownership of it to
@ -199,17 +199,17 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
assert(req.result.data() == req.scratch); assert(req.result.data() == req.scratch);
assert(req.result.size() == BlockSizeWithTrailer(handle)); assert(req.result.size() == BlockSizeWithTrailer(handle));
assert(req_offset == 0); assert(req_offset == 0);
std::unique_ptr<char[]> raw_block(req.scratch); serialized_block =
raw_block_contents = BlockContents(std::move(raw_block), handle.size()); BlockContents(std::unique_ptr<char[]>(req.scratch), handle.size());
} else { } else {
// We used the scratch buffer or direct io buffer // We used the scratch buffer or direct io buffer
// which are shared by the blocks. // which are shared by the blocks.
// raw_block_contents does not have the ownership. // serialized_block does not have the ownership.
raw_block_contents = serialized_block =
BlockContents(Slice(req.result.data() + req_offset, handle.size())); BlockContents(Slice(req.result.data() + req_offset, handle.size()));
} }
#ifndef NDEBUG #ifndef NDEBUG
raw_block_contents.is_raw_block = true; serialized_block.has_trailer = true;
#endif #endif
if (options.verify_checksums) { if (options.verify_checksums) {
@ -232,28 +232,29 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
if (s.ok()) { if (s.ok()) {
// When the blocks share the same underlying buffer (scratch or direct io // When the blocks share the same underlying buffer (scratch or direct io
// buffer), we may need to manually copy the block into heap if the raw // buffer), we may need to manually copy the block into heap if the
// block has to be inserted into a cache. That falls into th following // serialized block has to be inserted into a cache. That falls into the
// cases - // following cases -
// 1. Raw block is not compressed, it needs to be inserted into the // 1. serialized block is not compressed, it needs to be inserted into
// uncompressed block cache if there is one // the uncompressed block cache if there is one
// 2. If the raw block is compressed, it needs to be inserted into the // 2. If the serialized block is compressed, it needs to be inserted
// compressed block cache if there is one // into the compressed block cache if there is one
// //
// In all other cases, the raw block is either uncompressed into a heap // In all other cases, the serialized block is either uncompressed into a
// buffer or there is no cache at all. // heap buffer or there is no cache at all.
CompressionType compression_type = CompressionType compression_type =
GetBlockCompressionType(raw_block_contents); GetBlockCompressionType(serialized_block);
if (use_shared_buffer && (compression_type == kNoCompression || if (use_shared_buffer && (compression_type == kNoCompression ||
(compression_type != kNoCompression && (compression_type != kNoCompression &&
rep_->table_options.block_cache_compressed))) { rep_->table_options.block_cache_compressed))) {
Slice raw = Slice serialized =
Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle)); Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle));
raw_block_contents = BlockContents( serialized_block = BlockContents(
CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), raw), CopyBufferToHeap(GetMemoryAllocator(rep_->table_options),
serialized),
handle.size()); handle.size());
#ifndef NDEBUG #ifndef NDEBUG
raw_block_contents.is_raw_block = true; serialized_block.has_trailer = true;
#endif #endif
} }
} }
@ -264,13 +265,13 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
TableReaderCaller::kUserMultiGet); TableReaderCaller::kUserMultiGet);
CachableEntry<Block>* block_entry = &(*results)[idx_in_batch]; CachableEntry<Block>* block_entry = &(*results)[idx_in_batch];
// MaybeReadBlockAndLoadToCache will insert into the block caches if // MaybeReadBlockAndLoadToCache will insert into the block caches if
// necessary. Since we're passing the raw block contents, it will // necessary. Since we're passing the serialized block contents, it
// avoid looking up the block cache // will avoid looking up the block cache
s = MaybeReadBlockAndLoadToCache( s = MaybeReadBlockAndLoadToCache(
nullptr, options, handle, uncompression_dict, /*wait=*/true, nullptr, options, handle, uncompression_dict, /*wait=*/true,
/*for_compaction=*/false, block_entry, BlockType::kData, /*for_compaction=*/false, block_entry, BlockType::kData,
mget_iter->get_context, &lookup_data_block_context, mget_iter->get_context, &lookup_data_block_context,
&raw_block_contents, /*async_read=*/false); &serialized_block, /*async_read=*/false);
// block_entry value could be null if no block cache is present, i.e // block_entry value could be null if no block cache is present, i.e
// BlockBasedTableOptions::no_block_cache is true and no compressed // BlockBasedTableOptions::no_block_cache is true and no compressed
@ -283,12 +284,12 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
} }
CompressionType compression_type = CompressionType compression_type =
GetBlockCompressionType(raw_block_contents); GetBlockCompressionType(serialized_block);
BlockContents contents; BlockContents contents;
if (compression_type != kNoCompression) { if (compression_type != kNoCompression) {
UncompressionContext context(compression_type); UncompressionContext context(compression_type);
UncompressionInfo info(context, uncompression_dict, compression_type); UncompressionInfo info(context, uncompression_dict, compression_type);
s = UncompressBlockContents( s = UncompressSerializedBlock(
info, req.result.data() + req_offset, handle.size(), &contents, info, req.result.data() + req_offset, handle.size(), &contents,
footer.format_version(), rep_->ioptions, memory_allocator); footer.format_version(), rep_->ioptions, memory_allocator);
} else { } else {
@ -296,13 +297,13 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
// 1) caller uses the shared buffer (scratch or direct io buffer); // 1) caller uses the shared buffer (scratch or direct io buffer);
// 2) we use the requst buffer. // 2) we use the requst buffer.
// If scratch buffer or direct io buffer is used, we ensure that // If scratch buffer or direct io buffer is used, we ensure that
// all raw blocks are copyed to the heap as single blocks. If scratch // all serialized blocks are copyed to the heap as single blocks. If
// buffer is not used, we also have no combined read, so the raw // scratch buffer is not used, we also have no combined read, so the
// block can be used directly. // serialized block can be used directly.
contents = std::move(raw_block_contents); contents = std::move(serialized_block);
} }
if (s.ok()) { if (s.ok()) {
(*results)[idx_in_batch].SetOwnedValue(new Block( (*results)[idx_in_batch].SetOwnedValue(std::make_unique<Block>(
std::move(contents), read_amp_bytes_per_bit, ioptions.stats)); std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
} }
} }

@ -132,17 +132,17 @@ public:
ResetFields(); ResetFields();
} }
void SetOwnedValue(T* value) { void SetOwnedValue(std::unique_ptr<T>&& value) {
assert(value != nullptr); assert(value.get() != nullptr);
if (UNLIKELY(value_ == value && own_value_)) { if (UNLIKELY(value_ == value.get() && own_value_)) {
assert(cache_ == nullptr && cache_handle_ == nullptr); assert(cache_ == nullptr && cache_handle_ == nullptr);
return; return;
} }
Reset(); Reset();
value_ = value; value_ = value.release();
own_value_ = true; own_value_ = true;
} }

@ -49,7 +49,7 @@ inline void BlockFetcher::ProcessTrailerIfPresent() {
inline bool BlockFetcher::TryGetUncompressBlockFromPersistentCache() { inline bool BlockFetcher::TryGetUncompressBlockFromPersistentCache() {
if (cache_options_.persistent_cache && if (cache_options_.persistent_cache &&
!cache_options_.persistent_cache->IsCompressed()) { !cache_options_.persistent_cache->IsCompressed()) {
Status status = PersistentCacheHelper::LookupUncompressedPage( Status status = PersistentCacheHelper::LookupUncompressed(
cache_options_, handle_, contents_); cache_options_, handle_, contents_);
if (status.ok()) { if (status.ok()) {
// uncompressed page is found for the block handle // uncompressed page is found for the block handle
@ -99,15 +99,14 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
return got_from_prefetch_buffer_; return got_from_prefetch_buffer_;
} }
inline bool BlockFetcher::TryGetCompressedBlockFromPersistentCache() { inline bool BlockFetcher::TryGetSerializedBlockFromPersistentCache() {
if (cache_options_.persistent_cache && if (cache_options_.persistent_cache &&
cache_options_.persistent_cache->IsCompressed()) { cache_options_.persistent_cache->IsCompressed()) {
// lookup uncompressed cache mode p-cache std::unique_ptr<char[]> buf;
std::unique_ptr<char[]> raw_data; io_status_ = status_to_io_status(PersistentCacheHelper::LookupSerialized(
io_status_ = status_to_io_status(PersistentCacheHelper::LookupRawPage( cache_options_, handle_, &buf, block_size_with_trailer_));
cache_options_, handle_, &raw_data, block_size_with_trailer_));
if (io_status_.ok()) { if (io_status_.ok()) {
heap_buf_ = CacheAllocationPtr(raw_data.release()); heap_buf_ = CacheAllocationPtr(buf.release());
used_buf_ = heap_buf_.get(); used_buf_ = heap_buf_.get();
slice_ = Slice(heap_buf_.get(), block_size_); slice_ = Slice(heap_buf_.get(), block_size_);
ProcessTrailerIfPresent(); ProcessTrailerIfPresent();
@ -162,8 +161,7 @@ inline void BlockFetcher::InsertCompressedBlockToPersistentCacheIfNeeded() {
if (io_status_.ok() && read_options_.fill_cache && if (io_status_.ok() && read_options_.fill_cache &&
cache_options_.persistent_cache && cache_options_.persistent_cache &&
cache_options_.persistent_cache->IsCompressed()) { cache_options_.persistent_cache->IsCompressed()) {
// insert to raw cache PersistentCacheHelper::InsertSerialized(cache_options_, handle_, used_buf_,
PersistentCacheHelper::InsertRawPage(cache_options_, handle_, used_buf_,
block_size_with_trailer_); block_size_with_trailer_);
} }
} }
@ -173,7 +171,7 @@ inline void BlockFetcher::InsertUncompressedBlockToPersistentCacheIfNeeded() {
read_options_.fill_cache && cache_options_.persistent_cache && read_options_.fill_cache && cache_options_.persistent_cache &&
!cache_options_.persistent_cache->IsCompressed()) { !cache_options_.persistent_cache->IsCompressed()) {
// insert to uncompressed cache // insert to uncompressed cache
PersistentCacheHelper::InsertUncompressedPage(cache_options_, handle_, PersistentCacheHelper::InsertUncompressed(cache_options_, handle_,
*contents_); *contents_);
} }
} }
@ -234,7 +232,7 @@ inline void BlockFetcher::GetBlockContents() {
*contents_ = BlockContents(std::move(heap_buf_), block_size_); *contents_ = BlockContents(std::move(heap_buf_), block_size_);
} }
#ifndef NDEBUG #ifndef NDEBUG
contents_->is_raw_block = true; contents_->has_trailer = footer_.GetBlockTrailerSize() > 0;
#endif #endif
} }
@ -242,7 +240,7 @@ IOStatus BlockFetcher::ReadBlockContents() {
if (TryGetUncompressBlockFromPersistentCache()) { if (TryGetUncompressBlockFromPersistentCache()) {
compression_type_ = kNoCompression; compression_type_ = kNoCompression;
#ifndef NDEBUG #ifndef NDEBUG
contents_->is_raw_block = true; contents_->has_trailer = footer_.GetBlockTrailerSize() > 0;
#endif // NDEBUG #endif // NDEBUG
return IOStatus::OK(); return IOStatus::OK();
} }
@ -250,7 +248,7 @@ IOStatus BlockFetcher::ReadBlockContents() {
if (!io_status_.ok()) { if (!io_status_.ok()) {
return io_status_; return io_status_;
} }
} else if (!TryGetCompressedBlockFromPersistentCache()) { } else if (!TryGetSerializedBlockFromPersistentCache()) {
IOOptions opts; IOOptions opts;
io_status_ = file_->PrepareIOOptions(read_options_, opts); io_status_ = file_->PrepareIOOptions(read_options_, opts);
// Actual file read // Actual file read
@ -327,7 +325,7 @@ IOStatus BlockFetcher::ReadBlockContents() {
// compressed page, uncompress, update cache // compressed page, uncompress, update cache
UncompressionContext context(compression_type_); UncompressionContext context(compression_type_);
UncompressionInfo info(context, uncompression_dict_, compression_type_); UncompressionInfo info(context, uncompression_dict_, compression_type_);
io_status_ = status_to_io_status(UncompressBlockContents( io_status_ = status_to_io_status(UncompressSerializedBlock(
info, slice_.data(), block_size_, contents_, footer_.format_version(), info, slice_.data(), block_size_, contents_, footer_.format_version(),
ioptions_, memory_allocator_)); ioptions_, memory_allocator_));
#ifndef NDEBUG #ifndef NDEBUG
@ -347,10 +345,10 @@ IOStatus BlockFetcher::ReadAsyncBlockContents() {
if (TryGetUncompressBlockFromPersistentCache()) { if (TryGetUncompressBlockFromPersistentCache()) {
compression_type_ = kNoCompression; compression_type_ = kNoCompression;
#ifndef NDEBUG #ifndef NDEBUG
contents_->is_raw_block = true; contents_->has_trailer = footer_.GetBlockTrailerSize() > 0;
#endif // NDEBUG #endif // NDEBUG
return IOStatus::OK(); return IOStatus::OK();
} else if (!TryGetCompressedBlockFromPersistentCache()) { } else if (!TryGetSerializedBlockFromPersistentCache()) {
assert(prefetch_buffer_ != nullptr); assert(prefetch_buffer_ != nullptr);
if (!for_compaction_) { if (!for_compaction_) {
IOOptions opts; IOOptions opts;
@ -378,7 +376,7 @@ IOStatus BlockFetcher::ReadAsyncBlockContents() {
UncompressionContext context(compression_type_); UncompressionContext context(compression_type_);
UncompressionInfo info(context, uncompression_dict_, UncompressionInfo info(context, uncompression_dict_,
compression_type_); compression_type_);
io_status_ = status_to_io_status(UncompressBlockContents( io_status_ = status_to_io_status(UncompressSerializedBlock(
info, slice_.data(), block_size_, contents_, info, slice_.data(), block_size_, contents_,
footer_.format_version(), ioptions_, memory_allocator_)); footer_.format_version(), ioptions_, memory_allocator_));
#ifndef NDEBUG #ifndef NDEBUG

@ -128,7 +128,7 @@ class BlockFetcher {
bool TryGetUncompressBlockFromPersistentCache(); bool TryGetUncompressBlockFromPersistentCache();
// return true if found // return true if found
bool TryGetFromPrefetchBuffer(); bool TryGetFromPrefetchBuffer();
bool TryGetCompressedBlockFromPersistentCache(); bool TryGetSerializedBlockFromPersistentCache();
void PrepareBufferForBlockFromFile(); void PrepareBufferForBlockFromFile();
// Copy content from used_buf_ to new heap_buf_. // Copy content from used_buf_ to new heap_buf_.
void CopyBufferToHeapBuf(); void CopyBufferToHeapBuf();

@ -498,10 +498,11 @@ uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data,
} }
} }
Status UncompressBlockContentsForCompressionType( Status UncompressBlockData(const UncompressionInfo& uncompression_info,
const UncompressionInfo& uncompression_info, const char* data, size_t n, const char* data, size_t size,
BlockContents* contents, uint32_t format_version, BlockContents* out_contents, uint32_t format_version,
const ImmutableOptions& ioptions, MemoryAllocator* allocator) { const ImmutableOptions& ioptions,
MemoryAllocator* allocator) {
Status ret = Status::OK(); Status ret = Status::OK();
assert(uncompression_info.type() != kNoCompression && assert(uncompression_info.type() != kNoCompression &&
@ -511,7 +512,7 @@ Status UncompressBlockContentsForCompressionType(
ShouldReportDetailedTime(ioptions.env, ioptions.stats)); ShouldReportDetailedTime(ioptions.env, ioptions.stats));
size_t uncompressed_size = 0; size_t uncompressed_size = 0;
CacheAllocationPtr ubuf = CacheAllocationPtr ubuf =
UncompressData(uncompression_info, data, n, &uncompressed_size, UncompressData(uncompression_info, data, size, &uncompressed_size,
GetCompressFormatForVersion(format_version), allocator); GetCompressFormatForVersion(format_version), allocator);
if (!ubuf) { if (!ubuf) {
if (!CompressionTypeSupported(uncompression_info.type())) { if (!CompressionTypeSupported(uncompression_info.type())) {
@ -525,44 +526,36 @@ Status UncompressBlockContentsForCompressionType(
} }
} }
*contents = BlockContents(std::move(ubuf), uncompressed_size); *out_contents = BlockContents(std::move(ubuf), uncompressed_size);
if (ShouldReportDetailedTime(ioptions.env, ioptions.stats)) { if (ShouldReportDetailedTime(ioptions.env, ioptions.stats)) {
RecordTimeToHistogram(ioptions.stats, DECOMPRESSION_TIMES_NANOS, RecordTimeToHistogram(ioptions.stats, DECOMPRESSION_TIMES_NANOS,
timer.ElapsedNanos()); timer.ElapsedNanos());
} }
RecordTimeToHistogram(ioptions.stats, BYTES_DECOMPRESSED, RecordTimeToHistogram(ioptions.stats, BYTES_DECOMPRESSED,
contents->data.size()); out_contents->data.size());
RecordTick(ioptions.stats, NUMBER_BLOCK_DECOMPRESSED); RecordTick(ioptions.stats, NUMBER_BLOCK_DECOMPRESSED);
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK("UncompressBlockData:TamperWithReturnValue",
"UncompressBlockContentsForCompressionType:TamperWithReturnValue",
static_cast<void*>(&ret)); static_cast<void*>(&ret));
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
"UncompressBlockContentsForCompressionType:" "UncompressBlockData:"
"TamperWithDecompressionOutput", "TamperWithDecompressionOutput",
static_cast<void*>(contents)); static_cast<void*>(out_contents));
return ret; return ret;
} }
// Status UncompressSerializedBlock(const UncompressionInfo& uncompression_info,
// The 'data' points to the raw block contents that was read in from file. const char* data, size_t size,
// This method allocates a new heap buffer and the raw block BlockContents* out_contents,
// contents are uncompresed into this buffer. This uint32_t format_version,
// buffer is returned via 'result' and it is upto the caller to
// free this buffer.
// format_version is the block format as defined in include/rocksdb/table.h
Status UncompressBlockContents(const UncompressionInfo& uncompression_info,
const char* data, size_t n,
BlockContents* contents, uint32_t format_version,
const ImmutableOptions& ioptions, const ImmutableOptions& ioptions,
MemoryAllocator* allocator) { MemoryAllocator* allocator) {
assert(data[n] != kNoCompression); assert(data[size] != kNoCompression);
assert(data[n] == static_cast<char>(uncompression_info.type())); assert(data[size] == static_cast<char>(uncompression_info.type()));
return UncompressBlockContentsForCompressionType(uncompression_info, data, n, return UncompressBlockData(uncompression_info, data, size, out_contents,
contents, format_version, format_version, ioptions, allocator);
ioptions, allocator);
} }
// Replace the contents of db_host_id with the actual hostname, if db_host_id // Replace the contents of db_host_id with the actual hostname, if db_host_id

@ -29,7 +29,7 @@ namespace ROCKSDB_NAMESPACE {
class RandomAccessFile; class RandomAccessFile;
struct ReadOptions; struct ReadOptions;
extern bool ShouldReportDetailedTime(Env* env, Statistics* stats); bool ShouldReportDetailedTime(Env* env, Statistics* stats);
// the length of the magic number in bytes. // the length of the magic number in bytes.
constexpr uint32_t kMagicNumberLengthByte = 8; constexpr uint32_t kMagicNumberLengthByte = 8;
@ -256,7 +256,26 @@ uint32_t ComputeBuiltinChecksumWithLastByte(ChecksumType type, const char* data,
// Represents the contents of a block read from an SST file. Depending on how // Represents the contents of a block read from an SST file. Depending on how
// it's created, it may or may not own the actual block bytes. As an example, // it's created, it may or may not own the actual block bytes. As an example,
// BlockContents objects representing data read from mmapped files only point // BlockContents objects representing data read from mmapped files only point
// into the mmapped region. // into the mmapped region. Depending on context, it might be a serialized
// (potentially compressed) block, including a trailer beyond `size`, or an
// uncompressed block.
//
// Please try to use this terminology when dealing with blocks:
// * "Serialized block" - bytes that go into storage. For block-based table
// (usually the case) this includes the block trailer. Here the `size` does
// not include the trailer, but other places in code might include the trailer
// in the size.
// * "Maybe compressed block" - like a serialized block, but without the
// trailer (or no promise of including a trailer). Must be accompanied by a
// CompressionType in some other variable or field.
// * "Uncompressed block" - "payload" bytes that are either stored with no
// compression, used as input to compression function, or result of
// decompression function.
// * "Parsed block" - an in-memory form of a block in block cache, as it is
// used by the table reader. Different C++ types are used depending on the
// block type (see block_like_traits.h). Only trivially parsable block types
// use BlockContents as the parsed form.
//
struct BlockContents { struct BlockContents {
// Points to block payload (without trailer) // Points to block payload (without trailer)
Slice data; Slice data;
@ -265,7 +284,7 @@ struct BlockContents {
#ifndef NDEBUG #ifndef NDEBUG
// Whether there is a known trailer after what is pointed to by `data`. // Whether there is a known trailer after what is pointed to by `data`.
// See BlockBasedTable::GetCompressionType. // See BlockBasedTable::GetCompressionType.
bool is_raw_block = false; bool has_trailer = false;
#endif // NDEBUG #endif // NDEBUG
BlockContents() {} BlockContents() {}
@ -313,36 +332,35 @@ struct BlockContents {
data = std::move(other.data); data = std::move(other.data);
allocation = std::move(other.allocation); allocation = std::move(other.allocation);
#ifndef NDEBUG #ifndef NDEBUG
is_raw_block = other.is_raw_block; has_trailer = other.has_trailer;
#endif // NDEBUG #endif // NDEBUG
return *this; return *this;
} }
}; };
// The 'data' points to the raw block contents read in from file. // The `data` points to serialized block contents read in from file, which
// This method allocates a new heap buffer and the raw block // must be compressed and include a trailer beyond `size`. A new buffer is
// contents are uncompresed into this buffer. This buffer is // allocated with the given allocator (or default) and the uncompressed
// returned via 'result' and it is upto the caller to // contents are returned in `out_contents`.
// free this buffer. // format_version is as defined in include/rocksdb/table.h, which is
// For description of compress_format_version and possible values, see // used to determine compression format version.
// util/compression.h Status UncompressSerializedBlock(const UncompressionInfo& info,
extern Status UncompressBlockContents(const UncompressionInfo& info, const char* data, size_t size,
const char* data, size_t n, BlockContents* out_contents,
BlockContents* contents, uint32_t format_version,
uint32_t compress_format_version,
const ImmutableOptions& ioptions, const ImmutableOptions& ioptions,
MemoryAllocator* allocator = nullptr); MemoryAllocator* allocator = nullptr);
// This is an extension to UncompressBlockContents that accepts // This is a variant of UncompressSerializedBlock that does not expect a
// a specific compression type. This is used by un-wrapped blocks // block trailer beyond `size`. (CompressionType is taken from `info`.)
// with no compression header. Status UncompressBlockData(const UncompressionInfo& info, const char* data,
extern Status UncompressBlockContentsForCompressionType( size_t size, BlockContents* out_contents,
const UncompressionInfo& info, const char* data, size_t n, uint32_t format_version,
BlockContents* contents, uint32_t compress_format_version, const ImmutableOptions& ioptions,
const ImmutableOptions& ioptions, MemoryAllocator* allocator = nullptr); MemoryAllocator* allocator = nullptr);
// Replace db_host_id contents with the real hostname if necessary // Replace db_host_id contents with the real hostname if necessary
extern Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id); Status ReifyDbHostIdProperty(Env* env, std::string* db_host_id);
// Implementation details follow. Clients should ignore, // Implementation details follow. Clients should ignore,

@ -196,10 +196,11 @@ bool NotifyCollectTableCollectorsOnAdd(
void NotifyCollectTableCollectorsOnBlockAdd( void NotifyCollectTableCollectorsOnBlockAdd(
const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors, const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
const uint64_t block_raw_bytes, const uint64_t block_compressed_bytes_fast, const uint64_t block_uncomp_bytes,
const uint64_t block_compressed_bytes_fast,
const uint64_t block_compressed_bytes_slow) { const uint64_t block_compressed_bytes_slow) {
for (auto& collector : collectors) { for (auto& collector : collectors) {
collector->BlockAdd(block_raw_bytes, block_compressed_bytes_fast, collector->BlockAdd(block_uncomp_bytes, block_compressed_bytes_fast,
block_compressed_bytes_slow); block_compressed_bytes_slow);
} }
} }

@ -92,7 +92,7 @@ bool NotifyCollectTableCollectorsOnAdd(
void NotifyCollectTableCollectorsOnBlockAdd( void NotifyCollectTableCollectorsOnBlockAdd(
const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors, const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
uint64_t block_raw_bytes, uint64_t block_compressed_bytes_fast, uint64_t block_uncomp_bytes, uint64_t block_compressed_bytes_fast,
uint64_t block_compressed_bytes_slow); uint64_t block_compressed_bytes_slow);
// NotifyCollectTableCollectorsOnFinish() triggers the `Finish` event for all // NotifyCollectTableCollectorsOnFinish() triggers the `Finish` event for all

@ -11,7 +11,7 @@ namespace ROCKSDB_NAMESPACE {
const PersistentCacheOptions PersistentCacheOptions::kEmpty; const PersistentCacheOptions PersistentCacheOptions::kEmpty;
void PersistentCacheHelper::InsertRawPage( void PersistentCacheHelper::InsertSerialized(
const PersistentCacheOptions& cache_options, const BlockHandle& handle, const PersistentCacheOptions& cache_options, const BlockHandle& handle,
const char* data, const size_t size) { const char* data, const size_t size) {
assert(cache_options.persistent_cache); assert(cache_options.persistent_cache);
@ -24,7 +24,7 @@ void PersistentCacheHelper::InsertRawPage(
.PermitUncheckedError(); .PermitUncheckedError();
} }
void PersistentCacheHelper::InsertUncompressedPage( void PersistentCacheHelper::InsertUncompressed(
const PersistentCacheOptions& cache_options, const BlockHandle& handle, const PersistentCacheOptions& cache_options, const BlockHandle& handle,
const BlockContents& contents) { const BlockContents& contents) {
assert(cache_options.persistent_cache); assert(cache_options.persistent_cache);
@ -42,11 +42,11 @@ void PersistentCacheHelper::InsertUncompressedPage(
; ;
} }
Status PersistentCacheHelper::LookupRawPage( Status PersistentCacheHelper::LookupSerialized(
const PersistentCacheOptions& cache_options, const BlockHandle& handle, const PersistentCacheOptions& cache_options, const BlockHandle& handle,
std::unique_ptr<char[]>* raw_data, const size_t raw_data_size) { std::unique_ptr<char[]>* out_data, const size_t expected_data_size) {
#ifdef NDEBUG #ifdef NDEBUG
(void)raw_data_size; (void)expected_data_size;
#endif #endif
assert(cache_options.persistent_cache); assert(cache_options.persistent_cache);
assert(cache_options.persistent_cache->IsCompressed()); assert(cache_options.persistent_cache->IsCompressed());
@ -56,7 +56,7 @@ Status PersistentCacheHelper::LookupRawPage(
size_t size; size_t size;
Status s = Status s =
cache_options.persistent_cache->Lookup(key.AsSlice(), raw_data, &size); cache_options.persistent_cache->Lookup(key.AsSlice(), out_data, &size);
if (!s.ok()) { if (!s.ok()) {
// cache miss // cache miss
RecordTick(cache_options.statistics, PERSISTENT_CACHE_MISS); RecordTick(cache_options.statistics, PERSISTENT_CACHE_MISS);
@ -65,13 +65,14 @@ Status PersistentCacheHelper::LookupRawPage(
// cache hit // cache hit
// Block-based table is assumed // Block-based table is assumed
assert(raw_data_size == handle.size() + BlockBasedTable::kBlockTrailerSize); assert(expected_data_size ==
assert(size == raw_data_size); handle.size() + BlockBasedTable::kBlockTrailerSize);
assert(size == expected_data_size);
RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT); RecordTick(cache_options.statistics, PERSISTENT_CACHE_HIT);
return Status::OK(); return Status::OK();
} }
Status PersistentCacheHelper::LookupUncompressedPage( Status PersistentCacheHelper::LookupUncompressed(
const PersistentCacheOptions& cache_options, const BlockHandle& handle, const PersistentCacheOptions& cache_options, const BlockHandle& handle,
BlockContents* contents) { BlockContents* contents) {
assert(cache_options.persistent_cache); assert(cache_options.persistent_cache);

@ -19,25 +19,27 @@ struct BlockContents;
// Encapsulates some of the helper logic for read and writing from the cache // Encapsulates some of the helper logic for read and writing from the cache
class PersistentCacheHelper { class PersistentCacheHelper {
public: public:
// insert block into raw page cache // Insert block into cache of serialized blocks. Size includes block trailer
static void InsertRawPage(const PersistentCacheOptions& cache_options, // (if applicable).
static void InsertSerialized(const PersistentCacheOptions& cache_options,
const BlockHandle& handle, const char* data, const BlockHandle& handle, const char* data,
const size_t size); const size_t size);
// insert block into uncompressed cache // Insert block into cache of uncompressed blocks. No block trailer.
static void InsertUncompressedPage( static void InsertUncompressed(const PersistentCacheOptions& cache_options,
const PersistentCacheOptions& cache_options, const BlockHandle& handle, const BlockHandle& handle,
const BlockContents& contents); const BlockContents& contents);
// lookup block from raw page cacge // Lookup block from cache of serialized blocks. Size includes block trailer
static Status LookupRawPage(const PersistentCacheOptions& cache_options, // (if applicable).
static Status LookupSerialized(const PersistentCacheOptions& cache_options,
const BlockHandle& handle, const BlockHandle& handle,
std::unique_ptr<char[]>* raw_data, std::unique_ptr<char[]>* out_data,
const size_t raw_data_size); const size_t expected_data_size);
// lookup block from uncompressed cache // Lookup block from uncompressed cache. No block trailer.
static Status LookupUncompressedPage( static Status LookupUncompressed(const PersistentCacheOptions& cache_options,
const PersistentCacheOptions& cache_options, const BlockHandle& handle, const BlockHandle& handle,
BlockContents* contents); BlockContents* contents);
}; };

@ -36,7 +36,7 @@ class SstFileWriterPropertiesCollector : public IntTblPropCollector {
return Status::OK(); return Status::OK();
} }
virtual void BlockAdd(uint64_t /* block_raw_bytes */, virtual void BlockAdd(uint64_t /* block_uncomp_bytes */,
uint64_t /* block_compressed_bytes_fast */, uint64_t /* block_compressed_bytes_fast */,
uint64_t /* block_compressed_bytes_slow */) override { uint64_t /* block_compressed_bytes_slow */) override {
// Intentionally left blank. No interest in collecting stats for // Intentionally left blank. No interest in collecting stats for

@ -1165,7 +1165,7 @@ Status BlobDBImpl::DecompressSlice(const Slice& compressed_value,
UncompressionContext context(compression_type); UncompressionContext context(compression_type);
UncompressionInfo info(context, UncompressionDict::GetEmptyDict(), UncompressionInfo info(context, UncompressionDict::GetEmptyDict(),
compression_type); compression_type);
Status s = UncompressBlockContentsForCompressionType( Status s = UncompressBlockData(
info, compressed_value.data(), compressed_value.size(), &contents, info, compressed_value.data(), compressed_value.size(), &contents,
kBlockBasedTableVersionFormat, *(cfh->cfd()->ioptions())); kBlockBasedTableVersionFormat, *(cfh->cfd()->ioptions()));
if (!s.ok()) { if (!s.ok()) {

@ -213,7 +213,7 @@ Status BlobDumpTool::DumpRecord(DisplayType show_key, DisplayType show_blob,
UncompressionContext context(compression); UncompressionContext context(compression);
UncompressionInfo info(context, UncompressionDict::GetEmptyDict(), UncompressionInfo info(context, UncompressionDict::GetEmptyDict(),
compression); compression);
s = UncompressBlockContentsForCompressionType( s = UncompressBlockData(
info, slice.data() + key_size, static_cast<size_t>(value_size), info, slice.data() + key_size, static_cast<size_t>(value_size),
&contents, 2 /*compress_format_version*/, ImmutableOptions(Options())); &contents, 2 /*compress_format_version*/, ImmutableOptions(Options()));
if (!s.ok()) { if (!s.ok()) {

@ -175,33 +175,33 @@ CacheDumperImpl::DumpOneBlockCallBack() {
// Step 4: if the block should not be filter out, write the block to the // Step 4: if the block should not be filter out, write the block to the
// CacheDumpWriter // CacheDumpWriter
if (!filter_out && block_start != nullptr) { if (!filter_out && block_start != nullptr) {
char* buffer = new char[block_len]; WriteBlock(type, key, Slice(block_start, block_len))
memcpy(buffer, block_start, block_len);
WriteCacheBlock(type, key, (void*)buffer, block_len)
.PermitUncheckedError(); .PermitUncheckedError();
delete[] buffer;
} }
}; };
} }
// Write the raw block to the writer. It takes the timestamp of the block being
// copied from block cache, block type, key, block pointer, raw block size and // Write the block to the writer. It takes the timestamp of the
// the block checksum as the input. When writing the raw block, we first create // block being copied from block cache, block type, key, block pointer,
// the dump unit and encoude it to a string. Then, we calculate the checksum of // block size and block checksum as the input. When writing the dumper raw
// the how dump unit string and store it in the dump unit metadata. // block, we first create the dump unit and encoude it to a string. Then,
// we calculate the checksum of the whole dump unit string and store it in
// the dump unit metadata.
// First, we write the metadata first, which is a fixed size string. Then, we // First, we write the metadata first, which is a fixed size string. Then, we
// Append the dump unit string to the writer. // Append the dump unit string to the writer.
IOStatus CacheDumperImpl::WriteRawBlock(uint64_t timestamp, IOStatus CacheDumperImpl::WriteBlock(CacheDumpUnitType type, const Slice& key,
CacheDumpUnitType type, const Slice& value) {
const Slice& key, void* value, uint64_t timestamp = clock_->NowMicros();
size_t len, uint32_t checksum) { uint32_t value_checksum = crc32c::Value(value.data(), value.size());
// First, serilize the block information in a string
// First, serialize the block information in a string
DumpUnit dump_unit; DumpUnit dump_unit;
dump_unit.timestamp = timestamp; dump_unit.timestamp = timestamp;
dump_unit.key = key; dump_unit.key = key;
dump_unit.type = type; dump_unit.type = type;
dump_unit.value_len = len; dump_unit.value_len = value.size();
dump_unit.value = value; dump_unit.value = const_cast<char*>(value.data());
dump_unit.value_checksum = checksum; dump_unit.value_checksum = value_checksum;
std::string encoded_data; std::string encoded_data;
CacheDumperHelper::EncodeDumpUnit(dump_unit, &encoded_data); CacheDumperHelper::EncodeDumpUnit(dump_unit, &encoded_data);
@ -212,19 +212,19 @@ IOStatus CacheDumperImpl::WriteRawBlock(uint64_t timestamp,
unit_meta.sequence_num = sequence_num_; unit_meta.sequence_num = sequence_num_;
sequence_num_++; sequence_num_++;
unit_meta.dump_unit_checksum = unit_meta.dump_unit_checksum =
crc32c::Value(encoded_data.c_str(), encoded_data.size()); crc32c::Value(encoded_data.data(), encoded_data.size());
unit_meta.dump_unit_size = static_cast<uint64_t>(encoded_data.size()); unit_meta.dump_unit_size = encoded_data.size();
std::string encoded_meta; std::string encoded_meta;
CacheDumperHelper::EncodeDumpUnitMeta(unit_meta, &encoded_meta); CacheDumperHelper::EncodeDumpUnitMeta(unit_meta, &encoded_meta);
// We write the metadata first. // We write the metadata first.
assert(writer_ != nullptr); assert(writer_ != nullptr);
IOStatus io_s = writer_->WriteMetadata(Slice(encoded_meta)); IOStatus io_s = writer_->WriteMetadata(encoded_meta);
if (!io_s.ok()) { if (!io_s.ok()) {
return io_s; return io_s;
} }
// followed by the dump unit. // followed by the dump unit.
return writer_->WritePacket(Slice(encoded_data)); return writer_->WritePacket(encoded_data);
} }
// Before we write any block, we write the header first to store the cache dump // Before we write any block, we write the header first to store the cache dump
@ -238,38 +238,18 @@ IOStatus CacheDumperImpl::WriteHeader() {
<< "RocksDB Version: " << kMajorVersion << "." << kMinorVersion << "\t" << "RocksDB Version: " << kMajorVersion << "." << kMinorVersion << "\t"
<< "Format: dump_unit_metadata <sequence_number, dump_unit_checksum, " << "Format: dump_unit_metadata <sequence_number, dump_unit_checksum, "
"dump_unit_size>, dump_unit <timestamp, key, block_type, " "dump_unit_size>, dump_unit <timestamp, key, block_type, "
"block_size, raw_block, raw_block_checksum> cache_value\n"; "block_size, block_data, block_checksum> cache_value\n";
std::string header_value(s.str()); std::string header_value(s.str());
CacheDumpUnitType type = CacheDumpUnitType::kHeader; CacheDumpUnitType type = CacheDumpUnitType::kHeader;
uint64_t timestamp = clock_->NowMicros(); return WriteBlock(type, header_key, header_value);
uint32_t header_checksum =
crc32c::Value(header_value.c_str(), header_value.size());
return WriteRawBlock(timestamp, type, Slice(header_key),
(void*)header_value.c_str(), header_value.size(),
header_checksum);
}
// Write the block dumped from cache
IOStatus CacheDumperImpl::WriteCacheBlock(const CacheDumpUnitType type,
const Slice& key, void* value,
size_t len) {
uint64_t timestamp = clock_->NowMicros();
uint32_t value_checksum = crc32c::Value((char*)value, len);
return WriteRawBlock(timestamp, type, key, value, len, value_checksum);
} }
// Write the footer after all the blocks are stored to indicate the ending. // Write the footer after all the blocks are stored to indicate the ending.
IOStatus CacheDumperImpl::WriteFooter() { IOStatus CacheDumperImpl::WriteFooter() {
std::string footer_key = "footer"; std::string footer_key = "footer";
std::ostringstream s;
std::string footer_value("cache dump completed"); std::string footer_value("cache dump completed");
CacheDumpUnitType type = CacheDumpUnitType::kFooter; CacheDumpUnitType type = CacheDumpUnitType::kFooter;
uint64_t timestamp = clock_->NowMicros(); return WriteBlock(type, footer_key, footer_value);
uint32_t footer_checksum =
crc32c::Value(footer_value.c_str(), footer_value.size());
return WriteRawBlock(timestamp, type, Slice(footer_key),
(void*)footer_value.c_str(), footer_value.size(),
footer_checksum);
} }
// This is the main function to restore the cache entries to secondary cache. // This is the main function to restore the cache entries to secondary cache.
@ -309,9 +289,10 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
if (!io_s.ok()) { if (!io_s.ok()) {
break; break;
} }
// create the raw_block_content based on the information in the dump_unit // Create the uncompressed_block based on the information in the dump_unit
BlockContents raw_block_contents( // (There is no block trailer here compatible with block-based SST file.)
Slice((char*)dump_unit.value, dump_unit.value_len)); BlockContents uncompressed_block(
Slice(static_cast<char*>(dump_unit.value), dump_unit.value_len));
Cache::CacheItemHelper* helper = nullptr; Cache::CacheItemHelper* helper = nullptr;
Statistics* statistics = nullptr; Statistics* statistics = nullptr;
Status s = Status::OK(); Status s = Status::OK();
@ -323,7 +304,7 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
BlockType::kFilter); BlockType::kFilter);
std::unique_ptr<ParsedFullFilterBlock> block_holder; std::unique_ptr<ParsedFullFilterBlock> block_holder;
block_holder.reset(BlocklikeTraits<ParsedFullFilterBlock>::Create( block_holder.reset(BlocklikeTraits<ParsedFullFilterBlock>::Create(
std::move(raw_block_contents), toptions_.read_amp_bytes_per_bit, std::move(uncompressed_block), toptions_.read_amp_bytes_per_bit,
statistics, false, toptions_.filter_policy.get())); statistics, false, toptions_.filter_policy.get()));
if (helper != nullptr) { if (helper != nullptr) {
s = secondary_cache_->Insert(dump_unit.key, s = secondary_cache_->Insert(dump_unit.key,
@ -335,7 +316,7 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
helper = BlocklikeTraits<Block>::GetCacheItemHelper(BlockType::kData); helper = BlocklikeTraits<Block>::GetCacheItemHelper(BlockType::kData);
std::unique_ptr<Block> block_holder; std::unique_ptr<Block> block_holder;
block_holder.reset(BlocklikeTraits<Block>::Create( block_holder.reset(BlocklikeTraits<Block>::Create(
std::move(raw_block_contents), toptions_.read_amp_bytes_per_bit, std::move(uncompressed_block), toptions_.read_amp_bytes_per_bit,
statistics, false, toptions_.filter_policy.get())); statistics, false, toptions_.filter_policy.get()));
if (helper != nullptr) { if (helper != nullptr) {
s = secondary_cache_->Insert(dump_unit.key, s = secondary_cache_->Insert(dump_unit.key,
@ -347,7 +328,7 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
helper = BlocklikeTraits<Block>::GetCacheItemHelper(BlockType::kIndex); helper = BlocklikeTraits<Block>::GetCacheItemHelper(BlockType::kIndex);
std::unique_ptr<Block> block_holder; std::unique_ptr<Block> block_holder;
block_holder.reset(BlocklikeTraits<Block>::Create( block_holder.reset(BlocklikeTraits<Block>::Create(
std::move(raw_block_contents), 0, statistics, false, std::move(uncompressed_block), 0, statistics, false,
toptions_.filter_policy.get())); toptions_.filter_policy.get()));
if (helper != nullptr) { if (helper != nullptr) {
s = secondary_cache_->Insert(dump_unit.key, s = secondary_cache_->Insert(dump_unit.key,
@ -360,7 +341,7 @@ IOStatus CacheDumpedLoaderImpl::RestoreCacheEntriesToSecondaryCache() {
BlockType::kFilterPartitionIndex); BlockType::kFilterPartitionIndex);
std::unique_ptr<Block> block_holder; std::unique_ptr<Block> block_holder;
block_holder.reset(BlocklikeTraits<Block>::Create( block_holder.reset(BlocklikeTraits<Block>::Create(
std::move(raw_block_contents), toptions_.read_amp_bytes_per_bit, std::move(uncompressed_block), toptions_.read_amp_bytes_per_bit,
statistics, false, toptions_.filter_policy.get())); statistics, false, toptions_.filter_policy.get()));
if (helper != nullptr) { if (helper != nullptr) {
s = secondary_cache_->Insert(dump_unit.key, s = secondary_cache_->Insert(dump_unit.key,
@ -436,7 +417,7 @@ IOStatus CacheDumpedLoaderImpl::ReadHeader(std::string* data,
if (!io_s.ok()) { if (!io_s.ok()) {
return io_s; return io_s;
} }
uint32_t unit_checksum = crc32c::Value(data->c_str(), data->size()); uint32_t unit_checksum = crc32c::Value(data->data(), data->size());
if (unit_checksum != header_meta.dump_unit_checksum) { if (unit_checksum != header_meta.dump_unit_checksum) {
return IOStatus::Corruption("Read header unit corrupted!"); return IOStatus::Corruption("Read header unit corrupted!");
} }
@ -461,7 +442,7 @@ IOStatus CacheDumpedLoaderImpl::ReadCacheBlock(std::string* data,
if (!io_s.ok()) { if (!io_s.ok()) {
return io_s; return io_s;
} }
uint32_t unit_checksum = crc32c::Value(data->c_str(), data->size()); uint32_t unit_checksum = crc32c::Value(data->data(), data->size());
if (unit_checksum != unit_meta.dump_unit_checksum) { if (unit_checksum != unit_meta.dump_unit_checksum) {
return IOStatus::Corruption( return IOStatus::Corruption(
"Checksum does not match! Read dumped unit corrupted!"); "Checksum does not match! Read dumped unit corrupted!");

@ -75,7 +75,7 @@ struct DumpUnit {
// Pointer to the block. Note that, in the dump process, it points to a memory // Pointer to the block. Note that, in the dump process, it points to a memory
// buffer copied from cache block. The buffer is freed when we process the // buffer copied from cache block. The buffer is freed when we process the
// next block. In the load process, we use an std::string to store the // next block. In the load process, we use an std::string to store the
// serilized dump_unit read from the reader. So it points to the memory // serialized dump_unit read from the reader. So it points to the memory
// address of the begin of the block in this string. // address of the begin of the block in this string.
void* value; void* value;
@ -103,14 +103,9 @@ class CacheDumperImpl : public CacheDumper {
IOStatus DumpCacheEntriesToWriter() override; IOStatus DumpCacheEntriesToWriter() override;
private: private:
IOStatus WriteRawBlock(uint64_t timestamp, CacheDumpUnitType type, IOStatus WriteBlock(CacheDumpUnitType type, const Slice& key,
const Slice& key, void* value, size_t len, const Slice& value);
uint32_t checksum);
IOStatus WriteHeader(); IOStatus WriteHeader();
IOStatus WriteCacheBlock(const CacheDumpUnitType type, const Slice& key,
void* value, size_t len);
IOStatus WriteFooter(); IOStatus WriteFooter();
bool ShouldFilterOut(const Slice& key); bool ShouldFilterOut(const Slice& key);
std::function<void(const Slice&, void*, size_t, Cache::DeleterFn)> std::function<void(const Slice&, void*, size_t, Cache::DeleterFn)>
@ -166,7 +161,7 @@ class ToFileCacheDumpWriter : public CacheDumpWriter {
~ToFileCacheDumpWriter() { Close().PermitUncheckedError(); } ~ToFileCacheDumpWriter() { Close().PermitUncheckedError(); }
// Write the serilized metadata to the file // Write the serialized metadata to the file
virtual IOStatus WriteMetadata(const Slice& metadata) override { virtual IOStatus WriteMetadata(const Slice& metadata) override {
assert(file_writer_ != nullptr); assert(file_writer_ != nullptr);
std::string prefix; std::string prefix;
@ -179,7 +174,7 @@ class ToFileCacheDumpWriter : public CacheDumpWriter {
return io_s; return io_s;
} }
// Write the serilized data to the file // Write the serialized data to the file
virtual IOStatus WritePacket(const Slice& data) override { virtual IOStatus WritePacket(const Slice& data) override {
assert(file_writer_ != nullptr); assert(file_writer_ != nullptr);
std::string prefix; std::string prefix;
@ -284,7 +279,7 @@ class FromFileCacheDumpReader : public CacheDumpReader {
// The cache dump and load helper class // The cache dump and load helper class
class CacheDumperHelper { class CacheDumperHelper {
public: public:
// serilize the dump_unit_meta to a string, it is fixed 16 bytes size. // serialize the dump_unit_meta to a string, it is fixed 16 bytes size.
static void EncodeDumpUnitMeta(const DumpUnitMeta& meta, std::string* data) { static void EncodeDumpUnitMeta(const DumpUnitMeta& meta, std::string* data) {
assert(data); assert(data);
PutFixed32(data, static_cast<uint32_t>(meta.sequence_num)); PutFixed32(data, static_cast<uint32_t>(meta.sequence_num));
@ -292,7 +287,7 @@ class CacheDumperHelper {
PutFixed64(data, meta.dump_unit_size); PutFixed64(data, meta.dump_unit_size);
} }
// Serilize the dump_unit to a string. // Serialize the dump_unit to a string.
static void EncodeDumpUnit(const DumpUnit& dump_unit, std::string* data) { static void EncodeDumpUnit(const DumpUnit& dump_unit, std::string* data) {
assert(data); assert(data);
PutFixed64(data, dump_unit.timestamp); PutFixed64(data, dump_unit.timestamp);
@ -304,7 +299,7 @@ class CacheDumperHelper {
Slice((char*)dump_unit.value, dump_unit.value_len)); Slice((char*)dump_unit.value, dump_unit.value_len));
} }
// Deserilize the dump_unit_meta from a string // Deserialize the dump_unit_meta from a string
static Status DecodeDumpUnitMeta(const std::string& encoded_data, static Status DecodeDumpUnitMeta(const std::string& encoded_data,
DumpUnitMeta* unit_meta) { DumpUnitMeta* unit_meta) {
assert(unit_meta != nullptr); assert(unit_meta != nullptr);
@ -323,7 +318,7 @@ class CacheDumperHelper {
return Status::OK(); return Status::OK();
} }
// Deserilize the dump_unit from a string. // Deserialize the dump_unit from a string.
static Status DecodeDumpUnit(const std::string& encoded_data, static Status DecodeDumpUnit(const std::string& encoded_data,
DumpUnit* dump_unit) { DumpUnit* dump_unit) {
assert(dump_unit != nullptr); assert(dump_unit != nullptr);

Loading…
Cancel
Save