Introduce CacheAllocator, a custom allocator for cache blocks (#4437)

Summary:
This is a conceptually simple change, but it touches many files to
pass the allocator through function calls.

We introduce CacheAllocator, which clients can use to configure a custom
allocator for cache blocks. Our motivation is to hook this up with folly's
`JemallocNodumpAllocator`
(f43ce6d686/folly/experimental/JemallocNodumpAllocator.h),
but there are many other possible use cases.
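
For illustration, here is a minimal sketch of the intended client-side hookup,
based on the interface and options this PR adds (assuming, as the usage in
include/rocksdb/cache.h suggests, that CacheAllocator is visible in the
rocksdb namespace; MallocCacheAllocator is a hypothetical stand-in for a real
allocator such as folly's JemallocNodumpAllocator):

    #include <cstdlib>
    #include <memory>
    #include "rocksdb/cache.h"

    // Hypothetical allocator that routes cache-block memory through
    // malloc/free instead of the default new[]/delete[].
    class MallocCacheAllocator : public rocksdb::CacheAllocator {
     public:
      const char* Name() const override { return "MallocCacheAllocator"; }
      void* Allocate(size_t size) override { return std::malloc(size); }
      void Deallocate(void* p) override { std::free(p); }
    };

    // Hook the allocator into the block cache via LRUCacheOptions.
    rocksdb::LRUCacheOptions cache_opts;
    cache_opts.capacity = 8 << 20;  // 8MB block cache
    cache_opts.cache_allocator = std::make_shared<MallocCacheAllocator>();
    std::shared_ptr<rocksdb::Cache> cache = rocksdb::NewLRUCache(cache_opts);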

Additionally, this commit cleans up memory allocation in
`util/compression.h`, making sure that all allocations are wrapped in a
unique_ptr as soon as possible.
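
Schematically, the cleanup replaces the raw-pointer pattern in the
decompression helpers with the RAII pattern below. This is a simplified
before/after, not literal code from the diff; Uncompress() stands in for the
library-specific call, and CacheAllocationPtr/AllocateBlock are the helpers
added in util/cache_allocator.h:

    // Before: raw pointer; every error path must remember delete[].
    char* output = new char[output_len];
    if (!Uncompress(input, output)) {
      delete[] output;
      return nullptr;
    }

    // After: the buffer is owned by a unique_ptr (with an allocator-aware
    // deleter) from the moment it is allocated; error paths just return.
    CacheAllocationPtr output = AllocateBlock(output_len, allocator);
    if (!Uncompress(input, output.get())) {
      return nullptr;
    }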
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4437

Differential Revision: D10132814

Pulled By: yiwu-arbug

fbshipit-source-id: be1343a4b69f6048df127939fea9bbc96969f564
Branch: main
Author: Igor Canadi (committed by Facebook Github Bot)
Parent: 4e58b2ea3d
Commit: 1cf5deb8fd
Changed files (19), with changed-line counts:

  1.  HISTORY.md (1)
  2.  cache/lru_cache.cc (19)
  3.  cache/lru_cache.h (3)
  4.  cache/sharded_cache.cc (9)
  5.  cache/sharded_cache.h (3)
  6.  include/rocksdb/cache.h (28)
  7.  include/rocksdb/cache_allocator.h (29)
  8.  table/block_based_table_builder.cc (4)
  9.  table/block_based_table_reader.cc (69)
  10. table/block_based_table_reader.h (5)
  11. table/block_fetcher.cc (17)
  12. table/block_fetcher.h (9)
  13. table/format.cc (41)
  14. table/format.h (21)
  15. table/plain_table_reader.h (4)
  16. table/table_test.cc (72)
  17. tools/db_bench_tool.cc (23)
  18. util/cache_allocator.h (38)
  19. util/compression.h (80)

HISTORY.md

@@ -7,6 +7,7 @@
 ### New Features
 * TransactionOptions::skip_concurrency_control allows pessimistic transactions to skip the overhead of concurrency control. Could be used for optimizing certain transactions or during recovery.
+* Introduced CacheAllocator, which lets the user specify custom allocator for memory in block cache.
 ### Bug Fixes
 * Avoid creating empty SSTs and subsequently deleting them in certain cases during compaction.

cache/lru_cache.cc

@@ -461,8 +461,10 @@ std::string LRUCacheShard::GetPrintableOptions() const {
 }
 LRUCache::LRUCache(size_t capacity, int num_shard_bits,
-                   bool strict_capacity_limit, double high_pri_pool_ratio)
-    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
+                   bool strict_capacity_limit, double high_pri_pool_ratio,
+                   std::shared_ptr<CacheAllocator> allocator)
+    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit,
+                   std::move(allocator)) {
   num_shards_ = 1 << num_shard_bits;
   shards_ = reinterpret_cast<LRUCacheShard*>(
       port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));

@@ -537,12 +539,14 @@ double LRUCache::GetHighPriPoolRatio() {
 std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
   return NewLRUCache(cache_opts.capacity, cache_opts.num_shard_bits,
                      cache_opts.strict_capacity_limit,
-                     cache_opts.high_pri_pool_ratio);
+                     cache_opts.high_pri_pool_ratio,
+                     cache_opts.cache_allocator);
 }
-std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
-                                   bool strict_capacity_limit,
-                                   double high_pri_pool_ratio) {
+std::shared_ptr<Cache> NewLRUCache(
+    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+    double high_pri_pool_ratio,
+    std::shared_ptr<CacheAllocator> cache_allocator) {
   if (num_shard_bits >= 20) {
     return nullptr;  // the cache cannot be sharded into too many fine pieces
   }

@@ -554,7 +558,8 @@ std::shared_ptr<Cache> NewLRUCache(size_t capacity, int num_shard_bits,
     num_shard_bits = GetDefaultCacheShardBits(capacity);
   }
   return std::make_shared<LRUCache>(capacity, num_shard_bits,
-                                    strict_capacity_limit, high_pri_pool_ratio);
+                                    strict_capacity_limit, high_pri_pool_ratio,
+                                    std::move(cache_allocator));
 }
 }  // namespace rocksdb

cache/lru_cache.h

@@ -279,7 +279,8 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard : public CacheShard {
 class LRUCache : public ShardedCache {
  public:
   LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
-           double high_pri_pool_ratio);
+           double high_pri_pool_ratio,
+           std::shared_ptr<CacheAllocator> cache_allocator = nullptr);
   virtual ~LRUCache();
   virtual const char* Name() const override { return "LRUCache"; }
   virtual CacheShard* GetShard(int shard) override;

cache/sharded_cache.cc

@@ -20,8 +20,10 @@
 namespace rocksdb {
 ShardedCache::ShardedCache(size_t capacity, int num_shard_bits,
-                           bool strict_capacity_limit)
-    : num_shard_bits_(num_shard_bits),
+                           bool strict_capacity_limit,
+                           std::shared_ptr<CacheAllocator> allocator)
+    : Cache(std::move(allocator)),
+      num_shard_bits_(num_shard_bits),
       capacity_(capacity),
       strict_capacity_limit_(strict_capacity_limit),
       last_id_(1) {}

@@ -142,6 +144,9 @@ std::string ShardedCache::GetPrintableOptions() const {
              strict_capacity_limit_);
     ret.append(buffer);
   }
+  snprintf(buffer, kBufferSize, "    cache_allocator : %s\n",
+           cache_allocator() ? cache_allocator()->Name() : "None");
+  ret.append(buffer);
   ret.append(GetShard(0)->GetPrintableOptions());
   return ret;
 }

cache/sharded_cache.h

@@ -47,7 +47,8 @@ class CacheShard {
 // Keys are sharded by the highest num_shard_bits bits of hash value.
 class ShardedCache : public Cache {
  public:
-  ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit);
+  ShardedCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+               std::shared_ptr<CacheAllocator> cache_allocator = nullptr);
   virtual ~ShardedCache() = default;
   virtual const char* Name() const override = 0;
   virtual CacheShard* GetShard(int shard) = 0;

include/rocksdb/cache.h

@@ -25,6 +25,7 @@
 #include <stdint.h>
 #include <memory>
 #include <string>
+#include "rocksdb/cache_allocator.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/statistics.h"
 #include "rocksdb/status.h"

@@ -58,13 +59,20 @@
   // BlockBasedTableOptions::cache_index_and_filter_blocks_with_high_priority.
   double high_pri_pool_ratio = 0.0;
+  // If non-nullptr will use this allocator instead of system allocator when
+  // allocating memory for cache blocks. Call this method before you start using
+  // the cache!
+  std::shared_ptr<CacheAllocator> cache_allocator;
   LRUCacheOptions() {}
   LRUCacheOptions(size_t _capacity, int _num_shard_bits,
-                  bool _strict_capacity_limit, double _high_pri_pool_ratio)
+                  bool _strict_capacity_limit, double _high_pri_pool_ratio,
+                  std::shared_ptr<CacheAllocator> _cache_allocator = nullptr)
       : capacity(_capacity),
         num_shard_bits(_num_shard_bits),
         strict_capacity_limit(_strict_capacity_limit),
-        high_pri_pool_ratio(_high_pri_pool_ratio) {}
+        high_pri_pool_ratio(_high_pri_pool_ratio),
+        cache_allocator(std::move(_cache_allocator)) {}
 };
 // Create a new cache with a fixed size capacity. The cache is sharded

@@ -75,10 +83,10 @@ struct LRUCacheOptions {
 // high_pri_pool_pct.
 // num_shard_bits = -1 means it is automatically determined: every shard
 // will be at least 512KB and number of shard bits will not exceed 6.
-extern std::shared_ptr<Cache> NewLRUCache(size_t capacity,
-                                          int num_shard_bits = -1,
-                                          bool strict_capacity_limit = false,
-                                          double high_pri_pool_ratio = 0.0);
+extern std::shared_ptr<Cache> NewLRUCache(
+    size_t capacity, int num_shard_bits = -1,
+    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.0,
+    std::shared_ptr<CacheAllocator> cache_allocator = nullptr);
 extern std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts);

@@ -91,13 +99,15 @@ extern std::shared_ptr<Cache> NewClockCache(size_t capacity,
                                             int num_shard_bits = -1,
                                             bool strict_capacity_limit = false);
 class Cache {
  public:
   // Depending on implementation, cache entries with high priority could be less
   // likely to get evicted than low priority entries.
   enum class Priority { HIGH, LOW };
-  Cache() {}
+  Cache(std::shared_ptr<CacheAllocator> allocator = nullptr)
+      : cache_allocator_(std::move(allocator)) {}
   // Destroys all existing entries by calling the "deleter"
   // function that was passed via the Insert() function.

@@ -228,10 +238,14 @@ class Cache {
   virtual void TEST_mark_as_data_block(const Slice& /*key*/,
                                        size_t /*charge*/) {}
+  CacheAllocator* cache_allocator() const { return cache_allocator_.get(); }
  private:
   // No copying allowed
   Cache(const Cache&);
   Cache& operator=(const Cache&);
+  std::shared_ptr<CacheAllocator> cache_allocator_;
 };
 }  // namespace rocksdb

include/rocksdb/cache_allocator.h

@@ -0,0 +1,29 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+#pragma once
+// CacheAllocator is an interface that a client can implement to supply custom
+// cache allocation and deallocation methods. See rocksdb/cache.h for more
+// information.
+// All methods should be thread-safe.
+class CacheAllocator {
+ public:
+  virtual ~CacheAllocator() = default;
+  // Name of the cache allocator, printed in the log
+  virtual const char* Name() const = 0;
+  // Allocate a block of at least size size
+  virtual void* Allocate(size_t size) = 0;
+  // Deallocate previously allocated block
+  virtual void Deallocate(void* p) = 0;
+  // Returns the memory size of the block allocated at p. The default
+  // implementation that just returns the original allocation_size is fine.
+  virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
+    // default implementation just returns the allocation size
+    return allocation_size;
+  }
+};
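
The UsableSize() hook exists so that allocators which round allocations up to
internal size classes (jemalloc, for example) can report the memory a block
really occupies; BlockContents::usable_size() in table/format.h below consults
it when charging block memory. A hypothetical override, assuming a platform
that provides malloc_usable_size (glibc's <malloc.h>; not part of this PR):

    #include <cstdlib>
    #include <malloc.h>  // malloc_usable_size (platform-specific assumption)

    class MallocUsableSizeAllocator : public CacheAllocator {
     public:
      const char* Name() const override { return "MallocUsableSizeAllocator"; }
      void* Allocate(size_t size) override { return malloc(size); }
      void Deallocate(void* p) override { free(p); }
      // Report the rounded-up size the underlying allocator actually reserved,
      // rather than the requested allocation_size.
      size_t UsableSize(void* p, size_t /*allocation_size*/) const override {
        return malloc_usable_size(p);
      }
    };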

table/block_based_table_builder.cc

@@ -39,6 +39,7 @@
 #include "table/full_filter_block.h"
 #include "table/table_builder.h"
+#include "util/cache_allocator.h"
 #include "util/coding.h"
 #include "util/compression.h"
 #include "util/crc32c.h"

@@ -654,7 +655,8 @@ Status BlockBasedTableBuilder::InsertBlockInCache(const Slice& block_contents,
     size_t size = block_contents.size();
-    std::unique_ptr<char[]> ubuf(new char[size + 1]);
+    auto ubuf =
+        AllocateBlock(size + 1, block_cache_compressed->cache_allocator());
     memcpy(ubuf.get(), block_contents.data(), size);
     ubuf[size] = type;

table/block_based_table_reader.cc

@@ -80,11 +80,12 @@ Status ReadBlockFromFile(
     std::unique_ptr<Block>* result, const ImmutableCFOptions& ioptions,
     bool do_uncompress, const Slice& compression_dict,
     const PersistentCacheOptions& cache_options, SequenceNumber global_seqno,
-    size_t read_amp_bytes_per_bit, const bool immortal_file = false) {
+    size_t read_amp_bytes_per_bit, CacheAllocator* allocator = nullptr,
+    const bool immortal_file = false) {
   BlockContents contents;
-  BlockFetcher block_fetcher(file, prefetch_buffer, footer, options, handle,
-                             &contents, ioptions, do_uncompress,
-                             compression_dict, cache_options, immortal_file);
+  BlockFetcher block_fetcher(
+      file, prefetch_buffer, footer, options, handle, &contents, ioptions,
+      do_uncompress, compression_dict, cache_options, allocator, immortal_file);
   Status s = block_fetcher.ReadBlockContents();
   if (s.ok()) {
     result->reset(new Block(std::move(contents), global_seqno,

@@ -94,6 +95,13 @@ Status ReadBlockFromFile(
   return s;
 }
+inline CacheAllocator* GetCacheAllocator(
+    const BlockBasedTableOptions& table_options) {
+  return table_options.block_cache.get()
+             ? table_options.block_cache->cache_allocator()
+             : nullptr;
+}
 // Delete the resource that is held by the iterator.
 template <class ResourceType>
 void DeleteHeldResource(void* arg, void* /*ignored*/) {

@@ -1150,7 +1158,8 @@ Status BlockBasedTable::ReadMetaBlock(Rep* rep,
       rep->footer.metaindex_handle(), &meta, rep->ioptions,
       true /* decompress */, Slice() /*compression dict*/,
       rep->persistent_cache_options, kDisableGlobalSequenceNumber,
-      0 /* read_amp_bytes_per_bit */);
+      0 /* read_amp_bytes_per_bit */,
+      GetCacheAllocator(rep->table_options));
   if (!s.ok()) {
     ROCKS_LOG_ERROR(rep->ioptions.info_log,

@@ -1173,7 +1182,7 @@ Status BlockBasedTable::GetDataBlockFromCache(
     const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
     BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
     const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index,
-    GetContext* get_context) {
+    GetContext* get_context, CacheAllocator* allocator) {
   Status s;
   Block* compressed_block = nullptr;
   Cache::Handle* block_cache_compressed_handle = nullptr;

@@ -1230,7 +1239,7 @@ Status BlockBasedTable::GetDataBlockFromCache(
                                       compression_dict);
     s = UncompressBlockContents(uncompresssion_ctx, compressed_block->data(),
                                 compressed_block->size(), &contents,
-                                format_version, ioptions);
+                                format_version, ioptions, allocator);
     // Insert uncompressed block into block cache
     if (s.ok()) {

@@ -1292,7 +1301,8 @@ Status BlockBasedTable::PutDataBlockToCache(
     const ReadOptions& /*read_options*/, const ImmutableCFOptions& ioptions,
     CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
     const Slice& compression_dict, size_t read_amp_bytes_per_bit, bool is_index,
-    Cache::Priority priority, GetContext* get_context) {
+    Cache::Priority priority, GetContext* get_context,
+    CacheAllocator* allocator) {
   assert(raw_block->compression_type() == kNoCompression ||
          block_cache_compressed != nullptr);

@@ -1305,7 +1315,7 @@ Status BlockBasedTable::PutDataBlockToCache(
                                       compression_dict);
     s = UncompressBlockContents(uncompression_ctx, raw_block->data(),
                                 raw_block->size(), &contents, format_version,
-                                ioptions);
+                                ioptions, allocator);
   }
   if (!s.ok()) {
     delete raw_block;

@@ -1402,7 +1412,8 @@ FilterBlockReader* BlockBasedTable::ReadFilter(
   BlockFetcher block_fetcher(rep->file.get(), prefetch_buffer, rep->footer,
                              ReadOptions(), filter_handle, &block,
                              rep->ioptions, false /* decompress */,
-                             dummy_comp_dict, rep->persistent_cache_options);
+                             dummy_comp_dict, rep->persistent_cache_options,
+                             GetCacheAllocator(rep->table_options));
   Status s = block_fetcher.ReadBlockContents();
   if (!s.ok()) {

@@ -1700,7 +1711,9 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
         &block_value, rep->ioptions, rep->blocks_maybe_compressed,
         compression_dict, rep->persistent_cache_options,
         is_index ? kDisableGlobalSequenceNumber : rep->global_seqno,
-        rep->table_options.read_amp_bytes_per_bit, rep->immortal_table);
+        rep->table_options.read_amp_bytes_per_bit,
+        GetCacheAllocator(rep->table_options),
+        rep->immortal_table);
   }
   if (s.ok()) {
     block.value = block_value.release();

@@ -1792,7 +1805,8 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
     s = GetDataBlockFromCache(
         key, ckey, block_cache, block_cache_compressed, rep->ioptions, ro,
         block_entry, rep->table_options.format_version, compression_dict,
-        rep->table_options.read_amp_bytes_per_bit, is_index, get_context);
+        rep->table_options.read_amp_bytes_per_bit, is_index, get_context,
+        GetCacheAllocator(rep->table_options));
     if (block_entry->value == nullptr && !no_io && ro.fill_cache) {
       std::unique_ptr<Block> raw_block;

@@ -1804,7 +1818,9 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
             block_cache_compressed == nullptr && rep->blocks_maybe_compressed,
             compression_dict, rep->persistent_cache_options,
             is_index ? kDisableGlobalSequenceNumber : rep->global_seqno,
-            rep->table_options.read_amp_bytes_per_bit, rep->immortal_table);
+            rep->table_options.read_amp_bytes_per_bit,
+            GetCacheAllocator(rep->table_options),
+            rep->immortal_table);
       }
       if (s.ok()) {

@@ -1817,7 +1833,7 @@ Status BlockBasedTable::MaybeLoadDataBlockToCache(
                 .cache_index_and_filter_blocks_with_high_priority
                 ? Cache::Priority::HIGH
                 : Cache::Priority::LOW,
-            get_context);
+            get_context, GetCacheAllocator(rep->table_options));
       }
     }
   }

@@ -2524,11 +2540,12 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
     BlockHandle handle = index_iter->value();
     BlockContents contents;
     Slice dummy_comp_dict;
-    BlockFetcher block_fetcher(rep_->file.get(), nullptr /* prefetch buffer */,
-                               rep_->footer, ReadOptions(), handle, &contents,
-                               rep_->ioptions, false /* decompress */,
-                               dummy_comp_dict /*compression dict*/,
-                               rep_->persistent_cache_options);
+    BlockFetcher block_fetcher(
+        rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer,
+        ReadOptions(), handle, &contents, rep_->ioptions,
+        false /* decompress */, dummy_comp_dict /*compression dict*/,
+        rep_->persistent_cache_options,
+        GetCacheAllocator(rep_->table_options));
     s = block_fetcher.ReadBlockContents();
     if (!s.ok()) {
       break;

@@ -2550,11 +2567,12 @@ Status BlockBasedTable::VerifyChecksumInBlocks(
     s = handle.DecodeFrom(&input);
     BlockContents contents;
     Slice dummy_comp_dict;
-    BlockFetcher block_fetcher(rep_->file.get(), nullptr /* prefetch buffer */,
-                               rep_->footer, ReadOptions(), handle, &contents,
-                               rep_->ioptions, false /* decompress */,
-                               dummy_comp_dict /*compression dict*/,
-                               rep_->persistent_cache_options);
+    BlockFetcher block_fetcher(
+        rep_->file.get(), nullptr /* prefetch buffer */, rep_->footer,
+        ReadOptions(), handle, &contents, rep_->ioptions,
+        false /* decompress */, dummy_comp_dict /*compression dict*/,
+        rep_->persistent_cache_options,
+        GetCacheAllocator(rep_->table_options));
     s = block_fetcher.ReadBlockContents();
     if (!s.ok()) {
       break;

@@ -2858,7 +2876,8 @@ Status BlockBasedTable::DumpTable(WritableFile* out_file,
           rep_->file.get(), nullptr /* prefetch_buffer */, rep_->footer,
           ReadOptions(), handle, &block, rep_->ioptions,
           false /*decompress*/, dummy_comp_dict /*compression dict*/,
-          rep_->persistent_cache_options);
+          rep_->persistent_cache_options,
+          GetCacheAllocator(rep_->table_options));
       s = block_fetcher.ReadBlockContents();
       if (!s.ok()) {
         rep_->filter.reset(new BlockBasedFilterBlockReader(

table/block_based_table_reader.h

@@ -303,7 +303,8 @@ class BlockBasedTable : public TableReader {
       const ImmutableCFOptions& ioptions, const ReadOptions& read_options,
       BlockBasedTable::CachableEntry<Block>* block, uint32_t format_version,
       const Slice& compression_dict, size_t read_amp_bytes_per_bit,
-      bool is_index = false, GetContext* get_context = nullptr);
+      bool is_index = false, GetContext* get_context = nullptr,
+      CacheAllocator* allocator = nullptr);
   // Put a raw block (maybe compressed) to the corresponding block caches.
   // This method will perform decompression against raw_block if needed and then

@@ -322,7 +323,7 @@ class BlockBasedTable : public TableReader {
       CachableEntry<Block>* block, Block* raw_block, uint32_t format_version,
       const Slice& compression_dict, size_t read_amp_bytes_per_bit,
       bool is_index = false, Cache::Priority pri = Cache::Priority::LOW,
-      GetContext* get_context = nullptr);
+      GetContext* get_context = nullptr, CacheAllocator* allocator = nullptr);
   // Calls (*handle_result)(arg, ...) repeatedly, starting with the entry found
   // after a call to Seek(key), until handle_result returns false.

table/block_fetcher.cc

@@ -17,8 +17,9 @@
 #include "rocksdb/env.h"
 #include "table/block.h"
 #include "table/block_based_table_reader.h"
-#include "table/persistent_cache_helper.h"
 #include "table/format.h"
+#include "table/persistent_cache_helper.h"
+#include "util/cache_allocator.h"
 #include "util/coding.h"
 #include "util/compression.h"
 #include "util/crc32c.h"

@@ -107,9 +108,11 @@ bool BlockFetcher::TryGetCompressedBlockFromPersistentCache() {
   if (cache_options_.persistent_cache &&
       cache_options_.persistent_cache->IsCompressed()) {
     // lookup uncompressed cache mode p-cache
+    std::unique_ptr<char[]> raw_data;
     status_ = PersistentCacheHelper::LookupRawPage(
-        cache_options_, handle_, &heap_buf_, block_size_ + kBlockTrailerSize);
+        cache_options_, handle_, &raw_data, block_size_ + kBlockTrailerSize);
     if (status_.ok()) {
+      heap_buf_ = CacheAllocationPtr(raw_data.release());
       used_buf_ = heap_buf_.get();
       slice_ = Slice(heap_buf_.get(), block_size_);
       return true;

@@ -132,7 +135,7 @@ void BlockFetcher::PrepareBufferForBlockFromFile() {
     // trivially allocated stack buffer instead of needing a full malloc()
     used_buf_ = &stack_buf_[0];
   } else {
-    heap_buf_.reset(new char[block_size_ + kBlockTrailerSize]);
+    heap_buf_ = AllocateBlock(block_size_ + kBlockTrailerSize, allocator_);
    used_buf_ = heap_buf_.get();
   }
 }

@@ -170,7 +173,7 @@ void BlockFetcher::GetBlockContents() {
     // or heap provided. Refer to https://github.com/facebook/rocksdb/pull/4096
     if (got_from_prefetch_buffer_ || used_buf_ == &stack_buf_[0]) {
       assert(used_buf_ != heap_buf_.get());
-      heap_buf_.reset(new char[block_size_ + kBlockTrailerSize]);
+      heap_buf_ = AllocateBlock(block_size_ + kBlockTrailerSize, allocator_);
       memcpy(heap_buf_.get(), used_buf_, block_size_ + kBlockTrailerSize);
     }
   *contents_ = BlockContents(std::move(heap_buf_), block_size_, true,

@@ -228,9 +231,9 @@ Status BlockFetcher::ReadBlockContents() {
   if (do_uncompress_ && compression_type != kNoCompression) {
     // compressed page, uncompress, update cache
     UncompressionContext uncompression_ctx(compression_type, compression_dict_);
-    status_ =
-        UncompressBlockContents(uncompression_ctx, slice_.data(), block_size_,
-                                contents_, footer_.version(), ioptions_);
+    status_ = UncompressBlockContents(uncompression_ctx, slice_.data(),
+                                      block_size_, contents_, footer_.version(),
+                                      ioptions_, allocator_);
   } else {
     GetBlockContents();
   }

table/block_fetcher.h

@@ -11,6 +11,8 @@
 #include "table/block.h"
 #include "table/format.h"
+#include "util/cache_allocator.h"
 namespace rocksdb {
 class BlockFetcher {
  public:

@@ -26,6 +28,7 @@ class BlockFetcher {
                BlockContents* contents, const ImmutableCFOptions& ioptions,
                bool do_uncompress, const Slice& compression_dict,
                const PersistentCacheOptions& cache_options,
+               CacheAllocator* allocator = nullptr,
                const bool immortal_source = false)
       : file_(file),
         prefetch_buffer_(prefetch_buffer),

@@ -37,7 +40,8 @@ class BlockFetcher {
         do_uncompress_(do_uncompress),
         immortal_source_(immortal_source),
         compression_dict_(compression_dict),
-        cache_options_(cache_options) {}
+        cache_options_(cache_options),
+        allocator_(allocator) {}
   Status ReadBlockContents();
  private:

@@ -54,11 +58,12 @@ class BlockFetcher {
   const bool immortal_source_;
   const Slice& compression_dict_;
   const PersistentCacheOptions& cache_options_;
+  CacheAllocator* allocator_;
   Status status_;
   Slice slice_;
   char* used_buf_ = nullptr;
   size_t block_size_;
-  std::unique_ptr<char[]> heap_buf_;
+  CacheAllocationPtr heap_buf_;
   char stack_buf_[kDefaultStackBufferSize];
   bool got_from_prefetch_buffer_ = false;
   rocksdb::CompressionType compression_type;

table/format.cc

@@ -19,6 +19,7 @@
 #include "table/block_based_table_reader.h"
 #include "table/block_fetcher.h"
 #include "table/persistent_cache_helper.h"
+#include "util/cache_allocator.h"
 #include "util/coding.h"
 #include "util/compression.h"
 #include "util/crc32c.h"

@@ -279,8 +280,9 @@ Status ReadFooterFromFile(RandomAccessFileReader* file,
 Status UncompressBlockContentsForCompressionType(
     const UncompressionContext& uncompression_ctx, const char* data, size_t n,
     BlockContents* contents, uint32_t format_version,
-    const ImmutableCFOptions& ioptions) {
-  std::unique_ptr<char[]> ubuf;
+    const ImmutableCFOptions& ioptions,
+    CacheAllocator* allocator) {
+  CacheAllocationPtr ubuf;
   assert(uncompression_ctx.type() != kNoCompression &&
          "Invalid compression type");

@@ -296,7 +298,7 @@ Status UncompressBlockContentsForCompressionType(
       if (!Snappy_GetUncompressedLength(data, n, &ulength)) {
         return Status::Corruption(snappy_corrupt_msg);
       }
-      ubuf.reset(new char[ulength]);
+      ubuf = AllocateBlock(ulength, allocator);
       if (!Snappy_Uncompress(data, n, ubuf.get())) {
         return Status::Corruption(snappy_corrupt_msg);
       }

@@ -304,9 +306,10 @@
       break;
     }
     case kZlibCompression:
-      ubuf.reset(Zlib_Uncompress(
-          uncompression_ctx, data, n, &decompress_size,
-          GetCompressFormatForVersion(kZlibCompression, format_version)));
+      ubuf = Zlib_Uncompress(
+          uncompression_ctx, data, n, &decompress_size,
+          GetCompressFormatForVersion(kZlibCompression, format_version),
+          allocator);
       if (!ubuf) {
         static char zlib_corrupt_msg[] =
             "Zlib not supported or corrupted Zlib compressed block contents";

@@ -316,9 +319,10 @@ Status UncompressBlockContentsForCompressionType(
           BlockContents(std::move(ubuf), decompress_size, true, kNoCompression);
       break;
     case kBZip2Compression:
-      ubuf.reset(BZip2_Uncompress(
-          data, n, &decompress_size,
-          GetCompressFormatForVersion(kBZip2Compression, format_version)));
+      ubuf = BZip2_Uncompress(
+          data, n, &decompress_size,
+          GetCompressFormatForVersion(kBZip2Compression, format_version),
+          allocator);
       if (!ubuf) {
         static char bzip2_corrupt_msg[] =
             "Bzip2 not supported or corrupted Bzip2 compressed block contents";

@@ -328,9 +332,10 @@ Status UncompressBlockContentsForCompressionType(
           BlockContents(std::move(ubuf), decompress_size, true, kNoCompression);
       break;
     case kLZ4Compression:
-      ubuf.reset(LZ4_Uncompress(
-          uncompression_ctx, data, n, &decompress_size,
-          GetCompressFormatForVersion(kLZ4Compression, format_version)));
+      ubuf = LZ4_Uncompress(
+          uncompression_ctx, data, n, &decompress_size,
+          GetCompressFormatForVersion(kLZ4Compression, format_version),
+          allocator);
       if (!ubuf) {
         static char lz4_corrupt_msg[] =
             "LZ4 not supported or corrupted LZ4 compressed block contents";

@@ -340,9 +345,10 @@ Status UncompressBlockContentsForCompressionType(
           BlockContents(std::move(ubuf), decompress_size, true, kNoCompression);
       break;
     case kLZ4HCCompression:
-      ubuf.reset(LZ4_Uncompress(
-          uncompression_ctx, data, n, &decompress_size,
-          GetCompressFormatForVersion(kLZ4HCCompression, format_version)));
+      ubuf = LZ4_Uncompress(
+          uncompression_ctx, data, n, &decompress_size,
+          GetCompressFormatForVersion(kLZ4HCCompression, format_version),
+          allocator);
       if (!ubuf) {
         static char lz4hc_corrupt_msg[] =
             "LZ4HC not supported or corrupted LZ4HC compressed block contents";

@@ -352,6 +358,8 @@ Status UncompressBlockContentsForCompressionType(
           BlockContents(std::move(ubuf), decompress_size, true, kNoCompression);
       break;
     case kXpressCompression:
+      // XPRESS allocates memory internally, thus no support for custom
+      // allocator.
       ubuf.reset(XPRESS_Uncompress(data, n, &decompress_size));
       if (!ubuf) {
         static char xpress_corrupt_msg[] =

@@ -363,7 +371,8 @@ Status UncompressBlockContentsForCompressionType(
       break;
     case kZSTD:
     case kZSTDNotFinalCompression:
-      ubuf.reset(ZSTD_Uncompress(uncompression_ctx, data, n, &decompress_size));
+      ubuf = ZSTD_Uncompress(uncompression_ctx, data, n, &decompress_size,
+                             allocator);
       if (!ubuf) {
         static char zstd_corrupt_msg[] =
             "ZSTD not supported or corrupted ZSTD compressed block contents";

@@ -396,11 +405,13 @@ Status UncompressBlockContentsForCompressionType(
 Status UncompressBlockContents(const UncompressionContext& uncompression_ctx,
                                const char* data, size_t n,
                                BlockContents* contents, uint32_t format_version,
-                               const ImmutableCFOptions& ioptions) {
+                               const ImmutableCFOptions& ioptions,
+                               CacheAllocator* allocator) {
   assert(data[n] != kNoCompression);
   assert(data[n] == uncompression_ctx.type());
-  return UncompressBlockContentsForCompressionType(
-      uncompression_ctx, data, n, contents, format_version, ioptions);
+  return UncompressBlockContentsForCompressionType(uncompression_ctx, data, n,
+                                                   contents, format_version,
+                                                   ioptions, allocator);
 }
 }  // namespace rocksdb

table/format.h

@@ -26,6 +26,7 @@
 #include "port/port.h"  // noexcept
 #include "table/persistent_cache_options.h"
 #include "util/file_reader_writer.h"
+#include "util/cache_allocator.h"
 namespace rocksdb {

@@ -192,7 +193,7 @@ struct BlockContents {
   Slice data;  // Actual contents of data
   bool cachable;  // True iff data can be cached
   CompressionType compression_type;
-  std::unique_ptr<char[]> allocation;
+  CacheAllocationPtr allocation;
   BlockContents() : cachable(false), compression_type(kNoCompression) {}

@@ -200,16 +201,28 @@
                 CompressionType _compression_type)
       : data(_data), cachable(_cachable), compression_type(_compression_type) {}
-  BlockContents(std::unique_ptr<char[]>&& _data, size_t _size, bool _cachable,
+  BlockContents(CacheAllocationPtr&& _data, size_t _size, bool _cachable,
                 CompressionType _compression_type)
       : data(_data.get(), _size),
         cachable(_cachable),
         compression_type(_compression_type),
         allocation(std::move(_data)) {}
+  BlockContents(std::unique_ptr<char[]>&& _data, size_t _size, bool _cachable,
+                CompressionType _compression_type)
+      : data(_data.get(), _size),
+        cachable(_cachable),
+        compression_type(_compression_type) {
+    allocation.reset(_data.release());
+  }
   // The additional memory space taken by the block data.
   size_t usable_size() const {
     if (allocation.get() != nullptr) {
+      auto allocator = allocation.get_deleter().allocator;
+      if (allocator) {
+        return allocator->UsableSize(allocation.get(), data.size());
+      }
 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
       return malloc_usable_size(allocation.get());
 #else

@@ -252,7 +265,7 @@ extern Status ReadBlockContents(
 extern Status UncompressBlockContents(
     const UncompressionContext& uncompression_ctx, const char* data, size_t n,
     BlockContents* contents, uint32_t compress_format_version,
-    const ImmutableCFOptions& ioptions);
+    const ImmutableCFOptions& ioptions, CacheAllocator* allocator = nullptr);
 // This is an extension to UncompressBlockContents that accepts
 // a specific compression type. This is used by un-wrapped blocks

@@ -260,7 +273,7 @@ extern Status UncompressBlockContents(
 extern Status UncompressBlockContentsForCompressionType(
     const UncompressionContext& uncompression_ctx, const char* data, size_t n,
     BlockContents* contents, uint32_t compress_format_version,
-    const ImmutableCFOptions& ioptions);
+    const ImmutableCFOptions& ioptions, CacheAllocator* allocator = nullptr);
 // Implementation details follow.  Clients should ignore,

table/plain_table_reader.h

@@ -153,8 +153,8 @@ class PlainTableReader: public TableReader {
   DynamicBloom bloom_;
   PlainTableReaderFileInfo file_info_;
   Arena arena_;
-  std::unique_ptr<char[]> index_block_alloc_;
-  std::unique_ptr<char[]> bloom_block_alloc_;
+  CacheAllocationPtr index_block_alloc_;
+  CacheAllocationPtr bloom_block_alloc_;
   const ImmutableCFOptions& ioptions_;
   uint64_t file_size_;

table/table_test.cc

@@ -2477,6 +2477,78 @@ TEST_P(BlockBasedTableTest, BlockCacheLeak) {
   c.ResetTableReader();
 }
+
+namespace {
+class CustomCacheAllocator : public CacheAllocator {
+ public:
+  virtual const char* Name() const override { return "CustomCacheAllocator"; }
+  void* Allocate(size_t size) override {
+    ++numAllocations;
+    auto ptr = new char[size + 16];
+    memcpy(ptr, "cache_allocator_", 16);  // mangle first 16 bytes
+    return reinterpret_cast<void*>(ptr + 16);
+  }
+  void Deallocate(void* p) override {
+    ++numDeallocations;
+    char* ptr = reinterpret_cast<char*>(p) - 16;
+    delete[] ptr;
+  }
+  std::atomic<int> numAllocations;
+  std::atomic<int> numDeallocations;
+};
+}  // namespace
+
+TEST_P(BlockBasedTableTest, CacheAllocator) {
+  auto custom_cache_allocator = std::make_shared<CustomCacheAllocator>();
+  {
+    Options opt;
+    unique_ptr<InternalKeyComparator> ikc;
+    ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
+    opt.compression = kNoCompression;
+    BlockBasedTableOptions table_options;
+    table_options.block_size = 1024;
+    LRUCacheOptions lruOptions;
+    lruOptions.cache_allocator = custom_cache_allocator;
+    lruOptions.capacity = 16 * 1024 * 1024;
+    lruOptions.num_shard_bits = 4;
+    table_options.block_cache = NewLRUCache(std::move(lruOptions));
+    opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+    TableConstructor c(BytewiseComparator(),
+                       true /* convert_to_internal_key_ */);
+    c.Add("k01", "hello");
+    c.Add("k02", "hello2");
+    c.Add("k03", std::string(10000, 'x'));
+    c.Add("k04", std::string(200000, 'x'));
+    c.Add("k05", std::string(300000, 'x'));
+    c.Add("k06", "hello3");
+    c.Add("k07", std::string(100000, 'x'));
+    std::vector<std::string> keys;
+    stl_wrappers::KVMap kvmap;
+    const ImmutableCFOptions ioptions(opt);
+    const MutableCFOptions moptions(opt);
+    c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
+
+    unique_ptr<InternalIterator> iter(
+        c.NewIterator(moptions.prefix_extractor.get()));
+    iter->SeekToFirst();
+    while (iter->Valid()) {
+      iter->key();
+      iter->value();
+      iter->Next();
+    }
+    ASSERT_OK(iter->status());
+  }
+
+  // out of scope, block cache should have been deleted, all allocations
+  // deallocated
+  EXPECT_EQ(custom_cache_allocator->numAllocations.load(),
+            custom_cache_allocator->numDeallocations.load());
+  // make sure that allocations actually happened through the cache allocator
+  EXPECT_GT(custom_cache_allocator->numAllocations.load(), 0);
+}
+
 TEST_P(BlockBasedTableTest, NewIndexIteratorLeak) {
   // A regression test to avoid data race described in
   // https://github.com/facebook/rocksdb/issues/1267

tools/db_bench_tool.cc

@@ -3040,7 +3040,7 @@ void VerifyDBFromDB(std::string& truth_db_name) {
     int64_t bytes = 0;
     int decompress_size;
     while (ok && bytes < 1024 * 1048576) {
-      char *uncompressed = nullptr;
+      CacheAllocationPtr uncompressed;
       switch (FLAGS_compression_type_e) {
         case rocksdb::kSnappyCompression: {
           // get size and allocate here to make comparison fair

@@ -3050,45 +3050,44 @@ void VerifyDBFromDB(std::string& truth_db_name) {
             ok = false;
             break;
           }
-          uncompressed = new char[ulength];
+          uncompressed = AllocateBlock(ulength, nullptr);
           ok = Snappy_Uncompress(compressed.data(), compressed.size(),
-                                 uncompressed);
+                                 uncompressed.get());
           break;
         }
         case rocksdb::kZlibCompression:
           uncompressed = Zlib_Uncompress(uncompression_ctx, compressed.data(),
                                          compressed.size(), &decompress_size, 2);
-          ok = uncompressed != nullptr;
+          ok = uncompressed.get() != nullptr;
           break;
         case rocksdb::kBZip2Compression:
           uncompressed = BZip2_Uncompress(compressed.data(), compressed.size(),
                                           &decompress_size, 2);
-          ok = uncompressed != nullptr;
+          ok = uncompressed.get() != nullptr;
           break;
         case rocksdb::kLZ4Compression:
          uncompressed = LZ4_Uncompress(uncompression_ctx, compressed.data(),
                                        compressed.size(), &decompress_size, 2);
-          ok = uncompressed != nullptr;
+          ok = uncompressed.get() != nullptr;
           break;
         case rocksdb::kLZ4HCCompression:
           uncompressed = LZ4_Uncompress(uncompression_ctx, compressed.data(),
                                         compressed.size(), &decompress_size, 2);
-          ok = uncompressed != nullptr;
+          ok = uncompressed.get() != nullptr;
           break;
         case rocksdb::kXpressCompression:
-          uncompressed = XPRESS_Uncompress(compressed.data(), compressed.size(),
-                                           &decompress_size);
-          ok = uncompressed != nullptr;
+          uncompressed.reset(XPRESS_Uncompress(
+              compressed.data(), compressed.size(), &decompress_size));
+          ok = uncompressed.get() != nullptr;
           break;
         case rocksdb::kZSTD:
           uncompressed = ZSTD_Uncompress(uncompression_ctx, compressed.data(),
                                          compressed.size(), &decompress_size);
-          ok = uncompressed != nullptr;
+          ok = uncompressed.get() != nullptr;
           break;
         default:
           ok = false;
       }
-      delete[] uncompressed;
       bytes += input.size();
       thread->stats.FinishedOps(nullptr, nullptr, 1, kUncompress);
     }

util/cache_allocator.h

@@ -0,0 +1,38 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+#pragma once
+#include "rocksdb/cache_allocator.h"
+namespace rocksdb {
+struct CustomDeleter {
+  CustomDeleter(CacheAllocator* a = nullptr) : allocator(a) {}
+  void operator()(char* ptr) const {
+    if (allocator) {
+      allocator->Deallocate(reinterpret_cast<void*>(ptr));
+    } else {
+      delete[] ptr;
+    }
+  }
+  CacheAllocator* allocator;
+};
+using CacheAllocationPtr = std::unique_ptr<char[], CustomDeleter>;
+inline CacheAllocationPtr AllocateBlock(size_t size,
+                                        CacheAllocator* allocator) {
+  if (allocator) {
+    auto block = reinterpret_cast<char*>(allocator->Allocate(size));
+    return CacheAllocationPtr(block, allocator);
+  }
+  return CacheAllocationPtr(new char[size]);
+}
+}  // namespace rocksdb
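
The design point here is that CacheAllocationPtr carries its deallocation
policy with it: CustomDeleter remembers which allocator produced the block, so
a buffer can cross module boundaries and still be freed correctly when the
unique_ptr is dropped. A small usage sketch (CopyIntoBlock is a hypothetical
helper, not part of the diff):

    #include <cstring>
    #include "util/cache_allocator.h"

    rocksdb::CacheAllocationPtr CopyIntoBlock(const char* src, size_t n,
                                              rocksdb::CacheAllocator* allocator) {
      // allocator may be nullptr; AllocateBlock then falls back to new char[n]
      // and CustomDeleter correspondingly falls back to delete[].
      rocksdb::CacheAllocationPtr buf = rocksdb::AllocateBlock(n, allocator);
      std::memcpy(buf.get(), src, n);
      return buf;  // ownership and the right deleter travel together
    }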

util/compression.h

@@ -14,6 +14,8 @@
 #include <string>
 #include "rocksdb/options.h"
+#include "rocksdb/table.h"
+#include "util/cache_allocator.h"
 #include "util/coding.h"
 #include "util/compression_context_cache.h"

@@ -495,11 +497,10 @@ inline bool Zlib_Compress(const CompressionContext& ctx,
 // header in varint32 format
 // @param compression_dict Data for presetting the compression library's
 // dictionary.
-inline char* Zlib_Uncompress(const UncompressionContext& ctx,
-                             const char* input_data, size_t input_length,
-                             int* decompress_size,
-                             uint32_t compress_format_version,
-                             int windowBits = -14) {
+inline CacheAllocationPtr Zlib_Uncompress(
+    const UncompressionContext& ctx, const char* input_data,
+    size_t input_length, int* decompress_size, uint32_t compress_format_version,
+    CacheAllocator* allocator = nullptr, int windowBits = -14) {
 #ifdef ZLIB
   uint32_t output_len = 0;
   if (compress_format_version == 2) {

@@ -541,9 +542,9 @@ inline char* Zlib_Uncompress(const UncompressionContext& ctx,
   _stream.next_in = (Bytef*)input_data;
   _stream.avail_in = static_cast<unsigned int>(input_length);
-  char* output = new char[output_len];
-  _stream.next_out = (Bytef*)output;
+  auto output = AllocateBlock(output_len, allocator);
+  _stream.next_out = (Bytef*)output.get();
   _stream.avail_out = static_cast<unsigned int>(output_len);
   bool done = false;

@@ -561,19 +562,17 @@ inline char* Zlib_Uncompress(const UncompressionContext& ctx,
         size_t old_sz = output_len;
         uint32_t output_len_delta = output_len / 5;
         output_len += output_len_delta < 10 ? 10 : output_len_delta;
-        char* tmp = new char[output_len];
-        memcpy(tmp, output, old_sz);
-        delete[] output;
-        output = tmp;
+        auto tmp = AllocateBlock(output_len, allocator);
+        memcpy(tmp.get(), output.get(), old_sz);
+        output = std::move(tmp);
         // Set more output.
-        _stream.next_out = (Bytef*)(output + old_sz);
+        _stream.next_out = (Bytef*)(output.get() + old_sz);
         _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
         break;
       }
       case Z_BUF_ERROR:
       default:
-        delete[] output;
         inflateEnd(&_stream);
         return nullptr;
     }

@@ -660,9 +659,9 @@ inline bool BZip2_Compress(const CompressionContext& /*ctx*/,
 // block header
 // compress_format_version == 2 -- decompressed size is included in the block
 // header in varint32 format
-inline char* BZip2_Uncompress(const char* input_data, size_t input_length,
-                              int* decompress_size,
-                              uint32_t compress_format_version) {
+inline CacheAllocationPtr BZip2_Uncompress(
+    const char* input_data, size_t input_length, int* decompress_size,
+    uint32_t compress_format_version, CacheAllocator* allocator = nullptr) {
 #ifdef BZIP2
   uint32_t output_len = 0;
   if (compress_format_version == 2) {

@@ -690,9 +689,9 @@ inline char* BZip2_Uncompress(const char* input_data, size_t input_length,
   _stream.next_in = (char*)input_data;
   _stream.avail_in = static_cast<unsigned int>(input_length);
-  char* output = new char[output_len];
-  _stream.next_out = (char*)output;
+  auto output = AllocateBlock(output_len, allocator);
+  _stream.next_out = (char*)output.get();
   _stream.avail_out = static_cast<unsigned int>(output_len);
   bool done = false;

@@ -709,18 +708,16 @@ inline char* BZip2_Uncompress(const char* input_data, size_t input_length,
         assert(compress_format_version != 2);
         uint32_t old_sz = output_len;
         output_len = output_len * 1.2;
-        char* tmp = new char[output_len];
-        memcpy(tmp, output, old_sz);
-        delete[] output;
-        output = tmp;
+        auto tmp = AllocateBlock(output_len, allocator);
+        memcpy(tmp.get(), output.get(), old_sz);
+        output = std::move(tmp);
         // Set more output.
-        _stream.next_out = (char*)(output + old_sz);
+        _stream.next_out = (char*)(output.get() + old_sz);
         _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
         break;
       }
       default:
-        delete[] output;
         BZ2_bzDecompressEnd(&_stream);
         return nullptr;
     }

@@ -814,10 +811,12 @@ inline bool LZ4_Compress(const CompressionContext& ctx,
 // header in varint32 format
 // @param compression_dict Data for presetting the compression library's
 // dictionary.
-inline char* LZ4_Uncompress(const UncompressionContext& ctx,
-                            const char* input_data, size_t input_length,
-                            int* decompress_size,
-                            uint32_t compress_format_version) {
+inline CacheAllocationPtr LZ4_Uncompress(const UncompressionContext& ctx,
+                                         const char* input_data,
+                                         size_t input_length,
+                                         int* decompress_size,
+                                         uint32_t compress_format_version,
+                                         CacheAllocator* allocator = nullptr) {
 #ifdef LZ4
   uint32_t output_len = 0;
   if (compress_format_version == 2) {

@@ -837,7 +836,7 @@ inline char* LZ4_Uncompress(const UncompressionContext& ctx,
     input_data += 8;
   }
-  char* output = new char[output_len];
+  auto output = AllocateBlock(output_len, allocator);
 #if LZ4_VERSION_NUMBER >= 10400  // r124+
   LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
   if (ctx.dict().size()) {

@@ -845,17 +844,16 @@ inline char* LZ4_Uncompress(const UncompressionContext& ctx,
                         static_cast<int>(ctx.dict().size()));
   }
   *decompress_size = LZ4_decompress_safe_continue(
-      stream, input_data, output, static_cast<int>(input_length),
+      stream, input_data, output.get(), static_cast<int>(input_length),
       static_cast<int>(output_len));
   LZ4_freeStreamDecode(stream);
 #else  // up to r123
-  *decompress_size =
-      LZ4_decompress_safe(input_data, output, static_cast<int>(input_length),
-                          static_cast<int>(output_len));
+  *decompress_size = LZ4_decompress_safe(input_data, output.get(),
+                                         static_cast<int>(input_length),
+                                         static_cast<int>(output_len));
 #endif  // LZ4_VERSION_NUMBER >= 10400
   if (*decompress_size < 0) {
-    delete[] output;
     return nullptr;
   }
   assert(*decompress_size == static_cast<int>(output_len));

@@ -866,6 +864,7 @@ inline char* LZ4_Uncompress(const UncompressionContext& ctx,
   (void)input_length;
   (void)decompress_size;
   (void)compress_format_version;
+  (void)allocator;
   return nullptr;
 #endif
 }

@@ -1028,9 +1027,11 @@ inline bool ZSTD_Compress(const CompressionContext& ctx, const char* input,
 // @param compression_dict Data for presetting the compression library's
 // dictionary.
-inline char* ZSTD_Uncompress(const UncompressionContext& ctx,
-                             const char* input_data, size_t input_length,
-                             int* decompress_size) {
+inline CacheAllocationPtr ZSTD_Uncompress(const UncompressionContext& ctx,
+                                          const char* input_data,
+                                          size_t input_length,
+                                          int* decompress_size,
+                                          CacheAllocator* allocator = nullptr) {
 #ifdef ZSTD
   uint32_t output_len = 0;
   if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,

@@ -1038,17 +1039,17 @@ inline char* ZSTD_Uncompress(const UncompressionContext& ctx,
     return nullptr;
   }
-  char* output = new char[output_len];
+  auto output = AllocateBlock(output_len, allocator);
   size_t actual_output_length;
 #if ZSTD_VERSION_NUMBER >= 500  // v0.5.0+
   ZSTD_DCtx* context = ctx.GetZSTDContext();
   assert(context != nullptr);
   actual_output_length = ZSTD_decompress_usingDict(
-      context, output, output_len, input_data, input_length, ctx.dict().data(),
-      ctx.dict().size());
+      context, output.get(), output_len, input_data, input_length,
+      ctx.dict().data(), ctx.dict().size());
 #else  // up to v0.4.x
   actual_output_length =
-      ZSTD_decompress(output, output_len, input_data, input_length);
+      ZSTD_decompress(output.get(), output_len, input_data, input_length);
 #endif  // ZSTD_VERSION_NUMBER >= 500
   assert(actual_output_length == output_len);
   *decompress_size = static_cast<int>(actual_output_length);

@@ -1058,6 +1059,7 @@ inline char* ZSTD_Uncompress(const UncompressionContext& ctx,
   (void)input_data;
   (void)input_length;
   (void)decompress_size;
+  (void)allocator;
   return nullptr;
 #endif
 }
