// rocksdb/include/rocksdb/cache.h

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// Various APIs for configuring, creating, and monitoring read caches.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <utility>

#include "rocksdb/compression_type.h"
#include "rocksdb/data_structure.h"
#include "rocksdb/memory_allocator.h"
namespace ROCKSDB_NAMESPACE {
// Forward declarations of types that the visible part of this header only
// names (e.g. as std::shared_ptr<Cache> and std::shared_ptr<SecondaryCache>).
// NOTE(review): ConfigOptions is not referenced by the code visible in this
// header - confirm whether the declaration is still needed.
class Cache; // defined in advanced_cache.h
struct ConfigOptions;
class SecondaryCache;
// Roles used to classify block cache entries.
//
// Developer notes: whenever an enumerator is added here, both
// `kCacheEntryRoleToCamelString` and `kCacheEntryRoleToHyphenString` need a
// matching update. `kMisc` has to stay the final enumerator because
// `kNumCacheEntryRoles` is derived from it, so never append after `kMisc`.
enum class CacheEntryRole {
  // Data block of a block-based table
  kDataBlock,

  // Filter block (full or partitioned) of a block-based table
  kFilterBlock,

  // Metadata block for a partitioned filter of a block-based table
  kFilterMetaBlock,

  // OBSOLETE / DEPRECATED: old/removed block-based filter
  kDeprecatedFilterBlock,

  // Index block of a block-based table
  kIndexBlock,

  // Any other kind of block-based table block
  kOtherBlock,

  // Charge made by WriteBufferManager to account for its memtable usage
  kWriteBuffer,

  // Charge made by the compression dictionary building buffer to account
  // for its memory usage
  kCompressionDictionaryBuildingBuffer,

  // Charge made by filters to account for the memory used while
  // constructing (new) bloom and ribbon filters
  kFilterConstruction,

  // Charge made by BlockBasedTableReader to account for its memory usage
  kBlockBasedTableReader,

  // Charge made by FileMetadata to account for its memory usage
  kFileMetadata,

  // Blob value (when block cache and blob cache are the same cache)
  kBlobValue,

  // Charge made by the blob cache to account for its memory usage (when
  // block cache and blob cache are separate caches)
  kBlobCache,

  // Default bucket for miscellaneous cache entries. Do not use it for
  // entries that could potentially add up to large usage.
  kMisc,
};

// Number of distinct roles; valid only while `kMisc` is the last enumerator.
constexpr uint32_t kNumCacheEntryRoles =
    1 + static_cast<uint32_t>(CacheEntryRole::kMisc);
// Returns the lowercase, hyphen-separated name of the given `CacheEntryRole`.
const std::string& GetCacheEntryRoleName(CacheEntryRole role);
// A fast bit set for CacheEntryRoles.
// NOTE(review): the second template argument is presumably the largest
// enumerator the set has to represent (kMisc is documented as the last
// role) - confirm against the SmallEnumSet definition.
using CacheEntryRoleSet = SmallEnumSet<CacheEntryRole, CacheEntryRole::kMisc>;
// Key names for the map filled in by `GetMapProperty()` when it is queried
// with property `DB::Properties::kBlockCacheEntryStats`. On success, that map
// is populated with every key obtainable from the functions below.
struct BlockCacheEntryStatsMapKeys {
  // Keys that do not depend on a role.
  static const std::string& CacheId();
  static const std::string& CacheCapacityBytes();
  static const std::string& LastCollectionDurationSeconds();
  static const std::string& LastCollectionAgeSeconds();

  // Keys that are specific to one `CacheEntryRole`; returned by value.
  static std::string EntryCount(CacheEntryRole role);
  static std::string UsedBytes(CacheEntryRole role);
  static std::string UsedPercent(CacheEntryRole role);
};
// Default value used for `use_adaptive_mutex` throughout this header. Only
// declared here; the definition lives elsewhere. Per the LRUCacheOptions
// documentation it is true if RocksDB is compiled with
// -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX and false otherwise.
extern const bool kDefaultToAdaptiveMutex;
// Selects whether a Cache's own space overheads count against its capacity.
enum CacheMetadataChargePolicy {
  // Only the `charge` of each entry inserted into a Cache is counted against
  // the `capacity`.
  kDontChargeCacheMetadata,

  // On top of the `charge`, the approximate space overheads in the Cache (in
  // bytes) are counted against `capacity` as well. Those overheads exist to
  // support fast Lookup and to manage the lifetime of entries.
  kFullChargeCacheMetadata,
};

// Policy used by the options structs unless the caller picks another one.
const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy{
    kFullChargeCacheMetadata};
// Options shared between the various cache implementations that divide the
// key space into shards using hashing.
struct ShardedCacheOptions {
  // Cache capacity, expressed in the same units as the `charge` of each
  // entry. Typically that unit is bytes, but a different unit can be used
  // with kDontChargeCacheMetadata.
  size_t capacity = 0;

  // The cache is split into 2^num_shard_bits shards by hash of key. A value
  // < 0 requests a good default chosen from the capacity and the
  // implementation. (Mutex-based implementations are much more reliant on
  // many shards for parallel scalability.)
  int num_shard_bits = -1;

  // When set, Insert() fails if there is not enough capacity for the new
  // entry together with all existing referenced (pinned) cache entries.
  // (Unreferenced cache entries are evicted as needed, sometimes
  // immediately.) When false (the default), Insert() never fails.
  bool strict_capacity_limit = false;

  // When non-nullptr, RocksDB allocates memory for cache blocks through this
  // allocator instead of the system allocator.
  //
  // Caveat: when the cache is used as block cache, this allocator is ignored
  // for compression libraries that allocate memory internally (currently
  // only XPRESS).
  std::shared_ptr<MemoryAllocator> memory_allocator;

  // See CacheMetadataChargePolicy
  CacheMetadataChargePolicy metadata_charge_policy =
      kDefaultCacheMetadataChargePolicy;

  // A SecondaryCache instance to use as the non-volatile tier.
  std::shared_ptr<SecondaryCache> secondary_cache;

  // All fields keep the defaults given above.
  ShardedCacheOptions() = default;

  // Positional constructor. `secondary_cache` cannot be passed here and
  // starts out empty; assign the field afterwards if one is needed.
  ShardedCacheOptions(
      size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy)
      : capacity{_capacity},
        num_shard_bits{_num_shard_bits},
        strict_capacity_limit{_strict_capacity_limit},
        memory_allocator{std::move(_memory_allocator)},
        metadata_charge_policy{_metadata_charge_policy} {}
};
// LRUCache - a cache that uses LRU eviction to stay at or below a set
// capacity. Keys are hashed into 2^num_shard_bits shards. The total capacity
// is divided evenly among the shards, and every shard keeps its own LRU list
// for evictions. Every shard also has a mutex giving exclusive access during
// operations; even reads need exclusive access because they update the LRU
// list. With enough shards, mutex contention is usually low.
struct LRUCacheOptions : public ShardedCacheOptions {
  // Ratio of the cache reserved for high-priority and low-priority entries,
  // respectively. (See Cache::Priority for more information on the levels.)
  // Each value must be between 0 and 1 (inclusive), and the two values
  // together cannot exceed 1.
  //
  // A high_pri_pool_ratio greater than zero makes the cache maintain a
  // dedicated high-priority LRU list. A ratio of 0.5 means non-high-priority
  // entries will use midpoint insertion. Likewise, a low_pri_pool_ratio
  // greater than zero makes the cache maintain a dedicated low-priority LRU
  // list. A bottom-priority LRU list also exists; it is always enabled and
  // not explicitly configurable. When a pool's capacity is exceeded, entries
  // spill over to the next available lower-priority pool.
  //
  // Entries with cache hits go into the highest priority LRU list available,
  // regardless of the entry's priority. Entries without hits go into the
  // highest priority LRU list available whose priority does not exceed the
  // entry's priority. (For example, high-priority items with no hits are
  // placed in the high-priority pool if available; otherwise in the
  // low-priority pool if available; otherwise in the bottom-priority pool.)
  // As a result, lower-priority entries without hits get evicted from the
  // cache sooner.
  double high_pri_pool_ratio = 0.5;
  double low_pri_pool_ratio = 0.0;

  // Whether cache shards use adaptive mutexes. This only has an effect if
  // the platform supports adaptive mutexes. The default value is true if
  // RocksDB is compiled with -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false
  // otherwise.
  bool use_adaptive_mutex = kDefaultToAdaptiveMutex;

  // All fields keep the defaults given above.
  LRUCacheOptions() = default;

  // Positional constructor; the sharding-related arguments are handed to the
  // ShardedCacheOptions base.
  LRUCacheOptions(size_t _capacity, int _num_shard_bits,
                  bool _strict_capacity_limit, double _high_pri_pool_ratio,
                  std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
                  bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
                  CacheMetadataChargePolicy _metadata_charge_policy =
                      kDefaultCacheMetadataChargePolicy,
                  double _low_pri_pool_ratio = 0.0)
      : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                            std::move(_memory_allocator),
                            _metadata_charge_policy),
        high_pri_pool_ratio{_high_pri_pool_ratio},
        low_pri_pool_ratio{_low_pri_pool_ratio},
        use_adaptive_mutex{_use_adaptive_mutex} {}

  // Construct an instance of LRUCache using these options
  std::shared_ptr<Cache> MakeSharedCache() const;
};
// DEPRECATED wrapper function around LRUCacheOptions::MakeSharedCache().
//
// Packs the positional arguments into an LRUCacheOptions (same parameter
// order as that struct's positional constructor) and returns the result of
// calling MakeSharedCache() on it.
inline std::shared_ptr<Cache> NewLRUCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
    bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy,
    double low_pri_pool_ratio = 0.0) {
  // `memory_allocator` is a by-value parameter that is not used again here,
  // so move it into the options instead of copying it; this avoids an extra
  // atomic reference-count increment/decrement pair.
  return LRUCacheOptions(capacity, num_shard_bits, strict_capacity_limit,
                         high_pri_pool_ratio, std::move(memory_allocator),
                         use_adaptive_mutex, metadata_charge_policy,
                         low_pri_pool_ratio)
      .MakeSharedCache();
}
// DEPRECATED wrapper function. Does nothing beyond calling
// cache_opts.MakeSharedCache() and returning its result.
inline std::shared_ptr<Cache> NewLRUCache(const LRUCacheOptions& cache_opts) {
return cache_opts.MakeSharedCache();
}
// EXPERIMENTAL
// Options structure for configuring a SecondaryCache instance with in-memory
// compression. Because the implementation uses LRUCache, it inherits that
// cache's options, except that LRUCacheOptions.secondary_cache is not used
// and should not be set.
struct CompressedSecondaryCacheOptions : public LRUCacheOptions {
  // The compression method (if any) used to compress data.
  CompressionType compression_type = CompressionType::kLZ4Compression;

  // compress_format_version can have two values:
  // compress_format_version == 1 -- decompressed size is not included in the
  // block header.
  // compress_format_version == 2 -- decompressed size is included in the
  // block header in varint32 format.
  uint32_t compress_format_version = 2;

  // Enables the custom split and merge feature, which splits the compressed
  // value into chunks so that they may better fit jemalloc bins.
  bool enable_custom_split_merge = false;

  // Kinds of entries that should not be compressed, but can be stored.
  // (Filter blocks are essentially non-compressible but others usually are.)
  CacheEntryRoleSet do_not_compress_roles = {CacheEntryRole::kFilterBlock};

  // All fields keep the defaults given above.
  CompressedSecondaryCacheOptions() = default;

  // Positional constructor. Note the argument order: `_low_pri_pool_ratio`
  // directly follows `_high_pri_pool_ratio` here, whereas the LRUCacheOptions
  // positional constructor takes it last.
  CompressedSecondaryCacheOptions(
      size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
      double _high_pri_pool_ratio, double _low_pri_pool_ratio = 0.0,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      bool _use_adaptive_mutex = kDefaultToAdaptiveMutex,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy,
      CompressionType _compression_type = CompressionType::kLZ4Compression,
      uint32_t _compress_format_version = 2,
      bool _enable_custom_split_merge = false,
      const CacheEntryRoleSet& _do_not_compress_roles =
          {CacheEntryRole::kFilterBlock})
      : LRUCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                        _high_pri_pool_ratio, std::move(_memory_allocator),
                        _use_adaptive_mutex, _metadata_charge_policy,
                        _low_pri_pool_ratio),
        compression_type(_compression_type),
        compress_format_version(_compress_format_version),
        enable_custom_split_merge(_enable_custom_split_merge),
        do_not_compress_roles(_do_not_compress_roles) {}

  // Construct an instance of CompressedSecondaryCache using these options
  std::shared_ptr<SecondaryCache> MakeSharedSecondaryCache() const;

  // Deleted to avoid confusion with LRUCache: these options build a
  // SecondaryCache, not a Cache.
  std::shared_ptr<Cache> MakeSharedCache() const = delete;
};
// DEPRECATED wrapper function around
// CompressedSecondaryCacheOptions::MakeSharedSecondaryCache().
//
// Packs the positional arguments into a CompressedSecondaryCacheOptions (same
// parameter order as that struct's positional constructor) and returns the
// result of calling MakeSharedSecondaryCache() on it.
inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
    size_t capacity, int num_shard_bits = -1,
    bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5,
    double low_pri_pool_ratio = 0.0,
    std::shared_ptr<MemoryAllocator> memory_allocator = nullptr,
    bool use_adaptive_mutex = kDefaultToAdaptiveMutex,
    CacheMetadataChargePolicy metadata_charge_policy =
        kDefaultCacheMetadataChargePolicy,
    CompressionType compression_type = CompressionType::kLZ4Compression,
    uint32_t compress_format_version = 2,
    bool enable_custom_split_merge = false,
    const CacheEntryRoleSet& _do_not_compress_roles = {
        CacheEntryRole::kFilterBlock}) {
  // `memory_allocator` is a by-value parameter that is not used again here,
  // so move it into the options instead of copying it; this avoids an extra
  // atomic reference-count increment/decrement pair.
  return CompressedSecondaryCacheOptions(
             capacity, num_shard_bits, strict_capacity_limit,
             high_pri_pool_ratio, low_pri_pool_ratio,
             std::move(memory_allocator), use_adaptive_mutex,
             metadata_charge_policy, compression_type,
             compress_format_version, enable_custom_split_merge,
             _do_not_compress_roles)
      .MakeSharedSecondaryCache();
}
// DEPRECATED wrapper function. Does nothing beyond calling
// opts.MakeSharedSecondaryCache() and returning its result.
inline std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts) {
return opts.MakeSharedSecondaryCache();
}
// HyperClockCache - a lock-free Cache alternative for the RocksDB block cache
// offering much improved CPU efficiency vs. LRUCache under high parallel load
// or high contention, with some caveats:
// * It is not a general Cache implementation: it can only be used for
// BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is
// compatible with HyperClockCache.
// * It requires an extra tuning parameter: see estimated_entry_charge below.
// Similarly, substantially changing the capacity with SetCapacity could
// harm efficiency.
// * SecondaryCache is not yet supported.
// * Cache priorities are less aggressively enforced, which could cause
// cache dilution from long range scans (unless they use fill_cache=false).
// * It can be worse for small caches, because if almost all of a cache shard
// is pinned (more likely with non-partitioned filters), then CLOCK eviction
// becomes very CPU intensive.
//
// See internal cache/clock_cache.h for full description.
struct HyperClockCacheOptions : public ShardedCacheOptions {
  // The estimated average `charge` associated with cache entries. This is a
  // critical configuration parameter for good performance from the hyper
  // cache, because a table size fixed at creation time greatly reduces the
  // synchronization required between threads.
  // * If the estimate is substantially too low (e.g. less than half the true
  // average) then metadata space overhead will be substantially higher (e.g.
  // 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this
  // can slightly reduce cache hit rates, and slightly reduce access times due
  // to the larger working memory size.
  // * If the estimate is substantially too high (e.g. 25% higher than the
  // true average) then there might not be sufficient slots in the hash table
  // for both efficient operation and capacity utilization (hit rate). The
  // hyper cache will evict entries to prevent load factors that could
  // dramatically affect lookup times, instead letting the hit rate suffer by
  // not utilizing the full capacity.
  //
  // A reasonable choice is the larger of block_size and metadata_block_size.
  // When WriteBufferManager (and similar) charge memory usage to the block
  // cache, this can lead to the same effect as the estimate being too low,
  // which is better than the opposite. Therefore, the general recommendation
  // is to assume that other memory charged to block cache could be
  // negligible, and ignore it in making the estimate.
  //
  // The best parameter choice based on a cache in use is given by
  // GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
  // with kDontChargeCacheMetadata. More precisely, with
  // kFullChargeCacheMetadata it is
  // (GetUsage() - 64 * GetTableAddressCount()) / GetOccupancyCount().
  // However, when the average value size might vary (e.g. balance between
  // metadata and data blocks in cache), it is better to estimate toward the
  // lower side than the higher side.
  size_t estimated_entry_charge;

  // The only constructor: `_capacity` and `_estimated_entry_charge` are
  // mandatory, so this struct cannot be default-constructed.
  HyperClockCacheOptions(
      size_t _capacity, size_t _estimated_entry_charge,
      int _num_shard_bits = -1, bool _strict_capacity_limit = false,
      std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
      CacheMetadataChargePolicy _metadata_charge_policy =
          kDefaultCacheMetadataChargePolicy)
      : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
                            std::move(_memory_allocator),
                            _metadata_charge_policy),
        estimated_entry_charge{_estimated_entry_charge} {}

  // Construct an instance of HyperClockCache using these options
  std::shared_ptr<Cache> MakeSharedCache() const;
};
// DEPRECATED - The old Clock Cache implementation had an unresolved bug and
// has been removed. The new HyperClockCache requires an additional
// configuration parameter that is not provided by this API. This function
// simply returns a new LRUCache for functional compatibility.
//
// Only declared here; the definition lives elsewhere. The parameters have the
// same names and defaults as the like-named NewLRUCache() parameters. To get
// an actual HyperClockCache, use HyperClockCacheOptions::MakeSharedCache(),
// which takes the extra estimated_entry_charge setting.
extern std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy);
} // namespace ROCKSDB_NAMESPACE