// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). // // Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. // // Various APIs for configuring, creating, and monitoring read caches. #pragma once #include #include #include #include "rocksdb/compression_type.h" #include "rocksdb/data_structure.h" #include "rocksdb/memory_allocator.h" namespace ROCKSDB_NAMESPACE { class Cache; // defined in advanced_cache.h struct ConfigOptions; class SecondaryCache; // These definitions begin source compatibility for a future change in which // a specific class for block cache is split away from general caches, so that // the block cache API can continue to become more specialized and // customizeable, including in ways incompatible with a general cache. For // example, HyperClockCache is not usable as a general cache because it expects // only fixed-size block cache keys, but this limitation is not yet reflected // in the API function signatures. // * Phase 1 (done) - Make both BlockCache and RowCache aliases for Cache, // and make a factory function for row caches. Encourage users of row_cache // (not common) to switch to the factory function for row caches. // * Phase 2 - Split off RowCache as its own class, removing secondary // cache support features and more from the API to simplify it. Between Phase 1 // and Phase 2 users of row_cache will need to update their code. Any time // after Phase 2, the block cache and row cache APIs can become more specialized // in ways incompatible with general caches. // * Phase 3 - Move existing RocksDB uses of Cache to BlockCache, and deprecate // (but not yet remove) Cache as an alias for BlockCache. using BlockCache = Cache; using RowCache = Cache; // Classifications of block cache entries. // // Developer notes: Adding a new enum to this class requires corresponding // updates to `kCacheEntryRoleToCamelString` and // `kCacheEntryRoleToHyphenString`. Do not add to this enum after `kMisc` since // `kNumCacheEntryRoles` assumes `kMisc` comes last. enum class CacheEntryRole { // Block-based table data block kDataBlock, // Block-based table filter block (full or partitioned) kFilterBlock, // Block-based table metadata block for partitioned filter kFilterMetaBlock, // OBSOLETE / DEPRECATED: old/removed block-based filter kDeprecatedFilterBlock, // Block-based table index block kIndexBlock, // Other kinds of block-based table block kOtherBlock, // WriteBufferManager's charge to account for its memtable usage kWriteBuffer, // Compression dictionary building buffer's charge to account for // its memory usage kCompressionDictionaryBuildingBuffer, // Filter's charge to account for // (new) bloom and ribbon filter construction's memory usage kFilterConstruction, // BlockBasedTableReader's charge to account for its memory usage kBlockBasedTableReader, // FileMetadata's charge to account for its memory usage kFileMetadata, // Blob value (when using the same cache as block cache and blob cache) kBlobValue, // Blob cache's charge to account for its memory usage (when using a // separate block cache and blob cache) kBlobCache, // Default bucket, for miscellaneous cache entries. Do not use for // entries that could potentially add up to large usage. kMisc, }; constexpr uint32_t kNumCacheEntryRoles = static_cast(CacheEntryRole::kMisc) + 1; // Obtain a hyphen-separated, lowercase name of a `CacheEntryRole`. const std::string& GetCacheEntryRoleName(CacheEntryRole); // A fast bit set for CacheEntryRoles using CacheEntryRoleSet = SmallEnumSet; // For use with `GetMapProperty()` for property // `DB::Properties::kBlockCacheEntryStats`. On success, the map will // be populated with all keys that can be obtained from these functions. struct BlockCacheEntryStatsMapKeys { static const std::string& CacheId(); static const std::string& CacheCapacityBytes(); static const std::string& LastCollectionDurationSeconds(); static const std::string& LastCollectionAgeSeconds(); static std::string EntryCount(CacheEntryRole); static std::string UsedBytes(CacheEntryRole); static std::string UsedPercent(CacheEntryRole); }; extern const bool kDefaultToAdaptiveMutex; enum CacheMetadataChargePolicy { // Only the `charge` of each entry inserted into a Cache counts against // the `capacity` kDontChargeCacheMetadata, // In addition to the `charge`, the approximate space overheads in the // Cache (in bytes) also count against `capacity`. These space overheads // are for supporting fast Lookup and managing the lifetime of entries. kFullChargeCacheMetadata }; const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy = kFullChargeCacheMetadata; // Options shared betweeen various cache implementations that // divide the key space into shards using hashing. struct ShardedCacheOptions { // Capacity of the cache, in the same units as the `charge` of each entry. // This is typically measured in bytes, but can be a different unit if using // kDontChargeCacheMetadata. size_t capacity = 0; // Cache is sharded into 2^num_shard_bits shards, by hash of key. // If < 0, a good default is chosen based on the capacity and the // implementation. (Mutex-based implementations are much more reliant // on many shards for parallel scalability.) int num_shard_bits = -1; // If strict_capacity_limit is set, Insert() will fail if there is not // enough capacity for the new entry along with all the existing referenced // (pinned) cache entries. (Unreferenced cache entries are evicted as // needed, sometimes immediately.) If strict_capacity_limit == false // (default), Insert() never fails. bool strict_capacity_limit = false; // If non-nullptr, RocksDB will use this allocator instead of system // allocator when allocating memory for cache blocks. // // Caveat: when the cache is used as block cache, the memory allocator is // ignored when dealing with compression libraries that allocate memory // internally (currently only XPRESS). std::shared_ptr memory_allocator; // See CacheMetadataChargePolicy CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy; // A SecondaryCache instance to use the non-volatile tier. For a RowCache // this option must be kept as default empty. std::shared_ptr secondary_cache; // See hash_seed comments below static constexpr int32_t kQuasiRandomHashSeed = -1; static constexpr int32_t kHostHashSeed = -2; // EXPERT OPTION: Specifies how a hash seed should be determined for the // cache, or specifies a specific seed (only recommended for diagnostics or // testing). // // Background: it could be dangerous to have different cache instances // access the same SST files with the same hash seed, as correlated unlucky // hashing across hosts or restarts could cause a widespread issue, rather // than an isolated one. For example, with smaller block caches, it is // possible for large full Bloom filters in a set of SST files to be randomly // clustered into one cache shard, causing mutex contention or a thrashing // condition as there's little or no space left for other entries assigned to // the shard. If a set of SST files is broadcast and used on many hosts, we // should ensure all have an independent chance of balanced shards. // // Values >= 0 will be treated as fixed hash seeds. Values < 0 are reserved // for methods of dynamically choosing a seed, currently: // * kQuasiRandomHashSeed - Each cache created chooses a seed mostly randomly, // except that within a process, no seed is repeated until all have been // issued. // * kHostHashSeed - The seed is determined based on hashing the host name. // Although this is arguably slightly worse for production reliability, it // solves the essential problem of cross-host correlation while ensuring // repeatable behavior on a host, for diagnostic purposes. int32_t hash_seed = kHostHashSeed; ShardedCacheOptions() {} ShardedCacheOptions( size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, std::shared_ptr _memory_allocator = nullptr, CacheMetadataChargePolicy _metadata_charge_policy = kDefaultCacheMetadataChargePolicy) : capacity(_capacity), num_shard_bits(_num_shard_bits), strict_capacity_limit(_strict_capacity_limit), memory_allocator(std::move(_memory_allocator)), metadata_charge_policy(_metadata_charge_policy) {} // Make ShardedCacheOptions polymorphic virtual ~ShardedCacheOptions() = default; }; // LRUCache - A cache using LRU eviction to stay at or below a set capacity. // The cache is sharded to 2^num_shard_bits shards, by hash of the key. // The total capacity is divided and evenly assigned to each shard, and each // shard has its own LRU list for evictions. Each shard also has a mutex for // exclusive access during operations; even read operations need exclusive // access in order to update the LRU list. Mutex contention is usually low // with enough shards. struct LRUCacheOptions : public ShardedCacheOptions { // Ratio of cache reserved for high-priority and low-priority entries, // respectively. (See Cache::Priority below more information on the levels.) // Valid values are between 0 and 1 (inclusive), and the sum of the two // values cannot exceed 1. // // If high_pri_pool_ratio is greater than zero, a dedicated high-priority LRU // list is maintained by the cache. A ratio of 0.5 means non-high-priority // entries will use midpoint insertion. Similarly, if low_pri_pool_ratio is // greater than zero, a dedicated low-priority LRU list is maintained. // There is also a bottom-priority LRU list, which is always enabled and not // explicitly configurable. Entries are spilled over to the next available // lower-priority pool if a certain pool's capacity is exceeded. // // Entries with cache hits are inserted into the highest priority LRU list // available regardless of the entry's priority. Entries without hits // are inserted into highest priority LRU list available whose priority // does not exceed the entry's priority. (For example, high-priority items // with no hits are placed in the high-priority pool if available; // otherwise, they are placed in the low-priority pool if available; // otherwise, they are placed in the bottom-priority pool.) This results // in lower-priority entries without hits getting evicted from the cache // sooner. double high_pri_pool_ratio = 0.5; double low_pri_pool_ratio = 0.0; // Whether to use adaptive mutexes for cache shards. Note that adaptive // mutexes need to be supported by the platform in order for this to have any // effect. The default value is true if RocksDB is compiled with // -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise. bool use_adaptive_mutex = kDefaultToAdaptiveMutex; LRUCacheOptions() {} LRUCacheOptions(size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, double _high_pri_pool_ratio, std::shared_ptr _memory_allocator = nullptr, bool _use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy _metadata_charge_policy = kDefaultCacheMetadataChargePolicy, double _low_pri_pool_ratio = 0.0) : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, std::move(_memory_allocator), _metadata_charge_policy), high_pri_pool_ratio(_high_pri_pool_ratio), low_pri_pool_ratio(_low_pri_pool_ratio), use_adaptive_mutex(_use_adaptive_mutex) {} // Construct an instance of LRUCache using these options std::shared_ptr MakeSharedCache() const; // Construct an instance of LRUCache for use as a row cache, typically for // `DBOptions::row_cache`. Some options are not relevant to row caches. std::shared_ptr MakeSharedRowCache() const; }; // DEPRECATED wrapper function inline std::shared_ptr NewLRUCache( size_t capacity, int num_shard_bits = -1, bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5, std::shared_ptr memory_allocator = nullptr, bool use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy, double low_pri_pool_ratio = 0.0) { return LRUCacheOptions(capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio, memory_allocator, use_adaptive_mutex, metadata_charge_policy, low_pri_pool_ratio) .MakeSharedCache(); } // DEPRECATED wrapper function inline std::shared_ptr NewLRUCache(const LRUCacheOptions& cache_opts) { return cache_opts.MakeSharedCache(); } // EXPERIMENTAL // Options structure for configuring a SecondaryCache instance with in-memory // compression. The implementation uses LRUCache so inherits its options, // except LRUCacheOptions.secondary_cache is not used and should not be set. struct CompressedSecondaryCacheOptions : LRUCacheOptions { // The compression method (if any) that is used to compress data. CompressionType compression_type = CompressionType::kLZ4Compression; // compress_format_version can have two values: // compress_format_version == 1 -- decompressed size is not included in the // block header. // compress_format_version == 2 -- decompressed size is included in the block // header in varint32 format. uint32_t compress_format_version = 2; // Enable the custom split and merge feature, which split the compressed value // into chunks so that they may better fit jemalloc bins. bool enable_custom_split_merge = false; // Kinds of entries that should not be compressed, but can be stored. // (Filter blocks are essentially non-compressible but others usually are.) CacheEntryRoleSet do_not_compress_roles = {CacheEntryRole::kFilterBlock}; CompressedSecondaryCacheOptions() {} CompressedSecondaryCacheOptions( size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit, double _high_pri_pool_ratio, double _low_pri_pool_ratio = 0.0, std::shared_ptr _memory_allocator = nullptr, bool _use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy _metadata_charge_policy = kDefaultCacheMetadataChargePolicy, CompressionType _compression_type = CompressionType::kLZ4Compression, uint32_t _compress_format_version = 2, bool _enable_custom_split_merge = false, const CacheEntryRoleSet& _do_not_compress_roles = {CacheEntryRole::kFilterBlock}) : LRUCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, _high_pri_pool_ratio, std::move(_memory_allocator), _use_adaptive_mutex, _metadata_charge_policy, _low_pri_pool_ratio), compression_type(_compression_type), compress_format_version(_compress_format_version), enable_custom_split_merge(_enable_custom_split_merge), do_not_compress_roles(_do_not_compress_roles) {} // Construct an instance of CompressedSecondaryCache using these options std::shared_ptr MakeSharedSecondaryCache() const; // Avoid confusion with LRUCache std::shared_ptr MakeSharedCache() const = delete; }; // DEPRECATED wrapper function inline std::shared_ptr NewCompressedSecondaryCache( size_t capacity, int num_shard_bits = -1, bool strict_capacity_limit = false, double high_pri_pool_ratio = 0.5, double low_pri_pool_ratio = 0.0, std::shared_ptr memory_allocator = nullptr, bool use_adaptive_mutex = kDefaultToAdaptiveMutex, CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy, CompressionType compression_type = CompressionType::kLZ4Compression, uint32_t compress_format_version = 2, bool enable_custom_split_merge = false, const CacheEntryRoleSet& _do_not_compress_roles = { CacheEntryRole::kFilterBlock}) { return CompressedSecondaryCacheOptions( capacity, num_shard_bits, strict_capacity_limit, high_pri_pool_ratio, low_pri_pool_ratio, memory_allocator, use_adaptive_mutex, metadata_charge_policy, compression_type, compress_format_version, enable_custom_split_merge, _do_not_compress_roles) .MakeSharedSecondaryCache(); } // DEPRECATED wrapper function inline std::shared_ptr NewCompressedSecondaryCache( const CompressedSecondaryCacheOptions& opts) { return opts.MakeSharedSecondaryCache(); } // HyperClockCache - A lock-free Cache alternative for RocksDB block cache // that offers much improved CPU efficiency vs. LRUCache under high parallel // load or high contention, with some caveats: // * Not a general Cache implementation: can only be used for // BlockBasedTableOptions::block_cache, which RocksDB uses in a way that is // compatible with HyperClockCache. // * Requires an extra tuning parameter: see estimated_entry_charge below. // Similarly, substantially changing the capacity with SetCapacity could // harm efficiency. // * Cache priorities are less aggressively enforced, which could cause // cache dilution from long range scans (unless they use fill_cache=false). // * Can be worse for small caches, because if almost all of a cache shard is // pinned (more likely with non-partitioned filters), then CLOCK eviction // becomes very CPU intensive. // // See internal cache/clock_cache.h for full description. struct HyperClockCacheOptions : public ShardedCacheOptions { // The estimated average `charge` associated with cache entries. This is a // critical configuration parameter for good performance from the hyper // cache, because having a table size that is fixed at creation time greatly // reduces the required synchronization between threads. // * If the estimate is substantially too low (e.g. less than half the true // average) then metadata space overhead with be substantially higher (e.g. // 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this // can slightly reduce cache hit rates, and slightly reduce access times due // to the larger working memory size. // * If the estimate is substantially too high (e.g. 25% higher than the true // average) then there might not be sufficient slots in the hash table for // both efficient operation and capacity utilization (hit rate). The hyper // cache will evict entries to prevent load factors that could dramatically // affect lookup times, instead letting the hit rate suffer by not utilizing // the full capacity. // // A reasonable choice is the larger of block_size and metadata_block_size. // When WriteBufferManager (and similar) charge memory usage to the block // cache, this can lead to the same effect as estimate being too low, which // is better than the opposite. Therefore, the general recommendation is to // assume that other memory charged to block cache could be negligible, and // ignore it in making the estimate. // // The best parameter choice based on a cache in use is given by // GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as // with kDontChargeCacheMetadata. More precisely with // kFullChargeCacheMetadata is (GetUsage() - 64 * GetTableAddressCount()) / // GetOccupancyCount(). However, when the average value size might vary // (e.g. balance between metadata and data blocks in cache), it is better // to estimate toward the lower side than the higher side. size_t estimated_entry_charge; HyperClockCacheOptions( size_t _capacity, size_t _estimated_entry_charge, int _num_shard_bits = -1, bool _strict_capacity_limit = false, std::shared_ptr _memory_allocator = nullptr, CacheMetadataChargePolicy _metadata_charge_policy = kDefaultCacheMetadataChargePolicy) : ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit, std::move(_memory_allocator), _metadata_charge_policy), estimated_entry_charge(_estimated_entry_charge) {} // Construct an instance of HyperClockCache using these options std::shared_ptr MakeSharedCache() const; }; // DEPRECATED - The old Clock Cache implementation had an unresolved bug and // has been removed. The new HyperClockCache requires an additional // configuration parameter that is not provided by this API. This function // simply returns a new LRUCache for functional compatibility. extern std::shared_ptr NewClockCache( size_t capacity, int num_shard_bits = -1, bool strict_capacity_limit = false, CacheMetadataChargePolicy metadata_charge_policy = kDefaultCacheMetadataChargePolicy); enum PrimaryCacheType { kCacheTypeLRU, // LRU cache type kCacheTypeHCC, // Hyper Clock Cache type kCacheTypeMax, }; // A 2-tier cache with a primary block cache, and a compressed secondary // cache. The returned cache instance will internally allocate a primary // uncompressed cache of the specified type, and a compressed secondary // cache. Any cache memory reservations, such as WriteBufferManager // allocations costed to the block cache, will be distributed // proportionally across both the primary and secondary. struct TieredVolatileCacheOptions { ShardedCacheOptions* cache_opts; PrimaryCacheType cache_type; CompressedSecondaryCacheOptions comp_cache_opts; }; extern std::shared_ptr NewTieredVolatileCache( TieredVolatileCacheOptions& cache_opts); } // namespace ROCKSDB_NAMESPACE