Call experimental new clock cache HyperClockCache (#10684)

Summary:
This change establishes a distinctive name for the experimental new lock-free clock cache (originally developed by guidotag and revamped in PR https://github.com/facebook/rocksdb/issues/10626). A few reasons:
* We want to make it clear that this is a fundamentally different implementation from the old clock cache, so that people don't dismiss it with "I already tried clock cache."
* We want to highlight the key feature: it's fast (especially under parallel load).
* Because it requires an estimated charge per entry, it is not drop-in API compatible with the old clock cache. This estimate might always be required for highest performance, and giving it a distinct name should reduce confusion about the distinct API requirements.
* We might develop a variant requiring the same estimate parameter but with LRU eviction. In that case, using the name HyperLRUCache should make things clearer. (FastLRUCache is just a prototype that might soon be removed.)

Some API detail:
* To reduce copy-pasting of parameter lists, etc. as in LRUCache construction, I provide a `MakeSharedCache()` member function on `HyperClockCacheOptions` instead of a free `NewHyperClockCache()` function (see the usage sketch after this list).
* Changes `-cache_type=clock_cache` to `-cache_type=hyper_clock_cache` for applicable tools. I think this is more consistent / sustainable for the reasons already stated.
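
For illustration, a minimal usage sketch of the new construction path (not code from this PR; the capacity and charge values are made up, and installing the cache via `BlockBasedTableOptions` is just standard RocksDB block-cache wiring):

```cpp
#include "rocksdb/cache.h"
#include "rocksdb/options.h"
#include "rocksdb/table.h"

using namespace ROCKSDB_NAMESPACE;

// Build a HyperClockCache and install it as the block cache.
void UseHyperClockCache(Options& options) {
  // Illustrative values: the estimated_entry_charge should roughly match the
  // average block size expected to be cached.
  HyperClockCacheOptions cache_opts(/*capacity=*/1024 * 1024 * 1024,
                                    /*estimated_entry_charge=*/8 * 1024);
  // num_shard_bits, strict_capacity_limit, memory_allocator, and the
  // metadata charge policy keep their defaults.
  std::shared_ptr<Cache> cache = cache_opts.MakeSharedCache();

  BlockBasedTableOptions table_opts;
  table_opts.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_opts));
}
```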

For performance tests see https://github.com/facebook/rocksdb/pull/10626

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10684

Test Plan: no interesting functional changes; tests updated

Reviewed By: anand1976

Differential Revision: D39547800

Pulled By: pdillinger

fbshipit-source-id: 5c0fe1b5cf3cb680ab369b928c8569682b9795bf
Branch: main
Author: Peter Dillinger (committed by Facebook GitHub Bot)
Commit: 0f91c72adc (parent: 5724348689)

Changed files (changed line counts in parentheses):
  1. HISTORY.md (1)
  2. cache/cache_bench_tool.cc (12)
  3. cache/cache_test.cc (63)
  4. cache/clock_cache.cc (44)
  5. cache/clock_cache.h (27)
  6. cache/lru_cache_test.cc (16)
  7. db/db_block_cache_test.cc (20)
  8. db_stress_tool/db_stress_test_base.cc (14)
  9. include/rocksdb/cache.h (141)
  10. tools/db_bench_tool.cc (15)
  11. tools/db_crashtest.py (2)

@ -32,6 +32,7 @@
* RocksDB does internal auto prefetching if it notices 2 sequential reads and readahead_size is not specified. New option `num_file_reads_for_auto_readahead` is added in BlockBasedTableOptions which indicates after how many sequential reads internal auto prefetching should start (default is 2).
* Added new perf context counters `block_cache_standalone_handle_count`, `block_cache_real_handle_count`,`compressed_sec_cache_insert_real_count`, `compressed_sec_cache_insert_dummy_count`, `compressed_sec_cache_uncompressed_bytes`, and `compressed_sec_cache_compressed_bytes`.
* Memory for blobs which are to be inserted into the blob cache is now allocated using the cache's allocator (see #10628 and #10647).
* HyperClockCache is an experimental, lock-free Cache alternative for block cache that offers much improved CPU efficiency under high parallel load or high contention, with some caveats. As much as 4.5x higher ops/sec vs. LRUCache has been seen in db_bench under high parallel load.
### Performance Improvements
* Iterator performance is improved for `DeleteRange()` users. Internally, iterator will skip to the end of a range tombstone when possible, instead of looping through each key and check individually if a key is range deleted.

@ -13,7 +13,6 @@
#include <set>
#include <sstream>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db/db_impl/db_impl.h"
#include "monitoring/histogram.h"
@ -292,13 +291,12 @@ class CacheBench {
}
if (FLAGS_cache_type == "clock_cache") {
cache_ = ExperimentalNewClockCache(
FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (!cache_) {
fprintf(stderr, "Clock cache not supported.\n");
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
}
} else if (FLAGS_cache_type == "hyper_clock_cache") {
cache_ = HyperClockCacheOptions(FLAGS_cache_size, FLAGS_value_bytes,
FLAGS_num_shard_bits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
cache_ = NewFastLRUCache(
FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,

@ -15,7 +15,6 @@
#include <string>
#include <vector>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "port/stack_trace.h"
@ -23,7 +22,7 @@
#include "util/coding.h"
#include "util/string_util.h"
// FastLRUCache and ClockCache only support 16-byte keys, so some of
// FastLRUCache and HyperClockCache only support 16-byte keys, so some of
// the tests originally written for LRUCache do not work on the other caches.
// Those tests were adapted to use 16-byte keys. We kept the original ones.
// TODO: Remove the original tests if they ever become unused.
@ -76,7 +75,7 @@ void EraseDeleter2(const Slice& /*key*/, void* value) {
}
const std::string kLRU = "lru";
const std::string kClock = "clock";
const std::string kHyperClock = "hyper_clock";
const std::string kFast = "fast";
} // anonymous namespace
@ -87,7 +86,7 @@ class CacheTest : public testing::TestWithParam<std::string> {
static std::string type_;
static void Deleter(const Slice& key, void* v) {
if (type_ == kFast || type_ == kClock) {
if (type_ == kFast || type_ == kHyperClock) {
current_->deleted_keys_.push_back(DecodeKey16Bytes(key));
} else {
current_->deleted_keys_.push_back(DecodeKey32Bits(key));
@ -122,10 +121,10 @@ class CacheTest : public testing::TestWithParam<std::string> {
if (type == kLRU) {
return NewLRUCache(capacity);
}
if (type == kClock) {
return ExperimentalNewClockCache(
capacity, estimated_value_size_, -1 /*num_shard_bits*/,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (type == kHyperClock) {
return HyperClockCacheOptions(
capacity, estimated_value_size_ /*estimated_value_size*/)
.MakeSharedCache();
}
if (type == kFast) {
return NewFastLRUCache(
@ -148,10 +147,11 @@ class CacheTest : public testing::TestWithParam<std::string> {
co.metadata_charge_policy = charge_policy;
return NewLRUCache(co);
}
if (type == kClock) {
return ExperimentalNewClockCache(capacity, 1 /*estimated_value_size*/,
if (type == kHyperClock) {
return HyperClockCacheOptions(capacity, 1 /*estimated_value_size*/,
num_shard_bits, strict_capacity_limit,
charge_policy);
nullptr /*allocator*/, charge_policy)
.MakeSharedCache();
}
if (type == kFast) {
return NewFastLRUCache(capacity, 1 /*estimated_value_size*/,
@ -163,12 +163,11 @@ class CacheTest : public testing::TestWithParam<std::string> {
// These functions encode/decode keys in test cases that use
// int keys.
// Currently, FastLRUCache requires keys to be 16B long, whereas
// LRUCache and ClockCache don't, so the encoding depends on
// the cache type.
// Currently, HyperClockCache requires keys to be 16B long, whereas
// LRUCache doesn't, so the encoding depends on the cache type.
std::string EncodeKey(int k) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
return EncodeKey16Bytes(k);
} else {
return EncodeKey32Bits(k);
@ -177,7 +176,7 @@ class CacheTest : public testing::TestWithParam<std::string> {
int DecodeKey(const Slice& k) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
return DecodeKey16Bytes(k);
} else {
return DecodeKey32Bits(k);
@ -242,7 +241,7 @@ TEST_P(CacheTest, UsageTest) {
auto precise_cache = NewCache(kCapacity, 0, false, kFullChargeCacheMetadata);
ASSERT_EQ(0, cache->GetUsage());
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}
@ -263,7 +262,7 @@ TEST_P(CacheTest, UsageTest) {
kv_size, DumbDeleter));
usage += kv_size;
ASSERT_EQ(usage, cache->GetUsage());
if (type == kClock) {
if (type == kHyperClock) {
ASSERT_EQ(baseline_meta_usage + usage, precise_cache->GetUsage());
} else {
ASSERT_LT(usage, precise_cache->GetUsage());
@ -293,7 +292,7 @@ TEST_P(CacheTest, UsageTest) {
ASSERT_GT(kCapacity, cache->GetUsage());
ASSERT_GT(kCapacity, precise_cache->GetUsage());
ASSERT_LT(kCapacity * 0.95, cache->GetUsage());
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_LT(kCapacity * 0.95, precise_cache->GetUsage());
} else {
// estimated value size of 1 is weird for clock cache, because
@ -319,7 +318,7 @@ TEST_P(CacheTest, PinnedUsageTest) {
auto cache = NewCache(kCapacity, 8, false, kDontChargeCacheMetadata);
auto precise_cache = NewCache(kCapacity, 8, false, kFullChargeCacheMetadata);
size_t baseline_meta_usage = precise_cache->GetUsage();
if (type != kClock) {
if (type != kHyperClock) {
ASSERT_EQ(0, baseline_meta_usage);
}
@ -428,7 +427,7 @@ TEST_P(CacheTest, HitAndMiss) {
ASSERT_EQ(-1, Lookup(300));
Insert(100, 102);
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
// ClockCache usually doesn't overwrite on Insert
ASSERT_EQ(101, Lookup(100));
} else {
@ -439,7 +438,7 @@ TEST_P(CacheTest, HitAndMiss) {
ASSERT_EQ(1U, deleted_keys_.size());
ASSERT_EQ(100, deleted_keys_[0]);
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ASSERT_EQ(102, deleted_values_[0]);
} else {
ASSERT_EQ(101, deleted_values_[0]);
@ -447,7 +446,7 @@ TEST_P(CacheTest, HitAndMiss) {
}
TEST_P(CacheTest, InsertSameKey) {
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"ClockCache doesn't guarantee Insert overwrite same key.");
return;
@ -477,7 +476,7 @@ TEST_P(CacheTest, Erase) {
}
TEST_P(CacheTest, EntriesArePinned) {
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"ClockCache doesn't guarantee Insert overwrite same key.");
return;
@ -543,7 +542,7 @@ TEST_P(CacheTest, ExternalRefPinsEntries) {
Insert(1000 + j, 2000 + j);
}
// Clock cache is even more stateful and needs more churn to evict
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
for (int j = 0; j < kCacheSize; j++) {
Insert(11000 + j, 11000 + j);
}
@ -742,9 +741,9 @@ TEST_P(CacheTest, ReleaseWithoutErase) {
TEST_P(CacheTest, SetCapacity) {
auto type = GetParam();
if (type == kFast || type == kClock) {
if (type == kFast || type == kHyperClock) {
ROCKSDB_GTEST_BYPASS(
"FastLRUCache and ClockCache don't support arbitrary capacity "
"FastLRUCache and HyperClockCache don't support arbitrary capacity "
"adjustments.");
return;
}
@ -883,7 +882,7 @@ TEST_P(CacheTest, OverCapacity) {
cache->Release(handles[i]);
}
if (GetParam() == kClock) {
if (GetParam() == kHyperClock) {
// Make sure eviction is triggered.
ASSERT_OK(cache->Insert(EncodeKey(-1), nullptr, 1, &deleter, &handles[0]));
@ -1020,7 +1019,8 @@ TEST_P(CacheTest, DefaultShardBits) {
// Prevent excessive allocation (to save time & space)
estimated_value_size_ = 100000;
// Implementations use different minimum shard sizes
size_t min_shard_size = (GetParam() == kClock ? 32U * 1024U : 512U) * 1024U;
size_t min_shard_size =
(GetParam() == kHyperClock ? 32U * 1024U : 512U) * 1024U;
std::shared_ptr<Cache> cache = NewCache(32U * min_shard_size);
ShardedCache* sc = dynamic_cast<ShardedCache*>(cache.get());
@ -1052,11 +1052,8 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
cache_->Release(h1);
}
std::shared_ptr<Cache> (*new_clock_cache_func)(size_t, size_t, int, bool,
CacheMetadataChargePolicy) =
ExperimentalNewClockCache;
INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
testing::Values(kLRU, kClock, kFast));
testing::Values(kLRU, kHyperClock, kFast));
INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
testing::Values(kLRU, kFast));

@ -21,7 +21,7 @@
namespace ROCKSDB_NAMESPACE {
namespace clock_cache {
namespace hyper_clock_cache {
static_assert(sizeof(ClockHandle) == 64U,
"Expecting size / alignment with common cache line size");
@ -1126,8 +1126,9 @@ size_t ClockCacheShard::GetTableAddressCount() const {
return table_.GetTableSize();
}
ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
int num_shard_bits, bool strict_capacity_limit,
HyperClockCache::HyperClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit),
num_shards_(1 << num_shard_bits) {
@ -1145,7 +1146,7 @@ ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
}
}
ClockCache::~ClockCache() {
HyperClockCache::~HyperClockCache() {
if (shards_ != nullptr) {
assert(num_shards_ > 0);
for (int i = 0; i < num_shards_; i++) {
@ -1155,32 +1156,32 @@ ClockCache::~ClockCache() {
}
}
CacheShard* ClockCache::GetShard(uint32_t shard) {
CacheShard* HyperClockCache::GetShard(uint32_t shard) {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
const CacheShard* ClockCache::GetShard(uint32_t shard) const {
const CacheShard* HyperClockCache::GetShard(uint32_t shard) const {
return reinterpret_cast<CacheShard*>(&shards_[shard]);
}
void* ClockCache::Value(Handle* handle) {
void* HyperClockCache::Value(Handle* handle) {
return reinterpret_cast<const ClockHandle*>(handle)->value;
}
size_t ClockCache::GetCharge(Handle* handle) const {
size_t HyperClockCache::GetCharge(Handle* handle) const {
return reinterpret_cast<const ClockHandle*>(handle)->total_charge;
}
Cache::DeleterFn ClockCache::GetDeleter(Handle* handle) const {
Cache::DeleterFn HyperClockCache::GetDeleter(Handle* handle) const {
auto h = reinterpret_cast<const ClockHandle*>(handle);
return h->deleter;
}
uint32_t ClockCache::GetHash(Handle* handle) const {
uint32_t HyperClockCache::GetHash(Handle* handle) const {
return reinterpret_cast<const ClockHandle*>(handle)->hash;
}
void ClockCache::DisownData() {
void HyperClockCache::DisownData() {
// Leak data only if that won't generate an ASAN/valgrind warning.
if (!kMustFreeHeapAllocations) {
shards_ = nullptr;
@ -1188,8 +1189,9 @@ void ClockCache::DisownData() {
}
}
} // namespace clock_cache
} // namespace hyper_clock_cache
// DEPRECATED (see public API)
std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
@ -1199,22 +1201,20 @@ std::shared_ptr<Cache> NewClockCache(
/* low_pri_pool_ratio */ 0.0);
}
std::shared_ptr<Cache> ExperimentalNewClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) {
if (num_shard_bits >= 20) {
std::shared_ptr<Cache> HyperClockCacheOptions::MakeSharedCache() const {
auto my_num_shard_bits = num_shard_bits;
if (my_num_shard_bits >= 20) {
return nullptr; // The cache cannot be sharded into too many fine pieces.
}
if (num_shard_bits < 0) {
if (my_num_shard_bits < 0) {
// Use larger shard size to reduce risk of large entries clustering
// or skewing individual shards.
constexpr size_t min_shard_size = 32U * 1024U * 1024U;
num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
my_num_shard_bits = GetDefaultCacheShardBits(capacity, min_shard_size);
}
return std::make_shared<clock_cache::ClockCache>(
capacity, estimated_value_size, num_shard_bits, strict_capacity_limit,
metadata_charge_policy);
return std::make_shared<hyper_clock_cache::HyperClockCache>(
capacity, estimated_entry_charge, my_num_shard_bits,
strict_capacity_limit, metadata_charge_policy);
}
} // namespace ROCKSDB_NAMESPACE

@ -27,22 +27,22 @@
namespace ROCKSDB_NAMESPACE {
namespace clock_cache {
namespace hyper_clock_cache {
// Forward declaration of friend class.
class ClockCacheTest;
// ClockCache is an experimental alternative to LRUCache.
// HyperClockCache is an experimental alternative to LRUCache.
//
// Benefits
// --------
// * Fully lock free (no waits or spins) for efficiency under high concurrency
// * Optimized for hot path reads. For concurrency control, most Lookup() and
// essentially all Release() are a single atomic add operation.
// * Eviction on insertion is fully parallel and lock-free.
// * Uses a generalized + aging variant of CLOCK eviction that might outperform
// LRU in some cases. (For background, see
// https://en.wikipedia.org/wiki/Page_replacement_algorithm)
// * Eviction on insertion is fully parallel and lock-free.
//
// Costs
// -----
@ -582,20 +582,20 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
std::atomic<bool> strict_capacity_limit_;
}; // class ClockCacheShard
class ClockCache
class HyperClockCache
#ifdef NDEBUG
final
#endif
: public ShardedCache {
public:
ClockCache(size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
HyperClockCache(size_t capacity, size_t estimated_value_size,
int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata);
~ClockCache() override;
~HyperClockCache() override;
const char* Name() const override { return "ClockCache"; }
const char* Name() const override { return "HyperClockCache"; }
CacheShard* GetShard(uint32_t shard) override;
@ -615,15 +615,8 @@ class ClockCache
ClockCacheShard* shards_ = nullptr;
int num_shards_;
}; // class ClockCache
}; // class HyperClockCache
} // namespace clock_cache
// Only for internal testing, temporarily replacing NewClockCache.
// TODO(Guido) Remove once NewClockCache constructs a ClockCache again.
extern std::shared_ptr<Cache> ExperimentalNewClockCache(
size_t capacity, size_t estimated_value_size, int num_shard_bits,
bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy);
} // namespace hyper_clock_cache
} // namespace ROCKSDB_NAMESPACE

@ -506,7 +506,7 @@ TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
} // namespace fast_lru_cache
namespace clock_cache {
namespace hyper_clock_cache {
class ClockCacheTest : public testing::Test {
public:
@ -975,9 +975,11 @@ TEST_F(ClockCacheTest, TableSizesTest) {
SCOPED_TRACE("est_count = " + std::to_string(est_count));
size_t capacity = static_cast<size_t>(est_val_size * est_count);
// kDontChargeCacheMetadata
auto cache = ExperimentalNewClockCache(
auto cache = HyperClockCacheOptions(
capacity, est_val_size, /*num shard_bits*/ -1,
/*strict_capacity_limit*/ false, kDontChargeCacheMetadata);
/*strict_capacity_limit*/ false,
/*memory_allocator*/ nullptr, kDontChargeCacheMetadata)
.MakeSharedCache();
// Table sizes are currently only powers of two
EXPECT_GE(cache->GetTableAddressCount(), est_count / kLoadFactor);
EXPECT_LE(cache->GetTableAddressCount(), est_count / kLoadFactor * 2.0);
@ -989,9 +991,11 @@ TEST_F(ClockCacheTest, TableSizesTest) {
// doubling the table size could cut by 90% the space available to
// values. Therefore, we omit those weird cases for now.
if (est_val_size >= 512) {
cache = ExperimentalNewClockCache(
cache = HyperClockCacheOptions(
capacity, est_val_size, /*num shard_bits*/ -1,
/*strict_capacity_limit*/ false, kFullChargeCacheMetadata);
/*strict_capacity_limit*/ false,
/*memory_allocator*/ nullptr, kFullChargeCacheMetadata)
.MakeSharedCache();
double est_count_after_meta =
(capacity - cache->GetUsage()) * 1.0 / est_val_size;
EXPECT_GE(cache->GetTableAddressCount(),
@ -1003,7 +1007,7 @@ TEST_F(ClockCacheTest, TableSizesTest) {
}
}
} // namespace clock_cache
} // namespace hyper_clock_cache
class TestSecondaryCache : public SecondaryCache {
public:

@ -13,7 +13,6 @@
#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "cache/lru_cache.h"
#include "db/column_family.h"
@ -938,15 +937,13 @@ TEST_F(DBBlockCacheTest, AddRedundantStats) {
int iterations_tested = 0;
for (std::shared_ptr<Cache> base_cache :
{NewLRUCache(capacity, num_shard_bits),
ExperimentalNewClockCache(
HyperClockCacheOptions(
capacity,
BlockBasedTableOptions().block_size /*estimated_value_size*/,
num_shard_bits, false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy),
NewFastLRUCache(
capacity,
BlockBasedTableOptions().block_size /*estimated_value_size*/,
num_shard_bits, false /*strict_capacity_limit*/,
num_shard_bits)
.MakeSharedCache(),
NewFastLRUCache(capacity, 1 /*estimated_value_size*/, num_shard_bits,
false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy)}) {
if (!base_cache) {
// Skip clock cache when not supported
@ -1302,11 +1299,10 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
for (bool partition : {false, true}) {
for (std::shared_ptr<Cache> cache :
{NewLRUCache(capacity),
ExperimentalNewClockCache(
HyperClockCacheOptions(
capacity,
BlockBasedTableOptions().block_size /*estimated_value_size*/,
-1 /*num_shard_bits*/, false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy)}) {
BlockBasedTableOptions().block_size /*estimated_value_size*/)
.MakeSharedCache()}) {
if (!cache) {
// Skip clock cache when not supported
continue;

@ -10,7 +10,6 @@
#include "util/compression.h"
#ifdef GFLAGS
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db_stress_tool/db_stress_common.h"
#include "db_stress_tool/db_stress_compaction_filter.h"
@ -115,14 +114,13 @@ std::shared_ptr<Cache> StressTest::NewCache(size_t capacity,
}
if (FLAGS_cache_type == "clock_cache") {
auto cache = ExperimentalNewClockCache(
static_cast<size_t>(capacity), FLAGS_block_size, num_shard_bits,
false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
if (!cache) {
fprintf(stderr, "Clock cache not supported.");
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
}
return cache;
} else if (FLAGS_cache_type == "hyper_clock_cache") {
return HyperClockCacheOptions(static_cast<size_t>(capacity),
FLAGS_block_size /*estimated_entry_charge*/,
num_shard_bits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
return NewFastLRUCache(static_cast<size_t>(capacity), FLAGS_block_size,
num_shard_bits, false /*strict_capacity_limit*/,

@ -42,24 +42,64 @@ class SecondaryCache;
extern const bool kDefaultToAdaptiveMutex;
enum CacheMetadataChargePolicy {
// Only the `charge` of each entry inserted into a Cache counts against
// the `capacity`
kDontChargeCacheMetadata,
// In addition to the `charge`, the approximate space overheads in the
// Cache (in bytes) also count against `capacity`. These space overheads
// are for supporting fast Lookup and managing the lifetime of entries.
kFullChargeCacheMetadata
};
const CacheMetadataChargePolicy kDefaultCacheMetadataChargePolicy =
kFullChargeCacheMetadata;
struct LRUCacheOptions {
// Capacity of the cache.
// Options shared between various cache implementations that
// divide the key space into shards using hashing.
struct ShardedCacheOptions {
// Capacity of the cache, in the same units as the `charge` of each entry.
// This is typically measured in bytes, but can be a different unit if using
// kDontChargeCacheMetadata.
size_t capacity = 0;
// Cache is sharded into 2^num_shard_bits shards, by hash of key.
// Refer to NewLRUCache for further information.
// If < 0, a good default is chosen based on the capacity and the
// implementation. (Mutex-based implementations are much more reliant
// on many shards for parallel scalability.)
int num_shard_bits = -1;
// If strict_capacity_limit is set,
// insert to the cache will fail when cache is full.
// If strict_capacity_limit is set, Insert() will fail if there is not
// enough capacity for the new entry along with all the existing referenced
// (pinned) cache entries. (Unreferenced cache entries are evicted as
// needed, sometimes immediately.) If strict_capacity_limit == false
// (default), Insert() never fails.
bool strict_capacity_limit = false;
// If non-nullptr, RocksDB will use this allocator instead of system
// allocator when allocating memory for cache blocks.
//
// Caveat: when the cache is used as block cache, the memory allocator is
// ignored when dealing with compression libraries that allocate memory
// internally (currently only XPRESS).
std::shared_ptr<MemoryAllocator> memory_allocator;
// See CacheMetadataChargePolicy
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy;
ShardedCacheOptions() {}
ShardedCacheOptions(
size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy)
: capacity(_capacity),
num_shard_bits(_num_shard_bits),
strict_capacity_limit(_strict_capacity_limit),
memory_allocator(std::move(_memory_allocator)),
metadata_charge_policy(_metadata_charge_policy) {}
};
struct LRUCacheOptions : public ShardedCacheOptions {
// Percentage of cache reserved for high priority entries.
// If greater than zero, the LRU list will be split into a high-pri
// list and a low-pri list. High-pri entries will be inserted to the
@ -83,24 +123,12 @@ struct LRUCacheOptions {
// See also high_pri_pool_ratio.
double low_pri_pool_ratio = 0.0;
// If non-nullptr will use this allocator instead of system allocator when
// allocating memory for cache blocks. Call this method before you start using
// the cache!
//
// Caveat: when the cache is used as block cache, the memory allocator is
// ignored when dealing with compression libraries that allocate memory
// internally (currently only XPRESS).
std::shared_ptr<MemoryAllocator> memory_allocator;
// Whether to use adaptive mutexes for cache shards. Note that adaptive
// mutexes need to be supported by the platform in order for this to have any
// effect. The default value is true if RocksDB is compiled with
// -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise.
bool use_adaptive_mutex = kDefaultToAdaptiveMutex;
CacheMetadataChargePolicy metadata_charge_policy =
kDefaultCacheMetadataChargePolicy;
// A SecondaryCache instance to use as the non-volatile tier.
std::shared_ptr<SecondaryCache> secondary_cache;
@ -112,14 +140,12 @@ struct LRUCacheOptions {
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy,
double _low_pri_pool_ratio = 0.0)
: capacity(_capacity),
num_shard_bits(_num_shard_bits),
strict_capacity_limit(_strict_capacity_limit),
: ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
std::move(_memory_allocator),
_metadata_charge_policy),
high_pri_pool_ratio(_high_pri_pool_ratio),
low_pri_pool_ratio(_low_pri_pool_ratio),
memory_allocator(std::move(_memory_allocator)),
use_adaptive_mutex(_use_adaptive_mutex),
metadata_charge_policy(_metadata_charge_policy) {}
use_adaptive_mutex(_use_adaptive_mutex) {}
};
// Create a new cache with a fixed size capacity. The cache is sharded
@ -190,18 +216,65 @@ extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
extern std::shared_ptr<SecondaryCache> NewCompressedSecondaryCache(
const CompressedSecondaryCacheOptions& opts);
// EXPERIMENTAL Currently ClockCache is under development, although it's
// already exposed in the public API. To avoid unreliable performance and
// correctness issues, NewClockCache will temporarily return an LRUCache
// constructed with the corresponding arguments.
// HyperClockCache - EXPERIMENTAL
//
// TODO(Guido) When ClockCache is complete, roll back to the old text:
// ``
// Similar to NewLRUCache, but create a cache based on clock algorithm with
// better concurrent performance in some cases. See util/clock_cache.cc for
// more detail.
// Return nullptr if it is not supported.
// ``
// A lock-free Cache alternative for RocksDB block cache that offers much
// improved CPU efficiency under high parallel load or high contention, with
// some caveats.
//
// See internal cache/clock_cache.h for full description.
struct HyperClockCacheOptions : public ShardedCacheOptions {
// The estimated average `charge` associated with cache entries. This is a
// critical configuration parameter for good performance from the hyper
// cache, because having a table size that is fixed at creation time greatly
// reduces the required synchronization between threads.
// * If the estimate is substantially too low (e.g. less than half the true
// average) then metadata space overhead will be substantially higher (e.g.
// 200 bytes per entry rather than 100). With kFullChargeCacheMetadata, this
// can slightly reduce cache hit rates, and slightly reduce access times due
// to the larger working memory size.
// * If the estimate is substantially too high (e.g. 25% higher than the true
// average) then there might not be sufficient slots in the hash table for
// both efficient operation and capacity utilization (hit rate). The hyper
// cache will evict entries to prevent load factors that could dramatically
// affect lookup times, instead letting the hit rate suffer by not utilizing
// the full capacity.
//
// A reasonable choice is the larger of block_size and metadata_block_size.
// When WriteBufferManager (and similar) charge memory usage to the block
// cache, this can lead to the same effect as estimate being too low, which
// is better than the opposite. Therefore, the general recommendation is to
// assume that other memory charged to block cache could be negligible, and
// ignore it in making the estimate.
//
// The best parameter choice based on a cache in use is given by
// GetUsage() / GetOccupancyCount(), ignoring metadata overheads such as
// with kDontChargeCacheMetadata. More precisely, with
// kFullChargeCacheMetadata it is (GetUsage() - 64 * GetTableAddressCount()) /
// GetOccupancyCount(). However, when the average value size might vary
// (e.g. balance between metadata and data blocks in cache), it is better
// to estimate toward the lower side than the higher side.
size_t estimated_entry_charge;
HyperClockCacheOptions(
size_t _capacity, size_t _estimated_entry_charge,
int _num_shard_bits = -1, bool _strict_capacity_limit = false,
std::shared_ptr<MemoryAllocator> _memory_allocator = nullptr,
CacheMetadataChargePolicy _metadata_charge_policy =
kDefaultCacheMetadataChargePolicy)
: ShardedCacheOptions(_capacity, _num_shard_bits, _strict_capacity_limit,
std::move(_memory_allocator),
_metadata_charge_policy),
estimated_entry_charge(_estimated_entry_charge) {}
// Construct an instance of HyperClockCache using these options
std::shared_ptr<Cache> MakeSharedCache() const;
};
// DEPRECATED - The old Clock Cache implementation had an unresolved bug and
// has been removed. The new HyperClockCache requires an additional
// configuration parameter that is not provided by this API. This function
// simply returns a new LRUCache for functional compatibility.
extern std::shared_ptr<Cache> NewClockCache(
size_t capacity, int num_shard_bits = -1,
bool strict_capacity_limit = false,

@ -37,7 +37,6 @@
#include <thread>
#include <unordered_map>
#include "cache/clock_cache.h"
#include "cache/fast_lru_cache.h"
#include "db/db_impl/db_impl.h"
#include "db/malloc_stats.h"
@ -3057,15 +3056,13 @@ class Benchmark {
return nullptr;
}
if (FLAGS_cache_type == "clock_cache") {
auto cache = ExperimentalNewClockCache(
static_cast<size_t>(capacity), FLAGS_block_size,
FLAGS_cache_numshardbits, false /*strict_capacity_limit*/,
kDefaultCacheMetadataChargePolicy);
if (!cache) {
fprintf(stderr, "Clock cache not supported.");
fprintf(stderr, "Old clock cache implementation has been removed.\n");
exit(1);
}
return cache;
} else if (FLAGS_cache_type == "hyper_clock_cache") {
return HyperClockCacheOptions(static_cast<size_t>(capacity),
FLAGS_block_size /*estimated_entry_charge*/,
FLAGS_cache_numshardbits)
.MakeSharedCache();
} else if (FLAGS_cache_type == "fast_lru_cache") {
return NewFastLRUCache(static_cast<size_t>(capacity), FLAGS_block_size,
FLAGS_cache_numshardbits,

@ -116,7 +116,7 @@ default_params = {
"use_direct_reads": lambda: random.randint(0, 1),
"use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
"mock_direct_io": False,
"cache_type": lambda: random.choice(["lru_cache", "clock_cache"]),
"cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]),
# fast_lru_cache is incompatible with stress tests, because it doesn't support strict_capacity_limit == false.
"use_full_merge_v1": lambda: random.randint(0, 1),
"use_merge": lambda: random.randint(0, 1),
