Lock-free ClockCache (#10390)

Summary:
This change makes ClockCache completely free of locks. As part of this PR, we have also pushed the clock algorithm functionality out of ClockCacheShard and into ClockHandleTable, so that ClockCacheShard acts more as an interface and less as an actual data structure.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10390

Test Plan:
- ``make -j24 check``
- ``make -j24 CRASH_TEST_EXT_ARGS="--duration=960 --cache_type=clock_cache --cache_size=1073741824 --block_size=16384" blackbox_crash_test_with_atomic_flush``

Reviewed By: pdillinger

Differential Revision: D38106945

Pulled By: guidotag

fbshipit-source-id: 6cbf6bd2397dc9f582809ccff5118a8a33ea6cb1
main
Guido Tagliavini Ponce 2 years ago committed by Facebook GitHub Bot
parent 8860fc902a
commit 6a160e1fec
  1. 595
      cache/clock_cache.cc
  2. 732
      cache/clock_cache.h

595
cache/clock_cache.cc vendored

@ -17,7 +17,6 @@
#include "monitoring/perf_context_imp.h" #include "monitoring/perf_context_imp.h"
#include "monitoring/statistics.h" #include "monitoring/statistics.h"
#include "port/lang.h" #include "port/lang.h"
#include "util/distributed_mutex.h"
#include "util/hash.h" #include "util/hash.h"
#include "util/math.h" #include "util/math.h"
#include "util/random.h" #include "util/random.h"
@ -26,86 +25,91 @@ namespace ROCKSDB_NAMESPACE {
namespace clock_cache { namespace clock_cache {
ClockHandleTable::ClockHandleTable(int hash_bits) ClockHandleTable::ClockHandleTable(size_t capacity, int hash_bits)
: length_bits_(hash_bits), : length_bits_(hash_bits),
length_bits_mask_((uint32_t{1} << length_bits_) - 1), length_bits_mask_((uint32_t{1} << length_bits_) - 1),
occupancy_(0),
occupancy_limit_(static_cast<uint32_t>((uint32_t{1} << length_bits_) * occupancy_limit_(static_cast<uint32_t>((uint32_t{1} << length_bits_) *
kStrictLoadFactor)), kStrictLoadFactor)),
array_(new ClockHandle[size_t{1} << length_bits_]) { capacity_(capacity),
array_(new ClockHandle[size_t{1} << length_bits_]),
clock_pointer_(0),
occupancy_(0),
usage_(0) {
assert(hash_bits <= 32); assert(hash_bits <= 32);
} }
ClockHandleTable::~ClockHandleTable() { ClockHandleTable::~ClockHandleTable() {
ApplyToEntriesRange([](ClockHandle* h) { h->FreeData(); }, 0, GetTableSize(), // Assumes there are no references (of any type) to any slot in the table.
true); for (uint32_t i = 0; i < GetTableSize(); i++) {
ClockHandle* h = &array_[i];
if (h->IsElement()) {
h->FreeData();
}
}
} }
ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) { ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) {
uint32_t probe = 0; uint32_t probe = 0;
int slot = FindElement(key, hash, probe); ClockHandle* e = FindSlot(
return (slot == -1) ? nullptr : &array_[slot]; key,
[&](ClockHandle* h) {
if (h->TryInternalRef()) {
if (h->IsElement() && h->Matches(key, hash)) {
return true;
} }
h->ReleaseInternalRef();
}
return false;
},
[&](ClockHandle* h) { return h->displacements == 0; },
[&](ClockHandle* /*h*/) {}, probe);
ClockHandle* ClockHandleTable::Insert(ClockHandle* h, ClockHandle** old) { if (e != nullptr) {
uint32_t probe = 0; // TODO(Guido) Comment from #10347: Here it looks like we have three atomic
int slot = FindElementOrAvailableSlot(h->key(), h->hash, probe); // updates where it would be possible to combine into one CAS (more metadata
*old = nullptr; // under one atomic field) or maybe two atomic updates (one arithmetic, one
if (slot == -1) { // bitwise). Something to think about optimizing.
// The key is not already present, and there's no available slot to place e->InternalToExternalRef();
// the new copy. e->SetHit();
return nullptr; // The handle is now referenced, so we take it out of clock.
ClockOff(e);
} }
if (!array_[slot].IsElement()) { return e;
// The slot is empty or is a tombstone.
ClockHandle* new_entry = &array_[slot];
new_entry->InternalToExclusiveRef();
Assign(new_entry, h);
if (new_entry->displacements == 0) {
// The slot was empty.
return new_entry;
} }
// It used to be a tombstone, so there may already be a copy of the
// key in the table. ClockHandle* ClockHandleTable::Insert(ClockHandle* h,
slot = FindElement(h->key(), h->hash, probe); autovector<ClockHandle>* deleted,
if (slot == -1) { bool take_reference) {
// Nope, no existing copy of the key. uint32_t probe = 0;
return new_entry; ClockHandle* e = FindAvailableSlot(h->key(), h->hash, probe, deleted);
} if (e == nullptr) {
ClockHandle* old_entry = &array_[slot]; // No available slot to place the handle.
old_entry->ReleaseInternalRef();
*old = old_entry;
return new_entry;
} else {
// There is an existing copy of the key.
ClockHandle* old_entry = &array_[slot];
old_entry->ReleaseInternalRef();
*old = old_entry;
// Find an available slot for the new element.
old_entry->displacements++;
slot = FindAvailableSlot(h->key(), probe);
if (slot == -1) {
// No available slots.
return nullptr; return nullptr;
} }
ClockHandle* new_entry = &array_[slot];
new_entry->InternalToExclusiveRef(); // The slot is empty or is a tombstone. And we have an exclusive ref.
Assign(new_entry, h); Assign(e, h);
return new_entry; // TODO(Guido) The following RemoveAll can probably be run outside of
} // the exclusive ref. I had a bad case in mind: multiple inserts could
// annihilate each other. Although I think this is impossible, I'm not sure
// my mental proof covers every case.
if (e->displacements != 0) {
// It used to be a tombstone, so there may already be copies of the
// key in the table.
RemoveAll(h->key(), h->hash, probe, deleted);
} }
void ClockHandleTable::Remove(ClockHandle* h) { if (take_reference) {
assert(!h->IsInClock()); // Already off clock. // The user wants to take a reference.
uint32_t probe = 0; e->ExclusiveToExternalRef();
FindSlot( } else {
h->key(), [&](ClockHandle* e) { return e == h; }, // The user doesn't want to immediately take a reference, so we make
[&](ClockHandle* /*e*/) { return false; }, // it evictable.
[&](ClockHandle* e) { e->displacements--; }, probe); ClockOn(e);
h->SetWillBeDeleted(false); e->ReleaseExclusiveRef();
h->SetIsElement(false); }
occupancy_--; return e;
} }
void ClockHandleTable::Assign(ClockHandle* dst, ClockHandle* src) { void ClockHandleTable::Assign(ClockHandle* dst, ClockHandle* src) {
@ -117,75 +121,152 @@ void ClockHandleTable::Assign(ClockHandle* dst, ClockHandle* src) {
dst->key_data = src->key_data; dst->key_data = src->key_data;
dst->flags.store(0); dst->flags.store(0);
dst->SetIsElement(true); dst->SetIsElement(true);
dst->SetClockPriority(ClockHandle::ClockPriority::NONE);
dst->SetCachePriority(src->GetCachePriority()); dst->SetCachePriority(src->GetCachePriority());
usage_ += dst->total_charge;
occupancy_++; occupancy_++;
} }
int ClockHandleTable::FindElement(const Slice& key, uint32_t hash, bool ClockHandleTable::TryRemove(ClockHandle* h,
uint32_t& probe) { autovector<ClockHandle>* deleted) {
return FindSlot( if (h->TryExclusiveRef()) {
key, if (h->WillBeDeleted()) {
[&](ClockHandle* h) { Remove(h, deleted);
if (h->TryInternalRef()) {
if (h->Matches(key, hash)) {
return true; return true;
} }
h->ReleaseInternalRef(); h->ReleaseExclusiveRef();
} }
return false; return false;
},
[&](ClockHandle* h) { return h->displacements == 0; },
[&](ClockHandle* /*h*/) {}, probe);
} }
int ClockHandleTable::FindAvailableSlot(const Slice& key, uint32_t& probe) { bool ClockHandleTable::SpinTryRemove(ClockHandle* h,
int slot = FindSlot( autovector<ClockHandle>* deleted) {
if (h->SpinTryExclusiveRef()) {
if (h->WillBeDeleted()) {
Remove(h, deleted);
return true;
}
h->ReleaseExclusiveRef();
}
return false;
}
void ClockHandleTable::ClockOff(ClockHandle* h) {
h->SetClockPriority(ClockHandle::ClockPriority::NONE);
}
void ClockHandleTable::ClockOn(ClockHandle* h) {
assert(!h->IsInClock());
bool is_high_priority =
h->HasHit() || h->GetCachePriority() == Cache::Priority::HIGH;
h->SetClockPriority(static_cast<ClockHandle::ClockPriority>(
is_high_priority ? ClockHandle::ClockPriority::HIGH
: ClockHandle::ClockPriority::MEDIUM));
}
void ClockHandleTable::Remove(ClockHandle* h,
autovector<ClockHandle>* deleted) {
deleted->push_back(*h);
ClockOff(h);
uint32_t probe = 0;
FindSlot(
h->key(), [&](ClockHandle* e) { return e == h; },
[&](ClockHandle* /*e*/) { return false; },
[&](ClockHandle* e) { e->displacements--; }, probe);
h->SetWillBeDeleted(false);
h->SetIsElement(false);
}
void ClockHandleTable::RemoveAll(const Slice& key, uint32_t hash,
uint32_t& probe,
autovector<ClockHandle>* deleted) {
FindSlot(
key, key,
[&](ClockHandle* h) { [&](ClockHandle* h) {
if (h->TryInternalRef()) { if (h->TryInternalRef()) {
if (!h->IsElement()) { if (h->IsElement() && h->Matches(key, hash)) {
return true; h->SetWillBeDeleted(true);
h->ReleaseInternalRef();
if (TryRemove(h, deleted)) {
h->ReleaseExclusiveRef();
}
return false;
} }
h->ReleaseInternalRef(); h->ReleaseInternalRef();
} }
return false; return false;
}, },
[&](ClockHandle* /*h*/) { return false; }, [&](ClockHandle* h) { return h->displacements == 0; },
[&](ClockHandle* h) { h->displacements++; }, probe); [&](ClockHandle* /*h*/) {}, probe);
if (slot == -1) {
Rollback(key, probe);
} }
return slot;
void ClockHandleTable::Free(autovector<ClockHandle>* deleted) {
if (deleted->size() == 0) {
// Avoid unnecessarily reading usage_ and occupancy_.
return;
} }
int ClockHandleTable::FindElementOrAvailableSlot(const Slice& key, size_t deleted_charge = 0;
uint32_t hash, for (auto& h : *deleted) {
uint32_t& probe) { deleted_charge += h.total_charge;
int slot = FindSlot( h.FreeData();
}
assert(usage_ >= deleted_charge);
usage_ -= deleted_charge;
occupancy_ -= static_cast<uint32_t>(deleted->size());
}
ClockHandle* ClockHandleTable::FindAvailableSlot(
const Slice& key, uint32_t hash, uint32_t& probe,
autovector<ClockHandle>* deleted) {
ClockHandle* e = FindSlot(
key, key,
[&](ClockHandle* h) { [&](ClockHandle* h) {
// To read the handle, first acquire a shared ref.
if (h->TryInternalRef()) { if (h->TryInternalRef()) {
if (!h->IsElement() || h->Matches(key, hash)) { if (h->IsElement()) {
// The slot is not available.
// TODO(Guido) Is it worth testing h->WillBeDeleted()?
if (h->WillBeDeleted() || h->Matches(key, hash)) {
// The slot can be freed up, or the key we're inserting is already
// in the table, so we try to delete it. When the attempt is
// successful, the slot becomes available, so we stop probing.
// Notice that in that case TryRemove returns an exclusive ref.
h->SetWillBeDeleted(true);
h->ReleaseInternalRef();
if (TryRemove(h, deleted)) {
return true; return true;
} }
return false;
}
h->ReleaseInternalRef();
return false;
}
// Available slot.
h->ReleaseInternalRef(); h->ReleaseInternalRef();
// Try to acquire an exclusive ref. If we fail, continue probing.
if (h->SpinTryExclusiveRef()) {
// Check that the slot is still available.
if (!h->IsElement()) {
return true;
}
h->ReleaseExclusiveRef();
}
} }
return false; return false;
}, },
[&](ClockHandle* /*h*/) { return false; }, [&](ClockHandle* /*h*/) { return false; },
[&](ClockHandle* h) { h->displacements++; }, probe); [&](ClockHandle* h) { h->displacements++; }, probe);
if (slot == -1) { if (e == nullptr) {
Rollback(key, probe); Rollback(key, probe);
} }
return slot; return e;
} }
int ClockHandleTable::FindSlot(const Slice& key, ClockHandle* ClockHandleTable::FindSlot(
std::function<bool(ClockHandle*)> match, const Slice& key, std::function<bool(ClockHandle*)> match,
std::function<bool(ClockHandle*)> abort, std::function<bool(ClockHandle*)> abort,
std::function<void(ClockHandle*)> update, std::function<void(ClockHandle*)> update, uint32_t& probe) {
uint32_t& probe) {
// We use double-hashing probing. Every probe in the sequence is a // We use double-hashing probing. Every probe in the sequence is a
// pseudorandom integer, computed as a linear function of two random hashes, // pseudorandom integer, computed as a linear function of two random hashes,
// which we call base and increment. Specifically, the i-th probe is base + i // which we call base and increment. Specifically, the i-th probe is base + i
@ -201,14 +282,14 @@ int ClockHandleTable::FindSlot(const Slice& key,
ClockHandle* h = &array_[current]; ClockHandle* h = &array_[current];
if (current == base && probe > 0) { if (current == base && probe > 0) {
// We looped back. // We looped back.
return -1; return nullptr;
} }
if (match(h)) { if (match(h)) {
probe++; probe++;
return current; return h;
} }
if (abort(h)) { if (abort(h)) {
return -1; return nullptr;
} }
probe++; probe++;
update(h); update(h);
@ -226,35 +307,73 @@ void ClockHandleTable::Rollback(const Slice& key, uint32_t probe) {
} }
} }
void ClockHandleTable::ClockRun(size_t charge) {
// TODO(Guido) When an element is in the probe sequence of a
// hot element, it will be hard to get an exclusive ref.
// Do we need a mechanism to prevent an element from sitting
// for a long time in cache waiting to be evicted?
assert(charge <= capacity_);
autovector<ClockHandle> deleted;
uint32_t max_iterations =
1 + static_cast<uint32_t>(GetTableSize() * kLoadFactor);
size_t usage_local = usage_;
while (usage_local + charge > capacity_ && max_iterations--) {
uint32_t steps = 1 + static_cast<uint32_t>(1 / kLoadFactor);
uint32_t clock_pointer_local = (clock_pointer_ += steps) - steps;
for (uint32_t i = 0; i < steps; i++) {
ClockHandle* h = &array_[ModTableSize(clock_pointer_local + i)];
if (h->TryExclusiveRef()) {
if (h->WillBeDeleted()) {
Remove(h, &deleted);
usage_local -= h->total_charge;
} else {
if (!h->IsInClock() && h->IsElement()) {
// We adjust the clock priority to make the element evictable again.
// Why? Elements that are not in clock are either currently
// externally referenced or used to be. Because we are holding an
// exclusive ref, we know we are in the latter case. This can only
// happen when the last external reference to an element was
// released, and the element was not immediately removed.
ClockOn(h);
}
ClockHandle::ClockPriority priority = h->GetClockPriority();
if (priority == ClockHandle::ClockPriority::LOW) {
Remove(h, &deleted);
usage_local -= h->total_charge;
} else if (priority > ClockHandle::ClockPriority::LOW) {
h->DecreaseClockPriority();
}
}
h->ReleaseExclusiveRef();
}
}
}
Free(&deleted);
}
ClockCacheShard::ClockCacheShard( ClockCacheShard::ClockCacheShard(
size_t capacity, size_t estimated_value_size, bool strict_capacity_limit, size_t capacity, size_t estimated_value_size, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) CacheMetadataChargePolicy metadata_charge_policy)
: capacity_(capacity), : strict_capacity_limit_(strict_capacity_limit),
strict_capacity_limit_(strict_capacity_limit), table_(capacity, CalcHashBits(capacity, estimated_value_size,
clock_pointer_(0), metadata_charge_policy)) {
table_(
CalcHashBits(capacity, estimated_value_size, metadata_charge_policy)),
usage_(0) {
set_metadata_charge_policy(metadata_charge_policy); set_metadata_charge_policy(metadata_charge_policy);
} }
void ClockCacheShard::EraseUnRefEntries() { void ClockCacheShard::EraseUnRefEntries() {
autovector<ClockHandle> last_reference_list; autovector<ClockHandle> deleted;
{
DMutexLock l(mutex_);
table_.ApplyToEntriesRange( table_.ApplyToEntriesRange(
[this, &last_reference_list](ClockHandle* h) { [this, &deleted](ClockHandle* h) {
// Externally unreferenced element. // Externally unreferenced element.
last_reference_list.push_back(*h); table_.Remove(h, &deleted);
Evict(h);
}, },
0, table_.GetTableSize(), true); 0, table_.GetTableSize(), true);
}
// Free the entry outside of the mutex for performance reasons. table_.Free(&deleted);
for (auto& h : last_reference_list) {
h.FreeData();
}
} }
void ClockCacheShard::ApplyToSomeEntries( void ClockCacheShard::ApplyToSomeEntries(
@ -264,7 +383,6 @@ void ClockCacheShard::ApplyToSomeEntries(
// The state is essentially going to be the starting hash, which works // The state is essentially going to be the starting hash, which works
// nicely even if we resize between calls because we use upper-most // nicely even if we resize between calls because we use upper-most
// hash bits for table indexes. // hash bits for table indexes.
DMutexLock l(mutex_);
uint32_t length_bits = table_.GetLengthBits(); uint32_t length_bits = table_.GetLengthBits();
uint32_t length = table_.GetTableSize(); uint32_t length = table_.GetTableSize();
@ -276,7 +394,7 @@ void ClockCacheShard::ApplyToSomeEntries(
uint32_t index_begin = *state >> (32 - length_bits); uint32_t index_begin = *state >> (32 - length_bits);
uint32_t index_end = index_begin + average_entries_per_lock; uint32_t index_end = index_begin + average_entries_per_lock;
if (index_end >= length) { if (index_end >= length) {
// Going to end // Going to end.
index_end = length; index_end = length;
*state = UINT32_MAX; *state = UINT32_MAX;
} else { } else {
@ -292,60 +410,6 @@ void ClockCacheShard::ApplyToSomeEntries(
index_begin, index_end, false); index_begin, index_end, false);
} }
void ClockCacheShard::ClockOff(ClockHandle* h) {
h->SetClockPriority(ClockHandle::ClockPriority::NONE);
}
void ClockCacheShard::ClockOn(ClockHandle* h) {
assert(!h->IsInClock());
bool is_high_priority =
h->HasHit() || h->GetCachePriority() == Cache::Priority::HIGH;
h->SetClockPriority(static_cast<ClockHandle::ClockPriority>(
is_high_priority * ClockHandle::ClockPriority::HIGH +
(1 - is_high_priority) * ClockHandle::ClockPriority::MEDIUM));
}
void ClockCacheShard::Evict(ClockHandle* h) {
ClockOff(h);
table_.Remove(h);
assert(usage_ >= h->total_charge);
usage_ -= h->total_charge;
}
void ClockCacheShard::EvictFromClock(size_t charge,
autovector<ClockHandle>* deleted) {
// TODO(Guido) When an element is in the probe sequence of a
// hot element, it will be hard to get an exclusive ref.
// We may need a mechanism to avoid that an element sits forever
// in cache waiting to be evicted.
assert(charge <= capacity_);
uint32_t max_iterations = table_.GetTableSize();
while (usage_ + charge > capacity_ && max_iterations--) {
ClockHandle* h = &table_.array_[clock_pointer_];
clock_pointer_ = table_.ModTableSize(clock_pointer_ + 1);
if (h->TryExclusiveRef()) {
if (!h->IsInClock() && h->IsElement()) {
// We adjust the clock priority to make the element evictable again.
// Why? Elements that are not in clock are either currently
// externally referenced or used to be---because we are holding an
// exclusive ref, we know we are in the latter case. This can only
// happen when the last external reference to an element was released,
// and the element was not immediately removed.
ClockOn(h);
}
if (h->GetClockPriority() == ClockHandle::ClockPriority::LOW) {
deleted->push_back(*h);
Evict(h);
} else if (h->GetClockPriority() > ClockHandle::ClockPriority::LOW) {
h->DecreaseClockPriority();
}
h->ReleaseExclusiveRef();
}
}
}
size_t ClockCacheShard::CalcEstimatedHandleCharge( size_t ClockCacheShard::CalcEstimatedHandleCharge(
size_t estimated_value_size, size_t estimated_value_size,
CacheMetadataChargePolicy metadata_charge_policy) { CacheMetadataChargePolicy metadata_charge_policy) {
@ -366,25 +430,12 @@ int ClockCacheShard::CalcHashBits(
return FloorLog2((num_entries << 1) - 1); return FloorLog2((num_entries << 1) - 1);
} }
void ClockCacheShard::SetCapacity(size_t capacity) { void ClockCacheShard::SetCapacity(size_t /*capacity*/) {
assert(false); // Not supported. TODO(Guido) Support it? assert(false); // Not supported.
autovector<ClockHandle> last_reference_list;
{
DMutexLock l(mutex_);
capacity_ = capacity;
EvictFromClock(0, &last_reference_list);
} }
// Free the entry outside of the mutex for performance reasons. void ClockCacheShard::SetStrictCapacityLimit(bool /*strict_capacity_limit*/) {
for (auto& h : last_reference_list) { assert(false); // Not supported.
h.FreeData();
}
}
void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
assert(false); // Not supported. TODO(Guido) Support it?
DMutexLock l(mutex_);
strict_capacity_limit_ = strict_capacity_limit;
} }
Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value, Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
@ -407,23 +458,28 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
} }
Status s = Status::OK(); Status s = Status::OK();
autovector<ClockHandle> last_reference_list;
{ // Free space with the clock policy until enough space is freed or there are
DMutexLock l(mutex_); // no evictable elements.
table_.ClockRun(tmp.total_charge);
assert(table_.GetOccupancy() <= table_.GetOccupancyLimit());
// Free the space following strict clock policy until enough space // occupancy_ and usage_ are contended members across concurrent updates
// is freed or there are no evictable elements. // on the same shard, so we use a single copy to reduce cache synchronization.
EvictFromClock(tmp.total_charge, &last_reference_list); uint32_t occupancy_local = table_.GetOccupancy();
if ((usage_ + tmp.total_charge > capacity_ && size_t usage_local = table_.GetUsage();
assert(occupancy_local <= table_.GetOccupancyLimit());
autovector<ClockHandle> deleted;
if ((usage_local + tmp.total_charge > table_.GetCapacity() &&
(strict_capacity_limit_ || handle == nullptr)) || (strict_capacity_limit_ || handle == nullptr)) ||
table_.GetOccupancy() == table_.GetOccupancyLimit()) { occupancy_local > table_.GetOccupancyLimit()) {
if (handle == nullptr) { if (handle == nullptr) {
// Don't insert the entry but still return ok, as if the entry inserted // Don't insert the entry but still return ok, as if the entry inserted
// into cache and get evicted immediately. // into cache and get evicted immediately.
last_reference_list.push_back(tmp); deleted.push_back(tmp);
} else { } else {
if (table_.GetOccupancy() == table_.GetOccupancyLimit()) { if (occupancy_local > table_.GetOccupancyLimit()) {
// TODO: Consider using a distinct status for this case, but usually // TODO: Consider using a distinct status for this case, but usually
// it will be handled the same way as reaching charge capacity limit // it will be handled the same way as reaching charge capacity limit
s = Status::MemoryLimit( s = Status::MemoryLimit(
@ -437,57 +493,25 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
} else { } else {
// Insert into the cache. Note that the cache might get larger than its // Insert into the cache. Note that the cache might get larger than its
// capacity if not enough space was freed up. // capacity if not enough space was freed up.
ClockHandle* old; ClockHandle* h = table_.Insert(&tmp, &deleted, handle != nullptr);
ClockHandle* h = table_.Insert(&tmp, &old); assert(h != nullptr); // The occupancy is way below the table size, so this
assert(h != nullptr); // We're below occupancy, so this insertion should // insertion should never fail.
// never fail. if (handle != nullptr) {
usage_ += h->total_charge;
if (old != nullptr) {
s = Status::OkOverwritten();
assert(!old->WillBeDeleted());
old->SetWillBeDeleted(true);
// Try to evict the old copy of the element.
if (old->TryExclusiveRef()) {
last_reference_list.push_back(*old);
Evict(old);
old->ReleaseExclusiveRef();
}
}
if (handle == nullptr) {
// If the user didn't provide a handle, no reference is taken,
// so we make the element evictable.
ClockOn(h);
h->ReleaseExclusiveRef();
} else {
// The caller already holds a ref.
h->ExclusiveToExternalRef();
*handle = reinterpret_cast<Cache::Handle*>(h); *handle = reinterpret_cast<Cache::Handle*>(h);
} }
if (deleted.size() > 0) {
s = Status::OkOverwritten();
} }
} }
// Free the entry outside of the mutex for performance reasons. table_.Free(&deleted);
for (auto& h : last_reference_list) {
h.FreeData();
}
return s; return s;
} }
Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) { Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
ClockHandle* h = nullptr; return reinterpret_cast<Cache::Handle*>(table_.Lookup(key, hash));
h = table_.Lookup(key, hash);
if (h != nullptr) {
// TODO(Guido) Comment from #10347: Here it looks like we have three atomic
// updates where it would be possible to combine into one CAS (more metadata
// under one atomic field) or maybe two atomic updates (one arithmetic, one
// bitwise). Something to think about optimizing.
h->InternalToExternalRef();
h->SetHit();
// The handle is now referenced, so we take it out of clock.
ClockOff(h);
}
return reinterpret_cast<Cache::Handle*>(h);
} }
bool ClockCacheShard::Ref(Cache::Handle* h) { bool ClockCacheShard::Ref(Cache::Handle* h) {
@ -498,97 +522,50 @@ bool ClockCacheShard::Ref(Cache::Handle* h) {
bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) { bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
// In contrast with LRUCache's Release, this function won't delete the handle // In contrast with LRUCache's Release, this function won't delete the handle
// when the reference is the last one and the cache is above capacity. Space // when the cache is above capacity and the reference is the last one. Space
// is only freed up by EvictFromClock (called by Insert when space is needed) // is only freed up by EvictFromClock (called by Insert when space is needed)
// and Erase. // and Erase. We do this to avoid an extra atomic read of the variable usage_.
if (handle == nullptr) { if (handle == nullptr) {
return false; return false;
} }
ClockHandle* h = reinterpret_cast<ClockHandle*>(handle); ClockHandle* h = reinterpret_cast<ClockHandle*>(handle);
uint32_t hash = h->hash; uint32_t refs = h->refs;
uint32_t refs = h->ReleaseExternalRef(); bool last_reference = ((refs & ClockHandle::EXTERNAL_REFS) == 1);
bool last_reference = !(refs & ClockHandle::EXTERNAL_REFS);
bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED; bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED;
if (last_reference && (will_be_deleted || erase_if_last_ref)) { if (last_reference && (will_be_deleted || erase_if_last_ref)) {
// At this point we want to evict the element, so we need to take autovector<ClockHandle> deleted;
// a lock and an exclusive reference. But there's a problem: h->SetWillBeDeleted(true);
// as soon as we released the last reference, an Insert or Erase could've h->ReleaseExternalRef();
// replaced this element, and by the time we take the lock and ref if (table_.SpinTryRemove(h, &deleted)) {
// we could potentially be referencing a different element.
// Thus, before evicting the (potentially different) element, we need to
// re-check that it's unreferenced and marked as WILL_BE_DELETED, so the
// eviction is safe. Additionally, we check that the hash doesn't change,
// which will detect, most of the time, whether the element is a different
// one. The bottomline is that we only guarantee that the input handle will
// be deleted, and occasionally also another handle, but in any case all
// deleted handles are safe to delete.
// TODO(Guido) With lock-free inserts and deletes we may be able to
// "atomically" transition to an exclusive ref, without creating a deadlock.
ClockHandle copy;
{
DMutexLock l(mutex_);
if (h->TrySpinExclusiveRef()) {
will_be_deleted = h->refs & ClockHandle::WILL_BE_DELETED;
// Check that it's still safe to delete.
if (h->IsElement() && (will_be_deleted || erase_if_last_ref) &&
h->hash == hash) {
copy = *h;
Evict(h);
}
h->ReleaseExclusiveRef(); h->ReleaseExclusiveRef();
} else { table_.Free(&deleted);
// An external ref was detected.
return false;
}
}
// Free the entry outside of the mutex for performance reasons.
copy.FreeData();
return true; return true;
} }
} else {
h->ReleaseExternalRef();
}
return false; return false;
} }
void ClockCacheShard::Erase(const Slice& key, uint32_t hash) { void ClockCacheShard::Erase(const Slice& key, uint32_t hash) {
ClockHandle copy; autovector<ClockHandle> deleted;
bool last_reference = false; uint32_t probe = 0;
{ table_.RemoveAll(key, hash, probe, &deleted);
DMutexLock l(mutex_); table_.Free(&deleted);
ClockHandle* h = table_.Lookup(key, hash);
if (h != nullptr) {
h->SetWillBeDeleted(true);
h->ReleaseInternalRef();
if (h->TryExclusiveRef()) {
copy = *h;
Evict(h);
last_reference = true;
h->ReleaseExclusiveRef();
}
}
}
// Free the entry outside of the mutex for performance reasons.
if (last_reference) {
copy.FreeData();
}
} }
size_t ClockCacheShard::GetUsage() const { size_t ClockCacheShard::GetUsage() const { return table_.GetUsage(); }
DMutexLock l(mutex_);
return usage_;
}
size_t ClockCacheShard::GetPinnedUsage() const { size_t ClockCacheShard::GetPinnedUsage() const {
// Computes the pinned usage scanning the whole hash table. This // Computes the pinned usage by scanning the whole hash table. This
// is slow, but avoid keeping an exact counter on the clock usage, // is slow, but avoids keeping an exact counter on the clock usage,
// i.e., the number of not externally referenced elements. // i.e., the number of not externally referenced elements.
// Why avoid this? Because Lookup removes elements from the clock // Why avoid this counter? Because Lookup removes elements from the clock
// list, so it would need to update the pinned usage every time, // list, so it would need to update the pinned usage every time,
// which creates additional synchronization costs. // which creates additional synchronization costs.
DMutexLock l(mutex_);
size_t clock_usage = 0; size_t clock_usage = 0;
table_.ConstApplyToEntriesRange( table_.ConstApplyToEntriesRange(
@ -602,17 +579,13 @@ size_t ClockCacheShard::GetPinnedUsage() const {
return clock_usage; return clock_usage;
} }
std::string ClockCacheShard::GetPrintableOptions() const {
return std::string{};
}
ClockCache::ClockCache(size_t capacity, size_t estimated_value_size, ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
int num_shard_bits, bool strict_capacity_limit, int num_shard_bits, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy) CacheMetadataChargePolicy metadata_charge_policy)
: ShardedCache(capacity, num_shard_bits, strict_capacity_limit) { : ShardedCache(capacity, num_shard_bits, strict_capacity_limit),
num_shards_(1 << num_shard_bits) {
assert(estimated_value_size > 0 || assert(estimated_value_size > 0 ||
metadata_charge_policy != kDontChargeCacheMetadata); metadata_charge_policy != kDontChargeCacheMetadata);
num_shards_ = 1 << num_shard_bits;
shards_ = reinterpret_cast<ClockCacheShard*>( shards_ = reinterpret_cast<ClockCacheShard*>(
port::cacheline_aligned_alloc(sizeof(ClockCacheShard) * num_shards_)); port::cacheline_aligned_alloc(sizeof(ClockCacheShard) * num_shards_));
size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_; size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;

732
cache/clock_cache.h vendored

@ -23,102 +23,137 @@
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h" #include "rocksdb/secondary_cache.h"
#include "util/autovector.h" #include "util/autovector.h"
#include "util/distributed_mutex.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
namespace clock_cache { namespace clock_cache {
// Block cache implementation using a lock-free open-address hash table // An experimental alternative to LRUCache, using a lock-free, open-addressed
// and clock eviction. // hash table and clock eviction.
/////////////////////////////////////////////////////////////////////////////// // ----------------------------------------------------------------------------
// Part 1: Handles // 1. INTRODUCTION
// //
// Every slot in the hash table is a ClockHandle. A handle can be in a few // In RocksDB, a Cache is a concurrent unordered dictionary that supports
// different states, that stem from the fact that handles can be externally // external references (a.k.a. user references). A ClockCache is a type of Cache
// referenced and, thus, can't always be immediately evicted when a delete // that uses the clock algorithm as its eviction policy. Internally, a
// operation is executed or when they are replaced by a new version (via an // ClockCache is an open-addressed hash table that stores all KV pairs in a
// insert of the same key). Concretely, the state of a handle is defined by the // large array. Every slot in the hash table is a ClockHandle, which holds a KV
// following two properties: // pair plus some additional metadata that controls the different aspects of the
// (R) Externally referenced: A handle can be referenced externally, or not. // cache: external references, the hashing mechanism, concurrent access and the
// Importantly, a handle can be evicted if and only if it's not // clock algorithm.
// referenced. In particular, when an handle becomes referenced, it's
// temporarily taken out of clock until all references to it are released.
// (M) Marked for deletion (or invisible): An handle is marked for deletion
// when an operation attempts to delete it, but the handle is externally
// referenced, so it can't be immediately deleted. When this mark is placed,
// lookups will no longer be able to find it. Consequently, no more external
// references will be taken to the handle. When a handle is marked for
// deletion, we also say it's invisible.
// These properties induce 4 different states, with transitions defined as
// follows:
// - Not M --> M: When a handle is deleted or replaced by a new version, but
// not immediately evicted.
// - M --> not M: This cannot happen. Once a handle is marked for deletion,
// there is no can't go back.
// - R --> not R: When all references to an handle are released.
// - Not R --> R: When an unreferenced handle becomes referenced. This can only
// happen if the handle is visible, since references to an handle can only be
// created when it's visible.
// //
///////////////////////////////////////////////////////////////////////////////
// Part 2: Hash table structure
// //
// Internally, the cache uses an open-addressed hash table to index the handles. // 2. EXTERNAL REFERENCES
// We use tombstone counters to keep track of displacements. Probes are //
// generated with double-hashing (but the code can be easily modified to use // An externally referenced handle can't be deleted (either evicted by the clock
// other probing schemes, like linear hashing). Because of the tombstones and // algorithm, or explicitly deleted) or replaced by a new version (via an insert
// the two possible visibility states of a handle, the table slots (we use the // of the same key) until all external references to it have been released by
// word "slot" to refer to handles that are not necessary valid key-value // the users. ClockHandles have two members to support external references:
// elements) can be in 4 different states: // - EXTERNAL_REFS counter: The number of external refs. When EXTERNAL_REFS > 0,
// 1. Visible element: The slot contains an element in not M state. // the handle is externally referenced. Updates that intend to modify the
// 2. To-be-deleted element: The slot contains an element in M state. // handle will refrain from doing so. Eventually, when all references are
// 3. Tombstone: The slot doesn't contain an element, but there is some other // released, we have EXTERNAL_REFS == 0, and updates can operate normally on
// the handle.
// - WILL_BE_DELETED flag: A handle is marked for deletion when an operation
// decides the handle should be deleted. This happens either when the last
// reference to a handle is released (and the release operation is instructed
// to delete on last reference) or when a delete operation is called on
// the item. This flag is needed because an externally referenced handle
// can't be immediately deleted. In these cases, the flag will be later read
// and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is
// used not only to defer deletions, but also as a barrier for external
// references: once WILL_BE_DELETED is set, lookups (which are the means to
// acquire new external references) will ignore the handle. For this reason,
// when WILL_BE_DELETED is set, we say the handle is invisible (and
// otherwise, that it's visible).
//
//
// 3. HASHING AND COLLISION RESOLUTION
//
// ClockCache uses an open-addressed hash table to store the handles.
// We use a variant of tombstones to manage collisions: every slot keeps a
// count of how many KV pairs that are currently in the cache have probed the
// slot in an attempt to insert. Probes are generated with double-hashing
// (although the code can be easily modified to use other probing schemes, like
// linear probing).
//
// A slot in the hash table can be in a few different states:
// - Element: The slot contains an element. This is indicated with the
// IS_ELEMENT flag. Element can be sub-classified depending on the
// value of WILL_BE_DELETED:
// * Visible element.
// * Invisible element.
// - Tombstone: The slot doesn't contain an element, but there is some other
// element that probed this slot during its insertion. // element that probed this slot during its insertion.
// 4. Empty: The slot is unused. // - Empty: The slot is unused---it's neither an element nor a tombstone.
// When a ghost is removed from the table, it can either transition to being a
// tombstone or an empty slot, depending on the number of displacements of the
// slot. In any case, the slot becomes available. When a handle is inserted
// into that slot, it becomes a visible element again.
// //
/////////////////////////////////////////////////////////////////////////////// // A slot cycles through the following sequence of states:
// Part 3: The clock algorithm // empty or tombstone --> visible element --> invisible element -->
// empty or tombstone. Initially a slot is available---it's either
// empty or a tombstone. As soon as a KV pair is written into the slot, it
// becomes a visible element. At some point, the handle will be deleted
// by an explicit delete operation, the eviction algorithm, or an overwriting
// insert. In any case, the handle is marked for deletion. When an
// attempt to delete the element finally succeeds, the slot is freed up
// and becomes available again.
// //
// We maintain a circular buffer with the handles available for eviction,
// which the clock algorithm traverses (using a "clock pointer") to pick the
// next victim. We use the hash table array as the circular buffer, and mark
// the handles that are evictable. For this we use different clock flags, namely
// NONE, LOW, MEDIUM, HIGH, that represent priorities: LOW, MEDIUM and HIGH
// represent how close an element is from being evictable, LOW being immediately
// evictable. NONE means the slot is not evictable. This is due to one of the
// following reasons:
// (i) the slot doesn't contain an element, or
// (ii) the slot contains an element that is in R state, or
// (iii) the slot contains an element that was in R state but it's
// not any more, and the clock pointer has not swept through the
// slot since the element stopped being referenced.
// //
// The priority NONE is really only important for case (iii), as in the other // 4. CONCURRENCY
// two cases there are other metadata fields that already capture the state.
// When an element stops being referenced (and is not deleted), the clock
// algorithm must acknowledge this, and assign a non-NONE priority to make
// the element evictable again.
// //
/////////////////////////////////////////////////////////////////////////////// // ClockCache is lock-free. At a high level, we synchronize the operations
// Part 4: Synchronization // using a read-prioritized, non-blocking variant of RW locks on every slot of
// the hash table. To do this we generalize the concept of reference:
// - Internal reference: Taken by a thread that is attempting to read a slot
// or do a very precise type of update.
// - Exclusive reference: Taken by a thread that is attempting to write a
// slot extensively.
// //
// We provide the following synchronization guarantees: // We defer the precise definitions to the comments in the code below.
// - Lookup is lock-free. // A crucial feature of our references is that attempting to take one never
// - Release is lock-free, unless (i) no references to the element are left, // blocks the thread. Another important feature is that readers are
// and (ii) it was marked for deletion or the user wishes to delete if // prioritized, as they use extremely fast synchronization primitives---they
// releasing the last reference. // use atomic arithmetic/bit operations, but no compare-and-swaps (which are
// - Insert and Erase still use a per-shard lock. // much slower).
// //
// Our hash table is lock-free, in the sense that system-wide progress is // Internal references are used by threads to read slots during a probing
// guaranteed, i.e., some thread is always able to make progress. // sequence, making them the most common references (probing is performed
// in almost every operation, not just lookups). During a lookup, once
// the target element is found, and just before the handle is handed over
// to the user, an internal reference is converted into an external reference.
// During an update operation, once the target slot is found, an internal
// reference is converted into an exclusive reference. Interestingly, we
// can't atomically upgrade from internal to exclusive, or we may run into a
// deadlock. Releasing the internal reference and then taking an exclusive
// reference avoids the deadlock, but then the handle may change in between.
// One of the key observations we use in our implementation is that we can
// make up for this lack of atomicity using IS_ELEMENT and WILL_BE_DELETED.
// //
/////////////////////////////////////////////////////////////////////////////// // Distinguishing internal from external references is useful for two reasons:
// - Internal references are short lived, but external references are typically
// not. This is helpful when acquiring an exclusive ref: if there are any
// external references to the item, it's probably not worth waiting until
// they go away.
// - We can precisely determine when there are no more external references to a
// handle, and proceed to mark it for deletion. This is useful when users
// release external references.
//
//
// 5. CLOCK ALGORITHM
//
// The clock algorithm circularly sweeps through the hash table to find the next
// victim. Recall that handles that are referenced are not evictable; the clock
// algorithm never picks those. We use different clock priorities: NONE, LOW,
// MEDIUM and HIGH. Priorities LOW, MEDIUM and HIGH represent how close an
// element is to being evicted, LOW being the closest to eviction. NONE means
// the slot is not evictable. NONE priority is used in one of the following
// cases:
// (a) the slot doesn't contain an element, or
// (b) the slot contains an externally referenced element, or
// (c) the slot contains an element that used to be externally referenced,
// and the clock pointer has not swept through the slot since the element
// stopped being externally referenced.
// ----------------------------------------------------------------------------
// The load factor p is a real number in (0, 1) such that at all // The load factor p is a real number in (0, 1) such that at all
// times at most a fraction p of all slots, without counting tombstones, // times at most a fraction p of all slots, without counting tombstones,
@ -138,15 +173,18 @@ constexpr double kLoadFactor = 0.35;
// The user can exceed kLoadFactor if the sizes of the inserted values don't // The user can exceed kLoadFactor if the sizes of the inserted values don't
// match estimated_value_size, or if strict_capacity_limit == false. To // match estimated_value_size, or if strict_capacity_limit == false. To
// avoid performance to plunge, we set a strict upper bound on the load factor. // avoid a performance drop, we set a strict upper bound on the load factor.
constexpr double kStrictLoadFactor = 0.7; constexpr double kStrictLoadFactor = 0.7;
// Maximum number of spins when trying to acquire a ref.
// TODO(Guido) This value was set arbitrarily. Is it appropriate?
// What's the best way to bound the spinning?
constexpr uint32_t kSpinsPerTry = 100000;
// Arbitrary seeds. // Arbitrary seeds.
constexpr uint32_t kProbingSeed1 = 0xbc9f1d34; constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5; constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
// An experimental (under development!) alternative to LRUCache.
struct ClockHandle { struct ClockHandle {
void* value; void* value;
Cache::DeleterFn deleter; Cache::DeleterFn deleter;
@ -154,49 +192,6 @@ struct ClockHandle {
size_t total_charge; size_t total_charge;
std::array<char, kCacheKeySize> key_data; std::array<char, kCacheKeySize> key_data;
static constexpr uint8_t kExternalRefsOffset = 0;
static constexpr uint8_t kSharedRefsOffset = 15;
static constexpr uint8_t kExclusiveRefOffset = 30;
static constexpr uint8_t kWillBeDeletedOffset = 31;
enum Refs : uint32_t {
// Number of external references to the slot.
EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
<< kExternalRefsOffset, // Bits 0, ..., 14
// Number of internal references plus external references to the slot.
SHARED_REFS = ((uint32_t{1} << 15) - 1)
<< kSharedRefsOffset, // Bits 15, ..., 29
// Whether a thread has an exclusive reference to the slot.
EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
// Whether the handle will be deleted soon. When this bit is set, new
// internal
// or external references to this handle stop being accepted.
// There is an exception: external references can be created from
// existing external references, or converting from existing internal
// references.
WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
// Shared references (i.e., external and internal references) and exclusive
// references are our custom implementation of RW locks---external and
// internal references are read locks, and exclusive references are write
// locks. We prioritize readers, which never block; in fact, they don't even
// use compare-and-swap operations. Using our own implementation of RW locks
// allows us to save many atomic operations by packing data more carefully.
// In particular:
// - Combining EXTERNAL_REFS and SHARED_REFS allows us to convert an
// internal
// reference into an external reference in a single atomic arithmetic
// operation.
// - Combining SHARED_REFS and WILL_BE_DELETED allows us to attempt to take
// a shared reference and check whether the entry is marked for deletion
// in a single atomic arithmetic operation.
};
static constexpr uint32_t kOneInternalRef = 0x8000;
static constexpr uint32_t kOneExternalRef = 0x8001;
std::atomic<uint32_t> refs;
static constexpr uint8_t kIsElementOffset = 1; static constexpr uint8_t kIsElementOffset = 1;
static constexpr uint8_t kClockPriorityOffset = 2; static constexpr uint8_t kClockPriorityOffset = 2;
static constexpr uint8_t kIsHitOffset = 4; static constexpr uint8_t kIsHitOffset = 4;
@ -209,7 +204,7 @@ struct ClockHandle {
CLOCK_PRIORITY = 3 << kClockPriorityOffset, CLOCK_PRIORITY = 3 << kClockPriorityOffset,
// Whether the handle has been looked up after its insertion. // Whether the handle has been looked up after its insertion.
HAS_HIT = 1 << kIsHitOffset, HAS_HIT = 1 << kIsHitOffset,
// The value of Cache::Priority for the handle. // The value of Cache::Priority of the handle.
CACHE_PRIORITY = 1 << kCachePriorityOffset, CACHE_PRIORITY = 1 << kCachePriorityOffset,
}; };
@ -226,30 +221,67 @@ struct ClockHandle {
// up in this slot or a higher one. // up in this slot or a higher one.
std::atomic<uint32_t> displacements; std::atomic<uint32_t> displacements;
// Synchronization rules: static constexpr uint8_t kExternalRefsOffset = 0;
// - Use a shared reference when we want the handle's identity static constexpr uint8_t kSharedRefsOffset = 15;
// members (key_data, hash, value and IS_ELEMENT flag) to static constexpr uint8_t kExclusiveRefOffset = 30;
// remain untouched, but not modify them. The only updates static constexpr uint8_t kWillBeDeletedOffset = 31;
// that a shared reference allows are:
enum Refs : uint32_t {
// Synchronization model:
// - An external reference guarantees that hash, value, key_data
// and the IS_ELEMENT flag are not modified. Doesn't allow
// any writes.
// - An internal reference has the same guarantees as an
// external reference, and additionally allows the following
// idempotent updates on the handle:
// * set CLOCK_PRIORITY to NONE; // * set CLOCK_PRIORITY to NONE;
// * set the HAS_HIT bit. // * set the HAS_HIT bit;
// Notice that these two types of updates are idempotent, so // * set the WILL_BE_DELETED bit.
// they don't require synchronization across shared references. // - A shared reference is either an external reference or an
// - Use an exclusive reference when we want identity members // internal reference.
// to remain untouched, as well as modify any identity member // - An exclusive reference guarantees that no other thread has a shared
// or flag. // or exclusive reference to the handle, and allows writes
// - displacements can be modified without holding a reference. // on the handle.
// - refs is only modified through appropriate functions to
// take or release references. // Number of external references to the slot.
EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
<< kExternalRefsOffset, // Bits 0, ..., 14
// Number of internal references plus external references to the slot.
SHARED_REFS = ((uint32_t{1} << 15) - 1)
<< kSharedRefsOffset, // Bits 15, ..., 29
// Whether a thread has an exclusive reference to the slot.
EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
// Whether the handle will be deleted soon. When this bit is set, new
// internal
// or external references to this handle stop being accepted.
// There is an exception: external references can be created from
// existing external references, or converting from existing internal
// references.
WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
// Having these 4 fields in a single variable allows us to support the
// following operations efficiently:
// - Convert an internal reference into an external reference in a single
// atomic arithmetic operation.
// - Attempt to take a shared reference using a single atomic arithmetic
// operation. This is because we can increment the internal ref count
// as well as checking whether the entry is marked for deletion using a
// single atomic arithmetic operation (and one non-atomic comparison).
};
static constexpr uint32_t kOneInternalRef = 0x8000;
static constexpr uint32_t kOneExternalRef = 0x8001;
std::atomic<uint32_t> refs;
ClockHandle() ClockHandle()
: value(nullptr), : value(nullptr),
deleter(nullptr), deleter(nullptr),
hash(0), hash(0),
total_charge(0), total_charge(0),
refs(0),
flags(0), flags(0),
displacements(0) { displacements(0),
refs(0) {
SetWillBeDeleted(false); SetWillBeDeleted(false);
SetIsElement(false); SetIsElement(false);
SetClockPriority(ClockPriority::NONE); SetClockPriority(ClockPriority::NONE);
@ -257,26 +289,66 @@ struct ClockHandle {
key_data.fill(0); key_data.fill(0);
} }
// The copy ctor and assignment operator are only used to copy a handle
// for immediate deletion. (We need to copy because the slot may become
// re-used before the deletion is completed.) We only copy the necessary
// members to carry out the deletion. In particular, we don't need
// the atomic members.
ClockHandle(const ClockHandle& other) { *this = other; } ClockHandle(const ClockHandle& other) { *this = other; }
void operator=(const ClockHandle& other) { void operator=(const ClockHandle& other) {
value = other.value; value = other.value;
deleter = other.deleter; deleter = other.deleter;
hash = other.hash;
total_charge = other.total_charge;
refs.store(other.refs);
key_data = other.key_data; key_data = other.key_data;
flags.store(other.flags); total_charge = other.total_charge;
SetWillBeDeleted(other.WillBeDeleted());
SetIsElement(other.IsElement());
SetClockPriority(other.GetClockPriority());
SetCachePriority(other.GetCachePriority());
displacements.store(other.displacements);
} }
Slice key() const { return Slice(key_data.data(), kCacheKeySize); } Slice key() const { return Slice(key_data.data(), kCacheKeySize); }
bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; } void FreeData() {
if (deleter) {
(*deleter)(key(), value);
}
}
// Calculate the memory usage by metadata.
inline size_t CalcMetaCharge(
CacheMetadataChargePolicy metadata_charge_policy) const {
if (metadata_charge_policy != kFullChargeCacheMetadata) {
return 0;
} else {
// #ifdef ROCKSDB_MALLOC_USABLE_SIZE
// return malloc_usable_size(
// const_cast<void*>(static_cast<const void*>(this)));
// #else
// TODO(Guido) malloc_usable_size only works when we call it on
// a pointer allocated with malloc. Because our handles are all
// allocated in a single shot as an array, the user can't call
// CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
// pointer returned by the cache. Moreover, malloc_usable_size
// expects a heap-allocated handle, but sometimes in our code we
// wish to pass a stack-allocated handle (this is only a performance
// concern).
// What is the right way to compute metadata charges with pre-allocated
// handles?
return sizeof(ClockHandle);
// #endif
}
}
inline void CalcTotalCharge(
size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
total_charge = charge + CalcMetaCharge(metadata_charge_policy);
}
inline size_t GetCharge(
CacheMetadataChargePolicy metadata_charge_policy) const {
size_t meta_charge = CalcMetaCharge(metadata_charge_policy);
assert(total_charge >= meta_charge);
return total_charge - meta_charge;
}
// flags functions.
bool IsElement() const { return flags & IS_ELEMENT; } bool IsElement() const { return flags & IS_ELEMENT; }
@ -292,10 +364,6 @@ struct ClockHandle {
void SetHit() { flags |= HAS_HIT; } void SetHit() { flags |= HAS_HIT; }
bool IsInClock() const {
return GetClockPriority() != ClockHandle::ClockPriority::NONE;
}
Cache::Priority GetCachePriority() const { Cache::Priority GetCachePriority() const {
return static_cast<Cache::Priority>(flags & CACHE_PRIORITY); return static_cast<Cache::Priority>(flags & CACHE_PRIORITY);
} }
@ -308,6 +376,10 @@ struct ClockHandle {
} }
} }
bool IsInClock() const {
return GetClockPriority() != ClockHandle::ClockPriority::NONE;
}
ClockPriority GetClockPriority() const { ClockPriority GetClockPriority() const {
return static_cast<ClockPriority>(flags & Flags::CLOCK_PRIORITY); return static_cast<ClockPriority>(flags & Flags::CLOCK_PRIORITY);
} }
@ -328,49 +400,6 @@ struct ClockHandle {
flags |= new_priority; flags |= new_priority;
} }
void FreeData() {
if (deleter) {
(*deleter)(key(), value);
}
}
// Calculate the memory usage by metadata.
inline size_t CalcMetaCharge(
CacheMetadataChargePolicy metadata_charge_policy) const {
if (metadata_charge_policy != kFullChargeCacheMetadata) {
return 0;
} else {
// #ifdef ROCKSDB_MALLOC_USABLE_SIZE
// return malloc_usable_size(
// const_cast<void*>(static_cast<const void*>(this)));
// #else
// TODO(Guido) malloc_usable_size only works when we call it on
// a pointer allocated with malloc. Because our handles are all
// allocated in a single shot as an array, the user can't call
// CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
// pointer returned by the cache. Moreover, malloc_usable_size
// expects a heap-allocated handle, but sometimes in our code we
// wish to pass a stack-allocated handle (this is only a performance
// concern).
// What is the right way to compute metadata charges with pre-allocated
// handles?
return sizeof(ClockHandle);
// #endif
}
}
inline void CalcTotalCharge(
size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
total_charge = charge + CalcMetaCharge(metadata_charge_policy);
}
inline size_t GetCharge(
CacheMetadataChargePolicy metadata_charge_policy) const {
size_t meta_charge = CalcMetaCharge(metadata_charge_policy);
assert(total_charge >= meta_charge);
return total_charge - meta_charge;
}
inline bool IsEmpty() const { inline bool IsEmpty() const {
return !this->IsElement() && this->displacements == 0; return !this->IsElement() && this->displacements == 0;
} }
@ -380,11 +409,12 @@ struct ClockHandle {
} }
inline bool Matches(const Slice& some_key, uint32_t some_hash) const { inline bool Matches(const Slice& some_key, uint32_t some_hash) const {
return this->IsElement() && this->hash == some_hash && return this->hash == some_hash && this->key() == some_key;
this->key() == some_key;
} }
bool WillBeDeleted() const { return refs & WILL_BE_DELETED; } // refs functions.
inline bool WillBeDeleted() const { return refs & WILL_BE_DELETED; }
void SetWillBeDeleted(bool will_be_deleted) { void SetWillBeDeleted(bool will_be_deleted) {
if (will_be_deleted) { if (will_be_deleted) {
@ -394,28 +424,7 @@ struct ClockHandle {
} }
} }
// The following functions are for taking and releasing refs. bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }
// Tries to take an external ref. Returns true iff it succeeds.
inline bool TryExternalRef() {
if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
return true;
}
refs -= kOneExternalRef;
return false;
}
// Releases an external ref. Returns the new value (this is useful to
// avoid an extra atomic read).
inline uint32_t ReleaseExternalRef() { return refs -= kOneExternalRef; }
// Take an external ref, assuming there is already one external ref
// to the handle.
void Ref() {
// TODO(Guido) Is it okay to assume that the existing external reference
// survives until this function returns?
refs += kOneExternalRef;
}
// Tries to take an internal ref. Returns true iff it succeeds. // Tries to take an internal ref. Returns true iff it succeeds.
inline bool TryInternalRef() { inline bool TryInternalRef() {
@ -426,9 +435,19 @@ struct ClockHandle {
return false; return false;
} }
inline void ReleaseInternalRef() { refs -= kOneInternalRef; } // Tries to take an external ref. Returns true iff it succeeds.
inline bool TryExternalRef() {
if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
return true;
}
refs -= kOneExternalRef;
return false;
}
// Tries to take an exclusive ref. Returns true iff it succeeds. // Tries to take an exclusive ref. Returns true iff it succeeds.
// TODO(Guido) After every TryExclusiveRef call, we always call
// WillBeDeleted(). We could save an atomic read by having an output parameter
// with the last value of refs.
inline bool TryExclusiveRef() { inline bool TryExclusiveRef() {
uint32_t will_be_deleted = refs & WILL_BE_DELETED; uint32_t will_be_deleted = refs & WILL_BE_DELETED;
uint32_t expected = will_be_deleted; uint32_t expected = will_be_deleted;
@ -436,15 +455,18 @@ struct ClockHandle {
EXCLUSIVE_REF | will_be_deleted); EXCLUSIVE_REF | will_be_deleted);
} }
// Repeatedly tries to take an exclusive reference, but stops as soon // Repeatedly tries to take an exclusive reference, but aborts as soon
// as an external reference is detected (in this case the wait would // as an external or exclusive reference is detected (since the wait
// presumably be too long). // would presumably be too long).
inline bool TrySpinExclusiveRef() { inline bool SpinTryExclusiveRef() {
uint32_t expected = 0; uint32_t expected = 0;
uint32_t will_be_deleted = 0; uint32_t will_be_deleted = 0;
uint32_t spins = kSpinsPerTry;
while (!refs.compare_exchange_strong(expected, while (!refs.compare_exchange_strong(expected,
EXCLUSIVE_REF | will_be_deleted)) { EXCLUSIVE_REF | will_be_deleted) &&
if (expected & EXTERNAL_REFS) { spins--) {
std::this_thread::yield();
if (expected & (EXTERNAL_REFS | EXCLUSIVE_REF)) {
return false; return false;
} }
will_be_deleted = expected & WILL_BE_DELETED; will_be_deleted = expected & WILL_BE_DELETED;
@ -453,75 +475,88 @@ struct ClockHandle {
return true; return true;
} }
inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); } // Take an external ref, assuming there is already one external ref
// to the handle.
void Ref() {
// TODO(Guido) Is it okay to assume that the existing external reference
// survives until this function returns?
refs += kOneExternalRef;
}
// The following functions are for upgrading and downgrading refs. inline void ReleaseExternalRef() { refs -= kOneExternalRef; }
// They guarantee atomicity, i.e., no exclusive refs to the handle
// can be taken by a different thread during the conversion.
inline void ExclusiveToInternalRef() { inline void ReleaseInternalRef() { refs -= kOneInternalRef; }
refs += kOneInternalRef;
ReleaseExclusiveRef(); inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); }
}
// Downgrade an exclusive ref to external.
inline void ExclusiveToExternalRef() { inline void ExclusiveToExternalRef() {
refs += kOneExternalRef; refs += kOneExternalRef;
ReleaseExclusiveRef(); ReleaseExclusiveRef();
} }
// TODO(Guido) Do we want to bound the loop and prepare the // Convert an internal ref into external.
// algorithms to react to a failure?
inline void InternalToExclusiveRef() {
uint32_t expected = kOneInternalRef;
uint32_t will_be_deleted = 0;
while (!refs.compare_exchange_strong(expected,
EXCLUSIVE_REF | will_be_deleted)) {
will_be_deleted = expected & WILL_BE_DELETED;
expected = kOneInternalRef | will_be_deleted;
}
}
inline void InternalToExternalRef() { inline void InternalToExternalRef() {
refs += kOneExternalRef - kOneInternalRef; refs += kOneExternalRef - kOneInternalRef;
} }
// TODO(Guido) Same concern.
inline void ExternalToExclusiveRef() {
uint32_t expected = kOneExternalRef;
uint32_t will_be_deleted = 0;
while (!refs.compare_exchange_strong(expected,
EXCLUSIVE_REF | will_be_deleted)) {
will_be_deleted = expected & WILL_BE_DELETED;
expected = kOneExternalRef | will_be_deleted;
}
}
}; // struct ClockHandle }; // struct ClockHandle
class ClockHandleTable { class ClockHandleTable {
public: public:
explicit ClockHandleTable(int hash_bits); explicit ClockHandleTable(size_t capacity, int hash_bits);
~ClockHandleTable(); ~ClockHandleTable();
// Returns a pointer to a visible element matching the key/hash, or // Returns a pointer to a visible handle matching the key/hash, or
// nullptr if not present. // nullptr if not present. When an actual handle is produced, an
// internal reference is handed over.
ClockHandle* Lookup(const Slice& key, uint32_t hash); ClockHandle* Lookup(const Slice& key, uint32_t hash);
// Inserts a copy of h into the hash table. // Inserts a copy of h into the hash table. Returns a pointer to the
// Returns a pointer to the inserted handle, or nullptr if no slot // inserted handle, or nullptr if no available slot was found. Every
// available was found. If an existing visible element matching the // existing visible handle matching the key that is already present in the
// key/hash is already present in the hash table, the argument old // hash table is marked as WILL_BE_DELETED. The deletion is also attempted,
// is set to point to it; otherwise, it's set to nullptr. // and, if the attempt is successful, the handle is inserted into the
// Returns an exclusive reference to h, and no references to old. // autovector deleted. When take_reference is true, the function hands
ClockHandle* Insert(ClockHandle* h, ClockHandle** old); // over an external reference on the handle, and otherwise no reference is
// produced.
ClockHandle* Insert(ClockHandle* h, autovector<ClockHandle>* deleted,
bool take_reference);
// Assigns h the appropriate clock priority, making it evictable.
void ClockOn(ClockHandle* h);
// Makes h non-evictable.
void ClockOff(ClockHandle* h);
// Removes h from the hash table. The handle must already be off clock. // Runs the clock eviction algorithm until there is enough space to
void Remove(ClockHandle* h); // insert an element with the given charge.
void ClockRun(size_t charge);
// Extracts the element information from a handle (src), and assigns it // Remove h from the hash table. Requires an exclusive ref to h.
// to a hash table slot (dst). Doesn't touch displacements and refs, void Remove(ClockHandle* h, autovector<ClockHandle>* deleted);
// which are maintained by the hash table algorithm.
void Assign(ClockHandle* dst, ClockHandle* src); // Remove from the hash table all handles with matching key/hash along a
// probe sequence, starting from the given probe number. Doesn't
// require any references.
void RemoveAll(const Slice& key, uint32_t hash, uint32_t& probe,
autovector<ClockHandle>* deleted);
void RemoveAll(const Slice& key, uint32_t hash,
autovector<ClockHandle>* deleted) {
uint32_t probe = 0;
RemoveAll(key, hash, probe, deleted);
}
void Free(autovector<ClockHandle>* deleted);
// Tries to remove h from the hash table. If the attempt is successful,
// the function hands over an exclusive ref to h.
bool TryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
// Similar to TryRemove, except that it spins, increasing the chances of
// success. Requires that the caller thread has no shared ref to h.
bool SpinTryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
template <typename T> template <typename T>
void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end, void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
@ -531,12 +566,9 @@ class ClockHandleTable {
if (h->TryExclusiveRef()) { if (h->TryExclusiveRef()) {
if (h->IsElement() && if (h->IsElement() &&
(apply_if_will_be_deleted || !h->WillBeDeleted())) { (apply_if_will_be_deleted || !h->WillBeDeleted())) {
// Hand the internal ref over to func, which is now responsible
// to release it.
func(h); func(h);
} else {
h->ReleaseExclusiveRef();
} }
h->ReleaseExclusiveRef();
} }
} }
} }
@ -565,53 +597,81 @@ class ClockHandleTable {
uint32_t GetOccupancy() const { return occupancy_; } uint32_t GetOccupancy() const { return occupancy_; }
size_t GetUsage() const { return usage_; }
size_t GetCapacity() const { return capacity_; }
// Returns x mod 2^{length_bits_}. // Returns x mod 2^{length_bits_}.
uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; } uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
private: private:
friend class ClockCacheShard; // Extracts the element information from a handle (src), and assigns it
// to a hash table slot (dst). Doesn't touch displacements and refs,
int FindElement(const Slice& key, uint32_t hash, uint32_t& probe); // which are maintained by the hash table algorithm.
void Assign(ClockHandle* dst, ClockHandle* src);
int FindAvailableSlot(const Slice& key, uint32_t& probe);
int FindElementOrAvailableSlot(const Slice& key, uint32_t hash,
uint32_t& probe);
// Returns the index of the first slot probed (hashing with // Returns the first slot in the probe sequence, starting from the given
// the given key) with a handle e such that match(e) is true. // probe number, with a handle e such that match(e) is true. At every
// At every step, the function first tests whether match(e) holds. // step, the function first tests whether match(e) holds. If this is false,
// If it's false, it evaluates abort(e) to decide whether the // it evaluates abort(e) to decide whether the search should be aborted,
// search should be aborted, and in the affirmative returns -1. // and in the affirmative returns nullptr. For every handle e probed except
// For every handle e probed except the last one, the function runs // the last one, the function runs update(e).
// update(e). We say a probe to a handle e is aborting if match(e) is // The probe parameter is modified as follows. We say a probe to a handle
// false and abort(e) is true. The argument probe is one more than the // e is aborting if match(e) is false and abort(e) is true. Then the final
// last non-aborting probe during the call. This is so that the // value of probe is one more than the last non-aborting probe during the
// variable can be used to keep track of progress across consecutive // call. This is so that the variable can be used to keep track of
// calls to FindSlot. // progress across consecutive calls to FindSlot.
inline int FindSlot(const Slice& key, std::function<bool(ClockHandle*)> match, inline ClockHandle* FindSlot(const Slice& key,
std::function<bool(ClockHandle*)> match,
std::function<bool(ClockHandle*)> stop, std::function<bool(ClockHandle*)> stop,
std::function<void(ClockHandle*)> update, std::function<void(ClockHandle*)> update,
uint32_t& probe); uint32_t& probe);
// After a failed FindSlot call (i.e., with answer -1), this function // Returns an available slot for the given key. All copies of the
// decrements all displacements, starting from the 0-th probe. // key found along the probing sequence until an available slot is
// found are marked for deletion. On each of them, a deletion is
// attempted, and when the attempt succeeds the slot is assigned to
// the new copy of the element.
ClockHandle* FindAvailableSlot(const Slice& key, uint32_t hash,
uint32_t& probe,
autovector<ClockHandle>* deleted);
// After a failed FindSlot call (i.e., with answer nullptr) in
// FindAvailableSlot, this function fixes all displacements
// starting from the 0-th probe, until the given probe.
void Rollback(const Slice& key, uint32_t probe); void Rollback(const Slice& key, uint32_t probe);
// Number of hash bits used for table index. // Number of hash bits used for table index.
// The size of the table is 1 << length_bits_. // The size of the table is 1 << length_bits_.
int length_bits_; const int length_bits_;
// For faster computation of ModTableSize. // For faster computation of ModTableSize.
const uint32_t length_bits_mask_; const uint32_t length_bits_mask_;
// Number of elements in the table.
uint32_t occupancy_;
// Maximum number of elements the user can store in the table. // Maximum number of elements the user can store in the table.
uint32_t occupancy_limit_; const uint32_t occupancy_limit_;
// Maximum total charge of all elements stored in the table.
const size_t capacity_;
// We partition the following members into different cache lines
// to avoid false sharing among Lookup, Release, Erase and Insert
// operations in ClockCacheShard.
ALIGN_AS(CACHE_LINE_SIZE)
// Array of slots comprising the hash table.
std::unique_ptr<ClockHandle[]> array_; std::unique_ptr<ClockHandle[]> array_;
ALIGN_AS(CACHE_LINE_SIZE)
// Clock algorithm sweep pointer.
std::atomic<uint32_t> clock_pointer_;
ALIGN_AS(CACHE_LINE_SIZE)
// Number of elements in the table.
std::atomic<uint32_t> occupancy_;
// Memory size for entries residing in the cache.
std::atomic<size_t> usage_;
}; // class ClockHandleTable }; // class ClockHandleTable
// A single shard of sharded cache. // A single shard of sharded cache.
@ -652,20 +712,26 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
Statistics* /*stats*/) override { Statistics* /*stats*/) override {
return Lookup(key, hash); return Lookup(key, hash);
} }
Cache::Handle* Lookup(const Slice& key, uint32_t hash) override; Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
bool Release(Cache::Handle* handle, bool /*useful*/, bool Release(Cache::Handle* handle, bool /*useful*/,
bool erase_if_last_ref) override { bool erase_if_last_ref) override {
return Release(handle, erase_if_last_ref); return Release(handle, erase_if_last_ref);
} }
bool IsReady(Cache::Handle* /*handle*/) override { return true; } bool IsReady(Cache::Handle* /*handle*/) override { return true; }
void Wait(Cache::Handle* /*handle*/) override {} void Wait(Cache::Handle* /*handle*/) override {}
bool Ref(Cache::Handle* handle) override; bool Ref(Cache::Handle* handle) override;
bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override; bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
void Erase(const Slice& key, uint32_t hash) override; void Erase(const Slice& key, uint32_t hash) override;
size_t GetUsage() const override; size_t GetUsage() const override;
size_t GetPinnedUsage() const override; size_t GetPinnedUsage() const override;
void ApplyToSomeEntries( void ApplyToSomeEntries(
@ -675,20 +741,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
void EraseUnRefEntries() override; void EraseUnRefEntries() override;
std::string GetPrintableOptions() const override; std::string GetPrintableOptions() const override { return std::string{}; }
private: private:
friend class ClockCache; friend class ClockCache;
// Makes an element evictable by clock.
void ClockOn(ClockHandle* h);
// Makes an element non-evictable.
void ClockOff(ClockHandle* h);
// Requires an exclusive ref on h.
void Evict(ClockHandle* h);
// Free some space following strict clock policy until enough space // Free some space following strict clock policy until enough space
// to hold (usage_ + charge) is freed or there are no evictable elements. // to hold (usage_ + charge) is freed or there are no evictable elements.
void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted); void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);
@ -703,34 +760,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
static int CalcHashBits(size_t capacity, size_t estimated_value_size, static int CalcHashBits(size_t capacity, size_t estimated_value_size,
CacheMetadataChargePolicy metadata_charge_policy); CacheMetadataChargePolicy metadata_charge_policy);
// Initialized before use.
size_t capacity_;
// Whether to reject insertion if cache reaches its full capacity. // Whether to reject insertion if cache reaches its full capacity.
bool strict_capacity_limit_; std::atomic<bool> strict_capacity_limit_;
uint32_t clock_pointer_;
// ------------^^^^^^^^^^^^^-----------
// Not frequently modified data members
// ------------------------------------
//
// We separate data members that are updated frequently from the ones that
// are not frequently updated so that they don't share the same cache line
// which will lead into false cache sharing
//
// ------------------------------------
// Frequently modified data members
// ------------vvvvvvvvvvvvv-----------
ClockHandleTable table_; ClockHandleTable table_;
// Memory size for entries residing in the cache.
size_t usage_;
// mutex_ protects the following state.
// We don't count mutex_ as the cache's internal state so semantically we
// don't mind mutex_ invoking the non-const actions.
mutable DMutex mutex_;
}; // class ClockCacheShard }; // class ClockCacheShard
class ClockCache class ClockCache
@ -743,19 +776,28 @@ class ClockCache
bool strict_capacity_limit, bool strict_capacity_limit,
CacheMetadataChargePolicy metadata_charge_policy = CacheMetadataChargePolicy metadata_charge_policy =
kDontChargeCacheMetadata); kDontChargeCacheMetadata);
~ClockCache() override; ~ClockCache() override;
const char* Name() const override { return "ClockCache"; } const char* Name() const override { return "ClockCache"; }
CacheShard* GetShard(uint32_t shard) override; CacheShard* GetShard(uint32_t shard) override;
const CacheShard* GetShard(uint32_t shard) const override; const CacheShard* GetShard(uint32_t shard) const override;
void* Value(Handle* handle) override; void* Value(Handle* handle) override;
size_t GetCharge(Handle* handle) const override; size_t GetCharge(Handle* handle) const override;
uint32_t GetHash(Handle* handle) const override; uint32_t GetHash(Handle* handle) const override;
DeleterFn GetDeleter(Handle* handle) const override; DeleterFn GetDeleter(Handle* handle) const override;
void DisownData() override; void DisownData() override;
private: private:
ClockCacheShard* shards_ = nullptr; ClockCacheShard* shards_ = nullptr;
int num_shards_ = 0; int num_shards_;
}; // class ClockCache }; // class ClockCache
} // namespace clock_cache } // namespace clock_cache

Loading…
Cancel
Save