@@ -10,6 +10,8 @@
#pragma once

#include <array>
#include <atomic>
#include <cstdint>
#include <memory>
#include <string>
@@ -27,116 +29,254 @@ namespace ROCKSDB_NAMESPACE {
namespace clock_cache {

// Block cache implementation using a lock-free open-address hash table
// and clock eviction.
///////////////////////////////////////////////////////////////////////////////
// Part 1: Handles
//
// Every slot in the hash table is a ClockHandle. A handle can be in a few
// different states, which stem from the fact that handles can be externally
// referenced and, thus, can't always be immediately evicted when a delete
// operation is executed or when they are replaced by a new version (via an
// insert of the same key). Concretely, the state of a handle is defined by the
// following two properties:
// (R) Externally referenced: A handle can be referenced externally, or not.
//    Importantly, a handle can be evicted if and only if it's not
//    referenced. In particular, when a handle becomes referenced, it's
//    temporarily taken out of clock until all references to it are released.
// (M) Marked for deletion (or invisible): A handle is marked for deletion
//    when an operation attempts to delete it, but the handle is externally
//    referenced, so it can't be immediately deleted. When this mark is placed,
//    lookups will no longer be able to find it. Consequently, no more external
//    references will be taken to the handle. When a handle is marked for
//    deletion, we also say it's invisible.
// These properties induce 4 different states, with transitions defined as
// follows:
// - Not M --> M: When a handle is deleted or replaced by a new version, but
//    not immediately evicted.
// - M --> not M: This cannot happen. Once a handle is marked for deletion,
//    there is no going back.
// - R --> not R: When all references to a handle are released.
// - Not R --> R: When an unreferenced handle becomes referenced. This can only
//    happen if the handle is visible, since references to a handle can only be
//    created when it's visible.
//
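// As a quick illustration of this state space, the classification could be
// written as follows (a sketch only; these names are illustrative, and the
// actual code reads the two properties from packed bit fields):
//
//   enum class HandleState {
//     kVisibleUnreferenced,   // not R, not M: evictable by clock.
//     kVisibleReferenced,     // R, not M: in use; temporarily out of clock.
//     kInvisibleReferenced,   // R, M: hidden from lookups, awaiting release.
//     kInvisibleUnreferenced  // not R, M: deletable immediately.
//   };
//
//   HandleState Classify(bool referenced, bool marked_for_deletion) {
//     if (referenced) {
//       return marked_for_deletion ? HandleState::kInvisibleReferenced
//                                  : HandleState::kVisibleReferenced;
//     }
//     return marked_for_deletion ? HandleState::kInvisibleUnreferenced
//                                : HandleState::kVisibleUnreferenced;
//   }
//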
///////////////////////////////////////////////////////////////////////////////
// Part 2: Hash table structure
//
// Internally, the cache uses an open-addressed hash table to index the
// handles. We use tombstone counters to keep track of displacements. Probes
// are generated with double hashing (but the code can be easily modified to
// use other probing schemes, like linear probing). Because of the tombstones
// and the two possible visibility states of a handle, the table slots (we use
// the word "slot" to refer to handles that are not necessarily valid key-value
// elements) can be in 4 different states:
// 1. Visible element: The slot contains an element in not M state.
// 2. To-be-deleted element: The slot contains an element in M state.
// 3. Tombstone: The slot doesn't contain an element, but there is some other
//    element that probed this slot during its insertion.
// 4. Empty: The slot is unused.
// When an element is removed from the table, its slot becomes either a
// tombstone or an empty slot, depending on the number of displacements of the
// slot. In any case, the slot becomes available. When a handle is inserted
// into that slot, it becomes a visible element again.
//
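// For reference, a double-hashing probe sequence has the following shape
// (a sketch, assuming a power-of-two table of table_size slots; Hash1 and
// Hash2 are illustrative names, derived in the actual code from the probing
// seeds declared below):
//
//   uint32_t base = Hash1(key) & (table_size - 1);
//   uint32_t increment = Hash2(key) | 1;  // Odd, so every slot is reachable.
//   for (uint32_t i = 0; i < table_size; i++) {
//     uint32_t slot = (base + i * increment) & (table_size - 1);
//     // ... inspect the slot; stop on a match or an aborting condition ...
//   }
//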
///////////////////////////////////////////////////////////////////////////////
// Part 3: The clock algorithm
//
// We maintain a circular buffer with the handles available for eviction,
// which the clock algorithm traverses (using a "clock pointer") to pick the
// next victim. We use the hash table array as the circular buffer, and mark
// the handles that are evictable. For this we use different clock flags, namely
// NONE, LOW, MEDIUM, HIGH, that represent priorities: LOW, MEDIUM and HIGH
// represent how close an element is to being evictable, LOW being immediately
// evictable. NONE means the slot is not evictable. This is due to one of the
// following reasons:
//  (i) the slot doesn't contain an element, or
// (ii) the slot contains an element that is in R state, or
// (iii) the slot contains an element that was in R state but is
//      not any more, and the clock pointer has not swept through the
//      slot since the element stopped being referenced.
//
// The priority NONE is really only important for case (iii), as in the other
// two cases there are other metadata fields that already capture the state.
// When an element stops being referenced (and is not deleted), the clock
// algorithm must acknowledge this, and assign a non-NONE priority to make
// the element evictable again.
//
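// Concretely, one step of the sweep looks roughly like this (a sketch, not
// the actual implementation; clock_pointer_ and table_size are illustrative
// names):
//
//   ClockHandle* h = &array_[clock_pointer_ & (table_size - 1)];
//   clock_pointer_++;
//   switch (h->GetClockPriority()) {
//     case ClockHandle::ClockPriority::LOW:
//       // Immediately evictable: h is the victim.
//       break;
//     case ClockHandle::ClockPriority::MEDIUM:
//     case ClockHandle::ClockPriority::HIGH:
//       h->DecreaseClockPriority();  // Another chance, one level lower.
//       break;
//     case ClockHandle::ClockPriority::NONE:
//       break;  // Not evictable right now; skip.
//   }
//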
///////////////////////////////////////////////////////////////////////////////
// Part 4: Synchronization
//
// We provide the following synchronization guarantees:
// - Lookup is lock-free.
// - Release is lock-free, unless (i) no references to the element are left,
//   and (ii) it was marked for deletion or the user wishes to delete it when
//   releasing the last reference.
// - Insert and Erase still use a per-shard lock.
//
// Our hash table is lock-free, in the sense that system-wide progress is
// guaranteed, i.e., some thread is always able to make progress.
//
///////////////////////////////////////////////////////////////////////////////

// The load factor p is a real number in (0, 1) such that at all
// times at most a fraction p of all slots, without counting tombstones,
// are occupied by elements. This means that the probability that a
// random probe hits an empty slot is at least 1 - p, and thus at most
// 1 / (1 - p) probes are required on average. For example, p = 35% implies
// that between 1 and 2 probes are needed on average (bear in mind that
// this reasoning doesn't consider the effects of clustering over time).
// Because the size of the hash table is always rounded up to the next
// power of 2, p is really an upper bound on the actual load factor---the
// actual load factor is anywhere between p/2 and p. This is a bit wasteful,
// but bear in mind that slots only hold metadata, not actual values.
// Since space cost is dominated by the values (the LSM blocks),
// overprovisioning the table with metadata only increases the total cache
// space usage by a tiny fraction.
constexpr double kLoadFactor = 0.35;

// The user can exceed kLoadFactor if the sizes of the inserted values don't
// match estimated_value_size, or if strict_capacity_limit == false. To
// prevent performance from plunging, we set a strict upper bound on the load
// factor.
constexpr double kStrictLoadFactor = 0.7;
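
// To make these bounds concrete: at the strict limit p = 0.7, a random probe
// hits an empty slot with probability at least 1 - 0.7 = 0.3, so an
// unsuccessful search needs at most 1 / 0.3, i.e., roughly 3.3 probes on
// average; at the default p = 0.35, it needs at most 1 / 0.65, i.e., roughly
// 1.5 probes. (Illustrative arithmetic only; clustering is ignored.)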
// Arbitrary seeds.
constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;
// An experimental (under development!) alternative to LRUCache.
struct ClockHandle {
  void* value;
  Cache::DeleterFn deleter;
  uint32_t hash;
  size_t total_charge;
  std::array<char, kCacheKeySize> key_data;
  static constexpr uint8_t kExternalRefsOffset = 0;
  static constexpr uint8_t kSharedRefsOffset = 15;
  static constexpr uint8_t kExclusiveRefOffset = 30;
  static constexpr uint8_t kWillBeDeletedOffset = 31;

  enum Refs : uint32_t {
    // Number of external references to the slot.
    EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
                    << kExternalRefsOffset,  // Bits 0, ..., 14
    // Number of internal references plus external references to the slot.
    SHARED_REFS = ((uint32_t{1} << 15) - 1)
                  << kSharedRefsOffset,  // Bits 15, ..., 29
    // Whether a thread has an exclusive reference to the slot.
    EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset,  // Bit 30
    // Whether the handle will be deleted soon. When this bit is set, new
    // internal or external references to this handle stop being accepted.
    // There is an exception: external references can be created from
    // existing external references, or by converting from existing internal
    // references.
    WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset  // Bit 31

    // Shared references (i.e., external and internal references) and
    // exclusive references are our custom implementation of RW locks---
    // external and internal references are read locks, and exclusive
    // references are write locks. We prioritize readers, which never block;
    // in fact, they don't even use compare-and-swap operations. Using our
    // own implementation of RW locks allows us to save many atomic
    // operations by packing data more carefully. In particular:
    // - Combining EXTERNAL_REFS and SHARED_REFS allows us to convert an
    //   internal reference into an external reference in a single atomic
    //   arithmetic operation.
    // - Combining SHARED_REFS and WILL_BE_DELETED allows us to attempt to
    //   take a shared reference and check whether the entry is marked for
    //   deletion in a single atomic arithmetic operation.
  };

  static constexpr uint32_t kOneInternalRef = 0x8000;
  static constexpr uint32_t kOneExternalRef = 0x8001;
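
  // To illustrate the packing: kOneExternalRef = 0x8001 adds 1 to both the
  // EXTERNAL_REFS field (bit 0) and the SHARED_REFS field (bit 15), while
  // kOneInternalRef = 0x8000 bumps only SHARED_REFS. For example
  // (illustrative arithmetic only):
  //
  //   refs += kOneExternalRef;                    // take an external ref
  //   refs += kOneExternalRef - kOneInternalRef;  // adds 1: internal->external
  //
  // A handle holding 2 external refs and 1 internal ref thus stores
  // refs == 2 * 0x8001 + 0x8000 == 0x18002, i.e., EXTERNAL_REFS == 2 and
  // SHARED_REFS == 3.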
  std::atomic<uint32_t> refs;
  static constexpr uint8_t kIsElementOffset = 1;
  static constexpr uint8_t kClockPriorityOffset = 2;
  static constexpr uint8_t kIsHitOffset = 4;
  static constexpr uint8_t kCachePriorityOffset = 5;
  enum Flags : uint8_t {
    // Whether the slot is in use by an element.
    IS_ELEMENT = 1 << kIsElementOffset,
    // Clock priorities. Represents how close a handle is to being evictable.
    CLOCK_PRIORITY = 3 << kClockPriorityOffset,
    // Whether the handle has been looked up after its insertion.
    HAS_HIT = 1 << kIsHitOffset,
    // The value of Cache::Priority for the handle.
    CACHE_PRIORITY = 1 << kCachePriorityOffset,
  };

  std::atomic<uint8_t> flags;
  enum ClockPriority : uint8_t {
    NONE = (0 << kClockPriorityOffset),
    LOW = (1 << kClockPriorityOffset),
    MEDIUM = (2 << kClockPriorityOffset),
    HIGH = (3 << kClockPriorityOffset)
    // Priority is NONE if and only if
    // (i) the handle is not an element, or
    // (ii) the handle is an element but it is being referenced.
  };
  // The number of elements that hash to this slot or a lower one, but wind
  // up in this slot or a higher one.
  std::atomic<uint32_t> displacements;
  // Synchronization rules:
  //  - Use a shared reference when we want the handle's identity
  //    members (key_data, hash, value and IS_ELEMENT flag) to
  //    remain untouched, but don't need to modify them ourselves. The
  //    only updates that a shared reference allows are:
  //      * set CLOCK_PRIORITY to NONE;
  //      * set the HAS_HIT bit.
  //    Notice that these two types of updates are idempotent, so
  //    they don't require synchronization across shared references.
  //  - Use an exclusive reference when we want to modify any identity
  //    member or flag.
  //  - displacements can be modified without holding a reference.
  //  - refs is only modified through appropriate functions to
  //    take or release references.

  ClockHandle()
      : value(nullptr),
        deleter(nullptr),
        hash(0),
        total_charge(0),
        refs(0),
        flags(0),
        displacements(0) {
    SetWillBeDeleted(false);
    SetIsElement(false);
    SetClockPriority(ClockPriority::NONE);
    SetCachePriority(Cache::Priority::LOW);
    key_data.fill(0);
  }
  ClockHandle(const ClockHandle& other) { *this = other; }

  void operator=(const ClockHandle& other) {
    value = other.value;
    deleter = other.deleter;
    hash = other.hash;
    total_charge = other.total_charge;
    refs.store(other.refs);
    key_data = other.key_data;
    flags.store(other.flags);
    SetWillBeDeleted(other.WillBeDeleted());
    SetIsElement(other.IsElement());
    SetClockPriority(other.GetClockPriority());
    SetCachePriority(other.GetCachePriority());
    displacements.store(other.displacements);
  }
  Slice key() const { return Slice(key_data.data(), kCacheKeySize); }

  bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }

  bool IsElement() const { return flags & IS_ELEMENT; }
@@ -144,7 +284,7 @@ struct ClockHandle {
    if (is_element) {
      flags |= IS_ELEMENT;
    } else {
      flags &= static_cast<uint8_t>(~IS_ELEMENT);
    }
  }
@@ -152,7 +292,7 @@ struct ClockHandle {
  void SetHit() { flags |= HAS_HIT; }

  bool IsInClock() const {
    return GetClockPriority() != ClockHandle::ClockPriority::NONE;
  }
@@ -164,7 +304,7 @@ struct ClockHandle {
    if (priority == Cache::Priority::HIGH) {
      flags |= Flags::CACHE_PRIORITY;
    } else {
      flags &= static_cast<uint8_t>(~Flags::CACHE_PRIORITY);
    }
  }
@@ -173,7 +313,7 @@ struct ClockHandle {
  }

  void SetClockPriority(ClockPriority priority) {
    flags &= static_cast<uint8_t>(~Flags::CLOCK_PRIORITY);
    flags |= priority;
  }
@@ -182,14 +322,13 @@ struct ClockHandle {
                kClockPriorityOffset;
    assert(p > 0);
    p--;
    flags &= static_cast<uint8_t>(~Flags::CLOCK_PRIORITY);
    ClockPriority new_priority =
        static_cast<ClockPriority>(p << kClockPriorityOffset);
    flags |= new_priority;
  }

  void FreeData() {
    if (deleter) {
      (*deleter)(key(), value);
    }
@@ -232,17 +371,131 @@ struct ClockHandle {
    return total_charge - meta_charge;
  }

  inline bool IsEmpty() const {
    return !this->IsElement() && this->displacements == 0;
  }

  inline bool IsTombstone() const {
    return !this->IsElement() && this->displacements > 0;
  }

  inline bool Matches(const Slice& some_key, uint32_t some_hash) const {
    return this->IsElement() && this->hash == some_hash &&
           this->key() == some_key;
  }

  bool WillBeDeleted() const { return refs & WILL_BE_DELETED; }

  void SetWillBeDeleted(bool will_be_deleted) {
    if (will_be_deleted) {
      refs |= WILL_BE_DELETED;
    } else {
      refs &= ~WILL_BE_DELETED;
    }
  }
  // The following functions are for taking and releasing refs.

  // Tries to take an external ref. Returns true iff it succeeds.
  inline bool TryExternalRef() {
    // Optimistically increment, then back off if the handle turns out to be
    // write-locked or marked for deletion.
    if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
      return true;
    }
    refs -= kOneExternalRef;
    return false;
  }

  // Releases an external ref. Returns the new value (this is useful to
  // avoid an extra atomic read).
  inline uint32_t ReleaseExternalRef() { return refs -= kOneExternalRef; }

  // Take an external ref, assuming there is already one external ref
  // to the handle.
  void Ref() {
    // TODO(Guido) Is it okay to assume that the existing external reference
    // survives until this function returns?
    refs += kOneExternalRef;
  }

  // Tries to take an internal ref. Returns true iff it succeeds.
  inline bool TryInternalRef() {
    if (!((refs += kOneInternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
      return true;
    }
    refs -= kOneInternalRef;
    return false;
  }

  inline void ReleaseInternalRef() { refs -= kOneInternalRef; }

  // Tries to take an exclusive ref. Returns true iff it succeeds.
  inline bool TryExclusiveRef() {
    uint32_t will_be_deleted = refs & WILL_BE_DELETED;
    uint32_t expected = will_be_deleted;
    return refs.compare_exchange_strong(expected,
                                        EXCLUSIVE_REF | will_be_deleted);
  }

  // Repeatedly tries to take an exclusive reference, but stops as soon
  // as an external reference is detected (in this case the wait would
  // presumably be too long).
  inline bool TrySpinExclusiveRef() {
    uint32_t expected = 0;
    uint32_t will_be_deleted = 0;
    while (!refs.compare_exchange_strong(expected,
                                         EXCLUSIVE_REF | will_be_deleted)) {
      if (expected & EXTERNAL_REFS) {
        return false;
      }
      will_be_deleted = expected & WILL_BE_DELETED;
      expected = will_be_deleted;
    }
    return true;
  }

  inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); }
  // The following functions are for upgrading and downgrading refs.
  // They guarantee atomicity, i.e., no exclusive refs to the handle
  // can be taken by a different thread during the conversion.

  inline void ExclusiveToInternalRef() {
    refs += kOneInternalRef;
    ReleaseExclusiveRef();
  }

  inline void ExclusiveToExternalRef() {
    refs += kOneExternalRef;
    ReleaseExclusiveRef();
  }

  // TODO(Guido) Do we want to bound the loop and prepare the
  // algorithms to react to a failure?
  inline void InternalToExclusiveRef() {
    uint32_t expected = kOneInternalRef;
    uint32_t will_be_deleted = 0;
    while (!refs.compare_exchange_strong(expected,
                                         EXCLUSIVE_REF | will_be_deleted)) {
      will_be_deleted = expected & WILL_BE_DELETED;
      expected = kOneInternalRef | will_be_deleted;
    }
  }

  inline void InternalToExternalRef() {
    refs += kOneExternalRef - kOneInternalRef;
  }

  // TODO(Guido) Same concern.
  inline void ExternalToExclusiveRef() {
    uint32_t expected = kOneExternalRef;
    uint32_t will_be_deleted = 0;
    while (!refs.compare_exchange_strong(expected,
                                         EXCLUSIVE_REF | will_be_deleted)) {
      will_be_deleted = expected & WILL_BE_DELETED;
      expected = kOneExternalRef | will_be_deleted;
    }
  }
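
  // For illustration, a typical reader and writer interaction with this API
  // might look as follows (a sketch, not code from this file):
  //
  //   // Reader path: pin a matching element on behalf of a caller.
  //   if (h->TryInternalRef()) {
  //     if (h->Matches(key, hash)) {
  //       h->InternalToExternalRef();  // Hand an external ref to the caller.
  //     } else {
  //       h->ReleaseInternalRef();
  //     }
  //   }
  //
  //   // Writer path: take exclusive ownership before mutating identity
  //   // members.
  //   if (h->TryExclusiveRef()) {
  //     // ... modify key_data, value, flags ...
  //     h->ReleaseExclusiveRef();
  //   }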
};  // struct ClockHandle

class ClockHandleTable {
@@ -252,32 +505,55 @@ class ClockHandleTable {
  // Returns a pointer to a visible element matching the key/hash, or
  // nullptr if not present.
  ClockHandle* Lookup(const Slice& key, uint32_t hash);

  // Inserts a copy of h into the hash table.
  // Returns a pointer to the inserted handle, or nullptr if no available
  // slot was found. If an existing visible element matching the
  // key/hash is already present in the hash table, the argument old
  // is set to point to it; otherwise, it's set to nullptr.
  // Returns an exclusive reference to h, and no references to old.
  ClockHandle* Insert(ClockHandle* h, ClockHandle** old);

  // Removes h from the hash table. The handle must already be off clock.
  void Remove(ClockHandle* h);
  // Extracts the element information from a handle (src), and assigns it
  // to a hash table slot (dst). Doesn't touch displacements and refs,
  // which are maintained by the hash table algorithm.
  void Assign(ClockHandle* dst, ClockHandle* src);

  template <typename T>
  void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
                           bool apply_if_will_be_deleted) {
    for (uint32_t i = index_begin; i < index_end; i++) {
      ClockHandle* h = &array_[i];
      if (h->TryExclusiveRef()) {
        if (h->IsElement() &&
            (apply_if_will_be_deleted || !h->WillBeDeleted())) {
          // Hand the exclusive ref over to func, which is now responsible
          // for releasing it.
          func(h);
        } else {
          h->ReleaseExclusiveRef();
        }
      }
    }
  }
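
  // For illustration, a caller could visit all live elements like this
  // (a sketch; GetTableSize() is an assumed accessor for 1 << length_bits_,
  // and note that func receives an exclusive ref that it must release):
  //
  //   table.ApplyToEntriesRange(
  //       [](ClockHandle* h) {
  //         // ... inspect h->key(), h->value ...
  //         h->ReleaseExclusiveRef();
  //       },
  //       0, table.GetTableSize(), /*apply_if_will_be_deleted=*/false);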
  template <typename T>
  void ConstApplyToEntriesRange(T func, uint32_t index_begin,
                                uint32_t index_end,
                                bool apply_if_will_be_deleted) const {
    for (uint32_t i = index_begin; i < index_end; i++) {
      ClockHandle* h = &array_[i];
      if (h->TryExclusiveRef()) {
        if (h->IsElement() &&
            (apply_if_will_be_deleted || !h->WillBeDeleted())) {
          func(h);
        }
        h->ReleaseExclusiveRef();
      }
    }
  }
@@ -295,28 +571,38 @@ class ClockHandleTable {
 private:
  friend class ClockCacheShard;

  int FindElement(const Slice& key, uint32_t hash, uint32_t& probe);

  int FindAvailableSlot(const Slice& key, uint32_t& probe);

  int FindElementOrAvailableSlot(const Slice& key, uint32_t hash,
                                 uint32_t& probe);

  // Returns the index of the first slot probed (hashing with
  // the given key) with a handle e such that match(e) is true.
  // Otherwise, if no match is found, returns -1.
  // At every step, the function first tests whether match(e) holds.
  // If it's false, it evaluates stop(e) to decide whether the
  // search should be aborted, and in the affirmative returns -1.
  // For every handle e probed except the last one, the function runs
  // update(e). We say a probe to a handle e is aborting if match(e) is
  // false and stop(e) is true. The argument probe is one more than the
  // last non-aborting probe during the call. This is so that the
  // variable can be used to keep track of progress across consecutive
  // calls to FindSlot.
  inline int FindSlot(const Slice& key, std::function<bool(ClockHandle*)> match,
                      std::function<bool(ClockHandle*)> stop,
                      std::function<void(ClockHandle*)> update,
                      uint32_t& probe);
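
  // As an illustration of how the callbacks compose, FindElement could be
  // expressed in terms of FindSlot roughly as follows (a sketch, not the
  // actual implementation):
  //
  //   uint32_t probe = 0;
  //   int slot = FindSlot(
  //       key,
  //       /*match=*/[&](ClockHandle* h) { return h->Matches(key, hash); },
  //       /*stop=*/[&](ClockHandle* h) { return h->IsEmpty(); },
  //       /*update=*/[](ClockHandle* h) {},
  //       probe);
  //
  // Stopping at an empty slot is safe: while an element is in the table,
  // every slot it probed before its own keeps a nonzero displacement count,
  // so no slot preceding it in its probe sequence can be empty.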
  // After a failed FindSlot call (i.e., with answer -1), this function
  // decrements all displacements, starting from the 0-th probe.
  void Rollback(const Slice& key, uint32_t probe);
  // Number of hash bits used for table index.
  // The size of the table is 1 << length_bits_.
  int length_bits_;

  // For faster computation of ModTableSize.
  const uint32_t length_bits_mask_;

  // Number of elements in the table.
@@ -345,10 +631,10 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
  void SetStrictCapacityLimit(bool strict_capacity_limit) override;

  // Like Cache methods, but with an extra "hash" parameter.
  // Insert an item into the hash table and, if handle is null, make it
  // evictable by the clock algorithm. Older items are evicted as necessary.
  // If the cache is full and free_handle_on_fail is true, the item is deleted
  // and handle is set to nullptr.
  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
                Cache::DeleterFn deleter, Cache::Handle** handle,
                Cache::Priority priority) override;
@@ -393,13 +679,18 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
 private:
  friend class ClockCache;

  // Makes an element evictable by clock.
  void ClockOn(ClockHandle* h);

  // Makes an element non-evictable.
  void ClockOff(ClockHandle* h);

  // Requires an exclusive ref on h.
  void Evict(ClockHandle* h);

  // Free some space following strict clock policy until enough space
  // to hold (usage_ + charge) is freed or there are no evictable elements.
  void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);

  // Returns the charge of a single handle.
@@ -436,9 +727,6 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
  // Memory size for entries residing in the cache.
  size_t usage_;

  // mutex_ protects the following state.
  // We don't count mutex_ as the cache's internal state so semantically we
  // don't mind mutex_ invoking the non-const actions.