//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#pragma once

#include <array>
#include <atomic>
#include <functional>
#include <memory>
#include <string>

#include "cache/cache_key.h"
#include "cache/sharded_cache.h"
#include "port/lang.h"
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/cache.h"
#include "rocksdb/secondary_cache.h"
#include "util/autovector.h"
#include "util/distributed_mutex.h"

namespace ROCKSDB_NAMESPACE {

namespace clock_cache {

// Block cache implementation using a lock-free open-address hash table
// and clock eviction.

///////////////////////////////////////////////////////////////////////////////
// Part 1: Handles
//
// Every slot in the hash table is a ClockHandle. A handle can be in a few
// different states, which stem from the fact that handles can be externally
// referenced and, thus, can't always be immediately evicted when a delete
// operation is executed or when they are replaced by a new version (via an
// insert of the same key). Concretely, the state of a handle is defined by
// the following two properties:
// (R) Externally referenced: A handle can be referenced externally, or not.
//     Importantly, a handle can be evicted if and only if it's not
//     referenced. In particular, when a handle becomes referenced, it's
//     temporarily taken out of clock until all references to it are
//     released.
// (M) Marked for deletion (or invisible): A handle is marked for deletion
//     when an operation attempts to delete it, but the handle is externally
//     referenced, so it can't be immediately deleted. When this mark is
//     placed, lookups will no longer be able to find it. Consequently, no
//     more external references will be taken to the handle. When a handle
//     is marked for deletion, we also say it's invisible.
// These properties induce 4 different states, with transitions defined as
// follows:
// - Not M --> M: When a handle is deleted or replaced by a new version, but
//   not immediately evicted.
// - M --> not M: This cannot happen. Once a handle is marked for deletion,
//   there is no going back.
// - R --> not R: When all references to a handle are released.
// - Not R --> R: When an unreferenced handle becomes referenced. This can
//   only happen if the handle is visible, since references to a handle can
//   only be created when it's visible.
//
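// As a concrete illustration of these states, consider erasing an entry
// while a client still holds a handle to it. The following usage sketch
// relies only on the public Cache API plus ExperimentalNewClockCache
// (declared at the bottom of this file); it is illustrative, not part of
// this header, and the construction parameters are arbitrary:
//
//   std::shared_ptr<Cache> cache = ExperimentalNewClockCache(
//       /*capacity=*/1 << 20, /*estimated_value_size=*/1 << 10,
//       /*num_shard_bits=*/0, /*strict_capacity_limit=*/false,
//       kDontChargeCacheMetadata);
//   std::string key(kCacheKeySize, 'k');  // keys must be kCacheKeySize bytes
//   Cache::Handle* h = nullptr;
//   cache->Insert(key, /*value=*/new int(42), /*charge=*/1 << 10,
//                 [](const Slice& /*k*/, void* v) { delete static_cast<int*>(v); },
//                 &h);                      // the returned handle is in state R
//   cache->Erase(key);                      // still referenced, so it moves to
//                                           // state M: invisible, not yet freed
//   assert(cache->Lookup(key) == nullptr);  // lookups can't see invisible handles
//   cache->Release(h);                      // last external ref released: the
//                                           // handle can now actually be deleted
//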
///////////////////////////////////////////////////////////////////////////////
// Part 2: Hash table structure
//
// Internally, the cache uses an open-addressed hash table to index the
// handles. We keep a per-slot displacement counter to track probes that
// passed over the slot (this is how tombstones are represented). Probes are
// generated with double hashing (but the code can easily be modified to use
// other probing schemes, like linear probing). Because of the tombstones and
// the two possible visibility states of a handle, the table slots (we use
// the word "slot" to refer to handles that are not necessarily valid
// key-value elements) can be in 4 different states:
// 1. Visible element: The slot contains an element in not M state.
// 2. To-be-deleted element: The slot contains an element in M state.
// 3. Tombstone: The slot doesn't contain an element, but there is some other
//    element that probed this slot during its insertion.
// 4. Empty: The slot is unused.
// When a to-be-deleted element is finally removed from the table, the slot
// either transitions to a tombstone or to an empty slot, depending on the
// number of displacements of the slot. In any case, the slot becomes
// available. When a handle is inserted into that slot, it becomes a visible
// element again.
//
///////////////////////////////////////////////////////////////////////////////
// Part 3: The clock algorithm
//
// We maintain a circular buffer with the handles available for eviction,
// which the clock algorithm traverses (using a "clock pointer") to pick the
// next victim. We use the hash table array as the circular buffer, and mark
// the handles that are evictable. For this we use different clock flags,
// namely NONE, LOW, MEDIUM and HIGH, that represent priorities: LOW, MEDIUM
// and HIGH represent how close an element is to being evictable, LOW being
// immediately evictable. NONE means the slot is not evictable. This is due
// to one of the following reasons:
// (i) the slot doesn't contain an element, or
// (ii) the slot contains an element that is in R state, or
// (iii) the slot contains an element that was in R state but no longer is,
//    and the clock pointer has not swept through the slot since the element
//    stopped being referenced.
//
// The priority NONE is really only important for case (iii), as in the other
// two cases there are other metadata fields that already capture the state.
// When an element stops being referenced (and is not deleted), the clock
// algorithm must acknowledge this, and assign a non-NONE priority to make
// the element evictable again.
//
///////////////////////////////////////////////////////////////////////////////
// Part 4: Synchronization
//
// We provide the following synchronization guarantees:
// - Lookup is lock-free.
// - Release is lock-free, unless (i) no references to the element are left,
//   and (ii) it was marked for deletion or the user requested deletion upon
//   releasing the last reference.
// - Insert and Erase still use a per-shard lock.
//
// Our hash table is lock-free, in the sense that system-wide progress is
// guaranteed, i.e., some thread is always able to make progress.
//
///////////////////////////////////////////////////////////////////////////////

// The load factor p is a real number in (0, 1) such that at all
// times at most a fraction p of all slots, without counting tombstones,
// are occupied by elements. This means that the probability that a
// random probe hits an empty slot is at least 1 - p, and thus at most
// 1 / (1 - p) probes are required on average. For example, p = 35% implies
// that, on average, between 1 and 2 probes are needed (bear in mind that
// this reasoning doesn't consider the effects of clustering over time).
// Because the size of the hash table is always rounded up to the next
// power of 2, p is really an upper bound on the actual load factor---the
// actual load factor is anywhere between p/2 and p. This is a bit wasteful,
// but bear in mind that slots only hold metadata, not actual values.
// Since space cost is dominated by the values (the LSM blocks),
// overprovisioning the table with metadata only increases the total cache
// space usage by a tiny fraction.
constexpr double kLoadFactor = 0.35;

// The user can exceed kLoadFactor if the sizes of the inserted values don't
// match estimated_value_size, or if strict_capacity_limit == false. To
// prevent performance from plunging, we set a strict upper bound on the
// load factor.
constexpr double kStrictLoadFactor = 0.7;
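// A back-of-the-envelope sketch of the probe counts these two constants
// imply (illustrative arithmetic only; ExpectedProbes is not part of this
// header):
//
//   // With a fraction p of the slots occupied, a uniformly random probe
//   // hits an empty slot with probability at least 1 - p, so a probe
//   // sequence needs at most 1 / (1 - p) probes in expectation.
//   constexpr double ExpectedProbes(double p) { return 1.0 / (1.0 - p); }
//   // ExpectedProbes(kLoadFactor)       == 1 / 0.65 ~= 1.5
//   // ExpectedProbes(kStrictLoadFactor) == 1 / 0.30 ~= 3.3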
// Arbitrary seeds.
constexpr uint32_t kProbingSeed1 = 0xbc9f1d34;
constexpr uint32_t kProbingSeed2 = 0x7a2bb9d5;

// An experimental (under development!) alternative to LRUCache.
struct ClockHandle {
  void* value;
  Cache::DeleterFn deleter;
  uint32_t hash;
  size_t total_charge;
  std::array<char, kCacheKeySize> key_data;

  static constexpr uint8_t kExternalRefsOffset = 0;
  static constexpr uint8_t kSharedRefsOffset = 15;
  static constexpr uint8_t kExclusiveRefOffset = 30;
  static constexpr uint8_t kWillBeDeletedOffset = 31;

  enum Refs : uint32_t {
    // Number of external references to the slot.
    EXTERNAL_REFS = ((uint32_t{1} << 15) - 1)
                    << kExternalRefsOffset,  // Bits 0, ..., 14
    // Number of internal references plus external references to the slot.
    SHARED_REFS = ((uint32_t{1} << 15) - 1)
                  << kSharedRefsOffset,  // Bits 15, ..., 29
    // Whether a thread has an exclusive reference to the slot.
    EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset,  // Bit 30
    // Whether the handle will be deleted soon. When this bit is set, new
    // internal or external references to this handle stop being accepted.
    // There is an exception: external references can be created from
    // existing external references, or by converting existing internal
    // references.
    WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset  // Bit 31

    // Shared references (i.e., external and internal references) and
    // exclusive references are our custom implementation of RW locks---
    // external and internal references are read locks, and exclusive
    // references are write locks. We prioritize readers, which never block;
    // in fact, they don't even use compare-and-swap operations. Using our
    // own implementation of RW locks allows us to save many atomic
    // operations by packing data more carefully. In particular:
    // - Combining EXTERNAL_REFS and SHARED_REFS allows us to convert an
    //   internal reference into an external reference in a single atomic
    //   arithmetic operation.
    // - Combining SHARED_REFS and WILL_BE_DELETED allows us to attempt to
    //   take a shared reference and check whether the entry is marked for
    //   deletion in a single atomic arithmetic operation.
  };

  static constexpr uint32_t kOneInternalRef = 0x8000;
  static constexpr uint32_t kOneExternalRef = 0x8001;
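  // A worked example of the bit packing above (illustrative only):
  //
  //   kOneInternalRef == 0x8000 == 1 << kSharedRefsOffset
  //       -> one atomic add bumps SHARED_REFS only.
  //   kOneExternalRef == 0x8001 == (1 << kSharedRefsOffset) | 1
  //       -> one atomic add bumps SHARED_REFS and EXTERNAL_REFS together.
  //
  // So taking an external ref is a single fetch-add, and converting an
  // internal ref into an external ref is also a single fetch-add of
  // kOneExternalRef - kOneInternalRef == 1 (see InternalToExternalRef
  // below). Because EXCLUSIVE_REF and WILL_BE_DELETED live in the same
  // word, the value returned by that same fetch-add already tells the
  // caller whether the ref must be rolled back (see TryExternalRef and
  // TryInternalRef below).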
  std::atomic<uint32_t> refs;

  static constexpr uint8_t kIsElementOffset = 1;
  static constexpr uint8_t kClockPriorityOffset = 2;
  static constexpr uint8_t kIsHitOffset = 4;
  static constexpr uint8_t kCachePriorityOffset = 5;

  enum Flags : uint8_t {
    // Whether the slot is in use by an element.
    IS_ELEMENT = 1 << kIsElementOffset,
    // Clock priorities. Represents how close a handle is to being evictable.
    CLOCK_PRIORITY = 3 << kClockPriorityOffset,
    // Whether the handle has been looked up after its insertion.
    HAS_HIT = 1 << kIsHitOffset,
    // The value of Cache::Priority for the handle.
    CACHE_PRIORITY = 1 << kCachePriorityOffset,
  };

  std::atomic<uint8_t> flags;

  enum ClockPriority : uint8_t {
    NONE = (0 << kClockPriorityOffset),
    LOW = (1 << kClockPriorityOffset),
    MEDIUM = (2 << kClockPriorityOffset),
    HIGH = (3 << kClockPriorityOffset)
  };

  // The number of elements that hash to this slot or a lower one, but wind
  // up in this slot or a higher one.
  std::atomic<uint32_t> displacements;

  // Synchronization rules:
  // - Use a shared reference when we need the handle's identity
  //   members (key_data, hash, value and the IS_ELEMENT flag) to
  //   remain untouched, but don't need to modify them. The only updates
  //   that a shared reference allows are:
  //   * set CLOCK_PRIORITY to NONE;
  //   * set the HAS_HIT bit.
  //   Notice that these two types of updates are idempotent, so
  //   they don't require synchronization across shared references.
  // - Use an exclusive reference when we need the identity members
  //   to remain untouched and additionally want to modify any
  //   identity member or flag.
  // - displacements can be modified without holding a reference.
  // - refs is only modified through appropriate functions to
  //   take or release references.

  ClockHandle()
      : value(nullptr),
        deleter(nullptr),
        hash(0),
        total_charge(0),
        refs(0),
        flags(0),
        displacements(0) {
    SetWillBeDeleted(false);
    SetIsElement(false);
    SetClockPriority(ClockPriority::NONE);
    SetCachePriority(Cache::Priority::LOW);
    key_data.fill(0);
  }

  ClockHandle(const ClockHandle& other) { *this = other; }

  void operator=(const ClockHandle& other) {
    value = other.value;
    deleter = other.deleter;
    hash = other.hash;
    total_charge = other.total_charge;
    refs.store(other.refs);
    key_data = other.key_data;
    flags.store(other.flags);
    SetWillBeDeleted(other.WillBeDeleted());
    SetIsElement(other.IsElement());
    SetClockPriority(other.GetClockPriority());
    SetCachePriority(other.GetCachePriority());
    displacements.store(other.displacements);
  }

  Slice key() const { return Slice(key_data.data(), kCacheKeySize); }

  bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }

  bool IsElement() const { return flags & IS_ELEMENT; }

  void SetIsElement(bool is_element) {
    if (is_element) {
      flags |= IS_ELEMENT;
    } else {
      flags &= static_cast<uint8_t>(~IS_ELEMENT);
    }
  }

  bool HasHit() const { return flags & HAS_HIT; }

  void SetHit() { flags |= HAS_HIT; }

  bool IsInClock() const {
    return GetClockPriority() != ClockHandle::ClockPriority::NONE;
  }

  Cache::Priority GetCachePriority() const {
    return static_cast<Cache::Priority>(flags & CACHE_PRIORITY);
  }

  void SetCachePriority(Cache::Priority priority) {
    if (priority == Cache::Priority::HIGH) {
      flags |= Flags::CACHE_PRIORITY;
    } else {
      flags &= static_cast<uint8_t>(~Flags::CACHE_PRIORITY);
    }
  }

  ClockPriority GetClockPriority() const {
    return static_cast<ClockPriority>(flags & Flags::CLOCK_PRIORITY);
  }

  void SetClockPriority(ClockPriority priority) {
    flags &= static_cast<uint8_t>(~Flags::CLOCK_PRIORITY);
    flags |= priority;
  }

  void DecreaseClockPriority() {
    uint8_t p = static_cast<uint8_t>(flags & Flags::CLOCK_PRIORITY) >>
                kClockPriorityOffset;
    assert(p > 0);
    p--;
    flags &= static_cast<uint8_t>(~Flags::CLOCK_PRIORITY);
    ClockPriority new_priority =
        static_cast<ClockPriority>(p << kClockPriorityOffset);
    flags |= new_priority;
  }
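  // For reference, with kClockPriorityOffset == 2 the CLOCK_PRIORITY field
  // stores the following raw values (illustrative only):
  //
  //   NONE   == 0b0000 ( 0)
  //   LOW    == 0b0100 ( 4)
  //   MEDIUM == 0b1000 ( 8)
  //   HIGH   == 0b1100 (12)
  //
  // DecreaseClockPriority moves one step down this ladder (HIGH -> MEDIUM ->
  // LOW), which is how an element is gradually aged toward eviction as the
  // clock pointer sweeps over it.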
  void FreeData() {
    if (deleter) {
      (*deleter)(key(), value);
    }
  }

  // Calculate the memory usage by metadata.
  inline size_t CalcMetaCharge(
      CacheMetadataChargePolicy metadata_charge_policy) const {
    if (metadata_charge_policy != kFullChargeCacheMetadata) {
      return 0;
    } else {
      // #ifdef ROCKSDB_MALLOC_USABLE_SIZE
      //   return malloc_usable_size(
      //       const_cast<void*>(static_cast<const void*>(this)));
      // #else
      // TODO(Guido) malloc_usable_size only works when we call it on
      // a pointer allocated with malloc. Because our handles are all
      // allocated in a single shot as an array, the user can't call
      // CalcMetaCharge (or CalcTotalCharge or GetCharge) on a handle
      // pointer returned by the cache. Moreover, malloc_usable_size
      // expects a heap-allocated handle, but sometimes in our code we
      // wish to pass a stack-allocated handle (this is only a performance
      // concern).
      // What is the right way to compute metadata charges with pre-allocated
      // handles?
      return sizeof(ClockHandle);
      // #endif
    }
  }

  inline void CalcTotalCharge(
      size_t charge, CacheMetadataChargePolicy metadata_charge_policy) {
    total_charge = charge + CalcMetaCharge(metadata_charge_policy);
  }

  inline size_t GetCharge(
      CacheMetadataChargePolicy metadata_charge_policy) const {
    size_t meta_charge = CalcMetaCharge(metadata_charge_policy);
    assert(total_charge >= meta_charge);
    return total_charge - meta_charge;
  }

  inline bool IsEmpty() const {
    return !this->IsElement() && this->displacements == 0;
  }

  inline bool IsTombstone() const {
    return !this->IsElement() && this->displacements > 0;
  }

  inline bool Matches(const Slice& some_key, uint32_t some_hash) const {
    return this->IsElement() && this->hash == some_hash &&
           this->key() == some_key;
  }

  bool WillBeDeleted() const { return refs & WILL_BE_DELETED; }

  void SetWillBeDeleted(bool will_be_deleted) {
    if (will_be_deleted) {
      refs |= WILL_BE_DELETED;
    } else {
      refs &= ~WILL_BE_DELETED;
    }
  }

  // The following functions are for taking and releasing refs.

  // Tries to take an external ref. Returns true iff it succeeds.
  inline bool TryExternalRef() {
    if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
      return true;
    }
    refs -= kOneExternalRef;
    return false;
  }

  // Releases an external ref. Returns the new value (this is useful to
  // avoid an extra atomic read).
  inline uint32_t ReleaseExternalRef() { return refs -= kOneExternalRef; }

  // Take an external ref, assuming there is already one external ref
  // to the handle.
  void Ref() {
    // TODO(Guido) Is it okay to assume that the existing external reference
    // survives until this function returns?
    refs += kOneExternalRef;
  }

  // Tries to take an internal ref. Returns true iff it succeeds.
  inline bool TryInternalRef() {
    if (!((refs += kOneInternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
      return true;
    }
    refs -= kOneInternalRef;
    return false;
  }

  inline void ReleaseInternalRef() { refs -= kOneInternalRef; }

  // Tries to take an exclusive ref. Returns true iff it succeeds.
  inline bool TryExclusiveRef() {
    uint32_t will_be_deleted = refs & WILL_BE_DELETED;
    uint32_t expected = will_be_deleted;
    return refs.compare_exchange_strong(expected,
                                        EXCLUSIVE_REF | will_be_deleted);
  }

  // Repeatedly tries to take an exclusive reference, but stops as soon
  // as an external reference is detected (in this case the wait would
  // presumably be too long).
  inline bool TrySpinExclusiveRef() {
    uint32_t expected = 0;
    uint32_t will_be_deleted = 0;
    while (!refs.compare_exchange_strong(expected,
                                         EXCLUSIVE_REF | will_be_deleted)) {
      if (expected & EXTERNAL_REFS) {
        return false;
      }
      will_be_deleted = expected & WILL_BE_DELETED;
      expected = will_be_deleted;
    }
    return true;
  }

  inline void ReleaseExclusiveRef() { refs.fetch_and(~EXCLUSIVE_REF); }

  // The following functions are for upgrading and downgrading refs.
  // They guarantee atomicity, i.e., no exclusive refs to the handle
  // can be taken by a different thread during the conversion.

  inline void ExclusiveToInternalRef() {
    refs += kOneInternalRef;
    ReleaseExclusiveRef();
  }

  inline void ExclusiveToExternalRef() {
    refs += kOneExternalRef;
    ReleaseExclusiveRef();
  }

  // TODO(Guido) Do we want to bound the loop and prepare the
  // algorithms to react to a failure?
  inline void InternalToExclusiveRef() {
    uint32_t expected = kOneInternalRef;
    uint32_t will_be_deleted = 0;
    while (!refs.compare_exchange_strong(expected,
                                         EXCLUSIVE_REF | will_be_deleted)) {
      will_be_deleted = expected & WILL_BE_DELETED;
      expected = kOneInternalRef | will_be_deleted;
    }
  }

  inline void InternalToExternalRef() {
    refs += kOneExternalRef - kOneInternalRef;
  }

  // TODO(Guido) Same concern.
  inline void ExternalToExclusiveRef() {
    uint32_t expected = kOneExternalRef;
    uint32_t will_be_deleted = 0;
    while (!refs.compare_exchange_strong(expected,
                                         EXCLUSIVE_REF | will_be_deleted)) {
      will_be_deleted = expected & WILL_BE_DELETED;
      expected = kOneExternalRef | will_be_deleted;
    }
  }
};  // struct ClockHandle
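// A minimal sketch of how these refs are meant to be combined, roughly
// mirroring a lookup path (illustrative only: TryPin is a hypothetical
// helper; the real logic lives in ClockHandleTable and ClockCacheShard
// below):
//
//   bool TryPin(ClockHandle* h, const Slice& key, uint32_t hash) {
//     if (!h->TryInternalRef()) {    // "read lock"; fails if the slot is
//       return false;                // exclusively held or will be deleted
//     }
//     if (!h->Matches(key, hash)) {  // identity members are stable while a
//       h->ReleaseInternalRef();     // shared ref is held
//       return false;
//     }
//     h->InternalToExternalRef();    // hand an external ref to the caller
//     return true;
//   }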
class ClockHandleTable {
 public:
  explicit ClockHandleTable(int hash_bits);
  ~ClockHandleTable();

  // Returns a pointer to a visible element matching the key/hash, or
  // nullptr if not present.
  ClockHandle* Lookup(const Slice& key, uint32_t hash);

  // Inserts a copy of h into the hash table.
  // Returns a pointer to the inserted handle, or nullptr if no available
  // slot was found. If an existing visible element matching the
  // key/hash is already present in the hash table, the argument old
  // is set to point to it; otherwise, it's set to nullptr.
  // Returns an exclusive reference to h, and no references to old.
  ClockHandle* Insert(ClockHandle* h, ClockHandle** old);

  // Removes h from the hash table. The handle must already be off clock.
  void Remove(ClockHandle* h);

  // Extracts the element information from a handle (src), and assigns it
  // to a hash table slot (dst). Doesn't touch displacements and refs,
  // which are maintained by the hash table algorithm.
  void Assign(ClockHandle* dst, ClockHandle* src);

  template <typename T>
  void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
                           bool apply_if_will_be_deleted) {
    for (uint32_t i = index_begin; i < index_end; i++) {
      ClockHandle* h = &array_[i];
      if (h->TryExclusiveRef()) {
        if (h->IsElement() &&
            (apply_if_will_be_deleted || !h->WillBeDeleted())) {
          // Hand the exclusive ref over to func, which is now responsible
          // for releasing it.
          func(h);
        } else {
          h->ReleaseExclusiveRef();
        }
      }
    }
  }

  template <typename T>
  void ConstApplyToEntriesRange(T func, uint32_t index_begin,
                                uint32_t index_end,
                                bool apply_if_will_be_deleted) const {
    for (uint32_t i = index_begin; i < index_end; i++) {
      ClockHandle* h = &array_[i];
      if (h->TryExclusiveRef()) {
        if (h->IsElement() &&
            (apply_if_will_be_deleted || !h->WillBeDeleted())) {
          func(h);
        }
        h->ReleaseExclusiveRef();
      }
    }
  }

  uint32_t GetTableSize() const { return uint32_t{1} << length_bits_; }

  int GetLengthBits() const { return length_bits_; }

  uint32_t GetOccupancyLimit() const { return occupancy_limit_; }

  uint32_t GetOccupancy() const { return occupancy_; }

  // Returns x mod 2^{length_bits_}.
  uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }

 private:
  friend class ClockCacheShard;

  int FindElement(const Slice& key, uint32_t hash, uint32_t& probe);

  int FindAvailableSlot(const Slice& key, uint32_t& probe);

  int FindElementOrAvailableSlot(const Slice& key, uint32_t hash,
                                 uint32_t& probe);

  // Returns the index of the first slot probed (hashing with
  // the given key) with a handle e such that match(e) is true.
  // At every step, the function first tests whether match(e) holds.
  // If it's false, it evaluates stop(e) to decide whether the
  // search should be aborted, and in the affirmative returns -1.
  // For every handle e probed except the last one, the function runs
  // update(e).
  // We say a probe to a handle e is aborting if match(e) is
  // false and stop(e) is true. The argument probe is one more than the
  // last non-aborting probe during the call. This is so that the
  // variable can be used to keep track of progress across consecutive
  // calls to FindSlot.
  inline int FindSlot(const Slice& key,
                      std::function<bool(ClockHandle*)> match,
                      std::function<bool(ClockHandle*)> stop,
                      std::function<void(ClockHandle*)> update,
                      uint32_t& probe);
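  // As a hypothetical illustration of the FindSlot interface (the callers
  // above -- FindElement, FindAvailableSlot, FindElementOrAvailableSlot --
  // are implemented in clock_cache.cc), an element lookup could be phrased
  // roughly like this:
  //
  //   uint32_t probe = 0;
  //   int slot = FindSlot(
  //       key,
  //       /*match=*/[&](ClockHandle* h) { return h->Matches(key, hash); },
  //       /*stop=*/[&](ClockHandle* h) { return h->IsEmpty(); },
  //       /*update=*/[](ClockHandle* /*h*/) {},
  //       probe);
  //
  // (Simplified: the real code also has to deal with visibility and with
  // taking refs on the matched handle.)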
  // After a failed FindSlot call (i.e., with answer -1), this function
  // decrements all displacements, starting from the 0-th probe.
  void Rollback(const Slice& key, uint32_t probe);

  // Number of hash bits used for table index.
  // The size of the table is 1 << length_bits_.
  int length_bits_;

  // For faster computation of ModTableSize.
  const uint32_t length_bits_mask_;

  // Number of elements in the table.
  uint32_t occupancy_;

  // Maximum number of elements the user can store in the table.
  uint32_t occupancy_limit_;

  std::unique_ptr<ClockHandle[]> array_;
};  // class ClockHandleTable

// A single shard of sharded cache.
class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
 public:
  ClockCacheShard(size_t capacity, size_t estimated_value_size,
                  bool strict_capacity_limit,
                  CacheMetadataChargePolicy metadata_charge_policy);
  ~ClockCacheShard() override = default;

  // Separate from constructor so caller can easily make an array of
  // ClockCacheShard objects. If current usage is more than new capacity,
  // the function will attempt to free the needed space.
  void SetCapacity(size_t capacity) override;

  // Set the flag to reject insertion if cache is full.
  void SetStrictCapacityLimit(bool strict_capacity_limit) override;

  // Like Cache methods, but with an extra "hash" parameter.
  // Insert an item into the hash table and, if handle is null, make it
  // evictable by the clock algorithm. Older items are evicted as necessary.
  // If the cache is full and free_handle_on_fail is true, the item is
  // deleted and handle is set to nullptr.
  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
                Cache::DeleterFn deleter, Cache::Handle** handle,
                Cache::Priority priority) override;

  Status Insert(const Slice& key, uint32_t hash, void* value,
                const Cache::CacheItemHelper* helper, size_t charge,
                Cache::Handle** handle, Cache::Priority priority) override {
    return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
  }

  Cache::Handle* Lookup(const Slice& key, uint32_t hash,
                        const Cache::CacheItemHelper* /*helper*/,
                        const Cache::CreateCallback& /*create_cb*/,
                        Cache::Priority /*priority*/, bool /*wait*/,
                        Statistics* /*stats*/) override {
    return Lookup(key, hash);
  }

  Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;

  bool Release(Cache::Handle* handle, bool /*useful*/,
               bool erase_if_last_ref) override {
    return Release(handle, erase_if_last_ref);
  }

  bool IsReady(Cache::Handle* /*handle*/) override { return true; }

  void Wait(Cache::Handle* /*handle*/) override {}

  bool Ref(Cache::Handle* handle) override;

  bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;

  void Erase(const Slice& key, uint32_t hash) override;

  size_t GetUsage() const override;

  size_t GetPinnedUsage() const override;

  void ApplyToSomeEntries(
      const std::function<void(const Slice& key, void* value, size_t charge,
                               Cache::DeleterFn deleter)>& callback,
      uint32_t average_entries_per_lock, uint32_t* state) override;

  void EraseUnRefEntries() override;

  std::string GetPrintableOptions() const override;

 private:
  friend class ClockCache;

  // Makes an element evictable by clock.
  void ClockOn(ClockHandle* h);

  // Makes an element non-evictable.
  void ClockOff(ClockHandle* h);

  // Requires an exclusive ref on h.
  void Evict(ClockHandle* h);

  // Free some space following strict clock policy until enough space
  // to hold (usage_ + charge) is freed or there are no evictable elements.
  void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);

  // Returns the charge of a single handle.
  static size_t CalcEstimatedHandleCharge(
      size_t estimated_value_size,
      CacheMetadataChargePolicy metadata_charge_policy);

  // Returns the number of bits used to hash an element in the hash
  // table.
  static int CalcHashBits(size_t capacity, size_t estimated_value_size,
                          CacheMetadataChargePolicy metadata_charge_policy);
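  // A rough worked example of the quantity CalcHashBits computes (the exact
  // formula lives in clock_cache.cc; the numbers below are illustrative
  // only):
  //
  //   capacity                = 32 MiB
  //   estimated handle charge ~ estimated_value_size = 8 KiB
  //   max number of elements  ~ 32 MiB / 8 KiB = 4096
  //   slots needed            ~ 4096 / kLoadFactor ~ 11703
  //   table size              = next power of two = 16384 = 2^14
  //   => 14 hash bits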
  // Initialized before use.
  size_t capacity_;

  // Whether to reject insertion if cache reaches its full capacity.
  bool strict_capacity_limit_;

  uint32_t clock_pointer_;

  // ------------^^^^^^^^^^^^^-----------
  // Not frequently modified data members
  // ------------------------------------
  //
  // We separate data members that are updated frequently from the ones that
  // are not frequently updated so that they don't share the same cache line,
  // which would lead to false sharing.
  //
  // ------------------------------------
  // Frequently modified data members
  // ------------vvvvvvvvvvvvv-----------
  ClockHandleTable table_;

  // Memory size for entries residing in the cache.
  size_t usage_;

  // mutex_ protects the following state.
  // We don't count mutex_ as the cache's internal state, so semantically we
  // don't mind mutex_ invoking the non-const actions.
  mutable DMutex mutex_;
};  // class ClockCacheShard

class ClockCache
#ifdef NDEBUG
    final
#endif
    : public ShardedCache {
 public:
  ClockCache(size_t capacity, size_t estimated_value_size, int num_shard_bits,
             bool strict_capacity_limit,
             CacheMetadataChargePolicy metadata_charge_policy =
                 kDontChargeCacheMetadata);

  ~ClockCache() override;

  const char* Name() const override { return "ClockCache"; }

  CacheShard* GetShard(uint32_t shard) override;

  const CacheShard* GetShard(uint32_t shard) const override;

  void* Value(Handle* handle) override;

  size_t GetCharge(Handle* handle) const override;

  uint32_t GetHash(Handle* handle) const override;

  DeleterFn GetDeleter(Handle* handle) const override;

  void DisownData() override;

 private:
  ClockCacheShard* shards_ = nullptr;

  int num_shards_ = 0;
};  // class ClockCache

}  // namespace clock_cache

// Only for internal testing, temporarily replacing NewClockCache.
// TODO(Guido) Remove once NewClockCache constructs a ClockCache again.
extern std::shared_ptr<Cache> ExperimentalNewClockCache(
    size_t capacity, size_t estimated_value_size, int num_shard_bits,
    bool strict_capacity_limit,
    CacheMetadataChargePolicy metadata_charge_policy);

}  // namespace ROCKSDB_NAMESPACE