|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <atomic>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <memory>
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include "cache/cache_entry_roles.h"
|
|
|
|
#include "rocksdb/cache.h"
|
|
|
|
#include "rocksdb/slice.h"
|
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "table/block_based/block_based_table_reader.h"
|
|
|
|
#include "util/coding.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
|
|
|
|
// Forward declaration: CacheReservationManager::MakeCacheReservation() below
// takes a std::unique_ptr<CacheReservationHandle<R>>* before the class
// template itself is defined later in this header.
template <CacheEntryRole R>
|
|
|
|
class CacheReservationHandle;
|
|
|
|
|
|
|
|
// CacheReservationManager is for reserving cache space for the memory used
|
|
|
|
// through inserting/releasing dummy entries in the cache.
|
|
|
|
//
|
|
|
|
// This class is NOT thread-safe, except that GetTotalReservedCacheSize()
|
|
|
|
// can be called without external synchronization.
|
|
|
|
class CacheReservationManager
|
|
|
|
: public std::enable_shared_from_this<CacheReservationManager> {
|
|
|
|
public:
|
|
|
|
// Construct a CacheReservationManager
|
|
|
|
// @param cache The cache where dummy entries are inserted and released for
|
|
|
|
// reserving cache space
|
|
|
|
// @param delayed_decrease If set true, then dummy entries won't be released
|
|
|
|
// immediately when memory usage decreases.
|
|
|
|
// Instead, it will be released when the memory usage
|
|
|
|
// decreases to 3/4 of what we have reserved so far.
|
|
|
|
// This is for saving some future dummy entry
|
|
|
|
// insertion when memory usage increases are likely to
|
|
|
|
// happen in the near future.
|
|
|
|
explicit CacheReservationManager(std::shared_ptr<Cache> cache,
|
|
|
|
bool delayed_decrease = false);
|
|
|
|
|
|
|
|
// no copy constructor, copy assignment, move constructor, move assignment
|
|
|
|
CacheReservationManager(const CacheReservationManager &) = delete;
|
|
|
|
CacheReservationManager &operator=(const CacheReservationManager &) = delete;
|
|
|
|
CacheReservationManager(CacheReservationManager &&) = delete;
|
|
|
|
CacheReservationManager &operator=(CacheReservationManager &&) = delete;
|
|
|
|
|
|
|
|
~CacheReservationManager();
|
|
|
|
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
|
|
|
|
// One of the two ways of reserving/releasing cache,
|
|
|
|
// see CacheReservationManager::MakeCacheReservation() for the other.
|
|
|
|
// Use ONLY one of them to prevent unexpected behavior.
|
|
|
|
//
|
|
|
|
// Insert and release dummy entries in the cache to
|
|
|
|
// match the size of total dummy entries with the least multiple of
|
|
|
|
// kSizeDummyEntry greater than or equal to new_mem_used
|
|
|
|
//
|
|
|
|
// Insert dummy entries if new_memory_used > cache_allocated_size_;
|
|
|
|
//
|
|
|
|
// Release dummy entries if new_memory_used < cache_allocated_size_
|
|
|
|
// (and new_memory_used < cache_allocated_size_ * 3/4
|
|
|
|
// when delayed_decrease is set true);
|
|
|
|
//
|
|
|
|
// Keey dummy entries the same if (1) new_memory_used == cache_allocated_size_
|
|
|
|
// or (2) new_memory_used is in the interval of
|
|
|
|
// [cache_allocated_size_ * 3/4, cache_allocated_size) when delayed_decrease
|
|
|
|
// is set true.
|
|
|
|
//
|
|
|
|
// @param new_memory_used The number of bytes used by new memory
|
|
|
|
// The most recent new_memoy_used passed in will be returned
|
|
|
|
// in GetTotalMemoryUsed() even when the call return non-ok status.
|
|
|
|
//
|
|
|
|
// Since the class is NOT thread-safe, external synchronization on the
|
|
|
|
// order of calling UpdateCacheReservation() is needed if you want
|
|
|
|
// GetTotalMemoryUsed() indeed returns the latest memory used.
|
|
|
|
//
|
|
|
|
// @return On inserting dummy entries, it returns Status::OK() if all dummy
|
|
|
|
// entry insertions succeed.
|
|
|
|
// Otherwise, it returns the first non-ok status;
|
|
|
|
// On releasing dummy entries, it always returns Status::OK().
|
|
|
|
// On keeping dummy entries the same, it always returns Status::OK().
|
|
|
|
Status UpdateCacheReservation(std::size_t new_memory_used);
|
|
|
|
|
|
|
|
// One of the two ways of reserving/releasing cache,
|
|
|
|
// see CacheReservationManager::UpdateCacheReservation() for the other.
|
|
|
|
// Use ONLY one of them to prevent unexpected behavior.
|
|
|
|
//
|
|
|
|
// Insert dummy entries in the cache for the incremental memory usage
|
|
|
|
// to match the size of total dummy entries with the least multiple of
|
|
|
|
// kSizeDummyEntry greater than or equal to the total memory used.
|
|
|
|
//
|
|
|
|
// A CacheReservationHandle is returned as an output parameter.
|
|
|
|
// The reserved dummy entries are automatically released on the destruction of
|
|
|
|
// this handle, which achieves better RAII per cache reservation.
|
|
|
|
//
|
|
|
|
// WARNING: Deallocate all the handles of the CacheReservationManager object
|
|
|
|
// before deallocating the object to prevent unexpected behavior.
|
|
|
|
//
|
|
|
|
// @param incremental_memory_used The number of bytes increased in memory
|
|
|
|
// usage.
|
|
|
|
//
|
|
|
|
// Calling GetTotalMemoryUsed() afterward will return the total memory
|
|
|
|
// increased by this number, even when calling MakeCacheReservation()
|
|
|
|
// returns non-ok status.
|
|
|
|
//
|
|
|
|
// Since the class is NOT thread-safe, external synchronization in
|
|
|
|
// calling MakeCacheReservation() is needed if you want
|
|
|
|
// GetTotalMemoryUsed() indeed returns the latest memory used.
|
|
|
|
//
|
|
|
|
// @param handle An pointer to std::unique_ptr<CacheReservationHandle<R>> that
|
|
|
|
// manages the lifetime of the handle and its cache reservation.
|
|
|
|
//
|
|
|
|
// @return It returns Status::OK() if all dummy
|
|
|
|
// entry insertions succeed.
|
|
|
|
// Otherwise, it returns the first non-ok status;
|
|
|
|
//
|
|
|
|
// REQUIRES: handle != nullptr
|
|
|
|
// REQUIRES: The CacheReservationManager object is NOT managed by
|
|
|
|
// std::unique_ptr as CacheReservationHandle needs to
|
|
|
|
// shares ownership to the CacheReservationManager object.
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
Status MakeCacheReservation(
|
|
|
|
std::size_t incremental_memory_used,
|
|
|
|
std::unique_ptr<CacheReservationHandle<R>> *handle);
|
|
|
|
|
|
|
|
// Return the size of the cache (which is a multiple of kSizeDummyEntry)
|
|
|
|
// successfully reserved by calling UpdateCacheReservation().
|
|
|
|
//
|
|
|
|
// When UpdateCacheReservation() returns non-ok status,
|
|
|
|
// calling GetTotalReservedCacheSize() after that might return a slightly
|
|
|
|
// smaller number than the actual reserved cache size due to
|
|
|
|
// the returned number will always be a multiple of kSizeDummyEntry
|
|
|
|
// and cache full might happen in the middle of inserting a dummy entry.
|
|
|
|
std::size_t GetTotalReservedCacheSize();
|
|
|
|
|
|
|
|
// Return the latest total memory used indicated by the most recent call of
|
|
|
|
// UpdateCacheReservation(std::size_t new_memory_used);
|
|
|
|
std::size_t GetTotalMemoryUsed();
|
|
|
|
|
|
|
|
static constexpr std::size_t GetDummyEntrySize() { return kSizeDummyEntry; }
|
|
|
|
|
Account Bloom/Ribbon filter construction memory in global memory limit (#9073)
Summary:
Note: This PR is the 4th part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073) and will rebase/merge only after the first three PRs (https://github.com/facebook/rocksdb/pull/9070, https://github.com/facebook/rocksdb/pull/9071, https://github.com/facebook/rocksdb/pull/9130) merge.
**Context:**
Similar to https://github.com/facebook/rocksdb/pull/8428, this PR is to track memory usage during (new) Bloom Filter (i.e,FastLocalBloom) and Ribbon Filter (i.e, Ribbon128) construction, moving toward the goal of [single global memory limit using block cache capacity](https://github.com/facebook/rocksdb/wiki/Projects-Being-Developed#improving-memory-efficiency). It also constrains the size of the banding portion of Ribbon Filter during construction by falling back to Bloom Filter if that banding is, at some point, larger than the available space in the cache under `LRUCacheOptions::strict_capacity_limit=true`.
The option to turn on this feature is `BlockBasedTableOptions::reserve_table_builder_memory = true` which by default is set to `false`. We [decided](https://github.com/facebook/rocksdb/pull/9073#discussion_r741548409) not to have separate option for separate memory user in table building therefore their memory accounting are all bundled under one general option.
**Summary:**
- Reserved/released cache for creation/destruction of three main memory users with the passed-in `FilterBuildingContext::cache_res_mgr` during filter construction:
- hash entries (i.e`hash_entries`.size(), we bucket-charge hash entries during insertion for performance),
- banding (Ribbon Filter only, `bytes_coeff_rows` +`bytes_result_rows` + `bytes_backtrack`),
- final filter (i.e, `mutable_buf`'s size).
- Implementation details: in order to use `CacheReservationManager::CacheReservationHandle` to account final filter's memory, we have to store the `CacheReservationManager` object and `CacheReservationHandle` for final filter in `XXPH3BitsFilterBuilder` as well as explicitly delete the filter bits builder when done with the final filter in block based table.
- Added option fo run `filter_bench` with this memory reservation feature
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9073
Test Plan:
- Added new tests in `db_bloom_filter_test` to verify filter construction peak cache reservation under combination of `BlockBasedTable::Rep::FilterType` (e.g, `kFullFilter`, `kPartitionedFilter`), `BloomFilterPolicy::Mode`(e.g, `kFastLocalBloom`, `kStandard128Ribbon`, `kDeprecatedBlock`) and `BlockBasedTableOptions::reserve_table_builder_memory`
- To address the concern for slow test: tests with memory reservation under `kFullFilter` + `kStandard128Ribbon` and `kPartitionedFilter` take around **3000 - 6000 ms** and others take around **1500 - 2000 ms**, in total adding **20000 - 25000 ms** to the test suit running locally
- Added new test in `bloom_test` to verify Ribbon Filter fallback on large banding in FullFilter
- Added test in `filter_bench` to verify that this feature does not significantly slow down Bloom/Ribbon Filter construction speed. Local result averaged over **20** run as below:
- FastLocalBloom
- baseline `./filter_bench -impl=2 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 29.56295** (DEBUG_LEVEL=1), **29.98153** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above)`./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg'`:
- **Build avg ns/key: 30.99046** (DEBUG_LEVEL=1), **30.48867** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be similar as above) `./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 31.146975** (DEBUG_LEVEL=1), **30.08165** (DEBUG_LEVEL=0)
- Ribbon128
- baseline `./filter_bench -impl=3 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 129.17585** (DEBUG_LEVEL=1), **130.5225** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg' `:
- **Build avg ns/key: 131.61645** (DEBUG_LEVEL=1), **132.98075** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be a lot faster than above due to fallback) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 52.032965** (DEBUG_LEVEL=1), **52.597825** (DEBUG_LEVEL=0)
- And the warning message of `"Cache reservation for Ribbon filter banding failed due to cache full"` is indeed logged to console.
Reviewed By: pdillinger
Differential Revision: D31991348
Pulled By: hx235
fbshipit-source-id: 9336b2c60f44d530063da518ceaf56dac5f9df8e
3 years ago
|
|
|
// For testing only - it is to help ensure the NoopDeleterForRole<R>
|
|
|
|
// accessed from CacheReservationManager and the one accessed from the test
|
|
|
|
// are from the same translation units
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
static Cache::DeleterFn TEST_GetNoopDeleterForRole();
|
|
|
|
|
|
|
|
private:
|
|
|
|
static constexpr std::size_t kSizeDummyEntry = 256 * 1024;
|
|
|
|
// The key will be longer than keys for blocks in SST files so they won't
|
|
|
|
// conflict.
|
|
|
|
static const std::size_t kCacheKeyPrefixSize =
|
|
|
|
BlockBasedTable::kMaxCacheKeyPrefixSize + kMaxVarint64Length;
|
|
|
|
|
|
|
|
Slice GetNextCacheKey();
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
Status IncreaseCacheReservation(std::size_t new_mem_used);
|
|
|
|
Status DecreaseCacheReservation(std::size_t new_mem_used);
|
|
|
|
|
|
|
|
std::shared_ptr<Cache> cache_;
|
|
|
|
bool delayed_decrease_;
|
|
|
|
std::atomic<std::size_t> cache_allocated_size_;
|
|
|
|
std::size_t memory_used_;
|
|
|
|
std::vector<Cache::Handle *> dummy_handles_;
|
|
|
|
std::uint64_t next_cache_key_id_ = 0;
|
|
|
|
// The non-prefix part will be updated according to the ID to use.
|
|
|
|
char cache_key_[kCacheKeyPrefixSize + kMaxVarint64Length];
|
|
|
|
};
|
|
|
|
|
|
|
|
// CacheReservationHandle manages the lifetime of a cache reservation.
|
|
|
|
// This class is NOT thread-safe.
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
class CacheReservationHandle {
|
|
|
|
public:
|
|
|
|
// REQUIRES: cache_res_mgr != nullptr
|
|
|
|
explicit CacheReservationHandle(
|
|
|
|
std::size_t incremental_memory_used,
|
|
|
|
std::shared_ptr<CacheReservationManager> cache_res_mgr);
|
|
|
|
|
|
|
|
~CacheReservationHandle();
|
|
|
|
|
|
|
|
private:
|
|
|
|
std::size_t incremental_memory_used_;
|
|
|
|
std::shared_ptr<CacheReservationManager> cache_res_mgr_;
|
|
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|