|
|
|
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
//
|
|
|
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
|
|
#include "cache/cache_reservation_manager.h"
|
|
|
|
|
|
|
|
#include <cassert>
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstring>
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
#include "cache/cache_entry_roles.h"
|
|
|
|
#include "rocksdb/cache.h"
|
|
|
|
#include "rocksdb/slice.h"
|
|
|
|
#include "rocksdb/status.h"
|
|
|
|
#include "table/block_based/block_based_table_reader.h"
|
|
|
|
#include "util/coding.h"
|
|
|
|
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
CacheReservationManager::CacheReservationManager(std::shared_ptr<Cache> cache,
|
|
|
|
bool delayed_decrease)
|
|
|
|
: delayed_decrease_(delayed_decrease),
|
|
|
|
cache_allocated_size_(0),
|
|
|
|
memory_used_(0) {
|
|
|
|
assert(cache != nullptr);
|
|
|
|
cache_ = cache;
|
|
|
|
}
|
|
|
|
|
|
|
|
CacheReservationManager::~CacheReservationManager() {
|
|
|
|
for (auto* handle : dummy_handles_) {
|
|
|
|
cache_->Release(handle, true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
Status CacheReservationManager::UpdateCacheReservation(
|
|
|
|
std::size_t new_mem_used) {
|
|
|
|
memory_used_ = new_mem_used;
|
|
|
|
std::size_t cur_cache_allocated_size =
|
|
|
|
cache_allocated_size_.load(std::memory_order_relaxed);
|
|
|
|
if (new_mem_used == cur_cache_allocated_size) {
|
|
|
|
return Status::OK();
|
|
|
|
} else if (new_mem_used > cur_cache_allocated_size) {
|
|
|
|
Status s = IncreaseCacheReservation<R>(new_mem_used);
|
|
|
|
return s;
|
|
|
|
} else {
|
|
|
|
// In delayed decrease mode, we don't decrease cache reservation
|
|
|
|
// untill the memory usage is less than 3/4 of what we reserve
|
|
|
|
// in the cache.
|
|
|
|
// We do this because
|
|
|
|
// (1) Dummy entry insertion is expensive in block cache
|
|
|
|
// (2) Delayed releasing previously inserted dummy entries can save such
|
|
|
|
// expensive dummy entry insertion on memory increase in the near future,
|
|
|
|
// which is likely to happen when the memory usage is greater than or equal
|
|
|
|
// to 3/4 of what we reserve
|
|
|
|
if (delayed_decrease_ && new_mem_used >= cur_cache_allocated_size / 4 * 3) {
|
|
|
|
return Status::OK();
|
|
|
|
} else {
|
|
|
|
Status s = DecreaseCacheReservation(new_mem_used);
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Explicitly instantiate templates for "CacheEntryRole" values we use.
|
|
|
|
// This makes it possible to keep the template definitions in the .cc file.
|
|
|
|
template Status CacheReservationManager::UpdateCacheReservation<
|
|
|
|
CacheEntryRole::kWriteBuffer>(std::size_t new_mem_used);
|
|
|
|
template Status CacheReservationManager::UpdateCacheReservation<
|
|
|
|
CacheEntryRole::kCompressionDictionaryBuildingBuffer>(
|
|
|
|
std::size_t new_mem_used);
|
|
|
|
// For cache reservation manager unit tests
|
|
|
|
template Status CacheReservationManager::UpdateCacheReservation<
|
|
|
|
CacheEntryRole::kMisc>(std::size_t new_mem_used);
|
|
|
|
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
Status CacheReservationManager::MakeCacheReservation(
|
|
|
|
std::size_t incremental_memory_used,
|
|
|
|
std::unique_ptr<CacheReservationHandle<R>>* handle) {
|
|
|
|
assert(handle != nullptr);
|
|
|
|
Status s =
|
|
|
|
UpdateCacheReservation<R>(GetTotalMemoryUsed() + incremental_memory_used);
|
|
|
|
(*handle).reset(new CacheReservationHandle<R>(incremental_memory_used,
|
|
|
|
shared_from_this()));
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
template Status
|
|
|
|
CacheReservationManager::MakeCacheReservation<CacheEntryRole::kMisc>(
|
|
|
|
std::size_t incremental_memory_used,
|
|
|
|
std::unique_ptr<CacheReservationHandle<CacheEntryRole::kMisc>>* handle);
|
Account Bloom/Ribbon filter construction memory in global memory limit (#9073)
Summary:
Note: This PR is the 4th part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073) and will rebase/merge only after the first three PRs (https://github.com/facebook/rocksdb/pull/9070, https://github.com/facebook/rocksdb/pull/9071, https://github.com/facebook/rocksdb/pull/9130) merge.
**Context:**
Similar to https://github.com/facebook/rocksdb/pull/8428, this PR is to track memory usage during (new) Bloom Filter (i.e,FastLocalBloom) and Ribbon Filter (i.e, Ribbon128) construction, moving toward the goal of [single global memory limit using block cache capacity](https://github.com/facebook/rocksdb/wiki/Projects-Being-Developed#improving-memory-efficiency). It also constrains the size of the banding portion of Ribbon Filter during construction by falling back to Bloom Filter if that banding is, at some point, larger than the available space in the cache under `LRUCacheOptions::strict_capacity_limit=true`.
The option to turn on this feature is `BlockBasedTableOptions::reserve_table_builder_memory = true` which by default is set to `false`. We [decided](https://github.com/facebook/rocksdb/pull/9073#discussion_r741548409) not to have separate option for separate memory user in table building therefore their memory accounting are all bundled under one general option.
**Summary:**
- Reserved/released cache for creation/destruction of three main memory users with the passed-in `FilterBuildingContext::cache_res_mgr` during filter construction:
- hash entries (i.e`hash_entries`.size(), we bucket-charge hash entries during insertion for performance),
- banding (Ribbon Filter only, `bytes_coeff_rows` +`bytes_result_rows` + `bytes_backtrack`),
- final filter (i.e, `mutable_buf`'s size).
- Implementation details: in order to use `CacheReservationManager::CacheReservationHandle` to account final filter's memory, we have to store the `CacheReservationManager` object and `CacheReservationHandle` for final filter in `XXPH3BitsFilterBuilder` as well as explicitly delete the filter bits builder when done with the final filter in block based table.
- Added option fo run `filter_bench` with this memory reservation feature
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9073
Test Plan:
- Added new tests in `db_bloom_filter_test` to verify filter construction peak cache reservation under combination of `BlockBasedTable::Rep::FilterType` (e.g, `kFullFilter`, `kPartitionedFilter`), `BloomFilterPolicy::Mode`(e.g, `kFastLocalBloom`, `kStandard128Ribbon`, `kDeprecatedBlock`) and `BlockBasedTableOptions::reserve_table_builder_memory`
- To address the concern for slow test: tests with memory reservation under `kFullFilter` + `kStandard128Ribbon` and `kPartitionedFilter` take around **3000 - 6000 ms** and others take around **1500 - 2000 ms**, in total adding **20000 - 25000 ms** to the test suit running locally
- Added new test in `bloom_test` to verify Ribbon Filter fallback on large banding in FullFilter
- Added test in `filter_bench` to verify that this feature does not significantly slow down Bloom/Ribbon Filter construction speed. Local result averaged over **20** run as below:
- FastLocalBloom
- baseline `./filter_bench -impl=2 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 29.56295** (DEBUG_LEVEL=1), **29.98153** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above)`./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg'`:
- **Build avg ns/key: 30.99046** (DEBUG_LEVEL=1), **30.48867** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be similar as above) `./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 31.146975** (DEBUG_LEVEL=1), **30.08165** (DEBUG_LEVEL=0)
- Ribbon128
- baseline `./filter_bench -impl=3 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 129.17585** (DEBUG_LEVEL=1), **130.5225** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg' `:
- **Build avg ns/key: 131.61645** (DEBUG_LEVEL=1), **132.98075** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be a lot faster than above due to fallback) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 52.032965** (DEBUG_LEVEL=1), **52.597825** (DEBUG_LEVEL=0)
- And the warning message of `"Cache reservation for Ribbon filter banding failed due to cache full"` is indeed logged to console.
Reviewed By: pdillinger
Differential Revision: D31991348
Pulled By: hx235
fbshipit-source-id: 9336b2c60f44d530063da518ceaf56dac5f9df8e
3 years ago
|
|
|
template Status CacheReservationManager::MakeCacheReservation<
|
|
|
|
CacheEntryRole::kFilterConstruction>(
|
|
|
|
std::size_t incremental_memory_used,
|
|
|
|
std::unique_ptr<
|
|
|
|
CacheReservationHandle<CacheEntryRole::kFilterConstruction>>* handle);
|
|
|
|
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
Status CacheReservationManager::IncreaseCacheReservation(
|
|
|
|
std::size_t new_mem_used) {
|
|
|
|
Status return_status = Status::OK();
|
|
|
|
while (new_mem_used > cache_allocated_size_.load(std::memory_order_relaxed)) {
|
|
|
|
Cache::Handle* handle = nullptr;
|
|
|
|
return_status = cache_->Insert(GetNextCacheKey(), nullptr, kSizeDummyEntry,
|
|
|
|
GetNoopDeleterForRole<R>(), &handle);
|
|
|
|
|
|
|
|
if (return_status != Status::OK()) {
|
|
|
|
return return_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
dummy_handles_.push_back(handle);
|
|
|
|
cache_allocated_size_ += kSizeDummyEntry;
|
|
|
|
}
|
|
|
|
return return_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
Status CacheReservationManager::DecreaseCacheReservation(
|
|
|
|
std::size_t new_mem_used) {
|
|
|
|
Status return_status = Status::OK();
|
|
|
|
|
|
|
|
// Decrease to the smallest multiple of kSizeDummyEntry that is greater than
|
|
|
|
// or equal to new_mem_used We do addition instead of new_mem_used <=
|
|
|
|
// cache_allocated_size_.load(std::memory_order_relaxed) - kSizeDummyEntry to
|
|
|
|
// avoid underflow of size_t when cache_allocated_size_ = 0
|
|
|
|
while (new_mem_used + kSizeDummyEntry <=
|
|
|
|
cache_allocated_size_.load(std::memory_order_relaxed)) {
|
|
|
|
assert(!dummy_handles_.empty());
|
|
|
|
auto* handle = dummy_handles_.back();
|
|
|
|
cache_->Release(handle, true);
|
|
|
|
dummy_handles_.pop_back();
|
|
|
|
cache_allocated_size_ -= kSizeDummyEntry;
|
|
|
|
}
|
|
|
|
return return_status;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::size_t CacheReservationManager::GetTotalReservedCacheSize() {
|
|
|
|
return cache_allocated_size_.load(std::memory_order_relaxed);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::size_t CacheReservationManager::GetTotalMemoryUsed() {
|
|
|
|
return memory_used_;
|
|
|
|
}
|
|
|
|
|
|
|
|
Slice CacheReservationManager::GetNextCacheKey() {
|
|
|
|
// Calling this function will have the side-effect of changing the
|
|
|
|
// underlying cache_key_ that is shared among other keys generated from this
|
|
|
|
// fucntion. Therefore please make sure the previous keys are saved/copied
|
|
|
|
// before calling this function.
|
New stable, fixed-length cache keys (#9126)
Summary:
This change standardizes on a new 16-byte cache key format for
block cache (incl compressed and secondary) and persistent cache (but
not table cache and row cache).
The goal is a really fast cache key with practically ideal stability and
uniqueness properties without external dependencies (e.g. from FileSystem).
A fixed key size of 16 bytes should enable future optimizations to the
concurrent hash table for block cache, which is a heavy CPU user /
bottleneck, but there appears to be measurable performance improvement
even with no changes to LRUCache.
This change replaces a lot of disjointed and ugly code handling cache
keys with calls to a simple, clean new internal API (cache_key.h).
(Preserving the old cache key logic under an option would be very ugly
and likely negate the performance gain of the new approach. Complete
replacement carries some inherent risk, but I think that's acceptable
with sufficient analysis and testing.)
The scheme for encoding new cache keys is complicated but explained
in cache_key.cc.
Also: EndianSwapValue is moved to math.h to be next to other bit
operations. (Explains some new include "math.h".) ReverseBits operation
added and unit tests added to hash_test for both.
Fixes https://github.com/facebook/rocksdb/issues/7405 (presuming a root cause)
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9126
Test Plan:
### Basic correctness
Several tests needed updates to work with the new functionality, mostly
because we are no longer relying on filesystem for stable cache keys
so table builders & readers need more context info to agree on cache
keys. This functionality is so core, a huge number of existing tests
exercise the cache key functionality.
### Performance
Create db with
`TEST_TMPDIR=/dev/shm ./db_bench -bloom_bits=10 -benchmarks=fillrandom -num=3000000 -partition_index_and_filters`
And test performance with
`TEST_TMPDIR=/dev/shm ./db_bench -readonly -use_existing_db -bloom_bits=10 -benchmarks=readrandom -num=3000000 -duration=30 -cache_index_and_filter_blocks -cache_size=250000 -threads=4`
using DEBUG_LEVEL=0 and simultaneous before & after runs.
Before ops/sec, avg over 100 runs: 121924
After ops/sec, avg over 100 runs: 125385 (+2.8%)
### Collision probability
I have built a tool, ./cache_bench -stress_cache_key to broadly simulate host-wide cache activity
over many months, by making some pessimistic simplifying assumptions:
* Every generated file has a cache entry for every byte offset in the file (contiguous range of cache keys)
* All of every file is cached for its entire lifetime
We use a simple table with skewed address assignment and replacement on address collision
to simulate files coming & going, with quite a variance (super-Poisson) in ages. Some output
with `./cache_bench -stress_cache_key -sck_keep_bits=40`:
```
Total cache or DBs size: 32TiB Writing 925.926 MiB/s or 76.2939TiB/day
Multiply by 9.22337e+18 to correct for simulation losses (but still assume whole file cached)
```
These come from default settings of 2.5M files per day of 32 MB each, and
`-sck_keep_bits=40` means that to represent a single file, we are only keeping 40 bits of
the 128-bit cache key. With file size of 2\*\*25 contiguous keys (pessimistic), our simulation
is about 2\*\*(128-40-25) or about 9 billion billion times more prone to collision than reality.
More default assumptions, relatively pessimistic:
* 100 DBs in same process (doesn't matter much)
* Re-open DB in same process (new session ID related to old session ID) on average
every 100 files generated
* Restart process (all new session IDs unrelated to old) 24 times per day
After enough data, we get a result at the end:
```
(keep 40 bits) 17 collisions after 2 x 90 days, est 10.5882 days between (9.76592e+19 corrected)
```
If we believe the (pessimistic) simulation and the mathematical generalization, we would need to run a billion machines all for 97 billion days to expect a cache key collision. To help verify that our generalization ("corrected") is robust, we can make our simulation more precise with `-sck_keep_bits=41` and `42`, which takes more running time to get enough data:
```
(keep 41 bits) 16 collisions after 4 x 90 days, est 22.5 days between (1.03763e+20 corrected)
(keep 42 bits) 19 collisions after 10 x 90 days, est 47.3684 days between (1.09224e+20 corrected)
```
The generalized prediction still holds. With the `-sck_randomize` option, we can see that we are beating "random" cache keys (except offsets still non-randomized) by a modest amount (roughly 20x less collision prone than random), which should make us reasonably comfortable even in "degenerate" cases:
```
197 collisions after 1 x 90 days, est 0.456853 days between (4.21372e+18 corrected)
```
I've run other tests to validate other conditions behave as expected, never behaving "worse than random" unless we start chopping off structured data.
Reviewed By: zhichao-cao
Differential Revision: D33171746
Pulled By: pdillinger
fbshipit-source-id: f16a57e369ed37be5e7e33525ace848d0537c88f
3 years ago
|
|
|
cache_key_ = CacheKey::CreateUniqueForCacheLifetime(cache_.get());
|
|
|
|
return cache_key_.AsSlice();
|
|
|
|
}
|
|
|
|
|
Account Bloom/Ribbon filter construction memory in global memory limit (#9073)
Summary:
Note: This PR is the 4th part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073) and will rebase/merge only after the first three PRs (https://github.com/facebook/rocksdb/pull/9070, https://github.com/facebook/rocksdb/pull/9071, https://github.com/facebook/rocksdb/pull/9130) merge.
**Context:**
Similar to https://github.com/facebook/rocksdb/pull/8428, this PR is to track memory usage during (new) Bloom Filter (i.e,FastLocalBloom) and Ribbon Filter (i.e, Ribbon128) construction, moving toward the goal of [single global memory limit using block cache capacity](https://github.com/facebook/rocksdb/wiki/Projects-Being-Developed#improving-memory-efficiency). It also constrains the size of the banding portion of Ribbon Filter during construction by falling back to Bloom Filter if that banding is, at some point, larger than the available space in the cache under `LRUCacheOptions::strict_capacity_limit=true`.
The option to turn on this feature is `BlockBasedTableOptions::reserve_table_builder_memory = true` which by default is set to `false`. We [decided](https://github.com/facebook/rocksdb/pull/9073#discussion_r741548409) not to have separate option for separate memory user in table building therefore their memory accounting are all bundled under one general option.
**Summary:**
- Reserved/released cache for creation/destruction of three main memory users with the passed-in `FilterBuildingContext::cache_res_mgr` during filter construction:
- hash entries (i.e`hash_entries`.size(), we bucket-charge hash entries during insertion for performance),
- banding (Ribbon Filter only, `bytes_coeff_rows` +`bytes_result_rows` + `bytes_backtrack`),
- final filter (i.e, `mutable_buf`'s size).
- Implementation details: in order to use `CacheReservationManager::CacheReservationHandle` to account final filter's memory, we have to store the `CacheReservationManager` object and `CacheReservationHandle` for final filter in `XXPH3BitsFilterBuilder` as well as explicitly delete the filter bits builder when done with the final filter in block based table.
- Added option fo run `filter_bench` with this memory reservation feature
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9073
Test Plan:
- Added new tests in `db_bloom_filter_test` to verify filter construction peak cache reservation under combination of `BlockBasedTable::Rep::FilterType` (e.g, `kFullFilter`, `kPartitionedFilter`), `BloomFilterPolicy::Mode`(e.g, `kFastLocalBloom`, `kStandard128Ribbon`, `kDeprecatedBlock`) and `BlockBasedTableOptions::reserve_table_builder_memory`
- To address the concern for slow test: tests with memory reservation under `kFullFilter` + `kStandard128Ribbon` and `kPartitionedFilter` take around **3000 - 6000 ms** and others take around **1500 - 2000 ms**, in total adding **20000 - 25000 ms** to the test suit running locally
- Added new test in `bloom_test` to verify Ribbon Filter fallback on large banding in FullFilter
- Added test in `filter_bench` to verify that this feature does not significantly slow down Bloom/Ribbon Filter construction speed. Local result averaged over **20** run as below:
- FastLocalBloom
- baseline `./filter_bench -impl=2 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 29.56295** (DEBUG_LEVEL=1), **29.98153** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above)`./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg'`:
- **Build avg ns/key: 30.99046** (DEBUG_LEVEL=1), **30.48867** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be similar as above) `./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 31.146975** (DEBUG_LEVEL=1), **30.08165** (DEBUG_LEVEL=0)
- Ribbon128
- baseline `./filter_bench -impl=3 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 129.17585** (DEBUG_LEVEL=1), **130.5225** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg' `:
- **Build avg ns/key: 131.61645** (DEBUG_LEVEL=1), **132.98075** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be a lot faster than above due to fallback) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 52.032965** (DEBUG_LEVEL=1), **52.597825** (DEBUG_LEVEL=0)
- And the warning message of `"Cache reservation for Ribbon filter banding failed due to cache full"` is indeed logged to console.
Reviewed By: pdillinger
Differential Revision: D31991348
Pulled By: hx235
fbshipit-source-id: 9336b2c60f44d530063da518ceaf56dac5f9df8e
3 years ago
|
|
|
template <CacheEntryRole R>
|
|
|
|
Cache::DeleterFn CacheReservationManager::TEST_GetNoopDeleterForRole() {
|
|
|
|
return GetNoopDeleterForRole<R>();
|
|
|
|
}
|
|
|
|
|
|
|
|
template Cache::DeleterFn CacheReservationManager::TEST_GetNoopDeleterForRole<
|
|
|
|
CacheEntryRole::kFilterConstruction>();
|
|
|
|
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
CacheReservationHandle<R>::CacheReservationHandle(
|
|
|
|
std::size_t incremental_memory_used,
|
|
|
|
std::shared_ptr<CacheReservationManager> cache_res_mgr)
|
|
|
|
: incremental_memory_used_(incremental_memory_used) {
|
|
|
|
assert(cache_res_mgr != nullptr);
|
|
|
|
cache_res_mgr_ = cache_res_mgr;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <CacheEntryRole R>
|
|
|
|
CacheReservationHandle<R>::~CacheReservationHandle() {
|
|
|
|
assert(cache_res_mgr_ != nullptr);
|
|
|
|
assert(cache_res_mgr_->GetTotalMemoryUsed() >= incremental_memory_used_);
|
|
|
|
|
|
|
|
Status s = cache_res_mgr_->UpdateCacheReservation<R>(
|
|
|
|
cache_res_mgr_->GetTotalMemoryUsed() - incremental_memory_used_);
|
|
|
|
s.PermitUncheckedError();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Explicitly instantiate templates for "CacheEntryRole" values we use.
|
|
|
|
// This makes it possible to keep the template definitions in the .cc file.
|
|
|
|
template class CacheReservationHandle<CacheEntryRole::kMisc>;
|
Account Bloom/Ribbon filter construction memory in global memory limit (#9073)
Summary:
Note: This PR is the 4th part of a bigger PR stack (https://github.com/facebook/rocksdb/pull/9073) and will rebase/merge only after the first three PRs (https://github.com/facebook/rocksdb/pull/9070, https://github.com/facebook/rocksdb/pull/9071, https://github.com/facebook/rocksdb/pull/9130) merge.
**Context:**
Similar to https://github.com/facebook/rocksdb/pull/8428, this PR is to track memory usage during (new) Bloom Filter (i.e,FastLocalBloom) and Ribbon Filter (i.e, Ribbon128) construction, moving toward the goal of [single global memory limit using block cache capacity](https://github.com/facebook/rocksdb/wiki/Projects-Being-Developed#improving-memory-efficiency). It also constrains the size of the banding portion of Ribbon Filter during construction by falling back to Bloom Filter if that banding is, at some point, larger than the available space in the cache under `LRUCacheOptions::strict_capacity_limit=true`.
The option to turn on this feature is `BlockBasedTableOptions::reserve_table_builder_memory = true` which by default is set to `false`. We [decided](https://github.com/facebook/rocksdb/pull/9073#discussion_r741548409) not to have separate option for separate memory user in table building therefore their memory accounting are all bundled under one general option.
**Summary:**
- Reserved/released cache for creation/destruction of three main memory users with the passed-in `FilterBuildingContext::cache_res_mgr` during filter construction:
- hash entries (i.e`hash_entries`.size(), we bucket-charge hash entries during insertion for performance),
- banding (Ribbon Filter only, `bytes_coeff_rows` +`bytes_result_rows` + `bytes_backtrack`),
- final filter (i.e, `mutable_buf`'s size).
- Implementation details: in order to use `CacheReservationManager::CacheReservationHandle` to account final filter's memory, we have to store the `CacheReservationManager` object and `CacheReservationHandle` for final filter in `XXPH3BitsFilterBuilder` as well as explicitly delete the filter bits builder when done with the final filter in block based table.
- Added option fo run `filter_bench` with this memory reservation feature
Pull Request resolved: https://github.com/facebook/rocksdb/pull/9073
Test Plan:
- Added new tests in `db_bloom_filter_test` to verify filter construction peak cache reservation under combination of `BlockBasedTable::Rep::FilterType` (e.g, `kFullFilter`, `kPartitionedFilter`), `BloomFilterPolicy::Mode`(e.g, `kFastLocalBloom`, `kStandard128Ribbon`, `kDeprecatedBlock`) and `BlockBasedTableOptions::reserve_table_builder_memory`
- To address the concern for slow test: tests with memory reservation under `kFullFilter` + `kStandard128Ribbon` and `kPartitionedFilter` take around **3000 - 6000 ms** and others take around **1500 - 2000 ms**, in total adding **20000 - 25000 ms** to the test suit running locally
- Added new test in `bloom_test` to verify Ribbon Filter fallback on large banding in FullFilter
- Added test in `filter_bench` to verify that this feature does not significantly slow down Bloom/Ribbon Filter construction speed. Local result averaged over **20** run as below:
- FastLocalBloom
- baseline `./filter_bench -impl=2 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 29.56295** (DEBUG_LEVEL=1), **29.98153** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above)`./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg'`:
- **Build avg ns/key: 30.99046** (DEBUG_LEVEL=1), **30.48867** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be similar as above) `./filter_bench -impl=2 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 31.146975** (DEBUG_LEVEL=1), **30.08165** (DEBUG_LEVEL=0)
- Ribbon128
- baseline `./filter_bench -impl=3 -quick -runs 20 | grep 'Build avg'`:
- **Build avg ns/key: 129.17585** (DEBUG_LEVEL=1), **130.5225** (DEBUG_LEVEL=0)
- new feature (expected to be similar as above) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true | grep 'Build avg' `:
- **Build avg ns/key: 131.61645** (DEBUG_LEVEL=1), **132.98075** (DEBUG_LEVEL=0)
- new feature of RibbonFilter with fallback (expected to be a lot faster than above due to fallback) `./filter_bench -impl=3 -quick -runs 20 -reserve_table_builder_memory=true -strict_capacity_limit=true | grep 'Build avg'` :
- **Build avg ns/key: 52.032965** (DEBUG_LEVEL=1), **52.597825** (DEBUG_LEVEL=0)
- And the warning message of `"Cache reservation for Ribbon filter banding failed due to cache full"` is indeed logged to console.
Reviewed By: pdillinger
Differential Revision: D31991348
Pulled By: hx235
fbshipit-source-id: 9336b2c60f44d530063da518ceaf56dac5f9df8e
3 years ago
|
|
|
template class CacheReservationHandle<CacheEntryRole::kFilterConstruction>;
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|