Add new API to report dummy entries size in cache in WriteBufferManager (#7837)

Summary:
Add a new API, WriteBufferManager::dummy_entries_in_cache_usage(), which reports the total size of the dummy entries stored in the cache to account for DataBlocks in WriteBufferManager.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7837

Test Plan: Updated test ./write_buffer_manager_test

Reviewed By: ajkr

Differential Revision: D25794312

Pulled By: akankshamahajan15

fbshipit-source-id: 197f5e8701e3dc57a7df72dab1735624f90daf4b
main
Akanksha Mahajan 4 years ago committed by Facebook GitHub Bot
parent b2e30bdb67
commit 8ed680bdb0
  1. 3
      HISTORY.md
  2. 4
      include/rocksdb/write_buffer_manager.h
  3. 3
      memtable/write_buffer_manager.cc
  4. 25
      memtable/write_buffer_manager_test.cc

@ -4,6 +4,9 @@
* When verifying full file checksum with `DB::VerifyFileChecksums()`, we now fail with `Status::InvalidArgument` if the name of the checksum generator used for verification does not match the name of the checksum generator used for protecting the file when it was created.
* Since RocksDB does not continue write the same file if a file write fails for any reason, the file scope write IO error is treated the same as retryable IO error. More information about error handling of file scope IO error is included in `ErrorHandler::SetBGError`.
### Public API Change
* Add a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in cache (passed to WriteBufferManager). Dummy entries are used to account for DataBlocks.
## 6.16.0 (12/18/2020)
### Behavior Changes
* Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation.

@ -43,6 +43,9 @@ class WriteBufferManager {
size_t mutable_memtable_memory_usage() const {
return memory_active_.load(std::memory_order_relaxed);
}
size_t dummy_entries_in_cache_usage() const {
return dummy_size_.load(std::memory_order_relaxed);
}
size_t buffer_size() const { return buffer_size_; }
// Should only be called from write thread
@ -93,6 +96,7 @@ class WriteBufferManager {
std::atomic<size_t> memory_used_;
// Memory that hasn't been scheduled to free.
std::atomic<size_t> memory_active_;
std::atomic<size_t> dummy_size_;
struct CacheRep;
std::unique_ptr<CacheRep> cache_rep_;

@ -54,6 +54,7 @@ WriteBufferManager::WriteBufferManager(size_t _buffer_size,
mutable_limit_(buffer_size_ * 7 / 8),
memory_used_(0),
memory_active_(0),
dummy_size_(0),
cache_rep_(nullptr) {
#ifndef ROCKSDB_LITE
if (cache) {
@ -104,6 +105,7 @@ void WriteBufferManager::ReserveMemWithCache(size_t mem) {
// it in the future.
cache_rep_->dummy_handles_.push_back(handle);
cache_rep_->cache_allocated_size_ += kSizeDummyEntry;
dummy_size_.fetch_add(kSizeDummyEntry, std::memory_order_relaxed);
}
#else
(void)mem;
@ -137,6 +139,7 @@ void WriteBufferManager::FreeMemWithCache(size_t mem) {
}
cache_rep_->dummy_handles_.pop_back();
cache_rep_->cache_allocated_size_ -= kSizeDummyEntry;
dummy_size_.fetch_sub(kSizeDummyEntry, std::memory_order_relaxed);
}
#else
(void)mem;

@ -11,7 +11,7 @@
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
const size_t kSizeDummyEntry = 256 * 1024;
class WriteBufferManagerTest : public testing::Test {};
#ifndef ROCKSDB_LITE
@ -65,28 +65,35 @@ TEST_F(WriteBufferManagerTest, CacheCost) {
wbf->ReserveMem(333 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 2 * 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 2 * 256 * 1024 + 10000);
// 2 dummy entries are added for size 333 kb.
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 2 * kSizeDummyEntry);
// Allocate another 512KB
wbf->ReserveMem(512 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 4 * 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + 10000);
// 2 more dummy entries are added for size 512.
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry);
// Allocate another 10MB
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000);
// 40 more entries are added for size 10 * 1024 * 1024.
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry);
// Free 1MB will not cause any change in cache cost
wbf->FreeMem(1024 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 11 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 11 * 1024 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 44 * kSizeDummyEntry);
ASSERT_FALSE(wbf->ShouldFlush());
// Allocate another 41MB
wbf->ReserveMem(41 * 1024 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry);
ASSERT_TRUE(wbf->ShouldFlush());
ASSERT_TRUE(wbf->ShouldFlush());
@ -94,7 +101,7 @@ TEST_F(WriteBufferManagerTest, CacheCost) {
wbf->ScheduleFreeMem(20 * 1024 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 204 * kSizeDummyEntry);
// Still need flush as the hard limit hits
ASSERT_TRUE(wbf->ShouldFlush());
@ -102,6 +109,7 @@ TEST_F(WriteBufferManagerTest, CacheCost) {
wbf->FreeMem(20 * 1024 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 256 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 203 * kSizeDummyEntry);
ASSERT_FALSE(wbf->ShouldFlush());
@ -109,19 +117,23 @@ TEST_F(WriteBufferManagerTest, CacheCost) {
wbf->FreeMem(16 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 2 * 256 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 202 * kSizeDummyEntry);
wbf->FreeMem(16 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 201 * kSizeDummyEntry);
// Reserve 512KB will not cause any change in cache cost
wbf->ReserveMem(512 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 3 * 256 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 201 * kSizeDummyEntry);
wbf->FreeMem(16 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 51 * 1024 * 1024 - 4 * 256 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 200 * kSizeDummyEntry);
// Destory write buffer manger should free everything
wbf.reset();
@ -137,6 +149,7 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) {
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 10 * 1024 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry);
ASSERT_FALSE(wbf->ShouldFlush());
wbf->FreeMem(9 * 1024 * 1024);
@ -145,6 +158,7 @@ TEST_F(WriteBufferManagerTest, NoCapCacheCost) {
}
ASSERT_GE(cache->GetPinnedUsage(), 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 1024 * 1024 + 10000);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 4 * kSizeDummyEntry);
}
TEST_F(WriteBufferManagerTest, CacheFull) {
@ -156,16 +170,20 @@ TEST_F(WriteBufferManagerTest, CacheFull) {
std::shared_ptr<Cache> cache = NewLRUCache(lo);
std::unique_ptr<WriteBufferManager> wbf(new WriteBufferManager(0, cache));
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 40 * kSizeDummyEntry);
size_t prev_pinned = cache->GetPinnedUsage();
ASSERT_GE(prev_pinned, 10 * 1024 * 1024);
// Some insert will fail
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_LE(cache->GetPinnedUsage(), 12 * 1024 * 1024);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 80 * kSizeDummyEntry);
// Increase capacity so next insert will succeed
cache->SetCapacity(30 * 1024 * 1024);
wbf->ReserveMem(10 * 1024 * 1024);
ASSERT_GT(cache->GetPinnedUsage(), 20 * 1024 * 1024);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 120 * kSizeDummyEntry);
// Gradually release 20 MB
for (int i = 0; i < 40; i++) {
@ -173,6 +191,7 @@ TEST_F(WriteBufferManagerTest, CacheFull) {
}
ASSERT_GE(cache->GetPinnedUsage(), 10 * 1024 * 1024);
ASSERT_LT(cache->GetPinnedUsage(), 20 * 1024 * 1024);
ASSERT_EQ(wbf->dummy_entries_in_cache_usage(), 95 * kSizeDummyEntry);
}
#endif // ROCKSDB_LITE

Loading…
Cancel
Save