From 23376aa5766ff486df666896809ccac38227fbd0 Mon Sep 17 00:00:00 2001
From: Levi Tamasi
Date: Fri, 26 Aug 2022 15:53:08 -0700
Subject: [PATCH] Improve the accounting of memory used by cached blobs (#10583)

Summary:
The patch improves the bookkeeping around the memory usage of cached
blobs in two ways: 1) it uses `malloc_usable_size`, which accounts for
allocator bin sizes etc., and 2) it also considers the memory usage of
the `BlobContents` object in addition to the blob itself. Note: some
unit tests had been relying on the cache charge being equal to the size
of the cached blob; these were updated.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10583

Test Plan: `make check`

Reviewed By: riversand963

Differential Revision: D39060680

Pulled By: ltamasi

fbshipit-source-id: 3583adce2b4ce6e84861f3fadccbfd2e5a3cc482
---
 db/blob/blob_contents.cc     | 29 ++++++++++++++++++++-
 db/blob/blob_contents.h      |  2 ++
 db/blob/blob_file_builder.cc |  5 ++--
 db/blob/blob_source.cc       | 49 ++++++++++++++++++++----------------
 db/blob/blob_source.h        |  2 +-
 db/blob/blob_source_test.cc  | 40 ++++++++++++++++++++---------
 6 files changed, 88 insertions(+), 39 deletions(-)

diff --git a/db/blob/blob_contents.cc b/db/blob/blob_contents.cc
index 32c959b0b..df5e47659 100644
--- a/db/blob/blob_contents.cc
+++ b/db/blob/blob_contents.cc
@@ -8,6 +8,7 @@
 #include <cassert>
 
 #include "cache/cache_helpers.h"
+#include "port/malloc.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -17,6 +18,32 @@ std::unique_ptr<BlobContents> BlobContents::Create(
       new BlobContents(std::move(allocation), size));
 }
 
+size_t BlobContents::ApproximateMemoryUsage() const {
+  size_t usage = 0;
+
+  if (allocation_) {
+    MemoryAllocator* const allocator = allocation_.get_deleter().allocator;
+
+    if (allocator) {
+      usage += allocator->UsableSize(allocation_.get(), data_.size());
+    } else {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      usage += malloc_usable_size(allocation_.get());
+#else
+      usage += data_.size();
+#endif
+    }
+  }
+
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+  usage += malloc_usable_size(const_cast<BlobContents*>(this));
+#else
+  usage += sizeof(*this);
+#endif
+
+  return usage;
+}
+
 size_t BlobContents::SizeCallback(void* obj) {
   assert(obj);
 
@@ -55,7 +82,7 @@ Status BlobContents::CreateCallback(const void* buf, size_t size,
   BlobContents* const contents = obj.release();
 
   *out_obj = contents;
-  *charge = contents->size();
+  *charge = contents->ApproximateMemoryUsage();
 
   return Status::OK();
 }
diff --git a/db/blob/blob_contents.h b/db/blob/blob_contents.h
index 9b5369721..9ea9babf2 100644
--- a/db/blob/blob_contents.h
+++ b/db/blob/blob_contents.h
@@ -32,6 +32,8 @@ class BlobContents {
   const Slice& data() const { return data_; }
   size_t size() const { return data_.size(); }
 
+  size_t ApproximateMemoryUsage() const;
+
   // Callbacks for secondary cache
   static size_t SizeCallback(void* obj);
 
diff --git a/db/blob/blob_file_builder.cc b/db/blob/blob_file_builder.cc
index 9f769af59..65939d3e3 100644
--- a/db/blob/blob_file_builder.cc
+++ b/db/blob/blob_file_builder.cc
@@ -410,14 +410,13 @@ Status BlobFileBuilder::PutBlobIntoCacheIfNeeded(const Slice& blob,
     // Objects to be put into the cache have to be heap-allocated and
     // self-contained, i.e. own their contents. The Cache has to be able to
     // take unique ownership of them.
+    // TODO: support custom allocators
     CacheAllocationPtr allocation(new char[blob.size()]);
     memcpy(allocation.get(), blob.data(), blob.size());
     std::unique_ptr<BlobContents> buf =
         BlobContents::Create(std::move(allocation), blob.size());
 
-    // TODO: support custom allocators and provide a better estimated memory
-    // usage using malloc_usable_size.
-    s = blob_cache->Insert(key, buf.get(), buf->size(),
+    s = blob_cache->Insert(key, buf.get(), buf->ApproximateMemoryUsage(),
                            &BlobContents::DeleteCallback,
                            nullptr /* cache_handle */, priority);
     if (s.ok()) {
diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc
index 8e3ee7834..f2571be90 100644
--- a/db/blob/blob_source.cc
+++ b/db/blob/blob_source.cc
@@ -55,11 +55,18 @@ Status BlobSource::GetBlobFromCache(
   cache_handle = GetEntryFromCache(cache_key);
   if (cache_handle != nullptr) {
     *blob = CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle);
+
+    PERF_COUNTER_ADD(blob_cache_hit_count, 1);
+    RecordTick(statistics_, BLOB_DB_CACHE_HIT);
+    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ, blob->GetValue()->size());
+
     return Status::OK();
   }
 
   assert(blob->IsEmpty());
 
+  RecordTick(statistics_, BLOB_DB_CACHE_MISS);
+
   return Status::NotFound("Blob not found in cache");
 }
 
@@ -76,21 +83,26 @@ Status BlobSource::PutBlobIntoCache(const Slice& cache_key,
   // Objects to be put into the cache have to be heap-allocated and
   // self-contained, i.e. own their contents. The Cache has to be able to take
   // unique ownership of them.
+  // TODO: support custom allocators
   CacheAllocationPtr allocation(new char[blob->size()]);
   memcpy(allocation.get(), blob->data(), blob->size());
   std::unique_ptr<BlobContents> buf =
       BlobContents::Create(std::move(allocation), blob->size());
 
-  // TODO: support custom allocators and provide a better estimated memory
-  // usage using malloc_usable_size.
   Cache::Handle* cache_handle = nullptr;
-  s = InsertEntryIntoCache(cache_key, buf.get(), buf->size(), &cache_handle,
-                           priority);
+  s = InsertEntryIntoCache(cache_key, buf.get(), buf->ApproximateMemoryUsage(),
+                           &cache_handle, priority);
   if (s.ok()) {
     buf.release();
     assert(cache_handle != nullptr);
     *cached_blob =
         CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle);
+
+    RecordTick(statistics_, BLOB_DB_CACHE_ADD);
+    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE, blob->size());
+
+  } else {
+    RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
   }
 
   return s;
@@ -107,14 +119,6 @@ Cache::Handle* BlobSource::GetEntryFromCache(const Slice& key) const {
     cache_handle = blob_cache_->Lookup(key, statistics_);
   }
 
-  if (cache_handle != nullptr) {
-    PERF_COUNTER_ADD(blob_cache_hit_count, 1);
-    RecordTick(statistics_, BLOB_DB_CACHE_HIT);
-    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ,
-               blob_cache_->GetUsage(cache_handle));
-  } else {
-    RecordTick(statistics_, BLOB_DB_CACHE_MISS);
-  }
 
   return cache_handle;
 }
 
@@ -132,15 +136,6 @@ Status BlobSource::InsertEntryIntoCache(const Slice& key, BlobContents* value,
                             cache_handle, priority);
   }
 
-  if (s.ok()) {
-    assert(*cache_handle != nullptr);
-    RecordTick(statistics_, BLOB_DB_CACHE_ADD);
-    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE,
-               blob_cache_->GetUsage(*cache_handle));
-  } else {
-    RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
-  }
-
   return s;
 }
 
@@ -420,7 +415,7 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options,
 }
 
 bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
-                                  uint64_t offset) const {
+                                  uint64_t offset, size_t* charge) const {
   const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
   const Slice key = cache_key.AsSlice();
 
@@ -428,6 +423,16 @@ bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
   const Status s = GetBlobFromCache(key, &blob_handle);
 
   if (s.ok() && blob_handle.GetValue() != nullptr) {
+    if (charge) {
+      const Cache* const cache = blob_handle.GetCache();
+      assert(cache);
+
+      Cache::Handle* const handle = blob_handle.GetCacheHandle();
+      assert(handle);
+
+      *charge = cache->GetUsage(handle);
+    }
+
     return true;
   }
 
diff --git a/db/blob/blob_source.h b/db/blob/blob_source.h
index dd10e4d0e..c91437fcd 100644
--- a/db/blob/blob_source.h
+++ b/db/blob/blob_source.h
@@ -103,7 +103,7 @@ class BlobSource {
   inline Cache* GetBlobCache() const { return blob_cache_.get(); }
 
   bool TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
-                        uint64_t offset) const;
+                        uint64_t offset, size_t* charge = nullptr) const;
 
  private:
   Status GetBlobFromCache(const Slice& cache_key,
diff --git a/db/blob/blob_source_test.cc b/db/blob/blob_source_test.cc
index eac2936cd..9db8f5f05 100644
--- a/db/blob/blob_source_test.cc
+++ b/db/blob/blob_source_test.cc
@@ -1099,8 +1099,8 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
   Random rnd(301);
 
   std::vector<std::string> key_strs{"key0", "key1"};
-  std::vector<std::string> blob_strs{rnd.RandomString(1010),
-                                     rnd.RandomString(1020)};
+  std::vector<std::string> blob_strs{rnd.RandomString(512),
+                                     rnd.RandomString(768)};
 
   std::vector<Slice> keys{key_strs[0], key_strs[1]};
   std::vector<Slice> blobs{blob_strs[0], blob_strs[1]};
@@ -1406,7 +1406,12 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) {
           read_options, keys_[i], kBlobFileNumber, blob_offsets[i],
           blob_file_size_, blob_sizes[i], kNoCompression,
           nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */));
-      blob_bytes += blob_sizes[i];
+
+      size_t charge = 0;
+      ASSERT_TRUE(blob_source.TEST_BlobInCache(kBlobFileNumber, blob_file_size_,
+                                               blob_offsets[i], &charge));
+
+      blob_bytes += charge;
       ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
       ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
       ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(),
@@ -1419,6 +1424,10 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) {
     size_t blob_bytes = options_.blob_cache->GetUsage();
 
     for (size_t i = 0; i < kNumBlobs; ++i) {
+      size_t charge = 0;
+      ASSERT_TRUE(blob_source.TEST_BlobInCache(kBlobFileNumber, blob_file_size_,
+                                               blob_offsets[i], &charge));
+
       CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[i]);
       // We didn't call options_.blob_cache->Erase() here, this is because
       // the cache wrapper's Erase() method must be called to update the
@@ -1431,7 +1440,7 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) {
       } else {
        ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
       }
-      blob_bytes -= blob_sizes[i];
+      blob_bytes -= charge;
       ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
       ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(),
                 options_.blob_cache->GetUsage());
@@ -1507,21 +1516,28 @@ TEST_F(BlobSourceCacheReservationTest, IncreaseCacheReservationOnFullCache) {
   {
     read_options.fill_cache = true;
 
-    // Since we resized each blob to be kSizeDummyEntry / (num_blobs/ 2), we
-    // should observe cache eviction for the second half blobs.
+    // Since we resized each blob to be kSizeDummyEntry / (num_blobs / 2), we
+    // can't fit all the blobs in the cache at the same time, which means we
+    // should observe cache evictions once we reach the cache's capacity.
+    // Due to the overhead of the cache and the BlobContents objects, as well as
+    // jemalloc bin sizes, this happens after inserting seven blobs.
     uint64_t blob_bytes = 0;
     for (size_t i = 0; i < kNumBlobs; ++i) {
       ASSERT_OK(blob_source.GetBlob(
           read_options, keys_[i], kBlobFileNumber, blob_offsets[i],
          blob_file_size_, blob_sizes[i], kNoCompression,
           nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */));
-      blob_bytes += blob_sizes[i];
-      ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
-      if (i >= kNumBlobs / 2) {
-        ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), kSizeDummyEntry);
-      } else {
-        ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
+
+      if (i < kNumBlobs / 2 - 1) {
+        size_t charge = 0;
+        ASSERT_TRUE(blob_source.TEST_BlobInCache(
+            kBlobFileNumber, blob_file_size_, blob_offsets[i], &charge));
+
+        blob_bytes += charge;
       }
+
+      ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
+      ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
       ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(),
                 options_.blob_cache->GetUsage());
     }
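
Addendum (illustrative, not part of the patch): the snippet below is a minimal,
standalone sketch of the charging scheme the patch adopts, namely charging the
cache for the allocator-reported usable size of the blob allocation plus the
footprint of the owning object, rather than for the blob payload alone. The
`OwnedBlob` type, its `ApproxCharge()` method, and the `HAVE_MALLOC_USABLE_SIZE`
guard are illustrative stand-ins, not RocksDB APIs; RocksDB itself goes through
`port/malloc.h` and the `ROCKSDB_MALLOC_USABLE_SIZE` macro, and prefers the
configured `MemoryAllocator`'s `UsableSize()` when one is present.

// Standalone illustration only. Assumes a glibc-style malloc_usable_size()
// and that operator new / operator new[] are malloc-backed, which holds for
// glibc and jemalloc but is not guaranteed by the C++ standard.
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <memory>

#if defined(__GLIBC__)
#include <malloc.h>
#define HAVE_MALLOC_USABLE_SIZE 1
#endif

// Hypothetical stand-in for BlobContents: owns a heap-allocated buffer.
struct OwnedBlob {
  std::unique_ptr<char[]> allocation;
  size_t size = 0;

  // Mirrors the idea of BlobContents::ApproximateMemoryUsage(): the charge is
  // the usable size of the buffer plus the usable size of the object itself
  // (both are heap blocks here); falls back to nominal sizes otherwise.
  size_t ApproxCharge() const {
#ifdef HAVE_MALLOC_USABLE_SIZE
    return malloc_usable_size(allocation.get()) +
           malloc_usable_size(const_cast<OwnedBlob*>(this));
#else
    return size + sizeof(*this);
#endif
  }
};

int main() {
  constexpr size_t kBlobSize = 1010;  // same order as the old test blobs

  auto blob = std::make_unique<OwnedBlob>();  // heap-allocated, like BlobContents
  blob->allocation = std::make_unique<char[]>(kBlobSize);
  std::memset(blob->allocation.get(), 'x', kBlobSize);
  blob->size = kBlobSize;

  // The charge is typically larger than the payload because of allocator bin
  // rounding and the owning object's footprint, which is why the updated unit
  // tests read the actual charge back instead of assuming charge == blob size.
  std::printf("payload bytes: %zu\n", kBlobSize);
  std::printf("approx charge: %zu\n", blob->ApproxCharge());
  return 0;
}

The same shift is visible in the test changes above: `TEST_BlobInCache` gained
an optional `charge` out-parameter so the cache reservation tests can accumulate
the real per-entry charge reported by the cache instead of `blob_sizes[i]`, and
`BLOB_DB_CACHE_BYTES_READ`/`BLOB_DB_CACHE_BYTES_WRITE` are now recorded at the
`GetBlobFromCache`/`PutBlobIntoCache` level using the blob payload size rather
than the cache handle's usage.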