Improve the accounting of memory used by cached blobs (#10583)

Summary:
The patch improves the bookkeeping around the memory usage of
cached blobs in two ways: 1) it uses `malloc_usable_size`, which accounts
for allocator bin sizes etc., and 2) it also considers the memory usage
of the `BlobContents` object in addition to the blob itself. Note: some unit
tests had been relying on the cache charge being equal to the size of the
cached blob; these were updated.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10583

Test Plan: `make check`

Reviewed By: riversand963

Differential Revision: D39060680

Pulled By: ltamasi

fbshipit-source-id: 3583adce2b4ce6e84861f3fadccbfd2e5a3cc482
main
Levi Tamasi 2 years ago committed by Facebook GitHub Bot
parent 7670fdd690
commit 23376aa576
  1. 29
      db/blob/blob_contents.cc
  2. 2
      db/blob/blob_contents.h
  3. 5
      db/blob/blob_file_builder.cc
  4. 49
      db/blob/blob_source.cc
  5. 2
      db/blob/blob_source.h
  6. 36
      db/blob/blob_source_test.cc

@@ -8,6 +8,7 @@
#include <cassert> #include <cassert>
#include "cache/cache_helpers.h" #include "cache/cache_helpers.h"
#include "port/malloc.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@@ -17,6 +18,32 @@ std::unique_ptr<BlobContents> BlobContents::Create(
new BlobContents(std::move(allocation), size)); new BlobContents(std::move(allocation), size));
} }
size_t BlobContents::ApproximateMemoryUsage() const {
size_t usage = 0;
if (allocation_) {
MemoryAllocator* const allocator = allocation_.get_deleter().allocator;
if (allocator) {
usage += allocator->UsableSize(allocation_.get(), data_.size());
} else {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
usage += malloc_usable_size(allocation_.get());
#else
usage += data_.size();
#endif
}
}
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
usage += malloc_usable_size(const_cast<BlobContents*>(this));
#else
usage += sizeof(*this);
#endif
return usage;
}
size_t BlobContents::SizeCallback(void* obj) { size_t BlobContents::SizeCallback(void* obj) {
assert(obj); assert(obj);
@@ -55,7 +82,7 @@ Status BlobContents::CreateCallback(const void* buf, size_t size,
BlobContents* const contents = obj.release(); BlobContents* const contents = obj.release();
*out_obj = contents; *out_obj = contents;
*charge = contents->size(); *charge = contents->ApproximateMemoryUsage();
return Status::OK(); return Status::OK();
} }

@@ -32,6 +32,8 @@ class BlobContents {
const Slice& data() const { return data_; } const Slice& data() const { return data_; }
size_t size() const { return data_.size(); } size_t size() const { return data_.size(); }
size_t ApproximateMemoryUsage() const;
// Callbacks for secondary cache // Callbacks for secondary cache
static size_t SizeCallback(void* obj); static size_t SizeCallback(void* obj);

@@ -410,14 +410,13 @@ Status BlobFileBuilder::PutBlobIntoCacheIfNeeded(const Slice& blob,
// Objects to be put into the cache have to be heap-allocated and // Objects to be put into the cache have to be heap-allocated and
// self-contained, i.e. own their contents. The Cache has to be able to // self-contained, i.e. own their contents. The Cache has to be able to
// take unique ownership of them. // take unique ownership of them.
// TODO: support custom allocators
CacheAllocationPtr allocation(new char[blob.size()]); CacheAllocationPtr allocation(new char[blob.size()]);
memcpy(allocation.get(), blob.data(), blob.size()); memcpy(allocation.get(), blob.data(), blob.size());
std::unique_ptr<BlobContents> buf = std::unique_ptr<BlobContents> buf =
BlobContents::Create(std::move(allocation), blob.size()); BlobContents::Create(std::move(allocation), blob.size());
// TODO: support custom allocators and provide a better estimated memory s = blob_cache->Insert(key, buf.get(), buf->ApproximateMemoryUsage(),
// usage using malloc_usable_size.
s = blob_cache->Insert(key, buf.get(), buf->size(),
&BlobContents::DeleteCallback, &BlobContents::DeleteCallback,
nullptr /* cache_handle */, priority); nullptr /* cache_handle */, priority);
if (s.ok()) { if (s.ok()) {

@@ -55,11 +55,18 @@ Status BlobSource::GetBlobFromCache(
cache_handle = GetEntryFromCache(cache_key); cache_handle = GetEntryFromCache(cache_key);
if (cache_handle != nullptr) { if (cache_handle != nullptr) {
*blob = CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle); *blob = CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle);
PERF_COUNTER_ADD(blob_cache_hit_count, 1);
RecordTick(statistics_, BLOB_DB_CACHE_HIT);
RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ, blob->GetValue()->size());
return Status::OK(); return Status::OK();
} }
assert(blob->IsEmpty()); assert(blob->IsEmpty());
RecordTick(statistics_, BLOB_DB_CACHE_MISS);
return Status::NotFound("Blob not found in cache"); return Status::NotFound("Blob not found in cache");
} }
@@ -76,21 +83,26 @@ Status BlobSource::PutBlobIntoCache(const Slice& cache_key,
// Objects to be put into the cache have to be heap-allocated and // Objects to be put into the cache have to be heap-allocated and
// self-contained, i.e. own their contents. The Cache has to be able to take // self-contained, i.e. own their contents. The Cache has to be able to take
// unique ownership of them. // unique ownership of them.
// TODO: support custom allocators
CacheAllocationPtr allocation(new char[blob->size()]); CacheAllocationPtr allocation(new char[blob->size()]);
memcpy(allocation.get(), blob->data(), blob->size()); memcpy(allocation.get(), blob->data(), blob->size());
std::unique_ptr<BlobContents> buf = std::unique_ptr<BlobContents> buf =
BlobContents::Create(std::move(allocation), blob->size()); BlobContents::Create(std::move(allocation), blob->size());
// TODO: support custom allocators and provide a better estimated memory
// usage using malloc_usable_size.
Cache::Handle* cache_handle = nullptr; Cache::Handle* cache_handle = nullptr;
s = InsertEntryIntoCache(cache_key, buf.get(), buf->size(), &cache_handle, s = InsertEntryIntoCache(cache_key, buf.get(), buf->ApproximateMemoryUsage(),
priority); &cache_handle, priority);
if (s.ok()) { if (s.ok()) {
buf.release(); buf.release();
assert(cache_handle != nullptr); assert(cache_handle != nullptr);
*cached_blob = *cached_blob =
CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle); CacheHandleGuard<BlobContents>(blob_cache_.get(), cache_handle);
RecordTick(statistics_, BLOB_DB_CACHE_ADD);
RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE, blob->size());
} else {
RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
} }
return s; return s;
@@ -107,14 +119,6 @@ Cache::Handle* BlobSource::GetEntryFromCache(const Slice& key) const {
cache_handle = blob_cache_->Lookup(key, statistics_); cache_handle = blob_cache_->Lookup(key, statistics_);
} }
if (cache_handle != nullptr) {
PERF_COUNTER_ADD(blob_cache_hit_count, 1);
RecordTick(statistics_, BLOB_DB_CACHE_HIT);
RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ,
blob_cache_->GetUsage(cache_handle));
} else {
RecordTick(statistics_, BLOB_DB_CACHE_MISS);
}
return cache_handle; return cache_handle;
} }
@@ -132,15 +136,6 @@ Status BlobSource::InsertEntryIntoCache(const Slice& key, BlobContents* value,
cache_handle, priority); cache_handle, priority);
} }
if (s.ok()) {
assert(*cache_handle != nullptr);
RecordTick(statistics_, BLOB_DB_CACHE_ADD);
RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE,
blob_cache_->GetUsage(*cache_handle));
} else {
RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
}
return s; return s;
} }
@@ -420,7 +415,7 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options,
} }
bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size, bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
uint64_t offset) const { uint64_t offset, size_t* charge) const {
const CacheKey cache_key = GetCacheKey(file_number, file_size, offset); const CacheKey cache_key = GetCacheKey(file_number, file_size, offset);
const Slice key = cache_key.AsSlice(); const Slice key = cache_key.AsSlice();
@@ -428,6 +423,16 @@ bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
const Status s = GetBlobFromCache(key, &blob_handle); const Status s = GetBlobFromCache(key, &blob_handle);
if (s.ok() && blob_handle.GetValue() != nullptr) { if (s.ok() && blob_handle.GetValue() != nullptr) {
if (charge) {
const Cache* const cache = blob_handle.GetCache();
assert(cache);
Cache::Handle* const handle = blob_handle.GetCacheHandle();
assert(handle);
*charge = cache->GetUsage(handle);
}
return true; return true;
} }

@@ -103,7 +103,7 @@ class BlobSource {
inline Cache* GetBlobCache() const { return blob_cache_.get(); } inline Cache* GetBlobCache() const { return blob_cache_.get(); }
bool TEST_BlobInCache(uint64_t file_number, uint64_t file_size, bool TEST_BlobInCache(uint64_t file_number, uint64_t file_size,
uint64_t offset) const; uint64_t offset, size_t* charge = nullptr) const;
private: private:
Status GetBlobFromCache(const Slice& cache_key, Status GetBlobFromCache(const Slice& cache_key,

@@ -1099,8 +1099,8 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) {
Random rnd(301); Random rnd(301);
std::vector<std::string> key_strs{"key0", "key1"}; std::vector<std::string> key_strs{"key0", "key1"};
std::vector<std::string> blob_strs{rnd.RandomString(1010), std::vector<std::string> blob_strs{rnd.RandomString(512),
rnd.RandomString(1020)}; rnd.RandomString(768)};
std::vector<Slice> keys{key_strs[0], key_strs[1]}; std::vector<Slice> keys{key_strs[0], key_strs[1]};
std::vector<Slice> blobs{blob_strs[0], blob_strs[1]}; std::vector<Slice> blobs{blob_strs[0], blob_strs[1]};
@@ -1406,7 +1406,12 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) {
read_options, keys_[i], kBlobFileNumber, blob_offsets[i], read_options, keys_[i], kBlobFileNumber, blob_offsets[i],
blob_file_size_, blob_sizes[i], kNoCompression, blob_file_size_, blob_sizes[i], kNoCompression,
nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */)); nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */));
blob_bytes += blob_sizes[i];
size_t charge = 0;
ASSERT_TRUE(blob_source.TEST_BlobInCache(kBlobFileNumber, blob_file_size_,
blob_offsets[i], &charge));
blob_bytes += charge;
ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry); ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes); ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(),
@@ -1419,6 +1424,10 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) {
size_t blob_bytes = options_.blob_cache->GetUsage(); size_t blob_bytes = options_.blob_cache->GetUsage();
for (size_t i = 0; i < kNumBlobs; ++i) { for (size_t i = 0; i < kNumBlobs; ++i) {
size_t charge = 0;
ASSERT_TRUE(blob_source.TEST_BlobInCache(kBlobFileNumber, blob_file_size_,
blob_offsets[i], &charge));
CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[i]); CacheKey cache_key = base_cache_key.WithOffset(blob_offsets[i]);
// We didn't call options_.blob_cache->Erase() here, this is because // We didn't call options_.blob_cache->Erase() here, this is because
// the cache wrapper's Erase() method must be called to update the // the cache wrapper's Erase() method must be called to update the
@@ -1431,7 +1440,7 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) {
} else { } else {
ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry); ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
} }
blob_bytes -= blob_sizes[i]; blob_bytes -= charge;
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes); ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(),
options_.blob_cache->GetUsage()); options_.blob_cache->GetUsage());
@@ -1508,20 +1517,27 @@ TEST_F(BlobSourceCacheReservationTest, IncreaseCacheReservationOnFullCache) {
read_options.fill_cache = true; read_options.fill_cache = true;
// Since we resized each blob to be kSizeDummyEntry / (num_blobs / 2), we // Since we resized each blob to be kSizeDummyEntry / (num_blobs / 2), we
// should observe cache eviction for the second half blobs. // can't fit all the blobs in the cache at the same time, which means we
// should observe cache evictions once we reach the cache's capacity.
// Due to the overhead of the cache and the BlobContents objects, as well as
// jemalloc bin sizes, this happens after inserting seven blobs.
uint64_t blob_bytes = 0; uint64_t blob_bytes = 0;
for (size_t i = 0; i < kNumBlobs; ++i) { for (size_t i = 0; i < kNumBlobs; ++i) {
ASSERT_OK(blob_source.GetBlob( ASSERT_OK(blob_source.GetBlob(
read_options, keys_[i], kBlobFileNumber, blob_offsets[i], read_options, keys_[i], kBlobFileNumber, blob_offsets[i],
blob_file_size_, blob_sizes[i], kNoCompression, blob_file_size_, blob_sizes[i], kNoCompression,
nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */)); nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */));
blob_bytes += blob_sizes[i];
if (i < kNumBlobs / 2 - 1) {
size_t charge = 0;
ASSERT_TRUE(blob_source.TEST_BlobInCache(
kBlobFileNumber, blob_file_size_, blob_offsets[i], &charge));
blob_bytes += charge;
}
ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry); ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry);
if (i >= kNumBlobs / 2) {
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), kSizeDummyEntry);
} else {
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes); ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), blob_bytes);
}
ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(), ASSERT_EQ(cache_res_mgr->GetTotalMemoryUsed(),
options_.blob_cache->GetUsage()); options_.blob_cache->GetUsage());
} }

Loading…
Cancel
Save