From b07217da04af4f1cd08f664912e7ae1e94ac7094 Mon Sep 17 00:00:00 2001 From: Levi Tamasi Date: Thu, 1 Sep 2022 16:25:46 -0700 Subject: [PATCH] Pin the newly cached blob after insert (#10625) Summary: With the current code, when a blob isn't found in the cache and gets read from the blob file and then inserted into the cache, the application gets passed the self-contained `PinnableSlice` resulting from the blob file read. The patch changes this so that the `PinnableSlice` pins the cache entry instead in this case. Pull Request resolved: https://github.com/facebook/rocksdb/pull/10625 Test Plan: `make check` Reviewed By: pdillinger Differential Revision: D39220904 Pulled By: ltamasi fbshipit-source-id: cb9c62881e3523b1e9f614e00bf503bac2fe3b0a --- db/blob/blob_source.cc | 72 +++++++++++++++++-------------------- db/blob/blob_source.h | 3 ++ db/blob/blob_source_test.cc | 40 +++++++++++++++++---- 3 files changed, 69 insertions(+), 46 deletions(-) diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc index 11a91d9e4..b0cafb224 100644 --- a/db/blob/blob_source.cc +++ b/db/blob/blob_source.cc @@ -122,6 +122,25 @@ Cache::Handle* BlobSource::GetEntryFromCache(const Slice& key) const { return cache_handle; } +void BlobSource::PinCachedBlob(CacheHandleGuard* cached_blob, + PinnableSlice* value) { + assert(cached_blob); + assert(cached_blob->GetValue()); + assert(value); + + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle to + // the target PinnableSlice. This has the potential to save a lot of + // CPU, especially with large blob values. + + value->Reset(); + + constexpr Cleanable* cleanable = nullptr; + value->PinSlice(cached_blob->GetValue()->data(), cleanable); + + cached_blob->TransferTo(value); +} + Status BlobSource::InsertEntryIntoCache(const Slice& key, BlobContents* value, size_t charge, Cache::Handle** cache_handle, @@ -165,23 +184,7 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, Slice key = cache_key.AsSlice(); s = GetBlobFromCache(key, &blob_handle); if (s.ok() && blob_handle.GetValue()) { - { - value->Reset(); - // To avoid copying the cached blob into the buffer provided by the - // application, we can simply transfer ownership of the cache handle to - // the target PinnableSlice. This has the potential to save a lot of - // CPU, especially with large blob values. - value->PinSlice( - blob_handle.GetValue()->data(), - [](void* arg1, void* arg2) { - Cache* const cache = static_cast(arg1); - Cache::Handle* const handle = static_cast(arg2); - cache->Release(handle); - }, - blob_handle.GetCache(), blob_handle.GetCacheHandle()); - // Make the CacheHandleGuard relinquish ownership of the handle. - blob_handle.TransferTo(nullptr); - } + PinCachedBlob(&blob_handle, value); // For consistency, the size of on-disk (possibly compressed) blob record // is assigned to bytes_read. @@ -243,6 +246,8 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, if (!s.ok()) { return s; } + + PinCachedBlob(&blob_handle, value); } assert(s.ok()); @@ -312,23 +317,7 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, assert(req.status); *req.status = s; - { - req.result->Reset(); - // To avoid copying the cached blob into the buffer provided by the - // application, we can simply transfer ownership of the cache handle - // to the target PinnableSlice. This has the potential to save a lot - // of CPU, especially with large blob values. - req.result->PinSlice( - blob_handle.GetValue()->data(), - [](void* arg1, void* arg2) { - Cache* const cache = static_cast(arg1); - Cache::Handle* const handle = static_cast(arg2); - cache->Release(handle); - }, - blob_handle.GetCache(), blob_handle.GetCacheHandle()); - // Make the CacheHandleGuard relinquish ownership of the handle. - blob_handle.TransferTo(nullptr); - } + PinCachedBlob(&blob_handle, req.result); // Update the counter for the number of valid blobs read from the cache. ++cached_blob_count; @@ -397,15 +386,18 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, if (blob_cache_ && read_options.fill_cache) { // If filling cache is allowed and a cache is configured, try to put // the blob(s) to the cache. - for (size_t i = 0; i < _blob_reqs.size(); ++i) { - if (_blob_reqs[i]->status->ok()) { + for (BlobReadRequest* req : _blob_reqs) { + assert(req); + + if (req->status->ok()) { CacheHandleGuard blob_handle; - const CacheKey cache_key = - base_cache_key.WithOffset(_blob_reqs[i]->offset); + const CacheKey cache_key = base_cache_key.WithOffset(req->offset); const Slice key = cache_key.AsSlice(); - s = PutBlobIntoCache(key, &blob_handle, _blob_reqs[i]->result); + s = PutBlobIntoCache(key, &blob_handle, req->result); if (!s.ok()) { - *_blob_reqs[i]->status = s; + *req->status = s; + } else { + PinCachedBlob(&blob_handle, req->result); } } } diff --git a/db/blob/blob_source.h b/db/blob/blob_source.h index c91437fcd..2027e16fa 100644 --- a/db/blob/blob_source.h +++ b/db/blob/blob_source.h @@ -113,6 +113,9 @@ class BlobSource { CacheHandleGuard* cached_blob, PinnableSlice* blob) const; + static void PinCachedBlob(CacheHandleGuard* cached_blob, + PinnableSlice* value); + Cache::Handle* GetEntryFromCache(const Slice& key) const; Status InsertEntryIntoCache(const Slice& key, BlobContents* value, diff --git a/db/blob/blob_source_test.cc b/db/blob/blob_source_test.cc index 9db8f5f05..3ce9a1072 100644 --- a/db/blob/blob_source_test.cc +++ b/db/blob/blob_source_test.cc @@ -219,6 +219,7 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) { kNoCompression, prefetch_buffer, &values[i], &bytes_read)); ASSERT_EQ(values[i], blobs[i]); + ASSERT_FALSE(values[i].IsPinned()); ASSERT_EQ(bytes_read, BlobLogRecord::kHeaderSize + keys[i].size() + blob_sizes[i]); @@ -256,6 +257,7 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) { kNoCompression, prefetch_buffer, &values[i], &bytes_read)); ASSERT_EQ(values[i], blobs[i]); + ASSERT_TRUE(values[i].IsPinned()); ASSERT_EQ(bytes_read, BlobLogRecord::kHeaderSize + keys[i].size() + blob_sizes[i]); @@ -299,6 +301,7 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) { kNoCompression, prefetch_buffer, &values[i], &bytes_read)); ASSERT_EQ(values[i], blobs[i]); + ASSERT_TRUE(values[i].IsPinned()); ASSERT_EQ(bytes_read, BlobLogRecord::kHeaderSize + keys[i].size() + blob_sizes[i]); @@ -337,6 +340,7 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) { kNoCompression, prefetch_buffer, &values[i], &bytes_read)); ASSERT_EQ(values[i], blobs[i]); + ASSERT_TRUE(values[i].IsPinned()); ASSERT_EQ(bytes_read, BlobLogRecord::kHeaderSize + keys[i].size() + blob_sizes[i]); @@ -383,6 +387,7 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) { &bytes_read) .IsIncomplete()); ASSERT_TRUE(values[i].empty()); + ASSERT_FALSE(values[i].IsPinned()); ASSERT_EQ(bytes_read, 0); ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size, @@ -424,6 +429,7 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) { &bytes_read) .IsIOError()); ASSERT_TRUE(values[i].empty()); + ASSERT_FALSE(values[i].IsPinned()); ASSERT_EQ(bytes_read, 0); ASSERT_FALSE(blob_source.TEST_BlobInCache(file_number, file_size, @@ -856,6 +862,7 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) { if (i % 2 == 0) { ASSERT_OK(statuses_buf[i]); ASSERT_EQ(value_buf[i], blobs[i]); + ASSERT_TRUE(value_buf[i].IsPinned()); fs_read_bytes += blob_sizes[i] + keys[i].size() + BlobLogRecord::kHeaderSize; ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size, @@ -864,6 +871,7 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) { } else { statuses_buf[i].PermitUncheckedError(); ASSERT_TRUE(value_buf[i].empty()); + ASSERT_FALSE(value_buf[i].IsPinned()); ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size, blob_offsets[i])); } @@ -896,6 +904,7 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) { kNoCompression, prefetch_buffer, &value_buf[i], &bytes_read)); ASSERT_EQ(value_buf[i], blobs[i]); + ASSERT_TRUE(value_buf[i].IsPinned()); ASSERT_EQ(bytes_read, BlobLogRecord::kHeaderSize + keys[i].size() + blob_sizes[i]); @@ -921,6 +930,7 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) { for (size_t i = 0; i < num_blobs; ++i) { ASSERT_OK(statuses_buf[i]); ASSERT_EQ(value_buf[i], blobs[i]); + ASSERT_TRUE(value_buf[i].IsPinned()); ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size, blob_offsets[i])); blob_bytes += blob_sizes[i]; @@ -969,6 +979,7 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) { for (size_t i = 0; i < num_blobs; ++i) { ASSERT_TRUE(statuses_buf[i].IsIncomplete()); ASSERT_TRUE(value_buf[i].empty()); + ASSERT_FALSE(value_buf[i].IsPinned()); ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size, blob_offsets[i])); } @@ -1012,6 +1023,7 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) { for (size_t i = 0; i < num_blobs; ++i) { ASSERT_TRUE(statuses_buf[i].IsIOError()); ASSERT_TRUE(value_buf[i].empty()); + ASSERT_FALSE(value_buf[i].IsPinned()); ASSERT_FALSE(blob_source.TEST_BlobInCache(non_existing_file_number, file_size, blob_offsets[i])); } @@ -1153,6 +1165,9 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) { ASSERT_TRUE( blob_source.TEST_BlobInCache(file_number, file_size, blob_offsets[0])); + // Release cache handle + values[0].Reset(); + // key0 should be demoted to the secondary cache, and key1 should be filled // to the primary cache from the blob file. ASSERT_OK(blob_source.GetBlob(read_options, keys[1], file_number, @@ -1163,6 +1178,9 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) { ASSERT_TRUE( blob_source.TEST_BlobInCache(file_number, file_size, blob_offsets[1])); + // Release cache handle + values[1].Reset(); + OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number); // blob_cache here only looks at the primary cache since we didn't provide @@ -1219,6 +1237,9 @@ TEST_F(BlobSecondaryCacheTest, GetBlobsFromSecondaryCache) { &values[0], nullptr /* bytes_read */)); ASSERT_EQ(values[0], blobs[0]); + // Release cache handle + values[0].Reset(); + // key0 should be in the primary cache. CacheKey cache_key0 = base_cache_key.WithOffset(blob_offsets[0]); const Slice key0 = cache_key0.AsSlice(); @@ -1379,11 +1400,11 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) { ReadOptions read_options; read_options.verify_checksums = true; - std::vector values(keys_.size()); - { read_options.fill_cache = false; + std::vector values(keys_.size()); + for (size_t i = 0; i < kNumBlobs; ++i) { ASSERT_OK(blob_source.GetBlob( read_options, keys_[i], kBlobFileNumber, blob_offsets[i], @@ -1397,6 +1418,8 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) { { read_options.fill_cache = true; + std::vector values(keys_.size()); + // num_blobs is 16, so the total blob cache usage is less than a single // dummy entry. Therefore, cache reservation manager only reserves one dummy // entry here. @@ -1434,8 +1457,8 @@ TEST_F(BlobSourceCacheReservationTest, SimpleCacheReservation) { // cache usage after erasing the cache entry. blob_source.GetBlobCache()->Erase(cache_key.AsSlice()); if (i == kNumBlobs - 1) { - // The last blob is not in the cache. cache_res_mgr should not reserve - // any space for it. + // All the blobs got removed from the cache. cache_res_mgr should not + // reserve any space for them. ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), 0); } else { ASSERT_EQ(cache_res_mgr->GetTotalReservedCacheSize(), kSizeDummyEntry); @@ -1498,11 +1521,11 @@ TEST_F(BlobSourceCacheReservationTest, IncreaseCacheReservationOnFullCache) { ReadOptions read_options; read_options.verify_checksums = true; - std::vector values(keys_.size()); - { read_options.fill_cache = false; + std::vector values(keys_.size()); + for (size_t i = 0; i < kNumBlobs; ++i) { ASSERT_OK(blob_source.GetBlob( read_options, keys_[i], kBlobFileNumber, blob_offsets[i], @@ -1516,6 +1539,8 @@ TEST_F(BlobSourceCacheReservationTest, IncreaseCacheReservationOnFullCache) { { read_options.fill_cache = true; + std::vector values(keys_.size()); + // Since we resized each blob to be kSizeDummyEntry / (num_blobs / 2), we // can't fit all the blobs in the cache at the same time, which means we // should observe cache evictions once we reach the cache's capacity. @@ -1528,6 +1553,9 @@ TEST_F(BlobSourceCacheReservationTest, IncreaseCacheReservationOnFullCache) { blob_file_size_, blob_sizes[i], kNoCompression, nullptr /* prefetch_buffer */, &values[i], nullptr /* bytes_read */)); + // Release cache handle + values[i].Reset(); + if (i < kNumBlobs / 2 - 1) { size_t charge = 0; ASSERT_TRUE(blob_source.TEST_BlobInCache(