From c987eb47125911972d038566d06f4b98b58f2e45 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Wed, 6 Jul 2022 18:57:29 -0700 Subject: [PATCH] Eliminate the copying of blobs when serving reads from the cache (#10297) Summary: The blob cache enables an optimization on the read path: when a blob is found in the cache, we can avoid copying it into the buffer provided by the application. Instead, we can simply transfer ownership of the cache handle to the target `PinnableSlice`. (Note: this relies on the `Cleanable` interface, which is implemented by `PinnableSlice`.) This has the potential to save a lot of CPU, especially with large blob values. This task is a part of https://github.com/facebook/rocksdb/issues/10156 Pull Request resolved: https://github.com/facebook/rocksdb/pull/10297 Reviewed By: riversand963 Differential Revision: D37640311 Pulled By: gangliao fbshipit-source-id: 92de0e35cc703d06c87c5c1861cc2899ec52234a --- cache/cache_helpers.h | 20 +++++++++++ db/blob/blob_source.cc | 75 ++++++++++++++++++++++++++++++------------ db/blob/blob_source.h | 4 +-- 3 files changed, 76 insertions(+), 23 deletions(-) diff --git a/cache/cache_helpers.h b/cache/cache_helpers.h index 4b7849396..7ea2365b8 100644 --- a/cache/cache_helpers.h +++ b/cache/cache_helpers.h @@ -84,6 +84,16 @@ class CacheHandleGuard { Cache::Handle* GetCacheHandle() const { return handle_; } T* GetValue() const { return value_; } + void TransferTo(Cleanable* cleanable) { + if (cleanable) { + if (handle_ != nullptr) { + assert(cache_); + cleanable->RegisterCleanup(&ReleaseCacheHandle, cache_, handle_); + } + } + ResetFields(); + } + void Reset() { ReleaseHandle(); ResetFields(); @@ -105,6 +115,16 @@ class CacheHandleGuard { value_ = nullptr; } + static void ReleaseCacheHandle(void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + assert(cache); + + Cache::Handle* const cache_handle = static_cast(arg2); + assert(cache_handle); + + cache->Release(cache_handle); + } + private: Cache* cache_ = nullptr; Cache::Handle* handle_ = nullptr; diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc index 2b09930a0..b09a2a729 100644 --- a/db/blob/blob_source.cc +++ b/db/blob/blob_source.cc @@ -30,7 +30,7 @@ BlobSource::BlobSource(const ImmutableOptions* immutable_options, BlobSource::~BlobSource() = default; Status BlobSource::GetBlobFromCache(const Slice& cache_key, - CachableEntry* blob) const { + CacheHandleGuard* blob) const { assert(blob); assert(blob->IsEmpty()); assert(blob_cache_); @@ -39,9 +39,7 @@ Status BlobSource::GetBlobFromCache(const Slice& cache_key, Cache::Handle* cache_handle = nullptr; cache_handle = GetEntryFromCache(cache_key); if (cache_handle != nullptr) { - blob->SetCachedValue( - static_cast(blob_cache_->Value(cache_handle)), - blob_cache_.get(), cache_handle); + *blob = CacheHandleGuard(blob_cache_.get(), cache_handle); return Status::OK(); } @@ -51,7 +49,7 @@ Status BlobSource::GetBlobFromCache(const Slice& cache_key, } Status BlobSource::PutBlobIntoCache(const Slice& cache_key, - CachableEntry* cached_blob, + CacheHandleGuard* cached_blob, PinnableSlice* blob) const { assert(blob); assert(!cache_key.empty()); @@ -74,7 +72,8 @@ Status BlobSource::PutBlobIntoCache(const Slice& cache_key, priority); if (s.ok()) { assert(cache_handle != nullptr); - cached_blob->SetCachedValue(buf, blob_cache_.get(), cache_handle); + *cached_blob = + CacheHandleGuard(blob_cache_.get(), cache_handle); } return s; @@ -125,16 +124,32 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, const CacheKey cache_key = GetCacheKey(file_number, file_size, offset); - CachableEntry blob_entry; + CacheHandleGuard blob_handle; // First, try to get the blob from the cache // // If blob cache is enabled, we'll try to read from it. if (blob_cache_) { Slice key = cache_key.AsSlice(); - s = GetBlobFromCache(key, &blob_entry); - if (s.ok() && blob_entry.GetValue()) { - value->PinSelf(*blob_entry.GetValue()); + s = GetBlobFromCache(key, &blob_handle); + if (s.ok() && blob_handle.GetValue()) { + { + value->Reset(); + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle to + // the target PinnableSlice. This has the potential to save a lot of + // CPU, especially with large blob values. + value->PinSlice( + *blob_handle.GetValue(), + [](void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + Cache::Handle* const handle = static_cast(arg2); + cache->Release(handle); + }, + blob_handle.GetCache(), blob_handle.GetCacheHandle()); + // Make the CacheHandleGuard relinquish ownership of the handle. + blob_handle.TransferTo(nullptr); + } // For consistency, the size of on-disk (possibly compressed) blob record // is assigned to bytes_read. @@ -153,7 +168,7 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, } } - assert(blob_entry.IsEmpty()); + assert(blob_handle.IsEmpty()); const bool no_io = read_options.read_tier == kBlockCacheTier; if (no_io) { @@ -192,7 +207,7 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, // If filling cache is allowed and a cache is configured, try to put the // blob to the cache. Slice key = cache_key.AsSlice(); - s = PutBlobIntoCache(key, &blob_entry, value); + s = PutBlobIntoCache(key, &blob_handle, value); if (!s.ok()) { return s; } @@ -256,19 +271,37 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, for (size_t i = 0; i < num_blobs; ++i) { auto& req = blob_reqs[i]; - CachableEntry blob_entry; + CacheHandleGuard blob_handle; const CacheKey cache_key = base_cache_key.WithOffset(req.offset); const Slice key = cache_key.AsSlice(); - const Status s = GetBlobFromCache(key, &blob_entry); + const Status s = GetBlobFromCache(key, &blob_handle); - if (s.ok() && blob_entry.GetValue()) { + if (s.ok() && blob_handle.GetValue()) { assert(req.status); *req.status = s; - req.result->PinSelf(*blob_entry.GetValue()); + + { + req.result->Reset(); + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle + // to the target PinnableSlice. This has the potential to save a lot + // of CPU, especially with large blob values. + req.result->PinSlice( + *blob_handle.GetValue(), + [](void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + Cache::Handle* const handle = static_cast(arg2); + cache->Release(handle); + }, + blob_handle.GetCache(), blob_handle.GetCacheHandle()); + // Make the CacheHandleGuard relinquish ownership of the handle. + blob_handle.TransferTo(nullptr); + } // Update the counter for the number of valid blobs read from the cache. ++cached_blob_count; + // For consistency, the size of each on-disk (possibly compressed) blob // record is accumulated to total_bytes. uint64_t adjustment = @@ -335,11 +368,11 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, // the blob(s) to the cache. for (size_t i = 0; i < _blob_reqs.size(); ++i) { if (_blob_reqs[i]->status->ok()) { - CachableEntry blob_entry; + CacheHandleGuard blob_handle; const CacheKey cache_key = base_cache_key.WithOffset(_blob_reqs[i]->offset); const Slice key = cache_key.AsSlice(); - s = PutBlobIntoCache(key, &blob_entry, _blob_reqs[i]->result); + s = PutBlobIntoCache(key, &blob_handle, _blob_reqs[i]->result); if (!s.ok()) { *_blob_reqs[i]->status = s; } @@ -359,10 +392,10 @@ bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size, const CacheKey cache_key = GetCacheKey(file_number, file_size, offset); const Slice key = cache_key.AsSlice(); - CachableEntry blob_entry; - const Status s = GetBlobFromCache(key, &blob_entry); + CacheHandleGuard blob_handle; + const Status s = GetBlobFromCache(key, &blob_handle); - if (s.ok() && blob_entry.GetValue() != nullptr) { + if (s.ok() && blob_handle.GetValue() != nullptr) { return true; } diff --git a/db/blob/blob_source.h b/db/blob/blob_source.h index aa46311c1..779f8d00f 100644 --- a/db/blob/blob_source.h +++ b/db/blob/blob_source.h @@ -104,10 +104,10 @@ class BlobSource { private: Status GetBlobFromCache(const Slice& cache_key, - CachableEntry* blob) const; + CacheHandleGuard* blob) const; Status PutBlobIntoCache(const Slice& cache_key, - CachableEntry* cached_blob, + CacheHandleGuard* cached_blob, PinnableSlice* blob) const; Cache::Handle* GetEntryFromCache(const Slice& key) const;