From 54aebb2cc544300331ad938da9da4922a0844663 Mon Sep 17 00:00:00 2001 From: anand76 Date: Fri, 29 Jul 2022 14:24:44 -0700 Subject: [PATCH] Fix cache metrics update when secondary cache is used (#10440) Summary: If a secondary cache is configured, its possible that a cache lookup will get a hit in the secondary cache. In that case, the ```LRUCacheShard::Lookup``` doesn't immediately update the ```total_charge``` for the item handle if the ```wait``` parameter is false (i.e caller will call later to check the completeness). However, ```BlockBasedTable::GetEntryFromCache``` assumes the handle is complete and calls ```UpdateCacheHitMetrics```, which checks the usage of the cache item and fails the assert in https://github.com/facebook/rocksdb/blob/main/cache/lru_cache.h#L237 (```assert(total_charge >= meta_charge)```). To fix this, we call ```UpdateCacheHitMetrics``` later in ```MultiGet```, after waiting for all cache lookup completions. Test plan - Run crash test with changes from https://github.com/facebook/rocksdb/issues/10160 Pull Request resolved: https://github.com/facebook/rocksdb/pull/10440 Reviewed By: gitbw95 Differential Revision: D38283968 Pulled By: anand1976 fbshipit-source-id: 31c54ef43517726c6e5fdda81899b364241dd7e1 --- cache/lru_cache.cc | 1 + table/block_based/block_based_table_reader.cc | 15 ++++++++++----- .../block_based_table_reader_sync_and_async.h | 7 +++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cache/lru_cache.cc b/cache/lru_cache.cc index 686b386d9..bed5e5c9f 100644 --- a/cache/lru_cache.cc +++ b/cache/lru_cache.cc @@ -458,6 +458,7 @@ Cache::Handle* LRUCacheShard::Lookup( memcpy(e->key_data, key.data(), key.size()); e->value = nullptr; e->sec_handle = secondary_handle.release(); + e->total_charge = 0; e->Ref(); if (wait) { diff --git a/table/block_based/block_based_table_reader.cc b/table/block_based/block_based_table_reader.cc index b4640245a..024932a70 100644 --- a/table/block_based/block_based_table_reader.cc +++ b/table/block_based/block_based_table_reader.cc @@ -392,11 +392,16 @@ Cache::Handle* BlockBasedTable::GetEntryFromCache( cache_handle = block_cache->Lookup(key, rep_->ioptions.statistics.get()); } - if (cache_handle != nullptr) { - UpdateCacheHitMetrics(block_type, get_context, - block_cache->GetUsage(cache_handle)); - } else { - UpdateCacheMissMetrics(block_type, get_context); + // Avoid updating metrics here if the handle is not complete yet. This + // happens with MultiGet and secondary cache. So update the metrics only + // if its a miss, or a hit and value is ready + if (!cache_handle || block_cache->Value(cache_handle)) { + if (cache_handle != nullptr) { + UpdateCacheHitMetrics(block_type, get_context, + block_cache->GetUsage(cache_handle)); + } else { + UpdateCacheMissMetrics(block_type, get_context); + } } return cache_handle; diff --git a/table/block_based/block_based_table_reader_sync_and_async.h b/table/block_based/block_based_table_reader_sync_and_async.h index 634285fbb..728efc55a 100644 --- a/table/block_based/block_based_table_reader_sync_and_async.h +++ b/table/block_based/block_based_table_reader_sync_and_async.h @@ -484,14 +484,21 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet) } results[i].UpdateCachedValue(); void* val = results[i].GetValue(); + Cache::Handle* handle = results[i].GetCacheHandle(); + // GetContext for any key will do, as the stats will be aggregated + // anyway + GetContext* get_context = sst_file_range.begin()->get_context; if (!val) { // The async cache lookup failed - could be due to an error // or a false positive. We need to read the data block from // the SST file results[i].Reset(); total_len += BlockSizeWithTrailer(block_handles[i]); + UpdateCacheMissMetrics(BlockType::kData, get_context); } else { block_handles[i] = BlockHandle::NullBlockHandle(); + UpdateCacheHitMetrics(BlockType::kData, get_context, + block_cache->GetUsage(handle)); } } }