Add blob cache tickers, perf context statistics, and DB properties (#10203)

Summary:
To monitor the performance of the new blob cache, we made the following changes:
- Add blob cache hit/miss/insertion tickers (see https://github.com/facebook/rocksdb/wiki/Statistics)
- Extend the perf context similarly (see https://github.com/facebook/rocksdb/wiki/Perf-Context-and-IO-Stats-Context)
- Implement new DB properties (see e.g. https://github.com/facebook/rocksdb/blob/main/include/rocksdb/db.h#L1042-L1051) that expose the capacity and current usage of the blob cache.

This PR is a part of https://github.com/facebook/rocksdb/issues/10156
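For illustration only (not part of the diff below): a minimal sketch of how the new DB properties could be consumed once a DB is opened with a blob cache configured. The path, cache size, and `main()` wrapper are placeholders.

```cpp
#include <iostream>

#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Illustrative sketch: open a DB with a blob cache and read the three
// properties introduced by this PR. Path and sizes are arbitrary.
int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.enable_blob_files = true;
  options.blob_cache = rocksdb::NewLRUCache(64 << 20);  // 64 MiB blob cache

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/blob_cache_demo", &db);
  if (!s.ok()) {
    return 1;
  }

  uint64_t capacity = 0, usage = 0, pinned_usage = 0;
  db->GetIntProperty(rocksdb::DB::Properties::kBlobCacheCapacity, &capacity);
  db->GetIntProperty(rocksdb::DB::Properties::kBlobCacheUsage, &usage);
  db->GetIntProperty(rocksdb::DB::Properties::kBlobCachePinnedUsage,
                     &pinned_usage);
  std::cout << "blob cache capacity=" << capacity << " usage=" << usage
            << " pinned=" << pinned_usage << std::endl;

  delete db;
  return 0;
}
```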

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10203

Reviewed By: ltamasi

Differential Revision: D37478658

Pulled By: gangliao

fbshipit-source-id: d8ee3f41d47315ef725e4551226330b4b6832e40
Branch: main
Author: Gang Liao, committed by Facebook GitHub Bot
Parent: c6055cba30
Commit: d7ebb58cb5
Changed files:
 1. HISTORY.md (4)
 2. db/blob/blob_file_reader.cc (9)
 3. db/blob/blob_source.cc (59)
 4. db/blob/blob_source.h (19)
 5. db/blob/blob_source_test.cc (198)
 6. db/c.cc (12)
 7. db/db_properties_test.cc (79)
 8. db/internal_stats.cc (100)
 9. db/internal_stats.h (7)
10. db_stress_tool/db_stress_test_base.cc (10)
11. include/rocksdb/c.h (8)
12. include/rocksdb/db.h (14)
13. include/rocksdb/perf_context.h (7)
14. include/rocksdb/statistics.h (14)
15. java/rocksjni/portal.h (24)
16. java/src/main/java/org/rocksdb/TickerType.java (30)
17. monitoring/perf_context.cc (38)
18. monitoring/statistics.cc (8)

HISTORY.md

@@ -55,6 +55,10 @@
 * Add support for timestamped snapshots (#9879)
 * Provide support for AbortIO in posix to cancel submitted asynchronous requests using io_uring.
 * Add support for rate-limiting batched `MultiGet()` APIs
+* Added several new tickers, perf context statistics, and DB properties to BlobDB
+* Added new DB properties "rocksdb.blob-cache-capacity", "rocksdb.blob-cache-usage", "rocksdb.blob-cache-pinned-usage" to show blob cache usage.
+* Added new perf context statistics `blob_cache_hit_count`, `blob_read_count`, `blob_read_byte`, `blob_read_time`, `blob_checksum_time` and `blob_decompress_time`.
+* Added new tickers `BLOB_DB_CACHE_MISS`, `BLOB_DB_CACHE_HIT`, `BLOB_DB_CACHE_ADD`, `BLOB_DB_CACHE_ADD_FAILURES`, `BLOB_DB_CACHE_BYTES_READ` and `BLOB_DB_CACHE_BYTES_WRITE`.
 ### Behavior changes
 * DB::Open(), DB::OpenAsSecondary() will fail if a Logger cannot be created (#9984)

db/blob/blob_file_reader.cc

@@ -335,7 +335,9 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
   if (!prefetched) {
     TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
+    PERF_COUNTER_ADD(blob_read_count, 1);
+    PERF_COUNTER_ADD(blob_read_byte, record_size);
+    PERF_TIMER_GUARD(blob_read_time);
     const Status s = ReadFromFile(file_reader_.get(), record_offset,
                                   static_cast<size_t>(record_size), statistics_,
                                   &record_slice, &buf, &aligned_buf,

@@ -428,6 +430,8 @@ void BlobFileReader::MultiGetBlob(
     }
   }
   TEST_SYNC_POINT("BlobFileReader::MultiGetBlob:ReadFromFile");
+  PERF_COUNTER_ADD(blob_read_count, num_blobs);
+  PERF_COUNTER_ADD(blob_read_byte, total_len);
   s = file_reader_->MultiRead(IOOptions(), read_reqs.data(), read_reqs.size(),
                               direct_io ? &aligned_buf : nullptr,
                               read_options.rate_limiter_priority);

@@ -483,6 +487,8 @@ void BlobFileReader::MultiGetBlob(
 Status BlobFileReader::VerifyBlob(const Slice& record_slice,
                                   const Slice& user_key, uint64_t value_size) {
+  PERF_TIMER_GUARD(blob_checksum_time);
   BlobLogRecord record;
   const Slice header_slice(record_slice.data(), BlobLogRecord::kHeaderSize);

@@ -547,6 +553,7 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
   CacheAllocationPtr output;
   {
+    PERF_TIMER_GUARD(blob_decompress_time);
    StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS);
     output = UncompressData(info, value_slice.data(), value_slice.size(),
                             &uncompressed_size, compression_format_version,

db/blob/blob_source.cc

@@ -10,7 +10,9 @@
 #include "db/blob/blob_file_reader.h"
 #include "db/blob/blob_log_format.h"
+#include "monitoring/statistics.h"
 #include "options/cf_options.h"
+#include "table/get_context.h"
 #include "table/multiget_context.h"
 namespace ROCKSDB_NAMESPACE {

@@ -78,6 +80,38 @@ Status BlobSource::PutBlobIntoCache(const Slice& cache_key,
   return s;
 }
+Cache::Handle* BlobSource::GetEntryFromCache(const Slice& key) const {
+  Cache::Handle* cache_handle = nullptr;
+  cache_handle = blob_cache_->Lookup(key, statistics_);
+  if (cache_handle != nullptr) {
+    PERF_COUNTER_ADD(blob_cache_hit_count, 1);
+    RecordTick(statistics_, BLOB_DB_CACHE_HIT);
+    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_READ,
+               blob_cache_->GetUsage(cache_handle));
+  } else {
+    RecordTick(statistics_, BLOB_DB_CACHE_MISS);
+  }
+  return cache_handle;
+}
+
+Status BlobSource::InsertEntryIntoCache(const Slice& key, std::string* value,
+                                        size_t charge,
+                                        Cache::Handle** cache_handle,
+                                        Cache::Priority priority) const {
+  const Status s =
+      blob_cache_->Insert(key, value, charge, &DeleteCacheEntry<std::string>,
+                          cache_handle, priority);
+  if (s.ok()) {
+    assert(*cache_handle != nullptr);
+    RecordTick(statistics_, BLOB_DB_CACHE_ADD);
+    RecordTick(statistics_, BLOB_DB_CACHE_BYTES_WRITE,
+               blob_cache_->GetUsage(*cache_handle));
+  } else {
+    RecordTick(statistics_, BLOB_DB_CACHE_ADD_FAILURES);
+  }
+  return s;
+}
 Status BlobSource::GetBlob(const ReadOptions& read_options,
                            const Slice& user_key, uint64_t file_number,
                            uint64_t offset, uint64_t file_size,

@@ -100,18 +134,21 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
     Slice key = cache_key.AsSlice();
     s = GetBlobFromCache(key, &blob_entry);
     if (s.ok() && blob_entry.GetValue()) {
+      value->PinSelf(*blob_entry.GetValue());
       // For consistency, the size of on-disk (possibly compressed) blob record
       // is assigned to bytes_read.
+      uint64_t adjustment =
+          read_options.verify_checksums
+              ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
+                    user_key.size())
+              : 0;
+      assert(offset >= adjustment);
+      uint64_t record_size = value_size + adjustment;
       if (bytes_read) {
-        uint64_t adjustment =
-            read_options.verify_checksums
-                ? BlobLogRecord::CalculateAdjustmentForRecordHeader(
-                      user_key.size())
-                : 0;
-        assert(offset >= adjustment);
-        *bytes_read = value_size + adjustment;
+        *bytes_read = record_size;
       }
-      value->PinSelf(*blob_entry.GetValue());
       return s;
     }
   }

@@ -139,12 +176,16 @@ Status BlobSource::GetBlob(const ReadOptions& read_options,
       return Status::Corruption("Compression type mismatch when reading blob");
     }
+    uint64_t read_size = 0;
     s = blob_file_reader.GetValue()->GetBlob(
         read_options, user_key, offset, value_size, compression_type,
-        prefetch_buffer, value, bytes_read);
+        prefetch_buffer, value, &read_size);
     if (!s.ok()) {
       return s;
     }
+    if (bytes_read) {
+      *bytes_read = read_size;
+    }
   }
   if (blob_cache_ && read_options.fill_cache) {

db/blob/blob_source.h

@@ -90,6 +90,12 @@ class BlobSource {
                         CachableEntry<std::string>* cached_blob,
                         PinnableSlice* blob) const;
+  Cache::Handle* GetEntryFromCache(const Slice& key) const;
+
+  Status InsertEntryIntoCache(const Slice& key, std::string* value,
+                              size_t charge, Cache::Handle** cache_handle,
+                              Cache::Priority priority) const;
+
   inline CacheKey GetCacheKey(uint64_t file_number, uint64_t file_size,
                               uint64_t offset) const {
     OffsetableCacheKey base_cache_key(db_id_, db_session_id_, file_number,

@@ -97,19 +103,6 @@ class BlobSource {
     return base_cache_key.WithOffset(offset);
   }
-  inline Cache::Handle* GetEntryFromCache(const Slice& key) const {
-    return blob_cache_->Lookup(key, statistics_);
-  }
-
-  inline Status InsertEntryIntoCache(const Slice& key, std::string* value,
-                                     size_t charge,
-                                     Cache::Handle** cache_handle,
-                                     Cache::Priority priority) const {
-    return blob_cache_->Insert(key, value, charge,
-                               &DeleteCacheEntry<std::string>, cache_handle,
-                               priority);
-  }
   const std::string& db_id_;
   const std::string& db_session_id_;

db/blob/blob_source_test.cc

@@ -115,7 +115,7 @@ class BlobSourceTest : public DBTestBase {
     options_.create_if_missing = true;
     LRUCacheOptions co;
-    co.capacity = 2048;
+    co.capacity = 8 << 20;
     co.num_shard_bits = 2;
     co.metadata_charge_policy = kDontChargeCacheMetadata;
     options_.blob_cache = NewLRUCache(co);

@@ -134,6 +134,10 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
   options_.cf_paths.emplace_back(
       test::PerThreadDBPath(env_, "BlobSourceTest_GetBlobsFromCache"), 0);
+  options_.statistics = CreateDBStatistics();
+  Statistics* statistics = options_.statistics.get();
+  assert(statistics);
   DestroyAndReopen(options_);
   ImmutableOptions immutable_options(options_);

@@ -193,8 +197,11 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
   // GetBlob
   std::vector<PinnableSlice> values(keys.size());
   uint64_t bytes_read = 0;
+  uint64_t blob_bytes = 0;
+  uint64_t total_bytes = 0;
   read_options.fill_cache = false;
+  get_perf_context()->Reset();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
@@ -210,9 +217,28 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                               blob_offsets[i]));
+    total_bytes += bytes_read;
   }
+  // Retrieved the blob cache num_blobs * 3 times via TEST_BlobInCache,
+  // GetBlob, and TEST_BlobInCache.
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, num_blobs);
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, total_bytes);
+  ASSERT_GE((int)get_perf_context()->blob_checksum_time, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_decompress_time, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   read_options.fill_cache = true;
+  blob_bytes = 0;
+  total_bytes = 0;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,

@@ -226,11 +252,36 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
     ASSERT_EQ(bytes_read,
               BlobLogRecord::kHeaderSize + keys[i].size() + blob_sizes[i]);
+    blob_bytes += blob_sizes[i];
+    total_bytes += bytes_read;
+    ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, i);
+    ASSERT_EQ((int)get_perf_context()->blob_read_count, i + 1);
+    ASSERT_EQ((int)get_perf_context()->blob_read_byte, total_bytes);
     ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                              blob_offsets[i]));
+    ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, i + 1);
+    ASSERT_EQ((int)get_perf_context()->blob_read_count, i + 1);
+    ASSERT_EQ((int)get_perf_context()->blob_read_byte, total_bytes);
   }
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, num_blobs);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, num_blobs);
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, total_bytes);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs * 2);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), num_blobs);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), num_blobs);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ), blob_bytes);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE),
+            blob_bytes);
   read_options.fill_cache = true;
+  total_bytes = 0;
+  blob_bytes = 0;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
@@ -246,10 +297,29 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
     ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                              blob_offsets[i]));
+    total_bytes += bytes_read;    // on-disk blob record size
+    blob_bytes += blob_sizes[i];  // cached blob value size
   }
+  // Retrieved the blob cache num_blobs * 3 times via TEST_BlobInCache,
+  // GetBlob, and TEST_BlobInCache.
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, num_blobs * 3);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);  // without i/o
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);   // without i/o
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), num_blobs * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ),
+            blob_bytes * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   // Cache-only GetBlob
   read_options.read_tier = ReadTier::kBlockCacheTier;
+  total_bytes = 0;
+  blob_bytes = 0;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,

@@ -265,7 +335,22 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
     ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                              blob_offsets[i]));
+    total_bytes += bytes_read;
+    blob_bytes += blob_sizes[i];
   }
+  // Retrieved the blob cache num_blobs * 3 times via TEST_BlobInCache,
+  // GetBlob, and TEST_BlobInCache.
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, num_blobs * 3);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);  // without i/o
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);   // without i/o
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), num_blobs * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ),
+            blob_bytes * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   }
   options_.blob_cache->EraseUnRefEntries();

@@ -277,6 +362,8 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
   read_options.read_tier = ReadTier::kBlockCacheTier;
   read_options.fill_cache = true;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
@@ -294,6 +381,18 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                               blob_offsets[i]));
   }
+  // Retrieved the blob cache num_blobs * 3 times via TEST_BlobInCache,
+  // GetBlob, and TEST_BlobInCache.
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   }
   {

@@ -304,6 +403,8 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
   read_options.read_tier = ReadTier::kReadAllTier;
   read_options.fill_cache = true;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(file_number, file_size,

@@ -321,6 +422,18 @@ TEST_F(BlobSourceTest, GetBlobsFromCache) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(file_number, file_size,
                                               blob_offsets[i]));
   }
+  // Retrieved the blob cache num_blobs * 3 times via TEST_BlobInCache,
+  // GetBlob, and TEST_BlobInCache.
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs * 3);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   }
 }
@@ -403,6 +516,7 @@ TEST_F(BlobSourceTest, GetCompressedBlobs) {
   read_options.fill_cache = true;
   read_options.read_tier = ReadTier::kReadAllTier;
+  get_perf_context()->Reset();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(file_number, file_size,

@@ -420,7 +534,10 @@ TEST_F(BlobSourceTest, GetCompressedBlobs) {
                                               blob_offsets[i]));
   }
+  ASSERT_GE((int)get_perf_context()->blob_decompress_time, 0);
   read_options.read_tier = ReadTier::kBlockCacheTier;
+  get_perf_context()->Reset();
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_TRUE(blob_source.TEST_BlobInCache(file_number, file_size,

@@ -439,6 +556,8 @@ TEST_F(BlobSourceTest, GetCompressedBlobs) {
     ASSERT_TRUE(blob_source.TEST_BlobInCache(file_number, file_size,
                                              blob_offsets[i]));
   }
+  ASSERT_EQ((int)get_perf_context()->blob_decompress_time, 0);
   }
 }
@@ -446,6 +565,10 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
   options_.cf_paths.emplace_back(
       test::PerThreadDBPath(env_, "BlobSourceTest_MultiGetBlobsFromCache"), 0);
+  options_.statistics = CreateDBStatistics();
+  Statistics* statistics = options_.statistics.get();
+  assert(statistics);
   DestroyAndReopen(options_);
   ImmutableOptions immutable_options(options_);

@@ -525,18 +648,25 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
   read_options.fill_cache = true;
   read_options.read_tier = ReadTier::kReadAllTier;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   // Get half of blobs
   blob_source.MultiGetBlob(read_options, key_refs, blob_file_number,
                            file_size, offsets, sizes, statuses, values,
                            &bytes_read);
+  uint64_t fs_read_bytes = 0;
+  uint64_t ca_read_bytes = 0;
   for (size_t i = 0; i < num_blobs; ++i) {
     if (i % 2 == 0) {
       ASSERT_OK(statuses_buf[i]);
       ASSERT_EQ(value_buf[i], blobs[i]);
+      fs_read_bytes +=
+          blob_sizes[i] + keys[i].size() + BlobLogRecord::kHeaderSize;
      ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                                blob_offsets[i]));
+      ca_read_bytes += blob_sizes[i];
     } else {
       statuses_buf[i].PermitUncheckedError();
       ASSERT_TRUE(value_buf[i].empty());
@@ -545,6 +675,23 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
     }
   }
+  constexpr int num_even_blobs = num_blobs / 2;
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, num_even_blobs);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count,
+            num_even_blobs);  // blocking i/o
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte,
+            fs_read_bytes);  // blocking i/o
+  ASSERT_GE((int)get_perf_context()->blob_checksum_time, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_decompress_time, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), num_even_blobs);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), num_even_blobs);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ),
+            ca_read_bytes);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE),
+            ca_read_bytes);
   // Get the rest of blobs
   for (size_t i = 1; i < num_blobs; i += 2) {  // odd index
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,

@@ -564,6 +711,8 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
   // Cache-only MultiGetBlob
   read_options.read_tier = ReadTier::kBlockCacheTier;
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   key_refs.clear();
   offsets.clear();

@@ -582,12 +731,29 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
                            file_size, offsets, sizes, statuses, values,
                            &bytes_read);
+  uint64_t blob_bytes = 0;
   for (size_t i = 0; i < num_blobs; ++i) {
     ASSERT_OK(statuses_buf[i]);
     ASSERT_EQ(value_buf[i], blobs[i]);
     ASSERT_TRUE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                              blob_offsets[i]));
+    blob_bytes += blob_sizes[i];
   }
+  // Retrieved the blob cache num_blobs * 2 times via GetBlob and
+  // TEST_BlobInCache.
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, num_blobs * 2);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);  // blocking i/o
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);   // blocking i/o
+  ASSERT_GE((int)get_perf_context()->blob_checksum_time, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_decompress_time, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), num_blobs * 2);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ),
+            blob_bytes * 2);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   }
   options_.blob_cache->EraseUnRefEntries();
@@ -615,6 +781,9 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
                                               blob_offsets[i]));
   }
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   blob_source.MultiGetBlob(read_options, key_refs, blob_file_number,
                            file_size, offsets, sizes, statuses, values,
                            &bytes_read);

@@ -625,6 +794,18 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(blob_file_number, file_size,
                                               blob_offsets[i]));
   }
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);  // blocking i/o
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);   // blocking i/o
+  ASSERT_EQ((int)get_perf_context()->blob_checksum_time, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_decompress_time, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs * 2);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   }
   {

@@ -651,6 +832,9 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
                                               blob_offsets[i]));
   }
+  get_perf_context()->Reset();
+  statistics->Reset().PermitUncheckedError();
   blob_source.MultiGetBlob(read_options, key_refs, file_number, file_size,
                            offsets, sizes, statuses, values, &bytes_read);

@@ -660,6 +844,18 @@ TEST_F(BlobSourceTest, MultiGetBlobsFromCache) {
     ASSERT_FALSE(blob_source.TEST_BlobInCache(file_number, file_size,
                                               blob_offsets[i]));
   }
+  ASSERT_EQ((int)get_perf_context()->blob_cache_hit_count, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_read_count, 0);  // blocking i/o
+  ASSERT_EQ((int)get_perf_context()->blob_read_byte, 0);   // blocking i/o
+  ASSERT_EQ((int)get_perf_context()->blob_checksum_time, 0);
+  ASSERT_EQ((int)get_perf_context()->blob_decompress_time, 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_MISS), num_blobs * 2);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_HIT), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_ADD), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_READ), 0);
+  ASSERT_EQ(statistics->getTickerCount(BLOB_DB_CACHE_BYTES_WRITE), 0);
   }
 }

db/c.cc

@@ -4027,6 +4027,18 @@ uint64_t rocksdb_perfcontext_metric(rocksdb_perfcontext_t* context,
       return rep->env_new_logger_nanos;
     case rocksdb_number_async_seek:
       return rep->number_async_seek;
+    case rocksdb_blob_cache_hit_count:
+      return rep->blob_cache_hit_count;
+    case rocksdb_blob_read_count:
+      return rep->blob_read_count;
+    case rocksdb_blob_read_byte:
+      return rep->blob_read_byte;
+    case rocksdb_blob_read_time:
+      return rep->blob_read_time;
+    case rocksdb_blob_checksum_time:
+      return rep->blob_checksum_time;
+    case rocksdb_blob_decompress_time:
+      return rep->blob_decompress_time;
     default:
       break;
   }

db/db_properties_test.cc

@@ -1818,6 +1818,85 @@ TEST_F(DBPropertiesTest, MinObsoleteSstNumberToKeep) {
   ASSERT_TRUE(listener->Validated());
 }

+TEST_F(DBPropertiesTest, BlobCacheProperties) {
+  Options options;
+  uint64_t value;
+  options.env = CurrentOptions().env;
+  // Test with empty blob cache.
+  constexpr size_t kCapacity = 100;
+  LRUCacheOptions co;
+  co.capacity = kCapacity;
+  co.num_shard_bits = 0;
+  co.metadata_charge_policy = kDontChargeCacheMetadata;
+  auto blob_cache = NewLRUCache(co);
+  options.blob_cache = blob_cache;
+  Reopen(options);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
+  ASSERT_EQ(kCapacity, value);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value));
+  ASSERT_EQ(0, value);
+  ASSERT_TRUE(
+      db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value));
+  ASSERT_EQ(0, value);
+  // Insert unpinned blob to the cache and check size.
+  constexpr size_t kSize1 = 70;
+  ASSERT_OK(blob_cache->Insert("blob1", nullptr /*value*/, kSize1,
+                               nullptr /*deleter*/));
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
+  ASSERT_EQ(kCapacity, value);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value));
+  ASSERT_EQ(kSize1, value);
+  ASSERT_TRUE(
+      db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value));
+  ASSERT_EQ(0, value);
+  // Insert pinned blob to the cache and check size.
+  constexpr size_t kSize2 = 60;
+  Cache::Handle* blob2 = nullptr;
+  ASSERT_OK(blob_cache->Insert("blob2", nullptr /*value*/, kSize2,
+                               nullptr /*deleter*/, &blob2));
+  ASSERT_NE(nullptr, blob2);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
+  ASSERT_EQ(kCapacity, value);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value));
+  // blob1 is evicted.
+  ASSERT_EQ(kSize2, value);
+  ASSERT_TRUE(
+      db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value));
+  ASSERT_EQ(kSize2, value);
+  // Insert another pinned blob to make the cache over-sized.
+  constexpr size_t kSize3 = 80;
+  Cache::Handle* blob3 = nullptr;
+  ASSERT_OK(blob_cache->Insert("blob3", nullptr /*value*/, kSize3,
+                               nullptr /*deleter*/, &blob3));
+  ASSERT_NE(nullptr, blob3);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
+  ASSERT_EQ(kCapacity, value);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value));
+  ASSERT_EQ(kSize2 + kSize3, value);
+  ASSERT_TRUE(
+      db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value));
+  ASSERT_EQ(kSize2 + kSize3, value);
+  // Check size after release.
+  blob_cache->Release(blob2);
+  blob_cache->Release(blob3);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheCapacity, &value));
+  ASSERT_EQ(kCapacity, value);
+  ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kBlobCacheUsage, &value));
+  // blob2 will be evicted, while blob3 remain in cache after release.
+  ASSERT_EQ(kSize3, value);
+  ASSERT_TRUE(
+      db_->GetIntProperty(DB::Properties::kBlobCachePinnedUsage, &value));
+  ASSERT_EQ(0, value);
+}
+
 TEST_F(DBPropertiesTest, BlockCacheProperties) {
   Options options;
   uint64_t value;

db/internal_stats.cc

@@ -307,6 +307,9 @@ static const std::string total_blob_file_size = "total-blob-file-size";
 static const std::string live_blob_file_size = "live-blob-file-size";
 static const std::string live_blob_file_garbage_size =
     "live-blob-file-garbage-size";
+static const std::string blob_cache_capacity = "blob-cache-capacity";
+static const std::string blob_cache_usage = "blob-cache-usage";
+static const std::string blob_cache_pinned_usage = "blob-cache-pinned-usage";
 const std::string DB::Properties::kNumFilesAtLevelPrefix =
     rocksdb_prefix + num_files_at_level_prefix;

@@ -409,6 +412,12 @@ const std::string DB::Properties::kLiveBlobFileSize =
     rocksdb_prefix + live_blob_file_size;
 const std::string DB::Properties::kLiveBlobFileGarbageSize =
     rocksdb_prefix + live_blob_file_garbage_size;
+const std::string DB::Properties::kBlobCacheCapacity =
+    rocksdb_prefix + blob_cache_capacity;
+const std::string DB::Properties::kBlobCacheUsage =
+    rocksdb_prefix + blob_cache_usage;
+const std::string DB::Properties::kBlobCachePinnedUsage =
+    rocksdb_prefix + blob_cache_pinned_usage;
 const UnorderedMap<std::string, DBPropertyInfo>
     InternalStats::ppt_name_to_info = {

@@ -570,6 +579,15 @@ const UnorderedMap<std::string, DBPropertyInfo>
         {DB::Properties::kLiveBlobFileGarbageSize,
          {false, nullptr, &InternalStats::HandleLiveBlobFileGarbageSize,
           nullptr, nullptr}},
+        {DB::Properties::kBlobCacheCapacity,
+         {false, nullptr, &InternalStats::HandleBlobCacheCapacity, nullptr,
+          nullptr}},
+        {DB::Properties::kBlobCacheUsage,
+         {false, nullptr, &InternalStats::HandleBlobCacheUsage, nullptr,
+          nullptr}},
+        {DB::Properties::kBlobCachePinnedUsage,
+         {false, nullptr, &InternalStats::HandleBlobCachePinnedUsage, nullptr,
+          nullptr}},
 };
 InternalStats::InternalStats(int num_levels, SystemClock* clock,

@@ -585,10 +603,8 @@ InternalStats::InternalStats(int num_levels, SystemClock* clock,
       clock_(clock),
       cfd_(cfd),
       started_at_(clock->NowMicros()) {
-  Cache* block_cache = nullptr;
-  bool ok = GetBlockCacheForStats(&block_cache);
-  if (ok) {
-    assert(block_cache);
+  Cache* block_cache = GetBlockCacheForStats();
+  if (block_cache) {
     // Extract or create stats collector. Could fail in rare cases.
     Status s = CacheEntryStatsCollector<CacheEntryRoleStats>::GetShared(
         block_cache, clock_, &cache_entry_stats_collector_);

@@ -597,8 +613,6 @@ InternalStats::InternalStats(int num_levels, SystemClock* clock,
     } else {
       assert(!cache_entry_stats_collector_);
     }
-  } else {
-    assert(!block_cache);
   }
 }

@@ -851,6 +865,40 @@ bool InternalStats::HandleLiveBlobFileGarbageSize(uint64_t* value,
   return true;
 }
+
+Cache* InternalStats::GetBlobCacheForStats() {
+  return cfd_->ioptions()->blob_cache.get();
+}
+
+bool InternalStats::HandleBlobCacheCapacity(uint64_t* value, DBImpl* /*db*/,
+                                            Version* /*version*/) {
+  Cache* blob_cache = GetBlobCacheForStats();
+  if (blob_cache) {
+    *value = static_cast<uint64_t>(blob_cache->GetCapacity());
+    return true;
+  }
+  return false;
+}
+
+bool InternalStats::HandleBlobCacheUsage(uint64_t* value, DBImpl* /*db*/,
+                                         Version* /*version*/) {
+  Cache* blob_cache = GetBlobCacheForStats();
+  if (blob_cache) {
+    *value = static_cast<uint64_t>(blob_cache->GetUsage());
+    return true;
+  }
+  return false;
+}
+
+bool InternalStats::HandleBlobCachePinnedUsage(uint64_t* value, DBImpl* /*db*/,
+                                               Version* /*version*/) {
+  Cache* blob_cache = GetBlobCacheForStats();
+  if (blob_cache) {
+    *value = static_cast<uint64_t>(blob_cache->GetPinnedUsage());
+    return true;
+  }
+  return false;
+}
+
 const DBPropertyInfo* GetPropertyInfo(const Slice& property) {
   std::string ppt_name = GetPropertyNameAndArg(property).first.ToString();
   auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name);

@@ -1313,46 +1361,40 @@ bool InternalStats::HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* /*db*/,
   return *value > 0 && *value < std::numeric_limits<uint64_t>::max();
 }

-bool InternalStats::GetBlockCacheForStats(Cache** block_cache) {
-  assert(block_cache != nullptr);
+Cache* InternalStats::GetBlockCacheForStats() {
   auto* table_factory = cfd_->ioptions()->table_factory.get();
   assert(table_factory != nullptr);
-  *block_cache =
-      table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
-  return *block_cache != nullptr;
+  return table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
 }

 bool InternalStats::HandleBlockCacheCapacity(uint64_t* value, DBImpl* /*db*/,
                                              Version* /*version*/) {
-  Cache* block_cache;
-  bool ok = GetBlockCacheForStats(&block_cache);
-  if (!ok) {
-    return false;
+  Cache* block_cache = GetBlockCacheForStats();
+  if (block_cache) {
+    *value = static_cast<uint64_t>(block_cache->GetCapacity());
+    return true;
   }
-  *value = static_cast<uint64_t>(block_cache->GetCapacity());
-  return true;
+  return false;
 }

 bool InternalStats::HandleBlockCacheUsage(uint64_t* value, DBImpl* /*db*/,
                                           Version* /*version*/) {
-  Cache* block_cache;
-  bool ok = GetBlockCacheForStats(&block_cache);
-  if (!ok) {
-    return false;
+  Cache* block_cache = GetBlockCacheForStats();
+  if (block_cache) {
+    *value = static_cast<uint64_t>(block_cache->GetUsage());
+    return true;
   }
-  *value = static_cast<uint64_t>(block_cache->GetUsage());
-  return true;
+  return false;
 }

 bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/,
                                                 Version* /*version*/) {
-  Cache* block_cache;
-  bool ok = GetBlockCacheForStats(&block_cache);
-  if (!ok) {
-    return false;
+  Cache* block_cache = GetBlockCacheForStats();
+  if (block_cache) {
+    *value = static_cast<uint64_t>(block_cache->GetPinnedUsage());
+    return true;
   }
-  *value = static_cast<uint64_t>(block_cache->GetPinnedUsage());
-  return true;
+  return false;
 }

 void InternalStats::DumpDBMapStats(

db/internal_stats.h

@@ -500,7 +500,8 @@ class InternalStats {
   void DumpCFStatsNoFileHistogram(std::string* value);
   void DumpCFFileHistogram(std::string* value);
-  bool GetBlockCacheForStats(Cache** block_cache);
+  Cache* GetBlockCacheForStats();
+  Cache* GetBlobCacheForStats();
   // Per-DB stats
   std::atomic<uint64_t> db_stats_[kIntStatsNumMax];

@@ -693,6 +694,10 @@ class InternalStats {
   bool HandleLiveBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
   bool HandleLiveBlobFileGarbageSize(uint64_t* value, DBImpl* db,
                                      Version* version);
+  bool HandleBlobCacheCapacity(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlobCacheUsage(uint64_t* value, DBImpl* db, Version* version);
+  bool HandleBlobCachePinnedUsage(uint64_t* value, DBImpl* db,
+                                  Version* version);
   // Total number of background errors encountered. Every time a flush task
   // or compaction task fails, this counter is incremented. The failure can

db_stress_tool/db_stress_test_base.cc

@@ -1808,6 +1808,16 @@ void StressTest::TestGetProperty(ThreadState* thread) const {
   unknownPropertyNames.insert(levelPropertyNames.begin(),
                               levelPropertyNames.end());
+  std::unordered_set<std::string> blobCachePropertyNames = {
+      DB::Properties::kBlobCacheCapacity,
+      DB::Properties::kBlobCacheUsage,
+      DB::Properties::kBlobCachePinnedUsage,
+  };
+  if (db_->GetOptions().blob_cache == nullptr) {
+    unknownPropertyNames.insert(blobCachePropertyNames.begin(),
+                                blobCachePropertyNames.end());
+  }
   std::string prop;
   for (const auto& ppt_name_and_info : InternalStats::ppt_name_to_info) {
     bool res = db_->GetProperty(ppt_name_and_info.first, &prop);

include/rocksdb/c.h

@@ -1668,7 +1668,13 @@ enum {
   rocksdb_env_unlock_file_nanos,
   rocksdb_env_new_logger_nanos,
   rocksdb_number_async_seek,
-  rocksdb_total_metric_count = 69
+  rocksdb_blob_cache_hit_count,
+  rocksdb_blob_read_count,
+  rocksdb_blob_read_byte,
+  rocksdb_blob_read_time,
+  rocksdb_blob_checksum_time,
+  rocksdb_blob_decompress_time,
+  rocksdb_total_metric_count = 77
 };
 extern ROCKSDB_LIBRARY_API void rocksdb_set_perf_level(int);

include/rocksdb/db.h

@@ -1080,6 +1080,17 @@ class DB {
     //  "rocksdb.live-blob-file-garbage-size" - returns the total amount of
     //      garbage in the blob files in the current version.
     static const std::string kLiveBlobFileGarbageSize;
+
+    //  "rocksdb.blob-cache-capacity" - returns blob cache capacity.
+    static const std::string kBlobCacheCapacity;
+
+    //  "rocksdb.blob-cache-usage" - returns the memory size for the entries
+    //      residing in blob cache.
+    static const std::string kBlobCacheUsage;
+
+    //  "rocksdb.blob-cache-pinned-usage" - returns the memory size for the
+    //      entries being pinned in blob cache.
+    static const std::string kBlobCachePinnedUsage;
   };
 #endif /* ROCKSDB_LITE */

@@ -1145,6 +1156,9 @@ class DB {
   //  "rocksdb.num-blob-files"
   //  "rocksdb.total-blob-file-size"
   //  "rocksdb.live-blob-file-size"
+  //  "rocksdb.blob-cache-capacity"
+  //  "rocksdb.blob-cache-usage"
+  //  "rocksdb.blob-cache-pinned-usage"
   virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                               const Slice& property, uint64_t* value) = 0;
   virtual bool GetIntProperty(const Slice& property, uint64_t* value) {

include/rocksdb/perf_context.h

@@ -84,6 +84,13 @@ struct PerfContext {
   uint64_t multiget_read_bytes;  // bytes for vals returned by MultiGet
   uint64_t iter_read_bytes;      // bytes for keys/vals decoded by iterator
+
+  uint64_t blob_cache_hit_count;  // total number of blob cache hits
+  uint64_t blob_read_count;       // total number of blob reads (with IO)
+  uint64_t blob_read_byte;        // total number of bytes from blob reads
+  uint64_t blob_read_time;        // total nanos spent on blob reads
+  uint64_t blob_checksum_time;    // total nanos spent on blob checksum
+  uint64_t blob_decompress_time;  // total nanos spent on blob decompression
   // total number of internal keys skipped over during iteration.
   // There are several reasons for it:
   // 1. when calling Next(), the iterator is in the position of the previous
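Editor's note (illustrative, not part of the diff): the new fields are read per thread like the existing perf context counters. A minimal sketch, assuming an already opened DB handle; the key and function name are placeholders.

```cpp
#include <iostream>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/perf_level.h"

// Placeholder example: measure blob-related work done by a single Get().
void ProfileBlobGet(rocksdb::DB* db, const rocksdb::Slice& key) {
  // Enable timing counters and clear the per-thread perf context.
  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kEnableTimeExceptForMutex);
  rocksdb::get_perf_context()->Reset();

  std::string value;
  db->Get(rocksdb::ReadOptions(), key, &value).PermitUncheckedError();

  const rocksdb::PerfContext* ctx = rocksdb::get_perf_context();
  std::cout << "blob_cache_hit_count=" << ctx->blob_cache_hit_count
            << " blob_read_count=" << ctx->blob_read_count
            << " blob_read_byte=" << ctx->blob_read_byte
            << " blob_read_time=" << ctx->blob_read_time
            << " blob_checksum_time=" << ctx->blob_checksum_time
            << " blob_decompress_time=" << ctx->blob_decompress_time
            << std::endl;

  rocksdb::SetPerfLevel(rocksdb::PerfLevel::kDisable);
}
```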

include/rocksdb/statistics.h

@@ -434,6 +434,20 @@ enum Tickers : uint32_t {
   BLOCK_CHECKSUM_COMPUTE_COUNT,
   MULTIGET_COROUTINE_COUNT,
+
+  // Integrated BlobDB specific stats
+  // # of times cache miss when accessing blob from blob cache.
+  BLOB_DB_CACHE_MISS,
+  // # of times cache hit when accessing blob from blob cache.
+  BLOB_DB_CACHE_HIT,
+  // # of data blocks added to blob cache.
+  BLOB_DB_CACHE_ADD,
+  // # of failures when adding blobs to blob cache.
+  BLOB_DB_CACHE_ADD_FAILURES,
+  // # of bytes read from blob cache.
+  BLOB_DB_CACHE_BYTES_READ,
+  // # of bytes written into blob cache.
+  BLOB_DB_CACHE_BYTES_WRITE,
   TICKER_ENUM_MAX
 };
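Editor's note (illustrative, not part of the diff): a small sketch of polling these cumulative tickers. It assumes the DB was opened with `options.statistics = rocksdb::CreateDBStatistics()` and a blob cache configured; the function name is a placeholder.

```cpp
#include <iostream>
#include <memory>

#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

// Assumes `options` is the Options object the DB was opened with and that
// options.statistics was set (e.g. via rocksdb::CreateDBStatistics()).
void ReportBlobCacheTickers(const rocksdb::Options& options) {
  std::shared_ptr<rocksdb::Statistics> stats = options.statistics;
  if (stats == nullptr) {
    return;  // statistics collection was not enabled
  }
  std::cout << "blob cache hits:   "
            << stats->getTickerCount(rocksdb::BLOB_DB_CACHE_HIT) << "\n"
            << "blob cache misses: "
            << stats->getTickerCount(rocksdb::BLOB_DB_CACHE_MISS) << "\n"
            << "blob cache adds:   "
            << stats->getTickerCount(rocksdb::BLOB_DB_CACHE_ADD) << "\n"
            << "add failures:      "
            << stats->getTickerCount(rocksdb::BLOB_DB_CACHE_ADD_FAILURES) << "\n"
            << "bytes read:        "
            << stats->getTickerCount(rocksdb::BLOB_DB_CACHE_BYTES_READ) << "\n"
            << "bytes written:     "
            << stats->getTickerCount(rocksdb::BLOB_DB_CACHE_BYTES_WRITE) << "\n";
}
```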

java/rocksjni/portal.h

@@ -5088,6 +5088,18 @@ class TickerTypeJni {
         return -0x2D;
       case ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT:
         return -0x2E;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_MISS:
+        return -0x2F;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_HIT:
+        return -0x30;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD:
+        return -0x31;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD_FAILURES:
+        return -0x32;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_READ:
+        return -0x33;
+      case ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_WRITE:
+        return -0x34;
       case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
         // 0x5F was the max value in the initial copy of tickers to Java.
         // Since these values are exposed directly to Java clients, we keep

@@ -5459,6 +5471,18 @@ class TickerTypeJni {
         return ROCKSDB_NAMESPACE::Tickers::NON_LAST_LEVEL_READ_COUNT;
       case -0x2E:
         return ROCKSDB_NAMESPACE::Tickers::BLOCK_CHECKSUM_COMPUTE_COUNT;
+      case -0x2F:
+        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_MISS;
+      case -0x30:
+        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_HIT;
+      case -0x31:
+        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD;
+      case -0x32:
+        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_ADD_FAILURES;
+      case -0x33:
+        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_READ;
+      case -0x34:
+        return ROCKSDB_NAMESPACE::Tickers::BLOB_DB_CACHE_BYTES_WRITE;
       case 0x5F:
         // 0x5F was the max value in the initial copy of tickers to Java.
         // Since these values are exposed directly to Java clients, we keep

java/src/main/java/org/rocksdb/TickerType.java

@@ -806,6 +806,36 @@ public enum TickerType {
   BLOCK_CHECKSUM_COMPUTE_COUNT((byte) -0x2E),
+
+  /**
+   * # of times cache miss when accessing blob from blob cache.
+   */
+  BLOB_DB_CACHE_MISS((byte) -0x2F),
+
+  /**
+   * # of times cache hit when accessing blob from blob cache.
+   */
+  BLOB_DB_CACHE_HIT((byte) -0x30),
+
+  /**
+   * # of data blocks added to blob cache.
+   */
+  BLOB_DB_CACHE_ADD((byte) -0x31),
+
+  /**
+   * # of failures when adding blobs to blob cache.
+   */
+  BLOB_DB_CACHE_ADD_FAILURES((byte) -0x32),
+
+  /**
+   * # of bytes read from blob cache.
+   */
+  BLOB_DB_CACHE_BYTES_READ((byte) -0x33),
+
+  /**
+   * # of bytes written into blob cache.
+   */
+  BLOB_DB_CACHE_BYTES_WRITE((byte) -0x34),
   TICKER_ENUM_MAX((byte) 0x5F);

   private final byte value;

monitoring/perf_context.cc

@@ -47,6 +47,14 @@ PerfContext::PerfContext(const PerfContext& other) {
   get_read_bytes = other.get_read_bytes;
   multiget_read_bytes = other.multiget_read_bytes;
   iter_read_bytes = other.iter_read_bytes;
+
+  blob_cache_hit_count = other.blob_cache_hit_count;
+  blob_read_count = other.blob_read_count;
+  blob_read_byte = other.blob_read_byte;
+  blob_read_time = other.blob_read_time;
+  blob_checksum_time = other.blob_checksum_time;
+  blob_decompress_time = other.blob_decompress_time;
+
   internal_key_skipped_count = other.internal_key_skipped_count;
   internal_delete_skipped_count = other.internal_delete_skipped_count;
   internal_recent_skipped_count = other.internal_recent_skipped_count;

@@ -146,6 +154,14 @@ PerfContext::PerfContext(PerfContext&& other) noexcept {
   get_read_bytes = other.get_read_bytes;
   multiget_read_bytes = other.multiget_read_bytes;
   iter_read_bytes = other.iter_read_bytes;
+
+  blob_cache_hit_count = other.blob_cache_hit_count;
+  blob_read_count = other.blob_read_count;
+  blob_read_byte = other.blob_read_byte;
+  blob_read_time = other.blob_read_time;
+  blob_checksum_time = other.blob_checksum_time;
+  blob_decompress_time = other.blob_decompress_time;
+
   internal_key_skipped_count = other.internal_key_skipped_count;
   internal_delete_skipped_count = other.internal_delete_skipped_count;
   internal_recent_skipped_count = other.internal_recent_skipped_count;

@@ -247,6 +263,14 @@ PerfContext& PerfContext::operator=(const PerfContext& other) {
   get_read_bytes = other.get_read_bytes;
   multiget_read_bytes = other.multiget_read_bytes;
   iter_read_bytes = other.iter_read_bytes;
+
+  blob_cache_hit_count = other.blob_cache_hit_count;
+  blob_read_count = other.blob_read_count;
+  blob_read_byte = other.blob_read_byte;
+  blob_read_time = other.blob_read_time;
+  blob_checksum_time = other.blob_checksum_time;
+  blob_decompress_time = other.blob_decompress_time;
+
   internal_key_skipped_count = other.internal_key_skipped_count;
   internal_delete_skipped_count = other.internal_delete_skipped_count;
   internal_recent_skipped_count = other.internal_recent_skipped_count;

@@ -345,6 +369,14 @@ void PerfContext::Reset() {
   get_read_bytes = 0;
   multiget_read_bytes = 0;
   iter_read_bytes = 0;
+
+  blob_cache_hit_count = 0;
+  blob_read_count = 0;
+  blob_read_byte = 0;
+  blob_read_time = 0;
+  blob_checksum_time = 0;
+  blob_decompress_time = 0;
+
   internal_key_skipped_count = 0;
   internal_delete_skipped_count = 0;
   internal_recent_skipped_count = 0;

@@ -467,6 +499,12 @@ std::string PerfContext::ToString(bool exclude_zero_counters) const {
   PERF_CONTEXT_OUTPUT(get_read_bytes);
   PERF_CONTEXT_OUTPUT(multiget_read_bytes);
   PERF_CONTEXT_OUTPUT(iter_read_bytes);
+  PERF_CONTEXT_OUTPUT(blob_cache_hit_count);
+  PERF_CONTEXT_OUTPUT(blob_read_count);
+  PERF_CONTEXT_OUTPUT(blob_read_byte);
+  PERF_CONTEXT_OUTPUT(blob_read_time);
+  PERF_CONTEXT_OUTPUT(blob_checksum_time);
+  PERF_CONTEXT_OUTPUT(blob_decompress_time);
   PERF_CONTEXT_OUTPUT(internal_key_skipped_count);
   PERF_CONTEXT_OUTPUT(internal_delete_skipped_count);
   PERF_CONTEXT_OUTPUT(internal_recent_skipped_count);

monitoring/statistics.cc

@@ -227,7 +227,13 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
     {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
     {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
    {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"},
-    {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}};
+    {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"},
+    {BLOB_DB_CACHE_MISS, "rocksdb.blobdb.cache.miss"},
+    {BLOB_DB_CACHE_HIT, "rocksdb.blobdb.cache.hit"},
+    {BLOB_DB_CACHE_ADD, "rocksdb.blobdb.cache.add"},
+    {BLOB_DB_CACHE_ADD_FAILURES, "rocksdb.blobdb.cache.add.failures"},
+    {BLOB_DB_CACHE_BYTES_READ, "rocksdb.blobdb.cache.bytes.read"},
+    {BLOB_DB_CACHE_BYTES_WRITE, "rocksdb.blobdb.cache.bytes.write"}};

 const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
     {DB_GET, "rocksdb.db.get.micros"},
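Editor's note (an assumption about typical usage, not part of the diff): the name strings registered above are what appear when dumping statistics as text via `Statistics::ToString()`, so the new counters show up in any existing stats-dump pipeline without further changes.

```cpp
#include <iostream>
#include <memory>

#include "rocksdb/statistics.h"

// Assumption: `stats` is the Statistics object the DB was opened with.
// Statistics::ToString() prints one line per ticker using the names in
// TickersNameMap, e.g. "rocksdb.blobdb.cache.hit COUNT : 123".
void DumpStatistics(const std::shared_ptr<rocksdb::Statistics>& stats) {
  if (stats) {
    std::cout << stats->ToString() << std::endl;
  }
}
```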
