diff --git a/HISTORY.md b/HISTORY.md index 0456014db..d4f19fb77 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -13,6 +13,7 @@ * RemoteCompaction's interface now includes `db_name`, `db_id`, `session_id`, which could help the user uniquely identify compaction job between db instances and sessions. * Added a ticker statistic, "rocksdb.verify_checksum.read.bytes", reporting how many bytes were read from file to serve `VerifyChecksum()` and `VerifyFileChecksums()` queries. * Added ticker statistics, "rocksdb.backup.read.bytes" and "rocksdb.backup.write.bytes", reporting how many bytes were read and written during backup. +* Added properties for BlobDB: `rocksdb.num-blob-files`, `rocksdb.blob-stats`, `rocksdb.total-blob-file-size`, and `rocksdb.live-blob-file-size`. The existing property `rocksdb.estimate-live-data-size` was also extended to include live bytes residing in blob files. ### Public API change * Remove obsolete implementation details FullKey and ParseFullKey from public API diff --git a/db/blob/db_blob_basic_test.cc b/db/blob/db_blob_basic_test.cc index b765eb446..1b94c64df 100644 --- a/db/blob/db_blob_basic_test.cc +++ b/db/blob/db_blob_basic_test.cc @@ -367,6 +367,111 @@ TEST_F(DBBlobBasicTest, MultiGetMergeBlobWithPut) { ASSERT_EQ(values[2], "v2_0"); } +#ifndef ROCKSDB_LITE +TEST_F(DBBlobBasicTest, Properties) { + Options options = GetDefaultOptions(); + options.enable_blob_files = true; + options.min_blob_size = 0; + Reopen(options); + ASSERT_OK(Put("key1", "0000000000")); + ASSERT_OK(Put("key2", "0000000000")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key3", "0000000000")); + ASSERT_OK(Flush()); + // num of files + uint64_t num_blob_files = 0; + EXPECT_TRUE( + db_->GetIntProperty(DB::Properties::kNumBlobFiles, &num_blob_files)); + ASSERT_EQ(num_blob_files, 2); + // size of live blob files + uint64_t live_blob_file_size = 0; + EXPECT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileSize, + &live_blob_file_size)); + // size of total blob files + uint64_t
total_blob_file_size = 0; + EXPECT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize, + &total_blob_file_size)); + ASSERT_EQ(live_blob_file_size, total_blob_file_size); + auto* versions = dbfull()->TEST_GetVersionSet(); + auto* current = versions->GetColumnFamilySet()->GetDefault()->current(); + const auto& blob_files = current->storage_info()->GetBlobFiles(); + uint64_t expected_live_blob_file_size = 0; + for (const auto& pair : blob_files) { + expected_live_blob_file_size += pair.second->GetTotalBlobBytes(); + } + ASSERT_EQ(live_blob_file_size, expected_live_blob_file_size); + + // blob stats + std::string blob_stats = ""; + EXPECT_TRUE(db_->GetProperty(DB::Properties::kBlobStats, &blob_stats)); + EXPECT_TRUE(blob_stats.size() > 0); + + // delete key2 to make some garbage + ASSERT_OK(Delete("key2")); + ASSERT_OK(Flush()); + constexpr Slice* begin = nullptr; + constexpr Slice* end = nullptr; + ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end)); + + std::string new_blob_stats = ""; + EXPECT_TRUE(db_->GetProperty(DB::Properties::kBlobStats, &new_blob_stats)); + std::cout << blob_stats << new_blob_stats << std::endl; + + { + std::istringstream ss1(blob_stats); + std::istringstream ss2(new_blob_stats); + std::string stats_line = ""; + std::string new_stats_line = ""; + // skip the first line because it is the version info + std::getline(ss1, stats_line); + std::getline(ss2, new_stats_line); + for (size_t i = 0; i < 3; i++) { + std::getline(ss1, stats_line); + std::getline(ss2, new_stats_line); + if (i == 2) { + ASSERT_TRUE(stats_line != new_stats_line); + } else { + ASSERT_EQ(stats_line, new_stats_line); + } + } + } +} + +TEST_F(DBBlobBasicTest, PropertiesMultiVersion) { + Options options = GetDefaultOptions(); + options.enable_blob_files = true; + options.min_blob_size = 0; + Reopen(options); + + ASSERT_OK(Put("key1", "0000000000")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", "0000000000")); + ASSERT_OK(Flush()); + // create an
iterator to make the current version alive + Iterator* iter = db_->NewIterator(ReadOptions()); + ASSERT_OK(iter->status()); + ASSERT_OK(Put("key3", "0000000000")); + ASSERT_OK(Flush()); + + // size of total blob files + uint64_t total_blob_file_size = 0; + EXPECT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize, + &total_blob_file_size)); + + // total size equals to the current version's blob size because previous + // version's files are duplicated and thus not counted + auto* versions = dbfull()->TEST_GetVersionSet(); + auto* current = versions->GetColumnFamilySet()->GetDefault()->current(); + const auto& blob_files = current->storage_info()->GetBlobFiles(); + uint64_t current_v_blob_size = 0; + for (const auto& pair : blob_files) { + current_v_blob_size += pair.second->GetTotalBlobBytes(); + } + ASSERT_EQ(current_v_blob_size, total_blob_file_size); + delete iter; +} +#endif // !ROCKSDB_LITE + class DBBlobBasicIOErrorTest : public DBBlobBasicTest, public testing::WithParamInterface { protected: diff --git a/db/column_family.cc b/db/column_family.cc index 6a8e0c7a4..51f141349 100644 --- a/db/column_family.cc +++ b/db/column_family.cc @@ -1048,6 +1048,10 @@ uint64_t ColumnFamilyData::GetTotalSstFilesSize() const { return VersionSet::GetTotalSstFilesSize(dummy_versions_); } +uint64_t ColumnFamilyData::GetTotalBlobFileSize() const { + return VersionSet::GetTotalBlobFileSize(dummy_versions_); +} + uint64_t ColumnFamilyData::GetLiveSstFilesSize() const { return current_->GetSstFilesSize(); } diff --git a/db/column_family.h b/db/column_family.h index 99106c612..a28f5d7e9 100644 --- a/db/column_family.h +++ b/db/column_family.h @@ -357,6 +357,7 @@ class ColumnFamilyData { uint64_t GetNumLiveVersions() const; // REQUIRE: DB mutex held uint64_t GetTotalSstFilesSize() const; // REQUIRE: DB mutex held uint64_t GetLiveSstFilesSize() const; // REQUIRE: DB mutex held + uint64_t GetTotalBlobFileSize() const; // REQUIRE: DB mutex held void SetMemtable(MemTable* 
new_mem) { uint64_t memtable_id = last_memtable_id_.fetch_add(1) + 1; new_mem->SetID(memtable_id); diff --git a/db/internal_stats.cc b/db/internal_stats.cc index abe4b6607..0235105de 100644 --- a/db/internal_stats.cc +++ b/db/internal_stats.cc @@ -281,6 +281,10 @@ static const std::string block_cache_capacity = "block-cache-capacity"; static const std::string block_cache_usage = "block-cache-usage"; static const std::string block_cache_pinned_usage = "block-cache-pinned-usage"; static const std::string options_statistics = "options-statistics"; +static const std::string num_blob_files = "num-blob-files"; +static const std::string blob_stats = "blob-stats"; +static const std::string total_blob_file_size = "total-blob-file-size"; +static const std::string live_blob_file_size = "live-blob-file-size"; const std::string DB::Properties::kNumFilesAtLevelPrefix = rocksdb_prefix + num_files_at_level_prefix; @@ -374,6 +378,13 @@ const std::string DB::Properties::kOptionsStatistics = rocksdb_prefix + options_statistics; const std::string DB::Properties::kLiveSstFilesSizeAtTemperature = rocksdb_prefix + live_sst_files_size_at_temperature; +const std::string DB::Properties::kNumBlobFiles = + rocksdb_prefix + num_blob_files; +const std::string DB::Properties::kBlobStats = rocksdb_prefix + blob_stats; +const std::string DB::Properties::kTotalBlobFileSize = + rocksdb_prefix + total_blob_file_size; +const std::string DB::Properties::kLiveBlobFileSize = + rocksdb_prefix + live_blob_file_size; const std::unordered_map InternalStats::ppt_name_to_info = { @@ -520,6 +531,17 @@ const std::unordered_map {DB::Properties::kOptionsStatistics, {true, nullptr, nullptr, nullptr, &DBImpl::GetPropertyHandleOptionsStatistics}}, + {DB::Properties::kNumBlobFiles, + {false, nullptr, &InternalStats::HandleNumBlobFiles, nullptr, + nullptr}}, + {DB::Properties::kBlobStats, + {false, &InternalStats::HandleBlobStats, nullptr, nullptr, nullptr}}, + {DB::Properties::kTotalBlobFileSize, + {false, nullptr, 
&InternalStats::HandleTotalBlobFileSize, nullptr, + nullptr}}, + {DB::Properties::kLiveBlobFileSize, + {false, nullptr, &InternalStats::HandleLiveBlobFileSize, nullptr, + nullptr}}, }; InternalStats::InternalStats(int num_levels, SystemClock* clock, @@ -713,6 +735,49 @@ bool InternalStats::HandleLiveSstFilesSizeAtTemperature(std::string* value, return true; } +bool InternalStats::HandleNumBlobFiles(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + const auto* vstorage = cfd_->current()->storage_info(); + const auto& blob_files = vstorage->GetBlobFiles(); + *value = blob_files.size(); + return true; +} + +bool InternalStats::HandleBlobStats(std::string* value, Slice /*suffix*/) { + std::ostringstream oss; + auto* current_version = cfd_->current(); + const auto& blob_files = current_version->storage_info()->GetBlobFiles(); + uint64_t current_num_blob_files = blob_files.size(); + uint64_t current_file_size = 0; + uint64_t current_garbage_size = 0; + for (const auto& pair : blob_files) { + const auto& meta = pair.second; + current_file_size += meta->GetTotalBlobBytes(); + current_garbage_size += meta->GetGarbageBlobBytes(); + } + oss << "Current version number: " << current_version->GetVersionNumber() + << "\n" + << "Number of blob files: " << current_num_blob_files << "\n" + << "Total size of blob files: " << current_file_size << "\n" + << "Total size of garbage in blob files: " << current_garbage_size + << std::endl; + value->append(oss.str()); + return true; +} + +bool InternalStats::HandleTotalBlobFileSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + *value = cfd_->GetTotalBlobFileSize(); + return true; +} + +bool InternalStats::HandleLiveBlobFileSize(uint64_t* value, DBImpl* /*db*/, + Version* /*version*/) { + const auto* vstorage = cfd_->current()->storage_info(); + *value = vstorage->GetTotalBlobFileSize(); + return true; +} + const DBPropertyInfo* GetPropertyInfo(const Slice& property) { std::string ppt_name = 
GetPropertyNameAndArg(property).first.ToString(); auto ppt_info_iter = InternalStats::ppt_name_to_info.find(ppt_name); diff --git a/db/internal_stats.h b/db/internal_stats.h index b4cc6d8bd..d3b90421e 100644 --- a/db/internal_stats.h +++ b/db/internal_stats.h @@ -676,6 +676,10 @@ class InternalStats { bool HandleBlockCacheEntryStatsMap(std::map* values, Slice suffix); bool HandleLiveSstFilesSizeAtTemperature(std::string* value, Slice suffix); + bool HandleNumBlobFiles(uint64_t* value, DBImpl* db, Version* version); + bool HandleBlobStats(std::string* value, Slice suffix); + bool HandleTotalBlobFileSize(uint64_t* value, DBImpl* db, Version* version); + bool HandleLiveBlobFileSize(uint64_t* value, DBImpl* db, Version* version); // Total number of background errors encountered. Every time a flush task // or compaction task fails, this counter is incremented. The failure can // be caused by any possible reason, including file system errors, out of diff --git a/db/version_set.cc b/db/version_set.cc index d8802825f..7b89128c3 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -3600,6 +3600,14 @@ uint64_t VersionStorageInfo::EstimateLiveDataSize() const { } } } + // For BlobDB, the result also includes the exact value of live bytes in the + // blob files of the version. 
+ const auto& blobFiles = GetBlobFiles(); + for (const auto& pair : blobFiles) { + const auto& meta = pair.second; + size += meta->GetTotalBlobBytes(); + size -= meta->GetGarbageBlobBytes(); + } return size; } @@ -5681,6 +5689,25 @@ uint64_t VersionSet::GetTotalSstFilesSize(Version* dummy_versions) { return total_files_size; } +uint64_t VersionSet::GetTotalBlobFileSize(Version* dummy_versions) { + std::unordered_set unique_blob_files; + uint64_t all_v_blob_file_size = 0; + for (auto* v = dummy_versions->next_; v != dummy_versions; v = v->next_) { + // iterate all the versions + auto* vstorage = v->storage_info(); + const auto& blob_files = vstorage->GetBlobFiles(); + for (const auto& pair : blob_files) { + if (unique_blob_files.find(pair.first) == unique_blob_files.end()) { + // find Blob file that has not been counted + unique_blob_files.insert(pair.first); + const auto& meta = pair.second; + all_v_blob_file_size += meta->GetTotalBlobBytes(); + } + } + } + return all_v_blob_file_size; +} + Status VersionSet::VerifyFileMetadata(const std::string& fpath, const FileMetaData& meta) const { uint64_t fsize = 0; diff --git a/db/version_set.h b/db/version_set.h index 1b8b3cda1..49d35e924 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -1255,6 +1255,8 @@ class VersionSet { static uint64_t GetTotalSstFilesSize(Version* dummy_versions); + static uint64_t GetTotalBlobFileSize(Version* dummy_versions); + // Get the IO Status returned by written Manifest. const IOStatus& io_status() const { return io_status_; } diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index d749ea8f5..c037fcf2c 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -893,7 +893,8 @@ class DB { static const std::string kCurrentSuperVersionNumber; // "rocksdb.estimate-live-data-size" - returns an estimate of the amount of - // live data in bytes. + // live data in bytes. For BlobDB, it also includes the exact value of + // live bytes in the blob files of the version. 
static const std::string kEstimateLiveDataSize; // "rocksdb.min-log-number-to-keep" - return the minimum log number of the @@ -966,6 +967,23 @@ class DB { // "rocksdb.options-statistics" - returns multi-line string // of options.statistics static const std::string kOptionsStatistics; + + // "rocksdb.num-blob-files" - returns the number of blob files in the current + // version. + static const std::string kNumBlobFiles; + + // "rocksdb.blob-stats" - returns the total number and size of all blob + // files, and the total amount of garbage (bytes) in the blob files in + // the current version. + static const std::string kBlobStats; + + // "rocksdb.total-blob-file-size" - returns the total size of all blob + // files over all versions. + static const std::string kTotalBlobFileSize; + + // "rocksdb.live-blob-file-size" - returns the total size of all blob + // files in the current version. + static const std::string kLiveBlobFileSize; }; #endif /* ROCKSDB_LITE */ @@ -1026,6 +1044,11 @@ class DB { // "rocksdb.block-cache-capacity" // "rocksdb.block-cache-usage" // "rocksdb.block-cache-pinned-usage" + // + // Properties dedicated for BlobDB: + // "rocksdb.num-blob-files" + // "rocksdb.total-blob-file-size" + // "rocksdb.live-blob-file-size" virtual bool GetIntProperty(ColumnFamilyHandle* column_family, const Slice& property, uint64_t* value) = 0; virtual bool GetIntProperty(const Slice& property, uint64_t* value) {