Add BlobMetaData retrieval methods (#8273)

Summary:
Added BlobMetaData to ColumnFamilyMetaData, along with LiveBlobMetaData and a DB API GetLiveBlobMetaData to retrieve it.

First pass at the struct. More tests, and possibly more fields, to come...
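Roughly, the new surface can be exercised as in the sketch below. This is illustrative only, not code from this PR: the DB path is made up and error handling is minimal; the API and field names (`GetAllColumnFamilyMetaData`, `blob_file_count`, `blob_file_size`, `blob_files`) come from the headers changed in this diff.

```cpp
#include <iostream>
#include <vector>

#include "rocksdb/db.h"
#include "rocksdb/metadata.h"

int main() {
  rocksdb::DB* db = nullptr;
  rocksdb::Options options;
  options.create_if_missing = true;
  // Hypothetical path, for illustration only.
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/blob_meta_demo", &db);
  if (!s.ok()) {
    std::cerr << s.ToString() << "\n";
    return 1;
  }

  // Fetch metadata for every column family, then print the new blob-file
  // statistics added by this PR.
  std::vector<rocksdb::ColumnFamilyMetaData> all_meta;
  db->GetAllColumnFamilyMetaData(&all_meta);

  for (const auto& cf_meta : all_meta) {
    std::cout << cf_meta.name << ": " << cf_meta.blob_file_count
              << " blob file(s), " << cf_meta.blob_file_size << " bytes\n";
    for (const auto& blob : cf_meta.blob_files) {
      std::cout << "  " << blob.blob_file_name << " (" << blob.total_blob_count
                << " blobs)\n";
    }
  }

  delete db;
  return 0;
}
```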

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8273

Reviewed By: ltamasi

Differential Revision: D29102400

Pulled By: mrambacher

fbshipit-source-id: 8a2383a4446328be6b91dced9841fdd3dfc80b73
Branch: main
Author: mrambacher (committed by Facebook GitHub Bot)
Parent: 6f9ed59b1d
Commit: be219089ad
Files changed:
  1. HISTORY.md (6 lines changed)
  2. db/blob/blob_file_meta.cc (4 lines changed)
  3. db/blob/blob_file_meta.h (6 lines changed)
  4. db/db_impl/db_impl.cc (11 lines changed)
  5. db/db_impl/db_impl.h (11 lines changed)
  6. db/db_impl/db_impl_debug.cc (9 lines changed)
  7. db/db_test.cc (155 lines changed)
  8. db/version_set.cc (15 lines changed)
  9. include/rocksdb/db.h (6 lines changed)
  10. include/rocksdb/metadata.h (46 lines changed)

--- a/HISTORY.md
+++ b/HISTORY.md
@@ -29,6 +29,12 @@
 * Add BlockBasedTableOptions.prepopulate_block_cache. If enabled, it prepopulate warm/hot data blocks which are already in memory into block cache at the time of flush. On a flush, the data block that is in memory (in memtables) get flushed to the device. If using Direct IO, additional IO is incurred to read this data back into memory again, which is avoided by enabling this option and it also helps with Distributed FileSystem. More details in include/rocksdb/table.h.
 * Added a `cancel` field to `CompactRangeOptions`, allowing individual in-process manual range compactions to be cancelled.
+
+### New Features
+* Added BlobMetaData to the ColumnFamilyMetaData to return information about blob files
+
+### Public API change
+* Added GetAllColumnFamilyMetaData API to retrieve the ColumnFamilyMetaData about all column families.
 
 ## 6.21.0 (2021-05-21)
 ### Bug Fixes
 * Fixed a bug in handling file rename error in distributed/network file systems when the server succeeds but client returns error. The bug can cause CURRENT file to point to non-existing MANIFEST file, thus DB cannot be opened.

--- a/db/blob/blob_file_meta.cc
+++ b/db/blob/blob_file_meta.cc
@@ -8,9 +8,13 @@
 #include <ostream>
 #include <sstream>
 
+#include "db/blob/blob_log_format.h"
 #include "rocksdb/slice.h"
 
 namespace ROCKSDB_NAMESPACE {
+uint64_t SharedBlobFileMetaData::GetBlobFileSize() const {
+  return BlobLogHeader::kSize + total_blob_bytes_ + BlobLogFooter::kSize;
+}
 
 std::string SharedBlobFileMetaData::DebugString() const {
   std::ostringstream oss;

--- a/db/blob/blob_file_meta.h
+++ b/db/blob/blob_file_meta.h
@@ -51,6 +51,7 @@ class SharedBlobFileMetaData {
   SharedBlobFileMetaData(SharedBlobFileMetaData&&) = delete;
   SharedBlobFileMetaData& operator=(SharedBlobFileMetaData&&) = delete;
 
+  uint64_t GetBlobFileSize() const;
   uint64_t GetBlobFileNumber() const { return blob_file_number_; }
   uint64_t GetTotalBlobCount() const { return total_blob_count_; }
   uint64_t GetTotalBlobBytes() const { return total_blob_bytes_; }
@@ -112,6 +113,11 @@ class BlobFileMetaData {
     return shared_meta_;
   }
 
+  uint64_t GetBlobFileSize() const {
+    assert(shared_meta_);
+    return shared_meta_->GetBlobFileSize();
+  }
+
   uint64_t GetBlobFileNumber() const {
     assert(shared_meta_);
     return shared_meta_->GetBlobFileNumber();
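For context, GetBlobFileSize() above is simply the fixed blob log header size, plus the accumulated blob payload bytes, plus the fixed footer size. Below is a standalone sketch of the same arithmetic; the header/footer constants are stand-ins for illustration, not the real BlobLogHeader::kSize and BlobLogFooter::kSize values.

```cpp
#include <cstdint>
#include <iostream>

// Illustrative stand-ins only; the actual values come from
// BlobLogHeader::kSize and BlobLogFooter::kSize in db/blob/blob_log_format.h.
constexpr uint64_t kAssumedHeaderSize = 30;
constexpr uint64_t kAssumedFooterSize = 32;

// Mirrors the shape of SharedBlobFileMetaData::GetBlobFileSize():
// on-disk size = header + payload + footer.
uint64_t BlobFileSize(uint64_t total_blob_bytes) {
  return kAssumedHeaderSize + total_blob_bytes + kAssumedFooterSize;
}

int main() {
  // With 66666 payload bytes (the value used in the tests below), the file
  // size is the payload plus the two fixed overheads.
  std::cout << BlobFileSize(66666) << "\n";
  return 0;
}
```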

--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -3774,6 +3774,17 @@ void DBImpl::GetColumnFamilyMetaData(ColumnFamilyHandle* column_family,
   ReturnAndCleanupSuperVersion(cfd, sv);
 }
 
+void DBImpl::GetAllColumnFamilyMetaData(
+    std::vector<ColumnFamilyMetaData>* metadata) {
+  InstrumentedMutexLock l(&mutex_);
+  for (auto cfd : *(versions_->GetColumnFamilySet())) {
+    {
+      metadata->emplace_back();
+      cfd->current()->GetColumnFamilyMetaData(&metadata->back());
+    }
+  }
+}
+
 #endif  // ROCKSDB_LITE
 
 Status DBImpl::CheckConsistency() {

--- a/db/db_impl/db_impl.h
+++ b/db/db_impl/db_impl.h
@@ -400,12 +400,13 @@ class DBImpl : public DB {
       FileChecksumList* checksum_list) override;
 
   // Obtains the meta data of the specified column family of the DB.
-  // Status::NotFound() will be returned if the current DB does not have
-  // any column family match the specified name.
   // TODO(yhchiang): output parameter is placed in the end in this codebase.
   virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* column_family,
                                        ColumnFamilyMetaData* metadata) override;
 
+  void GetAllColumnFamilyMetaData(
+      std::vector<ColumnFamilyMetaData>* metadata) override;
+
   Status SuggestCompactRange(ColumnFamilyHandle* column_family,
                              const Slice* begin, const Slice* end) override;
@@ -974,8 +975,10 @@ class DBImpl : public DB {
   // get total level0 file size. Only for testing.
   uint64_t TEST_GetLevel0TotalSize();
 
-  void TEST_GetFilesMetaData(ColumnFamilyHandle* column_family,
-                             std::vector<std::vector<FileMetaData>>* metadata);
+  void TEST_GetFilesMetaData(
+      ColumnFamilyHandle* column_family,
+      std::vector<std::vector<FileMetaData>>* metadata,
+      std::vector<std::shared_ptr<BlobFileMetaData>>* blob_metadata = nullptr);
 
   void TEST_LockMutex();

--- a/db/db_impl/db_impl_debug.cc
+++ b/db/db_impl/db_impl_debug.cc
@@ -58,7 +58,8 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes(
 
 void DBImpl::TEST_GetFilesMetaData(
     ColumnFamilyHandle* column_family,
-    std::vector<std::vector<FileMetaData>>* metadata) {
+    std::vector<std::vector<FileMetaData>>* metadata,
+    std::vector<std::shared_ptr<BlobFileMetaData>>* blob_metadata) {
   auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
   auto cfd = cfh->cfd();
   InstrumentedMutexLock l(&mutex_);
@@ -72,6 +73,12 @@ void DBImpl::TEST_GetFilesMetaData(
       (*metadata)[level].push_back(*f);
     }
   }
+  if (blob_metadata != nullptr) {
+    blob_metadata->clear();
+    for (const auto& blob : cfd->current()->storage_info()->GetBlobFiles()) {
+      blob_metadata->push_back(blob.second);
+    }
+  }
 }
 
 uint64_t DBImpl::TEST_Current_Manifest_FileNo() {

--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -25,6 +25,7 @@
 #include "cache/lru_cache.h"
 #include "db/blob/blob_index.h"
+#include "db/blob/blob_log_format.h"
 #include "db/db_impl/db_impl.h"
 #include "db/db_test_util.h"
 #include "db/dbformat.h"
@@ -1027,10 +1028,10 @@ TEST_F(DBTest, FailMoreDbPaths) {
 }
 
 void CheckColumnFamilyMeta(
-    const ColumnFamilyMetaData& cf_meta,
+    const ColumnFamilyMetaData& cf_meta, const std::string& cf_name,
     const std::vector<std::vector<FileMetaData>>& files_by_level,
     uint64_t start_time, uint64_t end_time) {
-  ASSERT_EQ(cf_meta.name, kDefaultColumnFamilyName);
+  ASSERT_EQ(cf_meta.name, cf_name);
   ASSERT_EQ(cf_meta.levels.size(), files_by_level.size());
 
   uint64_t cf_size = 0;
@@ -1124,6 +1125,53 @@ void CheckLiveFilesMeta(
 }
 
 #ifndef ROCKSDB_LITE
+void AddBlobFile(const ColumnFamilyHandle* cfh, uint64_t blob_file_number,
+                 uint64_t total_blob_count, uint64_t total_blob_bytes,
+                 const std::string& checksum_method,
+                 const std::string& checksum_value,
+                 uint64_t garbage_blob_count = 0,
+                 uint64_t garbage_blob_bytes = 0) {
+  ColumnFamilyData* cfd =
+      (static_cast<const ColumnFamilyHandleImpl*>(cfh))->cfd();
+  assert(cfd);
+
+  Version* const version = cfd->current();
+  assert(version);
+
+  VersionStorageInfo* const storage_info = version->storage_info();
+  assert(storage_info);
+
+  // Add a live blob file.
+  auto shared_meta = SharedBlobFileMetaData::Create(
+      blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
+      checksum_value);
+
+  auto meta = BlobFileMetaData::Create(std::move(shared_meta),
+                                       BlobFileMetaData::LinkedSsts(),
+                                       garbage_blob_count, garbage_blob_bytes);
+
+  storage_info->AddBlobFile(std::move(meta));
+}
+
+static void CheckBlobMetaData(
+    const BlobMetaData& bmd, uint64_t blob_file_number,
+    uint64_t total_blob_count, uint64_t total_blob_bytes,
+    const std::string& checksum_method, const std::string& checksum_value,
+    uint64_t garbage_blob_count = 0, uint64_t garbage_blob_bytes = 0) {
+  ASSERT_EQ(bmd.blob_file_number, blob_file_number);
+  ASSERT_EQ(bmd.blob_file_name, BlobFileName("", blob_file_number));
+  ASSERT_EQ(bmd.blob_file_size,
+            total_blob_bytes + BlobLogHeader::kSize + BlobLogFooter::kSize);
+  ASSERT_EQ(bmd.total_blob_count, total_blob_count);
+  ASSERT_EQ(bmd.total_blob_bytes, total_blob_bytes);
+  ASSERT_EQ(bmd.garbage_blob_count, garbage_blob_count);
+  ASSERT_EQ(bmd.garbage_blob_bytes, garbage_blob_bytes);
+  ASSERT_EQ(bmd.checksum_method, checksum_method);
+  ASSERT_EQ(bmd.checksum_value, checksum_value);
+}
+
 TEST_F(DBTest, MetaDataTest) {
   Options options = CurrentOptions();
   options.create_if_missing = true;
@@ -1164,13 +1212,69 @@ TEST_F(DBTest, MetaDataTest) {
   ColumnFamilyMetaData cf_meta;
   db_->GetColumnFamilyMetaData(&cf_meta);
-  CheckColumnFamilyMeta(cf_meta, files_by_level, start_time, end_time);
+  CheckColumnFamilyMeta(cf_meta, kDefaultColumnFamilyName, files_by_level,
+                        start_time, end_time);
 
   std::vector<LiveFileMetaData> live_file_meta;
   db_->GetLiveFilesMetaData(&live_file_meta);
   CheckLiveFilesMeta(live_file_meta, files_by_level);
 }
 
+TEST_F(DBTest, AllMetaDataTest) {
+  Options options = CurrentOptions();
+  options.create_if_missing = true;
+  options.disable_auto_compactions = true;
+  DestroyAndReopen(options);
+  CreateAndReopenWithCF({"pikachu"}, options);
+
+  constexpr uint64_t blob_file_number = 234;
+  constexpr uint64_t total_blob_count = 555;
+  constexpr uint64_t total_blob_bytes = 66666;
+  constexpr char checksum_method[] = "CRC32";
+  constexpr char checksum_value[] = "\x3d\x87\xff\x57";
+
+  int64_t temp_time = 0;
+  options.env->GetCurrentTime(&temp_time).PermitUncheckedError();
+  uint64_t start_time = static_cast<uint64_t>(temp_time);
+
+  Random rnd(301);
+  for (int cf = 0; cf < 2; cf++) {
+    AddBlobFile(handles_[cf], blob_file_number * (cf + 1),
+                total_blob_count * (cf + 1), total_blob_bytes * (cf + 1),
+                checksum_method, checksum_value);
+  }
+
+  std::vector<ColumnFamilyMetaData> all_meta;
+  db_->GetAllColumnFamilyMetaData(&all_meta);
+
+  std::vector<std::vector<FileMetaData>> default_files_by_level;
+  std::vector<std::vector<FileMetaData>> pikachu_files_by_level;
+  dbfull()->TEST_GetFilesMetaData(handles_[0], &default_files_by_level);
+  dbfull()->TEST_GetFilesMetaData(handles_[1], &pikachu_files_by_level);
+
+  options.env->GetCurrentTime(&temp_time).PermitUncheckedError();
+  uint64_t end_time = static_cast<uint64_t>(temp_time);
+
+  ASSERT_EQ(all_meta.size(), 2);
+  for (int cf = 0; cf < 2; cf++) {
+    const auto& cfmd = all_meta[cf];
+    if (cf == 0) {
+      CheckColumnFamilyMeta(cfmd, "default", default_files_by_level, start_time,
+                            end_time);
+    } else {
+      CheckColumnFamilyMeta(cfmd, "pikachu", pikachu_files_by_level, start_time,
+                            end_time);
+    }
+
+    ASSERT_EQ(cfmd.blob_files.size(), 1U);
+    const auto& bmd = cfmd.blob_files[0];
+    ASSERT_EQ(cfmd.blob_file_count, 1U);
+    ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size);
+    ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_));
+    CheckBlobMetaData(bmd, blob_file_number * (cf + 1),
+                      total_blob_count * (cf + 1), total_blob_bytes * (cf + 1),
+                      checksum_method, checksum_value);
+  }
+}
+
 namespace {
 void MinLevelHelper(DBTest* self, Options& options) {
   Random rnd(301);
@@ -2344,41 +2448,19 @@ TEST_F(DBTest, GetLiveBlobFiles) {
   Options options = CurrentOptions();
   options.stats_dump_period_sec = 0;
 
-  Reopen(options);
-
-  VersionSet* const versions = dbfull()->TEST_GetVersionSet();
-  assert(versions);
-  assert(versions->GetColumnFamilySet());
-
-  ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
-  assert(cfd);
-
-  Version* const version = cfd->current();
-  assert(version);
-
-  VersionStorageInfo* const storage_info = version->storage_info();
-  assert(storage_info);
-
-  // Add a live blob file.
   constexpr uint64_t blob_file_number = 234;
   constexpr uint64_t total_blob_count = 555;
   constexpr uint64_t total_blob_bytes = 66666;
   constexpr char checksum_method[] = "CRC32";
   constexpr char checksum_value[] = "\x3d\x87\xff\x57";
-
-  auto shared_meta = SharedBlobFileMetaData::Create(
-      blob_file_number, total_blob_count, total_blob_bytes, checksum_method,
-      checksum_value);
-
   constexpr uint64_t garbage_blob_count = 0;
   constexpr uint64_t garbage_blob_bytes = 0;
 
-  auto meta = BlobFileMetaData::Create(std::move(shared_meta),
-                                       BlobFileMetaData::LinkedSsts(),
-                                       garbage_blob_count, garbage_blob_bytes);
-
-  storage_info->AddBlobFile(std::move(meta));
+  Reopen(options);
+
+  AddBlobFile(db_->DefaultColumnFamily(), blob_file_number, total_blob_count,
+              total_blob_bytes, checksum_method, checksum_value,
+              garbage_blob_count, garbage_blob_bytes);
 
   // Make sure it appears in the results returned by GetLiveFiles.
   uint64_t manifest_size = 0;
   std::vector<std::string> files;
@@ -2386,6 +2468,19 @@ TEST_F(DBTest, GetLiveBlobFiles) {
   ASSERT_FALSE(files.empty());
   ASSERT_EQ(files[0], BlobFileName("", blob_file_number));
 
+  ColumnFamilyMetaData cfmd;
+  db_->GetColumnFamilyMetaData(&cfmd);
+  ASSERT_EQ(cfmd.blob_files.size(), 1);
+  const BlobMetaData& bmd = cfmd.blob_files[0];
+
+  CheckBlobMetaData(bmd, blob_file_number, total_blob_count, total_blob_bytes,
+                    checksum_method, checksum_value, garbage_blob_count,
+                    garbage_blob_bytes);
+
+  ASSERT_EQ(NormalizePath(bmd.blob_file_path), NormalizePath(dbname_));
+  ASSERT_EQ(cfmd.blob_file_count, 1U);
+  ASSERT_EQ(cfmd.blob_file_size, bmd.blob_file_size);
 }
 
 #endif

--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1469,6 +1469,10 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) {
   cf_meta->file_count = 0;
   cf_meta->levels.clear();
 
+  cf_meta->blob_file_size = 0;
+  cf_meta->blob_file_count = 0;
+  cf_meta->blob_files.clear();
+
   auto* ioptions = cfd_->ioptions();
   auto* vstorage = storage_info();
@@ -1504,6 +1508,17 @@ void Version::GetColumnFamilyMetaData(ColumnFamilyMetaData* cf_meta) {
         level, level_size, std::move(files));
     cf_meta->size += level_size;
   }
+  for (const auto& iter : vstorage->GetBlobFiles()) {
+    const auto meta = iter.second.get();
+    cf_meta->blob_files.emplace_back(
+        meta->GetBlobFileNumber(), BlobFileName("", meta->GetBlobFileNumber()),
+        ioptions->cf_paths.front().path, meta->GetBlobFileSize(),
+        meta->GetTotalBlobCount(), meta->GetTotalBlobBytes(),
+        meta->GetGarbageBlobCount(), meta->GetGarbageBlobBytes(),
+        meta->GetChecksumMethod(), meta->GetChecksumValue());
+    cf_meta->blob_file_count++;
+    cf_meta->blob_file_size += meta->GetBlobFileSize();
+  }
 }
 
 uint64_t Version::GetSstFilesSize() {

--- a/include/rocksdb/db.h
+++ b/include/rocksdb/db.h
@@ -1423,6 +1423,12 @@ class DB {
     GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
   }
 
+  // Obtains the meta data of all column families for the DB.
+  // The returned vector contains one ColumnFamilyMetaData entry for each
+  // column family in the DB.
+  virtual void GetAllColumnFamilyMetaData(
+      std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
+
   // IngestExternalFile() will load a list of external SST files (1) into the DB
   // Two primary modes are supported:
   // - Duplicate keys in the new files will overwrite exiting keys (default)

--- a/include/rocksdb/metadata.h
+++ b/include/rocksdb/metadata.h
@@ -15,6 +15,7 @@
 #include "rocksdb/types.h"
 
 namespace ROCKSDB_NAMESPACE {
+struct BlobMetaData;
 struct ColumnFamilyMetaData;
 struct LevelMetaData;
 struct SstFileMetaData;
@@ -35,6 +36,13 @@ struct ColumnFamilyMetaData {
   std::string name;
   // The metadata of all levels in this column family.
   std::vector<LevelMetaData> levels;
+
+  // The total size of all blob files, in bytes.
+  uint64_t blob_file_size = 0;
+  // The number of blob files in this column family.
+  size_t blob_file_count = 0;
+  // The metadata of the blob files in this column family.
+  std::vector<BlobMetaData> blob_files;
 };
 
 // The metadata that describes a level.
@@ -153,6 +161,44 @@ struct LiveFileMetaData : SstFileMetaData {
   LiveFileMetaData() : column_family_name(), level(0) {}
 };
 
+// The metadata that describes a blob file.
+struct BlobMetaData {
+  BlobMetaData()
+      : blob_file_number(0),
+        blob_file_size(0),
+        total_blob_count(0),
+        total_blob_bytes(0),
+        garbage_blob_count(0),
+        garbage_blob_bytes(0) {}
+
+  BlobMetaData(uint64_t _file_number, const std::string& _file_name,
+               const std::string& _file_path, uint64_t _file_size,
+               uint64_t _total_blob_count, uint64_t _total_blob_bytes,
+               uint64_t _garbage_blob_count, uint64_t _garbage_blob_bytes,
+               const std::string& _file_checksum,
+               const std::string& _file_checksum_func_name)
+      : blob_file_number(_file_number),
+        blob_file_name(_file_name),
+        blob_file_path(_file_path),
+        blob_file_size(_file_size),
+        total_blob_count(_total_blob_count),
+        total_blob_bytes(_total_blob_bytes),
+        garbage_blob_count(_garbage_blob_count),
+        garbage_blob_bytes(_garbage_blob_bytes),
+        checksum_method(_file_checksum),
+        checksum_value(_file_checksum_func_name) {}
+
+  uint64_t blob_file_number;
+  std::string blob_file_name;
+  std::string blob_file_path;
+  uint64_t blob_file_size;
+  uint64_t total_blob_count;
+  uint64_t total_blob_bytes;
+  uint64_t garbage_blob_count;
+  uint64_t garbage_blob_bytes;
+  std::string checksum_method;
+  std::string checksum_value;
+};
+
 // Metadata returned as output from ExportColumnFamily() and used as input to
 // CreateColumnFamiliesWithImport().
 struct ExportImportFilesMetaData {
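As a final illustration, the per-file counters in BlobMetaData let callers derive simple health metrics. The helper below is not part of RocksDB, and the numeric values are made up; only the BlobMetaData fields and constructor come from this diff.

```cpp
#include <iostream>

#include "rocksdb/metadata.h"

// Hypothetical helper (not a RocksDB API): fraction of a blob file's payload
// that is garbage, derived from total_blob_bytes and garbage_blob_bytes.
double GarbageRatio(const rocksdb::BlobMetaData& bmd) {
  if (bmd.total_blob_bytes == 0) {
    return 0.0;
  }
  return static_cast<double>(bmd.garbage_blob_bytes) /
         static_cast<double>(bmd.total_blob_bytes);
}

int main() {
  // Illustrative values only; in practice the entries come from
  // ColumnFamilyMetaData::blob_files, filled in by GetColumnFamilyMetaData().
  rocksdb::BlobMetaData bmd(/*_file_number=*/234, "000234.blob", "/tmp/db",
                            /*_file_size=*/66728, /*_total_blob_count=*/555,
                            /*_total_blob_bytes=*/66666,
                            /*_garbage_blob_count=*/55,
                            /*_garbage_blob_bytes=*/6666,
                            /*_file_checksum=*/"CRC32",
                            /*_file_checksum_func_name=*/"abcd");
  std::cout << "garbage ratio: " << GarbageRatio(bmd) << "\n";  // ~0.1
  return 0;
}
```

A caller could, for example, iterate ColumnFamilyMetaData::blob_files and flag files whose ratio crosses a threshold as candidates for garbage collection.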
