From 1bac873fcfaed1b4bbcc00facf24dacada0cbafb Mon Sep 17 00:00:00 2001 From: Peter Dillinger <peterd@fb.com> Date: Wed, 20 Apr 2022 16:09:34 -0700 Subject: [PATCH] Mark GetLiveFilesStorageInfo ready for production use (#9868) Summary: ... by filling out remaining testing hole: handling of db_paths+cf_paths. (Note that while GetLiveFilesStorageInfo works with db_paths / cf_paths, Checkpoint and BackupEngine do not and are marked appropriately.) Also improved comments for "live files" APIs, and grouped them together in db.h. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9868 Test Plan: Adding to existing unit tests Reviewed By: jay-zhuang Differential Revision: D35752254 Pulled By: pdillinger fbshipit-source-id: c70eb67748fad61826e2f554b674638700abefb2 --- HISTORY.md | 2 + db/db_compaction_test.cc | 24 +++++++++++ db/db_test.cc | 4 +- include/rocksdb/db.h | 90 ++++++++++++++++++++-------------------- 4 files changed, 74 insertions(+), 46 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 7ac894566..82a73bf77 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,7 @@ # Rocksdb Change Log ## Unreleased +### New Features +* DB::GetLiveFilesStorageInfo is ready for production use. 
## 7.2.0 (04/15/2022) ### Bug Fixes diff --git a/db/db_compaction_test.cc b/db/db_compaction_test.cc index 0de7e4f66..0d99e8779 100644 --- a/db/db_compaction_test.cc +++ b/db/db_compaction_test.cc @@ -2409,6 +2409,30 @@ TEST_P(DBCompactionTestWithParam, LevelCompactionCFPathUse) { check_getvalues(); + { // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths + std::vector<LiveFileStorageInfo> new_infos; + LiveFilesStorageInfoOptions lfsio; + lfsio.wal_size_for_flush = UINT64_MAX; // no flush + ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos)); + std::unordered_map<std::string, int> live_sst_by_dir; + for (auto& info : new_infos) { + if (info.file_type == kTableFile) { + live_sst_by_dir[info.directory]++; + // Verify file on disk (no directory confusion) + uint64_t size; + ASSERT_OK(env_->GetFileSize( + info.directory + "/" + info.relative_filename, &size)); + ASSERT_EQ(info.size, size); + } + } + ASSERT_EQ(3U * 3U, live_sst_by_dir.size()); + for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) { + ASSERT_EQ(1, live_sst_by_dir[paths[0].path]); + ASSERT_EQ(4, live_sst_by_dir[paths[1].path]); + ASSERT_EQ(2, live_sst_by_dir[paths[2].path]); + } + } + ReopenWithColumnFamilies({"default", "one", "two"}, option_vector); check_getvalues(); diff --git a/db/db_test.cc b/db/db_test.cc index a8f74d8ca..a0557d272 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -2427,8 +2427,8 @@ TEST_F(DBTest, SnapshotFiles) { // Also test GetLiveFilesStorageInfo std::vector<LiveFileStorageInfo> new_infos; - ASSERT_OK(dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(), - &new_infos)); + ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(), + &new_infos)); // Close DB (while deletions disabled) Close(); diff --git a/include/rocksdb/db.h b/include/rocksdb/db.h index 74754da5a..1a6401442 100644 --- a/include/rocksdb/db.h +++ b/include/rocksdb/db.h @@ -1440,39 +1440,6 @@ class DB { virtual Status EnableFileDeletions(bool force = 
true) = 0; #ifndef ROCKSDB_LITE - // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup - - // Retrieve the list of all files in the database. The files are - // relative to the dbname and are not absolute paths. Despite being relative - // paths, the file names begin with "/". The valid size of the manifest file - // is returned in manifest_file_size. The manifest file is an ever growing - // file, but only the portion specified by manifest_file_size is valid for - // this snapshot. Setting flush_memtable to true does Flush before recording - // the live files. Setting flush_memtable to false is useful when we don't - // want to wait for flush which may have to wait for compaction to complete - // taking an indeterminate time. - // - // In case you have multiple column families, even if flush_memtable is true, - // you still need to call GetSortedWalFiles after GetLiveFiles to compensate - // for new data that arrived to already-flushed column families while other - // column families were flushing - virtual Status GetLiveFiles(std::vector<std::string>&, - uint64_t* manifest_file_size, - bool flush_memtable = true) = 0; - - // Retrieve the sorted list of all wal files with earliest file first - virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; - - // Retrieve information about the current wal file - // - // Note that the log might have rolled after this call in which case - // the current_log_file would not point to the current log file. - // - // Additionally, for the sake of optimization current_log_file->StartSequence - // would always be set to 0 - virtual Status GetCurrentWalFile( - std::unique_ptr<LogFile>* current_log_file) = 0; - // Retrieves the creation time of the oldest file in the DB. // This API only works if max_open_files = -1, if it is not then // Status returned is Status::NotSupported() @@ -1517,26 +1484,30 @@ class DB { // path relative to the db directory. eg. 
000001.sst, /archive/000003.log virtual Status DeleteFile(std::string name) = 0; - // Returns a list of all table files with their level, start key - // and end key + // Obtains a list of all live table (SST) files and how they fit into the + // LSM-trees, such as column family, level, key range, etc. + // This builds a de-normalized form of GetAllColumnFamilyMetaData(). + // For information about all files in a DB, use GetLiveFilesStorageInfo(). virtual void GetLiveFilesMetaData( std::vector<LiveFileMetaData>* /*metadata*/) {} - // Return a list of all table and blob files checksum info. + // Return a list of all table (SST) and blob files checksum info. // Note: This function might be of limited use because it cannot be - // synchronized with GetLiveFiles. + // synchronized with other "live files" APIs. GetLiveFilesStorageInfo() + // is recommended instead. virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0; - // EXPERIMENTAL: This function is not yet feature-complete. // Get information about all live files that make up a DB, for making // live copies (Checkpoint, backups, etc.) or other storage-related purposes. - // Use DisableFileDeletions() before and EnableFileDeletions() after to - // preserve the files for live copy. + // If creating a live copy, use DisableFileDeletions() before and + // EnableFileDeletions() after to prevent deletions. + // For LSM-tree metadata, use Get*MetaData() functions instead. virtual Status GetLiveFilesStorageInfo( const LiveFilesStorageInfoOptions& opts, std::vector<LiveFileStorageInfo>* files) = 0; - // Obtains the meta data of the specified column family of the DB. + // Obtains the LSM-tree meta data of the specified column family of the DB, + // including metadata for each live table (SST) file in that column family. 
virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/, ColumnFamilyMetaData* /*metadata*/) {} @@ -1545,12 +1516,43 @@ class DB { GetColumnFamilyMetaData(DefaultColumnFamily(), metadata); } - // Obtains the meta data of all column families for the DB. - // The returned map contains one entry for each column family indexed by the - // name of the column family. + // Obtains the LSM-tree meta data of all column families of the DB, + // including metadata for each live table (SST) file in the DB. virtual void GetAllColumnFamilyMetaData( std::vector<ColumnFamilyMetaData>* /*metadata*/) {} + // Retrieve the list of all files in the database except WAL files. The files + // are relative to the dbname (or db_paths/cf_paths), not absolute paths. + // (Not recommended with db_paths/cf_paths because that information is not + // returned.) Despite being relative paths, the file names begin with "/". + // The valid size of the manifest file is returned in manifest_file_size. + // The manifest file is an ever growing file, but only the portion specified + // by manifest_file_size is valid for this snapshot. Setting flush_memtable + // to true does Flush before recording the live files. Setting flush_memtable + // to false is useful when we don't want to wait for flush which may have to + // wait for compaction to complete taking an indeterminate time. + // + // NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate + // a lossless backup, GetLiveFilesStorageInfo() is strongly recommended + // instead, because it ensures a single consistent view of all files is + // captured in one call. 
+ virtual Status GetLiveFiles(std::vector<std::string>&, + uint64_t* manifest_file_size, + bool flush_memtable = true) = 0; + + // Retrieve the sorted list of all wal files with earliest file first + virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0; + + // Retrieve information about the current wal file + // + // Note that the log might have rolled after this call in which case + // the current_log_file would not point to the current log file. + // + // Additionally, for the sake of optimization current_log_file->StartSequence + // would always be set to 0 + virtual Status GetCurrentWalFile( + std::unique_ptr<LogFile>* current_log_file) = 0; + // IngestExternalFile() will load a list of external SST files (1) into the DB // Two primary modes are supported: // - Duplicate keys in the new files will overwrite exiting keys (default)