From 879357fdb0936073b95bc6c584584624dd817bd9 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 6 Apr 2021 14:36:45 -0700 Subject: [PATCH] Make backups openable as read-only DBs (#8142) Summary: A current limitation of backups is that you don't know the exact database state at the time the backup was taken. With this new feature, you can at least inspect the backup's DB state without restoring it by opening it as a read-only DB. Rather than add something like OpenAsReadOnlyDB to the BackupEngine API, which would inhibit opening stackable DB implementations read-only (if/when their APIs support it), we instead provide a DB name and Env that can be used to open the backup as a read-only DB. Possible follow-up work: * Add a version of GetBackupInfo for a single backup. * Let CreateNewBackup return the BackupID of the newly-created backup. Implementation details: Refactored ChrootFileSystem to split off new base class RemapFileSystem, which allows more general remapping of files. We use this base class to implement BackupEngineImpl::RemapSharedFileSystem. To minimize API impact, I decided to just add the fields `name_for_open` and `env_for_open` to those set by GetBackupInfo when include_file_details=true. Creating the RemapSharedFileSystem adds a bit to the memory consumption, perhaps unnecessarily in some cases, but this has been mitigated by (a) initializing the RemapSharedFileSystem lazily, only when GetBackupInfo with include_file_details=true is called, and (b) using the existing `shared_ptr` objects to hold most of the mapping data. To enhance API safety, RemapSharedFileSystem is wrapped by the new ReadOnlyFileSystem, which rejects any attempts to write. This uncovered a couple of places in which DB::OpenForReadOnly would write to the filesystem, so I fixed these. Added a release note because this affects logging. Additional minor refactoring in backupable_db.cc to support the new functionality. 
Pull Request resolved: https://github.com/facebook/rocksdb/pull/8142 Test Plan: new test (run with ASAN and UBSAN), added to stress test and ran it for a while with amplified backup_one_in Reviewed By: ajkr Differential Revision: D27535408 Pulled By: pdillinger fbshipit-source-id: 04666d310aa0261ef6b2385c43ca793ce1dfd148 --- CMakeLists.txt | 1 + HISTORY.md | 4 + TARGETS | 2 + db/db_impl/compacted_db_impl.cc | 12 +- db/db_impl/db_impl.cc | 5 +- db/db_impl/db_impl.h | 11 +- db/db_impl/db_impl_files.cc | 14 +- db/db_impl/db_impl_open.cc | 12 +- db/db_impl/db_impl_readonly.cc | 11 +- db_stress_tool/db_stress_test_base.cc | 54 +++- env/env_chroot.cc | 289 +------------------ env/env_chroot.h | 3 + env/fs_readonly.h | 97 +++++++ env/fs_remap.cc | 306 +++++++++++++++++++++ env/fs_remap.h | 131 +++++++++ include/rocksdb/utilities/backupable_db.h | 33 ++- src.mk | 1 + utilities/backupable/backupable_db.cc | 223 ++++++++++++--- utilities/backupable/backupable_db_test.cc | 59 ++++ 19 files changed, 908 insertions(+), 360 deletions(-) create mode 100644 env/fs_readonly.h create mode 100644 env/fs_remap.cc create mode 100644 env/fs_remap.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1668d24b9..984c6197c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -651,6 +651,7 @@ set(SOURCES env/env_hdfs.cc env/file_system.cc env/file_system_tracer.cc + env/fs_remap.cc env/mock_env.cc file/delete_scheduler.cc file/file_prefetch_buffer.cc diff --git a/HISTORY.md b/HISTORY.md index 77ca9a169..72b7192af 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -3,6 +3,7 @@ ### Behavior Changes * `ColumnFamilyOptions::sample_for_compression` now takes effect for creation of all block-based tables. Previously it only took effect for block-based tables created by flush. * `CompactFiles()` can no longer compact files from lower level to up level, which has the risk to corrupt DB (details: #8063). The validation is also added to all compactions. 
+* Fixed some cases in which DB::OpenForReadOnly() could write to the filesystem. If you want a Logger with a read-only DB, you must now set DBOptions::info_log yourself, such as using CreateLoggerFromOptions(). ### Bug Fixes * Use thread-safe `strerror_r()` to get error messages. @@ -18,6 +19,9 @@ * Added `TableProperties::slow_compression_estimated_data_size` and `TableProperties::fast_compression_estimated_data_size`. When `ColumnFamilyOptions::sample_for_compression > 0`, they estimate what `TableProperties::data_size` would have been if the "fast" or "slow" (see `ColumnFamilyOptions::sample_for_compression` API doc for definitions) compression had been used instead. * Update DB::StartIOTrace and remove Env object from the arguments as its redundant and DB already has Env object that is passed down to IOTracer::StartIOTrace +### New Features +* Added the ability to open BackupEngine backups as read-only DBs, using BackupInfo::name_for_open and env_for_open provided by BackupEngine::GetBackupInfo() with include_file_details=true. + ## 6.19.0 (03/21/2021) ### Bug Fixes * Fixed the truncation error found in APIs/tools when dumping block-based SST files in a human-readable format. After fix, the block-based table can be fully dumped as a readable file. 
diff --git a/TARGETS b/TARGETS index 4c1d450f8..7f813623d 100644 --- a/TARGETS +++ b/TARGETS @@ -218,6 +218,7 @@ cpp_library( "env/file_system.cc", "env/file_system_tracer.cc", "env/fs_posix.cc", + "env/fs_remap.cc", "env/io_posix.cc", "env/mock_env.cc", "file/delete_scheduler.cc", @@ -526,6 +527,7 @@ cpp_library( "env/file_system.cc", "env/file_system_tracer.cc", "env/fs_posix.cc", + "env/fs_remap.cc", "env/io_posix.cc", "env/mock_env.cc", "file/delete_scheduler.cc", diff --git a/db/db_impl/compacted_db_impl.cc b/db/db_impl/compacted_db_impl.cc index 3eb873604..076ce818f 100644 --- a/db/db_impl/compacted_db_impl.cc +++ b/db/db_impl/compacted_db_impl.cc @@ -17,11 +17,13 @@ extern void MarkKeyMayExist(void* arg); extern bool SaveValue(void* arg, const ParsedInternalKey& parsed_key, const Slice& v, bool hit_and_return); -CompactedDBImpl::CompactedDBImpl( - const DBOptions& options, const std::string& dbname) - : DBImpl(options, dbname), cfd_(nullptr), version_(nullptr), - user_comparator_(nullptr) { -} +CompactedDBImpl::CompactedDBImpl(const DBOptions& options, + const std::string& dbname) + : DBImpl(options, dbname, /*seq_per_batch*/ false, +/*batch_per_txn*/ true, + /*read_only*/ true), + cfd_(nullptr), + version_(nullptr), + user_comparator_(nullptr) {} CompactedDBImpl::~CompactedDBImpl() { } diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index a040e8009..d9f609984 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -146,10 +146,11 @@ void DumpSupportInfo(Logger* logger) { } // namespace DBImpl::DBImpl(const DBOptions& options, const std::string& dbname, - const bool seq_per_batch, const bool batch_per_txn) + const bool seq_per_batch, const bool batch_per_txn, + bool read_only) : dbname_(dbname), own_info_log_(options.info_log == nullptr), - initial_db_options_(SanitizeOptions(dbname, options)), + initial_db_options_(SanitizeOptions(dbname, options, read_only)), env_(initial_db_options_.env), io_tracer_(std::make_shared()), 
immutable_db_options_(initial_db_options_), diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index 65ca3a38a..6082ec70b 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -129,7 +129,8 @@ class Directories { class DBImpl : public DB { public: DBImpl(const DBOptions& options, const std::string& dbname, - const bool seq_per_batch = false, const bool batch_per_txn = true); + const bool seq_per_batch = false, const bool batch_per_txn = true, + bool read_only = false); // No copying allowed DBImpl(const DBImpl&) = delete; void operator=(const DBImpl&) = delete; @@ -1236,7 +1237,7 @@ class DBImpl : public DB { virtual bool OwnTablesAndLogs() const { return true; } // Set DB identity file, and write DB ID to manifest if necessary. - Status SetDBId(); + Status SetDBId(bool read_only); // REQUIRES: db mutex held when calling this function, but the db mutex can // be released and re-acquired. Db mutex will be held when the function @@ -2231,9 +2232,11 @@ class DBImpl : public DB { BlobFileCompletionCallback blob_callback_; }; -extern Options SanitizeOptions(const std::string& db, const Options& src); +extern Options SanitizeOptions(const std::string& db, const Options& src, + bool read_only = false); -extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src); +extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src, + bool read_only = false); extern CompressionType GetCompressionFlush( const ImmutableCFOptions& ioptions, diff --git a/db/db_impl/db_impl_files.cc b/db/db_impl/db_impl_files.cc index e3afde03b..c71dc8c83 100644 --- a/db/db_impl/db_impl_files.cc +++ b/db/db_impl/db_impl_files.cc @@ -854,7 +854,7 @@ uint64_t PrecomputeMinLogNumberToKeep2PC( return min_log_number_to_keep; } -Status DBImpl::SetDBId() { +Status DBImpl::SetDBId(bool read_only) { Status s; // Happens when immutable_db_options_.write_dbid_to_manifest is set to true // the very first time. 
@@ -865,9 +865,15 @@ Status DBImpl::SetDBId() { // it is no longer available then at this point DB ID is not in Identity // file or Manifest. if (s.IsNotFound()) { - s = SetIdentityFile(env_, dbname_); - if (!s.ok()) { - return s; + // Create a new DB ID, saving to file only if allowed + if (read_only) { + db_id_ = env_->GenerateUniqueId(); + return Status::OK(); + } else { + s = SetIdentityFile(env_, dbname_); + if (!s.ok()) { + return s; + } } } else if (!s.ok()) { assert(s.IsIOError()); diff --git a/db/db_impl/db_impl_open.cc b/db/db_impl/db_impl_open.cc index 6524952b9..f51303d00 100644 --- a/db/db_impl/db_impl_open.cc +++ b/db/db_impl/db_impl_open.cc @@ -24,15 +24,17 @@ #include "util/rate_limiter.h" namespace ROCKSDB_NAMESPACE { -Options SanitizeOptions(const std::string& dbname, const Options& src) { - auto db_options = SanitizeOptions(dbname, DBOptions(src)); +Options SanitizeOptions(const std::string& dbname, const Options& src, + bool read_only) { + auto db_options = SanitizeOptions(dbname, DBOptions(src), read_only); ImmutableDBOptions immutable_db_options(db_options); auto cf_options = SanitizeOptions(immutable_db_options, ColumnFamilyOptions(src)); return Options(db_options, cf_options); } -DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { +DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src, + bool read_only) { DBOptions result(src); if (result.env == nullptr) { @@ -50,7 +52,7 @@ DBOptions SanitizeOptions(const std::string& dbname, const DBOptions& src) { &result.max_open_files); } - if (result.info_log == nullptr) { + if (result.info_log == nullptr && !read_only) { Status s = CreateLoggerFromOptions(dbname, result, &result.info_log); if (!s.ok()) { // No place suitable for logging @@ -488,7 +490,7 @@ Status DBImpl::Recover( if (!s.ok()) { return s; } - s = SetDBId(); + s = SetDBId(read_only); if (s.ok() && !read_only) { s = DeleteUnreferencedSstFiles(); } diff --git a/db/db_impl/db_impl_readonly.cc 
b/db/db_impl/db_impl_readonly.cc index 5576d299c..cb3562d3f 100644 --- a/db/db_impl/db_impl_readonly.cc +++ b/db/db_impl/db_impl_readonly.cc @@ -19,7 +19,8 @@ namespace ROCKSDB_NAMESPACE { DBImplReadOnly::DBImplReadOnly(const DBOptions& db_options, const std::string& dbname) - : DBImpl(db_options, dbname) { + : DBImpl(db_options, dbname, /*seq_per_batch*/ false, + /*batch_per_txn*/ true, /*read_only*/ true) { ROCKS_LOG_INFO(immutable_db_options_.info_log, "Opening the db in read only mode"); LogFlush(immutable_db_options_.info_log); @@ -131,8 +132,8 @@ Status DBImplReadOnly::NewIterators( } namespace { -// Return OK if dbname exists in the file system -// or create_if_missing is false +// Return OK if dbname exists in the file system or create it if +// create_if_missing Status OpenForReadOnlyCheckExistence(const DBOptions& db_options, const std::string& dbname) { Status s; @@ -143,6 +144,9 @@ Status OpenForReadOnlyCheckExistence(const DBOptions& db_options, uint64_t manifest_file_number; s = VersionSet::GetCurrentManifestPath(dbname, fs.get(), &manifest_path, &manifest_file_number); + } else { + // Historic behavior that doesn't necessarily make sense + s = db_options.env->CreateDirIfMissing(dbname); } return s; } @@ -150,7 +154,6 @@ Status OpenForReadOnlyCheckExistence(const DBOptions& db_options, Status DB::OpenForReadOnly(const Options& options, const std::string& dbname, DB** dbptr, bool /*error_if_wal_file_exists*/) { - // If dbname does not exist in the file system, should not do anything Status s = OpenForReadOnlyCheckExistence(options, dbname); if (!s.ok()) { return s; diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index 6c61680f9..baaacaa5a 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -1368,8 +1368,13 @@ Status StressTest::TestBackupRestore( } } std::vector backup_info; + // If inplace_not_restore, we verify the backup by opening it as a + // read-only DB. 
If !inplace_not_restore, we restore it to a temporary + // directory for verification. + bool inplace_not_restore = thread->rand.OneIn(3); if (s.ok()) { - backup_engine->GetBackupInfo(&backup_info); + backup_engine->GetBackupInfo(&backup_info, + /*include_file_details*/ inplace_not_restore); if (backup_info.empty()) { s = Status::NotFound("no backups found"); from = "BackupEngine::GetBackupInfo"; @@ -1385,8 +1390,8 @@ Status StressTest::TestBackupRestore( } const bool allow_persistent = thread->tid == 0; // not too many bool from_latest = false; - if (s.ok()) { - int count = static_cast(backup_info.size()); + int count = static_cast(backup_info.size()); + if (s.ok() && !inplace_not_restore) { if (count > 1) { s = backup_engine->RestoreDBFromBackup( RestoreOptions(), backup_info[thread->rand.Uniform(count)].backup_id, @@ -1404,7 +1409,9 @@ Status StressTest::TestBackupRestore( } } } - if (s.ok()) { + if (s.ok() && !inplace_not_restore) { + // Purge early if restoring, to ensure the restored directory doesn't + // have some secret dependency on the backup directory. 
uint32_t to_keep = 0; if (allow_persistent) { // allow one thread to keep up to 2 backups @@ -1432,10 +1439,21 @@ Status StressTest::TestBackupRestore( for (auto name : column_family_names_) { cf_descriptors.emplace_back(name, ColumnFamilyOptions(restore_options)); } - s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors, - &restored_cf_handles, &restored_db); - if (!s.ok()) { - from = "DB::Open in backup/restore"; + if (inplace_not_restore) { + BackupInfo& info = backup_info[thread->rand.Uniform(count)]; + restore_options.env = info.env_for_open.get(); + s = DB::OpenForReadOnly(DBOptions(restore_options), info.name_for_open, + cf_descriptors, &restored_cf_handles, + &restored_db); + if (!s.ok()) { + from = "DB::OpenForReadOnly in backup/restore"; + } + } else { + s = DB::Open(DBOptions(restore_options), restore_dir, cf_descriptors, + &restored_cf_handles, &restored_db); + if (!s.ok()) { + from = "DB::Open in backup/restore"; + } } } // Note the column families chosen by `rand_column_families` cannot be @@ -1476,10 +1494,6 @@ Status StressTest::TestBackupRestore( } } } - if (backup_engine != nullptr) { - delete backup_engine; - backup_engine = nullptr; - } if (restored_db != nullptr) { for (auto* cf_handle : restored_cf_handles) { restored_db->DestroyColumnFamilyHandle(cf_handle); @@ -1487,6 +1501,22 @@ Status StressTest::TestBackupRestore( delete restored_db; restored_db = nullptr; } + if (s.ok() && inplace_not_restore) { + // Purge late if inplace open read-only + uint32_t to_keep = 0; + if (allow_persistent) { + // allow one thread to keep up to 2 backups + to_keep = thread->rand.Uniform(3); + } + s = backup_engine->PurgeOldBackups(to_keep); + if (!s.ok()) { + from = "BackupEngine::PurgeOldBackups"; + } + } + if (backup_engine != nullptr) { + delete backup_engine; + backup_engine = nullptr; + } if (s.ok()) { // Preserve directories on failure, or allowed persistent backup if (!allow_persistent) { diff --git a/env/env_chroot.cc b/env/env_chroot.cc 
index 4575e98e0..ff47049e0 100644 --- a/env/env_chroot.cc +++ b/env/env_chroot.cc @@ -7,27 +7,21 @@ #include "env/env_chroot.h" -#include -#include -#include -#include - -#include -#include -#include +#include // errno +#include // realpath, free +#include // geteuid #include "env/composite_env_wrapper.h" -#include "rocksdb/file_system.h" -#include "rocksdb/status.h" -#include "util/string_util.h" +#include "env/fs_remap.h" +#include "util/string_util.h" // errnoStr namespace ROCKSDB_NAMESPACE { namespace { -class ChrootFileSystem : public FileSystemWrapper { +class ChrootFileSystem : public RemapFileSystem { public: ChrootFileSystem(const std::shared_ptr& base, const std::string& chroot_dir) - : FileSystemWrapper(base) { + : RemapFileSystem(base) { #if defined(OS_AIX) char resolvedName[PATH_MAX]; char* real_chroot_dir = realpath(chroot_dir.c_str(), resolvedName); @@ -43,245 +37,6 @@ class ChrootFileSystem : public FileSystemWrapper { } const char* Name() const override { return "ChrootFS"; } - Status RegisterDbPaths(const std::vector& paths) override { - std::vector encoded_paths; - encoded_paths.reserve(paths.size()); - for (auto& path : paths) { - auto status_and_enc_path = EncodePathWithNewBasename(path); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - encoded_paths.emplace_back(status_and_enc_path.second); - } - return FileSystemWrapper::RegisterDbPaths(encoded_paths); - } - - Status UnregisterDbPaths(const std::vector& paths) override { - std::vector encoded_paths; - encoded_paths.reserve(paths.size()); - for (auto& path : paths) { - auto status_and_enc_path = EncodePathWithNewBasename(path); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - encoded_paths.emplace_back(status_and_enc_path.second); - } - return FileSystemWrapper::UnregisterDbPaths(encoded_paths); - } - - IOStatus NewSequentialFile(const std::string& fname, - const FileOptions& options, - std::unique_ptr* result, - 
IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::NewSequentialFile(status_and_enc_path.second, - options, result, dbg); - } - - IOStatus NewRandomAccessFile(const std::string& fname, - const FileOptions& options, - std::unique_ptr* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::NewRandomAccessFile(status_and_enc_path.second, - options, result, dbg); - } - - IOStatus NewWritableFile(const std::string& fname, const FileOptions& options, - std::unique_ptr* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::NewWritableFile(status_and_enc_path.second, - options, result, dbg); - } - - IOStatus ReuseWritableFile(const std::string& fname, - const std::string& old_fname, - const FileOptions& options, - std::unique_ptr* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - auto status_and_old_enc_path = EncodePath(old_fname); - if (!status_and_old_enc_path.first.ok()) { - return status_and_old_enc_path.first; - } - return FileSystemWrapper::ReuseWritableFile(status_and_old_enc_path.second, - status_and_old_enc_path.second, - options, result, dbg); - } - - IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options, - std::unique_ptr* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return 
FileSystemWrapper::NewRandomRWFile(status_and_enc_path.second, - options, result, dbg); - } - - IOStatus NewDirectory(const std::string& dir, const IOOptions& options, - std::unique_ptr* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(dir); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::NewDirectory(status_and_enc_path.second, options, - result, dbg); - } - - IOStatus FileExists(const std::string& fname, const IOOptions& options, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::FileExists(status_and_enc_path.second, options, - dbg); - } - - IOStatus GetChildren(const std::string& dir, const IOOptions& options, - std::vector* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(dir); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::GetChildren(status_and_enc_path.second, options, - result, dbg); - } - - IOStatus GetChildrenFileAttributes(const std::string& dir, - const IOOptions& options, - std::vector* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(dir); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::GetChildrenFileAttributes( - status_and_enc_path.second, options, result, dbg); - } - - IOStatus DeleteFile(const std::string& fname, const IOOptions& options, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::DeleteFile(status_and_enc_path.second, options, - dbg); - } - - IOStatus CreateDir(const std::string& dirname, const IOOptions& options, - IODebugContext* dbg) override { - auto 
status_and_enc_path = EncodePathWithNewBasename(dirname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::CreateDir(status_and_enc_path.second, options, - dbg); - } - - IOStatus CreateDirIfMissing(const std::string& dirname, - const IOOptions& options, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(dirname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::CreateDirIfMissing(status_and_enc_path.second, - options, dbg); - } - - IOStatus DeleteDir(const std::string& dirname, const IOOptions& options, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(dirname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::DeleteDir(status_and_enc_path.second, options, - dbg); - } - - IOStatus GetFileSize(const std::string& fname, const IOOptions& options, - uint64_t* file_size, IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::GetFileSize(status_and_enc_path.second, options, - file_size, dbg); - } - - IOStatus GetFileModificationTime(const std::string& fname, - const IOOptions& options, - uint64_t* file_mtime, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::GetFileModificationTime( - status_and_enc_path.second, options, file_mtime, dbg); - } - - IOStatus RenameFile(const std::string& src, const std::string& dest, - const IOOptions& options, IODebugContext* dbg) override { - auto status_and_src_enc_path = EncodePath(src); - if (!status_and_src_enc_path.first.ok()) { - return status_and_src_enc_path.first; - } - auto status_and_dest_enc_path = 
EncodePathWithNewBasename(dest); - if (!status_and_dest_enc_path.first.ok()) { - return status_and_dest_enc_path.first; - } - return FileSystemWrapper::RenameFile(status_and_src_enc_path.second, - status_and_dest_enc_path.second, - options, dbg); - } - - IOStatus LinkFile(const std::string& src, const std::string& dest, - const IOOptions& options, IODebugContext* dbg) override { - auto status_and_src_enc_path = EncodePath(src); - if (!status_and_src_enc_path.first.ok()) { - return status_and_src_enc_path.first; - } - auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); - if (!status_and_dest_enc_path.first.ok()) { - return status_and_dest_enc_path.first; - } - return FileSystemWrapper::LinkFile(status_and_src_enc_path.second, - status_and_dest_enc_path.second, options, - dbg); - } - - IOStatus LockFile(const std::string& fname, const IOOptions& options, - FileLock** lock, IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - // FileLock subclasses may store path (e.g., PosixFileLock stores it). We - // can skip stripping the chroot directory from this path because callers - // shouldn't use it. 
- return FileSystemWrapper::LockFile(status_and_enc_path.second, options, - lock, dbg); - } IOStatus GetTestDirectory(const IOOptions& options, std::string* path, IODebugContext* dbg) override { @@ -295,33 +50,12 @@ class ChrootFileSystem : public FileSystemWrapper { return CreateDirIfMissing(*path, options, dbg); } - IOStatus NewLogger(const std::string& fname, const IOOptions& options, - std::shared_ptr* result, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePathWithNewBasename(fname); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::NewLogger(status_and_enc_path.second, options, - result, dbg); - } - - IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options, - std::string* output_path, - IODebugContext* dbg) override { - auto status_and_enc_path = EncodePath(db_path); - if (!status_and_enc_path.first.ok()) { - return status_and_enc_path.first; - } - return FileSystemWrapper::GetAbsolutePath(status_and_enc_path.second, - options, output_path, dbg); - } - - private: + protected: // Returns status and expanded absolute path including the chroot directory. // Checks whether the provided path breaks out of the chroot. If it returns // non-OK status, the returned path should not be used. - std::pair EncodePath(const std::string& path) { + std::pair EncodePath( + const std::string& path) override { if (path.empty() || path[0] != '/') { return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""}; } @@ -352,7 +86,7 @@ class ChrootFileSystem : public FileSystemWrapper { // Similar to EncodePath() except assumes the basename in the path hasn't been // created yet. 
std::pair EncodePathWithNewBasename( - const std::string& path) { + const std::string& path) override { if (path.empty() || path[0] != '/') { return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""}; } @@ -371,6 +105,7 @@ class ChrootFileSystem : public FileSystemWrapper { return status_and_enc_path; } + private: std::string chroot_dir_; }; } // namespace diff --git a/env/env_chroot.h b/env/env_chroot.h index cb5585b3b..fb5b70c44 100644 --- a/env/env_chroot.h +++ b/env/env_chroot.h @@ -15,6 +15,9 @@ namespace ROCKSDB_NAMESPACE { // Returns an Env that translates paths such that the root directory appears to // be chroot_dir. chroot_dir should refer to an existing directory. +// +// This class has not been fully analyzed for providing strong security +// guarantees. Env* NewChrootEnv(Env* base_env, const std::string& chroot_dir); } // namespace ROCKSDB_NAMESPACE diff --git a/env/fs_readonly.h b/env/fs_readonly.h new file mode 100644 index 000000000..77ea1ded8 --- /dev/null +++ b/env/fs_readonly.h @@ -0,0 +1,97 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#ifndef ROCKSDB_LITE + +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { + +// A FileSystem wrapper that only allows read-only operation. +// +// This class has not been fully analyzed for providing strong security +// guarantees. 
+class ReadOnlyFileSystem : public FileSystemWrapper { + static inline IOStatus FailReadOnly() { + IOStatus s = IOStatus::IOError("Attempted write to ReadOnlyFileSystem"); + assert(s.GetRetryable() == false); + return s; + } + + public: + ReadOnlyFileSystem(const std::shared_ptr& base) + : FileSystemWrapper(base) {} + + IOStatus NewWritableFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus ReuseWritableFile(const std::string& /*fname*/, + const std::string& /*old_fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus NewRandomRWFile(const std::string& /*fname*/, + const FileOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus NewDirectory(const std::string& /*dir*/, + const IOOptions& /*options*/, + std::unique_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus DeleteFile(const std::string& /*fname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus CreateDir(const std::string& /*dirname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus CreateDirIfMissing(const std::string& /*dirname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus DeleteDir(const std::string& /*dirname*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus RenameFile(const std::string& /*src*/, const std::string& /*dest*/, + const IOOptions& /*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus LinkFile(const std::string& /*src*/, const std::string& /*dest*/, + const IOOptions& 
/*options*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus LockFile(const std::string& /*fname*/, const IOOptions& /*options*/, + FileLock** /*lock*/, IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } + IOStatus NewLogger(const std::string& /*fname*/, const IOOptions& /*options*/, + std::shared_ptr* /*result*/, + IODebugContext* /*dbg*/) override { + return FailReadOnly(); + } +}; + +} // namespace ROCKSDB_NAMESPACE + +#endif // ROCKSDB_LITE diff --git a/env/fs_remap.cc b/env/fs_remap.cc new file mode 100644 index 000000000..026f83cd1 --- /dev/null +++ b/env/fs_remap.cc @@ -0,0 +1,306 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef ROCKSDB_LITE + +#include "env/fs_remap.h" + +namespace ROCKSDB_NAMESPACE { + +RemapFileSystem::RemapFileSystem(const std::shared_ptr& base) + : FileSystemWrapper(base) {} + +std::pair RemapFileSystem::EncodePathWithNewBasename( + const std::string& path) { + // No difference by default + return EncodePath(path); +} + +Status RemapFileSystem::RegisterDbPaths(const std::vector& paths) { + std::vector encoded_paths; + encoded_paths.reserve(paths.size()); + for (auto& path : paths) { + auto status_and_enc_path = EncodePathWithNewBasename(path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + encoded_paths.emplace_back(status_and_enc_path.second); + } + return FileSystemWrapper::RegisterDbPaths(encoded_paths); +} + +Status RemapFileSystem::UnregisterDbPaths( + const std::vector& paths) { + std::vector encoded_paths; + encoded_paths.reserve(paths.size()); + for (auto& path : paths) { + auto status_and_enc_path = EncodePathWithNewBasename(path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + 
encoded_paths.emplace_back(status_and_enc_path.second); + } + return FileSystemWrapper::UnregisterDbPaths(encoded_paths); +} + +IOStatus RemapFileSystem::NewSequentialFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewSequentialFile(status_and_enc_path.second, + options, result, dbg); +} + +IOStatus RemapFileSystem::NewRandomAccessFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewRandomAccessFile(status_and_enc_path.second, + options, result, dbg); +} + +IOStatus RemapFileSystem::NewWritableFile( + const std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewWritableFile(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::ReuseWritableFile( + const std::string& fname, const std::string& old_fname, + const FileOptions& options, std::unique_ptr* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + auto status_and_old_enc_path = EncodePath(old_fname); + if (!status_and_old_enc_path.first.ok()) { + return status_and_old_enc_path.first; + } + return FileSystemWrapper::ReuseWritableFile(status_and_enc_path.second, + status_and_old_enc_path.second, + options, result, dbg); +} + +IOStatus RemapFileSystem::NewRandomRWFile( + const
std::string& fname, const FileOptions& options, + std::unique_ptr* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewRandomRWFile(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::NewDirectory(const std::string& dir, + const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(dir); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewDirectory(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::FileExists(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::FileExists(status_and_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::GetChildren(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(dir); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetChildren(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::GetChildrenFileAttributes( + const std::string& dir, const IOOptions& options, + std::vector* result, IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(dir); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetChildrenFileAttributes( + status_and_enc_path.second, options, result, dbg); +} + +IOStatus RemapFileSystem::DeleteFile(const std::string& fname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(fname); 
+ if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::DeleteFile(status_and_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::CreateDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(dirname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::CreateDir(status_and_enc_path.second, options, dbg); +} + +IOStatus RemapFileSystem::CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(dirname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::CreateDirIfMissing(status_and_enc_path.second, + options, dbg); +} + +IOStatus RemapFileSystem::DeleteDir(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(dirname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::DeleteDir(status_and_enc_path.second, options, dbg); +} + +IOStatus RemapFileSystem::GetFileSize(const std::string& fname, + const IOOptions& options, + uint64_t* file_size, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetFileSize(status_and_enc_path.second, options, + file_size, dbg); +} + +IOStatus RemapFileSystem::GetFileModificationTime(const std::string& fname, + const IOOptions& options, + uint64_t* file_mtime, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetFileModificationTime(status_and_enc_path.second, + options, file_mtime, dbg); +} + 
+IOStatus RemapFileSystem::IsDirectory(const std::string& path, + const IOOptions& options, bool* is_dir, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::IsDirectory(status_and_enc_path.second, options, + is_dir, dbg); +} + +IOStatus RemapFileSystem::RenameFile(const std::string& src, + const std::string& dest, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_src_enc_path = EncodePath(src); + if (!status_and_src_enc_path.first.ok()) { + return status_and_src_enc_path.first; + } + auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); + if (!status_and_dest_enc_path.first.ok()) { + return status_and_dest_enc_path.first; + } + return FileSystemWrapper::RenameFile(status_and_src_enc_path.second, + status_and_dest_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::LinkFile(const std::string& src, + const std::string& dest, + const IOOptions& options, + IODebugContext* dbg) { + auto status_and_src_enc_path = EncodePath(src); + if (!status_and_src_enc_path.first.ok()) { + return status_and_src_enc_path.first; + } + auto status_and_dest_enc_path = EncodePathWithNewBasename(dest); + if (!status_and_dest_enc_path.first.ok()) { + return status_and_dest_enc_path.first; + } + return FileSystemWrapper::LinkFile(status_and_src_enc_path.second, + status_and_dest_enc_path.second, options, + dbg); +} + +IOStatus RemapFileSystem::LockFile(const std::string& fname, + const IOOptions& options, FileLock** lock, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + // FileLock subclasses may store path (e.g., PosixFileLock stores it). We + // can skip stripping the chroot directory from this path because callers + // shouldn't use it. 
+ return FileSystemWrapper::LockFile(status_and_enc_path.second, options, lock, + dbg); +} + +IOStatus RemapFileSystem::NewLogger(const std::string& fname, + const IOOptions& options, + std::shared_ptr* result, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePathWithNewBasename(fname); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::NewLogger(status_and_enc_path.second, options, + result, dbg); +} + +IOStatus RemapFileSystem::GetAbsolutePath(const std::string& db_path, + const IOOptions& options, + std::string* output_path, + IODebugContext* dbg) { + auto status_and_enc_path = EncodePath(db_path); + if (!status_and_enc_path.first.ok()) { + return status_and_enc_path.first; + } + return FileSystemWrapper::GetAbsolutePath(status_and_enc_path.second, options, + output_path, dbg); +} + +} // namespace ROCKSDB_NAMESPACE + +#endif // ROCKSDB_LITE diff --git a/env/fs_remap.h b/env/fs_remap.h new file mode 100644 index 000000000..f95ff9de2 --- /dev/null +++ b/env/fs_remap.h @@ -0,0 +1,131 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#ifndef ROCKSDB_LITE + +#include + +#include "rocksdb/file_system.h" + +namespace ROCKSDB_NAMESPACE { + +// An abstract FileSystem wrapper that creates a view of an existing +// FileSystem by remapping names in some way. +// +// This class has not been fully analyzed for providing strong security +// guarantees. +class RemapFileSystem : public FileSystemWrapper { + public: + RemapFileSystem(const std::shared_ptr& base); + + protected: + // Returns status and mapped-to path in the wrapped filesystem. + // If it returns non-OK status, the returned path should not be used. 
+ virtual std::pair EncodePath( + const std::string& path) = 0; + + // Similar to EncodePath() except used in cases in which it is OK for + // no file or directory on 'path' to already exist, such as if the + // operation would create one. However, the parent of 'path' is expected + // to exist for the operation to succeed. + // Default implementation: call EncodePath + virtual std::pair EncodePathWithNewBasename( + const std::string& path); + + public: + // Left abstract: + // const char* Name() const override { ... } + + Status RegisterDbPaths(const std::vector& paths) override; + + Status UnregisterDbPaths(const std::vector& paths) override; + + IOStatus NewSequentialFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomAccessFile(const std::string& fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewWritableFile(const std::string& fname, const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewRandomRWFile(const std::string& fname, const FileOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus NewDirectory(const std::string& dir, const IOOptions& options, + std::unique_ptr* result, + IODebugContext* dbg) override; + + IOStatus FileExists(const std::string& fname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus GetChildren(const std::string& dir, const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override; + + IOStatus GetChildrenFileAttributes(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override; + + IOStatus DeleteFile(const std::string& fname, const 
IOOptions& options, + IODebugContext* dbg) override; + + IOStatus CreateDir(const std::string& dirname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus CreateDirIfMissing(const std::string& dirname, + const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus DeleteDir(const std::string& dirname, const IOOptions& options, + IODebugContext* dbg) override; + + IOStatus GetFileSize(const std::string& fname, const IOOptions& options, + uint64_t* file_size, IODebugContext* dbg) override; + + IOStatus GetFileModificationTime(const std::string& fname, + const IOOptions& options, + uint64_t* file_mtime, + IODebugContext* dbg) override; + + IOStatus IsDirectory(const std::string& path, const IOOptions& options, + bool* is_dir, IODebugContext* dbg) override; + + IOStatus RenameFile(const std::string& src, const std::string& dest, + const IOOptions& options, IODebugContext* dbg) override; + + IOStatus LinkFile(const std::string& src, const std::string& dest, + const IOOptions& options, IODebugContext* dbg) override; + + IOStatus LockFile(const std::string& fname, const IOOptions& options, + FileLock** lock, IODebugContext* dbg) override; + + IOStatus NewLogger(const std::string& fname, const IOOptions& options, + std::shared_ptr* result, + IODebugContext* dbg) override; + + IOStatus GetAbsolutePath(const std::string& db_path, const IOOptions& options, + std::string* output_path, + IODebugContext* dbg) override; +}; + +} // namespace ROCKSDB_NAMESPACE + +#endif // ROCKSDB_LITE diff --git a/include/rocksdb/utilities/backupable_db.h b/include/rocksdb/utilities/backupable_db.h index fdbd27769..71989c56b 100644 --- a/include/rocksdb/utilities/backupable_db.h +++ b/include/rocksdb/utilities/backupable_db.h @@ -280,24 +280,43 @@ struct BackupFileInfo { typedef uint32_t BackupID; struct BackupInfo { - BackupID backup_id; + BackupID backup_id = 0U; // Creation time, according to GetCurrentTime - int64_t timestamp; + int64_t timestamp = 0; // Total 
size in bytes (based on file payloads, not including filesystem // overheads or backup meta file) - uint64_t size; + uint64_t size = 0U; // Number of backed up files, some of which might be shared with other // backups. Does not include backup meta file. - uint32_t number_files; + uint32_t number_files = 0U; // Backup API user metadata std::string app_metadata; - // Backup file details, if requested + // Backup file details, if requested with include_file_details=true std::vector file_details; + // DB "name" (a directory in the backup_env) for opening this backup as a + // read-only DB. This should also be used as the DBOptions::wal_dir, such + // as by default setting wal_dir="". See also env_for_open. + // This field is only set if include_file_details=true + std::string name_for_open; + + // An Env(+FileSystem) for opening this backup as a read-only DB, with + // DB::OpenForReadOnly or similar. This field is only set if + // include_file_details=true. (The FileSystem in this Env takes care + // of making shared backup files openable from the `name_for_open` DB + // directory.) See also name_for_open. + // + // This Env might or might not be shared with other backups. To work + // around DBOptions::env being a raw pointer, this is a shared_ptr so + // that keeping either this BackupInfo, the BackupEngine, or a copy of + // this shared_ptr alive is sufficient to keep the Env alive for use by + // a read-only DB. + std::shared_ptr env_for_open; + BackupInfo() {} BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size, @@ -344,10 +363,8 @@ class BackupEngineReadOnlyBase { virtual ~BackupEngineReadOnlyBase() {} // Returns info about backups in backup_info - // You can GetBackupInfo safely, even with other BackupEngine performing - // backups on the same directory. // Setting include_file_details=true provides information about each - // backed-up file in BackupInfo::file_details. + // backed-up file in BackupInfo::file_details and more. 
virtual void GetBackupInfo(std::vector* backup_info, bool include_file_details = false) const = 0; diff --git a/src.mk b/src.mk index ad312382f..f485f3803 100644 --- a/src.mk +++ b/src.mk @@ -86,6 +86,7 @@ LIB_SOURCES = \ env/env_posix.cc \ env/file_system.cc \ env/fs_posix.cc \ + env/fs_remap.cc \ env/file_system_tracer.cc \ env/io_posix.cc \ env/mock_env.cc \ diff --git a/utilities/backupable/backupable_db.cc b/utilities/backupable/backupable_db.cc index 325d76e6e..1f87e2904 100644 --- a/utilities/backupable/backupable_db.cc +++ b/utilities/backupable/backupable_db.cc @@ -27,6 +27,8 @@ #include #include "env/composite_env_wrapper.h" +#include "env/fs_readonly.h" +#include "env/fs_remap.h" #include "file/filename.h" #include "file/line_file_reader.h" #include "file/sequence_file_reader.h" @@ -65,6 +67,16 @@ inline std::string ChecksumInt32ToHex(const uint32_t& checksum_value) { PutFixed32(&checksum_str, EndianSwapValue(checksum_value)); return ChecksumStrToHex(checksum_str); } + +const std::string kPrivateDirName = "private"; +const std::string kMetaDirName = "meta"; +const std::string kSharedDirName = "shared"; +const std::string kSharedChecksumDirName = "shared_checksum"; +const std::string kPrivateDirSlash = kPrivateDirName + "/"; +const std::string kMetaDirSlash = kMetaDirName + "/"; +const std::string kSharedDirSlash = kSharedDirName + "/"; +const std::string kSharedChecksumDirSlash = kSharedChecksumDirName + "/"; + } // namespace void BackupStatistics::IncrementNumberSuccessBackup() { @@ -195,6 +207,133 @@ class BackupEngineImpl { // db_session_id appears in the backup SST filename if the table naming // option is kUseDbSessionId const std::string db_session_id; + + std::string GetDbFileName() { + std::string rv; + // extract the filename part + size_t slash = filename.find_last_of('/'); + // file will either be shared/, shared_checksum/, + // shared_checksum/, shared_checksum/, + // or private// + assert(slash != std::string::npos); + rv = 
filename.substr(slash + 1); + + // if the file was in shared_checksum, extract the real file name + // in this case the file is __., + // _., or __. + if (filename.substr(0, slash) == kSharedChecksumDirName) { + rv = GetFileFromChecksumFile(rv); + } + return rv; + } + }; + + static inline std::string WithoutTrailingSlash(const std::string& path) { + if (path.empty() || path.back() != '/') { + return path; + } else { + return path.substr(0, path.size() - 1); + } + } + + static inline std::string WithTrailingSlash(const std::string& path) { + if (path.empty() || path.back() != '/') { + return path + '/'; + } else { + return path; + } + } + + // A filesystem wrapper that makes shared backup files appear to be in the + // private backup directory (dst_dir), so that the private backup dir can + // be opened as a read-only DB. + class RemapSharedFileSystem : public RemapFileSystem { + public: + RemapSharedFileSystem(const std::shared_ptr& base, + const std::string& dst_dir, + const std::string& src_base_dir, + const std::vector>& files) + : RemapFileSystem(base), + dst_dir_(WithoutTrailingSlash(dst_dir)), + dst_dir_slash_(WithTrailingSlash(dst_dir)), + src_base_dir_(WithTrailingSlash(src_base_dir)) { + for (auto& info : files) { + if (!StartsWith(info->filename, kPrivateDirSlash)) { + assert(StartsWith(info->filename, kSharedDirSlash) || + StartsWith(info->filename, kSharedChecksumDirSlash)); + remaps_[info->GetDbFileName()] = info; + } + } + } + + const char* Name() const override { + return "BackupEngineImpl::RemapSharedFileSystem"; + } + + // Sometimes a directory listing is required in opening a DB + IOStatus GetChildren(const std::string& dir, const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override { + IOStatus s = RemapFileSystem::GetChildren(dir, options, result, dbg); + if (s.ok() && (dir == dst_dir_ || dir == dst_dir_slash_)) { + // Assume remapped files exist + for (auto& r : remaps_) { + result->push_back(r.first); + } + } + return s; + }
+ + // Sometimes a directory listing is required in opening a DB + IOStatus GetChildrenFileAttributes(const std::string& dir, + const IOOptions& options, + std::vector* result, + IODebugContext* dbg) override { + IOStatus s = + RemapFileSystem::GetChildrenFileAttributes(dir, options, result, dbg); + if (s.ok() && (dir == dst_dir_ || dir == dst_dir_slash_)) { + // Assume remapped files exist with recorded size + for (auto& r : remaps_) { + result->emplace_back(); // clean up with C++20 + FileAttributes& attr = result->back(); + attr.name = r.first; + attr.size_bytes = r.second->size; + } + } + return s; + } + + protected: + // When a file in dst_dir is requested, see if we need to remap to shared + // file path. + std::pair EncodePath( + const std::string& path) override { + if (path.empty() || path[0] != '/') { + return {IOStatus::InvalidArgument(path, "Not an absolute path"), ""}; + } + std::pair rv{IOStatus(), path}; + if (StartsWith(path, dst_dir_slash_)) { + std::string relative = path.substr(dst_dir_slash_.size()); + auto it = remaps_.find(relative); + if (it != remaps_.end()) { + rv.second = src_base_dir_ + it->second->filename; + } + } + return rv; + } + + private: + // Absolute path to a directory that some extra files will be mapped into. + const std::string dst_dir_; + // Includes a trailing slash. + const std::string dst_dir_slash_; + // Absolute path to a directory containing some files to be mapped into + // dst_dir_. Includes a trailing slash. + const std::string src_base_dir_; + // If remaps_[x] exists, attempt to read dst_dir_ / x should instead read + // src_base_dir_ / remaps_[x]->filename. FileInfo is used to maximize + // sharing with other backup data in memory. 
+ std::unordered_map> remaps_; }; class BackupMeta { @@ -284,6 +423,27 @@ class BackupEngineImpl { return ss.str(); } + const std::shared_ptr& GetEnvForOpen() const { + if (!env_for_open_) { + // Lazy initialize + // Find directories + std::string dst_dir = meta_filename_; + auto i = dst_dir.rfind(kMetaDirSlash); + assert(i != std::string::npos); + std::string src_base_dir = dst_dir.substr(0, i); + dst_dir.replace(i, kMetaDirSlash.size(), kPrivateDirSlash); + // Make the RemapSharedFileSystem + std::shared_ptr remap_fs = + std::make_shared( + env_->GetFileSystem(), dst_dir, src_base_dir, files_); + // Make it read-only for safety + remap_fs = std::make_shared(remap_fs); + // Make an Env wrapper + env_for_open_ = std::make_shared(env_, remap_fs); + } + return env_for_open_; + } + private: int64_t timestamp_; // sequence number is only approximate, should not be used @@ -297,6 +457,7 @@ class BackupEngineImpl { std::vector> files_; std::unordered_map>* file_infos_; Env* env_; + mutable std::shared_ptr env_for_open_; }; // BackupMeta inline std::string GetAbsolutePath( @@ -304,30 +465,23 @@ class BackupEngineImpl { assert(relative_path.size() == 0 || relative_path[0] != '/'); return options_.backup_dir + "/" + relative_path; } - inline std::string GetPrivateDirRel() const { - return "private"; - } - inline std::string GetSharedDirRel() const { return "shared"; } - inline std::string GetSharedChecksumDirRel() const { - return "shared_checksum"; - } inline std::string GetPrivateFileRel(BackupID backup_id, bool tmp = false, const std::string& file = "") const { assert(file.size() == 0 || file[0] != '/'); - return GetPrivateDirRel() + "/" + ROCKSDB_NAMESPACE::ToString(backup_id) + + return kPrivateDirSlash + ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? ".tmp" : "") + "/" + file; } inline std::string GetSharedFileRel(const std::string& file = "", bool tmp = false) const { assert(file.size() == 0 || file[0] != '/'); - return GetSharedDirRel() + "/" + (tmp ? "." 
: "") + file + + return kSharedDirSlash + std::string(tmp ? "." : "") + file + (tmp ? ".tmp" : ""); } inline std::string GetSharedFileWithChecksumRel(const std::string& file = "", bool tmp = false) const { assert(file.size() == 0 || file[0] != '/'); - return GetSharedChecksumDirRel() + "/" + (tmp ? "." : "") + file + + return kSharedChecksumDirSlash + std::string(tmp ? "." : "") + file + (tmp ? ".tmp" : ""); } inline bool UseLegacyNaming(const std::string& sid) const { @@ -354,18 +508,15 @@ class BackupEngineImpl { } return file_copy; } - inline std::string GetFileFromChecksumFile(const std::string& file) const { + static inline std::string GetFileFromChecksumFile(const std::string& file) { assert(file.size() == 0 || file[0] != '/'); std::string file_copy = file; size_t first_underscore = file_copy.find_first_of('_'); return file_copy.erase(first_underscore, file_copy.find_last_of('.') - first_underscore); } - inline std::string GetBackupMetaDir() const { - return GetAbsolutePath("meta"); - } inline std::string GetBackupMetaFile(BackupID backup_id, bool tmp) const { - return GetBackupMetaDir() + "/" + (tmp ? "." : "") + + return GetAbsolutePath(kMetaDirName) + "/" + (tmp ? "." : "") + ROCKSDB_NAMESPACE::ToString(backup_id) + (tmp ? 
".tmp" : ""); } @@ -784,6 +935,8 @@ Status BackupEngineImpl::Initialize() { } options_.Dump(options_.info_log); + auto meta_path = GetAbsolutePath(kMetaDirName); + if (!read_only_) { // we might need to clean up from previous crash or I/O errors might_need_garbage_collect_ = true; @@ -810,9 +963,9 @@ Status BackupEngineImpl::Initialize() { &shared_directory_); } } - directories.emplace_back(GetAbsolutePath(GetPrivateDirRel()), + directories.emplace_back(GetAbsolutePath(kPrivateDirName), &private_directory_); - directories.emplace_back(GetBackupMetaDir(), &meta_directory_); + directories.emplace_back(meta_path, &meta_directory_); // create all the dirs we need for (const auto& d : directories) { auto s = backup_env_->CreateDirIfMissing(d.first); @@ -827,9 +980,9 @@ Status BackupEngineImpl::Initialize() { std::vector backup_meta_files; { - auto s = backup_env_->GetChildren(GetBackupMetaDir(), &backup_meta_files); + auto s = backup_env_->GetChildren(meta_path, &backup_meta_files); if (s.IsNotFound()) { - return Status::NotFound(GetBackupMetaDir() + " is missing"); + return Status::NotFound(meta_path + " is missing"); } else if (!s.ok()) { return s; } @@ -1396,14 +1549,19 @@ void BackupEngineImpl::GetBackupInfo(std::vector* backup_info, backup_info->push_back(BackupInfo(backup.first, meta.GetTimestamp(), meta.GetSize(), meta.GetNumberFiles(), meta.GetAppMetadata())); + BackupInfo& binfo = backup_info->back(); if (include_file_details) { - auto& file_details = backup_info->back().file_details; + auto& file_details = binfo.file_details; file_details.reserve(meta.GetFiles().size()); for (auto& file_ptr : meta.GetFiles()) { - BackupFileInfo& info = *file_details.emplace(file_details.end()); - info.relative_filename = file_ptr->filename; - info.size = file_ptr->size; + BackupFileInfo& finfo = *file_details.emplace(file_details.end()); + finfo.relative_filename = file_ptr->filename; + finfo.size = file_ptr->size; } + binfo.name_for_open = + 
GetAbsolutePath(GetPrivateFileRel(binfo.backup_id)); + binfo.name_for_open.pop_back(); // remove trailing '/' + binfo.env_for_open = meta.GetEnvForOpen(); } } } @@ -1488,21 +1646,8 @@ Status BackupEngineImpl::RestoreDBFromBackup(const RestoreOptions& options, std::vector restore_items_to_finish; for (const auto& file_info : backup->GetFiles()) { const std::string& file = file_info->filename; - std::string dst; - // 1. extract the filename - size_t slash = file.find_last_of('/'); - // file will either be shared/, shared_checksum/, - // shared_checksum/, shared_checksum/, - // or private// - assert(slash != std::string::npos); - dst = file.substr(slash + 1); - - // if the file was in shared_checksum, extract the real file name - // in this case the file is __., - // _., or __. - if (file.substr(0, slash) == GetSharedChecksumDirRel()) { - dst = GetFileFromChecksumFile(dst); - } + // 1. get DB filename + std::string dst = file_info->GetDbFileName(); // 2. find the filetype uint64_t number; @@ -2135,7 +2280,7 @@ Status BackupEngineImpl::GarbageCollect() { // delete obsolete private files std::vector private_children; { - auto s = backup_env_->GetChildren(GetAbsolutePath(GetPrivateDirRel()), + auto s = backup_env_->GetChildren(GetAbsolutePath(kPrivateDirName), &private_children); if (!s.ok()) { overall_status = s; diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 9d5c72d1b..f4e7f3d09 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -2614,6 +2614,65 @@ TEST_F(BackupableDBTest, ReadOnlyBackupEngine) { delete db; } +TEST_F(BackupableDBTest, OpenBackupAsReadOnlyDB) { + DestroyDB(dbname_, options_); + OpenDBAndBackupEngine(true); + FillDB(db_.get(), 0, 100); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + FillDB(db_.get(), 100, 200); + ASSERT_OK(backup_engine_->CreateNewBackup(db_.get(), true)); + db_.reset(); // CloseDB + DestroyDB(dbname_, 
options_); + std::vector backup_info; + // First, check that we get empty fields without include_file_details + backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ false); + ASSERT_EQ(backup_info.size(), 2); + ASSERT_EQ(backup_info[0].name_for_open, ""); + ASSERT_FALSE(backup_info[0].env_for_open); + + // Now for the real test + backup_info.clear(); + backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true); + ASSERT_EQ(backup_info.size(), 2); + + // Caution: DBOptions only holds a raw pointer to Env, so something else + // must keep it alive. + // Case 1: Keeping BackupEngine open suffices to keep Env alive + DB* db = nullptr; + Options opts = options_; + // Ensure some key defaults are set + opts.wal_dir = ""; + opts.create_if_missing = false; + opts.info_log.reset(); + + opts.env = backup_info[0].env_for_open.get(); + std::string name = backup_info[0].name_for_open; + backup_info.clear(); + ASSERT_OK(DB::OpenForReadOnly(opts, name, &db)); + + AssertExists(db, 0, 100); + AssertEmpty(db, 100, 200); + + delete db; + db = nullptr; + + // Case 2: Keeping BackupInfo alive rather than BackupEngine also suffices + backup_engine_->GetBackupInfo(&backup_info, /*with file details*/ true); + ASSERT_EQ(backup_info.size(), 2); + CloseBackupEngine(); + opts.env = backup_info[1].env_for_open.get(); + name = backup_info[1].name_for_open; + // Note: keeping backup_info[1] alive + ASSERT_OK(DB::OpenForReadOnly(opts, name, &db)); + + AssertExists(db, 0, 200); + delete db; + db = nullptr; + + // Now try opening read-write and make sure it fails, for safety. + ASSERT_TRUE(DB::Open(opts, name, &db).IsIOError()); +} + TEST_F(BackupableDBTest, ProgressCallbackDuringBackup) { DestroyDB(dbname_, options_); OpenDBAndBackupEngine(true);