From 670567db09f17e14ad7f20c80b774e62f805034e Mon Sep 17 00:00:00 2001 From: Hans Holmberg Date: Tue, 9 Mar 2021 20:47:26 -0800 Subject: [PATCH] Add support for custom file systems to ldb and sst_dump (#8010) Summary: This PR adds support for custom file systems to ldb and sst_dump by adding command line options for specifying --fs_uri and --backup_fs_uri (for ldb backup/restore commands). fs_uri is already supported in db_bench and db_stress, and there is already support in ldb and db_stress for specifying customized envs. The PR also fixes what looks like a bug in the ldb backup/restore commands. As it is right now, backups can only be made from and to the same environment/file system, which does not seem to be the intended behavior. This PR makes it possible to do/restore backups between different envs/file systems. Example: `./ldb backup --fs_uri=zenfs://dev:nvme2n1 --backup_fs_uri=posix:// --backup_dir=/tmp/my_rocksdb_backup --db=rocksdbtest/dbbench ` Pull Request resolved: https://github.com/facebook/rocksdb/pull/8010 Reviewed By: jay-zhuang Differential Revision: D26904654 Pulled By: ajkr fbshipit-source-id: 9b695ed8b944fcc6b27c4daaa9f52e87ee2c1fb4 --- include/rocksdb/utilities/ldb_cmd.h | 2 + tools/ldb_cmd.cc | 96 +++++++++++++++++++++++++---- tools/ldb_cmd_impl.h | 2 + tools/ldb_tool.cc | 3 +- tools/sst_dump_tool.cc | 41 ++++++++++-- 5 files changed, 124 insertions(+), 20 deletions(-) diff --git a/include/rocksdb/utilities/ldb_cmd.h b/include/rocksdb/utilities/ldb_cmd.h index af0556a32..e900abefe 100644 --- a/include/rocksdb/utilities/ldb_cmd.h +++ b/include/rocksdb/utilities/ldb_cmd.h @@ -32,6 +32,7 @@ class LDBCommand { public: // Command-line arguments static const std::string ARG_ENV_URI; + static const std::string ARG_FS_URI; static const std::string ARG_DB; static const std::string ARG_PATH; static const std::string ARG_SECONDARY_PATH; @@ -137,6 +138,7 @@ class LDBCommand { protected: LDBCommandExecuteResult exec_state_; std::string env_uri_; + 
std::string fs_uri_; std::string db_path_; // If empty, open DB as primary. If non-empty, open the DB as secondary // with this secondary path. When running against a database opened by diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 02643cfae..c160e9ba8 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -49,6 +49,7 @@ class FileChecksumGenCrc32c; class FileChecksumGenCrc32cFactory; const std::string LDBCommand::ARG_ENV_URI = "env_uri"; +const std::string LDBCommand::ARG_FS_URI = "fs_uri"; const std::string LDBCommand::ARG_DB = "db"; const std::string LDBCommand::ARG_PATH = "path"; const std::string LDBCommand::ARG_SECONDARY_PATH = "secondary_path"; @@ -286,6 +287,16 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) { return nullptr; } +static Env* GetCompositeEnv(std::shared_ptr fs) { + static std::shared_ptr composite_env = NewCompositeEnv(fs); + return composite_env.get(); +} + +static Env* GetCompositeBackupEnv(std::shared_ptr fs) { + static std::shared_ptr composite_backup_env = NewCompositeEnv(fs); + return composite_backup_env.get(); +} + /* Run the command, and return the execute result. 
*/ void LDBCommand::Run() { if (!exec_state_.IsNotStarted()) { @@ -294,12 +305,33 @@ void LDBCommand::Run() { if (!options_.env || options_.env == Env::Default()) { Env* env = Env::Default(); - Status s = Env::LoadEnv(env_uri_, &env, &env_guard_); - if (!s.ok() && !s.IsNotFound()) { - fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str()); - exec_state_ = LDBCommandExecuteResult::Failed(s.ToString()); + + if (!env_uri_.empty() && !fs_uri_.empty()) { + std::string err = + "Error: you may not specity both " + "fs_uri and fs_env."; + fprintf(stderr, "%s\n", err.c_str()); + exec_state_ = LDBCommandExecuteResult::Failed(err); return; } + if (!env_uri_.empty()) { + Status s = Env::LoadEnv(env_uri_, &env, &env_guard_); + if (!s.ok() && !s.IsNotFound()) { + fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str()); + exec_state_ = LDBCommandExecuteResult::Failed(s.ToString()); + return; + } + } else if (!fs_uri_.empty()) { + std::shared_ptr fs; + Status s = FileSystem::Load(fs_uri_, &fs); + if (fs == nullptr) { + fprintf(stderr, "error: %s\n", s.ToString().c_str()); + exec_state_ = LDBCommandExecuteResult::Failed(s.ToString()); + return; + } + env = GetCompositeEnv(fs); + } + options_.env = env; } @@ -351,6 +383,11 @@ LDBCommand::LDBCommand(const std::map& options, env_uri_ = itr->second; } + itr = options.find(ARG_FS_URI); + if (itr != options.end()) { + fs_uri_ = itr->second; + } + itr = options.find(ARG_CF_NAME); if (itr != options.end()) { column_family_name_ = itr->second; @@ -483,6 +520,7 @@ ColumnFamilyHandle* LDBCommand::GetCfHandle() { std::vector LDBCommand::BuildCmdLineOptions( std::vector options) { std::vector ret = {ARG_ENV_URI, + ARG_FS_URI, ARG_DB, ARG_SECONDARY_PATH, ARG_BLOOM_BITS, @@ -3016,6 +3054,7 @@ void RepairCommand::DoCommand() { const std::string BackupableCommand::ARG_NUM_THREADS = "num_threads"; const std::string BackupableCommand::ARG_BACKUP_ENV_URI = "backup_env_uri"; +const std::string BackupableCommand::ARG_BACKUP_FS_URI = "backup_fs_uri"; 
const std::string BackupableCommand::ARG_BACKUP_DIR = "backup_dir"; const std::string BackupableCommand::ARG_STDERR_LOG_LEVEL = "stderr_log_level"; @@ -3024,8 +3063,9 @@ BackupableCommand::BackupableCommand( const std::map& options, const std::vector& flags) : LDBCommand(options, flags, false /* is_read_only */, - BuildCmdLineOptions({ARG_BACKUP_ENV_URI, ARG_BACKUP_DIR, - ARG_NUM_THREADS, ARG_STDERR_LOG_LEVEL})), + BuildCmdLineOptions({ARG_BACKUP_ENV_URI, ARG_BACKUP_FS_URI, + ARG_BACKUP_DIR, ARG_NUM_THREADS, + ARG_STDERR_LOG_LEVEL})), num_threads_(1) { auto itr = options.find(ARG_NUM_THREADS); if (itr != options.end()) { @@ -3035,6 +3075,15 @@ BackupableCommand::BackupableCommand( if (itr != options.end()) { backup_env_uri_ = itr->second; } + itr = options.find(ARG_BACKUP_FS_URI); + if (itr != options.end()) { + backup_fs_uri_ = itr->second; + } + if (!backup_env_uri_.empty() && !backup_fs_uri_.empty()) { + exec_state_ = LDBCommandExecuteResult::Failed( + "you may not specity both --" + ARG_BACKUP_ENV_URI + " and --" + + ARG_BACKUP_FS_URI); + } itr = options.find(ARG_BACKUP_DIR); if (itr == options.end()) { exec_state_ = LDBCommandExecuteResult::Failed("--" + ARG_BACKUP_DIR + @@ -3061,7 +3110,7 @@ BackupableCommand::BackupableCommand( void BackupableCommand::Help(const std::string& name, std::string& ret) { ret.append(" "); ret.append(name); - ret.append(" [--" + ARG_BACKUP_ENV_URI + "] "); + ret.append(" [--" + ARG_BACKUP_ENV_URI + " | --" + ARG_BACKUP_FS_URI + "] "); ret.append(" [--" + ARG_BACKUP_DIR + "] "); ret.append(" [--" + ARG_NUM_THREADS + "] "); ret.append(" [--" + ARG_STDERR_LOG_LEVEL + "=] "); @@ -3087,15 +3136,26 @@ void BackupCommand::DoCommand() { return; } fprintf(stdout, "open db OK\n"); + Env* custom_env = nullptr; - Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_); - assert(custom_env != nullptr); + if (!backup_fs_uri_.empty()) { + std::shared_ptr fs; + Status s = FileSystem::Load(backup_fs_uri_, &fs); + if (fs == nullptr) { + 
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString()); + return; + } + custom_env = GetCompositeBackupEnv(fs); + } else { + Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_); + assert(custom_env != nullptr); + } BackupableDBOptions backup_options = BackupableDBOptions(backup_dir_, custom_env); backup_options.info_log = logger_.get(); backup_options.max_background_operations = num_threads_; - status = BackupEngine::Open(custom_env, backup_options, &backup_engine); + status = BackupEngine::Open(options_.env, backup_options, &backup_engine); if (status.ok()) { fprintf(stdout, "open backup engine OK\n"); } else { @@ -3125,8 +3185,18 @@ void RestoreCommand::Help(std::string& ret) { void RestoreCommand::DoCommand() { Env* custom_env = nullptr; - Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_); - assert(custom_env != nullptr); + if (!backup_fs_uri_.empty()) { + std::shared_ptr fs; + Status s = FileSystem::Load(backup_fs_uri_, &fs); + if (fs == nullptr) { + exec_state_ = LDBCommandExecuteResult::Failed(s.ToString()); + return; + } + custom_env = GetCompositeBackupEnv(fs); + } else { + Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_); + assert(custom_env != nullptr); + } std::unique_ptr restore_engine; Status status; @@ -3136,7 +3206,7 @@ void RestoreCommand::DoCommand() { opts.max_background_operations = num_threads_; BackupEngineReadOnly* raw_restore_engine_ptr; status = - BackupEngineReadOnly::Open(custom_env, opts, &raw_restore_engine_ptr); + BackupEngineReadOnly::Open(options_.env, opts, &raw_restore_engine_ptr); if (status.ok()) { restore_engine.reset(raw_restore_engine_ptr); } diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index d20f5b98e..f5f7eff2e 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -541,6 +541,7 @@ class BackupableCommand : public LDBCommand { protected: static void Help(const std::string& name, std::string& ret); std::string backup_env_uri_; + std::string backup_fs_uri_; 
std::string backup_dir_; int num_threads_; std::unique_ptr logger_; @@ -549,6 +550,7 @@ class BackupableCommand : public LDBCommand { private: static const std::string ARG_BACKUP_DIR; static const std::string ARG_BACKUP_ENV_URI; + static const std::string ARG_BACKUP_FS_URI; static const std::string ARG_NUM_THREADS; static const std::string ARG_STDERR_LOG_LEVEL; }; diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index 956244792..f8f7e7181 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -22,7 +22,8 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options, "= when necessary\n"); ret.append("\n"); ret.append("commands can optionally specify --" + LDBCommand::ARG_ENV_URI + - "= if necessary\n\n"); + "= or --" + LDBCommand::ARG_FS_URI + + "= if necessary\n\n"); ret.append( "The following optional parameters control if keys/values are " "input/output as hex or as plain strings:\n"); diff --git a/tools/sst_dump_tool.cc b/tools/sst_dump_tool.cc index 86ecdad0c..9e808c88f 100644 --- a/tools/sst_dump_tool.cc +++ b/tools/sst_dump_tool.cc @@ -37,7 +37,10 @@ void print_help(bool to_stderr) { Path to SST file or directory containing SST files --env_uri= - URI of underlying Env + URI of underlying Env, mutually exclusive with fs_uri + + --fs_uri= + URI of underlying FileSystem, mutually exclusive with env_uri --command=check|scan|raw|verify|identify check: Iterate over entries in files but don't print anything except if an error is encountered (default command) @@ -129,8 +132,16 @@ bool ParseIntArg(const char* arg, const std::string arg_name, } } // namespace +static ROCKSDB_NAMESPACE::Env* GetCompositeEnv( + std::shared_ptr fs) { + static std::shared_ptr composite_env = + ROCKSDB_NAMESPACE::NewCompositeEnv(fs); + return composite_env.get(); +} + int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { const char* env_uri = nullptr; + const char* fs_uri = nullptr; const char* dir_or_file = nullptr; uint64_t read_num = 
std::numeric_limits::max(); std::string command; @@ -177,6 +188,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { for (int i = 1; i < argc; i++) { if (strncmp(argv[i], "--env_uri=", 10) == 0) { env_uri = argv[i] + 10; + } else if (strncmp(argv[i], "--fs_uri=", 9) == 0) { + fs_uri = argv[i] + 9; } else if (strncmp(argv[i], "--file=", 7) == 0) { dir_or_file = argv[i] + 7; } else if (strcmp(argv[i], "--output_hex") == 0) { @@ -339,14 +352,30 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) { std::shared_ptr env_guard; // If caller of SSTDumpTool::Run(...) does not specify a different env other - // than Env::Default(), then try to load custom env based on dir_or_file. + // than Env::Default(), then try to load custom env based on env_uri/fs_uri. // Otherwise, the caller is responsible for creating custom env. + + if (env_uri && fs_uri) { + fprintf(stderr, "cannot specify --fs_uri and --env_uri.\n\n"); + exit(1); + } + if (!options.env || options.env == ROCKSDB_NAMESPACE::Env::Default()) { Env* env = Env::Default(); - Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard); - if (!s.ok() && !s.IsNotFound()) { - fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str()); - exit(1); + if (env_uri) { + Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard); + if (!s.ok() && !s.IsNotFound()) { + fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str()); + exit(1); + } + } else if (fs_uri) { + std::shared_ptr fs; + Status s = FileSystem::Load(fs_uri, &fs); + if (fs == nullptr) { + fprintf(stderr, "FileSystem Load: %s\n", s.ToString().c_str()); + exit(1); + } + env = GetCompositeEnv(fs); } options.env = env; } else {