Add support for custom file systems to ldb and sst_dump (#8010)

Summary:
This PR adds support for custom file systems to ldb and sst_dump by adding command line options for specifying --fs_uri and --backup_fs_uri (the latter for the ldb backup/restore commands). fs_uri is already supported in db_bench and db_stress, and there is already support in ldb and db_stress for specifying customized envs.

The PR also fixes what looks like a bug in the ldb backup/restore commands. As it is right now, backups can only be made from and to the same environment/file system which does not seem to be the intended behavior. This PR makes it possible to do/restore backups between different envs/file systems.

Example:
`./ldb backup --fs_uri=zenfs://dev:nvme2n1 --backup_fs_uri=posix:// --backup_dir=/tmp/my_rocksdb_backup --db=rocksdbtest/dbbench`

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8010

Reviewed By: jay-zhuang

Differential Revision: D26904654

Pulled By: ajkr

fbshipit-source-id: 9b695ed8b944fcc6b27c4daaa9f52e87ee2c1fb4
main
Hans Holmberg 3 years ago committed by Facebook GitHub Bot
parent 7381dad1b1
commit 670567db09
  1. 2
      include/rocksdb/utilities/ldb_cmd.h
  2. 96
      tools/ldb_cmd.cc
  3. 2
      tools/ldb_cmd_impl.h
  4. 3
      tools/ldb_tool.cc
  5. 41
      tools/sst_dump_tool.cc

@ -32,6 +32,7 @@ class LDBCommand {
public:
// Command-line arguments
static const std::string ARG_ENV_URI;
static const std::string ARG_FS_URI;
static const std::string ARG_DB;
static const std::string ARG_PATH;
static const std::string ARG_SECONDARY_PATH;
@ -137,6 +138,7 @@ class LDBCommand {
protected:
LDBCommandExecuteResult exec_state_;
std::string env_uri_;
std::string fs_uri_;
std::string db_path_;
// If empty, open DB as primary. If non-empty, open the DB as secondary
// with this secondary path. When running against a database opened by

@ -49,6 +49,7 @@ class FileChecksumGenCrc32c;
class FileChecksumGenCrc32cFactory;
const std::string LDBCommand::ARG_ENV_URI = "env_uri";
const std::string LDBCommand::ARG_FS_URI = "fs_uri";
const std::string LDBCommand::ARG_DB = "db";
const std::string LDBCommand::ARG_PATH = "path";
const std::string LDBCommand::ARG_SECONDARY_PATH = "secondary_path";
@ -286,6 +287,16 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) {
return nullptr;
}
// Returns a raw pointer to an Env created by NewCompositeEnv(fs).
// The Env is owned by a function-local static, so it is constructed only on
// the first call (from that call's `fs`); every later call returns the same
// Env regardless of the `fs` argument it is given.
static Env* GetCompositeEnv(std::shared_ptr<FileSystem> fs) {
  static std::shared_ptr<Env> env_holder = NewCompositeEnv(fs);
  Env* const raw_env = env_holder.get();
  return raw_env;
}
// Returns a raw pointer to an Env created by NewCompositeEnv(fs), kept in its
// own function-local static so it is a separate object from the one handed
// out by GetCompositeEnv(). The static is initialized on the first call only;
// later calls return that same Env whatever `fs` they pass.
static Env* GetCompositeBackupEnv(std::shared_ptr<FileSystem> fs) {
  static std::shared_ptr<Env> backup_env_holder = NewCompositeEnv(fs);
  Env* const raw_backup_env = backup_env_holder.get();
  return raw_backup_env;
}
/* Run the command, and return the execute result. */
void LDBCommand::Run() {
if (!exec_state_.IsNotStarted()) {
@ -294,12 +305,33 @@ void LDBCommand::Run() {
if (!options_.env || options_.env == Env::Default()) {
Env* env = Env::Default();
Status s = Env::LoadEnv(env_uri_, &env, &env_guard_);
if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString());
if (!env_uri_.empty() && !fs_uri_.empty()) {
std::string err =
"Error: you may not specity both "
"fs_uri and fs_env.";
fprintf(stderr, "%s\n", err.c_str());
exec_state_ = LDBCommandExecuteResult::Failed(err);
return;
}
if (!env_uri_.empty()) {
Status s = Env::LoadEnv(env_uri_, &env, &env_guard_);
if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString());
return;
}
} else if (!fs_uri_.empty()) {
std::shared_ptr<FileSystem> fs;
Status s = FileSystem::Load(fs_uri_, &fs);
if (fs == nullptr) {
fprintf(stderr, "error: %s\n", s.ToString().c_str());
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString());
return;
}
env = GetCompositeEnv(fs);
}
options_.env = env;
}
@ -351,6 +383,11 @@ LDBCommand::LDBCommand(const std::map<std::string, std::string>& options,
env_uri_ = itr->second;
}
itr = options.find(ARG_FS_URI);
if (itr != options.end()) {
fs_uri_ = itr->second;
}
itr = options.find(ARG_CF_NAME);
if (itr != options.end()) {
column_family_name_ = itr->second;
@ -483,6 +520,7 @@ ColumnFamilyHandle* LDBCommand::GetCfHandle() {
std::vector<std::string> LDBCommand::BuildCmdLineOptions(
std::vector<std::string> options) {
std::vector<std::string> ret = {ARG_ENV_URI,
ARG_FS_URI,
ARG_DB,
ARG_SECONDARY_PATH,
ARG_BLOOM_BITS,
@ -3016,6 +3054,7 @@ void RepairCommand::DoCommand() {
const std::string BackupableCommand::ARG_NUM_THREADS = "num_threads";
const std::string BackupableCommand::ARG_BACKUP_ENV_URI = "backup_env_uri";
const std::string BackupableCommand::ARG_BACKUP_FS_URI = "backup_fs_uri";
const std::string BackupableCommand::ARG_BACKUP_DIR = "backup_dir";
const std::string BackupableCommand::ARG_STDERR_LOG_LEVEL = "stderr_log_level";
@ -3024,8 +3063,9 @@ BackupableCommand::BackupableCommand(
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags)
: LDBCommand(options, flags, false /* is_read_only */,
BuildCmdLineOptions({ARG_BACKUP_ENV_URI, ARG_BACKUP_DIR,
ARG_NUM_THREADS, ARG_STDERR_LOG_LEVEL})),
BuildCmdLineOptions({ARG_BACKUP_ENV_URI, ARG_BACKUP_FS_URI,
ARG_BACKUP_DIR, ARG_NUM_THREADS,
ARG_STDERR_LOG_LEVEL})),
num_threads_(1) {
auto itr = options.find(ARG_NUM_THREADS);
if (itr != options.end()) {
@ -3035,6 +3075,15 @@ BackupableCommand::BackupableCommand(
if (itr != options.end()) {
backup_env_uri_ = itr->second;
}
itr = options.find(ARG_BACKUP_FS_URI);
if (itr != options.end()) {
backup_fs_uri_ = itr->second;
}
if (!backup_env_uri_.empty() && !backup_fs_uri_.empty()) {
exec_state_ = LDBCommandExecuteResult::Failed(
"you may not specity both --" + ARG_BACKUP_ENV_URI + " and --" +
ARG_BACKUP_FS_URI);
}
itr = options.find(ARG_BACKUP_DIR);
if (itr == options.end()) {
exec_state_ = LDBCommandExecuteResult::Failed("--" + ARG_BACKUP_DIR +
@ -3061,7 +3110,7 @@ BackupableCommand::BackupableCommand(
void BackupableCommand::Help(const std::string& name, std::string& ret) {
ret.append(" ");
ret.append(name);
ret.append(" [--" + ARG_BACKUP_ENV_URI + "] ");
ret.append(" [--" + ARG_BACKUP_ENV_URI + " | --" + ARG_BACKUP_FS_URI + "] ");
ret.append(" [--" + ARG_BACKUP_DIR + "] ");
ret.append(" [--" + ARG_NUM_THREADS + "] ");
ret.append(" [--" + ARG_STDERR_LOG_LEVEL + "=<int (InfoLogLevel)>] ");
@ -3087,15 +3136,26 @@ void BackupCommand::DoCommand() {
return;
}
fprintf(stdout, "open db OK\n");
Env* custom_env = nullptr;
Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_);
assert(custom_env != nullptr);
if (!backup_fs_uri_.empty()) {
std::shared_ptr<FileSystem> fs;
Status s = FileSystem::Load(backup_fs_uri_, &fs);
if (fs == nullptr) {
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString());
return;
}
custom_env = GetCompositeBackupEnv(fs);
} else {
Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_);
assert(custom_env != nullptr);
}
BackupableDBOptions backup_options =
BackupableDBOptions(backup_dir_, custom_env);
backup_options.info_log = logger_.get();
backup_options.max_background_operations = num_threads_;
status = BackupEngine::Open(custom_env, backup_options, &backup_engine);
status = BackupEngine::Open(options_.env, backup_options, &backup_engine);
if (status.ok()) {
fprintf(stdout, "open backup engine OK\n");
} else {
@ -3125,8 +3185,18 @@ void RestoreCommand::Help(std::string& ret) {
void RestoreCommand::DoCommand() {
Env* custom_env = nullptr;
Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_);
assert(custom_env != nullptr);
if (!backup_fs_uri_.empty()) {
std::shared_ptr<FileSystem> fs;
Status s = FileSystem::Load(backup_fs_uri_, &fs);
if (fs == nullptr) {
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString());
return;
}
custom_env = GetCompositeBackupEnv(fs);
} else {
Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_);
assert(custom_env != nullptr);
}
std::unique_ptr<BackupEngineReadOnly> restore_engine;
Status status;
@ -3136,7 +3206,7 @@ void RestoreCommand::DoCommand() {
opts.max_background_operations = num_threads_;
BackupEngineReadOnly* raw_restore_engine_ptr;
status =
BackupEngineReadOnly::Open(custom_env, opts, &raw_restore_engine_ptr);
BackupEngineReadOnly::Open(options_.env, opts, &raw_restore_engine_ptr);
if (status.ok()) {
restore_engine.reset(raw_restore_engine_ptr);
}

@ -541,6 +541,7 @@ class BackupableCommand : public LDBCommand {
protected:
static void Help(const std::string& name, std::string& ret);
std::string backup_env_uri_;
std::string backup_fs_uri_;
std::string backup_dir_;
int num_threads_;
std::unique_ptr<Logger> logger_;
@ -549,6 +550,7 @@ class BackupableCommand : public LDBCommand {
private:
static const std::string ARG_BACKUP_DIR;
static const std::string ARG_BACKUP_ENV_URI;
static const std::string ARG_BACKUP_FS_URI;
static const std::string ARG_NUM_THREADS;
static const std::string ARG_STDERR_LOG_LEVEL;
};

@ -22,7 +22,8 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options,
"=<full_path_to_db_directory> when necessary\n");
ret.append("\n");
ret.append("commands can optionally specify --" + LDBCommand::ARG_ENV_URI +
"=<uri_of_environment> if necessary\n\n");
"=<uri_of_environment> or --" + LDBCommand::ARG_FS_URI +
"=<uri_of_filesystem> if necessary\n\n");
ret.append(
"The following optional parameters control if keys/values are "
"input/output as hex or as plain strings:\n");

@ -37,7 +37,10 @@ void print_help(bool to_stderr) {
Path to SST file or directory containing SST files
--env_uri=<uri of underlying Env>
URI of underlying Env
URI of underlying Env, mutually exclusive with fs_uri
--fs_uri=<uri of underlying FileSystem>
URI of underlying FileSystem, mutually exclusive with env_uri
--command=check|scan|raw|verify|identify
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
@ -129,8 +132,16 @@ bool ParseIntArg(const char* arg, const std::string arg_name,
}
} // namespace
// Returns a raw pointer to an Env created by NewCompositeEnv(fs).
// The owning shared_ptr is a function-local static: it is initialized from
// the `fs` of the first call and reused, unchanged, by all later calls.
static ROCKSDB_NAMESPACE::Env* GetCompositeEnv(
    std::shared_ptr<ROCKSDB_NAMESPACE::FileSystem> fs) {
  static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_holder =
      ROCKSDB_NAMESPACE::NewCompositeEnv(fs);
  ROCKSDB_NAMESPACE::Env* const raw_env = env_holder.get();
  return raw_env;
}
int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
const char* env_uri = nullptr;
const char* fs_uri = nullptr;
const char* dir_or_file = nullptr;
uint64_t read_num = std::numeric_limits<uint64_t>::max();
std::string command;
@ -177,6 +188,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
for (int i = 1; i < argc; i++) {
if (strncmp(argv[i], "--env_uri=", 10) == 0) {
env_uri = argv[i] + 10;
} else if (strncmp(argv[i], "--fs_uri=", 9) == 0) {
fs_uri = argv[i] + 9;
} else if (strncmp(argv[i], "--file=", 7) == 0) {
dir_or_file = argv[i] + 7;
} else if (strcmp(argv[i], "--output_hex") == 0) {
@ -339,14 +352,30 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
// If caller of SSTDumpTool::Run(...) does not specify a different env other
// than Env::Default(), then try to load custom env based on dir_or_file.
// than Env::Default(), then try to load custom env based on env_uri/fs_uri.
// Otherwise, the caller is responsible for creating custom env.
if (env_uri && fs_uri) {
fprintf(stderr, "cannot specify --fs_uri and --env_uri.\n\n");
exit(1);
}
if (!options.env || options.env == ROCKSDB_NAMESPACE::Env::Default()) {
Env* env = Env::Default();
Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard);
if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
exit(1);
if (env_uri) {
Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard);
if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
exit(1);
}
} else if (fs_uri) {
std::shared_ptr<FileSystem> fs;
Status s = FileSystem::Load(fs_uri, &fs);
if (fs == nullptr) {
fprintf(stderr, "FileSystem Load: %s\n", s.ToString().c_str());
exit(1);
}
env = GetCompositeEnv(fs);
}
options.env = env;
} else {

Loading…
Cancel
Save