Support custom env in sst_dump (#5845)

Summary:
This PR allows for the creation of a custom env when using sst_dump. If
the user does not set options.env or sets options.env to nullptr, then sst_dump
will automatically try to create a custom env depending on the path to the sst
file or db directory. In order to use this feature, the user must call
ObjectRegistry::Register() beforehand.

Test Plan (on devserver):
```
$make all && make check
```
All tests must pass to ensure this change does not break anything.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5845

Differential Revision: D17678038

Pulled By: riversand963

fbshipit-source-id: 58ecb4b3f75246d52b07c4c924a63ee61c1ee626
main
Yanqin Jin 5 years ago committed by Facebook Github Bot
parent 2f4e288143
commit 167cdc9f17
  1. 1
      HISTORY.md
  2. 30
      env/env.cc
  3. 4
      include/rocksdb/env.h
  4. 5
      include/rocksdb/utilities/ldb_cmd.h
  5. 54
      tools/ldb_cmd.cc
  6. 4
      tools/ldb_cmd_impl.h
  7. 41
      tools/ldb_cmd_test.cc
  8. 7
      tools/ldb_test.py
  9. 2
      tools/ldb_tool.cc
  10. 46
      tools/sst_dump_test.cc
  11. 34
      tools/sst_dump_tool.cc

@ -22,6 +22,7 @@
* Deprecate `snap_refresh_nanos` option.
* Added DisableManualCompaction/EnableManualCompaction to stop and resume manual compaction.
* Add TryCatchUpWithPrimary() to StackableDB in non-LITE mode.
* Add a new Env::LoadEnv() overloaded function to return a shared_ptr to Env.
### Performance Improvements
* Improve the speed of the MemTable Bloom filter, reducing the write overhead of enabling it by 1/3 to 1/2, with similar benefit to read performance.

30
env/env.cc vendored

@ -43,6 +43,36 @@ Status Env::LoadEnv(const std::string& value, Env** result) {
return s;
}
// Looks up the Env registered under `value` and stores it in `*result`.
// Both `result` and `guard` must be non-null.
//
// Non-LITE builds:
//  - No Env found for `value`: returns NotFound and `*result` is set to
//    Env::Default(); `*guard` is left untouched.
//  - Env found together with ownership: ownership moves into `*guard` and
//    `*result` points at the guarded object.
//  - Env found without ownership: `*result` is the returned pointer and
//    `*guard` is left untouched.
// LITE builds: returns NotSupported and writes neither output.
Status Env::LoadEnv(const std::string& value, Env** result,
                    std::shared_ptr<Env>* guard) {
  assert(result);
  Status s;
#ifndef ROCKSDB_LITE
  assert(guard != nullptr);
  std::string err_msg;
  std::unique_ptr<Env> owned;
  Env* loaded =
      ObjectRegistry::NewInstance()->NewObject<Env>(value, &owned, &err_msg);
  if (loaded == nullptr) {
    // Nothing usable came back: report it, but still give the caller a
    // working Env so that NotFound can be treated as non-fatal.
    s = Status::NotFound(std::string("Cannot load ") + Env::Type() + ": " +
                         value);
    *result = Env::Default();
  } else if (owned) {
    // The registry handed over ownership; keep the object alive via guard.
    guard->reset(owned.release());
    *result = guard->get();
  } else {
    // Unowned instance: pass the raw pointer straight through.
    *result = loaded;
  }
#else
  (void)result;
  (void)guard;
  s = Status::NotSupported("Cannot load environment in LITE mode: ", value);
#endif
  return s;
}
std::string Env::PriorityToString(Env::Priority priority) {
switch (priority) {
case Env::Priority::BOTTOM:

@ -152,6 +152,10 @@ class Env {
// Loads the environment specified by the input value into the result
static Status LoadEnv(const std::string& value, Env** result);
// Loads the environment specified by the input value into the result.
// Unlike the two-argument overload, this one also takes `guard`: when the
// loaded Env comes with ownership, that ownership is stored in *guard and
// *result points at the guarded object; otherwise *guard is left untouched.
// If no Env can be loaded for `value`, NotFound is returned and *result is
// set to Env::Default(). In LITE builds NotSupported is returned.
// Neither `result` nor `guard` may be null.
static Status LoadEnv(const std::string& value, Env** result,
std::shared_ptr<Env>* guard);
// Return a default environment suitable for the current operating
// system. Sophisticated users may wish to provide their own Env
// implementation instead of relying on this default environment.

@ -29,6 +29,7 @@ namespace rocksdb {
class LDBCommand {
public:
// Command-line arguments
static const std::string ARG_ENV_URI;
static const std::string ARG_DB;
static const std::string ARG_PATH;
static const std::string ARG_SECONDARY_PATH;
@ -128,6 +129,7 @@ class LDBCommand {
protected:
LDBCommandExecuteResult exec_state_;
std::string env_uri_;
std::string db_path_;
// If empty, open DB as primary. If non-empty, open the DB as secondary
// with this secondary path. When running against a database opened by
@ -176,6 +178,9 @@ class LDBCommand {
/** List of command-line options valid for this command */
const std::vector<std::string> valid_cmd_line_options_;
/** Shared pointer to underlying environment if applicable **/
std::shared_ptr<Env> env_guard_;
bool ParseKeyValue(const std::string& line, std::string* key,
std::string* value, bool is_key_hex, bool is_value_hex);

@ -45,6 +45,7 @@
namespace rocksdb {
const std::string LDBCommand::ARG_ENV_URI = "env_uri";
const std::string LDBCommand::ARG_DB = "db";
const std::string LDBCommand::ARG_PATH = "path";
const std::string LDBCommand::ARG_SECONDARY_PATH = "secondary_path";
@ -274,6 +275,17 @@ void LDBCommand::Run() {
return;
}
if (!options_.env || options_.env == Env::Default()) {
Env* env = Env::Default();
Status s = Env::LoadEnv(env_uri_, &env, &env_guard_);
if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
exec_state_ = LDBCommandExecuteResult::Failed(s.ToString());
return;
}
options_.env = env;
}
if (db_ == nullptr && !NoDBOpen()) {
OpenDB();
if (exec_state_.IsFailed() && try_load_options_) {
@ -318,6 +330,11 @@ LDBCommand::LDBCommand(const std::map<std::string, std::string>& options,
db_path_ = itr->second;
}
itr = options.find(ARG_ENV_URI);
if (itr != options.end()) {
env_uri_ = itr->second;
}
itr = options.find(ARG_CF_NAME);
if (itr != options.end()) {
column_family_name_ = itr->second;
@ -341,7 +358,7 @@ LDBCommand::LDBCommand(const std::map<std::string, std::string>& options,
void LDBCommand::OpenDB() {
if (!create_if_missing_ && try_load_options_) {
Status s = LoadLatestOptions(db_path_, Env::Default(), &options_,
Status s = LoadLatestOptions(db_path_, options_.env, &options_,
&column_families_, ignore_unknown_options_);
if (!s.ok() && !s.IsNotFound()) {
// Option file exists but load option file error.
@ -397,7 +414,7 @@ void LDBCommand::OpenDB() {
if (column_families_.empty()) {
// Try to figure out column family lists
std::vector<std::string> cf_list;
st = DB::ListColumnFamilies(DBOptions(), db_path_, &cf_list);
st = DB::ListColumnFamilies(options_, db_path_, &cf_list);
// It is possible that the DB doesn't exist yet, e.g. in the "create if
// not existing" case. The failure is ignored here. We rely on DB::Open()
// to give us the correct error message for problem with opening
@ -487,7 +504,8 @@ ColumnFamilyHandle* LDBCommand::GetCfHandle() {
std::vector<std::string> LDBCommand::BuildCmdLineOptions(
std::vector<std::string> options) {
std::vector<std::string> ret = {ARG_DB,
std::vector<std::string> ret = {ARG_ENV_URI,
ARG_DB,
ARG_SECONDARY_PATH,
ARG_BLOOM_BITS,
ARG_BLOCK_SIZE,
@ -1095,31 +1113,23 @@ void ManifestDumpCommand::DoCommand() {
void ListColumnFamiliesCommand::Help(std::string& ret) {
ret.append(" ");
ret.append(ListColumnFamiliesCommand::Name());
ret.append(" full_path_to_db_directory ");
ret.append("\n");
}
ListColumnFamiliesCommand::ListColumnFamiliesCommand(
const std::vector<std::string>& params,
const std::vector<std::string>& /*params*/,
const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags)
: LDBCommand(options, flags, false, {}) {
if (params.size() != 1) {
exec_state_ = LDBCommandExecuteResult::Failed(
"dbname must be specified for the list_column_families command");
} else {
dbname_ = params[0];
}
}
: LDBCommand(options, flags, false, BuildCmdLineOptions({})) {}
void ListColumnFamiliesCommand::DoCommand() {
std::vector<std::string> column_families;
Status s = DB::ListColumnFamilies(DBOptions(), dbname_, &column_families);
Status s = DB::ListColumnFamilies(options_, db_path_, &column_families);
if (!s.ok()) {
printf("Error in processing db %s %s\n", dbname_.c_str(),
printf("Error in processing db %s %s\n", db_path_.c_str(),
s.ToString().c_str());
} else {
printf("Column families in %s: \n{", dbname_.c_str());
printf("Column families in %s: \n{", db_path_.c_str());
bool first = true;
for (auto cf : column_families) {
if (!first) {
@ -2857,13 +2867,14 @@ void BackupCommand::DoCommand() {
}
printf("open db OK\n");
Env* custom_env = nullptr;
Env::LoadEnv(backup_env_uri_, &custom_env);
Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_);
assert(custom_env != nullptr);
BackupableDBOptions backup_options =
BackupableDBOptions(backup_dir_, custom_env);
backup_options.info_log = logger_.get();
backup_options.max_background_operations = num_threads_;
status = BackupEngine::Open(Env::Default(), backup_options, &backup_engine);
status = BackupEngine::Open(custom_env, backup_options, &backup_engine);
if (status.ok()) {
printf("open backup engine OK\n");
} else {
@ -2893,7 +2904,8 @@ void RestoreCommand::Help(std::string& ret) {
void RestoreCommand::DoCommand() {
Env* custom_env = nullptr;
Env::LoadEnv(backup_env_uri_, &custom_env);
Env::LoadEnv(backup_env_uri_, &custom_env, &backup_env_guard_);
assert(custom_env != nullptr);
std::unique_ptr<BackupEngineReadOnly> restore_engine;
Status status;
@ -2902,8 +2914,8 @@ void RestoreCommand::DoCommand() {
opts.info_log = logger_.get();
opts.max_background_operations = num_threads_;
BackupEngineReadOnly* raw_restore_engine_ptr;
status = BackupEngineReadOnly::Open(Env::Default(), opts,
&raw_restore_engine_ptr);
status =
BackupEngineReadOnly::Open(custom_env, opts, &raw_restore_engine_ptr);
if (status.ok()) {
restore_engine.reset(raw_restore_engine_ptr);
}

@ -183,9 +183,6 @@ class ListColumnFamiliesCommand : public LDBCommand {
virtual void DoCommand() override;
virtual bool NoDBOpen() override { return true; }
private:
std::string dbname_;
};
class CreateColumnFamilyCommand : public LDBCommand {
@ -510,6 +507,7 @@ class BackupableCommand : public LDBCommand {
std::string backup_dir_;
int num_threads_;
std::unique_ptr<Logger> logger_;
std::shared_ptr<Env> backup_env_guard_;
private:
static const std::string ARG_BACKUP_DIR;

@ -6,6 +6,7 @@
#ifndef ROCKSDB_LITE
#include "rocksdb/utilities/ldb_cmd.h"
#include "port/stack_trace.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
@ -15,7 +16,23 @@ using std::map;
namespace rocksdb {
class LdbCmdTest : public testing::Test {};
// Test fixture for ldb command tests. It can optionally run against a custom
// Env selected through the TEST_ENV_URI environment variable.
class LdbCmdTest : public testing::Test {
 public:
  LdbCmdTest() : testing::Test() {}

  // Returns the Env to run the test against: Env::Default() unless
  // TEST_ENV_URI is set, in which case the URI is passed to Env::LoadEnv().
  // The returned Status is not checked; `base` starts out as Env::Default()
  // and LoadEnv() overwrites it with whatever it resolves.
  Env* TryLoadCustomOrDefaultEnv() {
    Env* base = Env::Default();
    if (const char* uri = getenv("TEST_ENV_URI")) {
      Env::LoadEnv(uri, &base, &env_guard_);
    }
    return base;
  }

 private:
  // Keeps a loaded custom Env alive for the lifetime of the fixture.
  std::shared_ptr<Env> env_guard_;
};
TEST_F(LdbCmdTest, HexToString) {
// map input to expected outputs.
@ -51,7 +68,8 @@ TEST_F(LdbCmdTest, HexToStringBadInputs) {
}
TEST_F(LdbCmdTest, MemEnv) {
std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
Env* base_env = TryLoadCustomOrDefaultEnv();
std::unique_ptr<Env> env(NewMemEnv(base_env));
Options opts;
opts.env = env.get();
opts.create_if_missing = true;
@ -84,13 +102,15 @@ TEST_F(LdbCmdTest, MemEnv) {
TEST_F(LdbCmdTest, OptionParsing) {
// test parsing flags
Options opts;
opts.env = TryLoadCustomOrDefaultEnv();
{
std::vector<std::string> args;
args.push_back("scan");
args.push_back("--ttl");
args.push_back("--timestamp");
LDBCommand* command = rocksdb::LDBCommand::InitFromCmdLineArgs(
args, Options(), LDBOptions(), nullptr);
args, opts, LDBOptions(), nullptr);
const std::vector<std::string> flags = command->TEST_GetFlags();
EXPECT_EQ(flags.size(), 2);
EXPECT_EQ(flags[0], "ttl");
@ -107,7 +127,7 @@ TEST_F(LdbCmdTest, OptionParsing) {
"opq:__rst.uvw.xyz?a=3+4+bcd+efghi&jk=lm_no&pq=rst-0&uv=wx-8&yz=a&bcd_"
"ef=gh.ijk'");
LDBCommand* command = rocksdb::LDBCommand::InitFromCmdLineArgs(
args, Options(), LDBOptions(), nullptr);
args, opts, LDBOptions(), nullptr);
const std::map<std::string, std::string> option_map =
command->TEST_GetOptionMap();
EXPECT_EQ(option_map.at("db"), "/dev/shm/ldbtest/");
@ -120,7 +140,8 @@ TEST_F(LdbCmdTest, OptionParsing) {
}
TEST_F(LdbCmdTest, ListFileTombstone) {
std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
Env* base_env = TryLoadCustomOrDefaultEnv();
std::unique_ptr<Env> env(NewMemEnv(base_env));
Options opts;
opts.env = env.get();
opts.create_if_missing = true;
@ -209,8 +230,18 @@ TEST_F(LdbCmdTest, ListFileTombstone) {
}
} // namespace rocksdb
// Hook for registering custom objects before the tests run.
// When ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS is defined, the
// function is only declared here (with C linkage) and must be provided at
// link time -- presumably by a static library, per the macro name; confirm
// against the build setup. Otherwise a no-op stub is defined in this file.
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
extern "C" {
void RegisterCustomObjects(int argc, char** argv);
}
#else
void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {}
#endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
// Test entry point: installs the stack trace handler, initializes
// GoogleTest, calls the custom-object registration hook, and then runs every
// test.
int main(int argc, char** argv) {
rocksdb::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
// Called after InitGoogleTest() and before any test executes.
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
}
#else

@ -516,13 +516,12 @@ class LDBTestCase(unittest.TestCase):
def testListColumnFamilies(self):
print "Running testListColumnFamilies..."
dbPath = os.path.join(self.TMP_DIR, self.DB_NAME)
self.assertRunOK("put x1 y1 --create_if_missing", "OK")
cmd = "list_column_families %s | grep -v \"Column families\""
cmd = "list_column_families | grep -v \"Column families\""
# Test on valid dbPath.
self.assertRunOKFull(cmd % dbPath, "{default}")
self.assertRunOK(cmd, "{default}")
# Test on empty path.
self.assertRunFAILFull(cmd % "")
self.assertRunFAIL(cmd)
def testColumnFamilies(self):
print "Running testColumnFamilies..."

@ -21,6 +21,8 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options,
ret.append("commands MUST specify --" + LDBCommand::ARG_DB +
"=<full_path_to_db_directory> when necessary\n");
ret.append("\n");
ret.append("commands can optionally specify --" + LDBCommand::ARG_ENV_URI +
"=<uri_of_environment> if necessary\n\n");
ret.append(
"The following optional parameters control if keys/values are "
"input/output as hex or as plain strings:\n");

@ -13,6 +13,7 @@
#include "rocksdb/sst_dump_tool.h"
#include "file/random_access_file_reader.h"
#include "port/stack_trace.h"
#include "rocksdb/filter_policy.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/table_builder.h"
@ -85,15 +86,33 @@ void cleanup(const Options& opts, const std::string& file_name) {
// Test for sst dump tool "raw" mode
class SSTDumpToolTest : public testing::Test {
std::string testDir_;
std::string test_dir_;
Env* env_;
std::shared_ptr<Env> env_guard_;
public:
SSTDumpToolTest() { testDir_ = test::TmpDir(); }
SSTDumpToolTest() : env_(Env::Default()) {
const char* test_env_uri = getenv("TEST_ENV_URI");
if (test_env_uri) {
Env::LoadEnv(test_env_uri, &env_, &env_guard_);
}
test_dir_ = test::PerThreadDBPath(env_, "sst_dump_test_db");
Status s = env_->CreateDirIfMissing(test_dir_);
EXPECT_OK(s);
}
~SSTDumpToolTest() override {
if (getenv("KEEP_DB")) {
fprintf(stdout, "Data is still at %s\n", test_dir_.c_str());
} else {
EXPECT_OK(env_->DeleteDir(test_dir_));
}
}
~SSTDumpToolTest() override {}
Env* env() { return env_; }
std::string MakeFilePath(const std::string& file_name) const {
std::string path(testDir_);
std::string path(test_dir_);
path.append("/").append(file_name);
return path;
}
@ -112,6 +131,7 @@ class SSTDumpToolTest : public testing::Test {
TEST_F(SSTDumpToolTest, EmptyFilter) {
Options opts;
opts.env = env();
std::string file_path = MakeFilePath("rocksdb_sst_test.sst");
createSST(opts, file_path);
@ -129,6 +149,7 @@ TEST_F(SSTDumpToolTest, EmptyFilter) {
TEST_F(SSTDumpToolTest, FilterBlock) {
Options opts;
opts.env = env();
BlockBasedTableOptions table_opts;
table_opts.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, true));
opts.table_factory.reset(new BlockBasedTableFactory(table_opts));
@ -149,6 +170,7 @@ TEST_F(SSTDumpToolTest, FilterBlock) {
TEST_F(SSTDumpToolTest, FullFilterBlock) {
Options opts;
opts.env = env();
BlockBasedTableOptions table_opts;
table_opts.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
opts.table_factory.reset(new BlockBasedTableFactory(table_opts));
@ -169,6 +191,7 @@ TEST_F(SSTDumpToolTest, FullFilterBlock) {
TEST_F(SSTDumpToolTest, GetProperties) {
Options opts;
opts.env = env();
BlockBasedTableOptions table_opts;
table_opts.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
opts.table_factory.reset(new BlockBasedTableFactory(table_opts));
@ -189,6 +212,7 @@ TEST_F(SSTDumpToolTest, GetProperties) {
TEST_F(SSTDumpToolTest, CompressedSizes) {
Options opts;
opts.env = env();
BlockBasedTableOptions table_opts;
table_opts.filter_policy.reset(rocksdb::NewBloomFilterPolicy(10, false));
opts.table_factory.reset(new BlockBasedTableFactory(table_opts));
@ -208,9 +232,9 @@ TEST_F(SSTDumpToolTest, CompressedSizes) {
}
TEST_F(SSTDumpToolTest, MemEnv) {
std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
std::unique_ptr<Env> mem_env(NewMemEnv(env()));
Options opts;
opts.env = env.get();
opts.env = mem_env.get();
std::string file_path = MakeFilePath("rocksdb_sst_test.sst");
createSST(opts, file_path);
@ -228,8 +252,18 @@ TEST_F(SSTDumpToolTest, MemEnv) {
} // namespace rocksdb
// Hook for registering custom objects before the tests run.
// When ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS is defined, the
// function is only declared here (with C linkage) and must be provided at
// link time -- presumably by a static library, per the macro name; confirm
// against the build setup. Otherwise a no-op stub is defined in this file.
#ifdef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
extern "C" {
void RegisterCustomObjects(int argc, char** argv);
}
#else
void RegisterCustomObjects(int /*argc*/, char** /*argv*/) {}
#endif // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
// Test entry point: installs the stack trace handler, initializes
// GoogleTest, calls the custom-object registration hook, and then runs every
// test.
int main(int argc, char** argv) {
rocksdb::port::InstallStackTraceHandler();
::testing::InitGoogleTest(&argc, argv);
// Called after InitGoogleTest() and before any test executes.
RegisterCustomObjects(argc, argv);
return RUN_ALL_TESTS();
}

@ -150,7 +150,7 @@ Status SstFileDumper::VerifyChecksum() {
Status SstFileDumper::DumpTable(const std::string& out_filename) {
std::unique_ptr<WritableFile> out_file;
Env* env = Env::Default();
Env* env = options_.env;
env->NewWritableFile(out_filename, &out_file, soptions_);
Status s = table_reader_->DumpTable(out_file.get());
out_file->Close();
@ -161,7 +161,7 @@ uint64_t SstFileDumper::CalculateCompressedTableSize(
const TableBuilderOptions& tb_options, size_t block_size,
uint64_t* num_data_blocks) {
std::unique_ptr<WritableFile> out_file;
std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
std::unique_ptr<Env> env(NewMemEnv(options_.env));
env->NewWritableFile(testFileName, &out_file, soptions_);
std::unique_ptr<WritableFileWriter> dest_writer;
dest_writer.reset(
@ -411,6 +411,9 @@ void print_help() {
--file=<data_dir_OR_sst_file>
Path to SST file or directory containing SST files
--env_uri=<uri of underlying Env>
URI of underlying Env
--command=check|scan|raw|verify
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
scan: Iterate over entries in files and print them to screen
@ -463,6 +466,7 @@ void print_help() {
} // namespace
int SSTDumpTool::Run(int argc, char** argv, Options options) {
const char* env_uri = nullptr;
const char* dir_or_file = nullptr;
uint64_t read_num = std::numeric_limits<uint64_t>::max();
std::string command;
@ -489,15 +493,16 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
uint64_t total_index_block_size = 0;
uint64_t total_filter_block_size = 0;
for (int i = 1; i < argc; i++) {
if (strncmp(argv[i], "--file=", 7) == 0) {
if (strncmp(argv[i], "--env_uri=", 10) == 0) {
env_uri = argv[i] + 10;
} else if (strncmp(argv[i], "--file=", 7) == 0) {
dir_or_file = argv[i] + 7;
} else if (strcmp(argv[i], "--output_hex") == 0) {
output_hex = true;
} else if (strcmp(argv[i], "--input_key_hex") == 0) {
input_key_hex = true;
} else if (sscanf(argv[i],
"--read_num=%lu%c",
(unsigned long*)&n, &junk) == 1) {
} else if (sscanf(argv[i], "--read_num=%lu%c", (unsigned long*)&n, &junk) ==
1) {
read_num = n;
} else if (strcmp(argv[i], "--verify_checksum") == 0) {
verify_checksum = true;
@ -589,6 +594,23 @@ int SSTDumpTool::Run(int argc, char** argv, Options options) {
exit(1);
}
std::shared_ptr<rocksdb::Env> env_guard;
// If caller of SSTDumpTool::Run(...) does not specify a different env other
// than Env::Default(), then try to load custom env based on env_uri.
// Otherwise, the caller is responsible for creating custom env.
if (!options.env || options.env == rocksdb::Env::Default()) {
Env* env = Env::Default();
Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard);
if (!s.ok() && !s.IsNotFound()) {
fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
exit(1);
}
options.env = env;
} else {
fprintf(stdout, "options.env is %p\n", options.env);
}
std::vector<std::string> filenames;
rocksdb::Env* env = options.env;
rocksdb::Status st = env->GetChildren(dir_or_file, &filenames);

Loading…
Cancel
Save