Add command "list_file_range_deletes" in ldb (#5615)

Summary:
Add a command in ldb so that users can print out tombstones in SST files.
To make the code testable, change the interface of LDBCommandRunner::RunCommand() so that it no longer exits the program, but instead returns the status code to the caller.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5615

Test Plan: Add a new unit test

Differential Revision: D16550326

fbshipit-source-id: 88ddfe6984bdcbb3a528abdd115089df09eba52e
main
sdong 5 years ago committed by Facebook Github Bot
parent 6ec2bf3fce
commit bd2c753dd0
  1. 2
      HISTORY.md
  2. 21
      db/db_impl/db_impl.cc
  3. 8
      db/db_impl/db_impl.h
  4. 3
      db/db_impl/db_impl_debug.cc
  5. 22
      db/table_cache.cc
  6. 8
      db/table_cache.h
  7. 54
      db/version_set.cc
  8. 5
      db/version_set.h
  9. 3
      include/rocksdb/utilities/ldb_cmd.h
  10. 54
      tools/ldb_cmd.cc
  11. 16
      tools/ldb_cmd_impl.h
  12. 93
      tools/ldb_cmd_test.cc
  13. 16
      tools/ldb_tool.cc

@ -19,6 +19,7 @@
* ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator. * ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator.
* Replaces old Registra with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env. * Replaces old Registra with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env.
* Added new overload of GetApproximateSizes which gets SizeApproximationOptions object and returns a Status. The older overloads are redirecting their calls to this new method and no longer assert if the include_flags doesn't have either of INCLUDE_MEMTABLES or INCLUDE_FILES bits set. It's recommended to use the new method only, as it is more type safe and returns a meaningful status in case of errors. * Added new overload of GetApproximateSizes which gets SizeApproximationOptions object and returns a Status. The older overloads are redirecting their calls to this new method and no longer assert if the include_flags doesn't have either of INCLUDE_MEMTABLES or INCLUDE_FILES bits set. It's recommended to use the new method only, as it is more type safe and returns a meaningful status in case of errors.
* LDBCommandRunner::RunCommand() now returns the status code as an integer, rather than calling exit() with that code.
### New Features ### New Features
* Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact. * Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact.
@ -57,6 +58,7 @@
* Add an option `unordered_write` which trades snapshot guarantees with higher write throughput. When used with WRITE_PREPARED transactions with two_write_queues=true, it offers higher throughput with however no compromise on guarantees. * Add an option `unordered_write` which trades snapshot guarantees with higher write throughput. When used with WRITE_PREPARED transactions with two_write_queues=true, it offers higher throughput with however no compromise on guarantees.
* Allow DBImplSecondary to remove memtables with obsolete data after replaying MANIFEST and WAL. * Allow DBImplSecondary to remove memtables with obsolete data after replaying MANIFEST and WAL.
* Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error. * Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error.
* Add command `list_file_range_deletes` in ldb, which prints out tombstones in SST files.
### Performance Improvements ### Performance Improvements
* Reduce binary search when iterator reseek into the same data block. * Reduce binary search when iterator reseek into the same data block.

@ -33,9 +33,9 @@
#include "db/error_handler.h" #include "db/error_handler.h"
#include "db/event_helpers.h" #include "db/event_helpers.h"
#include "db/external_sst_file_ingestion_job.h" #include "db/external_sst_file_ingestion_job.h"
#include "db/import_column_family_job.h"
#include "db/flush_job.h" #include "db/flush_job.h"
#include "db/forward_iterator.h" #include "db/forward_iterator.h"
#include "db/import_column_family_job.h"
#include "db/job_context.h" #include "db/job_context.h"
#include "db/log_reader.h" #include "db/log_reader.h"
#include "db/log_writer.h" #include "db/log_writer.h"
@ -91,6 +91,7 @@
#include "tools/sst_dump_tool_imp.h" #include "tools/sst_dump_tool_imp.h"
#include "util/autovector.h" #include "util/autovector.h"
#include "util/build_version.h" #include "util/build_version.h"
#include "util/cast_util.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/compression.h" #include "util/compression.h"
#include "util/crc32c.h" #include "util/crc32c.h"
@ -864,6 +865,24 @@ void DBImpl::DumpStats() {
PrintStatistics(); PrintStatistics();
} }
// Writes a textual summary of the range tombstones found in the SST files of
// `column_family` into `*out_str`, printing at most `max_entries_to_print`
// tombstones. The work is delegated to the Version currently held by the
// column family's super version; the Status of that call is returned.
Status DBImpl::TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family,
                                           int max_entries_to_print,
                                           std::string* out_str) {
  ColumnFamilyData* cf_data =
      static_cast_with_check<ColumnFamilyHandleImpl, ColumnFamilyHandle>(
          column_family)
          ->cfd();
  // Obtain a referenced super version for the duration of the summary and
  // hand it back through CleanupSuperVersion() once done.
  SuperVersion* sv = cf_data->GetReferencedSuperVersion(&mutex_);
  Status result =
      sv->current->TablesRangeTombstoneSummary(max_entries_to_print, out_str);
  CleanupSuperVersion(sv);
  return result;
}
void DBImpl::ScheduleBgLogWriterClose(JobContext* job_context) { void DBImpl::ScheduleBgLogWriterClose(JobContext* job_context) {
if (!job_context->logs_to_free.empty()) { if (!job_context->logs_to_free.empty()) {
for (auto l : job_context->logs_to_free) { for (auto l : job_context->logs_to_free) {

@ -812,6 +812,13 @@ class DBImpl : public DB {
uint64_t* new_time, uint64_t* new_time,
std::map<std::string, uint64_t>* stats_map); std::map<std::string, uint64_t>* stats_map);
// Write a summary of the range tombstones stored in the SST files of
// `column_family` into `*out_str`.
// This is only used by ldb. The output might be capped: at most
// `max_entries_to_print` tombstones are printed. Tombstones
// printed out are not guaranteed to be in any order.
// Returns the Status of the underlying per-file tombstone lookup.
Status TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family,
int max_entries_to_print,
std::string* out_str);
#ifndef NDEBUG #ifndef NDEBUG
// Compact any files in the named level that overlap [*begin, *end] // Compact any files in the named level that overlap [*begin, *end]
Status TEST_CompactRange(int level, const Slice* begin, const Slice* end, Status TEST_CompactRange(int level, const Slice* begin, const Slice* end,
@ -912,7 +919,6 @@ class DBImpl : public DB {
void TEST_WaitForPersistStatsRun(std::function<void()> callback) const; void TEST_WaitForPersistStatsRun(std::function<void()> callback) const;
bool TEST_IsPersistentStatsEnabled() const; bool TEST_IsPersistentStatsEnabled() const;
size_t TEST_EstimateInMemoryStatsHistorySize() const; size_t TEST_EstimateInMemoryStatsHistorySize() const;
#endif // NDEBUG #endif // NDEBUG
protected: protected:

@ -9,12 +9,13 @@
#ifndef NDEBUG #ifndef NDEBUG
#include "db/column_family.h"
#include "db/db_impl/db_impl.h" #include "db/db_impl/db_impl.h"
#include "db/error_handler.h" #include "db/error_handler.h"
#include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_updater.h"
#include "util/cast_util.h"
namespace rocksdb { namespace rocksdb {
uint64_t DBImpl::TEST_GetLevel0TotalSize() { uint64_t DBImpl::TEST_GetLevel0TotalSize() {
InstrumentedMutexLock l(&mutex_); InstrumentedMutexLock l(&mutex_);
return default_cf_handle_->cfd()->current()->storage_info()->NumLevelBytes(0); return default_cf_handle_->cfd()->current()->storage_info()->NumLevelBytes(0);

@ -255,6 +255,28 @@ InternalIterator* TableCache::NewIterator(
return result; return result;
} }
// Creates the range tombstone iterator of the file described by `file_meta`
// and stores it in `*out_iter`.
//
// If the file descriptor already carries a table reader it is used directly;
// otherwise the reader is looked up through FindTable(). On a failed lookup
// the error Status is returned and `*out_iter` is left untouched.
//
// `out_iter` must not be null. On success `*out_iter` holds whatever
// NewRangeTombstoneIterator() returned; the caller in
// Version::TablesRangeTombstoneSummary checks it for null before use.
Status TableCache::GetRangeTombstoneIterator(
    const ReadOptions& options,
    const InternalKeyComparator& internal_comparator,
    const FileMetaData& file_meta,
    std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter) {
  // Validate the output pointer before it is dereferenced below; asserting
  // after the dereference could never catch a null pointer.
  assert(out_iter);
  const FileDescriptor& fd = file_meta.fd;
  Status s;
  TableReader* t = fd.table_reader;
  Cache::Handle* handle = nullptr;
  if (t == nullptr) {
    s = FindTable(env_options_, internal_comparator, fd, &handle);
    if (s.ok()) {
      t = GetTableReaderFromHandle(handle);
    }
  }
  if (s.ok()) {
    out_iter->reset(t->NewRangeTombstoneIterator(options));
  }
  // NOTE(review): when FindTable() is used, `handle` is neither released nor
  // attached to the returned iterator in this function. This looks like it
  // keeps the cache entry pinned -- confirm and release the handle (or tie
  // its lifetime to the iterator) using the cache helpers of this class.
  return s;
}
Status TableCache::Get(const ReadOptions& options, Status TableCache::Get(const ReadOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta, const Slice& k, const FileMetaData& file_meta, const Slice& k,

@ -93,6 +93,14 @@ class TableCache {
HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, HistogramImpl* file_read_hist = nullptr, bool skip_filters = false,
int level = -1); int level = -1);
// Return the range delete tombstone iterator of the file specified by
// `file_meta` through `*out_iter`.
// The table reader attached to the file descriptor is used when present;
// otherwise the reader is looked up via the table cache. If that lookup
// fails, the error is returned and `*out_iter` is not modified.
// `out_iter` must not be null; the iterator stored in it should be checked
// for null by the caller before use.
Status GetRangeTombstoneIterator(
const ReadOptions& options,
const InternalKeyComparator& internal_comparator,
const FileMetaData& file_meta,
std::unique_ptr<FragmentedRangeTombstoneIterator>* out_iter);
// If a seek to internal key "k" in specified file finds an entry, // If a seek to internal key "k" in specified file finds an entry,
// call get_context->SaveValue() repeatedly until // call get_context->SaveValue() repeatedly until
// it returns false. As a side effect, it will insert the TableReader // it returns false. As a side effect, it will insert the TableReader

@ -1257,6 +1257,60 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
return Status::OK(); return Status::OK();
} }
// Renders the range tombstones of every SST file in this version as text.
//
// Files are visited level by level. Each visited file contributes a
// "=== file : <name> ===" header followed by one line per tombstone (start
// key, end key, sequence number). At most `max_entries_to_print` tombstone
// lines are emitted in total; once that budget is used up the walk stops and
// a trailing "(results may not be complete)" line is appended.
//
// `*out_str` is assigned only when the walk finishes without error. It is
// left untouched when `max_entries_to_print` is not positive (OK is
// returned) and when opening a file's tombstone iterator fails (that error
// is returned).
Status Version::TablesRangeTombstoneSummary(int max_entries_to_print,
                                            std::string* out_str) {
  if (max_entries_to_print <= 0) {
    return Status::OK();
  }

  int budget = max_entries_to_print;
  std::stringstream summary;

  // `budget` is strictly positive at the start of every file visit, so
  // testing it in the loop condition ends the walk as soon as it is spent.
  for (int level = 0; budget > 0 && level < storage_info_.num_levels_;
       ++level) {
    for (const auto& file_meta : storage_info_.files_[level]) {
      summary << "=== file : "
              << TableFileName(cfd_->ioptions()->cf_paths,
                               file_meta->fd.GetNumber(),
                               file_meta->fd.GetPathId())
              << " ===\n";

      std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter;
      Status lookup = cfd_->table_cache()->GetRangeTombstoneIterator(
          ReadOptions(), cfd_->internal_comparator(), *file_meta,
          &range_del_iter);
      if (!lookup.ok()) {
        return lookup;
      }
      if (range_del_iter == nullptr) {
        // No tombstone iterator for this file; only the header is printed.
        continue;
      }

      for (range_del_iter->SeekToFirst();
           range_del_iter->Valid() && budget > 0;
           range_del_iter->Next(), --budget) {
        summary << "start: " << range_del_iter->start_key().ToString(true)
                << " end: " << range_del_iter->end_key().ToString(true)
                << " seq: " << range_del_iter->seq() << '\n';
      }
      if (budget <= 0) {
        break;
      }
    }
  }

  assert(budget >= 0);
  if (budget <= 0) {
    summary << "(results may not be complete)\n";
  }
  *out_str = summary.str();
  return Status::OK();
}
Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props, Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props,
int level) { int level) {
for (const auto& file_meta : storage_info_.files_[level]) { for (const auto& file_meta : storage_info_.files_[level]) {

@ -630,6 +630,11 @@ class Version {
Status GetPropertiesOfTablesInRange(const Range* range, std::size_t n, Status GetPropertiesOfTablesInRange(const Range* range, std::size_t n,
TablePropertiesCollection* props) const; TablePropertiesCollection* props) const;
// Print summary of range delete tombstones in SST files into out_str,
// with maximum max_entries_to_print entries printed out.
// If max_entries_to_print is not positive, returns OK without modifying
// out_str. If a file's tombstone iterator cannot be obtained, that error is
// returned and out_str is not modified.
Status TablesRangeTombstoneSummary(int max_entries_to_print,
std::string* out_str);
// REQUIRES: lock is held // REQUIRES: lock is held
// On success, "tp" will contains the aggregated table property among // On success, "tp" will contains the aggregated table property among
// the table properties of all sst files in this version. // the table properties of all sst files in this version.

@ -261,7 +261,8 @@ class LDBCommandRunner {
public: public:
static void PrintHelp(const LDBOptions& ldb_options, const char* exec_name); static void PrintHelp(const LDBOptions& ldb_options, const char* exec_name);
static void RunCommand( // Returns the status code to return. 0 is no error.
static int RunCommand(
int argc, char** argv, Options options, const LDBOptions& ldb_options, int argc, char** argv, Options options, const LDBOptions& ldb_options,
const std::vector<ColumnFamilyDescriptor>* column_families); const std::vector<ColumnFamilyDescriptor>* column_families);
}; };

@ -261,6 +261,9 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) {
return new IngestExternalSstFilesCommand(parsed_params.cmd_params, return new IngestExternalSstFilesCommand(parsed_params.cmd_params,
parsed_params.option_map, parsed_params.option_map,
parsed_params.flags); parsed_params.flags);
} else if (parsed_params.cmd == ListFileRangeDeletesCommand::Name()) {
return new ListFileRangeDeletesCommand(parsed_params.option_map,
parsed_params.flags);
} }
return nullptr; return nullptr;
} }
@ -3228,5 +3231,56 @@ Options IngestExternalSstFilesCommand::PrepareOptionsForOpenDB() {
return opt; return opt;
} }
// Constructs the command and parses the optional ARG_MAX_KEYS option into
// `max_keys_`. When the option is absent, `max_keys_` keeps its in-class
// default. When the value cannot be parsed, the command is put into a failed
// execute state instead of throwing.
ListFileRangeDeletesCommand::ListFileRangeDeletesCommand(
    const std::map<std::string, std::string>& options,
    const std::vector<std::string>& flags)
    : LDBCommand(options, flags, true, BuildCmdLineOptions({ARG_MAX_KEYS})) {
  const auto max_keys_opt = options.find(ARG_MAX_KEYS);
  if (max_keys_opt == options.end()) {
    return;  // Option not given; keep the default limit.
  }

  const std::string& raw_value = max_keys_opt->second;
  try {
#if defined(CYGWIN)
    max_keys_ = strtol(raw_value.c_str(), nullptr, 10);
#else
    max_keys_ = std::stoi(raw_value);
#endif
  } catch (const std::invalid_argument&) {
    exec_state_ =
        LDBCommandExecuteResult::Failed(ARG_MAX_KEYS + " has an invalid value");
  } catch (const std::out_of_range&) {
    exec_state_ = LDBCommandExecuteResult::Failed(
        ARG_MAX_KEYS + " has a value out-of-range");
  }
}
// Appends the one-line usage text of this command (its name, the optional
// ARG_MAX_KEYS argument, and a short description) to `ret`.
void ListFileRangeDeletesCommand::Help(std::string& ret) {
  const std::string usage = " " + ListFileRangeDeletesCommand::Name() +
                            " [--" + ARG_MAX_KEYS + "=<N>]" +
                            " : print tombstones in SST files.\n";
  ret.append(usage);
}
void ListFileRangeDeletesCommand::DoCommand() {
if (!db_) {
assert(GetExecuteState().IsFailed());
return;
}
DBImpl* db_impl = static_cast_with_check<DBImpl, DB>(db_->GetRootDB());
std::string out_str;
Status st =
db_impl->TablesRangeTombstoneSummary(GetCfHandle(), max_keys_, &out_str);
if (st.ok()) {
TEST_SYNC_POINT_CALLBACK(
"ListFileRangeDeletesCommand::DoCommand:BeforePrint", &out_str);
fprintf(stdout, "%s\n", out_str.c_str());
} else {
exec_state_ = LDBCommandExecuteResult::Failed(st.ToString());
}
}
} // namespace rocksdb } // namespace rocksdb
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE

@ -592,4 +592,20 @@ class IngestExternalSstFilesCommand : public LDBCommand {
static const std::string ARG_WRITE_GLOBAL_SEQNO; static const std::string ARG_WRITE_GLOBAL_SEQNO;
}; };
// Command that prints out range delete tombstones in SST files.
// The number of tombstones printed is capped by `max_keys_`.
class ListFileRangeDeletesCommand : public LDBCommand {
public:
// Name under which the command is selected on the ldb command line.
static std::string Name() { return "list_file_range_deletes"; }
// `options` may carry a maximum-keys option that overrides the default cap;
// an unparsable value puts the command into a failed state.
ListFileRangeDeletesCommand(const std::map<std::string, std::string>& options,
const std::vector<std::string>& flags);
// Prints the tombstone summary to stdout, or records the failure.
void DoCommand() override;
// Appends this command's usage line to `ret`.
static void Help(std::string& ret);
private:
// Maximum number of tombstones to print; 1000 unless overridden.
int max_keys_ = 1000;
};
} // namespace rocksdb } // namespace rocksdb

@ -6,6 +6,7 @@
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
#include "rocksdb/utilities/ldb_cmd.h" #include "rocksdb/utilities/ldb_cmd.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
using std::string; using std::string;
@ -77,8 +78,8 @@ TEST_F(LdbCmdTest, MemEnv) {
char arg3[] = "dump_live_files"; char arg3[] = "dump_live_files";
char* argv[] = {arg1, arg2, arg3}; char* argv[] = {arg1, arg2, arg3};
rocksdb::LDBTool tool; ASSERT_EQ(0,
tool.Run(3, argv, opts); LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));
} }
TEST_F(LdbCmdTest, OptionParsing) { TEST_F(LdbCmdTest, OptionParsing) {
@ -118,6 +119,94 @@ TEST_F(LdbCmdTest, OptionParsing) {
} }
} }
// Verifies that `list_file_range_deletes` prints the range tombstones that
// were flushed to SST files, and that `--max_keys` caps how many are printed.
TEST_F(LdbCmdTest, ListFileTombstone) {
  std::unique_ptr<Env> mem_env(NewMemEnv(Env::Default()));
  Options opts;
  opts.env = mem_env.get();
  opts.create_if_missing = true;

  // Build a DB with one file of plain keys and one file holding two range
  // tombstones, then close it so that ldb can open it.
  DB* db = nullptr;
  std::string dbname = test::TmpDir();
  ASSERT_OK(DB::Open(opts, dbname, &db));

  WriteOptions write_opts;
  FlushOptions flush_opts;
  flush_opts.wait = true;

  ASSERT_OK(db->Put(write_opts, "foo", "1"));
  ASSERT_OK(db->Put(write_opts, "bar", "2"));
  ASSERT_OK(db->Flush(flush_opts));

  ASSERT_OK(
      db->DeleteRange(write_opts, db->DefaultColumnFamily(), "foo", "foo2"));
  ASSERT_OK(
      db->DeleteRange(write_opts, db->DefaultColumnFamily(), "bar", "foo2"));
  ASSERT_OK(db->Flush(flush_opts));

  delete db;

  // Returns a sync point callback that counts the tombstone lines in the
  // text about to be printed and expects exactly `expected` of them.
  auto expect_tombstones = [](int expected) {
    return [expected](void* arg) {
      const std::string& printed = *reinterpret_cast<std::string*>(arg);
      const std::string kFingerprintStr = "start: ";
      int num_tb = 0;
      for (auto pos = printed.find(kFingerprintStr); pos != std::string::npos;
           pos = printed.find(kFingerprintStr, pos + kFingerprintStr.size())) {
        num_tb++;
      }
      EXPECT_EQ(expected, num_tb);
    };
  };

  // Without a limit, both tombstones are printed.
  {
    char arg1[] = "./ldb";
    char arg2[1024];
    snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str());
    char arg3[] = "list_file_range_deletes";
    char* argv[] = {arg1, arg2, arg3};

    rocksdb::SyncPoint::GetInstance()->SetCallBack(
        "ListFileRangeDeletesCommand::DoCommand:BeforePrint",
        expect_tombstones(2));
    rocksdb::SyncPoint::GetInstance()->EnableProcessing();

    ASSERT_EQ(
        0, LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr));

    rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  }

  // Test the case of limiting tombstones
  {
    char arg1[] = "./ldb";
    char arg2[1024];
    snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str());
    char arg3[] = "list_file_range_deletes";
    char arg4[] = "--max_keys=1";
    char* argv[] = {arg1, arg2, arg3, arg4};

    rocksdb::SyncPoint::GetInstance()->SetCallBack(
        "ListFileRangeDeletesCommand::DoCommand:BeforePrint",
        expect_tombstones(1));
    rocksdb::SyncPoint::GetInstance()->EnableProcessing();

    ASSERT_EQ(
        0, LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr));

    rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks();
    rocksdb::SyncPoint::GetInstance()->DisableProcessing();
  }
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -71,6 +71,7 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options,
DBQuerierCommand::Help(ret); DBQuerierCommand::Help(ret);
ApproxSizeCommand::Help(ret); ApproxSizeCommand::Help(ret);
CheckConsistencyCommand::Help(ret); CheckConsistencyCommand::Help(ret);
ListFileRangeDeletesCommand::Help(ret);
ret.append("\n\n"); ret.append("\n\n");
ret.append("Admin Commands:\n"); ret.append("Admin Commands:\n");
@ -96,12 +97,12 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options,
fprintf(stderr, "%s\n", ret.c_str()); fprintf(stderr, "%s\n", ret.c_str());
} }
void LDBCommandRunner::RunCommand( int LDBCommandRunner::RunCommand(
int argc, char** argv, Options options, const LDBOptions& ldb_options, int argc, char** argv, Options options, const LDBOptions& ldb_options,
const std::vector<ColumnFamilyDescriptor>* column_families) { const std::vector<ColumnFamilyDescriptor>* column_families) {
if (argc <= 2) { if (argc <= 2) {
PrintHelp(ldb_options, argv[0]); PrintHelp(ldb_options, argv[0]);
exit(1); return 1;
} }
LDBCommand* cmdObj = LDBCommand::InitFromCmdLineArgs( LDBCommand* cmdObj = LDBCommand::InitFromCmdLineArgs(
@ -109,11 +110,11 @@ void LDBCommandRunner::RunCommand(
if (cmdObj == nullptr) { if (cmdObj == nullptr) {
fprintf(stderr, "Unknown command\n"); fprintf(stderr, "Unknown command\n");
PrintHelp(ldb_options, argv[0]); PrintHelp(ldb_options, argv[0]);
exit(1); return 1;
} }
if (!cmdObj->ValidateCmdLineOptions()) { if (!cmdObj->ValidateCmdLineOptions()) {
exit(1); return 1;
} }
cmdObj->Run(); cmdObj->Run();
@ -121,14 +122,15 @@ void LDBCommandRunner::RunCommand(
fprintf(stderr, "%s\n", ret.ToString().c_str()); fprintf(stderr, "%s\n", ret.ToString().c_str());
delete cmdObj; delete cmdObj;
exit(ret.IsFailed()); return ret.IsFailed() ? 1 : 0;
} }
void LDBTool::Run(int argc, char** argv, Options options, void LDBTool::Run(int argc, char** argv, Options options,
const LDBOptions& ldb_options, const LDBOptions& ldb_options,
const std::vector<ColumnFamilyDescriptor>* column_families) { const std::vector<ColumnFamilyDescriptor>* column_families) {
LDBCommandRunner::RunCommand(argc, argv, options, ldb_options, int error_code = LDBCommandRunner::RunCommand(argc, argv, options,
column_families); ldb_options, column_families);
exit(error_code);
} }
} // namespace rocksdb } // namespace rocksdb

Loading…
Cancel
Save