From bd2c753dd0b48f83818042d1caae5e47fc6444aa Mon Sep 17 00:00:00 2001 From: sdong Date: Thu, 15 Aug 2019 16:59:42 -0700 Subject: [PATCH] Add command "list_file_range_deletes" in ldb (#5615) Summary: Add a command in ldb so that users can print out tombstones in SST files. In order to test the code, change the interface of LDBCommandRunner::RunCommand() so that it no longer exits the program, but instead returns the status code. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5615 Test Plan: Add a new unit test Differential Revision: D16550326 fbshipit-source-id: 88ddfe6984bdcbb3a528abdd115089df09eba52e --- HISTORY.md | 2 + db/db_impl/db_impl.cc | 21 ++++++- db/db_impl/db_impl.h | 8 ++- db/db_impl/db_impl_debug.cc | 3 +- db/table_cache.cc | 22 +++++++ db/table_cache.h | 8 +++ db/version_set.cc | 54 +++++++++++++++++ db/version_set.h | 5 ++ include/rocksdb/utilities/ldb_cmd.h | 3 +- tools/ldb_cmd.cc | 54 +++++++++++++++++ tools/ldb_cmd_impl.h | 16 +++++ tools/ldb_cmd_test.cc | 93 ++++++++++++++++++++++++++++- tools/ldb_tool.cc | 16 ++--- 13 files changed, 292 insertions(+), 13 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 8bcb47505..37a774c55 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -19,6 +19,7 @@ * ldb sometimes uses a string-append merge operator if no merge operator is passed in. This is to allow users to print keys from a DB with a merge operator. * Replaces old Registra with ObjectRegistry to allow user to create custom object from string, also add LoadEnv() to Env. * Added new overload of GetApproximateSizes which gets SizeApproximationOptions object and returns a Status. The older overloads are redirecting their calls to this new method and no longer assert if the include_flags doesn't have either of INCLUDE_MEMTABLES or INCLUDE_FILES bits set. It's recommended to use the new method only, as it is more type safe and returns a meaningful status in case of errors. 
+* LDBCommandRunner::RunCommand() now returns the status code as an integer, rather than calling exit() with that code. ### New Features * Add argument `--secondary_path` to ldb to open the database as the secondary instance. This would keep the original DB intact. @@ -57,6 +58,7 @@ * Add an option `unordered_write` which trades snapshot guarantees with higher write throughput. When used with WRITE_PREPARED transactions with two_write_queues=true, it offers higher throughput with however no compromise on guarantees. * Allow DBImplSecondary to remove memtables with obsolete data after replaying MANIFEST and WAL. * Add an option `failed_move_fall_back_to_copy` (default is true) for external SST ingestion. When `move_files` is true and hard link fails, ingestion falls back to copy if `failed_move_fall_back_to_copy` is true. Otherwise, ingestion reports an error. +* Add command `list_file_range_deletes` in ldb, which prints out tombstones in SST files. ### Performance Improvements * Reduce binary search when iterator reseek into the same data block. 
diff --git a/db/db_impl/db_impl.cc b/db/db_impl/db_impl.cc index 9236d911e..5d885b8b2 100644 --- a/db/db_impl/db_impl.cc +++ b/db/db_impl/db_impl.cc @@ -33,9 +33,9 @@ #include "db/error_handler.h" #include "db/event_helpers.h" #include "db/external_sst_file_ingestion_job.h" -#include "db/import_column_family_job.h" #include "db/flush_job.h" #include "db/forward_iterator.h" +#include "db/import_column_family_job.h" #include "db/job_context.h" #include "db/log_reader.h" #include "db/log_writer.h" @@ -91,6 +91,7 @@ #include "tools/sst_dump_tool_imp.h" #include "util/autovector.h" #include "util/build_version.h" +#include "util/cast_util.h" #include "util/coding.h" #include "util/compression.h" #include "util/crc32c.h" @@ -864,6 +865,24 @@ void DBImpl::DumpStats() { PrintStatistics(); } +Status DBImpl::TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family, + int max_entries_to_print, + std::string* out_str) { + auto* cfh = + static_cast_with_check( + column_family); + ColumnFamilyData* cfd = cfh->cfd(); + + SuperVersion* super_version = cfd->GetReferencedSuperVersion(&mutex_); + Version* version = super_version->current; + + Status s = + version->TablesRangeTombstoneSummary(max_entries_to_print, out_str); + + CleanupSuperVersion(super_version); + return s; +} + void DBImpl::ScheduleBgLogWriterClose(JobContext* job_context) { if (!job_context->logs_to_free.empty()) { for (auto l : job_context->logs_to_free) { diff --git a/db/db_impl/db_impl.h b/db/db_impl/db_impl.h index f1dbc5d02..f2b3df5e6 100644 --- a/db/db_impl/db_impl.h +++ b/db/db_impl/db_impl.h @@ -812,6 +812,13 @@ class DBImpl : public DB { uint64_t* new_time, std::map* stats_map); + // Print information of all tombstones of all iterators to the std::string + // This is only used by ldb. The output might be capped. Tombstones + // printed out are not guaranteed to be in any order. 
+ Status TablesRangeTombstoneSummary(ColumnFamilyHandle* column_family, + int max_entries_to_print, + std::string* out_str); + #ifndef NDEBUG // Compact any files in the named level that overlap [*begin, *end] Status TEST_CompactRange(int level, const Slice* begin, const Slice* end, @@ -912,7 +919,6 @@ class DBImpl : public DB { void TEST_WaitForPersistStatsRun(std::function callback) const; bool TEST_IsPersistentStatsEnabled() const; size_t TEST_EstimateInMemoryStatsHistorySize() const; - #endif // NDEBUG protected: diff --git a/db/db_impl/db_impl_debug.cc b/db/db_impl/db_impl_debug.cc index ec8489848..d783355ce 100644 --- a/db/db_impl/db_impl_debug.cc +++ b/db/db_impl/db_impl_debug.cc @@ -9,12 +9,13 @@ #ifndef NDEBUG +#include "db/column_family.h" #include "db/db_impl/db_impl.h" #include "db/error_handler.h" #include "monitoring/thread_status_updater.h" +#include "util/cast_util.h" namespace rocksdb { - uint64_t DBImpl::TEST_GetLevel0TotalSize() { InstrumentedMutexLock l(&mutex_); return default_cf_handle_->cfd()->current()->storage_info()->NumLevelBytes(0); diff --git a/db/table_cache.cc b/db/table_cache.cc index 48415beff..3c8a36c3b 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -255,6 +255,28 @@ InternalIterator* TableCache::NewIterator( return result; } +Status TableCache::GetRangeTombstoneIterator( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + std::unique_ptr* out_iter) { + const FileDescriptor& fd = file_meta.fd; + Status s; + TableReader* t = fd.table_reader; + Cache::Handle* handle = nullptr; + if (t == nullptr) { + s = FindTable(env_options_, internal_comparator, fd, &handle); + if (s.ok()) { + t = GetTableReaderFromHandle(handle); + } + } + if (s.ok()) { + out_iter->reset(t->NewRangeTombstoneIterator(options)); + assert(out_iter); + } + return s; +} + Status TableCache::Get(const ReadOptions& options, const InternalKeyComparator& internal_comparator, const FileMetaData& 
file_meta, const Slice& k, diff --git a/db/table_cache.h b/db/table_cache.h index 89a0b1b5c..ff9a70b57 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -93,6 +93,14 @@ class TableCache { HistogramImpl* file_read_hist = nullptr, bool skip_filters = false, int level = -1); + // Return the range delete tombstone iterator of the file specified by + // `file_meta`. + Status GetRangeTombstoneIterator( + const ReadOptions& options, + const InternalKeyComparator& internal_comparator, + const FileMetaData& file_meta, + std::unique_ptr* out_iter); + // If a seek to internal key "k" in specified file finds an entry, // call get_context->SaveValue() repeatedly until // it returns false. As a side effect, it will insert the TableReader diff --git a/db/version_set.cc b/db/version_set.cc index af0168f76..d1216646a 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1257,6 +1257,60 @@ Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { return Status::OK(); } +Status Version::TablesRangeTombstoneSummary(int max_entries_to_print, + std::string* out_str) { + if (max_entries_to_print <= 0) { + return Status::OK(); + } + int num_entries_left = max_entries_to_print; + + std::stringstream ss; + + for (int level = 0; level < storage_info_.num_levels_; level++) { + for (const auto& file_meta : storage_info_.files_[level]) { + auto fname = + TableFileName(cfd_->ioptions()->cf_paths, file_meta->fd.GetNumber(), + file_meta->fd.GetPathId()); + + ss << "=== file : " << fname << " ===\n"; + + TableCache* table_cache = cfd_->table_cache(); + std::unique_ptr tombstone_iter; + + Status s = table_cache->GetRangeTombstoneIterator( + ReadOptions(), cfd_->internal_comparator(), *file_meta, + &tombstone_iter); + if (!s.ok()) { + return s; + } + if (tombstone_iter) { + tombstone_iter->SeekToFirst(); + + while (tombstone_iter->Valid() && num_entries_left > 0) { + ss << "start: " << tombstone_iter->start_key().ToString(true) + << " end: " << 
tombstone_iter->end_key().ToString(true) + << " seq: " << tombstone_iter->seq() << '\n'; + tombstone_iter->Next(); + num_entries_left--; + } + if (num_entries_left <= 0) { + break; + } + } + } + if (num_entries_left <= 0) { + break; + } + } + assert(num_entries_left >= 0); + if (num_entries_left <= 0) { + ss << "(results may not be complete)\n"; + } + + *out_str = ss.str(); + return Status::OK(); +} + Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props, int level) { for (const auto& file_meta : storage_info_.files_[level]) { diff --git a/db/version_set.h b/db/version_set.h index 25598630e..3b4d5661c 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -630,6 +630,11 @@ class Version { Status GetPropertiesOfTablesInRange(const Range* range, std::size_t n, TablePropertiesCollection* props) const; + // Print summary of range delete tombstones in SST files into out_str, + // with maximum max_entries_to_print entries printed out. + Status TablesRangeTombstoneSummary(int max_entries_to_print, + std::string* out_str); + // REQUIRES: lock is held // On success, "tp" will contains the aggregated table property among // the table properties of all sst files in this version. diff --git a/include/rocksdb/utilities/ldb_cmd.h b/include/rocksdb/utilities/ldb_cmd.h index e7000742d..efac3f84d 100644 --- a/include/rocksdb/utilities/ldb_cmd.h +++ b/include/rocksdb/utilities/ldb_cmd.h @@ -261,7 +261,8 @@ class LDBCommandRunner { public: static void PrintHelp(const LDBOptions& ldb_options, const char* exec_name); - static void RunCommand( + // Returns the status code to return. 0 is no error. 
+ static int RunCommand( int argc, char** argv, Options options, const LDBOptions& ldb_options, const std::vector* column_families); }; diff --git a/tools/ldb_cmd.cc b/tools/ldb_cmd.cc index 86dfcc54e..83a303825 100644 --- a/tools/ldb_cmd.cc +++ b/tools/ldb_cmd.cc @@ -261,6 +261,9 @@ LDBCommand* LDBCommand::SelectCommand(const ParsedParams& parsed_params) { return new IngestExternalSstFilesCommand(parsed_params.cmd_params, parsed_params.option_map, parsed_params.flags); + } else if (parsed_params.cmd == ListFileRangeDeletesCommand::Name()) { + return new ListFileRangeDeletesCommand(parsed_params.option_map, + parsed_params.flags); } return nullptr; } @@ -3228,5 +3231,56 @@ Options IngestExternalSstFilesCommand::PrepareOptionsForOpenDB() { return opt; } +ListFileRangeDeletesCommand::ListFileRangeDeletesCommand( + const std::map& options, + const std::vector& flags) + : LDBCommand(options, flags, true, BuildCmdLineOptions({ARG_MAX_KEYS})) { + std::map::const_iterator itr = + options.find(ARG_MAX_KEYS); + if (itr != options.end()) { + try { +#if defined(CYGWIN) + max_keys_ = strtol(itr->second.c_str(), 0, 10); +#else + max_keys_ = std::stoi(itr->second); +#endif + } catch (const std::invalid_argument&) { + exec_state_ = LDBCommandExecuteResult::Failed(ARG_MAX_KEYS + + " has an invalid value"); + } catch (const std::out_of_range&) { + exec_state_ = LDBCommandExecuteResult::Failed( + ARG_MAX_KEYS + " has a value out-of-range"); + } + } +} + +void ListFileRangeDeletesCommand::Help(std::string& ret) { + ret.append(" "); + ret.append(ListFileRangeDeletesCommand::Name()); + ret.append(" [--" + ARG_MAX_KEYS + "=]"); + ret.append(" : print tombstones in SST files.\n"); +} + +void ListFileRangeDeletesCommand::DoCommand() { + if (!db_) { + assert(GetExecuteState().IsFailed()); + return; + } + + DBImpl* db_impl = static_cast_with_check(db_->GetRootDB()); + + std::string out_str; + + Status st = + db_impl->TablesRangeTombstoneSummary(GetCfHandle(), max_keys_, &out_str); + if 
(st.ok()) { + TEST_SYNC_POINT_CALLBACK( + "ListFileRangeDeletesCommand::DoCommand:BeforePrint", &out_str); + fprintf(stdout, "%s\n", out_str.c_str()); + } else { + exec_state_ = LDBCommandExecuteResult::Failed(st.ToString()); + } +} + } // namespace rocksdb #endif // ROCKSDB_LITE diff --git a/tools/ldb_cmd_impl.h b/tools/ldb_cmd_impl.h index 23bafe682..38b2817c0 100644 --- a/tools/ldb_cmd_impl.h +++ b/tools/ldb_cmd_impl.h @@ -592,4 +592,20 @@ class IngestExternalSstFilesCommand : public LDBCommand { static const std::string ARG_WRITE_GLOBAL_SEQNO; }; +// Command that prints out range delete tombstones in SST files. +class ListFileRangeDeletesCommand : public LDBCommand { + public: + static std::string Name() { return "list_file_range_deletes"; } + + ListFileRangeDeletesCommand(const std::map& options, + const std::vector& flags); + + void DoCommand() override; + + static void Help(std::string& ret); + + private: + int max_keys_ = 1000; +}; + } // namespace rocksdb diff --git a/tools/ldb_cmd_test.cc b/tools/ldb_cmd_test.cc index 24622b7cc..a8e2d5c7b 100644 --- a/tools/ldb_cmd_test.cc +++ b/tools/ldb_cmd_test.cc @@ -6,6 +6,7 @@ #ifndef ROCKSDB_LITE #include "rocksdb/utilities/ldb_cmd.h" +#include "test_util/sync_point.h" #include "test_util/testharness.h" using std::string; @@ -77,8 +78,8 @@ TEST_F(LdbCmdTest, MemEnv) { char arg3[] = "dump_live_files"; char* argv[] = {arg1, arg2, arg3}; - rocksdb::LDBTool tool; - tool.Run(3, argv, opts); + ASSERT_EQ(0, + LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); } TEST_F(LdbCmdTest, OptionParsing) { @@ -118,6 +119,94 @@ TEST_F(LdbCmdTest, OptionParsing) { } } +TEST_F(LdbCmdTest, ListFileTombstone) { + std::unique_ptr env(NewMemEnv(Env::Default())); + Options opts; + opts.env = env.get(); + opts.create_if_missing = true; + + DB* db = nullptr; + std::string dbname = test::TmpDir(); + ASSERT_OK(DB::Open(opts, dbname, &db)); + + WriteOptions wopts; + ASSERT_OK(db->Put(wopts, "foo", "1")); + 
ASSERT_OK(db->Put(wopts, "bar", "2")); + + FlushOptions fopts; + fopts.wait = true; + ASSERT_OK(db->Flush(fopts)); + + ASSERT_OK(db->DeleteRange(wopts, db->DefaultColumnFamily(), "foo", "foo2")); + ASSERT_OK(db->DeleteRange(wopts, db->DefaultColumnFamily(), "bar", "foo2")); + ASSERT_OK(db->Flush(fopts)); + + delete db; + + { + char arg1[] = "./ldb"; + char arg2[1024]; + snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); + char arg3[] = "list_file_range_deletes"; + char* argv[] = {arg1, arg2, arg3}; + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "ListFileRangeDeletesCommand::DoCommand:BeforePrint", [&](void* arg) { + std::string* out_str = reinterpret_cast(arg); + + // Count number of tombstones printed + int num_tb = 0; + const std::string kFingerprintStr = "start: "; + auto offset = out_str->find(kFingerprintStr); + while (offset != std::string::npos) { + num_tb++; + offset = + out_str->find(kFingerprintStr, offset + kFingerprintStr.size()); + } + EXPECT_EQ(2, num_tb); + }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_EQ( + 0, LDBCommandRunner::RunCommand(3, argv, opts, LDBOptions(), nullptr)); + + rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks(); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + } + + // Test the case of limiting tombstones + { + char arg1[] = "./ldb"; + char arg2[1024]; + snprintf(arg2, sizeof(arg2), "--db=%s", dbname.c_str()); + char arg3[] = "list_file_range_deletes"; + char arg4[] = "--max_keys=1"; + char* argv[] = {arg1, arg2, arg3, arg4}; + + rocksdb::SyncPoint::GetInstance()->SetCallBack( + "ListFileRangeDeletesCommand::DoCommand:BeforePrint", [&](void* arg) { + std::string* out_str = reinterpret_cast(arg); + + // Count number of tombstones printed + int num_tb = 0; + const std::string kFingerprintStr = "start: "; + auto offset = out_str->find(kFingerprintStr); + while (offset != std::string::npos) { + num_tb++; + offset = + out_str->find(kFingerprintStr, offset + 
kFingerprintStr.size()); + } + EXPECT_EQ(1, num_tb); + }); + rocksdb::SyncPoint::GetInstance()->EnableProcessing(); + + ASSERT_EQ( + 0, LDBCommandRunner::RunCommand(4, argv, opts, LDBOptions(), nullptr)); + + rocksdb::SyncPoint::GetInstance()->ClearAllCallBacks(); + rocksdb::SyncPoint::GetInstance()->DisableProcessing(); + } +} } // namespace rocksdb int main(int argc, char** argv) { diff --git a/tools/ldb_tool.cc b/tools/ldb_tool.cc index 2813f6c6e..2831afe63 100644 --- a/tools/ldb_tool.cc +++ b/tools/ldb_tool.cc @@ -71,6 +71,7 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options, DBQuerierCommand::Help(ret); ApproxSizeCommand::Help(ret); CheckConsistencyCommand::Help(ret); + ListFileRangeDeletesCommand::Help(ret); ret.append("\n\n"); ret.append("Admin Commands:\n"); @@ -96,12 +97,12 @@ void LDBCommandRunner::PrintHelp(const LDBOptions& ldb_options, fprintf(stderr, "%s\n", ret.c_str()); } -void LDBCommandRunner::RunCommand( +int LDBCommandRunner::RunCommand( int argc, char** argv, Options options, const LDBOptions& ldb_options, const std::vector* column_families) { if (argc <= 2) { PrintHelp(ldb_options, argv[0]); - exit(1); + return 1; } LDBCommand* cmdObj = LDBCommand::InitFromCmdLineArgs( @@ -109,11 +110,11 @@ void LDBCommandRunner::RunCommand( if (cmdObj == nullptr) { fprintf(stderr, "Unknown command\n"); PrintHelp(ldb_options, argv[0]); - exit(1); + return 1; } if (!cmdObj->ValidateCmdLineOptions()) { - exit(1); + return 1; } cmdObj->Run(); @@ -121,14 +122,15 @@ void LDBCommandRunner::RunCommand( fprintf(stderr, "%s\n", ret.ToString().c_str()); delete cmdObj; - exit(ret.IsFailed()); + return ret.IsFailed() ? 
1 : 0; } void LDBTool::Run(int argc, char** argv, Options options, const LDBOptions& ldb_options, const std::vector* column_families) { - LDBCommandRunner::RunCommand(argc, argv, options, ldb_options, - column_families); + int error_code = LDBCommandRunner::RunCommand(argc, argv, options, + ldb_options, column_families); + exit(error_code); } } // namespace rocksdb