diff --git a/Makefile b/Makefile index 3e787149d..5d8b2bc3d 100644 --- a/Makefile +++ b/Makefile @@ -609,6 +609,7 @@ ifdef ASSERT_STATUS_CHECKED merger_test \ mock_env_test \ object_registry_test \ + repair_test \ configurable_test \ options_settable_test \ options_test \ diff --git a/db/db_sst_test.cc b/db/db_sst_test.cc index 5c708ef87..2287c2425 100644 --- a/db/db_sst_test.cc +++ b/db/db_sst_test.cc @@ -304,11 +304,14 @@ TEST_F(DBSSTTest, DBWithSstFileManager) { dbfull()->TEST_WaitForFlushMemTable(); dbfull()->TEST_WaitForCompact(); // Verify that we are tracking all sst files in dbname_ - ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles()); + std::unordered_map files_in_db; + ASSERT_OK(GetAllSSTFiles(&files_in_db)); + ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); } ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)); - auto files_in_db = GetAllSSTFiles(); + std::unordered_map files_in_db; + ASSERT_OK(GetAllSSTFiles(&files_in_db)); // Verify that we are tracking all sst files in dbname_ ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db); // Verify the total files size @@ -762,7 +765,8 @@ TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) { ASSERT_OK(Flush()); uint64_t first_file_size = 0; - auto files_in_db = GetAllSSTFiles(&first_file_size); + std::unordered_map files_in_db; + ASSERT_OK(GetAllSSTFiles(&files_in_db, &first_file_size)); ASSERT_EQ(sfm->GetTotalSize(), first_file_size); // Set the maximum allowed space usage to the current total size @@ -802,7 +806,8 @@ TEST_F(DBSSTTest, CancellingCompactionsWorks) { } ASSERT_OK(Flush()); uint64_t total_file_size = 0; - auto files_in_db = GetAllSSTFiles(&total_file_size); + std::unordered_map files_in_db; + ASSERT_OK(GetAllSSTFiles(&files_in_db, &total_file_size)); // Set the maximum allowed space usage to the current total size sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1); @@ -849,7 +854,8 @@ TEST_F(DBSSTTest, CancellingManualCompactionsWorks) { } ASSERT_OK(Flush()); uint64_t total_file_size = 0; - auto files_in_db = GetAllSSTFiles(&total_file_size); + std::unordered_map files_in_db; + ASSERT_OK(GetAllSSTFiles(&files_in_db, &total_file_size)); // Set the maximum allowed space usage to the current total size sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1); @@ -959,7 +965,8 @@ TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) { } ASSERT_TRUE(bg_error_set); uint64_t total_sst_files_size = 0; - GetAllSSTFiles(&total_sst_files_size); + std::unordered_map files_in_db; + ASSERT_OK(GetAllSSTFiles(&files_in_db, &total_sst_files_size)); ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024); ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing(); } diff --git a/db/db_test_util.cc b/db/db_test_util.cc index 3ad1f31e4..611892d21 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -1407,29 +1407,33 @@ void DBTestBase::CopyFile(const std::string& source, ASSERT_OK(destfile->Close()); } -std::unordered_map DBTestBase::GetAllSSTFiles( - uint64_t* total_size) { - std::unordered_map res; - +Status DBTestBase::GetAllSSTFiles( + std::unordered_map* sst_files, + uint64_t* total_size /* = nullptr */) { if (total_size) { *total_size = 0; } std::vector files; - env_->GetChildren(dbname_, &files); - for (auto& file_name : files) { - uint64_t number; - FileType type; - std::string file_path = dbname_ + "/" + file_name; - if (ParseFileName(file_name, &number, &type) && type == kTableFile) { - uint64_t file_size = 0; - env_->GetFileSize(file_path, &file_size); - res[file_path] = file_size; - if (total_size) { - *total_size += file_size; + Status s = env_->GetChildren(dbname_, &files); + if (s.ok()) { + for (auto& file_name : files) { + uint64_t number; + FileType type; + if (ParseFileName(file_name, &number, &type) && type == kTableFile) { + std::string file_path = dbname_ + "/" + file_name; + uint64_t file_size = 0; + s = env_->GetFileSize(file_path, &file_size); + if (!s.ok()) { + break; + } + (*sst_files)[file_path] = file_size; + if (total_size) { + *total_size += file_size; + } } } } - return res; + return s; } std::vector DBTestBase::ListTableFiles(Env* env, diff --git a/db/db_test_util.h b/db/db_test_util.h index fc5c5929a..7a2fbfc70 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -1145,8 +1145,8 @@ class DBTestBase : public testing::Test { void CopyFile(const std::string& source, const std::string& destination, uint64_t size = 0); - std::unordered_map GetAllSSTFiles( - uint64_t* total_size = nullptr); + Status GetAllSSTFiles(std::unordered_map* sst_files, + uint64_t* total_size = nullptr); std::vector ListTableFiles(Env* env, const std::string& path); diff --git a/db/repair.cc b/db/repair.cc index a96af2c3e..8b50ba5c3 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -119,7 +119,8 @@ class Repairer { raw_table_cache_.get(), &wb_, &wc_, /*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr), next_file_number_(1), - db_lock_(nullptr) { + db_lock_(nullptr), + closed_(false) { for (const auto& cfd : column_families) { cf_name_to_opts_[cfd.name] = cfd.options; } @@ -163,31 +164,41 @@ class Repairer { return status; } - ~Repairer() { - if (db_lock_ != nullptr) { - env_->UnlockFile(db_lock_); + Status Close() { + Status s = Status::OK(); + if (!closed_) { + if (db_lock_ != nullptr) { + s = env_->UnlockFile(db_lock_); + db_lock_ = nullptr; + } + closed_ = true; } - delete table_cache_; + return s; } + ~Repairer() { Close().PermitUncheckedError(); } + Status Run() { Status status = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!status.ok()) { return status; } status = FindFiles(); + DBImpl* db_impl = nullptr; if (status.ok()) { // Discard older manifests and start a fresh one for (size_t i = 0; i < manifests_.size(); i++) { ArchiveFile(dbname_ + "/" + manifests_[i]); } // Just create a DBImpl temporarily so we can reuse NewDB() - DBImpl* db_impl = new DBImpl(db_options_, dbname_); + db_impl = new DBImpl(db_options_, dbname_); // Also use this temp DBImpl to get a session id - db_impl->GetDbSessionId(db_session_id_); + status = db_impl->GetDbSessionId(db_session_id_); + } + if (status.ok()) { status = db_impl->NewDB(/*new_filenames=*/nullptr); - delete db_impl; } + delete db_impl; if (status.ok()) { // Recover using the fresh manifest created by NewDB() @@ -242,7 +253,7 @@ class Repairer { const ColumnFamilyOptions unknown_cf_opts_; const bool create_unknown_cfs_; std::shared_ptr raw_table_cache_; - TableCache* table_cache_; + std::unique_ptr table_cache_; WriteBufferManager wb_; WriteController wc_; VersionSet vset_; @@ -257,6 +268,7 @@ class Repairer { // Lock over the persistent DB state. Non-nullptr iff successfully // acquired. FileLock* db_lock_; + bool closed_; Status FindFiles() { std::vector filenames; @@ -385,15 +397,16 @@ class Repairer { record.size(), Status::Corruption("log record too small")); continue; } - WriteBatchInternal::SetContents(&batch, record); - status = - WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr); - if (status.ok()) { + Status record_status = WriteBatchInternal::SetContents(&batch, record); + if (record_status.ok()) { + record_status = + WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr); + } + if (record_status.ok()) { counter += WriteBatchInternal::Count(&batch); } else { ROCKS_LOG_WARN(db_options_.info_log, "Log #%" PRIu64 ": ignoring %s", - log, status.ToString().c_str()); - status = Status::OK(); // Keep going with rest of file + log, record_status.ToString().c_str()); } } @@ -430,7 +443,7 @@ class Repairer { IOStatus io_s; status = BuildTable( dbname_, /* versions */ nullptr, env_, &fs, *cfd->ioptions(), - *cfd->GetLatestMutableCFOptions(), env_options_, table_cache_, + *cfd->GetLatestMutableCFOptions(), env_options_, table_cache_.get(), iter.get(), std::move(range_del_iters), &meta, nullptr /* blob_file_additions */, cfd->internal_comparator(), cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(), @@ -623,7 +636,7 @@ class Repairer { new_dir.assign(fname.data(), slash - fname.data()); } new_dir.append("/lost"); - env_->CreateDir(new_dir); // Ignore error + env_->CreateDir(new_dir).PermitUncheckedError(); // Ignore error std::string new_file = new_dir; new_file.append("/"); new_file.append((slash == nullptr) ? fname.c_str() : slash + 1); @@ -655,12 +668,16 @@ Status RepairDB(const std::string& dbname, const DBOptions& db_options, ) { ColumnFamilyOptions default_cf_opts; Status status = GetDefaultCFOptions(column_families, &default_cf_opts); + if (!status.ok()) { + return status; + } + + Repairer repairer(dbname, db_options, column_families, default_cf_opts, + ColumnFamilyOptions() /* unknown_cf_opts */, + false /* create_unknown_cfs */); + status = repairer.Run(); if (status.ok()) { - Repairer repairer(dbname, db_options, column_families, - default_cf_opts, - ColumnFamilyOptions() /* unknown_cf_opts */, - false /* create_unknown_cfs */); - status = repairer.Run(); + status = repairer.Close(); } return status; } @@ -670,25 +687,33 @@ Status RepairDB(const std::string& dbname, const DBOptions& db_options, const ColumnFamilyOptions& unknown_cf_opts) { ColumnFamilyOptions default_cf_opts; Status status = GetDefaultCFOptions(column_families, &default_cf_opts); + if (!status.ok()) { + return status; + } + + Repairer repairer(dbname, db_options, column_families, default_cf_opts, + unknown_cf_opts, true /* create_unknown_cfs */); + status = repairer.Run(); if (status.ok()) { - Repairer repairer(dbname, db_options, - column_families, default_cf_opts, - unknown_cf_opts, true /* create_unknown_cfs */); - status = repairer.Run(); + status = repairer.Close(); } return status; } Status RepairDB(const std::string& dbname, const Options& options) { Options opts(options); - DBOptions db_options(opts); ColumnFamilyOptions cf_options(opts); + Repairer repairer(dbname, db_options, {}, cf_options /* default_cf_opts */, cf_options /* unknown_cf_opts */, true /* create_unknown_cfs */); - return repairer.Run(); + Status status = repairer.Run(); + if (status.ok()) { + status = repairer.Close(); + } + return status; } } // namespace ROCKSDB_NAMESPACE diff --git a/db/repair_test.cc b/db/repair_test.cc index d1b8fc700..49a96bf8a 100644 --- a/db/repair_test.cc +++ b/db/repair_test.cc @@ -24,28 +24,33 @@ class RepairTest : public DBTestBase { public: RepairTest() : DBTestBase("/repair_test", /*env_do_fsync=*/true) {} - std::string GetFirstSstPath() { + Status GetFirstSstPath(std::string* first_sst_path) { + assert(first_sst_path != nullptr); + first_sst_path->clear(); uint64_t manifest_size; std::vector files; - db_->GetLiveFiles(files, &manifest_size); - auto sst_iter = - std::find_if(files.begin(), files.end(), [](const std::string& file) { - uint64_t number; - FileType type; - bool ok = ParseFileName(file, &number, &type); - return ok && type == kTableFile; - }); - return sst_iter == files.end() ? "" : dbname_ + *sst_iter; + Status s = db_->GetLiveFiles(files, &manifest_size); + if (s.ok()) { + auto sst_iter = + std::find_if(files.begin(), files.end(), [](const std::string& file) { + uint64_t number; + FileType type; + bool ok = ParseFileName(file, &number, &type); + return ok && type == kTableFile; + }); + *first_sst_path = sst_iter == files.end() ? "" : dbname_ + *sst_iter; + } + return s; } }; TEST_F(RepairTest, LostManifest) { // Add a couple SST files, delete the manifest, and verify RepairDB() saves // the day. - Put("key", "val"); - Flush(); - Put("key2", "val2"); - Flush(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(Flush()); // Need to get path before Close() deletes db_, but delete it after Close() to // ensure Close() didn't change the manifest. std::string manifest_path = @@ -63,10 +68,10 @@ TEST_F(RepairTest, LostManifest) { TEST_F(RepairTest, CorruptManifest) { // Manifest is in an invalid format. Expect a full recovery. - Put("key", "val"); - Flush(); - Put("key2", "val2"); - Flush(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(Flush()); // Need to get path before Close() deletes db_, but overwrite it after Close() // to ensure Close() didn't change the manifest. std::string manifest_path = @@ -76,7 +81,7 @@ TEST_F(RepairTest, CorruptManifest) { ASSERT_OK(env_->FileExists(manifest_path)); LegacyFileSystemWrapper fs(env_); - CreateFile(&fs, manifest_path, "blah", false /* use_fsync */); + ASSERT_OK(CreateFile(&fs, manifest_path, "blah", false /* use_fsync */)); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); Reopen(CurrentOptions()); @@ -87,13 +92,13 @@ TEST_F(RepairTest, CorruptManifest) { TEST_F(RepairTest, IncompleteManifest) { // In this case, the manifest is valid but does not reference all of the SST // files. Expect a full recovery. - Put("key", "val"); - Flush(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); std::string orig_manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); CopyFile(orig_manifest_path, orig_manifest_path + ".tmp"); - Put("key2", "val2"); - Flush(); + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(Flush()); // Need to get path before Close() deletes db_, but overwrite it after Close() // to ensure Close() didn't change the manifest. std::string new_manifest_path = @@ -113,10 +118,10 @@ TEST_F(RepairTest, IncompleteManifest) { TEST_F(RepairTest, PostRepairSstFileNumbering) { // Verify after a DB is repaired, new files will be assigned higher numbers // than old files. - Put("key", "val"); - Flush(); - Put("key2", "val2"); - Flush(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(Flush()); uint64_t pre_repair_file_num = dbfull()->TEST_Current_Next_FileNo(); Close(); @@ -130,11 +135,12 @@ TEST_F(RepairTest, PostRepairSstFileNumbering) { TEST_F(RepairTest, LostSst) { // Delete one of the SST files but preserve the manifest that refers to it, // then verify the DB is still usable for the intact SST. - Put("key", "val"); - Flush(); - Put("key2", "val2"); - Flush(); - auto sst_path = GetFirstSstPath(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(Flush()); + std::string sst_path; + ASSERT_OK(GetFirstSstPath(&sst_path)); ASSERT_FALSE(sst_path.empty()); ASSERT_OK(env_->DeleteFile(sst_path)); @@ -149,15 +155,16 @@ TEST_F(RepairTest, LostSst) { TEST_F(RepairTest, CorruptSst) { // Corrupt one of the SST files but preserve the manifest that refers to it, // then verify the DB is still usable for the intact SST. - Put("key", "val"); - Flush(); - Put("key2", "val2"); - Flush(); - auto sst_path = GetFirstSstPath(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); + ASSERT_OK(Put("key2", "val2")); + ASSERT_OK(Flush()); + std::string sst_path; + ASSERT_OK(GetFirstSstPath(&sst_path)); ASSERT_FALSE(sst_path.empty()); LegacyFileSystemWrapper fs(env_); - CreateFile(&fs, sst_path, "blah", false /* use_fsync */); + ASSERT_OK(CreateFile(&fs, sst_path, "blah", false /* use_fsync */)); Close(); ASSERT_OK(RepairDB(dbname_, CurrentOptions())); @@ -170,13 +177,16 @@ TEST_F(RepairTest, CorruptSst) { TEST_F(RepairTest, UnflushedSst) { // This test case invokes repair while some data is unflushed, then verifies // that data is in the db. - Put("key", "val"); + ASSERT_OK(Put("key", "val")); VectorLogPtr wal_files; ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 1); - uint64_t total_ssts_size; - GetAllSSTFiles(&total_ssts_size); - ASSERT_EQ(total_ssts_size, 0); + { + uint64_t total_ssts_size; + std::unordered_map sst_files; + ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size)); + ASSERT_EQ(total_ssts_size, 0); + } // Need to get path before Close() deletes db_, but delete it after Close() to // ensure Close() didn't change the manifest. std::string manifest_path = @@ -190,8 +200,12 @@ TEST_F(RepairTest, UnflushedSst) { ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 0); - GetAllSSTFiles(&total_ssts_size); - ASSERT_GT(total_ssts_size, 0); + { + uint64_t total_ssts_size; + std::unordered_map sst_files; + ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size)); + ASSERT_GT(total_ssts_size, 0); + } ASSERT_EQ(Get("key"), "val"); } @@ -199,14 +213,17 @@ TEST_F(RepairTest, SeparateWalDir) { do { Options options = CurrentOptions(); DestroyAndReopen(options); - Put("key", "val"); - Put("foo", "bar"); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Put("foo", "bar")); VectorLogPtr wal_files; ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 1); - uint64_t total_ssts_size; - GetAllSSTFiles(&total_ssts_size); - ASSERT_EQ(total_ssts_size, 0); + { + uint64_t total_ssts_size; + std::unordered_map sst_files; + ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size)); + ASSERT_EQ(total_ssts_size, 0); + } std::string manifest_path = DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo()); @@ -221,8 +238,12 @@ TEST_F(RepairTest, SeparateWalDir) { Reopen(options); ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files)); ASSERT_EQ(wal_files.size(), 0); - GetAllSSTFiles(&total_ssts_size); - ASSERT_GT(total_ssts_size, 0); + { + uint64_t total_ssts_size; + std::unordered_map sst_files; + ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size)); + ASSERT_GT(total_ssts_size, 0); + } ASSERT_EQ(Get("key"), "val"); ASSERT_EQ(Get("foo"), "bar"); @@ -238,13 +259,13 @@ TEST_F(RepairTest, RepairMultipleColumnFamilies) { CreateAndReopenWithCF({"pikachu1", "pikachu2"}, CurrentOptions()); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { - Put(i, "key" + ToString(j), "val" + ToString(j)); + ASSERT_OK(Put(i, "key" + ToString(j), "val" + ToString(j))); if (j == kEntriesPerCf - 1 && i == kNumCfs - 1) { // Leave one unflushed so we can verify WAL entries are properly // associated with column families. continue; } - Flush(i); + ASSERT_OK(Flush(i)); } } @@ -283,12 +304,12 @@ TEST_F(RepairTest, RepairColumnFamilyOptions) { std::vector{opts, rev_opts}); for (int i = 0; i < kNumCfs; ++i) { for (int j = 0; j < kEntriesPerCf; ++j) { - Put(i, "key" + ToString(j), "val" + ToString(j)); + ASSERT_OK(Put(i, "key" + ToString(j), "val" + ToString(j))); if (i == kNumCfs - 1 && j == kEntriesPerCf - 1) { // Leave one unflushed so we can verify RepairDB's flush logic continue; } - Flush(i); + ASSERT_OK(Flush(i)); } } Close(); @@ -308,7 +329,7 @@ TEST_F(RepairTest, RepairColumnFamilyOptions) { // Examine table properties to verify RepairDB() used the right options when // converting WAL->SST TablePropertiesCollection fname_to_props; - db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props); + ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props)); ASSERT_EQ(fname_to_props.size(), 2U); for (const auto& fname_and_props : fname_to_props) { std::string comparator_name ( @@ -342,8 +363,8 @@ TEST_F(RepairTest, DbNameContainsTrailingSlash) { } } - Put("key", "val"); - Flush(); + ASSERT_OK(Put("key", "val")); + ASSERT_OK(Flush()); Close(); ASSERT_OK(RepairDB(dbname_ + "/", CurrentOptions())); diff --git a/db/table_cache.cc b/db/table_cache.cc index 1f760c905..a9f334828 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -169,7 +169,7 @@ Status TableCache::FindTable(const ReadOptions& ro, const_cast(&no_io)); if (*handle == nullptr) { - if (no_io) { // Don't do IO and return a not-found status + if (no_io) { return Status::Incomplete("Table not found in table_cache, no_io is set"); } MutexLock load_lock(loader_mutex_.get(key)); diff --git a/db/wal_manager.cc b/db/wal_manager.cc index a2e88e2c2..327f89cdf 100644 --- a/db/wal_manager.cc +++ b/db/wal_manager.cc @@ -492,14 +492,19 @@ Status WalManager::ReadFirstLine(const std::string& fname, // TODO read record's till the first no corrupt entry? } else { WriteBatch batch; - WriteBatchInternal::SetContents(&batch, record); - *sequence = WriteBatchInternal::Sequence(&batch); - return Status::OK(); + // We can overwrite an existing non-OK Status since it'd only reach here + // with `paranoid_checks == false`. + status = WriteBatchInternal::SetContents(&batch, record); + if (status.ok()) { + *sequence = WriteBatchInternal::Sequence(&batch); + return status; + } } } - // ReadRecord returns false on EOF, which means that the log file is empty. we - // return status.ok() in that case and set sequence number to 0 + // ReadRecord might have returned false on EOF, which means that the log file + // is empty. Or, a failure may have occurred while processing the first entry. + // In any case, return status and set sequence number to 0. *sequence = 0; return status; }