add Status check assertions for repair_test (#7455)

Summary: Pull Request resolved: https://github.com/facebook/rocksdb/pull/7455

Reviewed By: riversand963

Differential Revision: D23985283

Pulled By: ajkr

fbshipit-source-id: 5dd2be62350f6e31d13a1e7821cb848a37699c93
main
Andrew Kryczka 4 years ago committed by Facebook GitHub Bot
parent bd5d9e2a1d
commit 8115eb520d
  1. 1
      Makefile
  2. 19
      db/db_sst_test.cc
  3. 36
      db/db_test_util.cc
  4. 4
      db/db_test_util.h
  5. 81
      db/repair.cc
  6. 137
      db/repair_test.cc
  7. 2
      db/table_cache.cc
  8. 15
      db/wal_manager.cc

@ -609,6 +609,7 @@ ifdef ASSERT_STATUS_CHECKED
merger_test \
mock_env_test \
object_registry_test \
repair_test \
configurable_test \
options_settable_test \
options_test \

@ -304,11 +304,14 @@ TEST_F(DBSSTTest, DBWithSstFileManager) {
dbfull()->TEST_WaitForFlushMemTable();
dbfull()->TEST_WaitForCompact();
// Verify that we are tracking all sst files in dbname_
ASSERT_EQ(sfm->GetTrackedFiles(), GetAllSSTFiles());
std::unordered_map<std::string, uint64_t> files_in_db;
ASSERT_OK(GetAllSSTFiles(&files_in_db));
ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
}
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
auto files_in_db = GetAllSSTFiles();
std::unordered_map<std::string, uint64_t> files_in_db;
ASSERT_OK(GetAllSSTFiles(&files_in_db));
// Verify that we are tracking all sst files in dbname_
ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
// Verify the total files size
@ -762,7 +765,8 @@ TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {
ASSERT_OK(Flush());
uint64_t first_file_size = 0;
auto files_in_db = GetAllSSTFiles(&first_file_size);
std::unordered_map<std::string, uint64_t> files_in_db;
ASSERT_OK(GetAllSSTFiles(&files_in_db, &first_file_size));
ASSERT_EQ(sfm->GetTotalSize(), first_file_size);
// Set the maximum allowed space usage to the current total size
@ -802,7 +806,8 @@ TEST_F(DBSSTTest, CancellingCompactionsWorks) {
}
ASSERT_OK(Flush());
uint64_t total_file_size = 0;
auto files_in_db = GetAllSSTFiles(&total_file_size);
std::unordered_map<std::string, uint64_t> files_in_db;
ASSERT_OK(GetAllSSTFiles(&files_in_db, &total_file_size));
// Set the maximum allowed space usage to the current total size
sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1);
@ -849,7 +854,8 @@ TEST_F(DBSSTTest, CancellingManualCompactionsWorks) {
}
ASSERT_OK(Flush());
uint64_t total_file_size = 0;
auto files_in_db = GetAllSSTFiles(&total_file_size);
std::unordered_map<std::string, uint64_t> files_in_db;
ASSERT_OK(GetAllSSTFiles(&files_in_db, &total_file_size));
// Set the maximum allowed space usage to the current total size
sfm->SetMaxAllowedSpaceUsage(2 * total_file_size + 1);
@ -959,7 +965,8 @@ TEST_F(DBSSTTest, DBWithMaxSpaceAllowedRandomized) {
}
ASSERT_TRUE(bg_error_set);
uint64_t total_sst_files_size = 0;
GetAllSSTFiles(&total_sst_files_size);
std::unordered_map<std::string, uint64_t> files_in_db;
ASSERT_OK(GetAllSSTFiles(&files_in_db, &total_sst_files_size));
ASSERT_GE(total_sst_files_size, limit_mb * 1024 * 1024);
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}

@ -1407,29 +1407,33 @@ void DBTestBase::CopyFile(const std::string& source,
ASSERT_OK(destfile->Close());
}
std::unordered_map<std::string, uint64_t> DBTestBase::GetAllSSTFiles(
uint64_t* total_size) {
std::unordered_map<std::string, uint64_t> res;
Status DBTestBase::GetAllSSTFiles(
std::unordered_map<std::string, uint64_t>* sst_files,
uint64_t* total_size /* = nullptr */) {
if (total_size) {
*total_size = 0;
}
std::vector<std::string> files;
env_->GetChildren(dbname_, &files);
for (auto& file_name : files) {
uint64_t number;
FileType type;
std::string file_path = dbname_ + "/" + file_name;
if (ParseFileName(file_name, &number, &type) && type == kTableFile) {
uint64_t file_size = 0;
env_->GetFileSize(file_path, &file_size);
res[file_path] = file_size;
if (total_size) {
*total_size += file_size;
Status s = env_->GetChildren(dbname_, &files);
if (s.ok()) {
for (auto& file_name : files) {
uint64_t number;
FileType type;
if (ParseFileName(file_name, &number, &type) && type == kTableFile) {
std::string file_path = dbname_ + "/" + file_name;
uint64_t file_size = 0;
s = env_->GetFileSize(file_path, &file_size);
if (!s.ok()) {
break;
}
(*sst_files)[file_path] = file_size;
if (total_size) {
*total_size += file_size;
}
}
}
}
return res;
return s;
}
std::vector<std::uint64_t> DBTestBase::ListTableFiles(Env* env,

@ -1145,8 +1145,8 @@ class DBTestBase : public testing::Test {
void CopyFile(const std::string& source, const std::string& destination,
uint64_t size = 0);
std::unordered_map<std::string, uint64_t> GetAllSSTFiles(
uint64_t* total_size = nullptr);
Status GetAllSSTFiles(std::unordered_map<std::string, uint64_t>* sst_files,
uint64_t* total_size = nullptr);
std::vector<std::uint64_t> ListTableFiles(Env* env, const std::string& path);

@ -119,7 +119,8 @@ class Repairer {
raw_table_cache_.get(), &wb_, &wc_,
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr),
next_file_number_(1),
db_lock_(nullptr) {
db_lock_(nullptr),
closed_(false) {
for (const auto& cfd : column_families) {
cf_name_to_opts_[cfd.name] = cfd.options;
}
@ -163,31 +164,41 @@ class Repairer {
return status;
}
~Repairer() {
if (db_lock_ != nullptr) {
env_->UnlockFile(db_lock_);
Status Close() {
Status s = Status::OK();
if (!closed_) {
if (db_lock_ != nullptr) {
s = env_->UnlockFile(db_lock_);
db_lock_ = nullptr;
}
closed_ = true;
}
delete table_cache_;
return s;
}
~Repairer() { Close().PermitUncheckedError(); }
Status Run() {
Status status = env_->LockFile(LockFileName(dbname_), &db_lock_);
if (!status.ok()) {
return status;
}
status = FindFiles();
DBImpl* db_impl = nullptr;
if (status.ok()) {
// Discard older manifests and start a fresh one
for (size_t i = 0; i < manifests_.size(); i++) {
ArchiveFile(dbname_ + "/" + manifests_[i]);
}
// Just create a DBImpl temporarily so we can reuse NewDB()
DBImpl* db_impl = new DBImpl(db_options_, dbname_);
db_impl = new DBImpl(db_options_, dbname_);
// Also use this temp DBImpl to get a session id
db_impl->GetDbSessionId(db_session_id_);
status = db_impl->GetDbSessionId(db_session_id_);
}
if (status.ok()) {
status = db_impl->NewDB(/*new_filenames=*/nullptr);
delete db_impl;
}
delete db_impl;
if (status.ok()) {
// Recover using the fresh manifest created by NewDB()
@ -242,7 +253,7 @@ class Repairer {
const ColumnFamilyOptions unknown_cf_opts_;
const bool create_unknown_cfs_;
std::shared_ptr<Cache> raw_table_cache_;
TableCache* table_cache_;
std::unique_ptr<TableCache> table_cache_;
WriteBufferManager wb_;
WriteController wc_;
VersionSet vset_;
@ -257,6 +268,7 @@ class Repairer {
// Lock over the persistent DB state. Non-nullptr iff successfully
// acquired.
FileLock* db_lock_;
bool closed_;
Status FindFiles() {
std::vector<std::string> filenames;
@ -385,15 +397,16 @@ class Repairer {
record.size(), Status::Corruption("log record too small"));
continue;
}
WriteBatchInternal::SetContents(&batch, record);
status =
WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr);
if (status.ok()) {
Status record_status = WriteBatchInternal::SetContents(&batch, record);
if (record_status.ok()) {
record_status =
WriteBatchInternal::InsertInto(&batch, cf_mems, nullptr, nullptr);
}
if (record_status.ok()) {
counter += WriteBatchInternal::Count(&batch);
} else {
ROCKS_LOG_WARN(db_options_.info_log, "Log #%" PRIu64 ": ignoring %s",
log, status.ToString().c_str());
status = Status::OK(); // Keep going with rest of file
log, record_status.ToString().c_str());
}
}
@ -430,7 +443,7 @@ class Repairer {
IOStatus io_s;
status = BuildTable(
dbname_, /* versions */ nullptr, env_, &fs, *cfd->ioptions(),
*cfd->GetLatestMutableCFOptions(), env_options_, table_cache_,
*cfd->GetLatestMutableCFOptions(), env_options_, table_cache_.get(),
iter.get(), std::move(range_del_iters), &meta,
nullptr /* blob_file_additions */, cfd->internal_comparator(),
cfd->int_tbl_prop_collector_factories(), cfd->GetID(), cfd->GetName(),
@ -623,7 +636,7 @@ class Repairer {
new_dir.assign(fname.data(), slash - fname.data());
}
new_dir.append("/lost");
env_->CreateDir(new_dir); // Ignore error
env_->CreateDir(new_dir).PermitUncheckedError(); // Ignore error
std::string new_file = new_dir;
new_file.append("/");
new_file.append((slash == nullptr) ? fname.c_str() : slash + 1);
@ -655,12 +668,16 @@ Status RepairDB(const std::string& dbname, const DBOptions& db_options,
) {
ColumnFamilyOptions default_cf_opts;
Status status = GetDefaultCFOptions(column_families, &default_cf_opts);
if (!status.ok()) {
return status;
}
Repairer repairer(dbname, db_options, column_families, default_cf_opts,
ColumnFamilyOptions() /* unknown_cf_opts */,
false /* create_unknown_cfs */);
status = repairer.Run();
if (status.ok()) {
Repairer repairer(dbname, db_options, column_families,
default_cf_opts,
ColumnFamilyOptions() /* unknown_cf_opts */,
false /* create_unknown_cfs */);
status = repairer.Run();
status = repairer.Close();
}
return status;
}
@ -670,25 +687,33 @@ Status RepairDB(const std::string& dbname, const DBOptions& db_options,
const ColumnFamilyOptions& unknown_cf_opts) {
ColumnFamilyOptions default_cf_opts;
Status status = GetDefaultCFOptions(column_families, &default_cf_opts);
if (!status.ok()) {
return status;
}
Repairer repairer(dbname, db_options, column_families, default_cf_opts,
unknown_cf_opts, true /* create_unknown_cfs */);
status = repairer.Run();
if (status.ok()) {
Repairer repairer(dbname, db_options,
column_families, default_cf_opts,
unknown_cf_opts, true /* create_unknown_cfs */);
status = repairer.Run();
status = repairer.Close();
}
return status;
}
Status RepairDB(const std::string& dbname, const Options& options) {
Options opts(options);
DBOptions db_options(opts);
ColumnFamilyOptions cf_options(opts);
Repairer repairer(dbname, db_options,
{}, cf_options /* default_cf_opts */,
cf_options /* unknown_cf_opts */,
true /* create_unknown_cfs */);
return repairer.Run();
Status status = repairer.Run();
if (status.ok()) {
status = repairer.Close();
}
return status;
}
} // namespace ROCKSDB_NAMESPACE

@ -24,28 +24,33 @@ class RepairTest : public DBTestBase {
public:
RepairTest() : DBTestBase("/repair_test", /*env_do_fsync=*/true) {}
std::string GetFirstSstPath() {
Status GetFirstSstPath(std::string* first_sst_path) {
assert(first_sst_path != nullptr);
first_sst_path->clear();
uint64_t manifest_size;
std::vector<std::string> files;
db_->GetLiveFiles(files, &manifest_size);
auto sst_iter =
std::find_if(files.begin(), files.end(), [](const std::string& file) {
uint64_t number;
FileType type;
bool ok = ParseFileName(file, &number, &type);
return ok && type == kTableFile;
});
return sst_iter == files.end() ? "" : dbname_ + *sst_iter;
Status s = db_->GetLiveFiles(files, &manifest_size);
if (s.ok()) {
auto sst_iter =
std::find_if(files.begin(), files.end(), [](const std::string& file) {
uint64_t number;
FileType type;
bool ok = ParseFileName(file, &number, &type);
return ok && type == kTableFile;
});
*first_sst_path = sst_iter == files.end() ? "" : dbname_ + *sst_iter;
}
return s;
}
};
TEST_F(RepairTest, LostManifest) {
// Add a couple SST files, delete the manifest, and verify RepairDB() saves
// the day.
Put("key", "val");
Flush();
Put("key2", "val2");
Flush();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but delete it after Close() to
// ensure Close() didn't change the manifest.
std::string manifest_path =
@ -63,10 +68,10 @@ TEST_F(RepairTest, LostManifest) {
TEST_F(RepairTest, CorruptManifest) {
// Manifest is in an invalid format. Expect a full recovery.
Put("key", "val");
Flush();
Put("key2", "val2");
Flush();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but overwrite it after Close()
// to ensure Close() didn't change the manifest.
std::string manifest_path =
@ -76,7 +81,7 @@ TEST_F(RepairTest, CorruptManifest) {
ASSERT_OK(env_->FileExists(manifest_path));
LegacyFileSystemWrapper fs(env_);
CreateFile(&fs, manifest_path, "blah", false /* use_fsync */);
ASSERT_OK(CreateFile(&fs, manifest_path, "blah", false /* use_fsync */));
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
Reopen(CurrentOptions());
@ -87,13 +92,13 @@ TEST_F(RepairTest, CorruptManifest) {
TEST_F(RepairTest, IncompleteManifest) {
// In this case, the manifest is valid but does not reference all of the SST
// files. Expect a full recovery.
Put("key", "val");
Flush();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
std::string orig_manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
CopyFile(orig_manifest_path, orig_manifest_path + ".tmp");
Put("key2", "val2");
Flush();
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
// Need to get path before Close() deletes db_, but overwrite it after Close()
// to ensure Close() didn't change the manifest.
std::string new_manifest_path =
@ -113,10 +118,10 @@ TEST_F(RepairTest, IncompleteManifest) {
TEST_F(RepairTest, PostRepairSstFileNumbering) {
// Verify after a DB is repaired, new files will be assigned higher numbers
// than old files.
Put("key", "val");
Flush();
Put("key2", "val2");
Flush();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
uint64_t pre_repair_file_num = dbfull()->TEST_Current_Next_FileNo();
Close();
@ -130,11 +135,12 @@ TEST_F(RepairTest, PostRepairSstFileNumbering) {
TEST_F(RepairTest, LostSst) {
// Delete one of the SST files but preserve the manifest that refers to it,
// then verify the DB is still usable for the intact SST.
Put("key", "val");
Flush();
Put("key2", "val2");
Flush();
auto sst_path = GetFirstSstPath();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
std::string sst_path;
ASSERT_OK(GetFirstSstPath(&sst_path));
ASSERT_FALSE(sst_path.empty());
ASSERT_OK(env_->DeleteFile(sst_path));
@ -149,15 +155,16 @@ TEST_F(RepairTest, LostSst) {
TEST_F(RepairTest, CorruptSst) {
// Corrupt one of the SST files but preserve the manifest that refers to it,
// then verify the DB is still usable for the intact SST.
Put("key", "val");
Flush();
Put("key2", "val2");
Flush();
auto sst_path = GetFirstSstPath();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
ASSERT_OK(Put("key2", "val2"));
ASSERT_OK(Flush());
std::string sst_path;
ASSERT_OK(GetFirstSstPath(&sst_path));
ASSERT_FALSE(sst_path.empty());
LegacyFileSystemWrapper fs(env_);
CreateFile(&fs, sst_path, "blah", false /* use_fsync */);
ASSERT_OK(CreateFile(&fs, sst_path, "blah", false /* use_fsync */));
Close();
ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
@ -170,13 +177,16 @@ TEST_F(RepairTest, CorruptSst) {
TEST_F(RepairTest, UnflushedSst) {
// This test case invokes repair while some data is unflushed, then verifies
// that data is in the db.
Put("key", "val");
ASSERT_OK(Put("key", "val"));
VectorLogPtr wal_files;
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 1);
uint64_t total_ssts_size;
GetAllSSTFiles(&total_ssts_size);
ASSERT_EQ(total_ssts_size, 0);
{
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size));
ASSERT_EQ(total_ssts_size, 0);
}
// Need to get path before Close() deletes db_, but delete it after Close() to
// ensure Close() didn't change the manifest.
std::string manifest_path =
@ -190,8 +200,12 @@ TEST_F(RepairTest, UnflushedSst) {
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 0);
GetAllSSTFiles(&total_ssts_size);
ASSERT_GT(total_ssts_size, 0);
{
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size));
ASSERT_GT(total_ssts_size, 0);
}
ASSERT_EQ(Get("key"), "val");
}
@ -199,14 +213,17 @@ TEST_F(RepairTest, SeparateWalDir) {
do {
Options options = CurrentOptions();
DestroyAndReopen(options);
Put("key", "val");
Put("foo", "bar");
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Put("foo", "bar"));
VectorLogPtr wal_files;
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 1);
uint64_t total_ssts_size;
GetAllSSTFiles(&total_ssts_size);
ASSERT_EQ(total_ssts_size, 0);
{
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size));
ASSERT_EQ(total_ssts_size, 0);
}
std::string manifest_path =
DescriptorFileName(dbname_, dbfull()->TEST_Current_Manifest_FileNo());
@ -221,8 +238,12 @@ TEST_F(RepairTest, SeparateWalDir) {
Reopen(options);
ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
ASSERT_EQ(wal_files.size(), 0);
GetAllSSTFiles(&total_ssts_size);
ASSERT_GT(total_ssts_size, 0);
{
uint64_t total_ssts_size;
std::unordered_map<std::string, uint64_t> sst_files;
ASSERT_OK(GetAllSSTFiles(&sst_files, &total_ssts_size));
ASSERT_GT(total_ssts_size, 0);
}
ASSERT_EQ(Get("key"), "val");
ASSERT_EQ(Get("foo"), "bar");
@ -238,13 +259,13 @@ TEST_F(RepairTest, RepairMultipleColumnFamilies) {
CreateAndReopenWithCF({"pikachu1", "pikachu2"}, CurrentOptions());
for (int i = 0; i < kNumCfs; ++i) {
for (int j = 0; j < kEntriesPerCf; ++j) {
Put(i, "key" + ToString(j), "val" + ToString(j));
ASSERT_OK(Put(i, "key" + ToString(j), "val" + ToString(j)));
if (j == kEntriesPerCf - 1 && i == kNumCfs - 1) {
// Leave one unflushed so we can verify WAL entries are properly
// associated with column families.
continue;
}
Flush(i);
ASSERT_OK(Flush(i));
}
}
@ -283,12 +304,12 @@ TEST_F(RepairTest, RepairColumnFamilyOptions) {
std::vector<Options>{opts, rev_opts});
for (int i = 0; i < kNumCfs; ++i) {
for (int j = 0; j < kEntriesPerCf; ++j) {
Put(i, "key" + ToString(j), "val" + ToString(j));
ASSERT_OK(Put(i, "key" + ToString(j), "val" + ToString(j)));
if (i == kNumCfs - 1 && j == kEntriesPerCf - 1) {
// Leave one unflushed so we can verify RepairDB's flush logic
continue;
}
Flush(i);
ASSERT_OK(Flush(i));
}
}
Close();
@ -308,7 +329,7 @@ TEST_F(RepairTest, RepairColumnFamilyOptions) {
// Examine table properties to verify RepairDB() used the right options when
// converting WAL->SST
TablePropertiesCollection fname_to_props;
db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props);
ASSERT_OK(db_->GetPropertiesOfAllTables(handles_[1], &fname_to_props));
ASSERT_EQ(fname_to_props.size(), 2U);
for (const auto& fname_and_props : fname_to_props) {
std::string comparator_name (
@ -342,8 +363,8 @@ TEST_F(RepairTest, DbNameContainsTrailingSlash) {
}
}
Put("key", "val");
Flush();
ASSERT_OK(Put("key", "val"));
ASSERT_OK(Flush());
Close();
ASSERT_OK(RepairDB(dbname_ + "/", CurrentOptions()));

@ -169,7 +169,7 @@ Status TableCache::FindTable(const ReadOptions& ro,
const_cast<bool*>(&no_io));
if (*handle == nullptr) {
if (no_io) { // Don't do IO and return a not-found status
if (no_io) {
return Status::Incomplete("Table not found in table_cache, no_io is set");
}
MutexLock load_lock(loader_mutex_.get(key));

@ -492,14 +492,19 @@ Status WalManager::ReadFirstLine(const std::string& fname,
// TODO read record's till the first no corrupt entry?
} else {
WriteBatch batch;
WriteBatchInternal::SetContents(&batch, record);
*sequence = WriteBatchInternal::Sequence(&batch);
return Status::OK();
// We can overwrite an existing non-OK Status since it'd only reach here
// with `paranoid_checks == false`.
status = WriteBatchInternal::SetContents(&batch, record);
if (status.ok()) {
*sequence = WriteBatchInternal::Sequence(&batch);
return status;
}
}
}
// ReadRecord returns false on EOF, which means that the log file is empty. we
// return status.ok() in that case and set sequence number to 0
// ReadRecord might have returned false on EOF, which means that the log file
// is empty. Or, a failure may have occurred while processing the first entry.
// In any case, return status and set sequence number to 0.
*sequence = 0;
return status;
}

Loading…
Cancel
Save