Remove GetSortedWalFiles/GetCurrentWalFile from the crash test (#6491)

Summary:
Currently, `db_stress` tests a randomly picked one of `GetLiveFiles`,
`GetSortedWalFiles`, and `GetCurrentWalFile` with a 1/N chance when the
command line parameter `get_live_files_and_wal_files_one_in` is specified.
The problem is that `GetSortedWalFiles` and `GetCurrentWalFile` are unreliable
in the sense that they can return errors if another thread removes a WAL file
while they are executing (which is a perfectly plausible and legitimate scenario).
The patch splits this command line parameter into three (one for each API),
and changes the crash test script so that only `GetLiveFiles` is tested during
our continuous crash test runs.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6491

Test Plan:
```
make check
python tools/db_crashtest.py whitebox
```

Reviewed By: siying

Differential Revision: D20312200

Pulled By: ltamasi

fbshipit-source-id: e7c3481eddfe3bd3d5349476e34abc9eee5b7dc8
main
Levi Tamasi 4 years ago committed by Facebook GitHub Bot
parent 8ad4b32c5d
commit 217ce20021
  1. 4
      db_stress_tool/db_stress_common.h
  2. 19
      db_stress_tool/db_stress_gflags.cc
  3. 62
      db_stress_tool/db_stress_test_base.cc
  4. 5
      db_stress_tool/db_stress_test_base.h
  5. 6
      tools/db_crashtest.py

@ -128,7 +128,9 @@ DECLARE_int32(universal_min_merge_width);
DECLARE_int32(universal_max_merge_width);
DECLARE_int32(universal_max_size_amplification_percent);
DECLARE_int32(clear_column_family_one_in);
DECLARE_int32(get_live_files_and_wal_files_one_in);
DECLARE_int32(get_live_files_one_in);
DECLARE_int32(get_sorted_wal_files_one_in);
DECLARE_int32(get_current_wal_file_one_in);
DECLARE_int32(set_options_one_in);
DECLARE_int32(set_in_place_one_in);
DECLARE_int64(cache_size);

@ -256,10 +256,21 @@ DEFINE_int32(clear_column_family_one_in, 1000000,
"it again. If N == 0, never drop/create column families. "
"When test_batches_snapshots is true, this flag has no effect");
DEFINE_int32(get_live_files_and_wal_files_one_in, 1000000,
"With a chance of 1/N, call GetLiveFiles, GetSortedWalFiles "
"and GetCurrentWalFile to verify if it returns correctly. If "
"N == 0, never call the three interfaces.");
DEFINE_int32(get_live_files_one_in, 1000000,
"With a chance of 1/N, call GetLiveFiles to verify if it returns "
"correctly. If N == 0, do not call the interface.");
DEFINE_int32(
get_sorted_wal_files_one_in, 1000000,
"With a chance of 1/N, call GetSortedWalFiles to verify if it returns "
"correctly. (Note that this API may legitimately return an error.) If N == "
"0, do not call the interface.");
DEFINE_int32(
get_current_wal_file_one_in, 1000000,
"With a chance of 1/N, call GetCurrentWalFile to verify if it returns "
"correctly. (Note that this API may legitimately return an error.) If N == "
"0, do not call the interface.");
DEFINE_int32(set_options_one_in, 0,
"With a chance of 1/N, change some random options");

@ -593,13 +593,28 @@ void StressTest::OperateDb(ThreadState* thread) {
}
#ifndef ROCKSDB_LITE
// Every 1 in N verify the one of the following: 1) GetLiveFiles
// 2) GetSortedWalFiles 3) GetCurrentWalFile. Each time, randomly select
// one of them to run the test.
if (thread->rand.OneInOpt(FLAGS_get_live_files_and_wal_files_one_in)) {
Status status = VerifyGetLiveAndWalFiles(thread);
// Verify GetLiveFiles with a 1 in N chance.
if (thread->rand.OneInOpt(FLAGS_get_live_files_one_in)) {
Status status = VerifyGetLiveFiles();
if (!status.ok()) {
VerificationAbort(shared, "VerifyGetLiveAndWalFiles status not OK",
VerificationAbort(shared, "VerifyGetLiveFiles status not OK", status);
}
}
// Verify GetSortedWalFiles with a 1 in N chance.
if (thread->rand.OneInOpt(FLAGS_get_sorted_wal_files_one_in)) {
Status status = VerifyGetSortedWalFiles();
if (!status.ok()) {
VerificationAbort(shared, "VerifyGetSortedWalFiles status not OK",
status);
}
}
// Verify GetCurrentWalFile with a 1 in N chance.
if (thread->rand.OneInOpt(FLAGS_get_current_wal_file_one_in)) {
Status status = VerifyGetCurrentWalFile();
if (!status.ok()) {
VerificationAbort(shared, "VerifyGetCurrentWalFile status not OK",
status);
}
}
@ -978,28 +993,23 @@ Status StressTest::TestIterate(ThreadState* thread,
}
#ifndef ROCKSDB_LITE
// Test the return status of GetLiveFiles, GetSortedWalFiles, and
// GetCurrentWalFile. Each time, randomly select one of them to run
// and return the status.
Status StressTest::VerifyGetLiveAndWalFiles(ThreadState* thread) {
int case_num = thread->rand.Uniform(3);
if (case_num == 0) {
std::vector<std::string> live_file;
uint64_t manifest_size;
return db_->GetLiveFiles(live_file, &manifest_size);
}
// Test the return status of GetLiveFiles.
Status StressTest::VerifyGetLiveFiles() const {
std::vector<std::string> live_file;
uint64_t manifest_size = 0;
return db_->GetLiveFiles(live_file, &manifest_size);
}
if (case_num == 1) {
VectorLogPtr log_ptr;
return db_->GetSortedWalFiles(log_ptr);
}
// Test the return status of GetSortedWalFiles.
Status StressTest::VerifyGetSortedWalFiles() const {
VectorLogPtr log_ptr;
return db_->GetSortedWalFiles(log_ptr);
}
if (case_num == 2) {
std::unique_ptr<LogFile> cur_wal_file;
return db_->GetCurrentWalFile(&cur_wal_file);
}
assert(false);
return Status::Corruption("Undefined case happens!");
// Test the return status of GetCurrentWalFile.
Status StressTest::VerifyGetCurrentWalFile() const {
std::unique_ptr<LogFile> cur_wal_file;
return db_->GetCurrentWalFile(&cur_wal_file);
}
#endif // !ROCKSDB_LITE

@ -184,7 +184,10 @@ class StressTest {
Status MaybeReleaseSnapshots(ThreadState* thread, uint64_t i);
#ifndef ROCKSDB_LITE
Status VerifyGetLiveAndWalFiles(ThreadState* thread);
Status VerifyGetLiveFiles() const;
Status VerifyGetSortedWalFiles() const;
Status VerifyGetCurrentWalFile() const;
virtual Status TestApproximateSize(
ThreadState* thread, uint64_t iteration,
const std::vector<int>& rand_column_families,

@ -51,7 +51,11 @@ default_params = {
"enable_pipelined_write": lambda: random.randint(0, 1),
"expected_values_path": expected_values_file.name,
"flush_one_in": 1000000,
"get_live_files_and_wal_files_one_in": 1000000,
"get_live_files_one_in": 1000000,
# Note: the following two are intentionally disabled as the corresponding
# APIs are not guaranteed to succeed.
"get_sorted_wal_files_one_in": 0,
"get_current_wal_file_one_in": 0,
# Temporarily disable hash index
"index_type": lambda: random.choice([0,2]),
"max_background_compactions": 20,

Loading…
Cancel
Save