Enable IngestExternalFile() in crash test (#9357)

Summary:
Thanks to https://github.com/facebook/rocksdb/issues/9919 and https://github.com/facebook/rocksdb/issues/10051 the known bugs in file ingestion (besides mmap read + file checksum) are fixed. Now we can  try again to enable file ingestion in crash test.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9357

Test Plan: stress file ingestion heavily for an hour: `$ TEST_TMPDIR=/dev/shm python3 tools/db_crashtest.py blackbox --max_key=1000000 --ingest_external_file_one_in=100 --duration=3600 --interval=20 --write_buffer_size=524288 --target_file_size_base=524288 --max_bytes_for_level_base=2097152`

Reviewed By: riversand963

Differential Revision: D33410746

Pulled By: ajkr

fbshipit-source-id: d276431390995a67f68390d61c06a40945fdd280
main
Andrew Kryczka 3 years ago committed by Facebook GitHub Bot
parent c9c58a320f
commit 91ba7837b7
  1. 8
      db_stress_tool/db_stress_tool.cc
  2. 21
      db_stress_tool/no_batched_ops_stress.cc
  3. 19
      tools/db_crashtest.py

@ -184,9 +184,11 @@ int db_stress_tool(int argc, char** argv) {
"Error: nooverwritepercent must not be 100 when using merge operands");
exit(1);
}
if (FLAGS_ingest_external_file_one_in > 0 && FLAGS_nooverwritepercent > 0) {
fprintf(stderr,
"Error: nooverwritepercent must be 0 when using file ingestion\n");
if (FLAGS_ingest_external_file_one_in > 0 &&
FLAGS_nooverwritepercent == 100) {
fprintf(
stderr,
"Error: nooverwritepercent must not be 100 when using file ingestion");
exit(1);
}
if (FLAGS_clear_column_family_one_in > 0 && FLAGS_backup_one_in > 0) {

@ -805,13 +805,18 @@ class NonBatchedOpsStressTest : public StressTest {
int64_t key_base = rand_keys[0];
int column_family = rand_column_families[0];
std::vector<std::unique_ptr<MutexLock>> range_locks;
range_locks.reserve(FLAGS_ingest_external_file_width);
std::vector<int64_t> keys;
keys.reserve(FLAGS_ingest_external_file_width);
std::vector<uint32_t> values;
values.reserve(FLAGS_ingest_external_file_width);
SharedState* shared = thread->shared;
assert(FLAGS_nooverwritepercent < 100);
// Grab locks, set pending state on expected values, and add keys
for (int64_t key = key_base;
s.ok() && key < std::min(key_base + FLAGS_ingest_external_file_width,
shared->GetMaxKey());
s.ok() && key < shared->GetMaxKey() &&
static_cast<int32_t>(keys.size()) < FLAGS_ingest_external_file_width;
++key) {
if (key == key_base) {
range_locks.emplace_back(std::move(lock));
@ -819,6 +824,12 @@ class NonBatchedOpsStressTest : public StressTest {
range_locks.emplace_back(
new MutexLock(shared->GetMutexForKey(column_family, key)));
}
if (!shared->AllowsOverwrite(key)) {
// We could alternatively include `key` on the condition its current
// value is `DELETION_SENTINEL`.
continue;
}
keys.push_back(key);
uint32_t value_base = thread->rand.Next() % shared->UNKNOWN_SENTINEL;
values.push_back(value_base);
@ -841,10 +852,8 @@ class NonBatchedOpsStressTest : public StressTest {
fprintf(stderr, "file ingestion error: %s\n", s.ToString().c_str());
std::terminate();
}
int64_t key = key_base;
for (int32_t value : values) {
shared->Put(column_family, key, value, false /* pending */);
++key;
for (size_t i = 0; i < keys.size(); ++i) {
shared->Put(column_family, keys[i], values[i], false /* pending */);
}
}
#endif // ROCKSDB_LITE

@ -78,6 +78,7 @@ default_params = {
"get_current_wal_file_one_in": 0,
# Temporarily disable hash index
"index_type": lambda: random.choice([0, 0, 0, 2, 2, 3]),
"ingest_external_file_one_in": 1000000,
"iterpercent": 10,
"mark_for_compaction_one_file_in": lambda: 10 * random.randint(0, 1),
"max_background_compactions": 20,
@ -432,6 +433,11 @@ def finalize_and_sanitize(src_params):
if dest_params["mmap_read"] == 1:
dest_params["use_direct_io_for_flush_and_compaction"] = 0
dest_params["use_direct_reads"] = 0
if dest_params["file_checksum_impl"] != "none":
# TODO(T109283569): there is a bug in `GenerateOneFileChecksum()`,
# used by `IngestExternalFile()`, causing it to fail with mmap
# reads. Remove this once it is fixed.
dest_params["ingest_external_file_one_in"] = 0
if (dest_params["use_direct_io_for_flush_and_compaction"] == 1
or dest_params["use_direct_reads"] == 1) and \
not is_direct_io_supported(dest_params["db"]):
@ -444,12 +450,23 @@ def finalize_and_sanitize(src_params):
else:
dest_params["mock_direct_io"] = True
# DeleteRange is not currnetly compatible with Txns and timestamp
# Multi-key operations are not currently compatible with transactions or
# timestamp.
if (dest_params.get("test_batches_snapshots") == 1 or
dest_params.get("use_txn") == 1 or
dest_params.get("user_timestamp_size") > 0):
dest_params["delpercent"] += dest_params["delrangepercent"]
dest_params["delrangepercent"] = 0
dest_params["ingest_external_file_one_in"] = 0
# File ingestion does not guarantee prefix-recoverability with WAL disabled.
# Ingesting a file persists data immediately that is newer than memtable
# data that can be lost on restart.
#
# Even if the above issue is fixed or worked around, our trace-and-replay
# does not trace file ingestion, so in its current form it would not recover
# the expected state to the correct point in time.
if (dest_params.get("disable_wal") == 1):
dest_params["ingest_external_file_one_in"] = 0
# Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb
if dest_params.get("unordered_write", 0) == 1:
dest_params["txn_write_policy"] = 1

Loading…
Cancel
Save