Tiered storage stress test (#10493)

Summary:
Add Tiered storage stress test and db_bench option

Pull Request resolved: https://github.com/facebook/rocksdb/pull/10493

Test Plan:
new crashtest:
https://app.circleci.com/pipelines/github/facebook/rocksdb/16905/workflows/68c2967c-9274-434f-8506-1403cf441ead

Reviewed By: ajkr

Differential Revision: D38481892

Pulled By: jay-zhuang

fbshipit-source-id: 217a0be4acb93d420222e6ede2a1290d9f464776
main
Jay Zhuang 2 years ago committed by Facebook GitHub Bot
parent 0d885e80d4
commit 1e86d424e4
  1. 28
      .circleci/config.yml
  2. 16
      crash_test.mk
  3. 4
      db_stress_tool/db_stress_common.h
  4. 6
      db_stress_tool/db_stress_gflags.cc
  5. 6
      db_stress_tool/db_stress_test_base.cc
  6. 6
      tools/db_bench_tool.cc
  7. 12
      tools/db_crashtest.py

@ -575,6 +575,34 @@ jobs:
- run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
- post-steps - post-steps
# CircleCI job: blackbox crash test with tiered storage enabled.
# Raises the soft open-file limit to the hard limit, then invokes the
# blackbox_crash_test_with_tiered_storage make target with --duration=10800.
build-linux-crashtest-tiered-storage-bb:
  machine:
    image: ubuntu-2004:202111-02
  resource_class: 2xlarge
  steps:
    - pre-steps
    - install-gflags
    - install-compression-libs
    - run:
        name: run crashtest
        # Folded scalar: the two lines below are joined with a single space.
        command: >-
          ulimit -S -n `ulimit -H -n` &&
          make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 blackbox_crash_test_with_tiered_storage
        no_output_timeout: 100m
    - post-steps
# CircleCI job: whitebox crash test with tiered storage enabled.
# Raises the soft open-file limit to the hard limit, then invokes the
# whitebox_crash_test_with_tiered_storage make target with --duration=10800.
build-linux-crashtest-tiered-storage-wb:
  machine:
    image: ubuntu-2004:202111-02
  resource_class: 2xlarge
  steps:
    - pre-steps
    - install-gflags
    - install-compression-libs
    - run:
        name: run crashtest
        # Folded scalar: the two lines below are joined with a single space.
        command: >-
          ulimit -S -n `ulimit -H -n` &&
          make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 whitebox_crash_test_with_tiered_storage
        no_output_timeout: 100m
    - post-steps
build-windows: build-windows:
executor: windows-2xlarge executor: windows-2xlarge
parameters: parameters:

@ -18,7 +18,9 @@ CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD)
whitebox_crash_test whitebox_crash_test_with_atomic_flush \ whitebox_crash_test whitebox_crash_test_with_atomic_flush \
whitebox_crash_test_with_txn whitebox_crash_test_with_ts \ whitebox_crash_test_with_txn whitebox_crash_test_with_ts \
blackbox_crash_test_with_multiops_wc_txn \ blackbox_crash_test_with_multiops_wc_txn \
blackbox_crash_test_with_multiops_wp_txn blackbox_crash_test_with_multiops_wp_txn \
crash_test_with_tiered_storage blackbox_crash_test_with_tiered_storage \
whitebox_crash_test_with_tiered_storage \
crash_test: $(DB_STRESS_CMD) crash_test: $(DB_STRESS_CMD)
# Do not parallelize # Do not parallelize
@ -42,6 +44,11 @@ crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) whitebox_crash_test_with_ts $(CRASHTEST_MAKE) whitebox_crash_test_with_ts
$(CRASHTEST_MAKE) blackbox_crash_test_with_ts $(CRASHTEST_MAKE) blackbox_crash_test_with_ts
# Umbrella target for the tiered-storage crash tests: runs the whitebox
# variant and then the blackbox variant, each dispatched through
# $(CRASHTEST_MAKE). The sub-target names are built from this target's own
# name ($@), i.e. whitebox_crash_test_with_tiered_storage and
# blackbox_crash_test_with_tiered_storage.
crash_test_with_tiered_storage: $(DB_STRESS_CMD)
# Do not parallelize
	$(CRASHTEST_MAKE) whitebox_$@
	$(CRASHTEST_MAKE) blackbox_$@
crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD) crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
$(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn $(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn
@ -70,6 +77,9 @@ blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD) blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS) $(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
# Blackbox crash test with tiered storage: runs tools/db_crashtest.py (via
# $(CRASHTEST_PY)) with --enable_tiered_storage in "blackbox" mode, appending
# any caller-supplied $(CRASH_TEST_EXT_ARGS).
blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
	$(CRASHTEST_PY) --enable_tiered_storage blackbox \
		$(CRASH_TEST_EXT_ARGS)
ifeq ($(CRASH_TEST_KILL_ODD),) ifeq ($(CRASH_TEST_KILL_ODD),)
CRASH_TEST_KILL_ODD=888887 CRASH_TEST_KILL_ODD=888887
endif endif
@ -91,3 +101,7 @@ whitebox_crash_test_with_txn: $(DB_STRESS_CMD)
whitebox_crash_test_with_ts: $(DB_STRESS_CMD) whitebox_crash_test_with_ts: $(DB_STRESS_CMD)
$(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \ $(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \
$(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)
# Whitebox crash test with tiered storage: runs tools/db_crashtest.py (via
# $(CRASHTEST_PY)) with --enable_tiered_storage in "whitebox" mode.
# --random_kill_odd takes $(CRASH_TEST_KILL_ODD), which defaults to 888887
# when not set by the caller; $(CRASH_TEST_EXT_ARGS) is appended last.
whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD)
	$(CRASHTEST_PY) --enable_tiered_storage whitebox --random_kill_odd $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS)

@ -307,6 +307,10 @@ DECLARE_int32(create_timestamped_snapshot_one_in);
DECLARE_bool(allow_data_in_errors); DECLARE_bool(allow_data_in_errors);
// Tiered storage
// enable_tiered_storage: when true, the options setup marks last-level data
// as cold (it assigns Temperature::kCold to options.bottommost_temperature).
DECLARE_bool(enable_tiered_storage); // set last_level_temperature
// preclude_last_level_data_seconds: copied as-is into
// options.preclude_last_level_data_seconds.
DECLARE_int64(preclude_last_level_data_seconds);
constexpr long KB = 1024; constexpr long KB = 1024;
constexpr int kRandomValueMaxFactor = 3; constexpr int kRandomValueMaxFactor = 3;
constexpr int kValueMaxLen = 100; constexpr int kValueMaxLen = 100;

@ -483,6 +483,12 @@ DEFINE_int32(prepopulate_blob_cache, 0,
"[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 " "[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 "
"to disable and 1 to insert during flush."); "to disable and 1 to insert during flush.");
// Tiered storage flags for db_stress.
// enable_tiered_storage: off by default; when set, the options setup assigns
// Temperature::kCold to options.bottommost_temperature.
DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature");

// preclude_last_level_data_seconds: defaults to 0 and is copied as-is into
// options.preclude_last_level_data_seconds.
DEFINE_int64(preclude_last_level_data_seconds, 0,
             "Preclude data from the last level. Used with tiered storage "
             "feature to preclude new data from compacting to the last level.");
static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) = static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);

@ -3063,6 +3063,12 @@ void InitializeOptionsFromFlags(
options.wal_compression = options.wal_compression =
StringToCompressionType(FLAGS_wal_compression.c_str()); StringToCompressionType(FLAGS_wal_compression.c_str());
if (FLAGS_enable_tiered_storage) {
options.bottommost_temperature = Temperature::kCold;
}
options.preclude_last_level_data_seconds =
FLAGS_preclude_last_level_data_seconds;
switch (FLAGS_rep_factory) { switch (FLAGS_rep_factory) {
case kSkipList: case kSkipList:
// no need to do anything // no need to do anything

@ -1315,6 +1315,10 @@ DEFINE_int32(simulate_hybrid_hdd_multipliers, 1,
"are simulated."); "are simulated.");
DEFINE_bool(simulate_hdd, false, "Simulate read/write latency on HDD."); DEFINE_bool(simulate_hdd, false, "Simulate read/write latency on HDD.");
// Tiered storage knob for db_bench: defaults to 0 and is copied as-is into
// options.preclude_last_level_data_seconds when the benchmark builds its
// Options.
DEFINE_int64(preclude_last_level_data_seconds, 0,
             "Preclude the latest data from the last level. "
             "(Used for tiered storage)");
static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard; static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
static ROCKSDB_NAMESPACE::Env* FLAGS_env = ROCKSDB_NAMESPACE::Env::Default(); static ROCKSDB_NAMESPACE::Env* FLAGS_env = ROCKSDB_NAMESPACE::Env::Default();
@ -4449,6 +4453,8 @@ class Benchmark {
if (FLAGS_simulate_hybrid_fs_file != "") { if (FLAGS_simulate_hybrid_fs_file != "") {
options.bottommost_temperature = Temperature::kWarm; options.bottommost_temperature = Temperature::kWarm;
} }
options.preclude_last_level_data_seconds =
FLAGS_preclude_last_level_data_seconds;
options.sample_for_compression = FLAGS_sample_for_compression; options.sample_for_compression = FLAGS_sample_for_compression;
options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds; options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds;
options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB; options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB;

@ -6,7 +6,6 @@ import os
import sys import sys
import time import time
import random import random
import re
import tempfile import tempfile
import subprocess import subprocess
import shutil import shutil
@ -367,6 +366,14 @@ ts_params = {
"ingest_external_file_one_in": 0, "ingest_external_file_one_in": 0,
} }
# Parameter overrides merged into the db_stress parameters by gen_cmd_params()
# when --enable_tiered_storage is passed on the command line.
tiered_params = dict(
    enable_tiered_storage=1,
    # Callable value; currently always yields 3600.
    preclude_last_level_data_seconds=lambda: random.choice([3600]),
    # only test universal compaction for now, level has known issue of
    # endless compaction
    compaction_style=1,
)
multiops_txn_default_params = { multiops_txn_default_params = {
"test_cf_consistency": 0, "test_cf_consistency": 0,
"test_batches_snapshots": 0, "test_batches_snapshots": 0,
@ -573,6 +580,8 @@ def gen_cmd_params(args):
params.update(multiops_wc_txn_params) params.update(multiops_wc_txn_params)
elif args.write_policy == 'write_prepared': elif args.write_policy == 'write_prepared':
params.update(multiops_wp_txn_params) params.update(multiops_wp_txn_params)
if args.enable_tiered_storage:
params.update(tiered_params)
# Best-effort recovery and BlobDB are currently incompatible. Test BE recovery # Best-effort recovery and BlobDB are currently incompatible. Test BE recovery
# if specified on the command line; otherwise, apply BlobDB related overrides # if specified on the command line; otherwise, apply BlobDB related overrides
@ -820,6 +829,7 @@ def main():
parser.add_argument("--test_multiops_txn", action='store_true') parser.add_argument("--test_multiops_txn", action='store_true')
parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"]) parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
parser.add_argument("--stress_cmd") parser.add_argument("--stress_cmd")
parser.add_argument("--enable_tiered_storage", action='store_true')
all_params = dict(list(default_params.items()) all_params = dict(list(default_params.items())
+ list(blackbox_default_params.items()) + list(blackbox_default_params.items())

Loading…
Cancel
Save