From 1e86d424e454f0d73901c48159b0223443661d77 Mon Sep 17 00:00:00 2001 From: Jay Zhuang Date: Mon, 8 Aug 2022 13:08:35 -0700 Subject: [PATCH] Tiered storage stress test (#10493) Summary: Add Tiered storage stress test and db_bench option Pull Request resolved: https://github.com/facebook/rocksdb/pull/10493 Test Plan: new crashtest: https://app.circleci.com/pipelines/github/facebook/rocksdb/16905/workflows/68c2967c-9274-434f-8506-1403cf441ead Reviewed By: ajkr Differential Revision: D38481892 Pulled By: jay-zhuang fbshipit-source-id: 217a0be4acb93d420222e6ede2a1290d9f464776 --- .circleci/config.yml | 28 +++++++++++++++++++++++++++ crash_test.mk | 16 ++++++++++++++- db_stress_tool/db_stress_common.h | 4 ++++ db_stress_tool/db_stress_gflags.cc | 6 ++++++ db_stress_tool/db_stress_test_base.cc | 6 ++++++ tools/db_bench_tool.cc | 6 ++++++ tools/db_crashtest.py | 12 +++++++++++- 7 files changed, 76 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d9bf25fdf..e1bd6dfda 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -575,6 +575,34 @@ jobs: - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush - post-steps + build-linux-crashtest-tiered-storage-bb: + machine: + image: ubuntu-2004:202111-02 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-compression-libs + - run: + name: "run crashtest" + command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 blackbox_crash_test_with_tiered_storage + no_output_timeout: 100m + - post-steps + + build-linux-crashtest-tiered-storage-wb: + machine: + image: ubuntu-2004:202111-02 + resource_class: 2xlarge + steps: + - pre-steps + - install-gflags + - install-compression-libs + - run: + name: "run crashtest" + command: ulimit -S -n `ulimit -H -n` && make V=1 -j32 CRASH_TEST_EXT_ARGS=--duration=10800 whitebox_crash_test_with_tiered_storage + 
no_output_timeout: 100m + - post-steps + build-windows: executor: windows-2xlarge parameters: diff --git a/crash_test.mk b/crash_test.mk index 2a6a1f308..ac908a4a3 100644 --- a/crash_test.mk +++ b/crash_test.mk @@ -18,7 +18,9 @@ CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD) whitebox_crash_test whitebox_crash_test_with_atomic_flush \ whitebox_crash_test_with_txn whitebox_crash_test_with_ts \ blackbox_crash_test_with_multiops_wc_txn \ - blackbox_crash_test_with_multiops_wp_txn + blackbox_crash_test_with_multiops_wp_txn \ + crash_test_with_tiered_storage blackbox_crash_test_with_tiered_storage \ + whitebox_crash_test_with_tiered_storage \ crash_test: $(DB_STRESS_CMD) # Do not parallelize @@ -42,6 +44,11 @@ crash_test_with_ts: $(DB_STRESS_CMD) $(CRASHTEST_MAKE) whitebox_crash_test_with_ts $(CRASHTEST_MAKE) blackbox_crash_test_with_ts +crash_test_with_tiered_storage: $(DB_STRESS_CMD) +# Do not parallelize + $(CRASHTEST_MAKE) whitebox_crash_test_with_tiered_storage + $(CRASHTEST_MAKE) blackbox_crash_test_with_tiered_storage + crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD) $(CRASHTEST_MAKE) blackbox_crash_test_with_multiops_wc_txn @@ -70,6 +77,9 @@ blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD) blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD) $(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS) +blackbox_crash_test_with_tiered_storage: $(DB_STRESS_CMD) + $(CRASHTEST_PY) --enable_tiered_storage blackbox $(CRASH_TEST_EXT_ARGS) + ifeq ($(CRASH_TEST_KILL_ODD),) CRASH_TEST_KILL_ODD=888887 endif @@ -91,3 +101,7 @@ whitebox_crash_test_with_txn: $(DB_STRESS_CMD) whitebox_crash_test_with_ts: $(DB_STRESS_CMD) $(CRASHTEST_PY) --enable_ts whitebox --random_kill_odd \ $(CRASH_TEST_KILL_ODD) $(CRASH_TEST_EXT_ARGS) + +whitebox_crash_test_with_tiered_storage: $(DB_STRESS_CMD) + $(CRASHTEST_PY) --enable_tiered_storage whitebox --random_kill_odd \ + $(CRASH_TEST_KILL_ODD) 
$(CRASH_TEST_EXT_ARGS) diff --git a/db_stress_tool/db_stress_common.h b/db_stress_tool/db_stress_common.h index 494dc5a3f..ad4b6bb7e 100644 --- a/db_stress_tool/db_stress_common.h +++ b/db_stress_tool/db_stress_common.h @@ -307,6 +307,10 @@ DECLARE_int32(create_timestamped_snapshot_one_in); DECLARE_bool(allow_data_in_errors); +// Tiered storage +DECLARE_bool(enable_tiered_storage); // set last_level_temperature +DECLARE_int64(preclude_last_level_data_seconds); + constexpr long KB = 1024; constexpr int kRandomValueMaxFactor = 3; constexpr int kValueMaxLen = 100; diff --git a/db_stress_tool/db_stress_gflags.cc b/db_stress_tool/db_stress_gflags.cc index 0bb5fb499..a8733a52b 100644 --- a/db_stress_tool/db_stress_gflags.cc +++ b/db_stress_tool/db_stress_gflags.cc @@ -483,6 +483,12 @@ DEFINE_int32(prepopulate_blob_cache, 0, "[Integrated BlobDB] Pre-populate hot/warm blobs in blob cache. 0 " "to disable and 1 to insert during flush."); +DEFINE_bool(enable_tiered_storage, false, "Set last_level_temperature"); + +DEFINE_int64(preclude_last_level_data_seconds, 0, + "Preclude data from the last level. 
Used with tiered storage " + "feature to preclude new data from compacting to the last level."); + static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) = RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range); diff --git a/db_stress_tool/db_stress_test_base.cc b/db_stress_tool/db_stress_test_base.cc index cc40eac11..24ea7cc1f 100644 --- a/db_stress_tool/db_stress_test_base.cc +++ b/db_stress_tool/db_stress_test_base.cc @@ -3063,6 +3063,12 @@ void InitializeOptionsFromFlags( options.wal_compression = StringToCompressionType(FLAGS_wal_compression.c_str()); + if (FLAGS_enable_tiered_storage) { + options.bottommost_temperature = Temperature::kCold; + } + options.preclude_last_level_data_seconds = + FLAGS_preclude_last_level_data_seconds; + switch (FLAGS_rep_factory) { case kSkipList: // no need to do anything diff --git a/tools/db_bench_tool.cc b/tools/db_bench_tool.cc index 1ea410318..8c3945f0f 100644 --- a/tools/db_bench_tool.cc +++ b/tools/db_bench_tool.cc @@ -1315,6 +1315,10 @@ DEFINE_int32(simulate_hybrid_hdd_multipliers, 1, "are simulated."); DEFINE_bool(simulate_hdd, false, "Simulate read/write latency on HDD."); +DEFINE_int64( + preclude_last_level_data_seconds, 0, + "Preclude the latest data from the last level. 
(Used for tiered storage)"); + static std::shared_ptr env_guard; static ROCKSDB_NAMESPACE::Env* FLAGS_env = ROCKSDB_NAMESPACE::Env::Default(); @@ -4449,6 +4453,8 @@ class Benchmark { if (FLAGS_simulate_hybrid_fs_file != "") { options.bottommost_temperature = Temperature::kWarm; } + options.preclude_last_level_data_seconds = + FLAGS_preclude_last_level_data_seconds; options.sample_for_compression = FLAGS_sample_for_compression; options.WAL_ttl_seconds = FLAGS_wal_ttl_seconds; options.WAL_size_limit_MB = FLAGS_wal_size_limit_MB; diff --git a/tools/db_crashtest.py b/tools/db_crashtest.py index 3c11bddb4..3219c5241 100644 --- a/tools/db_crashtest.py +++ b/tools/db_crashtest.py @@ -6,7 +6,6 @@ import os import sys import time import random -import re import tempfile import subprocess import shutil @@ -367,6 +366,14 @@ ts_params = { "ingest_external_file_one_in": 0, } +tiered_params = { + "enable_tiered_storage": 1, + "preclude_last_level_data_seconds": lambda: random.choice([3600]), + # only test universal compaction for now, level has known issue of + # endless compaction + "compaction_style": 1, +} + multiops_txn_default_params = { "test_cf_consistency": 0, "test_batches_snapshots": 0, @@ -573,6 +580,8 @@ def gen_cmd_params(args): params.update(multiops_wc_txn_params) elif args.write_policy == 'write_prepared': params.update(multiops_wp_txn_params) + if args.enable_tiered_storage: + params.update(tiered_params) # Best-effort recovery and BlobDB are currently incompatible. Test BE recovery # if specified on the command line; otherwise, apply BlobDB related overrides @@ -820,6 +829,7 @@ def main(): parser.add_argument("--test_multiops_txn", action='store_true') parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"]) parser.add_argument("--stress_cmd") + parser.add_argument("--enable_tiered_storage", action='store_true') all_params = dict(list(default_params.items()) + list(blackbox_default_params.items())