Add a SystemClock class to capture the time functions of an Env (#7858)

Summary:
Introduces and uses a SystemClock class to RocksDB.  This class contains the time-related functions of an Env and these functions can be redirected from the Env to the SystemClock.

Many of the places that used an Env (Timer, PerfStepTimer, RepeatableThread, RateLimiter, WriteController) for time-related functions have been changed to use SystemClock instead.  There are likely more places that can be changed, but this is a start to show what can/should be done.  Over time it would be nice to migrate most (if not all) of the uses of the time functions from the Env to the SystemClock.

There are several Env classes that implement these functions.  Most of these have not been converted yet to SystemClock implementations; that will come in a subsequent PR.  It would be good to unify many of the Mock Timer implementations, so that they behave similarly and can be tested similarly (some override Sleep, some use a MockSleep, etc).

Additionally, this change will allow new methods to be introduced to the SystemClock (like https://github.com/facebook/rocksdb/issues/7101 WaitFor) in a consistent manner across a smaller number of classes.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7858

Reviewed By: pdillinger

Differential Revision: D26006406

Pulled By: mrambacher

fbshipit-source-id: ed10a8abbdab7ff2e23d69d85bd25b3e7e899e90
main
mrambacher 3 years ago committed by Facebook GitHub Bot
parent 1d226018af
commit 12f1137355
  1. 1
      CMakeLists.txt
  2. 2
      HISTORY.md
  3. 2
      TARGETS
  4. 7
      cache/cache_bench.cc
  5. 8
      db/blob/blob_file_builder.cc
  6. 3
      db/blob/blob_file_builder.h
  7. 12
      db/blob/blob_file_builder_test.cc
  8. 8
      db/blob/blob_file_cache_test.cc
  9. 9
      db/blob/blob_file_reader.cc
  10. 16
      db/blob/blob_file_reader_test.cc
  11. 8
      db/blob/blob_log_sequential_reader.cc
  12. 7
      db/blob/blob_log_sequential_reader.h
  13. 14
      db/blob/blob_log_writer.cc
  14. 8
      db/blob/blob_log_writer.h
  15. 6
      db/builder.cc
  16. 4
      db/column_family.cc
  17. 3
      db/compaction/compaction_iterator.cc
  18. 1
      db/compaction/compaction_iterator.h
  19. 15
      db/compaction/compaction_job.cc
  20. 2
      db/compaction/compaction_job.h
  21. 1
      db/db_compaction_test.cc
  22. 1
      db/db_dynamic_level_test.cc
  23. 1
      db/db_flush_test.cc
  24. 36
      db/db_impl/db_impl.cc
  25. 4
      db/db_impl/db_impl.h
  26. 6
      db/db_impl/db_impl_compaction_flush.cc
  27. 2
      db/db_impl/db_impl_files.cc
  28. 10
      db/db_impl/db_impl_open.cc
  29. 4
      db/db_impl/db_impl_secondary.cc
  30. 18
      db/db_impl/db_impl_write.cc
  31. 33
      db/db_iter.cc
  32. 3
      db/db_iter.h
  33. 1
      db/db_log_iter_test.cc
  34. 1
      db/db_test_util.cc
  35. 3
      db/db_test_util.h
  36. 2
      db/error_handler.cc
  37. 12
      db/external_sst_file_ingestion_job.cc
  38. 15
      db/external_sst_file_ingestion_job.h
  39. 11
      db/flush_job.cc
  40. 1
      db/flush_job.h
  41. 2
      db/import_column_family_job.cc
  42. 10
      db/import_column_family_job.h
  43. 4
      db/internal_stats.cc
  44. 16
      db/internal_stats.h
  45. 15
      db/memtable.cc
  46. 4
      db/memtable.h
  47. 24
      db/merge_helper.cc
  48. 14
      db/merge_helper.h
  49. 58
      db/perf_context_test.cc
  50. 34
      db/periodic_work_scheduler.cc
  51. 12
      db/periodic_work_scheduler.h
  52. 35
      db/periodic_work_scheduler_test.cc
  53. 7
      db/prefix_test.cc
  54. 11
      db/range_del_aggregator_bench.cc
  55. 12
      db/table_cache.cc
  56. 24
      db/version_set.cc
  57. 4
      db/version_set.h
  58. 4
      db/version_set_test.cc
  59. 3
      db/write_batch.cc
  60. 13
      db/write_controller.cc
  61. 7
      db/write_controller.h
  62. 84
      db/write_controller_test.cc
  63. 369
      env/composite_env.cc
  64. 395
      env/composite_env_wrapper.h
  65. 57
      env/env.cc
  66. 158
      env/env_posix.cc
  67. 4
      env/env_test.cc
  68. 139
      env/file_system_tracer.cc
  69. 27
      env/file_system_tracer.h
  70. 13
      file/delete_scheduler.cc
  71. 10
      file/delete_scheduler.h
  72. 7
      file/delete_scheduler_test.cc
  73. 13
      file/file_util.h
  74. 12
      file/filename.cc
  75. 7
      file/filename.h
  76. 19
      file/random_access_file_reader.cc
  77. 11
      file/random_access_file_reader.h
  78. 3
      file/random_access_file_reader_test.cc
  79. 22
      file/sst_file_manager_impl.cc
  80. 14
      file/sst_file_manager_impl.h
  81. 5
      file/writable_file_writer.cc
  82. 9
      file/writable_file_writer.h
  83. 15
      include/rocksdb/env.h
  84. 102
      include/rocksdb/system_clock.h
  85. 50
      logging/auto_roll_logger.cc
  86. 17
      logging/auto_roll_logger.h
  87. 91
      logging/auto_roll_logger_test.cc
  88. 2
      logging/env_logger.h
  89. 3
      memtable/memtablerep_bench.cc
  90. 3
      memtable/write_buffer_manager_test.cc
  91. 34
      monitoring/histogram_test.cc
  92. 16
      monitoring/histogram_windowing.cc
  93. 8
      monitoring/histogram_windowing.h
  94. 13
      monitoring/instrumented_mutex.cc
  95. 27
      monitoring/instrumented_mutex.h
  96. 6
      monitoring/iostats_context_imp.h
  97. 12
      monitoring/perf_context_imp.h
  98. 31
      monitoring/perf_step_timer.h
  99. 94
      monitoring/stats_history_test.cc
  100. 5
      monitoring/thread_status_updater.cc
  101. Some files were not shown because too many files have changed in this diff Show More

@ -642,6 +642,7 @@ set(SOURCES
db/write_batch_base.cc
db/write_controller.cc
db/write_thread.cc
env/composite_env.cc
env/env.cc
env/env_chroot.cc
env/env_encryption.cc

@ -12,7 +12,7 @@
### Public API Change
* Add a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in cache (passed to WriteBufferManager). Dummy entries are used to account for DataBlocks.
* Add a SystemClock class that contains the time-related methods from Env. The original methods in Env may be deprecated in a future release. This class will allow easier testing, development, and expansion of time-related features.
## 6.16.0 (12/18/2020)
### Behavior Changes
* Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation.

@ -206,6 +206,7 @@ cpp_library(
"db/write_batch_base.cc",
"db/write_controller.cc",
"db/write_thread.cc",
"env/composite_env.cc",
"env/env.cc",
"env/env_chroot.cc",
"env/env_encryption.cc",
@ -510,6 +511,7 @@ cpp_library(
"db/write_batch_base.cc",
"db/write_controller.cc",
"db/write_thread.cc",
"env/composite_env.cc",
"env/env.cc",
"env/env_chroot.cc",
"env/env_encryption.cc",

@ -13,6 +13,7 @@ int main() {
#include <stdio.h>
#include <sys/types.h>
#include <cinttypes>
#include <limits>
@ -20,6 +21,7 @@ int main() {
#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
@ -210,6 +212,7 @@ class CacheBench {
bool Run() {
ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default();
const auto& clock = env->GetSystemClock();
PrintEnv();
SharedState shared(this);
@ -224,7 +227,7 @@ class CacheBench {
shared.GetCondVar()->Wait();
}
// Record start time
uint64_t start_time = env->NowMicros();
uint64_t start_time = clock->NowMicros();
// Start all threads
shared.SetStart();
@ -236,7 +239,7 @@ class CacheBench {
}
// Record end time
uint64_t end_time = env->NowMicros();
uint64_t end_time = clock->NowMicros();
double elapsed = static_cast<double>(end_time - start_time) * 1e-6;
uint32_t qps = static_cast<uint32_t>(
static_cast<double>(FLAGS_threads * FLAGS_ops_per_thread) / elapsed);

@ -49,7 +49,6 @@ BlobFileBuilder::BlobFileBuilder(
std::vector<std::string>* blob_file_paths,
std::vector<BlobFileAddition>* blob_file_additions)
: file_number_generator_(std::move(file_number_generator)),
env_(env),
fs_(fs),
immutable_cf_options_(immutable_cf_options),
min_blob_size_(mutable_cf_options->min_blob_size),
@ -66,7 +65,7 @@ BlobFileBuilder::BlobFileBuilder(
blob_count_(0),
blob_bytes_(0) {
assert(file_number_generator_);
assert(env_);
assert(env);
assert(fs_);
assert(immutable_cf_options_);
assert(file_options_);
@ -74,6 +73,7 @@ BlobFileBuilder::BlobFileBuilder(
assert(blob_file_paths_->empty());
assert(blob_file_additions_);
assert(blob_file_additions_->empty());
clock_ = env->GetSystemClock();
}
BlobFileBuilder::~BlobFileBuilder() = default;
@ -181,14 +181,14 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {
Statistics* const statistics = immutable_cf_options_->statistics;
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), blob_file_paths_->back(), *file_options_, env_,
std::move(file), blob_file_paths_->back(), *file_options_, clock_,
nullptr /*IOTracer*/, statistics, immutable_cf_options_->listeners,
immutable_cf_options_->file_checksum_gen_factory));
constexpr bool do_flush = false;
std::unique_ptr<BlobLogWriter> blob_log_writer(new BlobLogWriter(
std::move(file_writer), env_, statistics, blob_file_number,
std::move(file_writer), clock_, statistics, blob_file_number,
immutable_cf_options_->use_fsync, do_flush));
constexpr bool has_ttl = false;

@ -18,6 +18,7 @@ namespace ROCKSDB_NAMESPACE {
class VersionSet;
class FileSystem;
class SystemClock;
struct ImmutableCFOptions;
struct MutableCFOptions;
struct FileOptions;
@ -69,8 +70,8 @@ class BlobFileBuilder {
Status CloseBlobFileIfNeeded();
std::function<uint64_t()> file_number_generator_;
Env* env_;
FileSystem* fs_;
std::shared_ptr<SystemClock> clock_;
const ImmutableCFOptions* immutable_cf_options_;
uint64_t min_blob_size_;
uint64_t blob_file_size_;

@ -39,8 +39,10 @@ class TestFileNumberGenerator {
class BlobFileBuilderTest : public testing::Test {
protected:
BlobFileBuilderTest()
: mock_env_(Env::Default()), fs_(mock_env_.GetFileSystem().get()) {}
BlobFileBuilderTest() : mock_env_(Env::Default()) {
fs_ = mock_env_.GetFileSystem().get();
clock_ = mock_env_.GetSystemClock();
}
void VerifyBlobFile(uint64_t blob_file_number,
const std::string& blob_file_path,
@ -57,11 +59,10 @@ class BlobFileBuilderTest : public testing::Test {
fs_->NewRandomAccessFile(blob_file_path, file_options_, &file, dbg));
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(file), blob_file_path,
&mock_env_));
new RandomAccessFileReader(std::move(file), blob_file_path, clock_));
constexpr Statistics* statistics = nullptr;
BlobLogSequentialReader blob_log_reader(std::move(file_reader), &mock_env_,
BlobLogSequentialReader blob_log_reader(std::move(file_reader), clock_,
statistics);
BlobLogHeader header;
@ -109,6 +110,7 @@ class BlobFileBuilderTest : public testing::Test {
MockEnv mock_env_;
FileSystem* fs_;
std::shared_ptr<SystemClock> clock_;
FileOptions file_options_;
};

@ -42,15 +42,15 @@ void WriteBlobFile(uint32_t column_family_id,
std::unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
immutable_cf_options.env));
immutable_cf_options.env->GetSystemClock()));
constexpr Statistics* statistics = nullptr;
constexpr bool use_fsync = false;
constexpr bool do_flush = false;
BlobLogWriter blob_log_writer(std::move(file_writer),
immutable_cf_options.env, statistics,
blob_file_number, use_fsync, do_flush);
BlobLogWriter blob_log_writer(
std::move(file_writer), immutable_cf_options.env->GetSystemClock(),
statistics, blob_file_number, use_fsync, do_flush);
constexpr bool has_ttl = false;
constexpr ExpirationRange expiration_range;

@ -117,10 +117,11 @@ Status BlobFileReader::OpenFile(
}
file_reader->reset(new RandomAccessFileReader(
std::move(file), blob_file_path, immutable_cf_options.env,
std::shared_ptr<IOTracer>(), immutable_cf_options.statistics,
BLOB_DB_BLOB_FILE_READ_MICROS, blob_file_read_hist,
immutable_cf_options.rate_limiter, immutable_cf_options.listeners));
std::move(file), blob_file_path,
immutable_cf_options.env->GetSystemClock(), std::shared_ptr<IOTracer>(),
immutable_cf_options.statistics, BLOB_DB_BLOB_FILE_READ_MICROS,
blob_file_read_hist, immutable_cf_options.rate_limiter,
immutable_cf_options.listeners));
return Status::OK();
}

@ -50,15 +50,15 @@ void WriteBlobFile(const ImmutableCFOptions& immutable_cf_options,
std::unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
immutable_cf_options.env));
immutable_cf_options.env->GetSystemClock()));
constexpr Statistics* statistics = nullptr;
constexpr bool use_fsync = false;
constexpr bool do_flush = false;
BlobLogWriter blob_log_writer(std::move(file_writer),
immutable_cf_options.env, statistics,
blob_file_number, use_fsync, do_flush);
BlobLogWriter blob_log_writer(
std::move(file_writer), immutable_cf_options.env->GetSystemClock(),
statistics, blob_file_number, use_fsync, do_flush);
BlobLogHeader header(column_family_id, compression_type, has_ttl,
expiration_range_header);
@ -260,15 +260,15 @@ TEST_F(BlobFileReaderTest, Malformed) {
std::unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
immutable_cf_options.env));
immutable_cf_options.env->GetSystemClock()));
constexpr Statistics* statistics = nullptr;
constexpr bool use_fsync = false;
constexpr bool do_flush = false;
BlobLogWriter blob_log_writer(std::move(file_writer),
immutable_cf_options.env, statistics,
blob_file_number, use_fsync, do_flush);
BlobLogWriter blob_log_writer(
std::move(file_writer), immutable_cf_options.env->GetSystemClock(),
statistics, blob_file_number, use_fsync, do_flush);
BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
expiration_range);

@ -13,10 +13,10 @@
namespace ROCKSDB_NAMESPACE {
BlobLogSequentialReader::BlobLogSequentialReader(
std::unique_ptr<RandomAccessFileReader>&& file_reader, Env* env,
Statistics* statistics)
std::unique_ptr<RandomAccessFileReader>&& file_reader,
const std::shared_ptr<SystemClock>& clock, Statistics* statistics)
: file_(std::move(file_reader)),
env_(env),
clock_(clock),
statistics_(statistics),
next_byte_(0) {}
@ -27,7 +27,7 @@ Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice,
assert(slice);
assert(file_);
StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
Status s = file_->Read(IOOptions(), next_byte_, static_cast<size_t>(size),
slice, buf, nullptr);
next_byte_ += size;

@ -16,6 +16,7 @@ class RandomAccessFileReader;
class Env;
class Statistics;
class Status;
class SystemClock;
/**
* BlobLogSequentialReader is a general purpose log stream reader
@ -35,7 +36,8 @@ class BlobLogSequentialReader {
// Create a reader that will return log records from "*file_reader".
BlobLogSequentialReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
Env* env, Statistics* statistics);
const std::shared_ptr<SystemClock>& clock,
Statistics* statistics);
// No copying allowed
BlobLogSequentialReader(const BlobLogSequentialReader&) = delete;
@ -63,7 +65,8 @@ class BlobLogSequentialReader {
Status ReadSlice(uint64_t size, Slice* slice, char* buf);
const std::unique_ptr<RandomAccessFileReader> file_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
Statistics* statistics_;
Slice buffer_;

@ -11,7 +11,7 @@
#include "db/blob/blob_log_format.h"
#include "file/writable_file_writer.h"
#include "monitoring/statistics.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/coding.h"
#include "util/stop_watch.h"
@ -19,11 +19,11 @@
namespace ROCKSDB_NAMESPACE {
BlobLogWriter::BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest,
Env* env, Statistics* statistics,
uint64_t log_number, bool use_fs, bool do_flush,
uint64_t boffset)
const std::shared_ptr<SystemClock>& clock,
Statistics* statistics, uint64_t log_number,
bool use_fs, bool do_flush, uint64_t boffset)
: dest_(std::move(dest)),
env_(env),
clock_(clock),
statistics_(statistics),
log_number_(log_number),
block_offset_(boffset),
@ -36,7 +36,7 @@ BlobLogWriter::~BlobLogWriter() = default;
Status BlobLogWriter::Sync() {
TEST_SYNC_POINT("BlobLogWriter::Sync");
StopWatch sync_sw(env_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS);
StopWatch sync_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS);
Status s = dest_->Sync(use_fsync_);
RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED);
return s;
@ -148,7 +148,7 @@ Status BlobLogWriter::EmitPhysicalRecord(const std::string& headerbuf,
const Slice& key, const Slice& val,
uint64_t* key_offset,
uint64_t* blob_offset) {
StopWatch write_sw(env_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS);
StopWatch write_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS);
Status s = dest_->Append(Slice(headerbuf));
if (s.ok()) {
s = dest_->Append(key);

@ -9,7 +9,6 @@
#include <string>
#include "db/blob/blob_log_format.h"
#include "rocksdb/env.h"
#include "rocksdb/slice.h"
#include "rocksdb/statistics.h"
#include "rocksdb/status.h"
@ -18,7 +17,7 @@
namespace ROCKSDB_NAMESPACE {
class WritableFileWriter;
class SystemClock;
/**
* BlobLogWriter is the blob log stream writer. It provides an append-only
* abstraction for writing blob data.
@ -32,7 +31,8 @@ class BlobLogWriter {
// Create a writer that will append data to "*dest".
// "*dest" must be initially empty.
// "*dest" must remain live while this BlobLogWriter is in use.
BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest, Env* env,
BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest,
const std::shared_ptr<SystemClock>& clock,
Statistics* statistics, uint64_t log_number, bool use_fsync,
bool do_flush, uint64_t boffset = 0);
// No copying allowed
@ -69,7 +69,7 @@ class BlobLogWriter {
private:
std::unique_ptr<WritableFileWriter> dest_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
Statistics* statistics_;
uint64_t log_number_;
uint64_t block_offset_; // Current offset in block

@ -124,6 +124,8 @@ Status BuildTable(
assert(env);
FileSystem* fs = db_options.fs.get();
assert(fs);
const auto& clock = env->GetSystemClock();
TableProperties tp;
if (iter->Valid() || !range_del_agg->IsEmpty()) {
TableBuilder* builder;
@ -151,7 +153,7 @@ Status BuildTable(
file->SetWriteLifeTimeHint(write_hint);
file_writer.reset(new WritableFileWriter(
std::move(file), fname, file_options, env, io_tracer,
std::move(file), fname, file_options, clock, io_tracer,
ioptions.statistics, ioptions.listeners,
ioptions.file_checksum_gen_factory));
@ -254,7 +256,7 @@ Status BuildTable(
// Finish and check for file errors
TEST_SYNC_POINT("BuildTable:BeforeSyncTable");
if (s.ok() && !empty) {
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
StopWatch sw(clock, ioptions.statistics, TABLE_SYNC_MICROS);
*io_status = file_writer->Sync(ioptions.use_fsync);
}
TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile");

@ -556,8 +556,8 @@ ColumnFamilyData::ColumnFamilyData(
// if _dummy_versions is nullptr, then this is a dummy column family.
if (_dummy_versions != nullptr) {
internal_stats_.reset(
new InternalStats(ioptions_.num_levels, db_options.env, this));
internal_stats_.reset(new InternalStats(
ioptions_.num_levels, db_options.env->GetSystemClock(), this));
table_cache_.reset(new TableCache(ioptions_, file_options, _table_cache,
block_cache_tracer, io_tracer));
blob_file_cache_.reset(

@ -80,6 +80,7 @@ CompactionIterator::CompactionIterator(
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
snapshot_checker_(snapshot_checker),
env_(env),
clock_(env_->GetSystemClock()),
report_detailed_time_(report_detailed_time),
expect_valid_internal_key_(expect_valid_internal_key),
range_del_agg_(range_del_agg),
@ -219,7 +220,7 @@ bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip,
// to get sequence number.
Slice& filter_key = ikey_.type == kTypeValue ? ikey_.user_key : key_;
{
StopWatchNano timer(env_, report_detailed_time_);
StopWatchNano timer(clock_, report_detailed_time_);
filter = compaction_filter_->FilterV2(
compaction_->level(), filter_key, value_type, value_,
&compaction_filter_value_, compaction_filter_skip_until_.rep());

@ -248,6 +248,7 @@ class CompactionIterator {
const SequenceNumber earliest_write_conflict_snapshot_;
const SnapshotChecker* const snapshot_checker_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
bool report_detailed_time_;
bool expect_valid_internal_key_;
CompactionRangeDelAggregator* range_del_agg_;

@ -320,6 +320,7 @@ CompactionJob::CompactionJob(
db_options_(db_options),
file_options_(file_options),
env_(db_options.env),
clock_(env_->GetSystemClock()),
io_tracer_(io_tracer),
fs_(db_options.fs, io_tracer),
file_options_for_read_(
@ -419,7 +420,7 @@ void CompactionJob::Prepare() {
if (c->ShouldFormSubcompactions()) {
{
StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME);
StopWatch sw(clock_, stats_, SUBCOMPACTION_SETUP_TIME);
GenSubcompactionBoundaries();
}
assert(sizes_.size() == boundaries_.size() + 1);
@ -585,7 +586,7 @@ Status CompactionJob::Run() {
const size_t num_threads = compact_->sub_compact_states.size();
assert(num_threads > 0);
const uint64_t start_micros = env_->NowMicros();
const uint64_t start_micros = clock_->NowMicros();
// Launch a thread for each of subcompactions 1...num_threads-1
std::vector<port::Thread> thread_pool;
@ -604,7 +605,7 @@ Status CompactionJob::Run() {
thread.join();
}
compaction_stats_.micros = env_->NowMicros() - start_micros;
compaction_stats_.micros = clock_->NowMicros() - start_micros;
compaction_stats_.cpu_micros = 0;
for (size_t i = 0; i < compact_->sub_compact_states.size(); i++) {
compaction_stats_.cpu_micros +=
@ -894,7 +895,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
assert(sub_compact);
assert(sub_compact->compaction);
uint64_t prev_cpu_micros = env_->NowCPUNanos() / 1000;
uint64_t prev_cpu_micros = clock_->CPUNanos() / 1000;
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
@ -1184,7 +1185,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
}
sub_compact->compaction_job_stats.cpu_micros =
env_->NowCPUNanos() / 1000 - prev_cpu_micros;
clock_->CPUNanos() / 1000 - prev_cpu_micros;
if (measure_io_stats_) {
sub_compact->compaction_job_stats.file_write_nanos +=
@ -1463,7 +1464,7 @@ Status CompactionJob::FinishCompactionOutputFile(
// Finish and check for file errors
if (s.ok()) {
StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
StopWatch sw(clock_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
io_s = sub_compact->outfile->Sync(db_options_.use_fsync);
}
if (s.ok() && io_s.ok()) {
@ -1738,7 +1739,7 @@ Status CompactionJob::OpenCompactionOutputFile(
const auto& listeners =
sub_compact->compaction->immutable_cf_options()->listeners;
sub_compact->outfile.reset(new WritableFileWriter(
std::move(writable_file), fname, file_options_, env_, io_tracer_,
std::move(writable_file), fname, file_options_, clock_, io_tracer_,
db_options_.statistics.get(), listeners,
db_options_.file_checksum_gen_factory.get()));

@ -50,6 +50,7 @@ class Arena;
class ErrorHandler;
class MemTable;
class SnapshotChecker;
class SystemClock;
class TableCache;
class Version;
class VersionEdit;
@ -159,6 +160,7 @@ class CompactionJob {
const FileOptions file_options_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
std::shared_ptr<IOTracer> io_tracer_;
FileSystemPtr fs_;
// env_option optimized for compaction table reads

@ -11,6 +11,7 @@
#include "db/blob/blob_index.h"
#include "db/db_test_util.h"
#include "env/mock_env.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/concurrent_task_limiter.h"

@ -13,6 +13,7 @@
#if !defined(ROCKSDB_LITE)
#include "db/db_test_util.h"
#include "env/mock_env.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "util/random.h"

@ -11,6 +11,7 @@
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "port/port.h"
#include "port/stack_trace.h"

@ -151,12 +151,13 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
own_info_log_(options.info_log == nullptr),
initial_db_options_(SanitizeOptions(dbname, options)),
env_(initial_db_options_.env),
clock_(initial_db_options_.env->GetSystemClock()),
io_tracer_(std::make_shared<IOTracer>()),
immutable_db_options_(initial_db_options_),
fs_(immutable_db_options_.fs, io_tracer_),
mutable_db_options_(initial_db_options_),
stats_(immutable_db_options_.statistics.get()),
mutex_(stats_, env_, DB_MUTEX_WAIT_MICROS,
mutex_(stats_, clock_, DB_MUTEX_WAIT_MICROS,
immutable_db_options_.use_adaptive_mutex),
default_cf_handle_(nullptr),
max_total_in_memory_state_(0),
@ -191,7 +192,7 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
bg_purge_scheduled_(0),
disable_delete_obsolete_files_(0),
pending_purge_obsolete_files_(0),
delete_obsolete_files_last_run_(env_->NowMicros()),
delete_obsolete_files_last_run_(clock_->NowMicros()),
last_stats_dump_time_microsec_(0),
next_job_id_(1),
has_unpersisted_data_(false),
@ -752,7 +753,7 @@ void DBImpl::PersistStats() {
return;
}
TEST_SYNC_POINT("DBImpl::PersistStats:StartRunning");
uint64_t now_seconds = env_->NowMicros() / kMicrosInSecond;
uint64_t now_seconds = clock_->NowMicros() / kMicrosInSecond;
Statistics* statistics = immutable_db_options_.statistics.get();
if (!statistics) {
@ -1653,8 +1654,8 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
}
#endif // NDEBUG
PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
StopWatch sw(env_, stats_, DB_GET);
PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
StopWatch sw(clock_, stats_, DB_GET);
PERF_TIMER_GUARD(get_snapshot_time);
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(
@ -1842,8 +1843,8 @@ std::vector<Status> DBImpl::MultiGet(
const std::vector<ColumnFamilyHandle*>& column_family,
const std::vector<Slice>& keys, std::vector<std::string>* values,
std::vector<std::string>* timestamps) {
PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
StopWatch sw(env_, stats_, DB_MULTIGET);
PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
StopWatch sw(clock_, stats_, DB_MULTIGET);
PERF_TIMER_GUARD(get_snapshot_time);
#ifndef NDEBUG
@ -1974,7 +1975,7 @@ std::vector<Status> DBImpl::MultiGet(
}
if (read_options.deadline.count() &&
env_->NowMicros() >
clock_->NowMicros() >
static_cast<uint64_t>(read_options.deadline.count())) {
break;
}
@ -1983,8 +1984,8 @@ std::vector<Status> DBImpl::MultiGet(
if (keys_read < num_keys) {
// The only reason to break out of the loop is when the deadline is
// exceeded
assert(env_->NowMicros() >
static_cast<uint64_t>(read_options.deadline.count()));
assert(clock_->NowMicros() >
static_cast<uint64_t>(read_options.deadline.count()));
for (++keys_read; keys_read < num_keys; ++keys_read) {
stat_list[keys_read] = Status::TimedOut();
}
@ -2422,8 +2423,8 @@ Status DBImpl::MultiGetImpl(
autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys,
SuperVersion* super_version, SequenceNumber snapshot,
ReadCallback* callback) {
PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
StopWatch sw(env_, stats_, DB_MULTIGET);
PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
StopWatch sw(clock_, stats_, DB_MULTIGET);
// For each of the given keys, apply the entire "get" process as follows:
// First look in the memtable, then in the immutable memtable (if any).
@ -2434,7 +2435,7 @@ Status DBImpl::MultiGetImpl(
uint64_t curr_value_size = 0;
while (keys_left) {
if (read_options.deadline.count() &&
env_->NowMicros() >
clock_->NowMicros() >
static_cast<uint64_t>(read_options.deadline.count())) {
s = Status::TimedOut();
break;
@ -3137,7 +3138,8 @@ FileSystem* DBImpl::GetFileSystem() const {
Status DBImpl::StartIOTrace(Env* env, const TraceOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer) {
assert(trace_writer != nullptr);
return io_tracer_->StartIOTrace(env, trace_options, std::move(trace_writer));
return io_tracer_->StartIOTrace(env->GetSystemClock(), trace_options,
std::move(trace_writer));
}
Status DBImpl::EndIOTrace() {
@ -4417,7 +4419,7 @@ Status DBImpl::IngestExternalFiles(
for (const auto& arg : args) {
auto* cfd = static_cast<ColumnFamilyHandleImpl*>(arg.column_family)->cfd();
ingestion_jobs.emplace_back(
env_, versions_.get(), cfd, immutable_db_options_, file_options_,
clock_, versions_.get(), cfd, immutable_db_options_, file_options_,
&snapshots_, arg.options, &directories_, &event_logger_, io_tracer_);
}
@ -4685,7 +4687,7 @@ Status DBImpl::CreateColumnFamilyWithImport(
// Import sst files from metadata.
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(*handle);
auto cfd = cfh->cfd();
ImportColumnFamilyJob import_job(env_, versions_.get(), cfd,
ImportColumnFamilyJob import_job(clock_, versions_.get(), cfd,
immutable_db_options_, file_options_,
import_options, metadata.files, io_tracer_);
@ -4941,7 +4943,7 @@ void DBImpl::WaitForIngestFile() {
Status DBImpl::StartTrace(const TraceOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer) {
InstrumentedMutexLock lock(&trace_mutex_);
tracer_.reset(new Tracer(env_, trace_options, std::move(trace_writer)));
tracer_.reset(new Tracer(clock_, trace_options, std::move(trace_writer)));
return Status::OK();
}

@ -54,9 +54,6 @@
#include "rocksdb/transaction_log.h"
#include "rocksdb/write_buffer_manager.h"
#include "table/scoped_arena_iterator.h"
#include "trace_replay/block_cache_tracer.h"
#include "trace_replay/io_tracer.h"
#include "trace_replay/trace_replay.h"
#include "util/autovector.h"
#include "util/hash.h"
#include "util/repeatable_thread.h"
@ -1053,6 +1050,7 @@ class DBImpl : public DB {
bool own_info_log_;
const DBOptions initial_db_options_;
Env* const env_;
std::shared_ptr<SystemClock> clock_;
std::shared_ptr<IOTracer> io_tracer_;
const ImmutableDBOptions immutable_db_options_;
FileSystemPtr fs_;

@ -2529,7 +2529,7 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
s.ToString().c_str(), error_cnt);
log_buffer.FlushBufferToLog();
LogFlush(immutable_db_options_.info_log);
env_->SleepForMicroseconds(1000000);
clock_->SleepForMicroseconds(1000000);
mutex_.Lock();
}
@ -2602,7 +2602,7 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
if (s.IsBusy()) {
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
mutex_.Unlock();
env_->SleepForMicroseconds(10000); // prevent hot loop
clock_->SleepForMicroseconds(10000); // prevent hot loop
mutex_.Lock();
} else if (!s.ok() && !s.IsShutdownInProgress() &&
!s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) {
@ -2620,7 +2620,7 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
"Accumulated background error counts: %" PRIu64,
s.ToString().c_str(), error_cnt);
LogFlush(immutable_db_options_.info_log);
env_->SleepForMicroseconds(1000000);
clock_->SleepForMicroseconds(1000000);
mutex_.Lock();
} else if (s.IsManualCompactionPaused()) {
ManualCompactionState* m = prepicked_compaction->manual_compaction_state;

@ -120,7 +120,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
mutable_db_options_.delete_obsolete_files_period_micros == 0) {
doing_the_full_scan = true;
} else {
const uint64_t now_micros = env_->NowMicros();
const uint64_t now_micros = clock_->NowMicros();
if ((delete_obsolete_files_last_run_ +
mutable_db_options_.delete_obsolete_files_period_micros) <
now_micros) {

@ -292,14 +292,14 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
file->SetPreallocationBlockSize(
immutable_db_options_.manifest_preallocation_size);
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(file), manifest, file_options, env_, io_tracer_,
std::move(file), manifest, file_options, clock_, io_tracer_,
nullptr /* stats */, immutable_db_options_.listeners));
log::Writer log(std::move(file_writer), 0, false);
std::string record;
new_db.EncodeTo(&record);
s = log.AddRecord(record);
if (s.ok()) {
s = SyncManifest(env_, &immutable_db_options_, log.file());
s = SyncManifest(clock_, &immutable_db_options_, log.file());
}
}
if (s.ok()) {
@ -1295,7 +1295,7 @@ Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
MemTable* mem, VersionEdit* edit) {
mutex_.AssertHeld();
const uint64_t start_micros = env_->NowMicros();
const uint64_t start_micros = clock_->NowMicros();
FileMetaData meta;
std::vector<BlobFileAddition> blob_file_additions;
@ -1395,7 +1395,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
}
InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
stats.micros = env_->NowMicros() - start_micros;
stats.micros = clock_->NowMicros() - start_micros;
if (has_output) {
stats.bytes_written = meta.fd.GetFileSize();
@ -1486,7 +1486,7 @@ IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
const auto& listeners = immutable_db_options_.listeners;
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(lfile), log_fname, opt_file_options, env_, io_tracer_,
std::move(lfile), log_fname, opt_file_options, clock_, io_tracer_,
nullptr /* stats */, listeners));
*new_log = new log::Writer(std::move(file_writer), log_file_num,
immutable_db_options_.recycle_log_file_num > 0,

@ -327,8 +327,8 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options,
ColumnFamilyHandle* column_family,
const Slice& key, PinnableSlice* pinnable_val) {
assert(pinnable_val != nullptr);
PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
StopWatch sw(env_, stats_, DB_GET);
PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
StopWatch sw(clock_, stats_, DB_GET);
PERF_TIMER_GUARD(get_snapshot_time);
auto cfh = static_cast<ColumnFamilyHandleImpl*>(column_family);

@ -160,7 +160,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
RecordTick(stats_, WRITE_WITH_WAL);
}
StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
write_thread_.JoinBatchGroup(&w);
if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) {
@ -465,7 +465,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
uint64_t* log_used, uint64_t log_ref,
bool disable_memtable, uint64_t* seq_used) {
PERF_TIMER_GUARD(write_pre_and_post_process_time);
StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
WriteContext write_context;
@ -621,7 +621,7 @@ Status DBImpl::UnorderedWriteMemtable(const WriteOptions& write_options,
SequenceNumber seq,
const size_t sub_batch_cnt) {
PERF_TIMER_GUARD(write_pre_and_post_process_time);
StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
WriteThread::Writer w(write_options, my_batch, callback, log_ref,
false /*disable_memtable*/);
@ -676,7 +676,7 @@ Status DBImpl::WriteImplWALOnly(
WriteThread::Writer w(write_options, my_batch, callback, log_ref,
disable_memtable, sub_batch_cnt, pre_release_callback);
RecordTick(stats_, WRITE_WITH_WAL);
StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
write_thread->JoinBatchGroup(&w);
assert(w.state != WriteThread::STATE_PARALLEL_MEMTABLE_WRITER);
@ -1093,7 +1093,7 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
}
if (io_s.ok() && need_log_sync) {
StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS);
StopWatch sw(clock_, stats_, WAL_FILE_SYNC_MICROS);
// It's safe to access logs_ with unlocked mutex_ here because:
// - we've set getting_synced=true for all logs,
// so other threads won't pop from logs_ while we're here,
@ -1457,8 +1457,8 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
uint64_t time_delayed = 0;
bool delayed = false;
{
StopWatch sw(env_, stats_, WRITE_STALL, &time_delayed);
uint64_t delay = write_controller_.GetDelay(env_, num_bytes);
StopWatch sw(clock_, stats_, WRITE_STALL, &time_delayed);
uint64_t delay = write_controller_.GetDelay(clock_, num_bytes);
if (delay > 0) {
if (write_options.no_slowdown) {
return Status::Incomplete("Write stall");
@ -1475,14 +1475,14 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
const uint64_t kDelayInterval = 1000;
uint64_t stall_end = sw.start_time() + delay;
while (write_controller_.NeedsDelay()) {
if (env_->NowMicros() >= stall_end) {
if (clock_->NowMicros() >= stall_end) {
// We already delayed this write `delay` microseconds
break;
}
delayed = true;
// Sleep for 0.001 seconds
env_->SleepForMicroseconds(kDelayInterval);
clock_->SleepForMicroseconds(kDelayInterval);
}
mutex_.Lock();
write_thread_.EndWriteStall();

@ -8,9 +8,10 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_iter.h"
#include <string>
#include <iostream>
#include <limits>
#include <string>
#include "db/dbformat.h"
#include "db/merge_context.h"
@ -24,6 +25,7 @@
#include "rocksdb/iterator.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/options.h"
#include "rocksdb/system_clock.h"
#include "table/internal_iterator.h"
#include "table/iterator_wrapper.h"
#include "trace_replay/trace_replay.h"
@ -43,6 +45,7 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
ColumnFamilyData* cfd, bool expose_blob_index)
: prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
env_(_env),
clock_(_env->GetSystemClock()),
logger_(cf_options.info_log),
user_comparator_(cmp),
merge_operator_(cf_options.merge_operator),
@ -127,7 +130,7 @@ void DBIter::Next() {
assert(valid_);
assert(status_.ok());
PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, env_);
PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
// Release temporarily pinned blocks from last operation
ReleaseTempPinnedData();
local_stats_.skip_count_ += num_internal_keys_skipped_;
@ -574,7 +577,7 @@ bool DBIter::MergeValuesNewToOld() {
const Slice val = iter_.value();
Status s = MergeHelper::TimedFullMerge(
merge_operator_, ikey.user_key, &val, merge_context_.GetOperands(),
&saved_value_, logger_, statistics_, env_, &pinned_value_, true);
&saved_value_, logger_, statistics_, clock_, &pinned_value_, true);
if (!s.ok()) {
valid_ = false;
status_ = s;
@ -617,7 +620,7 @@ bool DBIter::MergeValuesNewToOld() {
// client can differentiate this scenario and do things accordingly.
Status s = MergeHelper::TimedFullMerge(
merge_operator_, saved_key_.GetUserKey(), nullptr,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_, clock_,
&pinned_value_, true);
if (!s.ok()) {
valid_ = false;
@ -640,7 +643,7 @@ void DBIter::Prev() {
assert(valid_);
assert(status_.ok());
PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, env_);
PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
ReleaseTempPinnedData();
ResetInternalKeysSkippedCounter();
bool ok = true;
@ -921,7 +924,7 @@ bool DBIter::FindValueForCurrentKey() {
s = MergeHelper::TimedFullMerge(
merge_operator_, saved_key_.GetUserKey(), nullptr,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_,
env_, &pinned_value_, true);
clock_, &pinned_value_, true);
} else if (last_not_merge_type == kTypeBlobIndex) {
status_ =
Status::NotSupported("BlobDB does not support merge operator.");
@ -932,7 +935,7 @@ bool DBIter::FindValueForCurrentKey() {
s = MergeHelper::TimedFullMerge(
merge_operator_, saved_key_.GetUserKey(), &pinned_value_,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_,
env_, &pinned_value_, true);
clock_, &pinned_value_, true);
}
break;
case kTypeValue:
@ -1070,7 +1073,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
Status s = MergeHelper::TimedFullMerge(
merge_operator_, saved_key_.GetUserKey(), &val,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_,
env_, &pinned_value_, true);
clock_, &pinned_value_, true);
if (!s.ok()) {
valid_ = false;
status_ = s;
@ -1097,7 +1100,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
Status s = MergeHelper::TimedFullMerge(
merge_operator_, saved_key_.GetUserKey(), nullptr,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_,
merge_context_.GetOperands(), &saved_value_, logger_, statistics_, clock_,
&pinned_value_, true);
if (!s.ok()) {
valid_ = false;
@ -1248,8 +1251,8 @@ void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
}
void DBIter::Seek(const Slice& target) {
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
StopWatch sw(env_, statistics_, DB_SEEK);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
StopWatch sw(clock_, statistics_, DB_SEEK);
#ifndef ROCKSDB_LITE
if (db_impl_ != nullptr && cfd_ != nullptr) {
@ -1310,8 +1313,8 @@ void DBIter::Seek(const Slice& target) {
}
void DBIter::SeekForPrev(const Slice& target) {
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
StopWatch sw(env_, statistics_, DB_SEEK);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
StopWatch sw(clock_, statistics_, DB_SEEK);
#ifndef ROCKSDB_LITE
if (db_impl_ != nullptr && cfd_ != nullptr) {
@ -1378,7 +1381,7 @@ void DBIter::SeekToFirst() {
Seek(*iterate_lower_bound_);
return;
}
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
// Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used.
if (!expect_total_order_inner_iter()) {
@ -1439,7 +1442,7 @@ void DBIter::SeekToLast() {
return;
}
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
// Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used.
if (!expect_total_order_inner_iter()) {

@ -21,7 +21,7 @@
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
class SystemClock_;
class Version;
// This file declares the factory functions of DBIter, in its original form
@ -298,6 +298,7 @@ class DBIter final : public Iterator {
const SliceTransform* prefix_extractor_;
Env* const env_;
std::shared_ptr<SystemClock> clock_;
Logger* logger_;
UserComparatorWrapper user_comparator_;
const MergeOperator* const merge_operator_;

@ -13,6 +13,7 @@
#if !defined(ROCKSDB_LITE)
#include "db/db_test_util.h"
#include "env/mock_env.h"
#include "port/stack_trace.h"
namespace ROCKSDB_NAMESPACE {

@ -10,6 +10,7 @@
#include "db/db_test_util.h"
#include "db/forward_iterator.h"
#include "env/mock_env.h"
#include "rocksdb/convenience.h"
#include "rocksdb/env_encryption.h"
#include "rocksdb/utilities/object_registry.h"

@ -23,7 +23,6 @@
#include "db/db_impl/db_impl.h"
#include "db/dbformat.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "memtable/hash_linklist_rep.h"
#include "rocksdb/cache.h"
@ -40,7 +39,6 @@
#include "rocksdb/utilities/checkpoint.h"
#include "table/mock_table.h"
#include "table/scoped_arena_iterator.h"
#include "test_util/mock_time_env.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "util/cast_util.h"
@ -50,6 +48,7 @@
#include "utilities/merge_operators.h"
namespace ROCKSDB_NAMESPACE {
class MockEnv;
namespace anon {
class AtomicCounter {

@ -670,7 +670,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
// a period of time and redo auto resume if it is allowed.
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait0");
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1");
int64_t wait_until = db_->env_->NowMicros() + wait_interval;
int64_t wait_until = db_->clock_->NowMicros() + wait_interval;
cv_.TimedWait(wait_until);
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterWait0");
} else {

@ -293,12 +293,13 @@ Status ExternalSstFileIngestionJob::Prepare(
// TODO: The following is duplicated with Cleanup().
if (!status.ok()) {
IOOptions io_opts;
// We failed, remove all files that we copied into the db
for (IngestedFileInfo& f : files_to_ingest_) {
if (f.internal_file_path.empty()) {
continue;
}
Status s = env_->DeleteFile(f.internal_file_path);
Status s = fs_->DeleteFile(f.internal_file_path, io_opts, nullptr);
if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log,
"AddFile() clean up for file %s failed : %s",
@ -392,7 +393,7 @@ Status ExternalSstFileIngestionJob::Run() {
int64_t temp_current_time = 0;
uint64_t current_time = kUnknownFileCreationTime;
uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
if (env_->GetCurrentTime(&temp_current_time).ok()) {
if (clock_->GetCurrentTime(&temp_current_time).ok()) {
current_time = oldest_ancester_time =
static_cast<uint64_t>(temp_current_time);
}
@ -410,7 +411,7 @@ void ExternalSstFileIngestionJob::UpdateStats() {
// Update internal stats for new ingested files
uint64_t total_keys = 0;
uint64_t total_l0_files = 0;
uint64_t total_time = env_->NowMicros() - job_start_time_;
uint64_t total_time = clock_->NowMicros() - job_start_time_;
EventLoggerStream stream = event_logger_->Log();
stream << "event"
@ -466,6 +467,7 @@ void ExternalSstFileIngestionJob::UpdateStats() {
}
void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
IOOptions io_opts;
if (!status.ok()) {
// We failed to add the files to the database
// remove all the files we copied
@ -473,7 +475,7 @@ void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
if (f.internal_file_path.empty()) {
continue;
}
Status s = env_->DeleteFile(f.internal_file_path);
Status s = fs_->DeleteFile(f.internal_file_path, io_opts, nullptr);
if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log,
"AddFile() clean up for file %s failed : %s",
@ -485,7 +487,7 @@ void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
} else if (status.ok() && ingestion_options_.move_files) {
// The files were moved and added successfully, remove original file links
for (IngestedFileInfo& f : files_to_ingest_) {
Status s = env_->DeleteFile(f.external_file_path);
Status s = fs_->DeleteFile(f.external_file_path, io_opts, nullptr);
if (!s.ok()) {
ROCKS_LOG_WARN(
db_options_.info_log,

@ -16,13 +16,14 @@
#include "logging/event_logger.h"
#include "options/db_options.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/sst_file_writer.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
class Directories;
class SystemClock;
struct IngestedFileInfo {
// External file path
@ -73,13 +74,13 @@ struct IngestedFileInfo {
class ExternalSstFileIngestionJob {
public:
ExternalSstFileIngestionJob(
Env* env, VersionSet* versions, ColumnFamilyData* cfd,
const ImmutableDBOptions& db_options, const EnvOptions& env_options,
SnapshotList* db_snapshots,
const std::shared_ptr<SystemClock>& clock, VersionSet* versions,
ColumnFamilyData* cfd, const ImmutableDBOptions& db_options,
const EnvOptions& env_options, SnapshotList* db_snapshots,
const IngestExternalFileOptions& ingestion_options,
Directories* directories, EventLogger* event_logger,
const std::shared_ptr<IOTracer>& io_tracer)
: env_(env),
: clock_(clock),
fs_(db_options.fs, io_tracer),
versions_(versions),
cfd_(cfd),
@ -89,7 +90,7 @@ class ExternalSstFileIngestionJob {
ingestion_options_(ingestion_options),
directories_(directories),
event_logger_(event_logger),
job_start_time_(env_->NowMicros()),
job_start_time_(clock_->NowMicros()),
consumed_seqno_count_(0),
io_tracer_(io_tracer) {
assert(directories != nullptr);
@ -169,7 +170,7 @@ class ExternalSstFileIngestionJob {
template <typename TWritableFile>
Status SyncIngestedFile(TWritableFile* file);
Env* env_;
std::shared_ptr<SystemClock> clock_;
FileSystemPtr fs_;
VersionSet* versions_;
ColumnFamilyData* cfd_;

@ -127,6 +127,7 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
pick_memtable_called(false),
thread_pri_(thread_pri),
io_tracer_(io_tracer),
clock_(db_options_.env->GetSystemClock()),
full_history_ts_low_(std::move(full_history_ts_low)) {
// Update the thread status to indicate flush.
ReportStartedFlush();
@ -309,8 +310,8 @@ Status FlushJob::WriteLevel0Table() {
AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_FLUSH_WRITE_L0);
db_mutex_->AssertHeld();
const uint64_t start_micros = db_options_.env->NowMicros();
const uint64_t start_cpu_micros = db_options_.env->NowCPUNanos() / 1000;
const uint64_t start_micros = clock_->NowMicros();
const uint64_t start_cpu_micros = clock_->CPUNanos() / 1000;
Status s;
std::vector<BlobFileAddition> blob_file_additions;
@ -371,7 +372,7 @@ Status FlushJob::WriteLevel0Table() {
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression",
&output_compression_);
int64_t _current_time = 0;
auto status = db_options_.env->GetCurrentTime(&_current_time);
auto status = clock_->GetCurrentTime(&_current_time);
// Safe to proceed even if GetCurrentTime fails. So, log and proceed.
if (!status.ok()) {
ROCKS_LOG_WARN(
@ -466,8 +467,8 @@ Status FlushJob::WriteLevel0Table() {
// Note that here we treat flush as level 0 compaction in internal stats
InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
stats.micros = db_options_.env->NowMicros() - start_micros;
stats.cpu_micros = db_options_.env->NowCPUNanos() / 1000 - start_cpu_micros;
stats.micros = clock_->NowMicros() - start_micros;
stats.cpu_micros = clock_->CPUNanos() / 1000 - start_cpu_micros;
if (has_output) {
stats.bytes_written = meta_.fd.GetFileSize();

@ -162,6 +162,7 @@ class FlushJob {
IOStatus io_status_;
const std::shared_ptr<IOTracer> io_tracer_;
const std::shared_ptr<SystemClock> clock_;
const std::string full_history_ts_low_;
};

@ -140,7 +140,7 @@ Status ImportColumnFamilyJob::Run() {
int64_t temp_current_time = 0;
uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
uint64_t current_time = kUnknownOldestAncesterTime;
if (env_->GetCurrentTime(&temp_current_time).ok()) {
if (clock_->GetCurrentTime(&temp_current_time).ok()) {
current_time = oldest_ancester_time =
static_cast<uint64_t>(temp_current_time);
}

@ -9,24 +9,26 @@
#include "db/snapshot_impl.h"
#include "options/db_options.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/metadata.h"
#include "rocksdb/sst_file_writer.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
struct EnvOptions;
class SystemClock;
// Imports a set of sst files as is into a new column family. Logic is similar
// to ExternalSstFileIngestionJob.
class ImportColumnFamilyJob {
public:
ImportColumnFamilyJob(Env* env, VersionSet* versions, ColumnFamilyData* cfd,
ImportColumnFamilyJob(const std::shared_ptr<SystemClock>& clock,
VersionSet* versions, ColumnFamilyData* cfd,
const ImmutableDBOptions& db_options,
const EnvOptions& env_options,
const ImportColumnFamilyOptions& import_options,
const std::vector<LiveFileMetaData>& metadata,
const std::shared_ptr<IOTracer>& io_tracer)
: env_(env),
: clock_(clock),
versions_(versions),
cfd_(cfd),
db_options_(db_options),
@ -59,7 +61,7 @@ class ImportColumnFamilyJob {
IngestedFileInfo* file_to_import,
SuperVersion* sv);
Env* env_;
std::shared_ptr<SystemClock> clock_;
VersionSet* versions_;
ColumnFamilyData* cfd_;
const ImmutableDBOptions& db_options_;

@ -990,7 +990,7 @@ bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/,
void InternalStats::DumpDBStats(std::string* value) {
char buf[1000];
// DB-level stats, only available from default column family
double seconds_up = (env_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double seconds_up = (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf),
"\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n",
@ -1313,7 +1313,7 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
}
}
double seconds_up = (env_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double seconds_up = (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n",
seconds_up, interval_seconds_up);

@ -14,6 +14,7 @@
#include <vector>
#include "db/version_set.h"
#include "rocksdb/system_clock.h"
class ColumnFamilyData;
@ -122,7 +123,8 @@ class InternalStats {
kIntStatsNumMax,
};
InternalStats(int num_levels, Env* env, ColumnFamilyData* cfd)
InternalStats(int num_levels, const std::shared_ptr<SystemClock>& clock,
ColumnFamilyData* cfd)
: db_stats_{},
cf_stats_value_{},
cf_stats_count_{},
@ -131,9 +133,9 @@ class InternalStats {
file_read_latency_(num_levels),
bg_error_count_(0),
number_levels_(num_levels),
env_(env),
clock_(clock),
cfd_(cfd),
started_at_(env->NowMicros()) {}
started_at_(clock->NowMicros()) {}
// Per level compaction stats. comp_stats_[level] stores the stats for
// compactions that produced data for the specified "level".
@ -341,7 +343,7 @@ class InternalStats {
cf_stats_snapshot_.Clear();
db_stats_snapshot_.Clear();
bg_error_count_ = 0;
started_at_ = env_->NowMicros();
started_at_ = clock_->NowMicros();
}
void AddCompactionStats(int level, Env::Priority thread_pri,
@ -602,7 +604,7 @@ class InternalStats {
uint64_t bg_error_count_;
const int number_levels_;
Env* env_;
const std::shared_ptr<SystemClock> clock_;
ColumnFamilyData* cfd_;
uint64_t started_at_;
};
@ -641,7 +643,9 @@ class InternalStats {
kIntStatsNumMax,
};
InternalStats(int /*num_levels*/, Env* /*env*/, ColumnFamilyData* /*cfd*/) {}
InternalStats(int /*num_levels*/,
const std::shared_ptr<SystemClock>& /*clock*/,
ColumnFamilyData* /*cfd*/) {}
struct CompactionStats {
uint64_t micros;

@ -102,7 +102,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
: 0),
prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
flush_state_(FLUSH_NOT_REQUESTED),
env_(ioptions.env),
clock_(ioptions.env->GetSystemClock()),
insert_with_hint_prefix_extractor_(
ioptions.memtable_insert_with_hint_prefix_extractor),
oldest_key_time_(std::numeric_limits<uint64_t>::max()),
@ -221,7 +221,7 @@ void MemTable::UpdateOldestKeyTime() {
uint64_t oldest_key_time = oldest_key_time_.load(std::memory_order_relaxed);
if (oldest_key_time == std::numeric_limits<uint64_t>::max()) {
int64_t current_time = 0;
auto s = env_->GetCurrentTime(&current_time);
auto s = clock_->GetCurrentTime(&current_time);
if (s.ok()) {
assert(current_time >= 0);
// If fail, the timestamp is already set.
@ -628,7 +628,8 @@ struct Saver {
Statistics* statistics;
bool inplace_update_support;
bool do_merge;
Env* env_;
std::shared_ptr<SystemClock> clock;
ReadCallback* callback_;
bool* is_blob_index;
bool allow_data_in_errors;
@ -712,7 +713,7 @@ static bool SaveValue(void* arg, const char* entry) {
*(s->status) = MergeHelper::TimedFullMerge(
merge_operator, s->key->user_key(), &v,
merge_context->GetOperands(), s->value, s->logger,
s->statistics, s->env_, nullptr /* result_operand */, true);
s->statistics, s->clock, nullptr /* result_operand */, true);
}
} else {
// Preserve the value with the goal of returning it as part of
@ -751,7 +752,7 @@ static bool SaveValue(void* arg, const char* entry) {
*(s->status) = MergeHelper::TimedFullMerge(
merge_operator, s->key->user_key(), nullptr,
merge_context->GetOperands(), s->value, s->logger,
s->statistics, s->env_, nullptr /* result_operand */, true);
s->statistics, s->clock, nullptr /* result_operand */, true);
}
} else {
*(s->status) = Status::NotFound();
@ -779,7 +780,7 @@ static bool SaveValue(void* arg, const char* entry) {
*(s->status) = MergeHelper::TimedFullMerge(
merge_operator, s->key->user_key(), nullptr,
merge_context->GetOperands(), s->value, s->logger, s->statistics,
s->env_, nullptr /* result_operand */, true);
s->clock, nullptr /* result_operand */, true);
*(s->found_final_value) = true;
return false;
}
@ -887,7 +888,7 @@ void MemTable::GetFromTable(const LookupKey& key,
saver.logger = moptions_.info_log;
saver.inplace_update_support = moptions_.inplace_update_support;
saver.statistics = moptions_.statistics;
saver.env_ = env_;
saver.clock = clock_;
saver.callback_ = callback;
saver.is_blob_index = is_blob_index;
saver.do_merge = do_merge;

@ -24,7 +24,6 @@
#include "monitoring/instrumented_mutex.h"
#include "options/cf_options.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/memtablerep.h"
#include "table/multiget_context.h"
#include "util/dynamic_bloom.h"
@ -36,6 +35,7 @@ struct FlushJobInfo;
class Mutex;
class MemTableIterator;
class MergeContext;
class SystemClock;
struct ImmutableMemTableOptions {
explicit ImmutableMemTableOptions(const ImmutableCFOptions& ioptions,
@ -504,7 +504,7 @@ class MemTable {
std::atomic<FlushStateEnum> flush_state_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
// Extract sequential insert prefixes.
const SliceTransform* insert_with_hint_prefix_extractor_;

@ -14,6 +14,7 @@
#include "rocksdb/comparator.h"
#include "rocksdb/db.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/system_clock.h"
#include "table/format.h"
#include "table/internal_iterator.h"
@ -28,6 +29,7 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
Statistics* stats,
const std::atomic<bool>* shutting_down)
: env_(env),
clock_(env->GetSystemClock()),
user_comparator_(user_comparator),
user_merge_operator_(user_merge_operator),
compaction_filter_(compaction_filter),
@ -39,7 +41,7 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
snapshot_checker_(snapshot_checker),
level_(level),
keys_(),
filter_timer_(env_),
filter_timer_(clock_),
total_filter_time_(0U),
stats_(stats) {
assert(user_comparator_ != nullptr);
@ -48,13 +50,11 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
}
}
Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
const Slice& key, const Slice* value,
const std::vector<Slice>& operands,
std::string* result, Logger* logger,
Statistics* statistics, Env* env,
Slice* result_operand,
bool update_num_ops_stats) {
Status MergeHelper::TimedFullMerge(
const MergeOperator* merge_operator, const Slice& key, const Slice* value,
const std::vector<Slice>& operands, std::string* result, Logger* logger,
Statistics* statistics, const std::shared_ptr<SystemClock>& clock,
Slice* result_operand, bool update_num_ops_stats) {
assert(merge_operator != nullptr);
if (operands.size() == 0) {
@ -75,7 +75,7 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand);
{
// Setup to time the merge
StopWatchNano timer(env, statistics != nullptr);
StopWatchNano timer(clock, statistics != nullptr);
PERF_TIMER_GUARD(merge_operator_time_nanos);
// Do the merge
@ -213,7 +213,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
std::string merge_result;
s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr,
merge_context_.GetOperands(), &merge_result, logger_,
stats_, env_);
stats_, clock_);
// We store the result in keys_.back() and operands_.back()
// if nothing went wrong (i.e.: no operand corruption on disk)
@ -324,7 +324,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
std::string merge_result;
s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr,
merge_context_.GetOperands(), &merge_result, logger_,
stats_, env_);
stats_, clock_);
if (s.ok()) {
// The original key encountered
// We are certain that keys_ is not empty here (see assertions couple of
@ -347,7 +347,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
bool merge_success = false;
std::string merge_result;
{
StopWatchNano timer(env_, stats_ != nullptr);
StopWatchNano timer(clock_, stats_ != nullptr);
PERF_TIMER_GUARD(merge_operator_time_nanos);
merge_success = user_merge_operator_->PartialMergeMulti(
orig_ikey.user_key,

@ -25,6 +25,7 @@ class Iterator;
class Logger;
class MergeOperator;
class Statistics;
class SystemClock;
class MergeHelper {
public:
@ -44,13 +45,11 @@ class MergeHelper {
// Returns one of the following statuses:
// - OK: Entries were successfully merged.
// - Corruption: Merge operator reported unsuccessful merge.
static Status TimedFullMerge(const MergeOperator* merge_operator,
const Slice& key, const Slice* value,
const std::vector<Slice>& operands,
std::string* result, Logger* logger,
Statistics* statistics, Env* env,
Slice* result_operand = nullptr,
bool update_num_ops_stats = false);
static Status TimedFullMerge(
const MergeOperator* merge_operator, const Slice& key, const Slice* value,
const std::vector<Slice>& operands, std::string* result, Logger* logger,
Statistics* statistics, const std::shared_ptr<SystemClock>& clock,
Slice* result_operand = nullptr, bool update_num_ops_stats = false);
// Merge entries until we hit
// - a corrupted key
@ -140,6 +139,7 @@ class MergeHelper {
private:
Env* env_;
std::shared_ptr<SystemClock> clock_;
const Comparator* user_comparator_;
const MergeOperator* user_merge_operator_;
const CompactionFilter* compaction_filter_;

@ -3,6 +3,8 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include "rocksdb/perf_context.h"
#include <algorithm>
#include <iostream>
#include <thread>
@ -15,8 +17,8 @@
#include "port/port.h"
#include "rocksdb/db.h"
#include "rocksdb/memtablerep.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "test_util/testharness.h"
#include "util/stop_watch.h"
#include "util/string_util.h"
@ -91,7 +93,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
std::string value;
get_perf_context()->Reset();
StopWatchNano timer(Env::Default());
StopWatchNano timer(SystemClock::Default());
timer.Start();
auto status = db->Get(read_options, key, &value);
auto elapsed_nanos = timer.ElapsedNanos();
@ -110,7 +112,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
iter->SeekToFirst();
hist_seek_to_first.Add(get_perf_context()->user_key_comparison_count);
auto elapsed_nanos = timer.ElapsedNanos();
@ -131,7 +133,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
std::string key = "k" + ToString(i);
get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
iter->Seek(key);
auto elapsed_nanos = timer.ElapsedNanos();
hist_seek.Add(get_perf_context()->user_key_comparison_count);
@ -145,7 +147,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
get_perf_context()->Reset();
ASSERT_TRUE(iter->Valid());
StopWatchNano timer2(Env::Default(), true);
StopWatchNano timer2(SystemClock::Default(), true);
iter->Next();
auto elapsed_nanos2 = timer2.ElapsedNanos();
if (FLAGS_verbose) {
@ -164,7 +166,7 @@ TEST_F(PerfContextTest, StopWatchNanoOverhead) {
const int kTotalIterations = 1000000;
std::vector<uint64_t> timings(kTotalIterations);
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
for (auto& timing : timings) {
timing = timer.ElapsedNanos(true /* reset */);
}
@ -185,7 +187,7 @@ TEST_F(PerfContextTest, StopWatchOverhead) {
uint64_t elapsed = 0;
std::vector<uint64_t> timings(kTotalIterations);
StopWatch timer(Env::Default(), nullptr, 0, &elapsed);
StopWatch timer(SystemClock::Default(), nullptr, 0, &elapsed);
for (auto& timing : timings) {
timing = elapsed;
}
@ -539,7 +541,7 @@ TEST_F(PerfContextTest, SeekKeyComparison) {
HistogramImpl hist_time_diff;
SetPerfLevel(kEnableTime);
StopWatchNano timer(Env::Default());
StopWatchNano timer(SystemClock::Default());
for (const int i : keys) {
std::string key = "k" + ToString(i);
std::string value = "v" + ToString(i);
@ -592,25 +594,25 @@ TEST_F(PerfContextTest, DBMutexLockCounter) {
for (PerfLevel perf_level_test :
{PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) {
for (int c = 0; c < 2; ++c) {
InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]);
mutex.Lock();
ROCKSDB_NAMESPACE::port::Thread child_thread([&] {
SetPerfLevel(perf_level_test);
get_perf_context()->Reset();
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
InstrumentedMutex mutex(nullptr, SystemClock::Default(), stats_code[c]);
mutex.Lock();
mutex.Unlock();
if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex ||
stats_code[c] != DB_MUTEX_WAIT_MICROS) {
ROCKSDB_NAMESPACE::port::Thread child_thread([&] {
SetPerfLevel(perf_level_test);
get_perf_context()->Reset();
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
} else {
// increment the counter only when it's a DB Mutex
ASSERT_GT(get_perf_context()->db_mutex_lock_nanos, 0);
}
});
Env::Default()->SleepForMicroseconds(100);
mutex.Unlock();
child_thread.join();
mutex.Lock();
mutex.Unlock();
if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex ||
stats_code[c] != DB_MUTEX_WAIT_MICROS) {
ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
} else {
// increment the counter only when it's a DB Mutex
ASSERT_GT(get_perf_context()->db_mutex_lock_nanos, 0);
}
});
SystemClock::Default()->SleepForMicroseconds(100);
mutex.Unlock();
child_thread.join();
}
}
}
@ -619,7 +621,7 @@ TEST_F(PerfContextTest, FalseDBMutexWait) {
SetPerfLevel(kEnableTime);
int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)};
for (int c = 0; c < 2; ++c) {
InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]);
InstrumentedMutex mutex(nullptr, SystemClock::Default(), stats_code[c]);
InstrumentedCondVar lock(&mutex);
get_perf_context()->Reset();
mutex.Lock();
@ -824,8 +826,8 @@ TEST_F(PerfContextTest, PerfContextByLevelGetSet) {
}
TEST_F(PerfContextTest, CPUTimer) {
if (Env::Default()->NowCPUNanos() == 0) {
ROCKSDB_GTEST_SKIP("Target without NowCPUNanos support");
if (SystemClock::Default()->CPUNanos() == 0) {
ROCKSDB_GTEST_SKIP("Target without CPUNanos support");
return;
}

@ -6,12 +6,14 @@
#include "db/periodic_work_scheduler.h"
#include "db/db_impl/db_impl.h"
#include "rocksdb/system_clock.h"
#ifndef ROCKSDB_LITE
namespace ROCKSDB_NAMESPACE {
PeriodicWorkScheduler::PeriodicWorkScheduler(Env* env) : timer_mu_(env) {
timer = std::unique_ptr<Timer>(new Timer(env));
// Builds the scheduler's internal Timer on top of the supplied SystemClock,
// so all periodic work is driven by that clock (mockable in tests).
PeriodicWorkScheduler::PeriodicWorkScheduler(
    const std::shared_ptr<SystemClock>& clock) {
  timer.reset(new Timer(clock));
}
void PeriodicWorkScheduler::Register(DBImpl* dbi,
@ -52,10 +54,10 @@ void PeriodicWorkScheduler::Unregister(DBImpl* dbi) {
}
PeriodicWorkScheduler* PeriodicWorkScheduler::Default() {
// Always use the default Env for the scheduler, as we only use the NowMicros
// which is the same for all env.
// The Env could only be overridden in test.
static PeriodicWorkScheduler scheduler(Env::Default());
// Always use the default SystemClock for the scheduler, as we only use the
// NowMicros which is the same for all clocks. The SystemClock could only be
// overridden in tests.
static PeriodicWorkScheduler scheduler(SystemClock::Default());
return &scheduler;
}
@ -69,12 +71,13 @@ std::string PeriodicWorkScheduler::GetTaskName(DBImpl* dbi,
#ifndef NDEBUG
// Get the static scheduler. For a new env, it needs to re-create the internal
// timer, so only re-create it when there's no running task. Otherwise, return
// the existing scheduler. Which means if the unittest needs to update MockEnv,
// Close all db instances and then re-open them.
PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(Env* env) {
static PeriodicWorkTestScheduler scheduler(env);
// Get the static scheduler. For a new SystemClock, it needs to re-create the
// internal timer, so only re-create it when there's no running task. Otherwise,
// return the existing scheduler. Which means if the unittest needs to update
// MockClock, Close all db instances and then re-open them.
PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(
const std::shared_ptr<SystemClock>& clock) {
static PeriodicWorkTestScheduler scheduler(clock);
static port::Mutex mutex;
{
MutexLock l(&mutex);
@ -84,7 +87,7 @@ PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(Env* env) {
MutexLock timer_mu_guard(&scheduler.timer_mu_);
scheduler.timer->Shutdown();
}
scheduler.timer.reset(new Timer(env));
scheduler.timer.reset(new Timer(clock));
}
}
return &scheduler;
@ -104,8 +107,9 @@ size_t PeriodicWorkTestScheduler::TEST_GetValidTaskNum() const {
return 0;
}
PeriodicWorkTestScheduler::PeriodicWorkTestScheduler(Env* env)
: PeriodicWorkScheduler(env) {}
// Test-only constructor: simply forwards the (possibly mocked) SystemClock to
// the base PeriodicWorkScheduler, which builds its Timer on it.
PeriodicWorkTestScheduler::PeriodicWorkTestScheduler(
    const std::shared_ptr<SystemClock>& clock)
    : PeriodicWorkScheduler(clock) {}
#endif // !NDEBUG
} // namespace ROCKSDB_NAMESPACE

@ -11,6 +11,7 @@
#include "util/timer.h"
namespace ROCKSDB_NAMESPACE {
class SystemClock;
// PeriodicWorkScheduler is a singleton object, which is scheduling/running
// DumpStats(), PersistStats(), and FlushInfoLog() for all DB instances. All DB
@ -49,25 +50,26 @@ class PeriodicWorkScheduler {
// the `Timer::Cancel()`s and `Timer::Shutdown()` run atomically.
port::Mutex timer_mu_;
explicit PeriodicWorkScheduler(Env* env);
explicit PeriodicWorkScheduler(const std::shared_ptr<SystemClock>& clock);
private:
std::string GetTaskName(DBImpl* dbi, const std::string& func_name);
};
#ifndef NDEBUG
// PeriodicWorkTestScheduler is for unittest, which can specify the Env like
// SafeMockTimeEnv. It also contains functions for unittest.
// PeriodicWorkTestScheduler is for unittest, which can specify the
// SystemClock. It also contains functions for unittest.
class PeriodicWorkTestScheduler : public PeriodicWorkScheduler {
public:
static PeriodicWorkTestScheduler* Default(Env* env);
static PeriodicWorkTestScheduler* Default(
const std::shared_ptr<SystemClock>& clock);
void TEST_WaitForRun(std::function<void()> callback) const;
size_t TEST_GetValidTaskNum() const;
private:
explicit PeriodicWorkTestScheduler(Env* env);
explicit PeriodicWorkTestScheduler(const std::shared_ptr<SystemClock>& clock);
};
#endif // !NDEBUG

@ -6,6 +6,8 @@
#include "db/periodic_work_scheduler.h"
#include "db/db_test_util.h"
#include "env/composite_env_wrapper.h"
#include "test_util/mock_time_env.h"
namespace ROCKSDB_NAMESPACE {
@ -14,20 +16,22 @@ class PeriodicWorkSchedulerTest : public DBTestBase {
public:
PeriodicWorkSchedulerTest()
: DBTestBase("/periodic_work_scheduler_test", /*env_do_fsync=*/true) {
mock_env_.reset(new MockTimeEnv(env_));
mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_));
}
protected:
std::unique_ptr<MockTimeEnv> mock_env_;
std::unique_ptr<Env> mock_env_;
std::shared_ptr<MockSystemClock> mock_clock_;
void SetUp() override {
mock_env_->InstallTimedWaitFixCallback();
mock_clock_->InstallTimedWaitFixCallback();
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::StartPeriodicWorkScheduler:Init", [&](void* arg) {
auto* periodic_work_scheduler_ptr =
reinterpret_cast<PeriodicWorkScheduler**>(arg);
*periodic_work_scheduler_ptr =
PeriodicWorkTestScheduler::Default(mock_env_.get());
PeriodicWorkTestScheduler::Default(mock_clock_);
});
}
};
@ -63,7 +67,7 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
ASSERT_GT(kPeriodSec, 1u);
dbfull()->TEST_WaitForStatsDumpRun([&] {
mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec) - 1);
mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec) - 1);
});
auto scheduler = dbfull()->TEST_GetPeriodicWorkScheduler();
@ -75,14 +79,14 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
ASSERT_EQ(1, flush_info_log_counter);
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(2, dump_st_counter);
ASSERT_EQ(2, pst_st_counter);
ASSERT_EQ(2, flush_info_log_counter);
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(3, dump_st_counter);
ASSERT_EQ(3, pst_st_counter);
@ -96,7 +100,7 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
// Info log flush should still run.
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(3, dump_st_counter);
ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(4, flush_info_log_counter);
@ -114,7 +118,7 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
ASSERT_EQ(2, scheduler->TEST_GetValidTaskNum());
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
[&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(4, dump_st_counter);
ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(5, flush_info_log_counter);
@ -154,19 +158,19 @@ TEST_F(PeriodicWorkSchedulerTest, MultiInstances) {
int expected_run = kInstanceNum;
dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
expected_run += kInstanceNum;
dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
expected_run += kInstanceNum;
dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
@ -178,9 +182,9 @@ TEST_F(PeriodicWorkSchedulerTest, MultiInstances) {
expected_run += (kInstanceNum - half) * 2;
dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter);
@ -202,7 +206,8 @@ TEST_F(PeriodicWorkSchedulerTest, MultiEnv) {
Reopen(options1);
std::unique_ptr<MockTimeEnv> mock_env2(new MockTimeEnv(Env::Default()));
std::unique_ptr<Env> mock_env2(
new CompositeEnvWrapper(Env::Default(), mock_clock_));
Options options2;
options2.stats_dump_period_sec = kDumpPeriodSec;
options2.stats_persist_period_sec = kPersistPeriodSec;

@ -25,6 +25,7 @@ int main() {
#include "rocksdb/memtablerep.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table.h"
#include "test_util/testharness.h"
#include "util/cast_util.h"
@ -608,7 +609,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator) {
std::string value(FLAGS_value_size, 0);
get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
ASSERT_OK(db->Put(write_options, key, value));
hist_put_time.Add(timer.ElapsedNanos());
hist_put_comparison.Add(get_perf_context()->user_key_comparison_count);
@ -631,7 +632,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator) {
std::string value = "v" + ToString(0);
get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
auto key_prefix = options.prefix_extractor->Transform(key);
uint64_t total_keys = 0;
for (iter->Seek(key);
@ -665,7 +666,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator) {
Slice key = TestKeyToSlice(s, test_key);
get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
iter->Seek(key);
hist_no_seek_time.Add(timer.ElapsedNanos());
hist_no_seek_comparison.Add(get_perf_context()->user_key_comparison_count);

@ -11,8 +11,8 @@ int main() {
}
#else
#include <iostream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include <set>
@ -22,14 +22,13 @@ int main() {
#include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h"
#include "rocksdb/comparator.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "test_util/testutil.h"
#include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/random.h"
#include "util/stop_watch.h"
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
DEFINE_int32(num_range_tombstones, 1000, "number of range tombstones created");
@ -220,7 +219,7 @@ int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::kMaxSequenceNumber));
ROCKSDB_NAMESPACE::StopWatchNano stop_watch_add_tombstones(
ROCKSDB_NAMESPACE::Env::Default(), true /* auto_start */);
ROCKSDB_NAMESPACE::SystemClock::Default(), true /* auto_start */);
range_del_agg.AddTombstones(std::move(fragmented_range_del_iter));
stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos();
}
@ -237,7 +236,7 @@ int main(int argc, char** argv) {
parsed_key.user_key = key_string;
ROCKSDB_NAMESPACE::StopWatchNano stop_watch_should_delete(
ROCKSDB_NAMESPACE::Env::Default(), true /* auto_start */);
ROCKSDB_NAMESPACE::SystemClock::Default(), true /* auto_start */);
range_del_agg.ShouldDelete(parsed_key, mode);
uint64_t call_time = stop_watch_should_delete.ElapsedNanos();

@ -106,14 +106,15 @@ Status TableCache::GetTableReader(
TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
std::unique_ptr<FSRandomAccessFile> file;
FileOptions fopts = file_options;
Status s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options);
const auto& clock = ioptions_.env->GetSystemClock();
Status s = PrepareIOFromReadOptions(ro, clock, fopts.io_options);
if (s.ok()) {
s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr);
}
RecordTick(ioptions_.statistics, NO_FILE_OPENS);
if (s.IsPathNotFound()) {
fname = Rocks2LevelTableFileName(fname);
s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options);
s = PrepareIOFromReadOptions(ro, clock, fopts.io_options);
if (s.ok()) {
s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
nullptr);
@ -125,10 +126,10 @@ Status TableCache::GetTableReader(
if (!sequential_mode && ioptions_.advise_random_on_open) {
file->Hint(FSRandomAccessFile::kRandom);
}
StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS);
StopWatch sw(clock, ioptions_.statistics, TABLE_OPEN_IO_MICROS);
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(
std::move(file), fname, ioptions_.env, io_tracer_,
std::move(file), fname, clock, io_tracer_,
record_read_stats ? ioptions_.statistics : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter, ioptions_.listeners));
s = ioptions_.table_factory->NewTableReader(
@ -161,7 +162,8 @@ Status TableCache::FindTable(const ReadOptions& ro,
HistogramImpl* file_read_hist, bool skip_filters,
int level, bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin) {
PERF_TIMER_GUARD_WITH_ENV(find_table_nanos, ioptions_.env);
PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos,
ioptions_.env->GetSystemClock());
uint64_t number = fd.GetNumber();
Slice key = GetSliceForFileNumber(&number);
*handle = cache_->Lookup(key);

@ -1761,6 +1761,7 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
const std::shared_ptr<IOTracer>& io_tracer,
uint64_t version_number)
: env_(vset->env_),
clock_(env_->GetSystemClock()),
cfd_(column_family_data),
info_log_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->info_log),
db_statistics_((cfd_ == nullptr) ? nullptr
@ -1879,7 +1880,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
user_comparator(), merge_operator_, info_log_, db_statistics_,
status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
do_merge ? value : nullptr, do_merge ? timestamp : nullptr, value_found,
merge_context, do_merge, max_covering_tombstone_seq, this->env_, seq,
merge_context, do_merge, max_covering_tombstone_seq, clock_, seq,
merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob_to_use,
tracing_get_id);
@ -1907,7 +1908,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
bool timer_enabled =
GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
get_perf_context()->per_level_perf_context_enabled;
StopWatchNano timer(env_, timer_enabled /* auto_start */);
StopWatchNano timer(clock_, timer_enabled /* auto_start */);
*status = table_cache_->Get(
read_options, *internal_comparator(), *f->file_metadata, ikey,
&get_context, mutable_cf_options_.prefix_extractor.get(),
@ -1996,7 +1997,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
std::string* str_value = value != nullptr ? value->GetSelf() : nullptr;
*status = MergeHelper::TimedFullMerge(
merge_operator_, user_key, nullptr, merge_context->GetOperands(),
str_value, info_log_, db_statistics_, env_,
str_value, info_log_, db_statistics_, clock_,
nullptr /* result_operand */, true);
if (LIKELY(value != nullptr)) {
value->PinSelf();
@ -2033,9 +2034,9 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
user_comparator(), merge_operator_, info_log_, db_statistics_,
iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge,
iter->ukey_with_ts, iter->value, iter->timestamp, nullptr,
&(iter->merge_context), true, &iter->max_covering_tombstone_seq,
this->env_, nullptr, merge_operator_ ? &pinned_iters_mgr : nullptr,
callback, &iter->is_blob_index, tracing_mget_id);
&(iter->merge_context), true, &iter->max_covering_tombstone_seq, clock_,
nullptr, merge_operator_ ? &pinned_iters_mgr : nullptr, callback,
&iter->is_blob_index, tracing_mget_id);
// MergeInProgress status, if set, has been transferred to the get_context
// state, so we set status to ok here. From now on, the iter status will
// be used for IO errors, and get_context state will be used for any
@ -2065,7 +2066,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
bool timer_enabled =
GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
get_perf_context()->per_level_perf_context_enabled;
StopWatchNano timer(env_, timer_enabled /* auto_start */);
StopWatchNano timer(clock_, timer_enabled /* auto_start */);
s = table_cache_->MultiGet(
read_options, *internal_comparator(), *f->file_metadata, &file_range,
mutable_cf_options_.prefix_extractor.get(),
@ -2228,7 +2229,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
iter->value != nullptr ? iter->value->GetSelf() : nullptr;
*status = MergeHelper::TimedFullMerge(
merge_operator_, user_key, nullptr, iter->merge_context.GetOperands(),
str_value, info_log_, db_statistics_, env_,
str_value, info_log_, db_statistics_, clock_,
nullptr /* result_operand */, true);
if (LIKELY(iter->value != nullptr)) {
iter->value->PinSelf();
@ -3782,6 +3783,7 @@ VersionSet::VersionSet(const std::string& dbname,
table_cache_(table_cache),
env_(_db_options->env),
fs_(_db_options->fs, io_tracer),
clock_(env_->GetSystemClock()),
dbname_(dbname),
db_options_(_db_options),
next_file_number_(2),
@ -4119,7 +4121,7 @@ Status VersionSet::ProcessManifestWrites(
db_options_->manifest_preallocation_size);
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(descriptor_file), descriptor_fname, opt_file_opts, env_,
std::move(descriptor_file), descriptor_fname, opt_file_opts, clock_,
io_tracer_, nullptr, db_options_->listeners));
descriptor_log_.reset(
new log::Writer(std::move(file_writer), 0, false));
@ -4167,7 +4169,7 @@ Status VersionSet::ProcessManifestWrites(
}
}
if (s.ok()) {
io_s = SyncManifest(env_, db_options_, descriptor_log_->file());
io_s = SyncManifest(clock_, db_options_, descriptor_log_->file());
TEST_SYNC_POINT_CALLBACK(
"VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s);
}
@ -6302,7 +6304,7 @@ Status ReactiveVersionSet::MaybeSwitchManifest(
"ReactiveVersionSet::MaybeSwitchManifest:"
"AfterGetCurrentManifestPath:1");
s = fs_->NewSequentialFile(manifest_path,
env_->OptimizeForManifestRead(file_options_),
fs_->OptimizeForManifestRead(file_options_),
&manifest_file, nullptr);
} else {
// No need to switch manifest.

@ -71,6 +71,7 @@ class WriteBufferManager;
class MergeContext;
class ColumnFamilySet;
class MergeIteratorBuilder;
class SystemClock;
// VersionEdit is always supposed to be valid and it is used to point at
// entries in Manifest. Ideally it should not be used as a container to
@ -779,6 +780,8 @@ class Version {
private:
Env* env_;
std::shared_ptr<SystemClock> clock_;
friend class ReactiveVersionSet;
friend class VersionSet;
friend class VersionEditHandler;
@ -1346,6 +1349,7 @@ class VersionSet {
Cache* table_cache_;
Env* const env_;
FileSystemPtr const fs_;
const std::shared_ptr<SystemClock> clock_;
const std::string dbname_;
std::string db_id_;
const ImmutableDBOptions* const db_options_;

@ -2786,8 +2786,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
std::unique_ptr<FSWritableFile> file;
Status s = fs_->NewWritableFile(fname, FileOptions(), &file, nullptr);
ASSERT_OK(s);
std::unique_ptr<WritableFileWriter> fwriter(
new WritableFileWriter(std::move(file), fname, FileOptions(), env_));
std::unique_ptr<WritableFileWriter> fwriter(new WritableFileWriter(
std::move(file), fname, FileOptions(), env_->GetSystemClock()));
std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
int_tbl_prop_collector_factories;

@ -55,6 +55,7 @@
#include "monitoring/statistics.h"
#include "port/lang.h"
#include "rocksdb/merge_operator.h"
#include "rocksdb/system_clock.h"
#include "util/autovector.h"
#include "util/cast_util.h"
#include "util/coding.h"
@ -1804,7 +1805,7 @@ class MemTableInserter : public WriteBatch::Handler {
Status merge_status = MergeHelper::TimedFullMerge(
merge_operator, key, &get_value_slice, {value}, &new_value,
moptions->info_log, moptions->statistics, Env::Default());
moptions->info_log, moptions->statistics, SystemClock::Default());
if (!merge_status.ok()) {
// Failed to merge!

@ -8,7 +8,8 @@
#include <atomic>
#include <cassert>
#include <ratio>
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
namespace ROCKSDB_NAMESPACE {
@ -42,7 +43,8 @@ bool WriteController::IsStopped() const {
// If it turns out to be a performance issue, we can redesign the thread
// synchronization model here.
// The function trust caller will sleep micros returned.
uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) {
uint64_t WriteController::GetDelay(const std::shared_ptr<SystemClock>& clock,
uint64_t num_bytes) {
if (total_stopped_.load(std::memory_order_relaxed) > 0) {
return 0;
}
@ -59,7 +61,7 @@ uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) {
}
// The frequency to get time inside DB mutex is less than one per refill
// interval.
auto time_now = NowMicrosMonotonic(env);
auto time_now = NowMicrosMonotonic(clock);
uint64_t sleep_debt = 0;
uint64_t time_since_last_refill = 0;
@ -106,8 +108,9 @@ uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) {
return sleep_amount;
}
uint64_t WriteController::NowMicrosMonotonic(Env* env) {
return env->NowNanos() / std::milli::den;
// Returns the clock's monotonic time in microseconds.
uint64_t WriteController::NowMicrosMonotonic(
    const std::shared_ptr<SystemClock>& clock) {
  // NowNanos() is in nanoseconds; divide by 1000 (std::milli::den) to get
  // microseconds.
  return clock->NowNanos() / std::milli::den;
}
StopWriteToken::~StopWriteToken() {

@ -13,7 +13,7 @@
namespace ROCKSDB_NAMESPACE {
class Env;
class SystemClock;
class WriteControllerToken;
// WriteController is controlling write stalls in our write code-path. Write
@ -57,7 +57,8 @@ class WriteController {
// return how many microseconds the caller needs to sleep after the call
// num_bytes: how many number of bytes to put into the DB.
// Prerequisite: DB mutex held.
uint64_t GetDelay(Env* env, uint64_t num_bytes);
uint64_t GetDelay(const std::shared_ptr<SystemClock>& clock,
uint64_t num_bytes);
void set_delayed_write_rate(uint64_t write_rate) {
// avoid divide 0
if (write_rate == 0) {
@ -85,7 +86,7 @@ class WriteController {
RateLimiter* low_pri_rate_limiter() { return low_pri_rate_limiter_.get(); }
private:
uint64_t NowMicrosMonotonic(Env* env);
uint64_t NowMicrosMonotonic(const std::shared_ptr<SystemClock>& clock);
friend class WriteControllerToken;
friend class StopWriteToken;

@ -3,46 +3,50 @@
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include <ratio>
#include "db/write_controller.h"
#include "rocksdb/env.h"
#include <ratio>
#include "rocksdb/system_clock.h"
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
class WriteControllerTest : public testing::Test {};
class TimeSetEnv : public EnvWrapper {
namespace {
// Test-only clock whose time is advanced manually through now_micros_,
// letting the write-controller tests control elapsed time deterministically.
class TimeSetClock : public SystemClockWrapper {
 public:
  explicit TimeSetClock() : SystemClockWrapper(nullptr) {}
  const char* Name() const override { return "TimeSetClock"; }

  // Current fake time in microseconds; starts at an arbitrary non-zero value.
  uint64_t now_micros_ = 6666;

  // Reports the fake time converted to nanoseconds (1000 ns per us).
  uint64_t NowNanos() override { return now_micros_ * std::milli::den; }
};
} // namespace
// Fixture providing a shared, manually-driven clock for the write-controller
// tests below.
class WriteControllerTest : public testing::Test {
 public:
  WriteControllerTest() : clock_(std::make_shared<TimeSetClock>()) {}
  std::shared_ptr<TimeSetClock> clock_;
};
TEST_F(WriteControllerTest, ChangeDelayRateTest) {
TimeSetEnv env;
WriteController controller(40000000u); // also set max delayed rate
controller.set_delayed_write_rate(10000000u);
auto delay_token_0 =
controller.GetDelayToken(controller.delayed_write_rate());
ASSERT_EQ(static_cast<uint64_t>(2000000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
auto delay_token_1 = controller.GetDelayToken(2000000u);
ASSERT_EQ(static_cast<uint64_t>(10000000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
auto delay_token_2 = controller.GetDelayToken(1000000u);
ASSERT_EQ(static_cast<uint64_t>(20000000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
auto delay_token_3 = controller.GetDelayToken(20000000u);
ASSERT_EQ(static_cast<uint64_t>(1000000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
// This is more than max rate. Max delayed rate will be used.
auto delay_token_4 =
controller.GetDelayToken(controller.delayed_write_rate() * 3);
ASSERT_EQ(static_cast<uint64_t>(500000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
}
TEST_F(WriteControllerTest, SanityTest) {
@ -56,73 +60,71 @@ TEST_F(WriteControllerTest, SanityTest) {
stop_token_2.reset();
ASSERT_FALSE(controller.IsStopped());
TimeSetEnv env;
auto delay_token_1 = controller.GetDelayToken(10000000u);
ASSERT_EQ(static_cast<uint64_t>(2000000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
env.now_micros_ += 1999900u; // sleep debt 1000
clock_->now_micros_ += 1999900u; // sleep debt 1000
auto delay_token_2 = controller.GetDelayToken(10000000u);
// Rate reset after changing the token.
ASSERT_EQ(static_cast<uint64_t>(2000000),
controller.GetDelay(&env, 20000000u));
controller.GetDelay(clock_, 20000000u));
env.now_micros_ += 1999900u; // sleep debt 1000
clock_->now_micros_ += 1999900u; // sleep debt 1000
// One refill: 10240 bytes allowed, 1000 used, 9240 left
ASSERT_EQ(static_cast<uint64_t>(1124), controller.GetDelay(&env, 1000u));
env.now_micros_ += 1124u; // sleep debt 0
ASSERT_EQ(static_cast<uint64_t>(1124), controller.GetDelay(clock_, 1000u));
clock_->now_micros_ += 1124u; // sleep debt 0
delay_token_2.reset();
// 1000 used, 8240 left
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 1000u));
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 100u; // sleep credit 100
clock_->now_micros_ += 100u; // sleep credit 100
// 1000 used, 7240 left
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 1000u));
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 100u; // sleep credit 200
clock_->now_micros_ += 100u; // sleep credit 200
// One refill: 10240 filled, sleep credit generates 2000. 8000 used
// 7240 + 10240 + 2000 - 8000 = 11480 left
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(&env, 8000u));
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(clock_, 8000u));
env.now_micros_ += 200u; // sleep debt 824
clock_->now_micros_ += 200u; // sleep debt 824
// 1000 used, 10480 left.
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 1000u));
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 200u; // sleep debt 624
clock_->now_micros_ += 200u; // sleep debt 624
// Out of bound sleep, still 10480 left
ASSERT_EQ(static_cast<uint64_t>(3000624u),
controller.GetDelay(&env, 30000000u));
controller.GetDelay(clock_, 30000000u));
env.now_micros_ += 3000724u; // sleep credit 100
clock_->now_micros_ += 3000724u; // sleep credit 100
// 6000 used, 4480 left.
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 6000u));
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 6000u));
env.now_micros_ += 200u; // sleep credit 300
clock_->now_micros_ += 200u; // sleep credit 300
// One refill, credit 4480 balance + 3000 credit + 10240 refill
// Use 8000, 9720 left
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(&env, 8000u));
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(clock_, 8000u));
env.now_micros_ += 3024u; // sleep credit 2000
clock_->now_micros_ += 3024u; // sleep credit 2000
// 1720 left
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(&env, 8000u));
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(clock_, 8000u));
// 1720 balance + 20000 credit = 20170 left
// Use 8000, 12170 left
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(&env, 8000u));
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(clock_, 8000u));
// 4170 left
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(&env, 8000u));
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(clock_, 8000u));
// Need a refill
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(&env, 9000u));
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(clock_, 9000u));
delay_token_1.reset();
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 30000000u));
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 30000000u));
delay_token_1.reset();
ASSERT_FALSE(controller.IsStopped());
}

369
env/composite_env.cc vendored

@ -0,0 +1,369 @@
// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include "env/composite_env_wrapper.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// The CompositeEnvWrapper class provides an interface that is compatible
// with the old monolithic Env API, and an implementation that wraps around
// the new Env that provides threading and other OS related functionality, and
// the new FileSystem API that provides storage functionality. By
// providing the old Env interface, it allows the rest of RocksDB code to
// be agnostic of whether the underlying Env implementation is a monolithic
// Env or an Env + FileSystem. In the former case, the user will specify
// Options::env only, whereas in the latter case, the user will specify
// Options::env and Options::file_system.
// Adapts an FSSequentialFile (new FileSystem API) to the legacy
// SequentialFile interface. Calls that gained IOOptions/IODebugContext
// parameters in the new API are forwarded with default-constructed
// instances of both; the rest are pure pass-throughs.
class CompositeSequentialFileWrapper : public SequentialFile {
 public:
  explicit CompositeSequentialFileWrapper(
      std::unique_ptr<FSSequentialFile>& target)
      : target_(std::move(target)) {}

  Status Read(size_t n, Slice* result, char* scratch) override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Read(n, opts, result, scratch, &dbg_ctx);
  }
  Status PositionedRead(uint64_t offset, size_t n, Slice* result,
                        char* scratch) override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->PositionedRead(offset, n, opts, result, scratch, &dbg_ctx);
  }
  Status Skip(uint64_t n) override { return target_->Skip(n); }
  Status InvalidateCache(size_t offset, size_t length) override {
    return target_->InvalidateCache(offset, length);
  }
  bool use_direct_io() const override { return target_->use_direct_io(); }
  size_t GetRequiredBufferAlignment() const override {
    return target_->GetRequiredBufferAlignment();
  }

 private:
  std::unique_ptr<FSSequentialFile> target_;
};
// Adapts an FSRandomAccessFile (new FileSystem API) to the legacy
// RandomAccessFile interface, supplying default IOOptions and a scratch
// IODebugContext where the new API requires them.
class CompositeRandomAccessFileWrapper : public RandomAccessFile {
 public:
  explicit CompositeRandomAccessFileWrapper(
      std::unique_ptr<FSRandomAccessFile>& target)
      : target_(std::move(target)) {}

  Status Read(uint64_t offset, size_t n, Slice* result,
              char* scratch) const override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Read(offset, n, opts, result, scratch, &dbg_ctx);
  }
  // Translates each legacy ReadRequest into an FSReadRequest, issues the
  // batched read, then copies the per-request results/statuses back.
  Status MultiRead(ReadRequest* reqs, size_t num_reqs) override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    std::vector<FSReadRequest> translated(num_reqs);
    for (size_t i = 0; i < num_reqs; ++i) {
      translated[i].offset = reqs[i].offset;
      translated[i].len = reqs[i].len;
      translated[i].scratch = reqs[i].scratch;
      translated[i].status = IOStatus::OK();
    }
    Status s = target_->MultiRead(translated.data(), num_reqs, opts, &dbg_ctx);
    for (size_t i = 0; i < num_reqs; ++i) {
      reqs[i].result = translated[i].result;
      reqs[i].status = translated[i].status;
    }
    return s;
  }
  Status Prefetch(uint64_t offset, size_t n) override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Prefetch(offset, n, opts, &dbg_ctx);
  }
  size_t GetUniqueId(char* id, size_t max_size) const override {
    return target_->GetUniqueId(id, max_size);
  }
  void Hint(AccessPattern pattern) override {
    target_->Hint((FSRandomAccessFile::AccessPattern)pattern);
  }
  bool use_direct_io() const override { return target_->use_direct_io(); }
  size_t GetRequiredBufferAlignment() const override {
    return target_->GetRequiredBufferAlignment();
  }
  Status InvalidateCache(size_t offset, size_t length) override {
    return target_->InvalidateCache(offset, length);
  }

 private:
  std::unique_ptr<FSRandomAccessFile> target_;
};
class CompositeWritableFileWrapper : public WritableFile {
public:
explicit CompositeWritableFileWrapper(std::unique_ptr<FSWritableFile>& t)
: target_(std::move(t)) {}
Status Append(const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Append(data, io_opts, &dbg);
}
Status PositionedAppend(const Slice& data, uint64_t offset) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedAppend(data, offset, io_opts, &dbg);
}
Status Truncate(uint64_t size) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Truncate(size, io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {
target_->SetWriteLifeTimeHint(hint);
}
Env::WriteLifeTimeHint GetWriteLifeTimeHint() override {
return target_->GetWriteLifeTimeHint();
}
uint64_t GetFileSize() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->GetFileSize(io_opts, &dbg);
}
void SetPreallocationBlockSize(size_t size) override {
target_->SetPreallocationBlockSize(size);
}
void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) override {
target_->GetPreallocationStatus(block_size, last_allocated_block);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->RangeSync(offset, nbytes, io_opts, &dbg);
}
void PrepareWrite(size_t offset, size_t len) override {
IOOptions io_opts;
IODebugContext dbg;
target_->PrepareWrite(offset, len, io_opts, &dbg);
}
Status Allocate(uint64_t offset, uint64_t len) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Allocate(offset, len, io_opts, &dbg);
}
std::unique_ptr<FSWritableFile>* target() { return &target_; }
private:
std::unique_ptr<FSWritableFile> target_;
};
// Adapts an FSRandomRWFile (new FileSystem API) to the legacy RandomRWFile
// interface, supplying default IOOptions/IODebugContext as needed.
class CompositeRandomRWFileWrapper : public RandomRWFile {
 public:
  explicit CompositeRandomRWFileWrapper(std::unique_ptr<FSRandomRWFile>& target)
      : target_(std::move(target)) {}

  Status Write(uint64_t offset, const Slice& data) override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Write(offset, data, opts, &dbg_ctx);
  }
  Status Read(uint64_t offset, size_t n, Slice* result,
              char* scratch) const override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Read(offset, n, opts, result, scratch, &dbg_ctx);
  }
  Status Flush() override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Flush(opts, &dbg_ctx);
  }
  Status Sync() override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Sync(opts, &dbg_ctx);
  }
  Status Fsync() override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Fsync(opts, &dbg_ctx);
  }
  Status Close() override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Close(opts, &dbg_ctx);
  }
  bool use_direct_io() const override { return target_->use_direct_io(); }
  size_t GetRequiredBufferAlignment() const override {
    return target_->GetRequiredBufferAlignment();
  }

 private:
  std::unique_ptr<FSRandomRWFile> target_;
};
// Adapts an FSDirectory (new FileSystem API) to the legacy Directory
// interface.
class CompositeDirectoryWrapper : public Directory {
 public:
  explicit CompositeDirectoryWrapper(std::unique_ptr<FSDirectory>& target)
      : target_(std::move(target)) {}

  Status Fsync() override {
    IOOptions opts;
    IODebugContext dbg_ctx;
    return target_->Fsync(opts, &dbg_ctx);
  }
  size_t GetUniqueId(char* id, size_t max_size) const override {
    return target_->GetUniqueId(id, max_size);
  }

 private:
  std::unique_ptr<FSDirectory> target_;
};
} // namespace
// Opens a sequential file through the FileSystem and, on success, hands the
// caller a legacy SequentialFile view of it.
Status CompositeEnv::NewSequentialFile(const std::string& f,
                                       std::unique_ptr<SequentialFile>* r,
                                       const EnvOptions& options) {
  IODebugContext dbg_ctx;
  std::unique_ptr<FSSequentialFile> fs_file;
  Status s =
      file_system_->NewSequentialFile(f, FileOptions(options), &fs_file,
                                      &dbg_ctx);
  if (s.ok()) {
    r->reset(new CompositeSequentialFileWrapper(fs_file));
  }
  return s;
}
// Opens a random-access file through the FileSystem and, on success, hands
// the caller a legacy RandomAccessFile view of it.
Status CompositeEnv::NewRandomAccessFile(const std::string& f,
                                         std::unique_ptr<RandomAccessFile>* r,
                                         const EnvOptions& options) {
  IODebugContext dbg_ctx;
  std::unique_ptr<FSRandomAccessFile> fs_file;
  Status s = file_system_->NewRandomAccessFile(f, FileOptions(options),
                                               &fs_file, &dbg_ctx);
  if (s.ok()) {
    r->reset(new CompositeRandomAccessFileWrapper(fs_file));
  }
  return s;
}
// Creates a writable file through the FileSystem and, on success, hands the
// caller a legacy WritableFile view of it.
Status CompositeEnv::NewWritableFile(const std::string& f,
                                     std::unique_ptr<WritableFile>* r,
                                     const EnvOptions& options) {
  IODebugContext dbg_ctx;
  std::unique_ptr<FSWritableFile> fs_file;
  Status s = file_system_->NewWritableFile(f, FileOptions(options), &fs_file,
                                           &dbg_ctx);
  if (s.ok()) {
    r->reset(new CompositeWritableFileWrapper(fs_file));
  }
  return s;
}
// Reopens an existing file for writing via the FileSystem and wraps the
// result in the legacy WritableFile interface on success.
Status CompositeEnv::ReopenWritableFile(const std::string& fname,
                                        std::unique_ptr<WritableFile>* result,
                                        const EnvOptions& options) {
  IODebugContext dbg_ctx;
  std::unique_ptr<FSWritableFile> fs_file;
  Status s = file_system_->ReopenWritableFile(fname, FileOptions(options),
                                              &fs_file, &dbg_ctx);
  if (s.ok()) {
    result->reset(new CompositeWritableFileWrapper(fs_file));
  }
  return s;
}
// Reuses an old file (renaming old_fname to fname) via the FileSystem and
// wraps the resulting writable file in the legacy interface on success.
Status CompositeEnv::ReuseWritableFile(const std::string& fname,
                                       const std::string& old_fname,
                                       std::unique_ptr<WritableFile>* r,
                                       const EnvOptions& options) {
  IODebugContext dbg_ctx;
  std::unique_ptr<FSWritableFile> fs_file;
  Status s = file_system_->ReuseWritableFile(
      fname, old_fname, FileOptions(options), &fs_file, &dbg_ctx);
  if (s.ok()) {
    r->reset(new CompositeWritableFileWrapper(fs_file));
  }
  return s;
}
// Opens a random read/write file through the FileSystem and, on success,
// hands the caller a legacy RandomRWFile view of it.
Status CompositeEnv::NewRandomRWFile(const std::string& fname,
                                     std::unique_ptr<RandomRWFile>* result,
                                     const EnvOptions& options) {
  IODebugContext dbg_ctx;
  std::unique_ptr<FSRandomRWFile> fs_file;
  Status s = file_system_->NewRandomRWFile(fname, FileOptions(options),
                                           &fs_file, &dbg_ctx);
  if (s.ok()) {
    result->reset(new CompositeRandomRWFileWrapper(fs_file));
  }
  return s;
}
// Opens a directory through the FileSystem and, on success, hands the
// caller a legacy Directory view of it.
Status CompositeEnv::NewDirectory(const std::string& name,
                                  std::unique_ptr<Directory>* result) {
  IOOptions opts;
  IODebugContext dbg_ctx;
  std::unique_ptr<FSDirectory> fs_dir;
  Status s = file_system_->NewDirectory(name, opts, &fs_dir, &dbg_ctx);
  if (s.ok()) {
    result->reset(new CompositeDirectoryWrapper(fs_dir));
  }
  return s;
}
} // namespace ROCKSDB_NAMESPACE

@ -7,6 +7,7 @@
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#ifdef _WIN32
// Windows API macro interference
@ -17,272 +18,14 @@
namespace ROCKSDB_NAMESPACE {
// The CompositeEnvWrapper class provides an interface that is compatible
// with the old monolithic Env API, and an implementation that wraps around
// the new Env that provides threading and other OS related functionality, and
// the new FileSystem API that provides storage functionality. By
// providing the old Env interface, it allows the rest of RocksDB code to
// be agnostic of whether the underlying Env implementation is a monolithic
// Env or an Env + FileSystem. In the former case, the user will specify
// Options::env only, whereas in the latter case, the user will specify
// Options::env and Options::file_system.
class CompositeSequentialFileWrapper : public SequentialFile {
public:
explicit CompositeSequentialFileWrapper(
std::unique_ptr<FSSequentialFile>& target)
: target_(std::move(target)) {}
Status Read(size_t n, Slice* result, char* scratch) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(n, io_opts, result, scratch, &dbg);
}
Status Skip(uint64_t n) override { return target_->Skip(n); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status PositionedRead(uint64_t offset, size_t n, Slice* result,
char* scratch) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedRead(offset, n, io_opts, result, scratch, &dbg);
}
private:
std::unique_ptr<FSSequentialFile> target_;
};
class CompositeRandomAccessFileWrapper : public RandomAccessFile {
public:
explicit CompositeRandomAccessFileWrapper(
std::unique_ptr<FSRandomAccessFile>& target)
: target_(std::move(target)) {}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(offset, n, io_opts, result, scratch, &dbg);
}
Status MultiRead(ReadRequest* reqs, size_t num_reqs) override {
IOOptions io_opts;
IODebugContext dbg;
std::vector<FSReadRequest> fs_reqs;
Status status;
fs_reqs.resize(num_reqs);
for (size_t i = 0; i < num_reqs; ++i) {
fs_reqs[i].offset = reqs[i].offset;
fs_reqs[i].len = reqs[i].len;
fs_reqs[i].scratch = reqs[i].scratch;
fs_reqs[i].status = IOStatus::OK();
}
status = target_->MultiRead(fs_reqs.data(), num_reqs, io_opts, &dbg);
for (size_t i = 0; i < num_reqs; ++i) {
reqs[i].result = fs_reqs[i].result;
reqs[i].status = fs_reqs[i].status;
}
return status;
}
Status Prefetch(uint64_t offset, size_t n) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Prefetch(offset, n, io_opts, &dbg);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
};
void Hint(AccessPattern pattern) override {
target_->Hint((FSRandomAccessFile::AccessPattern)pattern);
}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
private:
std::unique_ptr<FSRandomAccessFile> target_;
};
class CompositeWritableFileWrapper : public WritableFile {
public:
explicit CompositeWritableFileWrapper(std::unique_ptr<FSWritableFile>& t)
: target_(std::move(t)) {}
Status Append(const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Append(data, io_opts, &dbg);
}
Status PositionedAppend(const Slice& data, uint64_t offset) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedAppend(data, offset, io_opts, &dbg);
}
Status Truncate(uint64_t size) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Truncate(size, io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {
target_->SetWriteLifeTimeHint(hint);
}
Env::WriteLifeTimeHint GetWriteLifeTimeHint() override {
return target_->GetWriteLifeTimeHint();
}
uint64_t GetFileSize() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->GetFileSize(io_opts, &dbg);
}
void SetPreallocationBlockSize(size_t size) override {
target_->SetPreallocationBlockSize(size);
}
void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) override {
target_->GetPreallocationStatus(block_size, last_allocated_block);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->RangeSync(offset, nbytes, io_opts, &dbg);
}
void PrepareWrite(size_t offset, size_t len) override {
IOOptions io_opts;
IODebugContext dbg;
target_->PrepareWrite(offset, len, io_opts, &dbg);
}
Status Allocate(uint64_t offset, uint64_t len) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Allocate(offset, len, io_opts, &dbg);
}
std::unique_ptr<FSWritableFile>* target() { return &target_; }
private:
std::unique_ptr<FSWritableFile> target_;
};
class CompositeRandomRWFileWrapper : public RandomRWFile {
public:
explicit CompositeRandomRWFileWrapper(std::unique_ptr<FSRandomRWFile>& target)
: target_(std::move(target)) {}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status Write(uint64_t offset, const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Write(offset, data, io_opts, &dbg);
}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(offset, n, io_opts, result, scratch, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
private:
std::unique_ptr<FSRandomRWFile> target_;
};
class CompositeDirectoryWrapper : public Directory {
public:
explicit CompositeDirectoryWrapper(std::unique_ptr<FSDirectory>& target)
: target_(std::move(target)) {}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
private:
std::unique_ptr<FSDirectory> target_;
};
class CompositeEnv : public Env {
public:
// Initialize a CompositeEnvWrapper that delegates all thread/time related
// calls to env, and all file operations to fs
explicit CompositeEnv(const std::shared_ptr<FileSystem>& fs) : Env(fs) {}
explicit CompositeEnv(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock)
: Env(fs, clock) {}
Status RegisterDbPaths(const std::vector<std::string>& paths) override {
return file_system_->RegisterDbPaths(paths);
@ -294,99 +37,37 @@ class CompositeEnv : public Env {
// The following text is boilerplate that forwards all methods to target()
Status NewSequentialFile(const std::string& f,
std::unique_ptr<SequentialFile>* r,
const EnvOptions& options) override {
IODebugContext dbg;
std::unique_ptr<FSSequentialFile> file;
Status status;
status =
file_system_->NewSequentialFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeSequentialFileWrapper(file));
}
return status;
}
const EnvOptions& options) override;
Status NewRandomAccessFile(const std::string& f,
std::unique_ptr<RandomAccessFile>* r,
const EnvOptions& options) override {
IODebugContext dbg;
std::unique_ptr<FSRandomAccessFile> file;
Status status;
status =
file_system_->NewRandomAccessFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeRandomAccessFileWrapper(file));
}
return status;
}
const EnvOptions& options) override;
Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
const EnvOptions& options) override {
IODebugContext dbg;
std::unique_ptr<FSWritableFile> file;
Status status;
status =
file_system_->NewWritableFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
const EnvOptions& options) override;
Status ReopenWritableFile(const std::string& fname,
std::unique_ptr<WritableFile>* result,
const EnvOptions& options) override {
IODebugContext dbg;
Status status;
std::unique_ptr<FSWritableFile> file;
status = file_system_->ReopenWritableFile(fname, FileOptions(options),
&file, &dbg);
if (status.ok()) {
result->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
const EnvOptions& options) override;
Status ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
std::unique_ptr<WritableFile>* r,
const EnvOptions& options) override {
IODebugContext dbg;
Status status;
std::unique_ptr<FSWritableFile> file;
status = file_system_->ReuseWritableFile(fname, old_fname,
FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
const EnvOptions& options) override;
Status NewRandomRWFile(const std::string& fname,
std::unique_ptr<RandomRWFile>* result,
const EnvOptions& options) override {
IODebugContext dbg;
std::unique_ptr<FSRandomRWFile> file;
Status status;
status =
file_system_->NewRandomRWFile(fname, FileOptions(options), &file, &dbg);
if (status.ok()) {
result->reset(new CompositeRandomRWFileWrapper(file));
}
return status;
}
const EnvOptions& options) override;
Status NewMemoryMappedFileBuffer(
const std::string& fname,
std::unique_ptr<MemoryMappedFileBuffer>* result) override {
return file_system_->NewMemoryMappedFileBuffer(fname, result);
}
Status NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result) override {
IOOptions io_opts;
IODebugContext dbg;
std::unique_ptr<FSDirectory> dir;
Status status;
status = file_system_->NewDirectory(name, io_opts, &dir, &dbg);
if (status.ok()) {
result->reset(new CompositeDirectoryWrapper(dir));
}
return status;
}
std::unique_ptr<Directory>* result) override;
Status FileExists(const std::string& f) override {
IOOptions io_opts;
IODebugContext dbg;
@ -548,6 +229,21 @@ class CompositeEnv : public Env {
IODebugContext dbg;
return file_system_->GetFreeSpace(path, io_opts, diskfree, &dbg);
}
uint64_t NowMicros() override { return system_clock_->NowMicros(); }
uint64_t NowNanos() override { return system_clock_->NowNanos(); }
uint64_t NowCPUNanos() override { return system_clock_->CPUNanos(); }
void SleepForMicroseconds(int micros) override {
system_clock_->SleepForMicroseconds(micros);
}
Status GetCurrentTime(int64_t* unix_time) override {
return system_clock_->GetCurrentTime(unix_time);
}
std::string TimeToString(uint64_t time) override {
return system_clock_->TimeToString(time);
}
};
class CompositeEnvWrapper : public CompositeEnv {
@ -555,7 +251,14 @@ class CompositeEnvWrapper : public CompositeEnv {
// Initialize a CompositeEnvWrapper that delegates all thread/time related
// calls to env, and all file operations to fs
explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<FileSystem>& fs)
: CompositeEnv(fs), env_target_(env) {}
: CompositeEnvWrapper(env, fs, env->GetSystemClock()) {}
explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<SystemClock>& sc)
: CompositeEnvWrapper(env, env->GetFileSystem(), sc) {}
explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& sc)
: CompositeEnv(fs, sc), env_target_(env) {}
// Return the target to which this Env forwards all calls
Env* env_target() const { return env_target_; }
@ -585,19 +288,9 @@ class CompositeEnvWrapper : public CompositeEnv {
return env_target_->GetThreadPoolQueueLen(pri);
}
uint64_t NowMicros() override { return env_target_->NowMicros(); }
uint64_t NowNanos() override { return env_target_->NowNanos(); }
uint64_t NowCPUNanos() override { return env_target_->NowCPUNanos(); }
void SleepForMicroseconds(int micros) override {
env_target_->SleepForMicroseconds(micros);
}
Status GetHostName(char* name, uint64_t len) override {
return env_target_->GetHostName(name, len);
}
Status GetCurrentTime(int64_t* unix_time) override {
return env_target_->GetCurrentTime(unix_time);
}
void SetBackgroundThreads(int num, Priority pri) override {
return env_target_->SetBackgroundThreads(num, pri);
}
@ -625,10 +318,6 @@ class CompositeEnvWrapper : public CompositeEnv {
return env_target_->LowerThreadPoolCPUPriority(pool, pri);
}
std::string TimeToString(uint64_t time) override {
return env_target_->TimeToString(time);
}
Status GetThreadList(std::vector<ThreadStatus>* thread_list) override {
return env_target_->GetThreadList(thread_list);
}

57
env/env.cc vendored

@ -10,17 +10,58 @@
#include "rocksdb/env.h"
#include <thread>
#include "env/composite_env_wrapper.h"
#include "logging/env_logger.h"
#include "memory/arena.h"
#include "options/db_options.h"
#include "port/port.h"
#include "rocksdb/options.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/utilities/object_registry.h"
#include "util/autovector.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// Bridges the time-related virtual methods of a legacy (monolithic) Env
// into the new SystemClock interface. Every call is forwarded verbatim to
// the wrapped Env, so pre-SystemClock Envs keep their exact timing behavior
// when accessed through the clock abstraction.
class LegacySystemClock : public SystemClock {
 public:
  explicit LegacySystemClock(Env* env) : env_(env) {}

  const char* Name() const override { return "Legacy System Clock"; }

  // Micro/nanoseconds since some fixed point in time. NowMicros() is used
  // as system time in places such as GenericRateLimiter; NowNanos() is only
  // meaningful for computing deltas within one run.
  uint64_t NowMicros() override { return env_->NowMicros(); }
  uint64_t NowNanos() override { return env_->NowNanos(); }

  // Per-thread CPU clock. The legacy Env only exposes nanoseconds, so the
  // microsecond variant is derived from CPUNanos().
  uint64_t CPUNanos() override { return env_->NowCPUNanos(); }
  uint64_t CPUMicros() override { return CPUNanos() / 1000; }

  // Sleep/delay the calling thread for the given number of microseconds.
  void SleepForMicroseconds(int micros) override {
    env_->SleepForMicroseconds(micros);
  }

  // Seconds since the Epoch (1970-01-01 00:00:00 UTC); *unix_time is
  // written only on success.
  Status GetCurrentTime(int64_t* unix_time) override {
    return env_->GetCurrentTime(unix_time);
  }

  // Renders seconds-since-1970 as a printable string.
  std::string TimeToString(uint64_t time) override {
    return env_->TimeToString(time);
  }

 private:
  Env* env_;
};
class LegacyFileSystemWrapper : public FileSystem {
public:
// Initialize an EnvWrapper that delegates all calls to *t
@ -265,11 +306,17 @@ class LegacyFileSystemWrapper : public FileSystem {
// Default constructor for a monolithic (pre-FileSystem/SystemClock) Env:
// synthesize file_system_ and system_clock_ from this Env's own virtual
// methods, so new-style callers can use the split interfaces transparently.
Env::Env() : thread_status_updater_(nullptr) {
file_system_ = std::make_shared<LegacyFileSystemWrapper>(this);
system_clock_ = std::make_shared<LegacySystemClock>(this);
}
Env::Env(std::shared_ptr<FileSystem> fs)
: thread_status_updater_(nullptr),
file_system_(fs) {}
Env::Env(const std::shared_ptr<FileSystem>& fs)
: thread_status_updater_(nullptr), file_system_(fs) {
system_clock_ = std::make_shared<LegacySystemClock>(this);
}
Env::Env(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock)
: thread_status_updater_(nullptr), file_system_(fs), system_clock_(clock) {}
Env::~Env() {
}
@ -730,4 +777,8 @@ Status NewEnvLogger(const std::string& fname, Env* env,
const std::shared_ptr<FileSystem>& Env::GetFileSystem() const {
return file_system_;
}
// Returns the SystemClock associated with this Env: a LegacySystemClock
// wrapper for legacy Envs, or whatever clock was injected via the
// Env(fs, clock) constructor.
const std::shared_ptr<SystemClock>& Env::GetSystemClock() const {
return system_clock_;
}
} // namespace ROCKSDB_NAMESPACE

158
env/env_posix.cc vendored

@ -56,8 +56,10 @@
#include "monitoring/iostats_context_imp.h"
#include "monitoring/thread_status_updater.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/slice.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/coding.h"
#include "util/compression_context_cache.h"
@ -121,6 +123,82 @@ class PosixDynamicLibrary : public DynamicLibrary {
void* handle_;
};
#endif // !ROCKSDB_NO_DYNAMIC_EXTENSION
// SystemClock implementation backed by POSIX (and platform-specific) time
// primitives; this is the clock returned by SystemClock::Default().
class PosixClock : public SystemClock {
 public:
  const char* Name() const override { return "PosixClock"; }

  // Wall-clock microseconds since the Unix epoch (via gettimeofday).
  uint64_t NowMicros() override {
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
  }

  // Nanoseconds since some fixed point in time; monotonic on platforms that
  // provide a monotonic source, and only meaningful for computing deltas.
  uint64_t NowNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
    defined(OS_AIX)
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(OS_SOLARIS)
    return gethrtime();
#elif defined(__MACH__)
    clock_serv_t cclock;
    mach_timespec_t ts;
    host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
    clock_get_time(cclock, &ts);
    mach_port_deallocate(mach_task_self(), cclock);
    return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
    return std::chrono::duration_cast<std::chrono::nanoseconds>(
               std::chrono::steady_clock::now().time_since_epoch())
        .count();
#endif
  }

  // Per-thread CPU time in MICROseconds; 0 on unsupported platforms.
  // Fix: the previous implementation returned tv_sec * 10^9 -- i.e.
  // whole-second resolution expressed in nanoseconds -- from a microseconds
  // API. Now returns tv_sec * 10^6 + tv_nsec / 10^3, consistent with
  // LegacySystemClock::CPUMicros() (CPUNanos() / 1000).
  uint64_t CPUMicros() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
    defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12))
    struct timespec ts;
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
    return static_cast<uint64_t>(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
#endif
    return 0;
  }

  // Per-thread CPU time in nanoseconds; 0 on unsupported platforms.
  uint64_t CPUNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
    defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12))
    struct timespec ts;
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
    return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
    return 0;
  }

  // Sleep/delay the calling thread for the given number of microseconds.
  void SleepForMicroseconds(int micros) override { usleep(micros); }

  // Seconds since the Epoch (UTC); *unix_time is written only on success.
  Status GetCurrentTime(int64_t* unix_time) override {
    time_t ret = time(nullptr);
    if (ret == (time_t)-1) {
      return IOError("GetCurrentTime", "", errno);
    }
    *unix_time = (int64_t)ret;
    return Status::OK();
  }

  // Formats seconds-since-1970 as "YYYY/MM/DD-HH:MM:SS " in local time.
  // Fix: the result is now trimmed to the formatted length; previously the
  // returned string was always 64 bytes long, padded with embedded NULs.
  std::string TimeToString(uint64_t secondsSince1970) override {
    const time_t seconds = (time_t)secondsSince1970;
    struct tm t;
    localtime_r(&seconds, &t);
    char buf[64];
    int len = snprintf(buf, sizeof(buf), "%04d/%02d/%02d-%02d:%02d:%02d ",
                       t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour,
                       t.tm_min, t.tm_sec);
    if (len < 0) {
      len = 0;  // snprintf encoding failure; return an empty string
    }
    return std::string(buf, len);
  }
};
class PosixEnv : public CompositeEnv {
public:
@ -232,45 +310,6 @@ class PosixEnv : public CompositeEnv {
uint64_t GetThreadID() const override { return gettid(pthread_self()); }
uint64_t NowMicros() override {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
uint64_t NowNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX)
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(OS_SOLARIS)
return gethrtime();
#elif defined(__MACH__)
clock_serv_t cclock;
mach_timespec_t ts;
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
clock_get_time(cclock, &ts);
mach_port_deallocate(mach_task_self(), cclock);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch()).count();
#endif
}
uint64_t NowCPUNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12))
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
return 0;
}
void SleepForMicroseconds(int micros) override { usleep(micros); }
Status GetHostName(char* name, uint64_t len) override {
int ret = gethostname(name, static_cast<size_t>(len));
if (ret < 0) {
@ -283,15 +322,6 @@ class PosixEnv : public CompositeEnv {
return Status::OK();
}
Status GetCurrentTime(int64_t* unix_time) override {
time_t ret = time(nullptr);
if (ret == (time_t) -1) {
return IOError("GetCurrentTime", "", errno);
}
*unix_time = (int64_t) ret;
return Status::OK();
}
ThreadStatusUpdater* GetThreadStatusUpdater() const override {
return Env::GetThreadStatusUpdater();
}
@ -340,26 +370,6 @@ class PosixEnv : public CompositeEnv {
return Status::OK();
}
std::string TimeToString(uint64_t secondsSince1970) override {
const time_t seconds = (time_t)secondsSince1970;
struct tm t;
int maxsize = 64;
std::string dummy;
dummy.reserve(maxsize);
dummy.resize(maxsize);
char* p = &dummy[0];
localtime_r(&seconds, &t);
snprintf(p, maxsize,
"%04d/%02d/%02d-%02d:%02d:%02d ",
t.tm_year + 1900,
t.tm_mon + 1,
t.tm_mday,
t.tm_hour,
t.tm_min,
t.tm_sec);
return dummy;
}
private:
friend Env* Env::Default();
// Constructs the default Env, a singleton
@ -382,7 +392,7 @@ class PosixEnv : public CompositeEnv {
};
PosixEnv::PosixEnv()
: CompositeEnv(FileSystem::Default()),
: CompositeEnv(FileSystem::Default(), SystemClock::Default()),
thread_pools_storage_(Priority::TOTAL),
allow_non_owner_access_storage_(true),
thread_pools_(thread_pools_storage_),
@ -401,7 +411,7 @@ PosixEnv::PosixEnv()
PosixEnv::PosixEnv(const PosixEnv* default_env,
const std::shared_ptr<FileSystem>& fs)
: CompositeEnv(fs),
: CompositeEnv(fs, default_env->GetSystemClock()),
thread_pools_(default_env->thread_pools_),
mu_(default_env->mu_),
threads_to_join_(default_env->threads_to_join_),
@ -509,6 +519,14 @@ std::unique_ptr<Env> NewCompositeEnv(const std::shared_ptr<FileSystem>& fs) {
return std::unique_ptr<Env>(new PosixEnv(default_env, fs));
}
//
// Default Posix SystemClock
//
// Returns the process-wide default SystemClock: a lazily constructed
// PosixClock singleton. The function-local static guarantees thread-safe,
// one-time initialization (C++11), and returning a const reference avoids
// bumping the shared_ptr refcount on each call.
const std::shared_ptr<SystemClock>& SystemClock::Default() {
static std::shared_ptr<SystemClock> default_clock =
std::make_shared<PosixClock>();
return default_clock;
}
} // namespace ROCKSDB_NAMESPACE
#endif

4
env/env_test.cc vendored

@ -35,6 +35,7 @@
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
@ -2213,7 +2214,8 @@ TEST_F(EnvTest, IsDirectory) {
ASSERT_OK(s);
std::unique_ptr<WritableFileWriter> fwriter;
fwriter.reset(new WritableFileWriter(std::move(wfile), test_file_path,
FileOptions(), Env::Default()));
FileOptions(),
SystemClock::Default()));
constexpr char buf[] = "test";
s = fwriter->Append(buf);
ASSERT_OK(s);

@ -5,18 +5,19 @@
#include "env/file_system_tracer.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
namespace ROCKSDB_NAMESPACE {
IOStatus FileSystemTracingWrapper::NewSequentialFile(
const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<FSSequentialFile>* result, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->NewSequentialFile(fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -26,11 +27,11 @@ IOStatus FileSystemTracingWrapper::NewSequentialFile(
IOStatus FileSystemTracingWrapper::NewRandomAccessFile(
const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -40,11 +41,11 @@ IOStatus FileSystemTracingWrapper::NewRandomAccessFile(
IOStatus FileSystemTracingWrapper::NewWritableFile(
const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->NewWritableFile(fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -54,11 +55,11 @@ IOStatus FileSystemTracingWrapper::NewWritableFile(
IOStatus FileSystemTracingWrapper::ReopenWritableFile(
const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->ReopenWritableFile(fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -69,12 +70,12 @@ IOStatus FileSystemTracingWrapper::ReuseWritableFile(
const std::string& fname, const std::string& old_fname,
const FileOptions& file_opts, std::unique_ptr<FSWritableFile>* result,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s =
target()->ReuseWritableFile(fname, old_fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -84,11 +85,11 @@ IOStatus FileSystemTracingWrapper::ReuseWritableFile(
IOStatus FileSystemTracingWrapper::NewRandomRWFile(
const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<FSRandomRWFile>* result, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->NewRandomRWFile(fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -98,11 +99,11 @@ IOStatus FileSystemTracingWrapper::NewRandomRWFile(
IOStatus FileSystemTracingWrapper::NewDirectory(
const std::string& name, const IOOptions& io_opts,
std::unique_ptr<FSDirectory>* result, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->NewDirectory(name, io_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
name.substr(name.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -113,11 +114,11 @@ IOStatus FileSystemTracingWrapper::GetChildren(const std::string& dir,
const IOOptions& io_opts,
std::vector<std::string>* r,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->GetChildren(dir, io_opts, r, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
dir.substr(dir.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -127,11 +128,11 @@ IOStatus FileSystemTracingWrapper::GetChildren(const std::string& dir,
IOStatus FileSystemTracingWrapper::DeleteFile(const std::string& fname,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->DeleteFile(fname, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -141,11 +142,11 @@ IOStatus FileSystemTracingWrapper::DeleteFile(const std::string& fname,
IOStatus FileSystemTracingWrapper::CreateDir(const std::string& dirname,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->CreateDir(dirname, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
dirname.substr(dirname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -154,11 +155,11 @@ IOStatus FileSystemTracingWrapper::CreateDir(const std::string& dirname,
IOStatus FileSystemTracingWrapper::CreateDirIfMissing(
const std::string& dirname, const IOOptions& options, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->CreateDirIfMissing(dirname, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
dirname.substr(dirname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -168,11 +169,11 @@ IOStatus FileSystemTracingWrapper::CreateDirIfMissing(
IOStatus FileSystemTracingWrapper::DeleteDir(const std::string& dirname,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->DeleteDir(dirname, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
dirname.substr(dirname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record);
@ -183,14 +184,14 @@ IOStatus FileSystemTracingWrapper::GetFileSize(const std::string& fname,
const IOOptions& options,
uint64_t* file_size,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->GetFileSize(fname, options, file_size, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOFileSize);
IOTraceRecord io_record(
env_->NowNanos(), TraceType::kIOTracer, io_op_data, __func__, elapsed,
clock_->NowNanos(), TraceType::kIOTracer, io_op_data, __func__, elapsed,
s.ToString(), fname.substr(fname.find_last_of("/\\") + 1), *file_size);
io_tracer_->WriteIOOp(io_record);
return s;
@ -200,13 +201,13 @@ IOStatus FileSystemTracingWrapper::Truncate(const std::string& fname,
size_t size,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Truncate(fname, size, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOFileSize);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1), size);
io_tracer_->WriteIOOp(io_record);
@ -217,13 +218,13 @@ IOStatus FSSequentialFileTracingWrapper::Read(size_t n,
const IOOptions& options,
Slice* result, char* scratch,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Read(n, options, result, scratch, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_,
result->size(), 0 /*Offset*/);
io_tracer_->WriteIOOp(io_record);
@ -232,14 +233,14 @@ IOStatus FSSequentialFileTracingWrapper::Read(size_t n,
IOStatus FSSequentialFileTracingWrapper::InvalidateCache(size_t offset,
size_t length) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->InvalidateCache(offset, length);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, length,
offset);
io_tracer_->WriteIOOp(io_record);
@ -249,7 +250,7 @@ IOStatus FSSequentialFileTracingWrapper::InvalidateCache(size_t offset,
IOStatus FSSequentialFileTracingWrapper::PositionedRead(
uint64_t offset, size_t n, const IOOptions& options, Slice* result,
char* scratch, IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s =
target()->PositionedRead(offset, n, options, result, scratch, dbg);
@ -257,7 +258,7 @@ IOStatus FSSequentialFileTracingWrapper::PositionedRead(
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_,
result->size(), offset);
io_tracer_->WriteIOOp(io_record);
@ -268,14 +269,14 @@ IOStatus FSRandomAccessFileTracingWrapper::Read(uint64_t offset, size_t n,
const IOOptions& options,
Slice* result, char* scratch,
IODebugContext* dbg) const {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Read(offset, n, options, result, scratch, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, n,
offset);
io_tracer_->WriteIOOp(io_record);
@ -286,7 +287,7 @@ IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs,
size_t num_reqs,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->MultiRead(reqs, num_reqs, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
@ -295,9 +296,9 @@ IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs,
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
for (size_t i = 0; i < num_reqs; i++) {
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, latency, reqs[i].status.ToString(),
file_name_, reqs[i].len, reqs[i].offset);
IOTraceRecord io_record(
clock_->NowNanos(), TraceType::kIOTracer, io_op_data, __func__, latency,
reqs[i].status.ToString(), file_name_, reqs[i].len, reqs[i].offset);
io_tracer_->WriteIOOp(io_record);
}
return s;
@ -306,14 +307,14 @@ IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs,
IOStatus FSRandomAccessFileTracingWrapper::Prefetch(uint64_t offset, size_t n,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Prefetch(offset, n, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, n,
offset);
io_tracer_->WriteIOOp(io_record);
@ -322,14 +323,14 @@ IOStatus FSRandomAccessFileTracingWrapper::Prefetch(uint64_t offset, size_t n,
IOStatus FSRandomAccessFileTracingWrapper::InvalidateCache(size_t offset,
size_t length) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->InvalidateCache(offset, length);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, length,
static_cast<uint64_t>(offset));
io_tracer_->WriteIOOp(io_record);
@ -339,13 +340,13 @@ IOStatus FSRandomAccessFileTracingWrapper::InvalidateCache(size_t offset,
IOStatus FSWritableFileTracingWrapper::Append(const Slice& data,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Append(data, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_,
data.size(), 0 /*Offset*/);
io_tracer_->WriteIOOp(io_record);
@ -355,14 +356,14 @@ IOStatus FSWritableFileTracingWrapper::Append(const Slice& data,
IOStatus FSWritableFileTracingWrapper::PositionedAppend(
const Slice& data, uint64_t offset, const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->PositionedAppend(data, offset, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_,
data.size(), offset);
io_tracer_->WriteIOOp(io_record);
@ -372,13 +373,13 @@ IOStatus FSWritableFileTracingWrapper::PositionedAppend(
IOStatus FSWritableFileTracingWrapper::Truncate(uint64_t size,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Truncate(size, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, size,
0 /*Offset*/);
io_tracer_->WriteIOOp(io_record);
@ -387,11 +388,11 @@ IOStatus FSWritableFileTracingWrapper::Truncate(uint64_t size,
IOStatus FSWritableFileTracingWrapper::Close(const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Close(options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
file_name_);
io_tracer_->WriteIOOp(io_record);
@ -400,13 +401,13 @@ IOStatus FSWritableFileTracingWrapper::Close(const IOOptions& options,
uint64_t FSWritableFileTracingWrapper::GetFileSize(const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
uint64_t file_size = target()->GetFileSize(options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOFileSize);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, "OK", file_name_, file_size);
io_tracer_->WriteIOOp(io_record);
return file_size;
@ -414,14 +415,14 @@ uint64_t FSWritableFileTracingWrapper::GetFileSize(const IOOptions& options,
IOStatus FSWritableFileTracingWrapper::InvalidateCache(size_t offset,
size_t length) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->InvalidateCache(offset, length);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, length,
static_cast<uint64_t>(offset));
io_tracer_->WriteIOOp(io_record);
@ -431,14 +432,14 @@ IOStatus FSWritableFileTracingWrapper::InvalidateCache(size_t offset,
IOStatus FSRandomRWFileTracingWrapper::Write(uint64_t offset, const Slice& data,
const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Write(offset, data, options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_,
data.size(), offset);
io_tracer_->WriteIOOp(io_record);
@ -449,14 +450,14 @@ IOStatus FSRandomRWFileTracingWrapper::Read(uint64_t offset, size_t n,
const IOOptions& options,
Slice* result, char* scratch,
IODebugContext* dbg) const {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Read(offset, n, options, result, scratch, dbg);
uint64_t elapsed = timer.ElapsedNanos();
uint64_t io_op_data = 0;
io_op_data |= (1 << IOTraceOp::kIOLen);
io_op_data |= (1 << IOTraceOp::kIOOffset);
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
__func__, elapsed, s.ToString(), file_name_, n,
offset);
io_tracer_->WriteIOOp(io_record);
@ -465,11 +466,11 @@ IOStatus FSRandomRWFileTracingWrapper::Read(uint64_t offset, size_t n,
IOStatus FSRandomRWFileTracingWrapper::Flush(const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Flush(options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
file_name_);
io_tracer_->WriteIOOp(io_record);
@ -478,11 +479,11 @@ IOStatus FSRandomRWFileTracingWrapper::Flush(const IOOptions& options,
IOStatus FSRandomRWFileTracingWrapper::Close(const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Close(options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
file_name_);
io_tracer_->WriteIOOp(io_record);
@ -491,11 +492,11 @@ IOStatus FSRandomRWFileTracingWrapper::Close(const IOOptions& options,
IOStatus FSRandomRWFileTracingWrapper::Sync(const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Sync(options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
file_name_);
io_tracer_->WriteIOOp(io_record);
@ -504,11 +505,11 @@ IOStatus FSRandomRWFileTracingWrapper::Sync(const IOOptions& options,
IOStatus FSRandomRWFileTracingWrapper::Fsync(const IOOptions& options,
IODebugContext* dbg) {
StopWatchNano timer(env_);
StopWatchNano timer(clock_);
timer.Start();
IOStatus s = target()->Fsync(options, dbg);
uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(),
file_name_);
io_tracer_->WriteIOOp(io_record);

@ -6,6 +6,7 @@
#pragma once
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "trace_replay/io_tracer.h"
namespace ROCKSDB_NAMESPACE {
@ -18,9 +19,11 @@ namespace ROCKSDB_NAMESPACE {
// overridden.
class FileSystemTracingWrapper : public FileSystemWrapper {
public:
FileSystemTracingWrapper(std::shared_ptr<FileSystem> t,
std::shared_ptr<IOTracer> io_tracer)
: FileSystemWrapper(t), io_tracer_(io_tracer), env_(Env::Default()) {}
FileSystemTracingWrapper(const std::shared_ptr<FileSystem>& t,
const std::shared_ptr<IOTracer>& io_tracer)
: FileSystemWrapper(t),
io_tracer_(io_tracer),
clock_(SystemClock::Default()) {}
~FileSystemTracingWrapper() override {}
@ -83,7 +86,7 @@ class FileSystemTracingWrapper : public FileSystemWrapper {
private:
std::shared_ptr<IOTracer> io_tracer_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
};
// The FileSystemPtr is a wrapper class that takes pointer to storage systems
@ -135,7 +138,7 @@ class FSSequentialFileTracingWrapper : public FSSequentialFileWrapper {
const std::string& file_name)
: FSSequentialFileWrapper(t),
io_tracer_(io_tracer),
env_(Env::Default()),
clock_(SystemClock::Default()),
file_name_(file_name) {}
~FSSequentialFileTracingWrapper() override {}
@ -151,7 +154,7 @@ class FSSequentialFileTracingWrapper : public FSSequentialFileWrapper {
private:
std::shared_ptr<IOTracer> io_tracer_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
std::string file_name_;
};
@ -207,7 +210,7 @@ class FSRandomAccessFileTracingWrapper : public FSRandomAccessFileWrapper {
const std::string& file_name)
: FSRandomAccessFileWrapper(t),
io_tracer_(io_tracer),
env_(Env::Default()),
clock_(SystemClock::Default()),
file_name_(file_name) {}
~FSRandomAccessFileTracingWrapper() override {}
@ -226,7 +229,7 @@ class FSRandomAccessFileTracingWrapper : public FSRandomAccessFileWrapper {
private:
std::shared_ptr<IOTracer> io_tracer_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
// Stores file name instead of full path.
std::string file_name_;
};
@ -282,7 +285,7 @@ class FSWritableFileTracingWrapper : public FSWritableFileWrapper {
const std::string& file_name)
: FSWritableFileWrapper(t),
io_tracer_(io_tracer),
env_(Env::Default()),
clock_(SystemClock::Default()),
file_name_(file_name) {}
~FSWritableFileTracingWrapper() override {}
@ -316,7 +319,7 @@ class FSWritableFileTracingWrapper : public FSWritableFileWrapper {
private:
std::shared_ptr<IOTracer> io_tracer_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
// Stores file name instead of full path.
std::string file_name_;
};
@ -379,7 +382,7 @@ class FSRandomRWFileTracingWrapper : public FSRandomRWFileWrapper {
const std::string& file_name)
: FSRandomRWFileWrapper(t),
io_tracer_(io_tracer),
env_(Env::Default()),
clock_(SystemClock::Default()),
file_name_(file_name) {}
~FSRandomRWFileTracingWrapper() override {}
@ -401,7 +404,7 @@ class FSRandomRWFileTracingWrapper : public FSRandomRWFileWrapper {
private:
std::shared_ptr<IOTracer> io_tracer_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
// Stores file name instead of full path.
std::string file_name_;
};

@ -15,17 +15,20 @@
#include "logging/logging.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
DeleteScheduler::DeleteScheduler(Env* env, FileSystem* fs,
int64_t rate_bytes_per_sec, Logger* info_log,
DeleteScheduler::DeleteScheduler(const std::shared_ptr<SystemClock>& clock,
FileSystem* fs, int64_t rate_bytes_per_sec,
Logger* info_log,
SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk)
: env_(env),
: clock_(clock),
fs_(fs),
total_trash_size_(0),
rate_bytes_per_sec_(rate_bytes_per_sec),
@ -223,14 +226,14 @@ void DeleteScheduler::BackgroundEmptyTrash() {
}
// Delete all files in queue_
uint64_t start_time = env_->NowMicros();
uint64_t start_time = clock_->NowMicros();
uint64_t total_deleted_bytes = 0;
int64_t current_delete_rate = rate_bytes_per_sec_.load();
while (!queue_.empty() && !closing_) {
if (current_delete_rate != rate_bytes_per_sec_.load()) {
// User changed the delete rate
current_delete_rate = rate_bytes_per_sec_.load();
start_time = env_->NowMicros();
start_time = clock_->NowMicros();
total_deleted_bytes = 0;
ROCKS_LOG_INFO(info_log_, "rate_bytes_per_sec is changed to %" PRIi64,
current_delete_rate);

@ -15,14 +15,15 @@
#include "monitoring/instrumented_mutex.h"
#include "port/port.h"
#include "rocksdb/file_system.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Env;
class FileSystem;
class Logger;
class SstFileManagerImpl;
class SystemClock;
// DeleteScheduler allows the DB to enforce a rate limit on file deletion,
// Instead of deleteing files immediately, files are marked as trash
@ -33,8 +34,9 @@ class SstFileManagerImpl;
// case DeleteScheduler will delete files immediately.
class DeleteScheduler {
public:
DeleteScheduler(Env* env, FileSystem* fs, int64_t rate_bytes_per_sec,
Logger* info_log, SstFileManagerImpl* sst_file_manager,
DeleteScheduler(const std::shared_ptr<SystemClock>& clock, FileSystem* fs,
int64_t rate_bytes_per_sec, Logger* info_log,
SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
~DeleteScheduler();
@ -99,7 +101,7 @@ class DeleteScheduler {
void MaybeCreateBackgroundThread();
Env* env_;
const std::shared_ptr<SystemClock> clock_;
FileSystem* fs_;
// total size of trash files

@ -95,9 +95,10 @@ class DeleteSchedulerTest : public testing::Test {
// Tests in this file are for DeleteScheduler component and don't create any
// DBs, so we need to set max_trash_db_ratio to 100% (instead of default
// 25%)
sst_file_mgr_.reset(new SstFileManagerImpl(
env_, env_->GetFileSystem(), nullptr, rate_bytes_per_sec_,
/* max_trash_db_ratio= */ 1.1, 128 * 1024));
sst_file_mgr_.reset(
new SstFileManagerImpl(env_->GetSystemClock(), env_->GetFileSystem(),
nullptr, rate_bytes_per_sec_,
/* max_trash_db_ratio= */ 1.1, 128 * 1024));
delete_scheduler_ = sst_file_mgr_->delete_scheduler();
sst_file_mgr_->SetStatisticsPtr(stats_);
}

@ -12,6 +12,7 @@
#include "rocksdb/file_system.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/types.h"
#include "trace_replay/io_tracer.h"
@ -67,14 +68,12 @@ inline IOStatus GenerateOneFileChecksum(
allow_mmap_reads, io_tracer);
}
inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, Env* env,
IOOptions& opts) {
if (!env) {
env = Env::Default();
}
inline IOStatus PrepareIOFromReadOptions(
const ReadOptions& ro, const std::shared_ptr<SystemClock>& clock,
IOOptions& opts) {
if (ro.deadline.count()) {
std::chrono::microseconds now = std::chrono::microseconds(env->NowMicros());
std::chrono::microseconds now =
std::chrono::microseconds(clock->NowMicros());
// Ensure there is atleast 1us available. We don't want to pass a value of
// 0 as that means no timeout
if (now >= ro.deadline) {

@ -419,15 +419,17 @@ Status SetIdentityFile(Env* env, const std::string& dbname,
return s;
}
IOStatus SyncManifest(Env* env, const ImmutableDBOptions* db_options,
IOStatus SyncManifest(const std::shared_ptr<SystemClock>& clock,
const ImmutableDBOptions* db_options,
WritableFileWriter* file) {
TEST_KILL_RANDOM("SyncManifest:0", rocksdb_kill_odds * REDUCE_ODDS2);
StopWatch sw(env, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS);
StopWatch sw(clock, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS);
return file->Sync(db_options->use_fsync);
}
Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
const std::string& dbname, std::string* parent_dir,
Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
const std::string& db_log_dir, const std::string& dbname,
std::string* parent_dir,
std::vector<std::string>* info_log_list) {
assert(parent_dir != nullptr);
assert(info_log_list != nullptr);
@ -443,7 +445,7 @@ Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
InfoLogPrefix info_log_prefix(!db_log_dir.empty(), dbname);
std::vector<std::string> file_names;
Status s = env->GetChildren(*parent_dir, &file_names);
Status s = fs->GetChildren(*parent_dir, IOOptions(), &file_names, nullptr);
if (!s.ok()) {
return s;

@ -27,6 +27,7 @@ namespace ROCKSDB_NAMESPACE {
class Env;
class Directory;
class SystemClock;
class WritableFileWriter;
#ifdef OS_WIN
@ -166,14 +167,16 @@ extern Status SetIdentityFile(Env* env, const std::string& dbname,
const std::string& db_id = {});
// Sync manifest file `file`.
extern IOStatus SyncManifest(Env* env, const ImmutableDBOptions* db_options,
extern IOStatus SyncManifest(const std::shared_ptr<SystemClock>& clock,
const ImmutableDBOptions* db_options,
WritableFileWriter* file);
// Return list of file names of info logs in `file_names`.
// The list only contains file name. The parent directory name is stored
// in `parent_dir`.
// `db_log_dir` should be the one as in options.db_log_dir
extern Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
extern Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
const std::string& db_log_dir,
const std::string& dbname,
std::string* parent_dir,
std::vector<std::string>* file_names);

@ -12,6 +12,7 @@
#include <algorithm>
#include <mutex>
#include "file/file_util.h"
#include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
@ -32,7 +33,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
Status s;
uint64_t elapsed = 0;
{
StopWatch sw(env_, stats_, hist_type_,
StopWatch sw(clock_, stats_, hist_type_,
(stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
true /*delay_enabled*/);
auto prev_perf_level = GetPerfLevel();
@ -68,7 +69,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
}
{
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
// Only user reads are expected to specify a timeout. And user reads
// are not subjected to rate_limiter and should go through only
// one iteration of this loop, so we don't need to check and adjust
@ -128,7 +129,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
#endif
{
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
// Only user reads are expected to specify a timeout. And user reads
// are not subjected to rate_limiter and should go through only
// one iteration of this loop, so we don't need to check and adjust
@ -205,7 +206,7 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
Status s;
uint64_t elapsed = 0;
{
StopWatch sw(env_, stats_, hist_type_,
StopWatch sw(clock_, stats_, hist_type_,
(stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
true /*delay_enabled*/);
auto prev_perf_level = GetPerfLevel();
@ -267,7 +268,7 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
#endif // ROCKSDB_LITE
{
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
s = file_->MultiRead(fs_reqs, num_fs_reqs, opts, nullptr);
}
@ -312,4 +313,12 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
return s;
}
// Populates `opts` from `ro`. Uses this reader's clock when one was supplied
// at construction; otherwise falls back to the process-wide default clock.
IOStatus RandomAccessFileReader::PrepareIOOptions(const ReadOptions& ro,
                                                  IOOptions& opts) {
  if (clock_ == nullptr) {
    return PrepareIOFromReadOptions(ro, SystemClock::Default(), opts);
  }
  return PrepareIOFromReadOptions(ro, clock_, opts);
}
} // namespace ROCKSDB_NAMESPACE

@ -14,7 +14,6 @@
#include "env/file_system_tracer.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/listener.h"
#include "rocksdb/options.h"
@ -24,6 +23,7 @@
namespace ROCKSDB_NAMESPACE {
class Statistics;
class HistogramImpl;
class SystemClock;
using AlignedBuf = std::unique_ptr<char[]>;
@ -67,7 +67,7 @@ class RandomAccessFileReader {
FSRandomAccessFilePtr file_;
std::string file_name_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
Statistics* stats_;
uint32_t hist_type_;
HistogramImpl* file_read_hist_;
@ -77,14 +77,15 @@ class RandomAccessFileReader {
public:
explicit RandomAccessFileReader(
std::unique_ptr<FSRandomAccessFile>&& raf, const std::string& _file_name,
Env* _env = nullptr, const std::shared_ptr<IOTracer>& io_tracer = nullptr,
const std::shared_ptr<SystemClock>& clock = nullptr,
const std::shared_ptr<IOTracer>& io_tracer = nullptr,
Statistics* stats = nullptr, uint32_t hist_type = 0,
HistogramImpl* file_read_hist = nullptr,
RateLimiter* rate_limiter = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {})
: file_(std::move(raf), io_tracer, _file_name),
file_name_(std::move(_file_name)),
env_(_env),
clock_(clock),
stats_(stats),
hist_type_(hist_type),
file_read_hist_(file_read_hist),
@ -137,6 +138,6 @@ class RandomAccessFileReader {
bool use_direct_io() const { return file_->use_direct_io(); }
Env* env() const { return env_; }
IOStatus PrepareIOOptions(const ReadOptions& ro, IOOptions& opts);
};
} // namespace ROCKSDB_NAMESPACE

@ -42,7 +42,8 @@ class RandomAccessFileReaderTest : public testing::Test {
std::string fpath = Path(fname);
std::unique_ptr<FSRandomAccessFile> f;
ASSERT_OK(fs_->NewRandomAccessFile(fpath, opts, &f, nullptr));
(*reader).reset(new RandomAccessFileReader(std::move(f), fpath, env_));
(*reader).reset(new RandomAccessFileReader(std::move(f), fpath,
env_->GetSystemClock()));
}
void AssertResult(const std::string& content,

@ -18,12 +18,12 @@
namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE
SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs,
std::shared_ptr<Logger> logger,
int64_t rate_bytes_per_sec,
double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk)
: env_(env),
SstFileManagerImpl::SstFileManagerImpl(
const std::shared_ptr<SystemClock>& clock,
const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<Logger>& logger, int64_t rate_bytes_per_sec,
double max_trash_db_ratio, uint64_t bytes_max_delete_chunk)
: clock_(clock),
fs_(fs),
logger_(logger),
total_files_size_(0),
@ -31,8 +31,8 @@ SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs,
compaction_buffer_size_(0),
cur_compactions_reserved_size_(0),
max_allowed_space_(0),
delete_scheduler_(env, fs_.get(), rate_bytes_per_sec, logger.get(), this,
max_trash_db_ratio, bytes_max_delete_chunk),
delete_scheduler_(clock_, fs_.get(), rate_bytes_per_sec, logger.get(),
this, max_trash_db_ratio, bytes_max_delete_chunk),
cv_(&mu_),
closing_(false),
bg_thread_(nullptr),
@ -347,7 +347,7 @@ void SstFileManagerImpl::ClearError() {
if (!error_handler_list_.empty()) {
// If there are more instances to be recovered, reschedule after 5
// seconds
int64_t wait_until = env_->NowMicros() + 5000000;
int64_t wait_until = clock_->NowMicros() + 5000000;
cv_.TimedWait(wait_until);
}
@ -485,7 +485,6 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) {
const auto& fs = env->GetFileSystem();
return NewSstFileManager(env, fs, info_log, trash_dir, rate_bytes_per_sec,
delete_existing_trash, status, max_trash_db_ratio,
bytes_max_delete_chunk);
@ -498,8 +497,9 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<FileSystem> fs,
bool delete_existing_trash, Status* status,
double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) {
const auto& clock = env->GetSystemClock();
SstFileManagerImpl* res =
new SstFileManagerImpl(env, fs, info_log, rate_bytes_per_sec,
new SstFileManagerImpl(clock, fs, info_log, rate_bytes_per_sec,
max_trash_db_ratio, bytes_max_delete_chunk);
// trash_dir is deprecated and not needed anymore, but if user passed it

@ -12,14 +12,13 @@
#include "port/port.h"
#include "db/compaction/compaction.h"
#include "db/error_handler.h"
#include "file/delete_scheduler.h"
#include "rocksdb/file_system.h"
#include "rocksdb/sst_file_manager.h"
namespace ROCKSDB_NAMESPACE {
class Env;
class ErrorHandler;
class FileSystem;
class SystemClock;
class Logger;
// SstFileManager is used to track SST files in the DB and control their
@ -27,8 +26,9 @@ class Logger;
// All SstFileManager public functions are thread-safe.
class SstFileManagerImpl : public SstFileManager {
public:
explicit SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs,
std::shared_ptr<Logger> logger,
explicit SstFileManagerImpl(const std::shared_ptr<SystemClock>& clock,
const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<Logger>& logger,
int64_t rate_bytes_per_sec,
double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk);
@ -152,7 +152,7 @@ class SstFileManagerImpl : public SstFileManager {
return bg_err_.severity() == Status::Severity::kSoftError;
}
Env* env_;
std::shared_ptr<SystemClock> clock_;
std::shared_ptr<FileSystem> fs_;
std::shared_ptr<Logger> logger_;
// Mutex to protect tracked_files_, total_files_size_

@ -16,6 +16,7 @@
#include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "util/random.h"
#include "util/rate_limiter.h"
@ -331,7 +332,7 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
IOSTATS_TIMER_GUARD(fsync_nanos);
TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0");
auto prev_perf_level = GetPerfLevel();
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_);
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_);
#ifndef ROCKSDB_LITE
FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) {
@ -406,7 +407,7 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
#endif
{
auto prev_perf_level = GetPerfLevel();
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_);
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_);
s = writable_file_->Append(Slice(src, allowed), IOOptions(), nullptr);
SetPerfLevel(prev_perf_level);
}

@ -14,7 +14,6 @@
#include "db/version_edit.h"
#include "env/file_system_tracer.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/file_checksum.h"
#include "rocksdb/file_system.h"
#include "rocksdb/io_status.h"
@ -25,6 +24,7 @@
namespace ROCKSDB_NAMESPACE {
class Statistics;
class SystemClock;
// WritableFileWriter is a wrapper on top of Env::WritableFile. It provides
// facilities to:
@ -121,7 +121,7 @@ class WritableFileWriter {
std::string file_name_;
FSWritableFilePtr writable_file_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
AlignedBuffer buf_;
size_t max_buffer_size_;
// Actually written data size can be used for truncate
@ -145,14 +145,15 @@ class WritableFileWriter {
public:
WritableFileWriter(
std::unique_ptr<FSWritableFile>&& file, const std::string& _file_name,
const FileOptions& options, Env* env = nullptr,
const FileOptions& options,
const std::shared_ptr<SystemClock>& clock = nullptr,
const std::shared_ptr<IOTracer>& io_tracer = nullptr,
Statistics* stats = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {},
FileChecksumGenFactory* file_checksum_gen_factory = nullptr)
: file_name_(_file_name),
writable_file_(std::move(file), io_tracer, _file_name),
env_(env),
clock_(clock),
buf_(),
max_buffer_size_(options.writable_file_max_buffer_size),
filesize_(0),

@ -59,6 +59,7 @@ class RateLimiter;
class ThreadStatusUpdater;
struct ThreadStatus;
class FileSystem;
class SystemClock;
const size_t kDefaultPageSize = 4 * 1024;
@ -150,8 +151,11 @@ class Env {
};
Env();
// Construct an Env with a separate FileSystem implementation
Env(std::shared_ptr<FileSystem> fs);
// Construct an Env with a separate FileSystem and/or SystemClock
// implementation
explicit Env(const std::shared_ptr<FileSystem>& fs);
Env(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock);
// No copying allowed
Env(const Env&) = delete;
void operator=(const Env&) = delete;
@ -576,6 +580,10 @@ class Env {
// could be a fully implemented one, or a wrapper class around the Env
const std::shared_ptr<FileSystem>& GetFileSystem() const;
// Get the SystemClock implementation this Env was constructed with. It
// could be a fully implemented one, or a wrapper class around the Env
const std::shared_ptr<SystemClock>& GetSystemClock() const;
// If you're adding methods here, remember to add them to EnvWrapper too.
protected:
@ -586,6 +594,9 @@ class Env {
// Pointer to the underlying FileSystem implementation
std::shared_ptr<FileSystem> file_system_;
// Pointer to the underlying SystemClock implementation
std::shared_ptr<SystemClock> system_clock_;
private:
static const size_t kMaxHostNameLen = 256;
};

@ -0,0 +1,102 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <rocksdb/rocksdb_namespace.h>
#include <rocksdb/status.h>
#include <stdint.h>
#include <memory>
#ifdef _WIN32
// Windows API macro interference
#undef GetCurrentTime
#endif
namespace ROCKSDB_NAMESPACE {
struct ConfigOptions;
// A SystemClock is an interface used by the rocksdb implementation to access
// operating system time-related functionality. It isolates the time-related
// functions of an Env so that they can be mocked or redirected independently.
class SystemClock {
 public:
  virtual ~SystemClock() {}

  // Type name used to identify SystemClock instances (e.g. for registration).
  static const char* Type() { return "SystemClock"; }

  // The name of this system clock implementation.
  virtual const char* Name() const = 0;

  // Return a default SystemClock suitable for the current operating
  // system.
  static const std::shared_ptr<SystemClock>& Default();

  // Returns the number of micro-seconds since some fixed point in time.
  // It is often used as system time such as in GenericRateLimiter
  // and other places so a port needs to return system time in order to work.
  virtual uint64_t NowMicros() = 0;

  // Returns the number of nano-seconds since some fixed point in time. Only
  // useful for computing deltas of time in one run.
  // Default implementation simply relies on NowMicros (scaled by 1000).
  // In platform-specific implementations, NowNanos() should return time points
  // that are MONOTONIC.
  virtual uint64_t NowNanos() { return NowMicros() * 1000; }

  // Returns the number of micro-seconds of CPU time used by the current
  // thread. 0 indicates not supported (the default).
  virtual uint64_t CPUMicros() { return 0; }

  // Returns the number of nano-seconds of CPU time used by the current thread.
  // Default implementation simply relies on CPUMicros (scaled by 1000).
  // 0 indicates not supported.
  virtual uint64_t CPUNanos() { return CPUMicros() * 1000; }

  // Sleep/delay the calling thread for the prescribed number of micro-seconds.
  virtual void SleepForMicroseconds(int micros) = 0;

  // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
  // Only overwrites *unix_time on success.
  virtual Status GetCurrentTime(int64_t* unix_time) = 0;

  // Converts seconds-since-Jan-01-1970 to a printable string.
  virtual std::string TimeToString(uint64_t time) = 0;
};
// Wrapper class for a SystemClock. Redirects all methods (except Name)
// of the SystemClock interface to the target/wrapped class.
class SystemClockWrapper : public SystemClock {
public:
explicit SystemClockWrapper(const std::shared_ptr<SystemClock>& t)
: target_(t) {}
uint64_t NowMicros() override { return target_->NowMicros(); }
uint64_t NowNanos() override { return target_->NowNanos(); }
uint64_t CPUMicros() override { return target_->CPUMicros(); }
uint64_t CPUNanos() override { return target_->CPUNanos(); }
virtual void SleepForMicroseconds(int micros) override {
return target_->SleepForMicroseconds(micros);
}
Status GetCurrentTime(int64_t* unix_time) override {
return target_->GetCurrentTime(unix_time);
}
std::string TimeToString(uint64_t time) override {
return target_->TimeToString(time);
}
protected:
std::shared_ptr<SystemClock> target_;
};
} // end namespace ROCKSDB_NAMESPACE

@ -6,8 +6,12 @@
#include "logging/auto_roll_logger.h"
#include <algorithm>
#include "file/filename.h"
#include "logging/logging.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
@ -15,7 +19,9 @@ namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE
// -- AutoRollLogger
AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname,
AutoRollLogger::AutoRollLogger(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock,
const std::string& dbname,
const std::string& db_log_dir,
size_t log_max_size,
size_t log_file_time_to_roll,
@ -24,24 +30,26 @@ AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname,
: Logger(log_level),
dbname_(dbname),
db_log_dir_(db_log_dir),
env_(env),
fs_(fs),
clock_(clock),
status_(Status::OK()),
kMaxLogFileSize(log_max_size),
kLogFileTimeToRoll(log_file_time_to_roll),
kKeepLogFileNum(keep_log_file_num),
cached_now(static_cast<uint64_t>(env_->NowMicros() * 1e-6)),
cached_now(static_cast<uint64_t>(clock_->NowMicros() * 1e-6)),
ctime_(cached_now),
cached_now_access_count(0),
call_NowMicros_every_N_records_(100),
mutex_() {
Status s = env->GetAbsolutePath(dbname, &db_absolute_path_);
Status s = fs->GetAbsolutePath(dbname, io_options_, &db_absolute_path_,
&io_context_);
if (s.IsNotSupported()) {
db_absolute_path_ = dbname;
} else {
status_ = s;
}
log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_);
if (env_->FileExists(log_fname_).ok()) {
if (fs_->FileExists(log_fname_, io_options_, &io_context_).ok()) {
RollLogFile();
}
GetExistingFiles();
@ -53,7 +61,7 @@ AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname,
Status AutoRollLogger::ResetLogger() {
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger");
status_ = env_->NewLogger(log_fname_, &logger_);
status_ = fs_->NewLogger(log_fname_, io_options_, &logger_, &io_context_);
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger");
if (!status_.ok()) {
@ -67,7 +75,7 @@ Status AutoRollLogger::ResetLogger() {
"The underlying logger doesn't support GetLogFileSize()");
}
if (status_.ok()) {
cached_now = static_cast<uint64_t>(env_->NowMicros() * 1e-6);
cached_now = static_cast<uint64_t>(clock_->NowMicros() * 1e-6);
ctime_ = cached_now;
cached_now_access_count = 0;
}
@ -79,14 +87,14 @@ void AutoRollLogger::RollLogFile() {
// This function is called when log is rotating. Two rotations
// can happen quickly (NowMicro returns same value). To not overwrite
// previous log file we increment by one micro second and try again.
uint64_t now = env_->NowMicros();
uint64_t now = clock_->NowMicros();
std::string old_fname;
do {
old_fname = OldInfoLogFileName(
dbname_, now, db_absolute_path_, db_log_dir_);
now++;
} while (env_->FileExists(old_fname).ok());
Status s = env_->RenameFile(log_fname_, old_fname);
} while (fs_->FileExists(old_fname, io_options_, &io_context_).ok());
Status s = fs_->RenameFile(log_fname_, old_fname, io_options_, &io_context_);
if (!s.ok()) {
// What should we do on error?
}
@ -103,7 +111,7 @@ void AutoRollLogger::GetExistingFiles() {
std::string parent_dir;
std::vector<std::string> info_log_files;
Status s =
GetInfoLogFiles(env_, db_log_dir_, dbname_, &parent_dir, &info_log_files);
GetInfoLogFiles(fs_, db_log_dir_, dbname_, &parent_dir, &info_log_files);
if (status_.ok()) {
status_ = s;
}
@ -117,7 +125,7 @@ void AutoRollLogger::GetExistingFiles() {
}
Status AutoRollLogger::TrimOldLogFiles() {
// Here we directly list info files and delete them through Env.
// Here we directly list info files and delete them through FileSystem.
// The deletion isn't going through DB, so there are shortcomings:
// 1. the deletion is not rate limited by SstFileManager
// 2. there is a chance that an I/O will be issued here
@ -130,7 +138,8 @@ Status AutoRollLogger::TrimOldLogFiles() {
// it's essentially the same thing, and checking empty before accessing
// the queue feels safer.
while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) {
Status s = env_->DeleteFile(old_log_files_.front());
Status s =
fs_->DeleteFile(old_log_files_.front(), io_options_, &io_context_);
// Remove the file from the tracking anyway. It's possible that
// DB cleaned up the old log file, or people cleaned it up manually.
old_log_files_.pop();
@ -241,7 +250,7 @@ void AutoRollLogger::LogHeader(const char* format, va_list args) {
bool AutoRollLogger::LogExpired() {
if (cached_now_access_count >= call_NowMicros_every_N_records_) {
cached_now = static_cast<uint64_t>(env_->NowMicros() * 1e-6);
cached_now = static_cast<uint64_t>(clock_->NowMicros() * 1e-6);
cached_now_access_count = 0;
}
@ -267,15 +276,16 @@ Status CreateLoggerFromOptions(const std::string& dbname,
std::string fname =
InfoLogFileName(dbname, db_absolute_path, options.db_log_dir);
const auto& clock = env->GetSystemClock();
env->CreateDirIfMissing(dbname)
.PermitUncheckedError(); // In case it does not exist
// Currently we only support roll by time-to-roll and log size
#ifndef ROCKSDB_LITE
if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) {
AutoRollLogger* result = new AutoRollLogger(
env, dbname, options.db_log_dir, options.max_log_file_size,
options.log_file_time_to_roll, options.keep_log_file_num,
options.info_log_level);
env->GetFileSystem(), clock, dbname, options.db_log_dir,
options.max_log_file_size, options.log_file_time_to_roll,
options.keep_log_file_num, options.info_log_level);
s = result->GetStatus();
if (!s.ok()) {
delete result;
@ -286,9 +296,9 @@ Status CreateLoggerFromOptions(const std::string& dbname,
}
#endif // !ROCKSDB_LITE
// Open a log file in the same directory as the db
env->RenameFile(fname,
OldInfoLogFileName(dbname, env->NowMicros(), db_absolute_path,
options.db_log_dir))
env->RenameFile(
fname, OldInfoLogFileName(dbname, clock->NowMicros(), db_absolute_path,
options.db_log_dir))
.PermitUncheckedError();
s = env->NewLogger(fname, logger);
if (logger->get() != nullptr) {

@ -18,14 +18,18 @@
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
class FileSystem;
class SystemClock;
#ifndef ROCKSDB_LITE
// Rolls the log file by size and/or time
class AutoRollLogger : public Logger {
public:
AutoRollLogger(Env* env, const std::string& dbname,
const std::string& db_log_dir, size_t log_max_size,
size_t log_file_time_to_roll, size_t keep_log_file_num,
AutoRollLogger(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock,
const std::string& dbname, const std::string& db_log_dir,
size_t log_max_size, size_t log_file_time_to_roll,
size_t keep_log_file_num,
const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL);
using Logger::Logv;
@ -134,7 +138,8 @@ class AutoRollLogger : public Logger {
std::string dbname_;
std::string db_log_dir_;
std::string db_absolute_path_;
Env* env_;
std::shared_ptr<FileSystem> fs_;
std::shared_ptr<SystemClock> clock_;
std::shared_ptr<Logger> logger_;
// current status of the logger
Status status_;
@ -148,11 +153,13 @@ class AutoRollLogger : public Logger {
// Full path is stored here. It consumes significantly more memory
// than only storing file name. Can optimize if it causes a problem.
std::queue<std::string> old_log_files_;
// to avoid frequent env->NowMicros() calls, we cached the current time
// to avoid frequent clock->NowMicros() calls, we cached the current time
uint64_t cached_now;
uint64_t ctime_;
uint64_t cached_now_access_count;
uint64_t call_NowMicros_every_N_records_;
IOOptions io_options_;
IODebugContext io_context_;
mutable port::Mutex mutex_;
};
#endif // !ROCKSDB_LITE

@ -7,7 +7,9 @@
#ifndef ROCKSDB_LITE
#include "logging/auto_roll_logger.h"
#include <sys/stat.h>
#include <algorithm>
#include <cmath>
#include <fstream>
@ -16,18 +18,24 @@
#include <string>
#include <thread>
#include <vector>
#include "logging/logging.h"
#include "port/port.h"
#include "rocksdb/db.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE {
namespace {
class NoSleepEnv : public EnvWrapper {
class NoSleepClock : public SystemClockWrapper {
public:
NoSleepEnv(Env* base) : EnvWrapper(base) {}
NoSleepClock(
const std::shared_ptr<SystemClock>& base = SystemClock::Default())
: SystemClockWrapper(base) {}
const char* Name() const override { return "NoSleepClock"; }
void SleepForMicroseconds(int micros) override {
fake_time_ += static_cast<uint64_t>(micros);
}
@ -75,7 +83,9 @@ class AutoRollLoggerTest : public testing::Test {
void RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size,
const std::string& log_message);
void RollLogFileByTimeTest(Env*, AutoRollLogger* logger, size_t time,
void RollLogFileByTimeTest(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& sc,
AutoRollLogger* logger, size_t time,
const std::string& log_message);
// return list of files under kTestDir that contains "LOG"
std::vector<std::string> GetLogFiles() {
@ -156,21 +166,22 @@ void AutoRollLoggerTest::RollLogFileBySizeTest(AutoRollLogger* logger,
ASSERT_TRUE(message_size == logger->GetLogFileSize());
}
void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger,
size_t time,
const std::string& log_message) {
void AutoRollLoggerTest::RollLogFileByTimeTest(
const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& sc, AutoRollLogger* logger, size_t time,
const std::string& log_message) {
uint64_t expected_ctime;
uint64_t actual_ctime;
uint64_t total_log_size;
EXPECT_OK(env->GetFileSize(kLogFile, &total_log_size));
EXPECT_OK(fs->GetFileSize(kLogFile, IOOptions(), &total_log_size, nullptr));
expected_ctime = logger->TEST_ctime();
logger->SetCallNowMicrosEveryNRecords(0);
// -- Write to the log for several times, which is supposed
// to be finished before time.
for (int i = 0; i < 10; ++i) {
env->SleepForMicroseconds(50000);
sc->SleepForMicroseconds(50000);
LogMessage(logger, log_message.c_str());
EXPECT_OK(logger->GetStatus());
// Make sure we always write to the same log file (by
@ -185,7 +196,7 @@ void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger,
}
// -- Make the log file expire
env->SleepForMicroseconds(static_cast<int>(time * 1000000));
sc->SleepForMicroseconds(static_cast<int>(time * 1000000));
LogMessage(logger, log_message.c_str());
// At this time, the new log file should be created.
@ -199,15 +210,15 @@ TEST_F(AutoRollLoggerTest, RollLogFileBySize) {
size_t log_max_size = 1024 * 5;
size_t keep_log_file_num = 10;
AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0,
keep_log_file_num);
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_max_size, 0, keep_log_file_num);
RollLogFileBySizeTest(&logger, log_max_size,
kSampleMessage + ":RollLogFileBySize");
}
TEST_F(AutoRollLoggerTest, RollLogFileByTime) {
NoSleepEnv nse(Env::Default());
auto nsc = std::make_shared<NoSleepClock>();
size_t time = 2;
size_t log_size = 1024 * 5;
@ -216,10 +227,11 @@ TEST_F(AutoRollLoggerTest, RollLogFileByTime) {
InitTestDb();
// -- Test the existence of file during the server restart.
ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile));
AutoRollLogger logger(&nse, kTestDir, "", log_size, time, keep_log_file_num);
AutoRollLogger logger(default_env->GetFileSystem(), nsc, kTestDir, "",
log_size, time, keep_log_file_num);
ASSERT_OK(default_env->FileExists(kLogFile));
RollLogFileByTimeTest(&nse, &logger, time,
RollLogFileByTimeTest(default_env->GetFileSystem(), nsc, &logger, time,
kSampleMessage + ":RollLogFileByTime");
}
@ -254,15 +266,17 @@ TEST_F(AutoRollLoggerTest, OpenLogFilesMultipleTimesWithOptionLog_max_size) {
size_t log_size = 1024;
size_t keep_log_file_num = 10;
AutoRollLogger* logger = new AutoRollLogger(Env::Default(), kTestDir, "",
log_size, 0, keep_log_file_num);
AutoRollLogger* logger =
new AutoRollLogger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_size, 0, keep_log_file_num);
LogMessage(logger, kSampleMessage.c_str());
ASSERT_GT(logger->GetLogFileSize(), kZero);
delete logger;
// reopens the log file and an empty log file will be created.
logger = new AutoRollLogger(Env::Default(), kTestDir, "", log_size, 0, 10);
logger = new AutoRollLogger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_size, 0, 10);
ASSERT_EQ(logger->GetLogFileSize(), kZero);
delete logger;
}
@ -273,16 +287,16 @@ TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) {
InitTestDb();
NoSleepEnv nse(Env::Default());
AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time,
keep_log_file_num);
auto nsc = std::make_shared<NoSleepClock>();
AutoRollLogger logger(FileSystem::Default(), nsc, kTestDir, "", log_max_size,
time, keep_log_file_num);
// Test the ability to roll by size
RollLogFileBySizeTest(&logger, log_max_size,
kSampleMessage + ":CompositeRollByTimeAndSizeLogger");
// Test the ability to roll by Time
RollLogFileByTimeTest(&nse, &logger, time,
RollLogFileByTimeTest(FileSystem::Default(), nsc, &logger, time,
kSampleMessage + ":CompositeRollByTimeAndSizeLogger");
}
@ -291,7 +305,9 @@ TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) {
// port
TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
DBOptions options;
NoSleepEnv nse(Env::Default());
auto nsc = std::make_shared<NoSleepClock>();
std::unique_ptr<Env> nse(new CompositeEnvWrapper(Env::Default(), nsc));
std::shared_ptr<Logger> logger;
// Normal logger
@ -310,14 +326,15 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
kSampleMessage + ":CreateLoggerFromOptions - size");
// Only roll by Time
options.env = &nse;
options.env = nse.get();
InitTestDb();
options.max_log_file_size = 0;
options.log_file_time_to_roll = 2;
ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger));
auto_roll_logger =
dynamic_cast<AutoRollLogger*>(logger.get());
RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll,
RollLogFileByTimeTest(options.env->GetFileSystem(), nsc, auto_roll_logger,
options.log_file_time_to_roll,
kSampleMessage + ":CreateLoggerFromOptions - time");
// roll by both Time and size
@ -329,7 +346,8 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
dynamic_cast<AutoRollLogger*>(logger.get());
RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size,
kSampleMessage + ":CreateLoggerFromOptions - both");
RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll,
RollLogFileByTimeTest(options.env->GetFileSystem(), nsc, auto_roll_logger,
options.log_file_time_to_roll,
kSampleMessage + ":CreateLoggerFromOptions - both");
// Set keep_log_file_num
@ -402,8 +420,8 @@ TEST_F(AutoRollLoggerTest, AutoDeleting) {
const size_t kMaxFileSize = 512;
{
size_t log_num = 8;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0,
log_num);
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
dbname, db_log_dir, kMaxFileSize, 0, log_num);
RollNTimesBySize(&logger, log_num, kMaxFileSize);
ASSERT_EQ(log_num, GetLogFiles().size());
@ -411,8 +429,8 @@ TEST_F(AutoRollLoggerTest, AutoDeleting) {
// Shrink number of files
{
size_t log_num = 5;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0,
log_num);
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
dbname, db_log_dir, kMaxFileSize, 0, log_num);
ASSERT_EQ(log_num, GetLogFiles().size());
RollNTimesBySize(&logger, 3, kMaxFileSize);
@ -422,8 +440,8 @@ TEST_F(AutoRollLoggerTest, AutoDeleting) {
// Increase number of files again.
{
size_t log_num = 7;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0,
log_num);
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
dbname, db_log_dir, kMaxFileSize, 0, log_num);
ASSERT_EQ(6, GetLogFiles().size());
RollNTimesBySize(&logger, 3, kMaxFileSize);
@ -485,7 +503,8 @@ TEST_F(AutoRollLoggerTest, InfoLogLevel) {
// an extra-scope to force the AutoRollLogger to flush the log file when it
// becomes out of scope.
{
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10);
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_size, 0, 10);
for (int log_level = InfoLogLevel::HEADER_LEVEL;
log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) {
logger.SetInfoLogLevel((InfoLogLevel)log_level);
@ -523,7 +542,8 @@ TEST_F(AutoRollLoggerTest, Close) {
size_t log_size = 8192;
size_t log_lines = 0;
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10);
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), kTestDir,
"", log_size, 0, 10);
for (int log_level = InfoLogLevel::HEADER_LEVEL;
log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) {
logger.SetInfoLogLevel((InfoLogLevel)log_level);
@ -590,8 +610,9 @@ TEST_F(AutoRollLoggerTest, LogHeaderTest) {
InitTestDb();
AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/"",
LOG_MAX_SIZE, /*log_file_time_to_roll=*/0,
AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
kTestDir, /*db_log_dir=*/"", LOG_MAX_SIZE,
/*log_file_time_to_roll=*/0,
/*keep_log_file_num=*/10);
if (test_num == 0) {

@ -31,7 +31,7 @@ class EnvLogger : public Logger {
const std::string& fname, const EnvOptions& options, Env* env,
InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL)
: Logger(log_level),
file_(std::move(writable_file), fname, options, env),
file_(std::move(writable_file), fname, options, env->GetSystemClock()),
last_flush_micros_(0),
env_(env),
flush_pending_(false) {}

@ -31,6 +31,7 @@ int main() {
#include "rocksdb/memtablerep.h"
#include "rocksdb/options.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/write_buffer_manager.h"
#include "test_util/testutil.h"
#include "util/gflags_compat.h"
@ -417,7 +418,7 @@ class Benchmark {
uint64_t bytes_written = 0;
uint64_t bytes_read = 0;
uint64_t read_hits = 0;
StopWatchNano timer(Env::Default(), true);
StopWatchNano timer(SystemClock::Default(), true);
RunThreads(&threads, &bytes_written, &bytes_read, true, &read_hits);
auto elapsed_time = static_cast<double>(timer.ElapsedNanos() / 1000);
std::cout << "Elapsed time: " << static_cast<int>(elapsed_time) << " us"

@ -11,10 +11,11 @@
#include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE {
const size_t kSizeDummyEntry = 256 * 1024;
class WriteBufferManagerTest : public testing::Test {};
#ifndef ROCKSDB_LITE
const size_t kSizeDummyEntry = 256 * 1024;
TEST_F(WriteBufferManagerTest, ShouldFlush) {
// A write buffer manager of size 10MB
std::unique_ptr<WriteBufferManager> wbf(

@ -8,6 +8,7 @@
#include <cmath>
#include "monitoring/histogram_windowing.h"
#include "rocksdb/system_clock.h"
#include "test_util/mock_time_env.h"
#include "test_util/testharness.h"
#include "util/random.h"
@ -19,7 +20,8 @@ class HistogramTest : public testing::Test {};
namespace {
const double kIota = 0.1;
const HistogramBucketMapper bucketMapper;
MockTimeEnv* env = new MockTimeEnv(Env::Default());
std::shared_ptr<MockSystemClock> clock =
std::make_shared<MockSystemClock>(SystemClock::Default());
}
void PopulateHistogram(Histogram& histogram,
@ -29,11 +31,11 @@ void PopulateHistogram(Histogram& histogram,
for (uint64_t i = low; i <= high; i++) {
histogram.Add(i);
// sleep a random microseconds [0-10)
env->MockSleepForMicroseconds(rnd.Uniform(10));
clock->MockSleepForMicroseconds(rnd.Uniform(10));
}
}
// make sure each data population at least take some time
env->MockSleepForMicroseconds(1);
clock->MockSleepForMicroseconds(1);
}
void BasicOperation(Histogram& histogram) {
@ -139,23 +141,23 @@ TEST_F(HistogramTest, HistogramWindowingExpire) {
HistogramWindowingImpl
histogramWindowing(num_windows, micros_per_window, min_num_per_window);
histogramWindowing.TEST_UpdateEnv(env);
histogramWindowing.TEST_UpdateClock(clock);
PopulateHistogram(histogramWindowing, 1, 1, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 100);
ASSERT_EQ(histogramWindowing.min(), 1);
ASSERT_EQ(histogramWindowing.max(), 1);
ASSERT_EQ(histogramWindowing.Average(), 1);
PopulateHistogram(histogramWindowing, 2, 2, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 200);
ASSERT_EQ(histogramWindowing.min(), 1);
ASSERT_EQ(histogramWindowing.max(), 2);
ASSERT_EQ(histogramWindowing.Average(), 1.5);
PopulateHistogram(histogramWindowing, 3, 3, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 300);
ASSERT_EQ(histogramWindowing.min(), 1);
ASSERT_EQ(histogramWindowing.max(), 3);
@ -163,7 +165,7 @@ TEST_F(HistogramTest, HistogramWindowingExpire) {
// dropping oldest window with value 1, remaining 2 ~ 4
PopulateHistogram(histogramWindowing, 4, 4, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 300);
ASSERT_EQ(histogramWindowing.min(), 2);
ASSERT_EQ(histogramWindowing.max(), 4);
@ -171,7 +173,7 @@ TEST_F(HistogramTest, HistogramWindowingExpire) {
// dropping oldest window with value 2, remaining 3 ~ 5
PopulateHistogram(histogramWindowing, 5, 5, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 300);
ASSERT_EQ(histogramWindowing.min(), 3);
ASSERT_EQ(histogramWindowing.max(), 5);
@ -187,20 +189,20 @@ TEST_F(HistogramTest, HistogramWindowingMerge) {
histogramWindowing(num_windows, micros_per_window, min_num_per_window);
HistogramWindowingImpl
otherWindowing(num_windows, micros_per_window, min_num_per_window);
histogramWindowing.TEST_UpdateEnv(env);
otherWindowing.TEST_UpdateEnv(env);
histogramWindowing.TEST_UpdateClock(clock);
otherWindowing.TEST_UpdateClock(clock);
PopulateHistogram(histogramWindowing, 1, 1, 100);
PopulateHistogram(otherWindowing, 1, 1, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
PopulateHistogram(histogramWindowing, 2, 2, 100);
PopulateHistogram(otherWindowing, 2, 2, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
PopulateHistogram(histogramWindowing, 3, 3, 100);
PopulateHistogram(otherWindowing, 3, 3, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
histogramWindowing.Merge(otherWindowing);
ASSERT_EQ(histogramWindowing.num(), 600);
@ -210,14 +212,14 @@ TEST_F(HistogramTest, HistogramWindowingMerge) {
// dropping oldest window with value 1, remaining 2 ~ 4
PopulateHistogram(histogramWindowing, 4, 4, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 500);
ASSERT_EQ(histogramWindowing.min(), 2);
ASSERT_EQ(histogramWindowing.max(), 4);
// dropping oldest window with value 2, remaining 3 ~ 5
PopulateHistogram(histogramWindowing, 5, 5, 100);
env->MockSleepForMicroseconds(micros_per_window);
clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 400);
ASSERT_EQ(histogramWindowing.min(), 3);
ASSERT_EQ(histogramWindowing.max(), 5);

@ -8,15 +8,17 @@
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "monitoring/histogram_windowing.h"
#include "monitoring/histogram.h"
#include "util/cast_util.h"
#include <algorithm>
#include "monitoring/histogram.h"
#include "rocksdb/system_clock.h"
#include "util/cast_util.h"
namespace ROCKSDB_NAMESPACE {
HistogramWindowingImpl::HistogramWindowingImpl() {
env_ = Env::Default();
clock_ = SystemClock::Default();
window_stats_.reset(new HistogramStat[static_cast<size_t>(num_windows_)]);
Clear();
}
@ -28,7 +30,7 @@ HistogramWindowingImpl::HistogramWindowingImpl(
num_windows_(num_windows),
micros_per_window_(micros_per_window),
min_num_per_window_(min_num_per_window) {
env_ = Env::Default();
clock_ = SystemClock::Default();
window_stats_.reset(new HistogramStat[static_cast<size_t>(num_windows_)]);
Clear();
}
@ -44,7 +46,7 @@ void HistogramWindowingImpl::Clear() {
window_stats_[i].Clear();
}
current_window_.store(0, std::memory_order_relaxed);
last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed);
last_swap_time_.store(clock_->NowMicros(), std::memory_order_relaxed);
}
bool HistogramWindowingImpl::Empty() const { return stats_.Empty(); }
@ -129,7 +131,7 @@ void HistogramWindowingImpl::Data(HistogramData * const data) const {
}
void HistogramWindowingImpl::TimerTick() {
uint64_t curr_time = env_->NowMicros();
uint64_t curr_time = clock_->NowMicros();
size_t curr_window_ = static_cast<size_t>(current_window());
if (curr_time - last_swap_time() > micros_per_window_ &&
window_stats_[curr_window_].num() >= min_num_per_window_) {
@ -144,7 +146,7 @@ void HistogramWindowingImpl::SwapHistoryBucket() {
// If mutex is held by Merge() or Clear(), next Add() will take care of the
// swap, if needed.
if (mutex_.try_lock()) {
last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed);
last_swap_time_.store(clock_->NowMicros(), std::memory_order_relaxed);
uint64_t curr_window = current_window();
uint64_t next_window = (curr_window == num_windows_ - 1) ?

@ -10,9 +10,9 @@
#pragma once
#include "monitoring/histogram.h"
#include "rocksdb/env.h"
namespace ROCKSDB_NAMESPACE {
class SystemClock;
class HistogramWindowingImpl : public Histogram
{
@ -45,7 +45,9 @@ public:
virtual void Data(HistogramData* const data) const override;
#ifndef NDEBUG
void TEST_UpdateEnv(Env* env) { env_ = env; }
void TEST_UpdateClock(const std::shared_ptr<SystemClock>& clock) {
clock_ = clock;
}
#endif // NDEBUG
private:
@ -58,7 +60,7 @@ public:
return last_swap_time_.load(std::memory_order_relaxed);
}
Env* env_;
std::shared_ptr<SystemClock> clock_;
std::mutex mutex_;
// Aggregated stats over windows_stats_, all the computation is done

@ -4,15 +4,18 @@
// (found in the LICENSE.Apache file in the root directory).
#include "monitoring/instrumented_mutex.h"
#include "monitoring/perf_context_imp.h"
#include "monitoring/thread_status_util.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
namespace ROCKSDB_NAMESPACE {
namespace {
#ifndef NPERF_CONTEXT
Statistics* stats_for_report(Env* env, Statistics* stats) {
if (env != nullptr && stats != nullptr &&
Statistics* stats_for_report(const std::shared_ptr<SystemClock>& clock,
Statistics* stats) {
if (clock.get() != nullptr && stats != nullptr &&
stats->get_stats_level() > kExceptTimeForMutex) {
return stats;
} else {
@ -25,7 +28,7 @@ Statistics* stats_for_report(Env* env, Statistics* stats) {
void InstrumentedMutex::Lock() {
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
db_mutex_lock_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
stats_for_report(env_, stats_), stats_code_);
stats_for_report(clock_, stats_), stats_code_);
LockInternal();
}
@ -39,7 +42,7 @@ void InstrumentedMutex::LockInternal() {
void InstrumentedCondVar::Wait() {
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
stats_for_report(env_, stats_), stats_code_);
stats_for_report(clock_, stats_), stats_code_);
WaitInternal();
}
@ -53,7 +56,7 @@ void InstrumentedCondVar::WaitInternal() {
bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) {
PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
stats_for_report(env_, stats_), stats_code_);
stats_for_report(clock_, stats_), stats_code_);
return TimedWaitInternal(abs_time_us);
}

@ -7,8 +7,8 @@
#include "monitoring/statistics.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/statistics.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/thread_status.h"
#include "util/stop_watch.h"
@ -20,13 +20,18 @@ class InstrumentedCondVar;
class InstrumentedMutex {
public:
explicit InstrumentedMutex(bool adaptive = false)
: mutex_(adaptive), stats_(nullptr), env_(nullptr),
stats_code_(0) {}
InstrumentedMutex(
Statistics* stats, Env* env,
int stats_code, bool adaptive = false)
: mutex_(adaptive), stats_(stats), env_(env),
: mutex_(adaptive), stats_(nullptr), clock_(nullptr), stats_code_(0) {}
explicit InstrumentedMutex(const std::shared_ptr<SystemClock>& clock,
bool adaptive = false)
: mutex_(adaptive), stats_(nullptr), clock_(clock), stats_code_(0) {}
InstrumentedMutex(Statistics* stats,
const std::shared_ptr<SystemClock>& clock, int stats_code,
bool adaptive = false)
: mutex_(adaptive),
stats_(stats),
clock_(clock),
stats_code_(stats_code) {}
void Lock();
@ -44,7 +49,7 @@ class InstrumentedMutex {
friend class InstrumentedCondVar;
port::Mutex mutex_;
Statistics* stats_;
Env* env_;
std::shared_ptr<SystemClock> clock_;
int stats_code_;
};
@ -71,7 +76,7 @@ class InstrumentedCondVar {
explicit InstrumentedCondVar(InstrumentedMutex* instrumented_mutex)
: cond_(&(instrumented_mutex->mutex_)),
stats_(instrumented_mutex->stats_),
env_(instrumented_mutex->env_),
clock_(instrumented_mutex->clock_),
stats_code_(instrumented_mutex->stats_code_) {}
void Wait();
@ -91,7 +96,7 @@ class InstrumentedCondVar {
bool TimedWaitInternal(uint64_t abs_time_us);
port::CondVar cond_;
Statistics* stats_;
Env* env_;
const std::shared_ptr<SystemClock> clock_;
int stats_code_;
};

@ -38,9 +38,9 @@ extern __thread IOStatsContext iostats_context;
iostats_step_timer_##metric.Start();
// Declare and set start time of the timer
#define IOSTATS_CPU_TIMER_GUARD(metric, env) \
#define IOSTATS_CPU_TIMER_GUARD(metric, clock) \
PerfStepTimer iostats_step_timer_##metric( \
&(iostats_context.metric), env, true, \
&(iostats_context.metric), clock, true, \
PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
iostats_step_timer_##metric.Start();
@ -55,6 +55,6 @@ extern __thread IOStatsContext iostats_context;
#define IOSTATS(metric) 0
#define IOSTATS_TIMER_GUARD(metric)
#define IOSTATS_CPU_TIMER_GUARD(metric, env) static_cast<void>(env)
#define IOSTATS_CPU_TIMER_GUARD(metric, clock) static_cast<void>(clock)
#endif // ROCKSDB_SUPPORT_THREAD_LOCAL

@ -25,8 +25,8 @@ extern thread_local PerfContext perf_context;
#define PERF_TIMER_STOP(metric)
#define PERF_TIMER_START(metric)
#define PERF_TIMER_GUARD(metric)
#define PERF_TIMER_GUARD_WITH_ENV(metric, env)
#define PERF_CPU_TIMER_GUARD(metric, env)
#define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock)
#define PERF_CPU_TIMER_GUARD(metric, clock)
#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \
ticker_type)
#define PERF_TIMER_MEASURE(metric)
@ -46,14 +46,14 @@ extern thread_local PerfContext perf_context;
perf_step_timer_##metric.Start();
// Declare and set start time of the timer
#define PERF_TIMER_GUARD_WITH_ENV(metric, env) \
PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), env); \
#define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock) \
PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), clock); \
perf_step_timer_##metric.Start();
// Declare and set start time of the timer
#define PERF_CPU_TIMER_GUARD(metric, env) \
#define PERF_CPU_TIMER_GUARD(metric, clock) \
PerfStepTimer perf_step_timer_##metric( \
&(perf_context.metric), env, true, \
&(perf_context.metric), clock, true, \
PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
perf_step_timer_##metric.Start();

@ -5,22 +5,23 @@
//
#pragma once
#include "monitoring/perf_level_imp.h"
#include "rocksdb/env.h"
#include "util/stop_watch.h"
#include "monitoring/statistics.h"
#include "rocksdb/system_clock.h"
namespace ROCKSDB_NAMESPACE {
class PerfStepTimer {
public:
explicit PerfStepTimer(
uint64_t* metric, Env* env = nullptr, bool use_cpu_time = false,
uint64_t* metric, const std::shared_ptr<SystemClock>& clock = nullptr,
bool use_cpu_time = false,
PerfLevel enable_level = PerfLevel::kEnableTimeExceptForMutex,
Statistics* statistics = nullptr, uint32_t ticker_type = 0)
: perf_counter_enabled_(perf_level >= enable_level),
use_cpu_time_(use_cpu_time),
env_((perf_counter_enabled_ || statistics != nullptr)
? ((env != nullptr) ? env : Env::Default())
: nullptr),
clock_((perf_counter_enabled_ || statistics != nullptr)
? ((clock.get() != nullptr) ? clock : SystemClock::Default())
: nullptr),
start_(0),
metric_(metric),
statistics_(statistics),
@ -36,14 +37,6 @@ class PerfStepTimer {
}
}
uint64_t time_now() {
if (!use_cpu_time_) {
return env_->NowNanos();
} else {
return env_->NowCPUNanos();
}
}
void Measure() {
if (start_) {
uint64_t now = time_now();
@ -67,9 +60,17 @@ class PerfStepTimer {
}
private:
uint64_t time_now() {
if (!use_cpu_time_) {
return clock_->NowNanos();
} else {
return clock_->CPUNanos();
}
}
const bool perf_counter_enabled_;
const bool use_cpu_time_;
Env* const env_;
std::shared_ptr<SystemClock> clock_;
uint64_t start_;
uint64_t* metric_;
Statistics* statistics_;

@ -22,6 +22,7 @@
#include "rocksdb/cache.h"
#include "rocksdb/convenience.h"
#include "rocksdb/rate_limiter.h"
#include "test_util/mock_time_env.h"
#include "test_util/sync_point.h"
#include "test_util/testutil.h"
#include "util/random.h"
@ -33,20 +34,22 @@ class StatsHistoryTest : public DBTestBase {
public:
StatsHistoryTest()
: DBTestBase("/stats_history_test", /*env_do_fsync=*/true) {
mock_env_.reset(new MockTimeEnv(env_));
mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_));
}
protected:
std::unique_ptr<MockTimeEnv> mock_env_;
std::shared_ptr<MockSystemClock> mock_clock_;
std::unique_ptr<Env> mock_env_;
void SetUp() override {
mock_env_->InstallTimedWaitFixCallback();
mock_clock_->InstallTimedWaitFixCallback();
SyncPoint::GetInstance()->SetCallBack(
"DBImpl::StartPeriodicWorkScheduler:Init", [&](void* arg) {
auto* periodic_work_scheduler_ptr =
reinterpret_cast<PeriodicWorkScheduler**>(arg);
*periodic_work_scheduler_ptr =
PeriodicWorkTestScheduler::Default(mock_env_.get());
PeriodicWorkTestScheduler::Default(mock_clock_);
});
}
};
@ -66,17 +69,17 @@ TEST_F(StatsHistoryTest, RunStatsDumpPeriodSec) {
// Wait for the first stats persist to finish, as the initial delay could be
// different.
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_GE(counter, 1);
// Test cancel job through SetOptions
ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "0"}}));
int old_val = counter;
for (int i = 1; i < 20; ++i) {
mock_env_->MockSleepForSeconds(kPeriodSec);
mock_clock_->MockSleepForSeconds(kPeriodSec);
}
ASSERT_EQ(counter, old_val);
Close();
@ -98,17 +101,17 @@ TEST_F(StatsHistoryTest, StatsPersistScheduling) {
// Wait for the first stats persist to finish, as the initial delay could be
// different.
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_GE(counter, 1);
// Test cancel job through SetOptions
ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));
int old_val = counter;
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec * 2); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec * 2); });
ASSERT_EQ(counter, old_val);
Close();
@ -130,7 +133,7 @@ TEST_F(StatsHistoryTest, PersistentStatsFreshInstall) {
ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec);
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_GE(counter, 1);
Close();
}
@ -149,30 +152,31 @@ TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) {
// make sure the first stats persist to finish
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
// Wait for stats persist to finish
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
std::unique_ptr<StatsHistoryIterator> stats_iter;
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
// disabled stats snapshots
ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));
size_t stats_count = 0;
for (; stats_iter->Valid(); stats_iter->Next()) {
auto stats_map = stats_iter->GetStatsMap();
ASSERT_EQ(stats_iter->GetStatsTime(), mock_env_->NowSeconds());
ASSERT_EQ(stats_iter->GetStatsTime(), mock_clock_->NowSeconds());
stats_count += stats_map.size();
}
ASSERT_GT(stats_count, 0);
// Wait a bit and verify no more stats are found
for (int i = 0; i < 10; ++i) {
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(1); });
[&] { mock_clock_->MockSleepForSeconds(1); });
}
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds(), &stats_iter));
ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
size_t stats_count_new = 0;
for (; stats_iter->Valid(); stats_iter->Next()) {
@ -225,11 +229,12 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
const int kIterations = 10;
for (int i = 0; i < kIterations; ++i) {
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
}
std::unique_ptr<StatsHistoryIterator> stats_iter;
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
size_t stats_count = 0;
int slice_count = 0;
@ -248,10 +253,11 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
// Wait for stats persist to finish
for (int i = 0; i < kIterations; ++i) {
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
}
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
size_t stats_count_reopen = 0;
slice_count = 0;
@ -296,11 +302,11 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
// Wait for the first stats persist to finish, as the initial delay could be
// different.
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
// Wait for stats persist to finish
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
auto iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
@ -308,14 +314,14 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
delete iter;
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
int key_count2 = countkeys(iter);
delete iter;
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
int key_count3 = countkeys(iter);
@ -324,7 +330,8 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
ASSERT_GE(key_count3, key_count2);
ASSERT_EQ(key_count3 - key_count2, key_count2 - key_count1);
std::unique_ptr<StatsHistoryIterator> stats_iter;
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
size_t stats_count = 0;
int slice_count = 0;
@ -345,7 +352,8 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
ASSERT_EQ(stats_count, key_count3 - 2);
// verify reopen will not cause data loss
ReopenWithColumnFamilies({"default", "pikachu"}, options);
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
size_t stats_count_reopen = 0;
int slice_count_reopen = 0;
@ -387,37 +395,38 @@ TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) {
// Wait for the first stats persist to finish, as the initial delay could be
// different.
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
// Wait for stats persist to finish
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
auto iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
countkeys(iter);
delete iter;
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
countkeys(iter);
delete iter;
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
countkeys(iter);
delete iter;
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
std::map<std::string, uint64_t> stats_map_after;
ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_after));
std::unique_ptr<StatsHistoryIterator> stats_iter;
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
std::string sample = "rocksdb.num.iterator.deleted";
uint64_t recovered_value = 0;
@ -434,7 +443,8 @@ TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) {
// test stats value retains after recovery
ReopenWithColumnFamilies({"default", "pikachu"}, options);
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
ASSERT_OK(
db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
uint64_t new_recovered_value = 0;
for (int i = 2; stats_iter->Valid(); stats_iter->Next(), i++) {
@ -474,10 +484,10 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
// make sure the first stats persist to finish
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
auto iter =
db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
int key_count = countkeys(iter);
@ -486,7 +496,7 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
uint64_t num_write_wal = 0;
std::string sample = "rocksdb.write.wal";
std::unique_ptr<StatsHistoryIterator> stats_iter;
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds(), &stats_iter));
ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
for (; stats_iter->Valid(); stats_iter->Next()) {
auto stats_map = stats_iter->GetStatsMap();
@ -522,7 +532,7 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName,
&handle));
// verify stats is not affected by prior failed CF creation
ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds(), &stats_iter));
ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
ASSERT_TRUE(stats_iter != nullptr);
num_write_wal = 0;
for (; stats_iter->Valid(); stats_iter->Next()) {
@ -574,7 +584,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
// Wait for the first stats persist to finish, as the initial delay could be
// different.
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
ColumnFamilyData* cfd_default =
static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily())
@ -593,7 +603,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
ASSERT_EQ("v0", Get(1, "Eevee"));
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
// writing to all three cf, flush default cf
// LogNumbers: default: 14, stats: 4, pikachu: 4
ASSERT_OK(Flush());
@ -618,7 +628,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
ASSERT_EQ("v2", Get("foo2"));
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
// writing to default and stats cf, flushing default cf
// LogNumbers: default: 19, stats: 19, pikachu: 19
ASSERT_OK(Flush());
@ -633,7 +643,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
ASSERT_EQ("v3", Get(1, "Jolteon"));
dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
[&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
// writing to all three cf, flushing test cf
// LogNumbers: default: 19, stats: 19, pikachu: 22
ASSERT_OK(Flush(1));

@ -4,9 +4,12 @@
// (found in the LICENSE.Apache file in the root directory).
#include "monitoring/thread_status_updater.h"
#include <memory>
#include "port/likely.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE {
@ -159,7 +162,7 @@ Status ThreadStatusUpdater::GetThreadList(
std::vector<ThreadStatus>* thread_list) {
thread_list->clear();
std::vector<std::shared_ptr<ThreadStatusData>> valid_list;
uint64_t now_micros = Env::Default()->NowMicros();
uint64_t now_micros = SystemClock::Default()->NowMicros();
std::lock_guard<std::mutex> lck(thread_list_mutex_);
for (auto* thread_data : thread_data_set_) {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save