Add a SystemClock class to capture the time functions of an Env (#7858)

Summary:
Introduces a SystemClock class to RocksDB and puts it to use.  This class contains the time-related functions of an Env, and those functions can be redirected from the Env to the SystemClock.
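
For orientation, the interface has roughly the following shape. This is an approximate sketch reconstructed from the call sites in this diff (NowMicros, CPUNanos, SleepForMicroseconds, GetSystemClock); the authoritative definition is in include/rocksdb/system_clock.h, and exact signatures and comments may differ:

    // Approximate sketch of the new interface; see include/rocksdb/system_clock.h
    // in this PR for the real definition.
    #include <cstdint>
    #include <memory>
    #include <string>

    #include "rocksdb/status.h"

    namespace ROCKSDB_NAMESPACE {

    class SystemClock {
     public:
      virtual ~SystemClock() {}

      // A SystemClock suitable for the current operating system.
      static const std::shared_ptr<SystemClock>& Default();

      // Microseconds since some fixed point in time.
      virtual uint64_t NowMicros() = 0;

      // Nanoseconds since some fixed point in time; only useful for deltas.
      virtual uint64_t NowNanos() { return NowMicros() * 1000; }

      // Per-thread CPU time in nanoseconds; 0 means "not supported".
      virtual uint64_t CPUNanos() { return 0; }

      // Sleep/delay the calling thread for the prescribed number of microseconds.
      virtual void SleepForMicroseconds(int micros) = 0;

      // Seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
      virtual Status GetCurrentTime(int64_t* unix_time) = 0;

      // Converts seconds-since-Jan-01-1970 to a printable string.
      virtual std::string TimeToString(uint64_t time) = 0;
    };

    }  // namespace ROCKSDB_NAMESPACE

Each Env now exposes its clock through Env::GetSystemClock(), which is how the call sites in the diff below obtain it.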

Many of the places that used an Env (Timer, PerfStepTimer, RepeatableThread, RateLimiter, WriteController) for time-related functions have been changed to use SystemClock instead.  There are likely more places that can be changed, but this is a start to show what can/should be done.  Over time it would be nice to migrate most (if not all) of the uses of the time functions from the Env to the SystemClock.
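
The call-site migration is mechanical. A minimal before/after sketch (the helper names here are illustrative, not taken from the PR):

    #include <cstdint>
    #include <functional>
    #include <memory>

    #include "rocksdb/env.h"
    #include "rocksdb/system_clock.h"

    using ROCKSDB_NAMESPACE::Env;
    using ROCKSDB_NAMESPACE::SystemClock;

    // Before this PR: timing went through the Env directly.
    uint64_t ElapsedMicrosViaEnv(Env* env, const std::function<void()>& work) {
      const uint64_t start = env->NowMicros();
      work();
      return env->NowMicros() - start;
    }

    // After this PR: the same measurement is redirected to the Env's
    // SystemClock, mirroring the cache_bench and CompactionJob changes below.
    uint64_t ElapsedMicrosViaClock(Env* env, const std::function<void()>& work) {
      const std::shared_ptr<SystemClock>& clock = env->GetSystemClock();
      const uint64_t start = clock->NowMicros();
      work();
      return clock->NowMicros() - start;
    }

Helpers that time many operations (StopWatch, PerfStepTimer, InstrumentedMutex, WriteController) now take a SystemClock instead of an Env*, as the diff below shows.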

There are several Env classes that implement these functions.  Most of them have not yet been converted to SystemClock implementations; that will come in a subsequent PR.  It would also be good to unify the many mock timer implementations so that they behave similarly and can be tested similarly (some override Sleep, some use a MockSleep, etc.).
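
As a hedged illustration of what a unified mock could look like (this class is hypothetical and not part of this PR), a test clock can wrap a real SystemClock and turn sleeps into instantaneous advances of fake time:

    #include <atomic>
    #include <cstdint>
    #include <memory>
    #include <string>

    // Hypothetical test helper built on the interface sketched above:
    // SleepForMicroseconds() advances a fake offset instead of blocking, and
    // NowMicros() reports base time plus that offset.
    class FakeSleepClock : public ROCKSDB_NAMESPACE::SystemClock {
     public:
      explicit FakeSleepClock(
          const std::shared_ptr<ROCKSDB_NAMESPACE::SystemClock>& base)
          : base_(base) {}

      uint64_t NowMicros() override { return base_->NowMicros() + fake_micros_; }
      uint64_t NowNanos() override { return NowMicros() * 1000; }

      // "Sleeps" return immediately but remain visible through NowMicros().
      void SleepForMicroseconds(int micros) override { fake_micros_ += micros; }

      ROCKSDB_NAMESPACE::Status GetCurrentTime(int64_t* unix_time) override {
        return base_->GetCurrentTime(unix_time);
      }
      std::string TimeToString(uint64_t time) override {
        return base_->TimeToString(time);
      }

     private:
      std::shared_ptr<ROCKSDB_NAMESPACE::SystemClock> base_;
      std::atomic<uint64_t> fake_micros_{0};
    };

Tests that currently install a special Env just to fake time could then share a single clock-level mock like this.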

Additionally, this change will allow new methods (such as the WaitFor discussed in https://github.com/facebook/rocksdb/issues/7101) to be introduced to SystemClock in a consistent manner, across a smaller number of classes.
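
Purely as a sketch of that direction (this helper is hypothetical, not proposed API; the name and signature only echo the discussion in issue #7101), such a method could be built from the existing primitives, so a mock clock would also control the timeout:

    #include <cstdint>
    #include <functional>

    // Hypothetical polling WaitFor built only on the SystemClock primitives.
    // With a mocked clock, the timeout elapses in fake time rather than wall time.
    bool WaitFor(ROCKSDB_NAMESPACE::SystemClock* clock,
                 const std::function<bool()>& pred, uint64_t timeout_micros,
                 uint64_t poll_interval_micros = 1000) {
      const uint64_t deadline = clock->NowMicros() + timeout_micros;
      while (!pred()) {
        if (clock->NowMicros() >= deadline) {
          return false;  // timed out (by real or mocked time)
        }
        clock->SleepForMicroseconds(static_cast<int>(poll_interval_micros));
      }
      return true;
    }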

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7858

Reviewed By: pdillinger

Differential Revision: D26006406

Pulled By: mrambacher

fbshipit-source-id: ed10a8abbdab7ff2e23d69d85bd25b3e7e899e90
Branch: main
Author: mrambacher, committed by Facebook GitHub Bot
Parent: 1d226018af
Commit: 12f1137355
Changed files (changed-line counts in parentheses):
CMakeLists.txt (1)
HISTORY.md (2)
TARGETS (2)
cache/cache_bench.cc (7)
db/blob/blob_file_builder.cc (8)
db/blob/blob_file_builder.h (3)
db/blob/blob_file_builder_test.cc (12)
db/blob/blob_file_cache_test.cc (8)
db/blob/blob_file_reader.cc (9)
db/blob/blob_file_reader_test.cc (16)
db/blob/blob_log_sequential_reader.cc (8)
db/blob/blob_log_sequential_reader.h (7)
db/blob/blob_log_writer.cc (14)
db/blob/blob_log_writer.h (8)
db/builder.cc (6)
db/column_family.cc (4)
db/compaction/compaction_iterator.cc (3)
db/compaction/compaction_iterator.h (1)
db/compaction/compaction_job.cc (15)
db/compaction/compaction_job.h (2)
db/db_compaction_test.cc (1)
db/db_dynamic_level_test.cc (1)
db/db_flush_test.cc (1)
db/db_impl/db_impl.cc (36)
db/db_impl/db_impl.h (4)
db/db_impl/db_impl_compaction_flush.cc (6)
db/db_impl/db_impl_files.cc (2)
db/db_impl/db_impl_open.cc (10)
db/db_impl/db_impl_secondary.cc (4)
db/db_impl/db_impl_write.cc (18)
db/db_iter.cc (33)
db/db_iter.h (3)
db/db_log_iter_test.cc (1)
db/db_test_util.cc (1)
db/db_test_util.h (3)
db/error_handler.cc (2)
db/external_sst_file_ingestion_job.cc (12)
db/external_sst_file_ingestion_job.h (15)
db/flush_job.cc (11)
db/flush_job.h (1)
db/import_column_family_job.cc (2)
db/import_column_family_job.h (10)
db/internal_stats.cc (4)
db/internal_stats.h (16)
db/memtable.cc (15)
db/memtable.h (4)
db/merge_helper.cc (24)
db/merge_helper.h (14)
db/perf_context_test.cc (58)
db/periodic_work_scheduler.cc (34)
db/periodic_work_scheduler.h (12)
db/periodic_work_scheduler_test.cc (35)
db/prefix_test.cc (7)
db/range_del_aggregator_bench.cc (11)
db/table_cache.cc (12)
db/version_set.cc (24)
db/version_set.h (4)
db/version_set_test.cc (4)
db/write_batch.cc (3)
db/write_controller.cc (13)
db/write_controller.h (7)
db/write_controller_test.cc (84)
env/composite_env.cc (369)
env/composite_env_wrapper.h (395)
env/env.cc (57)
env/env_posix.cc (158)
env/env_test.cc (4)
env/file_system_tracer.cc (139)
env/file_system_tracer.h (27)
file/delete_scheduler.cc (13)
file/delete_scheduler.h (10)
file/delete_scheduler_test.cc (7)
file/file_util.h (13)
file/filename.cc (12)
file/filename.h (7)
file/random_access_file_reader.cc (19)
file/random_access_file_reader.h (11)
file/random_access_file_reader_test.cc (3)
file/sst_file_manager_impl.cc (22)
file/sst_file_manager_impl.h (14)
file/writable_file_writer.cc (5)
file/writable_file_writer.h (9)
include/rocksdb/env.h (15)
include/rocksdb/system_clock.h (102)
logging/auto_roll_logger.cc (50)
logging/auto_roll_logger.h (17)
logging/auto_roll_logger_test.cc (91)
logging/env_logger.h (2)
memtable/memtablerep_bench.cc (3)
memtable/write_buffer_manager_test.cc (3)
monitoring/histogram_test.cc (34)
monitoring/histogram_windowing.cc (16)
monitoring/histogram_windowing.h (8)
monitoring/instrumented_mutex.cc (13)
monitoring/instrumented_mutex.h (27)
monitoring/iostats_context_imp.h (6)
monitoring/perf_context_imp.h (12)
monitoring/perf_step_timer.h (31)
monitoring/stats_history_test.cc (94)
monitoring/thread_status_updater.cc (5)
Some files were not shown because too many files have changed in this diff.

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -642,6 +642,7 @@ set(SOURCES
 db/write_batch_base.cc
 db/write_controller.cc
 db/write_thread.cc
+env/composite_env.cc
 env/env.cc
 env/env_chroot.cc
 env/env_encryption.cc

--- a/HISTORY.md
+++ b/HISTORY.md
@@ -12,7 +12,7 @@
 ### Public API Change
 * Add a public API WriteBufferManager::dummy_entries_in_cache_usage() which reports the size of dummy entries stored in cache (passed to WriteBufferManager). Dummy entries are used to account for DataBlocks.
+* Add a SystemClock class that contains the time-related methods from Env. The original methods in Env may be deprecated in a future release. This class will allow easier testing, development, and expansion of time-related features.
 ## 6.16.0 (12/18/2020)
 ### Behavior Changes
 * Attempting to write a merge operand without explicitly configuring `merge_operator` now fails immediately, causing the DB to enter read-only mode. Previously, failure was deferred until the `merge_operator` was needed by a user read or a background operation.

--- a/TARGETS
+++ b/TARGETS
@@ -206,6 +206,7 @@ cpp_library(
 "db/write_batch_base.cc",
 "db/write_controller.cc",
 "db/write_thread.cc",
+"env/composite_env.cc",
 "env/env.cc",
 "env/env_chroot.cc",
 "env/env_encryption.cc",
@@ -510,6 +511,7 @@ cpp_library(
 "db/write_batch_base.cc",
 "db/write_controller.cc",
 "db/write_thread.cc",
+"env/composite_env.cc",
 "env/env.cc",
 "env/env_chroot.cc",
 "env/env_encryption.cc",

--- a/cache/cache_bench.cc
+++ b/cache/cache_bench.cc
@@ -13,6 +13,7 @@ int main() {
 #include <stdio.h>
 #include <sys/types.h>
 #include <cinttypes>
 #include <limits>
@@ -20,6 +21,7 @@ int main() {
 #include "rocksdb/cache.h"
 #include "rocksdb/db.h"
 #include "rocksdb/env.h"
+#include "rocksdb/system_clock.h"
 #include "util/coding.h"
 #include "util/gflags_compat.h"
 #include "util/hash.h"
@@ -210,6 +212,7 @@ class CacheBench {
 bool Run() {
 ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default();
+const auto& clock = env->GetSystemClock();
 PrintEnv();
 SharedState shared(this);
@@ -224,7 +227,7 @@ class CacheBench {
 shared.GetCondVar()->Wait();
 }
 // Record start time
-uint64_t start_time = env->NowMicros();
+uint64_t start_time = clock->NowMicros();
 // Start all threads
 shared.SetStart();
@@ -236,7 +239,7 @@ class CacheBench {
 }
 // Record end time
-uint64_t end_time = env->NowMicros();
+uint64_t end_time = clock->NowMicros();
 double elapsed = static_cast<double>(end_time - start_time) * 1e-6;
 uint32_t qps = static_cast<uint32_t>(
 static_cast<double>(FLAGS_threads * FLAGS_ops_per_thread) / elapsed);

--- a/db/blob/blob_file_builder.cc
+++ b/db/blob/blob_file_builder.cc
@@ -49,7 +49,6 @@ BlobFileBuilder::BlobFileBuilder(
 std::vector<std::string>* blob_file_paths,
 std::vector<BlobFileAddition>* blob_file_additions)
 : file_number_generator_(std::move(file_number_generator)),
-env_(env),
 fs_(fs),
 immutable_cf_options_(immutable_cf_options),
 min_blob_size_(mutable_cf_options->min_blob_size),
@@ -66,7 +65,7 @@ BlobFileBuilder::BlobFileBuilder(
 blob_count_(0),
 blob_bytes_(0) {
 assert(file_number_generator_);
-assert(env_);
+assert(env);
 assert(fs_);
 assert(immutable_cf_options_);
 assert(file_options_);
@@ -74,6 +73,7 @@ BlobFileBuilder::BlobFileBuilder(
 assert(blob_file_paths_->empty());
 assert(blob_file_additions_);
 assert(blob_file_additions_->empty());
+clock_ = env->GetSystemClock();
 }
 BlobFileBuilder::~BlobFileBuilder() = default;
@@ -181,14 +181,14 @@ Status BlobFileBuilder::OpenBlobFileIfNeeded() {
 Statistics* const statistics = immutable_cf_options_->statistics;
 std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
-std::move(file), blob_file_paths_->back(), *file_options_, env_,
+std::move(file), blob_file_paths_->back(), *file_options_, clock_,
 nullptr /*IOTracer*/, statistics, immutable_cf_options_->listeners,
 immutable_cf_options_->file_checksum_gen_factory));
 constexpr bool do_flush = false;
 std::unique_ptr<BlobLogWriter> blob_log_writer(new BlobLogWriter(
-std::move(file_writer), env_, statistics, blob_file_number,
+std::move(file_writer), clock_, statistics, blob_file_number,
 immutable_cf_options_->use_fsync, do_flush));
 constexpr bool has_ttl = false;

--- a/db/blob/blob_file_builder.h
+++ b/db/blob/blob_file_builder.h
@@ -18,6 +18,7 @@ namespace ROCKSDB_NAMESPACE {
 class VersionSet;
 class FileSystem;
+class SystemClock;
 struct ImmutableCFOptions;
 struct MutableCFOptions;
 struct FileOptions;
@@ -69,8 +70,8 @@ class BlobFileBuilder {
 Status CloseBlobFileIfNeeded();
 std::function<uint64_t()> file_number_generator_;
-Env* env_;
 FileSystem* fs_;
+std::shared_ptr<SystemClock> clock_;
 const ImmutableCFOptions* immutable_cf_options_;
 uint64_t min_blob_size_;
 uint64_t blob_file_size_;

--- a/db/blob/blob_file_builder_test.cc
+++ b/db/blob/blob_file_builder_test.cc
@@ -39,8 +39,10 @@ class TestFileNumberGenerator {
 class BlobFileBuilderTest : public testing::Test {
 protected:
-BlobFileBuilderTest()
-: mock_env_(Env::Default()), fs_(mock_env_.GetFileSystem().get()) {}
+BlobFileBuilderTest() : mock_env_(Env::Default()) {
+fs_ = mock_env_.GetFileSystem().get();
+clock_ = mock_env_.GetSystemClock();
+}
 void VerifyBlobFile(uint64_t blob_file_number,
 const std::string& blob_file_path,
@@ -57,11 +59,10 @@ class BlobFileBuilderTest : public testing::Test {
 fs_->NewRandomAccessFile(blob_file_path, file_options_, &file, dbg));
 std::unique_ptr<RandomAccessFileReader> file_reader(
-new RandomAccessFileReader(std::move(file), blob_file_path,
-&mock_env_));
+new RandomAccessFileReader(std::move(file), blob_file_path, clock_));
 constexpr Statistics* statistics = nullptr;
-BlobLogSequentialReader blob_log_reader(std::move(file_reader), &mock_env_,
+BlobLogSequentialReader blob_log_reader(std::move(file_reader), clock_,
 statistics);
 BlobLogHeader header;
@@ -109,6 +110,7 @@ class BlobFileBuilderTest : public testing::Test {
 MockEnv mock_env_;
 FileSystem* fs_;
+std::shared_ptr<SystemClock> clock_;
 FileOptions file_options_;
 };

--- a/db/blob/blob_file_cache_test.cc
+++ b/db/blob/blob_file_cache_test.cc
@@ -42,15 +42,15 @@ void WriteBlobFile(uint32_t column_family_id,
 std::unique_ptr<WritableFileWriter> file_writer(
 new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
-immutable_cf_options.env));
+immutable_cf_options.env->GetSystemClock()));
 constexpr Statistics* statistics = nullptr;
 constexpr bool use_fsync = false;
 constexpr bool do_flush = false;
-BlobLogWriter blob_log_writer(std::move(file_writer),
-immutable_cf_options.env, statistics,
-blob_file_number, use_fsync, do_flush);
+BlobLogWriter blob_log_writer(
+std::move(file_writer), immutable_cf_options.env->GetSystemClock(),
+statistics, blob_file_number, use_fsync, do_flush);
 constexpr bool has_ttl = false;
 constexpr ExpirationRange expiration_range;

--- a/db/blob/blob_file_reader.cc
+++ b/db/blob/blob_file_reader.cc
@@ -117,10 +117,11 @@ Status BlobFileReader::OpenFile(
 }
 file_reader->reset(new RandomAccessFileReader(
-std::move(file), blob_file_path, immutable_cf_options.env,
-std::shared_ptr<IOTracer>(), immutable_cf_options.statistics,
-BLOB_DB_BLOB_FILE_READ_MICROS, blob_file_read_hist,
-immutable_cf_options.rate_limiter, immutable_cf_options.listeners));
+std::move(file), blob_file_path,
+immutable_cf_options.env->GetSystemClock(), std::shared_ptr<IOTracer>(),
+immutable_cf_options.statistics, BLOB_DB_BLOB_FILE_READ_MICROS,
+blob_file_read_hist, immutable_cf_options.rate_limiter,
+immutable_cf_options.listeners));
 return Status::OK();
 }

--- a/db/blob/blob_file_reader_test.cc
+++ b/db/blob/blob_file_reader_test.cc
@@ -50,15 +50,15 @@ void WriteBlobFile(const ImmutableCFOptions& immutable_cf_options,
 std::unique_ptr<WritableFileWriter> file_writer(
 new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
-immutable_cf_options.env));
+immutable_cf_options.env->GetSystemClock()));
 constexpr Statistics* statistics = nullptr;
 constexpr bool use_fsync = false;
 constexpr bool do_flush = false;
-BlobLogWriter blob_log_writer(std::move(file_writer),
-immutable_cf_options.env, statistics,
-blob_file_number, use_fsync, do_flush);
+BlobLogWriter blob_log_writer(
+std::move(file_writer), immutable_cf_options.env->GetSystemClock(),
+statistics, blob_file_number, use_fsync, do_flush);
 BlobLogHeader header(column_family_id, compression_type, has_ttl,
 expiration_range_header);
@@ -260,15 +260,15 @@ TEST_F(BlobFileReaderTest, Malformed) {
 std::unique_ptr<WritableFileWriter> file_writer(
 new WritableFileWriter(std::move(file), blob_file_path, FileOptions(),
-immutable_cf_options.env));
+immutable_cf_options.env->GetSystemClock()));
 constexpr Statistics* statistics = nullptr;
 constexpr bool use_fsync = false;
 constexpr bool do_flush = false;
-BlobLogWriter blob_log_writer(std::move(file_writer),
-immutable_cf_options.env, statistics,
-blob_file_number, use_fsync, do_flush);
+BlobLogWriter blob_log_writer(
+std::move(file_writer), immutable_cf_options.env->GetSystemClock(),
+statistics, blob_file_number, use_fsync, do_flush);
 BlobLogHeader header(column_family_id, kNoCompression, has_ttl,
 expiration_range);

--- a/db/blob/blob_log_sequential_reader.cc
+++ b/db/blob/blob_log_sequential_reader.cc
@@ -13,10 +13,10 @@
 namespace ROCKSDB_NAMESPACE {
 BlobLogSequentialReader::BlobLogSequentialReader(
-std::unique_ptr<RandomAccessFileReader>&& file_reader, Env* env,
-Statistics* statistics)
+std::unique_ptr<RandomAccessFileReader>&& file_reader,
+const std::shared_ptr<SystemClock>& clock, Statistics* statistics)
 : file_(std::move(file_reader)),
-env_(env),
+clock_(clock),
 statistics_(statistics),
 next_byte_(0) {}
@@ -27,7 +27,7 @@ Status BlobLogSequentialReader::ReadSlice(uint64_t size, Slice* slice,
 assert(slice);
 assert(file_);
-StopWatch read_sw(env_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
+StopWatch read_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_READ_MICROS);
 Status s = file_->Read(IOOptions(), next_byte_, static_cast<size_t>(size),
 slice, buf, nullptr);
 next_byte_ += size;

--- a/db/blob/blob_log_sequential_reader.h
+++ b/db/blob/blob_log_sequential_reader.h
@@ -16,6 +16,7 @@ class RandomAccessFileReader;
 class Env;
 class Statistics;
 class Status;
+class SystemClock;
 /**
 * BlobLogSequentialReader is a general purpose log stream reader
@@ -35,7 +36,8 @@ class BlobLogSequentialReader {
 // Create a reader that will return log records from "*file_reader".
 BlobLogSequentialReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
-Env* env, Statistics* statistics);
+const std::shared_ptr<SystemClock>& clock,
+Statistics* statistics);
 // No copying allowed
 BlobLogSequentialReader(const BlobLogSequentialReader&) = delete;
@@ -63,7 +65,8 @@
 Status ReadSlice(uint64_t size, Slice* slice, char* buf);
 const std::unique_ptr<RandomAccessFileReader> file_;
-Env* env_;
+std::shared_ptr<SystemClock> clock_;
 Statistics* statistics_;
 Slice buffer_;

--- a/db/blob/blob_log_writer.cc
+++ b/db/blob/blob_log_writer.cc
@@ -11,7 +11,7 @@
 #include "db/blob/blob_log_format.h"
 #include "file/writable_file_writer.h"
 #include "monitoring/statistics.h"
-#include "rocksdb/env.h"
+#include "rocksdb/system_clock.h"
 #include "test_util/sync_point.h"
 #include "util/coding.h"
 #include "util/stop_watch.h"
@@ -19,11 +19,11 @@
 namespace ROCKSDB_NAMESPACE {
 BlobLogWriter::BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest,
-Env* env, Statistics* statistics,
-uint64_t log_number, bool use_fs, bool do_flush,
-uint64_t boffset)
+const std::shared_ptr<SystemClock>& clock,
+Statistics* statistics, uint64_t log_number,
+bool use_fs, bool do_flush, uint64_t boffset)
 : dest_(std::move(dest)),
-env_(env),
+clock_(clock),
 statistics_(statistics),
 log_number_(log_number),
 block_offset_(boffset),
@@ -36,7 +36,7 @@ BlobLogWriter::~BlobLogWriter() = default;
 Status BlobLogWriter::Sync() {
 TEST_SYNC_POINT("BlobLogWriter::Sync");
-StopWatch sync_sw(env_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS);
+StopWatch sync_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_SYNC_MICROS);
 Status s = dest_->Sync(use_fsync_);
 RecordTick(statistics_, BLOB_DB_BLOB_FILE_SYNCED);
 return s;
@@ -148,7 +148,7 @@ Status BlobLogWriter::EmitPhysicalRecord(const std::string& headerbuf,
 const Slice& key, const Slice& val,
 uint64_t* key_offset,
 uint64_t* blob_offset) {
-StopWatch write_sw(env_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS);
+StopWatch write_sw(clock_, statistics_, BLOB_DB_BLOB_FILE_WRITE_MICROS);
 Status s = dest_->Append(Slice(headerbuf));
 if (s.ok()) {
 s = dest_->Append(key);

--- a/db/blob/blob_log_writer.h
+++ b/db/blob/blob_log_writer.h
@@ -9,7 +9,6 @@
 #include <string>
 #include "db/blob/blob_log_format.h"
-#include "rocksdb/env.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/statistics.h"
 #include "rocksdb/status.h"
@@ -18,7 +17,7 @@
 namespace ROCKSDB_NAMESPACE {
 class WritableFileWriter;
+class SystemClock;
 /**
 * BlobLogWriter is the blob log stream writer. It provides an append-only
 * abstraction for writing blob data.
@@ -32,7 +31,8 @@ class BlobLogWriter {
 // Create a writer that will append data to "*dest".
 // "*dest" must be initially empty.
 // "*dest" must remain live while this BlobLogWriter is in use.
-BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest, Env* env,
+BlobLogWriter(std::unique_ptr<WritableFileWriter>&& dest,
+const std::shared_ptr<SystemClock>& clock,
 Statistics* statistics, uint64_t log_number, bool use_fsync,
 bool do_flush, uint64_t boffset = 0);
 // No copying allowed
@@ -69,7 +69,7 @@
 private:
 std::unique_ptr<WritableFileWriter> dest_;
-Env* env_;
+std::shared_ptr<SystemClock> clock_;
 Statistics* statistics_;
 uint64_t log_number_;
 uint64_t block_offset_; // Current offset in block

--- a/db/builder.cc
+++ b/db/builder.cc
@@ -124,6 +124,8 @@ Status BuildTable(
 assert(env);
 FileSystem* fs = db_options.fs.get();
 assert(fs);
+const auto& clock = env->GetSystemClock();
 TableProperties tp;
 if (iter->Valid() || !range_del_agg->IsEmpty()) {
 TableBuilder* builder;
@@ -151,7 +153,7 @@
 file->SetWriteLifeTimeHint(write_hint);
 file_writer.reset(new WritableFileWriter(
-std::move(file), fname, file_options, env, io_tracer,
+std::move(file), fname, file_options, clock, io_tracer,
 ioptions.statistics, ioptions.listeners,
 ioptions.file_checksum_gen_factory));
@@ -254,7 +256,7 @@
 // Finish and check for file errors
 TEST_SYNC_POINT("BuildTable:BeforeSyncTable");
 if (s.ok() && !empty) {
-StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
+StopWatch sw(clock, ioptions.statistics, TABLE_SYNC_MICROS);
 *io_status = file_writer->Sync(ioptions.use_fsync);
 }
 TEST_SYNC_POINT("BuildTable:BeforeCloseTableFile");

--- a/db/column_family.cc
+++ b/db/column_family.cc
@@ -556,8 +556,8 @@ ColumnFamilyData::ColumnFamilyData(
 // if _dummy_versions is nullptr, then this is a dummy column family.
 if (_dummy_versions != nullptr) {
-internal_stats_.reset(
-new InternalStats(ioptions_.num_levels, db_options.env, this));
+internal_stats_.reset(new InternalStats(
+ioptions_.num_levels, db_options.env->GetSystemClock(), this));
 table_cache_.reset(new TableCache(ioptions_, file_options, _table_cache,
 block_cache_tracer, io_tracer));
 blob_file_cache_.reset(

--- a/db/compaction/compaction_iterator.cc
+++ b/db/compaction/compaction_iterator.cc
@@ -80,6 +80,7 @@ CompactionIterator::CompactionIterator(
 earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
 snapshot_checker_(snapshot_checker),
 env_(env),
+clock_(env_->GetSystemClock()),
 report_detailed_time_(report_detailed_time),
 expect_valid_internal_key_(expect_valid_internal_key),
 range_del_agg_(range_del_agg),
@@ -219,7 +220,7 @@ bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip,
 // to get sequence number.
 Slice& filter_key = ikey_.type == kTypeValue ? ikey_.user_key : key_;
 {
-StopWatchNano timer(env_, report_detailed_time_);
+StopWatchNano timer(clock_, report_detailed_time_);
 filter = compaction_filter_->FilterV2(
 compaction_->level(), filter_key, value_type, value_,
 &compaction_filter_value_, compaction_filter_skip_until_.rep());

--- a/db/compaction/compaction_iterator.h
+++ b/db/compaction/compaction_iterator.h
@@ -248,6 +248,7 @@ class CompactionIterator {
 const SequenceNumber earliest_write_conflict_snapshot_;
 const SnapshotChecker* const snapshot_checker_;
 Env* env_;
+std::shared_ptr<SystemClock> clock_;
 bool report_detailed_time_;
 bool expect_valid_internal_key_;
 CompactionRangeDelAggregator* range_del_agg_;

--- a/db/compaction/compaction_job.cc
+++ b/db/compaction/compaction_job.cc
@@ -320,6 +320,7 @@ CompactionJob::CompactionJob(
 db_options_(db_options),
 file_options_(file_options),
 env_(db_options.env),
+clock_(env_->GetSystemClock()),
 io_tracer_(io_tracer),
 fs_(db_options.fs, io_tracer),
 file_options_for_read_(
@@ -419,7 +420,7 @@ void CompactionJob::Prepare() {
 if (c->ShouldFormSubcompactions()) {
 {
-StopWatch sw(env_, stats_, SUBCOMPACTION_SETUP_TIME);
+StopWatch sw(clock_, stats_, SUBCOMPACTION_SETUP_TIME);
 GenSubcompactionBoundaries();
 }
 assert(sizes_.size() == boundaries_.size() + 1);
@@ -585,7 +586,7 @@ Status CompactionJob::Run() {
 const size_t num_threads = compact_->sub_compact_states.size();
 assert(num_threads > 0);
-const uint64_t start_micros = env_->NowMicros();
+const uint64_t start_micros = clock_->NowMicros();
 // Launch a thread for each of subcompactions 1...num_threads-1
 std::vector<port::Thread> thread_pool;
@@ -604,7 +605,7 @@
 thread.join();
 }
-compaction_stats_.micros = env_->NowMicros() - start_micros;
+compaction_stats_.micros = clock_->NowMicros() - start_micros;
 compaction_stats_.cpu_micros = 0;
 for (size_t i = 0; i < compact_->sub_compact_states.size(); i++) {
 compaction_stats_.cpu_micros +=
@@ -894,7 +895,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
 assert(sub_compact);
 assert(sub_compact->compaction);
-uint64_t prev_cpu_micros = env_->NowCPUNanos() / 1000;
+uint64_t prev_cpu_micros = clock_->CPUNanos() / 1000;
 ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
@@ -1184,7 +1185,7 @@
 }
 sub_compact->compaction_job_stats.cpu_micros =
-env_->NowCPUNanos() / 1000 - prev_cpu_micros;
+clock_->CPUNanos() / 1000 - prev_cpu_micros;
 if (measure_io_stats_) {
 sub_compact->compaction_job_stats.file_write_nanos +=
@@ -1463,7 +1464,7 @@ Status CompactionJob::FinishCompactionOutputFile(
 // Finish and check for file errors
 if (s.ok()) {
-StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
+StopWatch sw(clock_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
 io_s = sub_compact->outfile->Sync(db_options_.use_fsync);
 }
 if (s.ok() && io_s.ok()) {
@@ -1738,7 +1739,7 @@ Status CompactionJob::OpenCompactionOutputFile(
 const auto& listeners =
 sub_compact->compaction->immutable_cf_options()->listeners;
 sub_compact->outfile.reset(new WritableFileWriter(
-std::move(writable_file), fname, file_options_, env_, io_tracer_,
+std::move(writable_file), fname, file_options_, clock_, io_tracer_,
 db_options_.statistics.get(), listeners,
 db_options_.file_checksum_gen_factory.get()));

--- a/db/compaction/compaction_job.h
+++ b/db/compaction/compaction_job.h
@@ -50,6 +50,7 @@ class Arena;
 class ErrorHandler;
 class MemTable;
 class SnapshotChecker;
+class SystemClock;
 class TableCache;
 class Version;
 class VersionEdit;
@@ -159,6 +160,7 @@ class CompactionJob {
 const FileOptions file_options_;
 Env* env_;
+std::shared_ptr<SystemClock> clock_;
 std::shared_ptr<IOTracer> io_tracer_;
 FileSystemPtr fs_;
 // env_option optimized for compaction table reads

--- a/db/db_compaction_test.cc
+++ b/db/db_compaction_test.cc
@@ -11,6 +11,7 @@
 #include "db/blob/blob_index.h"
 #include "db/db_test_util.h"
+#include "env/mock_env.h"
 #include "port/port.h"
 #include "port/stack_trace.h"
 #include "rocksdb/concurrent_task_limiter.h"

--- a/db/db_dynamic_level_test.cc
+++ b/db/db_dynamic_level_test.cc
@@ -13,6 +13,7 @@
 #if !defined(ROCKSDB_LITE)
 #include "db/db_test_util.h"
+#include "env/mock_env.h"
 #include "port/port.h"
 #include "port/stack_trace.h"
 #include "util/random.h"

--- a/db/db_flush_test.cc
+++ b/db/db_flush_test.cc
@@ -11,6 +11,7 @@
 #include "db/db_impl/db_impl.h"
 #include "db/db_test_util.h"
+#include "env/mock_env.h"
 #include "file/filename.h"
 #include "port/port.h"
 #include "port/stack_trace.h"

--- a/db/db_impl/db_impl.cc
+++ b/db/db_impl/db_impl.cc
@@ -151,12 +151,13 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
 own_info_log_(options.info_log == nullptr),
 initial_db_options_(SanitizeOptions(dbname, options)),
 env_(initial_db_options_.env),
+clock_(initial_db_options_.env->GetSystemClock()),
 io_tracer_(std::make_shared<IOTracer>()),
 immutable_db_options_(initial_db_options_),
 fs_(immutable_db_options_.fs, io_tracer_),
 mutable_db_options_(initial_db_options_),
 stats_(immutable_db_options_.statistics.get()),
-mutex_(stats_, env_, DB_MUTEX_WAIT_MICROS,
+mutex_(stats_, clock_, DB_MUTEX_WAIT_MICROS,
 immutable_db_options_.use_adaptive_mutex),
 default_cf_handle_(nullptr),
 max_total_in_memory_state_(0),
@@ -191,7 +192,7 @@
 bg_purge_scheduled_(0),
 disable_delete_obsolete_files_(0),
 pending_purge_obsolete_files_(0),
-delete_obsolete_files_last_run_(env_->NowMicros()),
+delete_obsolete_files_last_run_(clock_->NowMicros()),
 last_stats_dump_time_microsec_(0),
 next_job_id_(1),
 has_unpersisted_data_(false),
@@ -752,7 +753,7 @@ void DBImpl::PersistStats() {
 return;
 }
 TEST_SYNC_POINT("DBImpl::PersistStats:StartRunning");
-uint64_t now_seconds = env_->NowMicros() / kMicrosInSecond;
+uint64_t now_seconds = clock_->NowMicros() / kMicrosInSecond;
 Statistics* statistics = immutable_db_options_.statistics.get();
 if (!statistics) {
@@ -1653,8 +1654,8 @@ Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
 }
 #endif // NDEBUG
-PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
-StopWatch sw(env_, stats_, DB_GET);
+PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
+StopWatch sw(clock_, stats_, DB_GET);
 PERF_TIMER_GUARD(get_snapshot_time);
 auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(
@@ -1842,8 +1843,8 @@ std::vector<Status> DBImpl::MultiGet(
 const std::vector<ColumnFamilyHandle*>& column_family,
 const std::vector<Slice>& keys, std::vector<std::string>* values,
 std::vector<std::string>* timestamps) {
-PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
-StopWatch sw(env_, stats_, DB_MULTIGET);
+PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
+StopWatch sw(clock_, stats_, DB_MULTIGET);
 PERF_TIMER_GUARD(get_snapshot_time);
 #ifndef NDEBUG
@@ -1974,7 +1975,7 @@
 }
 if (read_options.deadline.count() &&
-env_->NowMicros() >
+clock_->NowMicros() >
 static_cast<uint64_t>(read_options.deadline.count())) {
 break;
 }
@@ -1983,8 +1984,8 @@
 if (keys_read < num_keys) {
 // The only reason to break out of the loop is when the deadline is
 // exceeded
-assert(env_->NowMicros() >
+assert(clock_->NowMicros() >
 static_cast<uint64_t>(read_options.deadline.count()));
 for (++keys_read; keys_read < num_keys; ++keys_read) {
 stat_list[keys_read] = Status::TimedOut();
 }
@@ -2422,8 +2423,8 @@ Status DBImpl::MultiGetImpl(
 autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE>* sorted_keys,
 SuperVersion* super_version, SequenceNumber snapshot,
 ReadCallback* callback) {
-PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
-StopWatch sw(env_, stats_, DB_MULTIGET);
+PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
+StopWatch sw(clock_, stats_, DB_MULTIGET);
 // For each of the given keys, apply the entire "get" process as follows:
 // First look in the memtable, then in the immutable memtable (if any).
@@ -2434,7 +2435,7 @@
 uint64_t curr_value_size = 0;
 while (keys_left) {
 if (read_options.deadline.count() &&
-env_->NowMicros() >
+clock_->NowMicros() >
 static_cast<uint64_t>(read_options.deadline.count())) {
 s = Status::TimedOut();
 break;
@@ -3137,7 +3138,8 @@ FileSystem* DBImpl::GetFileSystem() const {
 Status DBImpl::StartIOTrace(Env* env, const TraceOptions& trace_options,
 std::unique_ptr<TraceWriter>&& trace_writer) {
 assert(trace_writer != nullptr);
-return io_tracer_->StartIOTrace(env, trace_options, std::move(trace_writer));
+return io_tracer_->StartIOTrace(env->GetSystemClock(), trace_options,
+std::move(trace_writer));
 }
 Status DBImpl::EndIOTrace() {
@@ -4417,7 +4419,7 @@ Status DBImpl::IngestExternalFiles(
 for (const auto& arg : args) {
 auto* cfd = static_cast<ColumnFamilyHandleImpl*>(arg.column_family)->cfd();
 ingestion_jobs.emplace_back(
-env_, versions_.get(), cfd, immutable_db_options_, file_options_,
+clock_, versions_.get(), cfd, immutable_db_options_, file_options_,
 &snapshots_, arg.options, &directories_, &event_logger_, io_tracer_);
 }
@@ -4685,7 +4687,7 @@ Status DBImpl::CreateColumnFamilyWithImport(
 // Import sst files from metadata.
 auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(*handle);
 auto cfd = cfh->cfd();
-ImportColumnFamilyJob import_job(env_, versions_.get(), cfd,
+ImportColumnFamilyJob import_job(clock_, versions_.get(), cfd,
 immutable_db_options_, file_options_,
 import_options, metadata.files, io_tracer_);
@@ -4941,7 +4943,7 @@ void DBImpl::WaitForIngestFile() {
 Status DBImpl::StartTrace(const TraceOptions& trace_options,
 std::unique_ptr<TraceWriter>&& trace_writer) {
 InstrumentedMutexLock lock(&trace_mutex_);
-tracer_.reset(new Tracer(env_, trace_options, std::move(trace_writer)));
+tracer_.reset(new Tracer(clock_, trace_options, std::move(trace_writer)));
 return Status::OK();
 }

--- a/db/db_impl/db_impl.h
+++ b/db/db_impl/db_impl.h
@@ -54,9 +54,6 @@
 #include "rocksdb/transaction_log.h"
 #include "rocksdb/write_buffer_manager.h"
 #include "table/scoped_arena_iterator.h"
-#include "trace_replay/block_cache_tracer.h"
-#include "trace_replay/io_tracer.h"
-#include "trace_replay/trace_replay.h"
 #include "util/autovector.h"
 #include "util/hash.h"
 #include "util/repeatable_thread.h"
@@ -1053,6 +1050,7 @@ class DBImpl : public DB {
 bool own_info_log_;
 const DBOptions initial_db_options_;
 Env* const env_;
+std::shared_ptr<SystemClock> clock_;
 std::shared_ptr<IOTracer> io_tracer_;
 const ImmutableDBOptions immutable_db_options_;
 FileSystemPtr fs_;

--- a/db/db_impl/db_impl_compaction_flush.cc
+++ b/db/db_impl/db_impl_compaction_flush.cc
@@ -2529,7 +2529,7 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
 s.ToString().c_str(), error_cnt);
 log_buffer.FlushBufferToLog();
 LogFlush(immutable_db_options_.info_log);
-env_->SleepForMicroseconds(1000000);
+clock_->SleepForMicroseconds(1000000);
 mutex_.Lock();
 }
@@ -2602,7 +2602,7 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
 if (s.IsBusy()) {
 bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
 mutex_.Unlock();
-env_->SleepForMicroseconds(10000); // prevent hot loop
+clock_->SleepForMicroseconds(10000); // prevent hot loop
 mutex_.Lock();
 } else if (!s.ok() && !s.IsShutdownInProgress() &&
 !s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) {
@@ -2620,7 +2620,7 @@
 "Accumulated background error counts: %" PRIu64,
 s.ToString().c_str(), error_cnt);
 LogFlush(immutable_db_options_.info_log);
-env_->SleepForMicroseconds(1000000);
+clock_->SleepForMicroseconds(1000000);
 mutex_.Lock();
 } else if (s.IsManualCompactionPaused()) {
 ManualCompactionState* m = prepicked_compaction->manual_compaction_state;

--- a/db/db_impl/db_impl_files.cc
+++ b/db/db_impl/db_impl_files.cc
@@ -120,7 +120,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
 mutable_db_options_.delete_obsolete_files_period_micros == 0) {
 doing_the_full_scan = true;
 } else {
-const uint64_t now_micros = env_->NowMicros();
+const uint64_t now_micros = clock_->NowMicros();
 if ((delete_obsolete_files_last_run_ +
 mutable_db_options_.delete_obsolete_files_period_micros) <
 now_micros) {

--- a/db/db_impl/db_impl_open.cc
+++ b/db/db_impl/db_impl_open.cc
@@ -292,14 +292,14 @@ Status DBImpl::NewDB(std::vector<std::string>* new_filenames) {
 file->SetPreallocationBlockSize(
 immutable_db_options_.manifest_preallocation_size);
 std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
-std::move(file), manifest, file_options, env_, io_tracer_,
+std::move(file), manifest, file_options, clock_, io_tracer_,
 nullptr /* stats */, immutable_db_options_.listeners));
 log::Writer log(std::move(file_writer), 0, false);
 std::string record;
 new_db.EncodeTo(&record);
 s = log.AddRecord(record);
 if (s.ok()) {
-s = SyncManifest(env_, &immutable_db_options_, log.file());
+s = SyncManifest(clock_, &immutable_db_options_, log.file());
 }
 }
 if (s.ok()) {
@@ -1295,7 +1295,7 @@ Status DBImpl::RestoreAliveLogFiles(const std::vector<uint64_t>& wal_numbers) {
 Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
 MemTable* mem, VersionEdit* edit) {
 mutex_.AssertHeld();
-const uint64_t start_micros = env_->NowMicros();
+const uint64_t start_micros = clock_->NowMicros();
 FileMetaData meta;
 std::vector<BlobFileAddition> blob_file_additions;
@@ -1395,7 +1395,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
 }
 InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
-stats.micros = env_->NowMicros() - start_micros;
+stats.micros = clock_->NowMicros() - start_micros;
 if (has_output) {
 stats.bytes_written = meta.fd.GetFileSize();
@@ -1486,7 +1486,7 @@ IOStatus DBImpl::CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
 const auto& listeners = immutable_db_options_.listeners;
 std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
-std::move(lfile), log_fname, opt_file_options, env_, io_tracer_,
+std::move(lfile), log_fname, opt_file_options, clock_, io_tracer_,
 nullptr /* stats */, listeners));
 *new_log = new log::Writer(std::move(file_writer), log_file_num,
 immutable_db_options_.recycle_log_file_num > 0,

--- a/db/db_impl/db_impl_secondary.cc
+++ b/db/db_impl/db_impl_secondary.cc
@@ -327,8 +327,8 @@ Status DBImplSecondary::GetImpl(const ReadOptions& read_options,
 ColumnFamilyHandle* column_family,
 const Slice& key, PinnableSlice* pinnable_val) {
 assert(pinnable_val != nullptr);
-PERF_CPU_TIMER_GUARD(get_cpu_nanos, env_);
-StopWatch sw(env_, stats_, DB_GET);
+PERF_CPU_TIMER_GUARD(get_cpu_nanos, clock_);
+StopWatch sw(clock_, stats_, DB_GET);
 PERF_TIMER_GUARD(get_snapshot_time);
 auto cfh = static_cast<ColumnFamilyHandleImpl*>(column_family);

--- a/db/db_impl/db_impl_write.cc
+++ b/db/db_impl/db_impl_write.cc
@@ -160,7 +160,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
 RecordTick(stats_, WRITE_WITH_WAL);
 }
-StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
+StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
 write_thread_.JoinBatchGroup(&w);
 if (w.state == WriteThread::STATE_PARALLEL_MEMTABLE_WRITER) {
@@ -465,7 +465,7 @@ Status DBImpl::PipelinedWriteImpl(const WriteOptions& write_options,
 uint64_t* log_used, uint64_t log_ref,
 bool disable_memtable, uint64_t* seq_used) {
 PERF_TIMER_GUARD(write_pre_and_post_process_time);
-StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
+StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
 WriteContext write_context;
@@ -621,7 +621,7 @@ Status DBImpl::UnorderedWriteMemtable(const WriteOptions& write_options,
 SequenceNumber seq,
 const size_t sub_batch_cnt) {
 PERF_TIMER_GUARD(write_pre_and_post_process_time);
-StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
+StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
 WriteThread::Writer w(write_options, my_batch, callback, log_ref,
 false /*disable_memtable*/);
@@ -676,7 +676,7 @@ Status DBImpl::WriteImplWALOnly(
 WriteThread::Writer w(write_options, my_batch, callback, log_ref,
 disable_memtable, sub_batch_cnt, pre_release_callback);
 RecordTick(stats_, WRITE_WITH_WAL);
-StopWatch write_sw(env_, immutable_db_options_.statistics.get(), DB_WRITE);
+StopWatch write_sw(clock_, immutable_db_options_.statistics.get(), DB_WRITE);
 write_thread->JoinBatchGroup(&w);
 assert(w.state != WriteThread::STATE_PARALLEL_MEMTABLE_WRITER);
@@ -1093,7 +1093,7 @@ IOStatus DBImpl::WriteToWAL(const WriteThread::WriteGroup& write_group,
 }
 if (io_s.ok() && need_log_sync) {
-StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS);
+StopWatch sw(clock_, stats_, WAL_FILE_SYNC_MICROS);
 // It's safe to access logs_ with unlocked mutex_ here because:
 // - we've set getting_synced=true for all logs,
 // so other threads won't pop from logs_ while we're here,
@@ -1457,8 +1457,8 @@ Status DBImpl::DelayWrite(uint64_t num_bytes,
 uint64_t time_delayed = 0;
 bool delayed = false;
 {
-StopWatch sw(env_, stats_, WRITE_STALL, &time_delayed);
-uint64_t delay = write_controller_.GetDelay(env_, num_bytes);
+StopWatch sw(clock_, stats_, WRITE_STALL, &time_delayed);
+uint64_t delay = write_controller_.GetDelay(clock_, num_bytes);
 if (delay > 0) {
 if (write_options.no_slowdown) {
 return Status::Incomplete("Write stall");
@@ -1475,14 +1475,14 @@
 const uint64_t kDelayInterval = 1000;
 uint64_t stall_end = sw.start_time() + delay;
 while (write_controller_.NeedsDelay()) {
-if (env_->NowMicros() >= stall_end) {
+if (clock_->NowMicros() >= stall_end) {
 // We already delayed this write `delay` microseconds
 break;
 }
 delayed = true;
 // Sleep for 0.001 seconds
-env_->SleepForMicroseconds(kDelayInterval);
+clock_->SleepForMicroseconds(kDelayInterval);
 }
 mutex_.Lock();
 write_thread_.EndWriteStall();

--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@@ -8,9 +8,10 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #include "db/db_iter.h"
-#include <string>
 #include <iostream>
 #include <limits>
+#include <string>
 #include "db/dbformat.h"
 #include "db/merge_context.h"
@@ -24,6 +25,7 @@
 #include "rocksdb/iterator.h"
 #include "rocksdb/merge_operator.h"
 #include "rocksdb/options.h"
+#include "rocksdb/system_clock.h"
 #include "table/internal_iterator.h"
 #include "table/iterator_wrapper.h"
 #include "trace_replay/trace_replay.h"
@@ -43,6 +45,7 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
 ColumnFamilyData* cfd, bool expose_blob_index)
 : prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
 env_(_env),
+clock_(_env->GetSystemClock()),
 logger_(cf_options.info_log),
 user_comparator_(cmp),
 merge_operator_(cf_options.merge_operator),
@@ -127,7 +130,7 @@ void DBIter::Next() {
 assert(valid_);
 assert(status_.ok());
-PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, env_);
+PERF_CPU_TIMER_GUARD(iter_next_cpu_nanos, clock_);
 // Release temporarily pinned blocks from last operation
 ReleaseTempPinnedData();
 local_stats_.skip_count_ += num_internal_keys_skipped_;
@@ -574,7 +577,7 @@ bool DBIter::MergeValuesNewToOld() {
 const Slice val = iter_.value();
 Status s = MergeHelper::TimedFullMerge(
 merge_operator_, ikey.user_key, &val, merge_context_.GetOperands(),
-&saved_value_, logger_, statistics_, env_, &pinned_value_, true);
+&saved_value_, logger_, statistics_, clock_, &pinned_value_, true);
 if (!s.ok()) {
 valid_ = false;
 status_ = s;
@@ -617,7 +620,7 @@
 // client can differentiate this scenario and do things accordingly.
 Status s = MergeHelper::TimedFullMerge(
 merge_operator_, saved_key_.GetUserKey(), nullptr,
-merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_,
+merge_context_.GetOperands(), &saved_value_, logger_, statistics_, clock_,
 &pinned_value_, true);
 if (!s.ok()) {
 valid_ = false;
@@ -640,7 +643,7 @@ void DBIter::Prev() {
 assert(valid_);
 assert(status_.ok());
-PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, env_);
+PERF_CPU_TIMER_GUARD(iter_prev_cpu_nanos, clock_);
 ReleaseTempPinnedData();
 ResetInternalKeysSkippedCounter();
 bool ok = true;
@@ -921,7 +924,7 @@ bool DBIter::FindValueForCurrentKey() {
 s = MergeHelper::TimedFullMerge(
 merge_operator_, saved_key_.GetUserKey(), nullptr,
 merge_context_.GetOperands(), &saved_value_, logger_, statistics_,
-env_, &pinned_value_, true);
+clock_, &pinned_value_, true);
 } else if (last_not_merge_type == kTypeBlobIndex) {
 status_ =
 Status::NotSupported("BlobDB does not support merge operator.");
@@ -932,7 +935,7 @@
 s = MergeHelper::TimedFullMerge(
 merge_operator_, saved_key_.GetUserKey(), &pinned_value_,
 merge_context_.GetOperands(), &saved_value_, logger_, statistics_,
-env_, &pinned_value_, true);
+clock_, &pinned_value_, true);
 }
 break;
 case kTypeValue:
@@ -1070,7 +1073,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
 Status s = MergeHelper::TimedFullMerge(
 merge_operator_, saved_key_.GetUserKey(), &val,
 merge_context_.GetOperands(), &saved_value_, logger_, statistics_,
-env_, &pinned_value_, true);
+clock_, &pinned_value_, true);
 if (!s.ok()) {
 valid_ = false;
 status_ = s;
@@ -1097,7 +1100,7 @@
 Status s = MergeHelper::TimedFullMerge(
 merge_operator_, saved_key_.GetUserKey(), nullptr,
-merge_context_.GetOperands(), &saved_value_, logger_, statistics_, env_,
+merge_context_.GetOperands(), &saved_value_, logger_, statistics_, clock_,
 &pinned_value_, true);
 if (!s.ok()) {
 valid_ = false;
@@ -1248,8 +1251,8 @@ void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
 }
 void DBIter::Seek(const Slice& target) {
-PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
-StopWatch sw(env_, statistics_, DB_SEEK);
+PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
+StopWatch sw(clock_, statistics_, DB_SEEK);
 #ifndef ROCKSDB_LITE
 if (db_impl_ != nullptr && cfd_ != nullptr) {
@@ -1310,8 +1313,8 @@ void DBIter::Seek(const Slice& target) {
 }
 void DBIter::SeekForPrev(const Slice& target) {
-PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
-StopWatch sw(env_, statistics_, DB_SEEK);
+PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
+StopWatch sw(clock_, statistics_, DB_SEEK);
 #ifndef ROCKSDB_LITE
 if (db_impl_ != nullptr && cfd_ != nullptr) {
@@ -1378,7 +1381,7 @@ void DBIter::SeekToFirst() {
 Seek(*iterate_lower_bound_);
 return;
 }
-PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_);
+PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
 // Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used. // because prefix seek will be used.
if (!expect_total_order_inner_iter()) { if (!expect_total_order_inner_iter()) {
@ -1439,7 +1442,7 @@ void DBIter::SeekToLast() {
return; return;
} }
PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, env_); PERF_CPU_TIMER_GUARD(iter_seek_cpu_nanos, clock_);
// Don't use iter_::Seek() if we set a prefix extractor // Don't use iter_::Seek() if we set a prefix extractor
// because prefix seek will be used. // because prefix seek will be used.
if (!expect_total_order_inner_iter()) { if (!expect_total_order_inner_iter()) {
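
A minimal sketch of the pattern db_iter.cc now follows: the constructor fetches the SystemClock from the Env once and every timed path uses the cached clock. TimedComponent is a hypothetical class; only Env::GetSystemClock() and SystemClock::NowMicros() from the hunks above are assumed.

```cpp
#include <cstdint>
#include <memory>
#include <utility>

#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
class TimedComponent {
 public:
  explicit TimedComponent(Env* env)
      : env_(env), clock_(env->GetSystemClock()) {}

  // Times an arbitrary operation against the cached clock instead of the Env.
  template <typename Op>
  uint64_t TimeMicros(Op&& op) const {
    const uint64_t start = clock_->NowMicros();
    std::forward<Op>(op)();
    return clock_->NowMicros() - start;
  }

 private:
  Env* const env_;                      // still used for non-time duties
  std::shared_ptr<SystemClock> clock_;  // used for all timing
};
}  // namespace ROCKSDB_NAMESPACE
```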

@ -21,7 +21,7 @@
#include "util/autovector.h" #include "util/autovector.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class SystemClock;
class Version; class Version;
// This file declares the factory functions of DBIter, in its original form // This file declares the factory functions of DBIter, in its original form
@ -298,6 +298,7 @@ class DBIter final : public Iterator {
const SliceTransform* prefix_extractor_; const SliceTransform* prefix_extractor_;
Env* const env_; Env* const env_;
std::shared_ptr<SystemClock> clock_;
Logger* logger_; Logger* logger_;
UserComparatorWrapper user_comparator_; UserComparatorWrapper user_comparator_;
const MergeOperator* const merge_operator_; const MergeOperator* const merge_operator_;

@ -13,6 +13,7 @@
#if !defined(ROCKSDB_LITE) #if !defined(ROCKSDB_LITE)
#include "db/db_test_util.h" #include "db/db_test_util.h"
#include "env/mock_env.h"
#include "port/stack_trace.h" #include "port/stack_trace.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {

@ -10,6 +10,7 @@
#include "db/db_test_util.h" #include "db/db_test_util.h"
#include "db/forward_iterator.h" #include "db/forward_iterator.h"
#include "env/mock_env.h"
#include "rocksdb/convenience.h" #include "rocksdb/convenience.h"
#include "rocksdb/env_encryption.h" #include "rocksdb/env_encryption.h"
#include "rocksdb/utilities/object_registry.h" #include "rocksdb/utilities/object_registry.h"

@ -23,7 +23,6 @@
#include "db/db_impl/db_impl.h" #include "db/db_impl/db_impl.h"
#include "db/dbformat.h" #include "db/dbformat.h"
#include "env/mock_env.h"
#include "file/filename.h" #include "file/filename.h"
#include "memtable/hash_linklist_rep.h" #include "memtable/hash_linklist_rep.h"
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
@ -40,7 +39,6 @@
#include "rocksdb/utilities/checkpoint.h" #include "rocksdb/utilities/checkpoint.h"
#include "table/mock_table.h" #include "table/mock_table.h"
#include "table/scoped_arena_iterator.h" #include "table/scoped_arena_iterator.h"
#include "test_util/mock_time_env.h"
#include "test_util/sync_point.h" #include "test_util/sync_point.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "util/cast_util.h" #include "util/cast_util.h"
@ -50,6 +48,7 @@
#include "utilities/merge_operators.h" #include "utilities/merge_operators.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class MockEnv;
namespace anon { namespace anon {
class AtomicCounter { class AtomicCounter {

@ -670,7 +670,7 @@ void ErrorHandler::RecoverFromRetryableBGIOError() {
// a period of time and redo auto resume if it is allowed. // a period of time and redo auto resume if it is allowed.
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait0"); TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait0");
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1"); TEST_SYNC_POINT("RecoverFromRetryableBGIOError:BeforeWait1");
int64_t wait_until = db_->env_->NowMicros() + wait_interval; int64_t wait_until = db_->clock_->NowMicros() + wait_interval;
cv_.TimedWait(wait_until); cv_.TimedWait(wait_until);
TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterWait0"); TEST_SYNC_POINT("RecoverFromRetryableBGIOError:AfterWait0");
} else { } else {
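
A sketch of the retry wait above, assuming InstrumentedCondVar::TimedWait() accepts an absolute deadline in microseconds as the call in the hunk suggests; WaitForRetry is a hypothetical helper and the caller is expected to hold the associated InstrumentedMutex.

```cpp
#include <cstdint>
#include <memory>

#include "monitoring/instrumented_mutex.h"
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// Waits roughly wait_interval_us before the next auto-recovery attempt, with
// the deadline derived from the same SystemClock the DB uses so that mocked
// clocks keep recovery tests deterministic. Caller holds the mutex behind cv.
void WaitForRetry(const std::shared_ptr<SystemClock>& clock,
                  InstrumentedCondVar* cv, uint64_t wait_interval_us) {
  const int64_t wait_until =
      static_cast<int64_t>(clock->NowMicros() + wait_interval_us);
  cv->TimedWait(wait_until);
}
}  // namespace ROCKSDB_NAMESPACE
```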

@ -293,12 +293,13 @@ Status ExternalSstFileIngestionJob::Prepare(
// TODO: The following is duplicated with Cleanup(). // TODO: The following is duplicated with Cleanup().
if (!status.ok()) { if (!status.ok()) {
IOOptions io_opts;
// We failed, remove all files that we copied into the db // We failed, remove all files that we copied into the db
for (IngestedFileInfo& f : files_to_ingest_) { for (IngestedFileInfo& f : files_to_ingest_) {
if (f.internal_file_path.empty()) { if (f.internal_file_path.empty()) {
continue; continue;
} }
Status s = env_->DeleteFile(f.internal_file_path); Status s = fs_->DeleteFile(f.internal_file_path, io_opts, nullptr);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, ROCKS_LOG_WARN(db_options_.info_log,
"AddFile() clean up for file %s failed : %s", "AddFile() clean up for file %s failed : %s",
@ -392,7 +393,7 @@ Status ExternalSstFileIngestionJob::Run() {
int64_t temp_current_time = 0; int64_t temp_current_time = 0;
uint64_t current_time = kUnknownFileCreationTime; uint64_t current_time = kUnknownFileCreationTime;
uint64_t oldest_ancester_time = kUnknownOldestAncesterTime; uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
if (env_->GetCurrentTime(&temp_current_time).ok()) { if (clock_->GetCurrentTime(&temp_current_time).ok()) {
current_time = oldest_ancester_time = current_time = oldest_ancester_time =
static_cast<uint64_t>(temp_current_time); static_cast<uint64_t>(temp_current_time);
} }
@ -410,7 +411,7 @@ void ExternalSstFileIngestionJob::UpdateStats() {
// Update internal stats for new ingested files // Update internal stats for new ingested files
uint64_t total_keys = 0; uint64_t total_keys = 0;
uint64_t total_l0_files = 0; uint64_t total_l0_files = 0;
uint64_t total_time = env_->NowMicros() - job_start_time_; uint64_t total_time = clock_->NowMicros() - job_start_time_;
EventLoggerStream stream = event_logger_->Log(); EventLoggerStream stream = event_logger_->Log();
stream << "event" stream << "event"
@ -466,6 +467,7 @@ void ExternalSstFileIngestionJob::UpdateStats() {
} }
void ExternalSstFileIngestionJob::Cleanup(const Status& status) { void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
IOOptions io_opts;
if (!status.ok()) { if (!status.ok()) {
// We failed to add the files to the database // We failed to add the files to the database
// remove all the files we copied // remove all the files we copied
@ -473,7 +475,7 @@ void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
if (f.internal_file_path.empty()) { if (f.internal_file_path.empty()) {
continue; continue;
} }
Status s = env_->DeleteFile(f.internal_file_path); Status s = fs_->DeleteFile(f.internal_file_path, io_opts, nullptr);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, ROCKS_LOG_WARN(db_options_.info_log,
"AddFile() clean up for file %s failed : %s", "AddFile() clean up for file %s failed : %s",
@ -485,7 +487,7 @@ void ExternalSstFileIngestionJob::Cleanup(const Status& status) {
} else if (status.ok() && ingestion_options_.move_files) { } else if (status.ok() && ingestion_options_.move_files) {
// The files were moved and added successfully, remove original file links // The files were moved and added successfully, remove original file links
for (IngestedFileInfo& f : files_to_ingest_) { for (IngestedFileInfo& f : files_to_ingest_) {
Status s = env_->DeleteFile(f.external_file_path); Status s = fs_->DeleteFile(f.external_file_path, io_opts, nullptr);
if (!s.ok()) { if (!s.ok()) {
ROCKS_LOG_WARN( ROCKS_LOG_WARN(
db_options_.info_log, db_options_.info_log,
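
A minimal sketch of the cleanup change above: the Env::DeleteFile() calls become FileSystem::DeleteFile() with an IOOptions argument. DeleteIngestedCopies and its parameters are illustrative only.

```cpp
#include <string>
#include <vector>

#include "rocksdb/file_system.h"

namespace ROCKSDB_NAMESPACE {
// Removes copied/linked ingestion files through the FileSystem API, mirroring
// the fs_->DeleteFile(path, io_opts, nullptr) calls in the hunks above.
Status DeleteIngestedCopies(FileSystem* fs,
                            const std::vector<std::string>& paths) {
  IOOptions io_opts;
  Status result;
  for (const auto& path : paths) {
    Status s = fs->DeleteFile(path, io_opts, nullptr /* dbg */);
    if (!s.ok() && result.ok()) {
      result = s;  // remember the first failure but keep cleaning up
    }
  }
  return result;
}
}  // namespace ROCKSDB_NAMESPACE
```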

@ -16,13 +16,14 @@
#include "logging/event_logger.h" #include "logging/event_logger.h"
#include "options/db_options.h" #include "options/db_options.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/file_system.h"
#include "rocksdb/sst_file_writer.h" #include "rocksdb/sst_file_writer.h"
#include "util/autovector.h" #include "util/autovector.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class Directories; class Directories;
class SystemClock;
struct IngestedFileInfo { struct IngestedFileInfo {
// External file path // External file path
@ -73,13 +74,13 @@ struct IngestedFileInfo {
class ExternalSstFileIngestionJob { class ExternalSstFileIngestionJob {
public: public:
   ExternalSstFileIngestionJob(
-      Env* env, VersionSet* versions, ColumnFamilyData* cfd,
-      const ImmutableDBOptions& db_options, const EnvOptions& env_options,
-      SnapshotList* db_snapshots,
+      const std::shared_ptr<SystemClock>& clock, VersionSet* versions,
+      ColumnFamilyData* cfd, const ImmutableDBOptions& db_options,
+      const EnvOptions& env_options, SnapshotList* db_snapshots,
const IngestExternalFileOptions& ingestion_options, const IngestExternalFileOptions& ingestion_options,
Directories* directories, EventLogger* event_logger, Directories* directories, EventLogger* event_logger,
const std::shared_ptr<IOTracer>& io_tracer) const std::shared_ptr<IOTracer>& io_tracer)
: env_(env), : clock_(clock),
fs_(db_options.fs, io_tracer), fs_(db_options.fs, io_tracer),
versions_(versions), versions_(versions),
cfd_(cfd), cfd_(cfd),
@ -89,7 +90,7 @@ class ExternalSstFileIngestionJob {
ingestion_options_(ingestion_options), ingestion_options_(ingestion_options),
directories_(directories), directories_(directories),
event_logger_(event_logger), event_logger_(event_logger),
job_start_time_(env_->NowMicros()), job_start_time_(clock_->NowMicros()),
consumed_seqno_count_(0), consumed_seqno_count_(0),
io_tracer_(io_tracer) { io_tracer_(io_tracer) {
assert(directories != nullptr); assert(directories != nullptr);
@ -169,7 +170,7 @@ class ExternalSstFileIngestionJob {
template <typename TWritableFile> template <typename TWritableFile>
Status SyncIngestedFile(TWritableFile* file); Status SyncIngestedFile(TWritableFile* file);
Env* env_; std::shared_ptr<SystemClock> clock_;
FileSystemPtr fs_; FileSystemPtr fs_;
VersionSet* versions_; VersionSet* versions_;
ColumnFamilyData* cfd_; ColumnFamilyData* cfd_;

@ -127,6 +127,7 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
pick_memtable_called(false), pick_memtable_called(false),
thread_pri_(thread_pri), thread_pri_(thread_pri),
io_tracer_(io_tracer), io_tracer_(io_tracer),
clock_(db_options_.env->GetSystemClock()),
full_history_ts_low_(std::move(full_history_ts_low)) { full_history_ts_low_(std::move(full_history_ts_low)) {
// Update the thread status to indicate flush. // Update the thread status to indicate flush.
ReportStartedFlush(); ReportStartedFlush();
@ -309,8 +310,8 @@ Status FlushJob::WriteLevel0Table() {
AutoThreadOperationStageUpdater stage_updater( AutoThreadOperationStageUpdater stage_updater(
ThreadStatus::STAGE_FLUSH_WRITE_L0); ThreadStatus::STAGE_FLUSH_WRITE_L0);
db_mutex_->AssertHeld(); db_mutex_->AssertHeld();
const uint64_t start_micros = db_options_.env->NowMicros(); const uint64_t start_micros = clock_->NowMicros();
const uint64_t start_cpu_micros = db_options_.env->NowCPUNanos() / 1000; const uint64_t start_cpu_micros = clock_->CPUNanos() / 1000;
Status s; Status s;
std::vector<BlobFileAddition> blob_file_additions; std::vector<BlobFileAddition> blob_file_additions;
@ -371,7 +372,7 @@ Status FlushJob::WriteLevel0Table() {
TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression", TEST_SYNC_POINT_CALLBACK("FlushJob::WriteLevel0Table:output_compression",
&output_compression_); &output_compression_);
int64_t _current_time = 0; int64_t _current_time = 0;
auto status = db_options_.env->GetCurrentTime(&_current_time); auto status = clock_->GetCurrentTime(&_current_time);
// Safe to proceed even if GetCurrentTime fails. So, log and proceed. // Safe to proceed even if GetCurrentTime fails. So, log and proceed.
if (!status.ok()) { if (!status.ok()) {
ROCKS_LOG_WARN( ROCKS_LOG_WARN(
@ -466,8 +467,8 @@ Status FlushJob::WriteLevel0Table() {
// Note that here we treat flush as level 0 compaction in internal stats // Note that here we treat flush as level 0 compaction in internal stats
InternalStats::CompactionStats stats(CompactionReason::kFlush, 1); InternalStats::CompactionStats stats(CompactionReason::kFlush, 1);
stats.micros = db_options_.env->NowMicros() - start_micros; stats.micros = clock_->NowMicros() - start_micros;
stats.cpu_micros = db_options_.env->NowCPUNanos() / 1000 - start_cpu_micros; stats.cpu_micros = clock_->CPUNanos() / 1000 - start_cpu_micros;
if (has_output) { if (has_output) {
stats.bytes_written = meta_.fd.GetFileSize(); stats.bytes_written = meta_.fd.GetFileSize();
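
A sketch of the timing pattern FlushJob now uses: wall time from NowMicros() and CPU time from CPUNanos(), both read off the cached SystemClock. JobTimings and TimeJob are hypothetical names.

```cpp
#include <cstdint>
#include <memory>
#include <utility>

#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
struct JobTimings {
  uint64_t micros = 0;      // wall-clock duration
  uint64_t cpu_micros = 0;  // CPU time consumed by the calling thread
};

// Runs `work` and reports elapsed wall and CPU microseconds, the same way
// FlushJob::WriteLevel0Table() brackets its work in the hunks above.
template <typename Work>
JobTimings TimeJob(const std::shared_ptr<SystemClock>& clock, Work&& work) {
  const uint64_t start_micros = clock->NowMicros();
  const uint64_t start_cpu_micros = clock->CPUNanos() / 1000;
  std::forward<Work>(work)();
  JobTimings t;
  t.micros = clock->NowMicros() - start_micros;
  t.cpu_micros = clock->CPUNanos() / 1000 - start_cpu_micros;
  return t;
}
}  // namespace ROCKSDB_NAMESPACE
```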

@ -162,6 +162,7 @@ class FlushJob {
IOStatus io_status_; IOStatus io_status_;
const std::shared_ptr<IOTracer> io_tracer_; const std::shared_ptr<IOTracer> io_tracer_;
const std::shared_ptr<SystemClock> clock_;
const std::string full_history_ts_low_; const std::string full_history_ts_low_;
}; };

@ -140,7 +140,7 @@ Status ImportColumnFamilyJob::Run() {
int64_t temp_current_time = 0; int64_t temp_current_time = 0;
uint64_t oldest_ancester_time = kUnknownOldestAncesterTime; uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
uint64_t current_time = kUnknownOldestAncesterTime; uint64_t current_time = kUnknownOldestAncesterTime;
if (env_->GetCurrentTime(&temp_current_time).ok()) { if (clock_->GetCurrentTime(&temp_current_time).ok()) {
current_time = oldest_ancester_time = current_time = oldest_ancester_time =
static_cast<uint64_t>(temp_current_time); static_cast<uint64_t>(temp_current_time);
} }
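
The GetCurrentTime() fallback above in one small helper, with the "unknown" sentinel passed in rather than assuming the value of kUnknownOldestAncesterTime; CurrentTimeOrDefault is a hypothetical name.

```cpp
#include <cstdint>
#include <memory>

#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// Returns the current time from the clock, or `unknown_value` (e.g.
// kUnknownOldestAncesterTime) when the clock cannot provide one.
uint64_t CurrentTimeOrDefault(const std::shared_ptr<SystemClock>& clock,
                              uint64_t unknown_value) {
  int64_t now = 0;
  if (clock->GetCurrentTime(&now).ok() && now >= 0) {
    return static_cast<uint64_t>(now);
  }
  return unknown_value;
}
}  // namespace ROCKSDB_NAMESPACE
```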

@ -9,24 +9,26 @@
#include "db/snapshot_impl.h" #include "db/snapshot_impl.h"
#include "options/db_options.h" #include "options/db_options.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/metadata.h" #include "rocksdb/metadata.h"
#include "rocksdb/sst_file_writer.h" #include "rocksdb/sst_file_writer.h"
#include "util/autovector.h" #include "util/autovector.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
struct EnvOptions;
class SystemClock;
// Imports a set of sst files as is into a new column family. Logic is similar // Imports a set of sst files as is into a new column family. Logic is similar
// to ExternalSstFileIngestionJob. // to ExternalSstFileIngestionJob.
class ImportColumnFamilyJob { class ImportColumnFamilyJob {
public: public:
-  ImportColumnFamilyJob(Env* env, VersionSet* versions, ColumnFamilyData* cfd,
+  ImportColumnFamilyJob(const std::shared_ptr<SystemClock>& clock,
+                        VersionSet* versions, ColumnFamilyData* cfd,
const ImmutableDBOptions& db_options, const ImmutableDBOptions& db_options,
const EnvOptions& env_options, const EnvOptions& env_options,
const ImportColumnFamilyOptions& import_options, const ImportColumnFamilyOptions& import_options,
const std::vector<LiveFileMetaData>& metadata, const std::vector<LiveFileMetaData>& metadata,
const std::shared_ptr<IOTracer>& io_tracer) const std::shared_ptr<IOTracer>& io_tracer)
: env_(env), : clock_(clock),
versions_(versions), versions_(versions),
cfd_(cfd), cfd_(cfd),
db_options_(db_options), db_options_(db_options),
@ -59,7 +61,7 @@ class ImportColumnFamilyJob {
IngestedFileInfo* file_to_import, IngestedFileInfo* file_to_import,
SuperVersion* sv); SuperVersion* sv);
Env* env_; std::shared_ptr<SystemClock> clock_;
VersionSet* versions_; VersionSet* versions_;
ColumnFamilyData* cfd_; ColumnFamilyData* cfd_;
const ImmutableDBOptions& db_options_; const ImmutableDBOptions& db_options_;

@ -990,7 +990,7 @@ bool InternalStats::HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* /*db*/,
void InternalStats::DumpDBStats(std::string* value) { void InternalStats::DumpDBStats(std::string* value) {
char buf[1000]; char buf[1000];
// DB-level stats, only available from default column family // DB-level stats, only available from default column family
double seconds_up = (env_->NowMicros() - started_at_ + 1) / kMicrosInSec; double seconds_up = (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up; double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf), snprintf(buf, sizeof(buf),
"\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n", "\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n",
@ -1313,7 +1313,7 @@ void InternalStats::DumpCFStatsNoFileHistogram(std::string* value) {
} }
} }
double seconds_up = (env_->NowMicros() - started_at_ + 1) / kMicrosInSec; double seconds_up = (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec;
double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up; double interval_seconds_up = seconds_up - cf_stats_snapshot_.seconds_up;
snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n", snprintf(buf, sizeof(buf), "Uptime(secs): %.1f total, %.1f interval\n",
seconds_up, interval_seconds_up); seconds_up, interval_seconds_up);
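
A sketch of the uptime computation above: started_at_ is captured from the same SystemClock that later supplies NowMicros(). UptimeTracker is illustrative, and the 1e6 divisor mirrors kMicrosInSec.

```cpp
#include <cstdint>
#include <memory>

#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
class UptimeTracker {
 public:
  explicit UptimeTracker(const std::shared_ptr<SystemClock>& clock)
      : clock_(clock), started_at_(clock->NowMicros()) {}

  // Seconds since construction, matching the DumpDBStats() formula above.
  double SecondsUp() const {
    constexpr double kMicrosInSec = 1000000.0;
    return (clock_->NowMicros() - started_at_ + 1) / kMicrosInSec;
  }

 private:
  std::shared_ptr<SystemClock> clock_;
  uint64_t started_at_;
};
}  // namespace ROCKSDB_NAMESPACE
```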

@ -14,6 +14,7 @@
#include <vector> #include <vector>
#include "db/version_set.h" #include "db/version_set.h"
#include "rocksdb/system_clock.h"
class ColumnFamilyData; class ColumnFamilyData;
@ -122,7 +123,8 @@ class InternalStats {
kIntStatsNumMax, kIntStatsNumMax,
}; };
-  InternalStats(int num_levels, Env* env, ColumnFamilyData* cfd)
+  InternalStats(int num_levels, const std::shared_ptr<SystemClock>& clock,
+                ColumnFamilyData* cfd)
: db_stats_{}, : db_stats_{},
cf_stats_value_{}, cf_stats_value_{},
cf_stats_count_{}, cf_stats_count_{},
@ -131,9 +133,9 @@ class InternalStats {
file_read_latency_(num_levels), file_read_latency_(num_levels),
bg_error_count_(0), bg_error_count_(0),
number_levels_(num_levels), number_levels_(num_levels),
env_(env), clock_(clock),
cfd_(cfd), cfd_(cfd),
started_at_(env->NowMicros()) {} started_at_(clock->NowMicros()) {}
// Per level compaction stats. comp_stats_[level] stores the stats for // Per level compaction stats. comp_stats_[level] stores the stats for
// compactions that produced data for the specified "level". // compactions that produced data for the specified "level".
@ -341,7 +343,7 @@ class InternalStats {
cf_stats_snapshot_.Clear(); cf_stats_snapshot_.Clear();
db_stats_snapshot_.Clear(); db_stats_snapshot_.Clear();
bg_error_count_ = 0; bg_error_count_ = 0;
started_at_ = env_->NowMicros(); started_at_ = clock_->NowMicros();
} }
void AddCompactionStats(int level, Env::Priority thread_pri, void AddCompactionStats(int level, Env::Priority thread_pri,
@ -602,7 +604,7 @@ class InternalStats {
uint64_t bg_error_count_; uint64_t bg_error_count_;
const int number_levels_; const int number_levels_;
Env* env_; const std::shared_ptr<SystemClock> clock_;
ColumnFamilyData* cfd_; ColumnFamilyData* cfd_;
uint64_t started_at_; uint64_t started_at_;
}; };
@ -641,7 +643,9 @@ class InternalStats {
kIntStatsNumMax, kIntStatsNumMax,
}; };
-  InternalStats(int /*num_levels*/, Env* /*env*/, ColumnFamilyData* /*cfd*/) {}
+  InternalStats(int /*num_levels*/,
+                const std::shared_ptr<SystemClock>& /*clock*/,
+                ColumnFamilyData* /*cfd*/) {}
struct CompactionStats { struct CompactionStats {
uint64_t micros; uint64_t micros;

@ -102,7 +102,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
: 0), : 0),
prefix_extractor_(mutable_cf_options.prefix_extractor.get()), prefix_extractor_(mutable_cf_options.prefix_extractor.get()),
flush_state_(FLUSH_NOT_REQUESTED), flush_state_(FLUSH_NOT_REQUESTED),
env_(ioptions.env), clock_(ioptions.env->GetSystemClock()),
insert_with_hint_prefix_extractor_( insert_with_hint_prefix_extractor_(
ioptions.memtable_insert_with_hint_prefix_extractor), ioptions.memtable_insert_with_hint_prefix_extractor),
oldest_key_time_(std::numeric_limits<uint64_t>::max()), oldest_key_time_(std::numeric_limits<uint64_t>::max()),
@ -221,7 +221,7 @@ void MemTable::UpdateOldestKeyTime() {
uint64_t oldest_key_time = oldest_key_time_.load(std::memory_order_relaxed); uint64_t oldest_key_time = oldest_key_time_.load(std::memory_order_relaxed);
if (oldest_key_time == std::numeric_limits<uint64_t>::max()) { if (oldest_key_time == std::numeric_limits<uint64_t>::max()) {
int64_t current_time = 0; int64_t current_time = 0;
auto s = env_->GetCurrentTime(&current_time); auto s = clock_->GetCurrentTime(&current_time);
if (s.ok()) { if (s.ok()) {
assert(current_time >= 0); assert(current_time >= 0);
// If fail, the timestamp is already set. // If fail, the timestamp is already set.
@ -628,7 +628,8 @@ struct Saver {
Statistics* statistics; Statistics* statistics;
bool inplace_update_support; bool inplace_update_support;
bool do_merge; bool do_merge;
Env* env_; std::shared_ptr<SystemClock> clock;
ReadCallback* callback_; ReadCallback* callback_;
bool* is_blob_index; bool* is_blob_index;
bool allow_data_in_errors; bool allow_data_in_errors;
@ -712,7 +713,7 @@ static bool SaveValue(void* arg, const char* entry) {
*(s->status) = MergeHelper::TimedFullMerge( *(s->status) = MergeHelper::TimedFullMerge(
merge_operator, s->key->user_key(), &v, merge_operator, s->key->user_key(), &v,
merge_context->GetOperands(), s->value, s->logger, merge_context->GetOperands(), s->value, s->logger,
s->statistics, s->env_, nullptr /* result_operand */, true); s->statistics, s->clock, nullptr /* result_operand */, true);
} }
} else { } else {
// Preserve the value with the goal of returning it as part of // Preserve the value with the goal of returning it as part of
@ -751,7 +752,7 @@ static bool SaveValue(void* arg, const char* entry) {
*(s->status) = MergeHelper::TimedFullMerge( *(s->status) = MergeHelper::TimedFullMerge(
merge_operator, s->key->user_key(), nullptr, merge_operator, s->key->user_key(), nullptr,
merge_context->GetOperands(), s->value, s->logger, merge_context->GetOperands(), s->value, s->logger,
s->statistics, s->env_, nullptr /* result_operand */, true); s->statistics, s->clock, nullptr /* result_operand */, true);
} }
} else { } else {
*(s->status) = Status::NotFound(); *(s->status) = Status::NotFound();
@ -779,7 +780,7 @@ static bool SaveValue(void* arg, const char* entry) {
*(s->status) = MergeHelper::TimedFullMerge( *(s->status) = MergeHelper::TimedFullMerge(
merge_operator, s->key->user_key(), nullptr, merge_operator, s->key->user_key(), nullptr,
merge_context->GetOperands(), s->value, s->logger, s->statistics, merge_context->GetOperands(), s->value, s->logger, s->statistics,
s->env_, nullptr /* result_operand */, true); s->clock, nullptr /* result_operand */, true);
*(s->found_final_value) = true; *(s->found_final_value) = true;
return false; return false;
} }
@ -887,7 +888,7 @@ void MemTable::GetFromTable(const LookupKey& key,
saver.logger = moptions_.info_log; saver.logger = moptions_.info_log;
saver.inplace_update_support = moptions_.inplace_update_support; saver.inplace_update_support = moptions_.inplace_update_support;
saver.statistics = moptions_.statistics; saver.statistics = moptions_.statistics;
saver.env_ = env_; saver.clock = clock_;
saver.callback_ = callback; saver.callback_ = callback;
saver.is_blob_index = is_blob_index; saver.is_blob_index = is_blob_index;
saver.do_merge = do_merge; saver.do_merge = do_merge;

@ -24,7 +24,6 @@
#include "monitoring/instrumented_mutex.h" #include "monitoring/instrumented_mutex.h"
#include "options/cf_options.h" #include "options/cf_options.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "table/multiget_context.h" #include "table/multiget_context.h"
#include "util/dynamic_bloom.h" #include "util/dynamic_bloom.h"
@ -36,6 +35,7 @@ struct FlushJobInfo;
class Mutex; class Mutex;
class MemTableIterator; class MemTableIterator;
class MergeContext; class MergeContext;
class SystemClock;
struct ImmutableMemTableOptions { struct ImmutableMemTableOptions {
explicit ImmutableMemTableOptions(const ImmutableCFOptions& ioptions, explicit ImmutableMemTableOptions(const ImmutableCFOptions& ioptions,
@ -504,7 +504,7 @@ class MemTable {
std::atomic<FlushStateEnum> flush_state_; std::atomic<FlushStateEnum> flush_state_;
Env* env_; std::shared_ptr<SystemClock> clock_;
// Extract sequential insert prefixes. // Extract sequential insert prefixes.
const SliceTransform* insert_with_hint_prefix_extractor_; const SliceTransform* insert_with_hint_prefix_extractor_;

@ -14,6 +14,7 @@
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "rocksdb/system_clock.h"
#include "table/format.h" #include "table/format.h"
#include "table/internal_iterator.h" #include "table/internal_iterator.h"
@ -28,6 +29,7 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
Statistics* stats, Statistics* stats,
const std::atomic<bool>* shutting_down) const std::atomic<bool>* shutting_down)
: env_(env), : env_(env),
clock_(env->GetSystemClock()),
user_comparator_(user_comparator), user_comparator_(user_comparator),
user_merge_operator_(user_merge_operator), user_merge_operator_(user_merge_operator),
compaction_filter_(compaction_filter), compaction_filter_(compaction_filter),
@ -39,7 +41,7 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
snapshot_checker_(snapshot_checker), snapshot_checker_(snapshot_checker),
level_(level), level_(level),
keys_(), keys_(),
filter_timer_(env_), filter_timer_(clock_),
total_filter_time_(0U), total_filter_time_(0U),
stats_(stats) { stats_(stats) {
assert(user_comparator_ != nullptr); assert(user_comparator_ != nullptr);
@ -48,13 +50,11 @@ MergeHelper::MergeHelper(Env* env, const Comparator* user_comparator,
} }
} }
-Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
-                                   const Slice& key, const Slice* value,
-                                   const std::vector<Slice>& operands,
-                                   std::string* result, Logger* logger,
-                                   Statistics* statistics, Env* env,
-                                   Slice* result_operand,
-                                   bool update_num_ops_stats) {
+Status MergeHelper::TimedFullMerge(
+    const MergeOperator* merge_operator, const Slice& key, const Slice* value,
+    const std::vector<Slice>& operands, std::string* result, Logger* logger,
+    Statistics* statistics, const std::shared_ptr<SystemClock>& clock,
+    Slice* result_operand, bool update_num_ops_stats) {
assert(merge_operator != nullptr); assert(merge_operator != nullptr);
if (operands.size() == 0) { if (operands.size() == 0) {
@ -75,7 +75,7 @@ Status MergeHelper::TimedFullMerge(const MergeOperator* merge_operator,
MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand); MergeOperator::MergeOperationOutput merge_out(*result, tmp_result_operand);
{ {
// Setup to time the merge // Setup to time the merge
StopWatchNano timer(env, statistics != nullptr); StopWatchNano timer(clock, statistics != nullptr);
PERF_TIMER_GUARD(merge_operator_time_nanos); PERF_TIMER_GUARD(merge_operator_time_nanos);
// Do the merge // Do the merge
@ -213,7 +213,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
std::string merge_result; std::string merge_result;
s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr, s = TimedFullMerge(user_merge_operator_, ikey.user_key, val_ptr,
merge_context_.GetOperands(), &merge_result, logger_, merge_context_.GetOperands(), &merge_result, logger_,
stats_, env_); stats_, clock_);
// We store the result in keys_.back() and operands_.back() // We store the result in keys_.back() and operands_.back()
// if nothing went wrong (i.e.: no operand corruption on disk) // if nothing went wrong (i.e.: no operand corruption on disk)
@ -324,7 +324,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
std::string merge_result; std::string merge_result;
s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr, s = TimedFullMerge(user_merge_operator_, orig_ikey.user_key, nullptr,
merge_context_.GetOperands(), &merge_result, logger_, merge_context_.GetOperands(), &merge_result, logger_,
stats_, env_); stats_, clock_);
if (s.ok()) { if (s.ok()) {
// The original key encountered // The original key encountered
// We are certain that keys_ is not empty here (see assertions couple of // We are certain that keys_ is not empty here (see assertions couple of
@ -347,7 +347,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
bool merge_success = false; bool merge_success = false;
std::string merge_result; std::string merge_result;
{ {
StopWatchNano timer(env_, stats_ != nullptr); StopWatchNano timer(clock_, stats_ != nullptr);
PERF_TIMER_GUARD(merge_operator_time_nanos); PERF_TIMER_GUARD(merge_operator_time_nanos);
merge_success = user_merge_operator_->PartialMergeMulti( merge_success = user_merge_operator_->PartialMergeMulti(
orig_ikey.user_key, orig_ikey.user_key,

@ -25,6 +25,7 @@ class Iterator;
class Logger; class Logger;
class MergeOperator; class MergeOperator;
class Statistics; class Statistics;
class SystemClock;
class MergeHelper { class MergeHelper {
public: public:
@ -44,13 +45,11 @@ class MergeHelper {
// Returns one of the following statuses: // Returns one of the following statuses:
// - OK: Entries were successfully merged. // - OK: Entries were successfully merged.
// - Corruption: Merge operator reported unsuccessful merge. // - Corruption: Merge operator reported unsuccessful merge.
-  static Status TimedFullMerge(const MergeOperator* merge_operator,
-                               const Slice& key, const Slice* value,
-                               const std::vector<Slice>& operands,
-                               std::string* result, Logger* logger,
-                               Statistics* statistics, Env* env,
-                               Slice* result_operand = nullptr,
-                               bool update_num_ops_stats = false);
+  static Status TimedFullMerge(
+      const MergeOperator* merge_operator, const Slice& key, const Slice* value,
+      const std::vector<Slice>& operands, std::string* result, Logger* logger,
+      Statistics* statistics, const std::shared_ptr<SystemClock>& clock,
+      Slice* result_operand = nullptr, bool update_num_ops_stats = false);
// Merge entries until we hit // Merge entries until we hit
// - a corrupted key // - a corrupted key
@ -140,6 +139,7 @@ class MergeHelper {
private: private:
Env* env_; Env* env_;
std::shared_ptr<SystemClock> clock_;
const Comparator* user_comparator_; const Comparator* user_comparator_;
const MergeOperator* user_merge_operator_; const MergeOperator* user_merge_operator_;
const CompactionFilter* compaction_filter_; const CompactionFilter* compaction_filter_;
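
A hypothetical call-site wrapper showing the reordered TimedFullMerge() parameters after this PR (the SystemClock takes the place of the Env argument); every value here is supplied by the caller, so no particular merge operator is assumed.

```cpp
#include <string>
#include <vector>

#include "db/merge_helper.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// Thin wrapper over the new signature: clock instead of env, same semantics.
Status FullMergeWithClock(const MergeOperator* merge_operator, const Slice& key,
                          const Slice* existing_value,
                          const std::vector<Slice>& operands,
                          std::string* result, Logger* logger,
                          Statistics* statistics, Env* env) {
  return MergeHelper::TimedFullMerge(
      merge_operator, key, existing_value, operands, result, logger,
      statistics, env->GetSystemClock(), nullptr /* result_operand */,
      true /* update_num_ops_stats */);
}
}  // namespace ROCKSDB_NAMESPACE
```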

@ -3,6 +3,8 @@
// COPYING file in the root directory) and Apache 2.0 License // COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
// //
#include "rocksdb/perf_context.h"
#include <algorithm> #include <algorithm>
#include <iostream> #include <iostream>
#include <thread> #include <thread>
@ -15,8 +17,8 @@
#include "port/port.h" #include "port/port.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/string_util.h" #include "util/string_util.h"
@ -91,7 +93,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
std::string value; std::string value;
get_perf_context()->Reset(); get_perf_context()->Reset();
StopWatchNano timer(Env::Default()); StopWatchNano timer(SystemClock::Default());
timer.Start(); timer.Start();
auto status = db->Get(read_options, key, &value); auto status = db->Get(read_options, key, &value);
auto elapsed_nanos = timer.ElapsedNanos(); auto elapsed_nanos = timer.ElapsedNanos();
@ -110,7 +112,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
std::unique_ptr<Iterator> iter(db->NewIterator(read_options)); std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
get_perf_context()->Reset(); get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
iter->SeekToFirst(); iter->SeekToFirst();
hist_seek_to_first.Add(get_perf_context()->user_key_comparison_count); hist_seek_to_first.Add(get_perf_context()->user_key_comparison_count);
auto elapsed_nanos = timer.ElapsedNanos(); auto elapsed_nanos = timer.ElapsedNanos();
@ -131,7 +133,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
std::string key = "k" + ToString(i); std::string key = "k" + ToString(i);
get_perf_context()->Reset(); get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
iter->Seek(key); iter->Seek(key);
auto elapsed_nanos = timer.ElapsedNanos(); auto elapsed_nanos = timer.ElapsedNanos();
hist_seek.Add(get_perf_context()->user_key_comparison_count); hist_seek.Add(get_perf_context()->user_key_comparison_count);
@ -145,7 +147,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion) {
get_perf_context()->Reset(); get_perf_context()->Reset();
ASSERT_TRUE(iter->Valid()); ASSERT_TRUE(iter->Valid());
StopWatchNano timer2(Env::Default(), true); StopWatchNano timer2(SystemClock::Default(), true);
iter->Next(); iter->Next();
auto elapsed_nanos2 = timer2.ElapsedNanos(); auto elapsed_nanos2 = timer2.ElapsedNanos();
if (FLAGS_verbose) { if (FLAGS_verbose) {
@ -164,7 +166,7 @@ TEST_F(PerfContextTest, StopWatchNanoOverhead) {
const int kTotalIterations = 1000000; const int kTotalIterations = 1000000;
std::vector<uint64_t> timings(kTotalIterations); std::vector<uint64_t> timings(kTotalIterations);
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
for (auto& timing : timings) { for (auto& timing : timings) {
timing = timer.ElapsedNanos(true /* reset */); timing = timer.ElapsedNanos(true /* reset */);
} }
@ -185,7 +187,7 @@ TEST_F(PerfContextTest, StopWatchOverhead) {
uint64_t elapsed = 0; uint64_t elapsed = 0;
std::vector<uint64_t> timings(kTotalIterations); std::vector<uint64_t> timings(kTotalIterations);
StopWatch timer(Env::Default(), nullptr, 0, &elapsed); StopWatch timer(SystemClock::Default(), nullptr, 0, &elapsed);
for (auto& timing : timings) { for (auto& timing : timings) {
timing = elapsed; timing = elapsed;
} }
@ -539,7 +541,7 @@ TEST_F(PerfContextTest, SeekKeyComparison) {
HistogramImpl hist_time_diff; HistogramImpl hist_time_diff;
SetPerfLevel(kEnableTime); SetPerfLevel(kEnableTime);
StopWatchNano timer(Env::Default()); StopWatchNano timer(SystemClock::Default());
for (const int i : keys) { for (const int i : keys) {
std::string key = "k" + ToString(i); std::string key = "k" + ToString(i);
std::string value = "v" + ToString(i); std::string value = "v" + ToString(i);
@ -592,25 +594,25 @@ TEST_F(PerfContextTest, DBMutexLockCounter) {
for (PerfLevel perf_level_test : for (PerfLevel perf_level_test :
{PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) { {PerfLevel::kEnableTimeExceptForMutex, PerfLevel::kEnableTime}) {
for (int c = 0; c < 2; ++c) { for (int c = 0; c < 2; ++c) {
-      InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]);
+      InstrumentedMutex mutex(nullptr, SystemClock::Default(), stats_code[c]);
       mutex.Lock();
       ROCKSDB_NAMESPACE::port::Thread child_thread([&] {
         SetPerfLevel(perf_level_test);
         get_perf_context()->Reset();
         ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
         mutex.Lock();
         mutex.Unlock();
         if (perf_level_test == PerfLevel::kEnableTimeExceptForMutex ||
             stats_code[c] != DB_MUTEX_WAIT_MICROS) {
           ASSERT_EQ(get_perf_context()->db_mutex_lock_nanos, 0);
         } else {
           // increment the counter only when it's a DB Mutex
           ASSERT_GT(get_perf_context()->db_mutex_lock_nanos, 0);
         }
       });
-      Env::Default()->SleepForMicroseconds(100);
+      SystemClock::Default()->SleepForMicroseconds(100);
       mutex.Unlock();
       child_thread.join();
} }
} }
} }
@ -619,7 +621,7 @@ TEST_F(PerfContextTest, FalseDBMutexWait) {
SetPerfLevel(kEnableTime); SetPerfLevel(kEnableTime);
int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)}; int stats_code[] = {0, static_cast<int>(DB_MUTEX_WAIT_MICROS)};
for (int c = 0; c < 2; ++c) { for (int c = 0; c < 2; ++c) {
InstrumentedMutex mutex(nullptr, Env::Default(), stats_code[c]); InstrumentedMutex mutex(nullptr, SystemClock::Default(), stats_code[c]);
InstrumentedCondVar lock(&mutex); InstrumentedCondVar lock(&mutex);
get_perf_context()->Reset(); get_perf_context()->Reset();
mutex.Lock(); mutex.Lock();
@ -824,8 +826,8 @@ TEST_F(PerfContextTest, PerfContextByLevelGetSet) {
} }
TEST_F(PerfContextTest, CPUTimer) { TEST_F(PerfContextTest, CPUTimer) {
if (Env::Default()->NowCPUNanos() == 0) { if (SystemClock::Default()->CPUNanos() == 0) {
ROCKSDB_GTEST_SKIP("Target without NowCPUNanos support"); ROCKSDB_GTEST_SKIP("Target without CPUNanos support");
return; return;
} }
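
A minimal sketch of the test-side pattern above: StopWatchNano now takes a SystemClock, so timing a block against the default clock looks like this. ElapsedNanosOf is a hypothetical helper.

```cpp
#include <cstdint>
#include <utility>

#include "rocksdb/system_clock.h"
#include "util/stop_watch.h"

namespace ROCKSDB_NAMESPACE {
// Times an arbitrary callable with StopWatchNano on SystemClock::Default(),
// the same construction used throughout the updated tests above.
template <typename Op>
uint64_t ElapsedNanosOf(Op&& op) {
  StopWatchNano timer(SystemClock::Default(), true /* auto_start */);
  std::forward<Op>(op)();
  return timer.ElapsedNanos();
}
}  // namespace ROCKSDB_NAMESPACE
```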

@ -6,12 +6,14 @@
#include "db/periodic_work_scheduler.h" #include "db/periodic_work_scheduler.h"
#include "db/db_impl/db_impl.h" #include "db/db_impl/db_impl.h"
#include "rocksdb/system_clock.h"
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
-PeriodicWorkScheduler::PeriodicWorkScheduler(Env* env) : timer_mu_(env) {
-  timer = std::unique_ptr<Timer>(new Timer(env));
+PeriodicWorkScheduler::PeriodicWorkScheduler(
+    const std::shared_ptr<SystemClock>& clock) {
+  timer = std::unique_ptr<Timer>(new Timer(clock));
} }
void PeriodicWorkScheduler::Register(DBImpl* dbi, void PeriodicWorkScheduler::Register(DBImpl* dbi,
@ -52,10 +54,10 @@ void PeriodicWorkScheduler::Unregister(DBImpl* dbi) {
} }
PeriodicWorkScheduler* PeriodicWorkScheduler::Default() { PeriodicWorkScheduler* PeriodicWorkScheduler::Default() {
-  // Always use the default Env for the scheduler, as we only use the NowMicros
-  // which is the same for all env.
-  // The Env could only be overridden in test.
-  static PeriodicWorkScheduler scheduler(Env::Default());
+  // Always use the default SystemClock for the scheduler, as we only use the
+  // NowMicros which is the same for all clocks. The Env could only be
+  // overridden in test.
+  static PeriodicWorkScheduler scheduler(SystemClock::Default());
return &scheduler; return &scheduler;
} }
@ -69,12 +71,13 @@ std::string PeriodicWorkScheduler::GetTaskName(DBImpl* dbi,
#ifndef NDEBUG #ifndef NDEBUG
-// Get the static scheduler. For a new env, it needs to re-create the internal
-// timer, so only re-create it when there's no running task. Otherwise, return
-// the existing scheduler. Which means if the unittest needs to update MockEnv,
-// Close all db instances and then re-open them.
-PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(Env* env) {
-  static PeriodicWorkTestScheduler scheduler(env);
+// Get the static scheduler. For a new SystemClock, it needs to re-create the
+// internal timer, so only re-create it when there's no running task. Otherwise,
+// return the existing scheduler. Which means if the unittest needs to update
+// MockClock, Close all db instances and then re-open them.
+PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(
+    const std::shared_ptr<SystemClock>& clock) {
+  static PeriodicWorkTestScheduler scheduler(clock);
static port::Mutex mutex; static port::Mutex mutex;
{ {
MutexLock l(&mutex); MutexLock l(&mutex);
@ -84,7 +87,7 @@ PeriodicWorkTestScheduler* PeriodicWorkTestScheduler::Default(Env* env) {
MutexLock timer_mu_guard(&scheduler.timer_mu_); MutexLock timer_mu_guard(&scheduler.timer_mu_);
scheduler.timer->Shutdown(); scheduler.timer->Shutdown();
} }
scheduler.timer.reset(new Timer(env)); scheduler.timer.reset(new Timer(clock));
} }
} }
return &scheduler; return &scheduler;
@ -104,8 +107,9 @@ size_t PeriodicWorkTestScheduler::TEST_GetValidTaskNum() const {
return 0; return 0;
} }
-PeriodicWorkTestScheduler::PeriodicWorkTestScheduler(Env* env)
-    : PeriodicWorkScheduler(env) {}
+PeriodicWorkTestScheduler::PeriodicWorkTestScheduler(
+    const std::shared_ptr<SystemClock>& clock)
+    : PeriodicWorkScheduler(clock) {}
#endif // !NDEBUG #endif // !NDEBUG
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
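
A minimal sketch of the scheduler change: the Timer is now built from a SystemClock rather than an Env. Only the construction shown in the hunks above is assumed; task registration is omitted to avoid guessing at the rest of the Timer API.

```cpp
#include <memory>

#include "rocksdb/system_clock.h"
#include "util/timer.h"

namespace ROCKSDB_NAMESPACE {
// Builds a Timer that reads time from the default SystemClock, as
// PeriodicWorkScheduler now does in its constructor.
std::unique_ptr<Timer> MakeDefaultTimer() {
  return std::unique_ptr<Timer>(new Timer(SystemClock::Default()));
}
}  // namespace ROCKSDB_NAMESPACE
```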

@ -11,6 +11,7 @@
#include "util/timer.h" #include "util/timer.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class SystemClock;
// PeriodicWorkScheduler is a singleton object, which is scheduling/running // PeriodicWorkScheduler is a singleton object, which is scheduling/running
// DumpStats(), PersistStats(), and FlushInfoLog() for all DB instances. All DB // DumpStats(), PersistStats(), and FlushInfoLog() for all DB instances. All DB
@ -49,25 +50,26 @@ class PeriodicWorkScheduler {
// the `Timer::Cancel()`s and `Timer::Shutdown()` run atomically. // the `Timer::Cancel()`s and `Timer::Shutdown()` run atomically.
port::Mutex timer_mu_; port::Mutex timer_mu_;
explicit PeriodicWorkScheduler(Env* env); explicit PeriodicWorkScheduler(const std::shared_ptr<SystemClock>& clock);
private: private:
std::string GetTaskName(DBImpl* dbi, const std::string& func_name); std::string GetTaskName(DBImpl* dbi, const std::string& func_name);
}; };
#ifndef NDEBUG #ifndef NDEBUG
-// PeriodicWorkTestScheduler is for unittest, which can specify the Env like
-// SafeMockTimeEnv. It also contains functions for unittest.
+// PeriodicWorkTestScheduler is for unittest, which can specify the SystemClock
+// It also contains functions for unittest.
 class PeriodicWorkTestScheduler : public PeriodicWorkScheduler {
  public:
-  static PeriodicWorkTestScheduler* Default(Env* env);
+  static PeriodicWorkTestScheduler* Default(
+      const std::shared_ptr<SystemClock>& clock);
void TEST_WaitForRun(std::function<void()> callback) const; void TEST_WaitForRun(std::function<void()> callback) const;
size_t TEST_GetValidTaskNum() const; size_t TEST_GetValidTaskNum() const;
private: private:
explicit PeriodicWorkTestScheduler(Env* env); explicit PeriodicWorkTestScheduler(const std::shared_ptr<SystemClock>& clock);
}; };
#endif // !NDEBUG #endif // !NDEBUG

@ -6,6 +6,8 @@
#include "db/periodic_work_scheduler.h" #include "db/periodic_work_scheduler.h"
#include "db/db_test_util.h" #include "db/db_test_util.h"
#include "env/composite_env_wrapper.h"
#include "test_util/mock_time_env.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -14,20 +16,22 @@ class PeriodicWorkSchedulerTest : public DBTestBase {
public: public:
PeriodicWorkSchedulerTest() PeriodicWorkSchedulerTest()
: DBTestBase("/periodic_work_scheduler_test", /*env_do_fsync=*/true) { : DBTestBase("/periodic_work_scheduler_test", /*env_do_fsync=*/true) {
-    mock_env_.reset(new MockTimeEnv(env_));
+    mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
+    mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_));
} }
protected: protected:
std::unique_ptr<MockTimeEnv> mock_env_; std::unique_ptr<Env> mock_env_;
std::shared_ptr<MockSystemClock> mock_clock_;
void SetUp() override { void SetUp() override {
mock_env_->InstallTimedWaitFixCallback(); mock_clock_->InstallTimedWaitFixCallback();
SyncPoint::GetInstance()->SetCallBack( SyncPoint::GetInstance()->SetCallBack(
"DBImpl::StartPeriodicWorkScheduler:Init", [&](void* arg) { "DBImpl::StartPeriodicWorkScheduler:Init", [&](void* arg) {
auto* periodic_work_scheduler_ptr = auto* periodic_work_scheduler_ptr =
reinterpret_cast<PeriodicWorkScheduler**>(arg); reinterpret_cast<PeriodicWorkScheduler**>(arg);
*periodic_work_scheduler_ptr = *periodic_work_scheduler_ptr =
PeriodicWorkTestScheduler::Default(mock_env_.get()); PeriodicWorkTestScheduler::Default(mock_clock_);
}); });
} }
}; };
@ -63,7 +67,7 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
ASSERT_GT(kPeriodSec, 1u); ASSERT_GT(kPeriodSec, 1u);
dbfull()->TEST_WaitForStatsDumpRun([&] { dbfull()->TEST_WaitForStatsDumpRun([&] {
mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec) - 1); mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec) - 1);
}); });
auto scheduler = dbfull()->TEST_GetPeriodicWorkScheduler(); auto scheduler = dbfull()->TEST_GetPeriodicWorkScheduler();
@ -75,14 +79,14 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
ASSERT_EQ(1, flush_info_log_counter); ASSERT_EQ(1, flush_info_log_counter);
dbfull()->TEST_WaitForStatsDumpRun( dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); }); [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(2, dump_st_counter); ASSERT_EQ(2, dump_st_counter);
ASSERT_EQ(2, pst_st_counter); ASSERT_EQ(2, pst_st_counter);
ASSERT_EQ(2, flush_info_log_counter); ASSERT_EQ(2, flush_info_log_counter);
dbfull()->TEST_WaitForStatsDumpRun( dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); }); [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(3, dump_st_counter); ASSERT_EQ(3, dump_st_counter);
ASSERT_EQ(3, pst_st_counter); ASSERT_EQ(3, pst_st_counter);
@ -96,7 +100,7 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
// Info log flush should still run. // Info log flush should still run.
dbfull()->TEST_WaitForStatsDumpRun( dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); }); [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(3, dump_st_counter); ASSERT_EQ(3, dump_st_counter);
ASSERT_EQ(3, pst_st_counter); ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(4, flush_info_log_counter); ASSERT_EQ(4, flush_info_log_counter);
@ -114,7 +118,7 @@ TEST_F(PeriodicWorkSchedulerTest, Basic) {
ASSERT_EQ(2, scheduler->TEST_GetValidTaskNum()); ASSERT_EQ(2, scheduler->TEST_GetValidTaskNum());
dbfull()->TEST_WaitForStatsDumpRun( dbfull()->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); }); [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(kPeriodSec)); });
ASSERT_EQ(4, dump_st_counter); ASSERT_EQ(4, dump_st_counter);
ASSERT_EQ(3, pst_st_counter); ASSERT_EQ(3, pst_st_counter);
ASSERT_EQ(5, flush_info_log_counter); ASSERT_EQ(5, flush_info_log_counter);
@ -154,19 +158,19 @@ TEST_F(PeriodicWorkSchedulerTest, MultiInstances) {
int expected_run = kInstanceNum; int expected_run = kInstanceNum;
dbi->TEST_WaitForStatsDumpRun( dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); }); [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
ASSERT_EQ(expected_run, dump_st_counter); ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter); ASSERT_EQ(expected_run, pst_st_counter);
expected_run += kInstanceNum; expected_run += kInstanceNum;
dbi->TEST_WaitForStatsDumpRun( dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); }); [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter); ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter); ASSERT_EQ(expected_run, pst_st_counter);
expected_run += kInstanceNum; expected_run += kInstanceNum;
dbi->TEST_WaitForStatsDumpRun( dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); }); [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter); ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter); ASSERT_EQ(expected_run, pst_st_counter);
@ -178,9 +182,9 @@ TEST_F(PeriodicWorkSchedulerTest, MultiInstances) {
expected_run += (kInstanceNum - half) * 2; expected_run += (kInstanceNum - half) * 2;
dbi->TEST_WaitForStatsDumpRun( dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); }); [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
dbi->TEST_WaitForStatsDumpRun( dbi->TEST_WaitForStatsDumpRun(
[&] { mock_env_->MockSleepForSeconds(kPeriodSec); }); [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
ASSERT_EQ(expected_run, dump_st_counter); ASSERT_EQ(expected_run, dump_st_counter);
ASSERT_EQ(expected_run, pst_st_counter); ASSERT_EQ(expected_run, pst_st_counter);
@ -202,7 +206,8 @@ TEST_F(PeriodicWorkSchedulerTest, MultiEnv) {
Reopen(options1); Reopen(options1);
-  std::unique_ptr<MockTimeEnv> mock_env2(new MockTimeEnv(Env::Default()));
+  std::unique_ptr<Env> mock_env2(
+      new CompositeEnvWrapper(Env::Default(), mock_clock_));
Options options2; Options options2;
options2.stats_dump_period_sec = kDumpPeriodSec; options2.stats_dump_period_sec = kDumpPeriodSec;
options2.stats_persist_period_sec = kPersistPeriodSec; options2.stats_persist_period_sec = kPersistPeriodSec;
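
A sketch of the fixture wiring above: the tests now wrap the base Env in a CompositeEnvWrapper whose time calls go to a MockSystemClock, and advance time via MockSleepForSeconds(). The struct below is illustrative and assumes MockSystemClock is declared in test_util/mock_time_env.h, as the new include suggests.

```cpp
#include <memory>

#include "env/composite_env_wrapper.h"
#include "rocksdb/env.h"
#include "test_util/mock_time_env.h"

namespace ROCKSDB_NAMESPACE {
// Bundles a mocked clock with an Env that delegates everything except time.
struct MockTimeFixture {
  std::shared_ptr<MockSystemClock> mock_clock;
  std::unique_ptr<Env> mock_env;

  explicit MockTimeFixture(Env* base_env)
      : mock_clock(
            std::make_shared<MockSystemClock>(base_env->GetSystemClock())),
        mock_env(new CompositeEnvWrapper(base_env, mock_clock)) {}

  // Advances mocked time, e.g. to trigger the periodic stats dump in tests.
  void AdvanceSeconds(int secs) { mock_clock->MockSleepForSeconds(secs); }
};
}  // namespace ROCKSDB_NAMESPACE
```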

@ -25,6 +25,7 @@ int main() {
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "rocksdb/perf_context.h" #include "rocksdb/perf_context.h"
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "util/cast_util.h" #include "util/cast_util.h"
@ -608,7 +609,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator) {
std::string value(FLAGS_value_size, 0); std::string value(FLAGS_value_size, 0);
get_perf_context()->Reset(); get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
ASSERT_OK(db->Put(write_options, key, value)); ASSERT_OK(db->Put(write_options, key, value));
hist_put_time.Add(timer.ElapsedNanos()); hist_put_time.Add(timer.ElapsedNanos());
hist_put_comparison.Add(get_perf_context()->user_key_comparison_count); hist_put_comparison.Add(get_perf_context()->user_key_comparison_count);
@ -631,7 +632,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator) {
std::string value = "v" + ToString(0); std::string value = "v" + ToString(0);
get_perf_context()->Reset(); get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
auto key_prefix = options.prefix_extractor->Transform(key); auto key_prefix = options.prefix_extractor->Transform(key);
uint64_t total_keys = 0; uint64_t total_keys = 0;
for (iter->Seek(key); for (iter->Seek(key);
@ -665,7 +666,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator) {
Slice key = TestKeyToSlice(s, test_key); Slice key = TestKeyToSlice(s, test_key);
get_perf_context()->Reset(); get_perf_context()->Reset();
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
iter->Seek(key); iter->Seek(key);
hist_no_seek_time.Add(timer.ElapsedNanos()); hist_no_seek_time.Add(timer.ElapsedNanos());
hist_no_seek_comparison.Add(get_perf_context()->user_key_comparison_count); hist_no_seek_comparison.Add(get_perf_context()->user_key_comparison_count);

@ -11,8 +11,8 @@ int main() {
} }
#else #else
#include <iostream>
#include <iomanip> #include <iomanip>
#include <iostream>
#include <memory> #include <memory>
#include <random> #include <random>
#include <set> #include <set>
@ -22,14 +22,13 @@ int main() {
#include "db/range_del_aggregator.h" #include "db/range_del_aggregator.h"
#include "db/range_tombstone_fragmenter.h" #include "db/range_tombstone_fragmenter.h"
#include "rocksdb/comparator.h" #include "rocksdb/comparator.h"
#include "rocksdb/env.h" #include "rocksdb/system_clock.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/gflags_compat.h"
#include "util/random.h" #include "util/random.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags; using GFLAGS_NAMESPACE::ParseCommandLineFlags;
DEFINE_int32(num_range_tombstones, 1000, "number of range tombstones created"); DEFINE_int32(num_range_tombstones, 1000, "number of range tombstones created");
@ -220,7 +219,7 @@ int main(int argc, char** argv) {
ROCKSDB_NAMESPACE::kMaxSequenceNumber)); ROCKSDB_NAMESPACE::kMaxSequenceNumber));
ROCKSDB_NAMESPACE::StopWatchNano stop_watch_add_tombstones( ROCKSDB_NAMESPACE::StopWatchNano stop_watch_add_tombstones(
ROCKSDB_NAMESPACE::Env::Default(), true /* auto_start */); ROCKSDB_NAMESPACE::SystemClock::Default(), true /* auto_start */);
range_del_agg.AddTombstones(std::move(fragmented_range_del_iter)); range_del_agg.AddTombstones(std::move(fragmented_range_del_iter));
stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos(); stats.time_add_tombstones += stop_watch_add_tombstones.ElapsedNanos();
} }
@ -237,7 +236,7 @@ int main(int argc, char** argv) {
parsed_key.user_key = key_string; parsed_key.user_key = key_string;
ROCKSDB_NAMESPACE::StopWatchNano stop_watch_should_delete( ROCKSDB_NAMESPACE::StopWatchNano stop_watch_should_delete(
ROCKSDB_NAMESPACE::Env::Default(), true /* auto_start */); ROCKSDB_NAMESPACE::SystemClock::Default(), true /* auto_start */);
range_del_agg.ShouldDelete(parsed_key, mode); range_del_agg.ShouldDelete(parsed_key, mode);
uint64_t call_time = stop_watch_should_delete.ElapsedNanos(); uint64_t call_time = stop_watch_should_delete.ElapsedNanos();

@ -106,14 +106,15 @@ Status TableCache::GetTableReader(
TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId()); TableFileName(ioptions_.cf_paths, fd.GetNumber(), fd.GetPathId());
std::unique_ptr<FSRandomAccessFile> file; std::unique_ptr<FSRandomAccessFile> file;
FileOptions fopts = file_options; FileOptions fopts = file_options;
Status s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options); const auto& clock = ioptions_.env->GetSystemClock();
Status s = PrepareIOFromReadOptions(ro, clock, fopts.io_options);
if (s.ok()) { if (s.ok()) {
s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr); s = ioptions_.fs->NewRandomAccessFile(fname, fopts, &file, nullptr);
} }
RecordTick(ioptions_.statistics, NO_FILE_OPENS); RecordTick(ioptions_.statistics, NO_FILE_OPENS);
if (s.IsPathNotFound()) { if (s.IsPathNotFound()) {
fname = Rocks2LevelTableFileName(fname); fname = Rocks2LevelTableFileName(fname);
s = PrepareIOFromReadOptions(ro, ioptions_.env, fopts.io_options); s = PrepareIOFromReadOptions(ro, clock, fopts.io_options);
if (s.ok()) { if (s.ok()) {
s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file, s = ioptions_.fs->NewRandomAccessFile(fname, file_options, &file,
nullptr); nullptr);
@ -125,10 +126,10 @@ Status TableCache::GetTableReader(
if (!sequential_mode && ioptions_.advise_random_on_open) { if (!sequential_mode && ioptions_.advise_random_on_open) {
file->Hint(FSRandomAccessFile::kRandom); file->Hint(FSRandomAccessFile::kRandom);
} }
StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS); StopWatch sw(clock, ioptions_.statistics, TABLE_OPEN_IO_MICROS);
std::unique_ptr<RandomAccessFileReader> file_reader( std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader( new RandomAccessFileReader(
std::move(file), fname, ioptions_.env, io_tracer_, std::move(file), fname, clock, io_tracer_,
record_read_stats ? ioptions_.statistics : nullptr, SST_READ_MICROS, record_read_stats ? ioptions_.statistics : nullptr, SST_READ_MICROS,
file_read_hist, ioptions_.rate_limiter, ioptions_.listeners)); file_read_hist, ioptions_.rate_limiter, ioptions_.listeners));
s = ioptions_.table_factory->NewTableReader( s = ioptions_.table_factory->NewTableReader(
@ -161,7 +162,8 @@ Status TableCache::FindTable(const ReadOptions& ro,
HistogramImpl* file_read_hist, bool skip_filters, HistogramImpl* file_read_hist, bool skip_filters,
int level, bool prefetch_index_and_filter_in_cache, int level, bool prefetch_index_and_filter_in_cache,
size_t max_file_size_for_l0_meta_pin) { size_t max_file_size_for_l0_meta_pin) {
PERF_TIMER_GUARD_WITH_ENV(find_table_nanos, ioptions_.env); PERF_TIMER_GUARD_WITH_CLOCK(find_table_nanos,
ioptions_.env->GetSystemClock());
uint64_t number = fd.GetNumber(); uint64_t number = fd.GetNumber();
Slice key = GetSliceForFileNumber(&number); Slice key = GetSliceForFileNumber(&number);
*handle = cache_->Lookup(key); *handle = cache_->Lookup(key);
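The TableCache hunks above all follow one pattern: fetch the clock from the Env once, then hand it to every timing helper in the function. A sketch of that pattern under stated assumptions: util/stop_watch.h is an internal header, the StopWatch(clock, statistics, histogram) constructor is taken from the diff, and TimedTableOpen with its parameters is illustrative.

#include "rocksdb/env.h"
#include "rocksdb/statistics.h"
#include "rocksdb/system_clock.h"
#include "util/stop_watch.h"  // internal RocksDB header

void TimedTableOpen(ROCKSDB_NAMESPACE::Env* env,
                    ROCKSDB_NAMESPACE::Statistics* stats) {
  // Fetch the clock once; all timing below reuses it instead of the Env.
  const auto& clock = env->GetSystemClock();
  // Scoped timer: reports the elapsed time to the TABLE_OPEN_IO_MICROS
  // histogram when it goes out of scope.
  ROCKSDB_NAMESPACE::StopWatch sw(clock, stats,
                                  ROCKSDB_NAMESPACE::TABLE_OPEN_IO_MICROS);
  // ... open and read the table file ...
}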

@ -1761,6 +1761,7 @@ Version::Version(ColumnFamilyData* column_family_data, VersionSet* vset,
const std::shared_ptr<IOTracer>& io_tracer, const std::shared_ptr<IOTracer>& io_tracer,
uint64_t version_number) uint64_t version_number)
: env_(vset->env_), : env_(vset->env_),
clock_(env_->GetSystemClock()),
cfd_(column_family_data), cfd_(column_family_data),
info_log_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->info_log), info_log_((cfd_ == nullptr) ? nullptr : cfd_->ioptions()->info_log),
db_statistics_((cfd_ == nullptr) ? nullptr db_statistics_((cfd_ == nullptr) ? nullptr
@ -1879,7 +1880,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
user_comparator(), merge_operator_, info_log_, db_statistics_, user_comparator(), merge_operator_, info_log_, db_statistics_,
status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key, status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
do_merge ? value : nullptr, do_merge ? timestamp : nullptr, value_found, do_merge ? value : nullptr, do_merge ? timestamp : nullptr, value_found,
merge_context, do_merge, max_covering_tombstone_seq, this->env_, seq, merge_context, do_merge, max_covering_tombstone_seq, clock_, seq,
merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob_to_use, merge_operator_ ? &pinned_iters_mgr : nullptr, callback, is_blob_to_use,
tracing_get_id); tracing_get_id);
@ -1907,7 +1908,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
bool timer_enabled = bool timer_enabled =
GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
get_perf_context()->per_level_perf_context_enabled; get_perf_context()->per_level_perf_context_enabled;
StopWatchNano timer(env_, timer_enabled /* auto_start */); StopWatchNano timer(clock_, timer_enabled /* auto_start */);
*status = table_cache_->Get( *status = table_cache_->Get(
read_options, *internal_comparator(), *f->file_metadata, ikey, read_options, *internal_comparator(), *f->file_metadata, ikey,
&get_context, mutable_cf_options_.prefix_extractor.get(), &get_context, mutable_cf_options_.prefix_extractor.get(),
@ -1996,7 +1997,7 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
std::string* str_value = value != nullptr ? value->GetSelf() : nullptr; std::string* str_value = value != nullptr ? value->GetSelf() : nullptr;
*status = MergeHelper::TimedFullMerge( *status = MergeHelper::TimedFullMerge(
merge_operator_, user_key, nullptr, merge_context->GetOperands(), merge_operator_, user_key, nullptr, merge_context->GetOperands(),
str_value, info_log_, db_statistics_, env_, str_value, info_log_, db_statistics_, clock_,
nullptr /* result_operand */, true); nullptr /* result_operand */, true);
if (LIKELY(value != nullptr)) { if (LIKELY(value != nullptr)) {
value->PinSelf(); value->PinSelf();
@ -2033,9 +2034,9 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
user_comparator(), merge_operator_, info_log_, db_statistics_, user_comparator(), merge_operator_, info_log_, db_statistics_,
iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge, iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge,
iter->ukey_with_ts, iter->value, iter->timestamp, nullptr, iter->ukey_with_ts, iter->value, iter->timestamp, nullptr,
&(iter->merge_context), true, &iter->max_covering_tombstone_seq, &(iter->merge_context), true, &iter->max_covering_tombstone_seq, clock_,
this->env_, nullptr, merge_operator_ ? &pinned_iters_mgr : nullptr, nullptr, merge_operator_ ? &pinned_iters_mgr : nullptr, callback,
callback, &iter->is_blob_index, tracing_mget_id); &iter->is_blob_index, tracing_mget_id);
// MergeInProgress status, if set, has been transferred to the get_context // MergeInProgress status, if set, has been transferred to the get_context
// state, so we set status to ok here. From now on, the iter status will // state, so we set status to ok here. From now on, the iter status will
// be used for IO errors, and get_context state will be used for any // be used for IO errors, and get_context state will be used for any
@ -2065,7 +2066,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
bool timer_enabled = bool timer_enabled =
GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex && GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
get_perf_context()->per_level_perf_context_enabled; get_perf_context()->per_level_perf_context_enabled;
StopWatchNano timer(env_, timer_enabled /* auto_start */); StopWatchNano timer(clock_, timer_enabled /* auto_start */);
s = table_cache_->MultiGet( s = table_cache_->MultiGet(
read_options, *internal_comparator(), *f->file_metadata, &file_range, read_options, *internal_comparator(), *f->file_metadata, &file_range,
mutable_cf_options_.prefix_extractor.get(), mutable_cf_options_.prefix_extractor.get(),
@ -2228,7 +2229,7 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
iter->value != nullptr ? iter->value->GetSelf() : nullptr; iter->value != nullptr ? iter->value->GetSelf() : nullptr;
*status = MergeHelper::TimedFullMerge( *status = MergeHelper::TimedFullMerge(
merge_operator_, user_key, nullptr, iter->merge_context.GetOperands(), merge_operator_, user_key, nullptr, iter->merge_context.GetOperands(),
str_value, info_log_, db_statistics_, env_, str_value, info_log_, db_statistics_, clock_,
nullptr /* result_operand */, true); nullptr /* result_operand */, true);
if (LIKELY(iter->value != nullptr)) { if (LIKELY(iter->value != nullptr)) {
iter->value->PinSelf(); iter->value->PinSelf();
@ -3782,6 +3783,7 @@ VersionSet::VersionSet(const std::string& dbname,
table_cache_(table_cache), table_cache_(table_cache),
env_(_db_options->env), env_(_db_options->env),
fs_(_db_options->fs, io_tracer), fs_(_db_options->fs, io_tracer),
clock_(env_->GetSystemClock()),
dbname_(dbname), dbname_(dbname),
db_options_(_db_options), db_options_(_db_options),
next_file_number_(2), next_file_number_(2),
@ -4119,7 +4121,7 @@ Status VersionSet::ProcessManifestWrites(
db_options_->manifest_preallocation_size); db_options_->manifest_preallocation_size);
std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter( std::unique_ptr<WritableFileWriter> file_writer(new WritableFileWriter(
std::move(descriptor_file), descriptor_fname, opt_file_opts, env_, std::move(descriptor_file), descriptor_fname, opt_file_opts, clock_,
io_tracer_, nullptr, db_options_->listeners)); io_tracer_, nullptr, db_options_->listeners));
descriptor_log_.reset( descriptor_log_.reset(
new log::Writer(std::move(file_writer), 0, false)); new log::Writer(std::move(file_writer), 0, false));
@ -4167,7 +4169,7 @@ Status VersionSet::ProcessManifestWrites(
} }
} }
if (s.ok()) { if (s.ok()) {
io_s = SyncManifest(env_, db_options_, descriptor_log_->file()); io_s = SyncManifest(clock_, db_options_, descriptor_log_->file());
TEST_SYNC_POINT_CALLBACK( TEST_SYNC_POINT_CALLBACK(
"VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s); "VersionSet::ProcessManifestWrites:AfterSyncManifest", &io_s);
} }
@ -6302,7 +6304,7 @@ Status ReactiveVersionSet::MaybeSwitchManifest(
"ReactiveVersionSet::MaybeSwitchManifest:" "ReactiveVersionSet::MaybeSwitchManifest:"
"AfterGetCurrentManifestPath:1"); "AfterGetCurrentManifestPath:1");
s = fs_->NewSequentialFile(manifest_path, s = fs_->NewSequentialFile(manifest_path,
env_->OptimizeForManifestRead(file_options_), fs_->OptimizeForManifestRead(file_options_),
&manifest_file, nullptr); &manifest_file, nullptr);
} else { } else {
// No need to switch manifest. // No need to switch manifest.

@ -71,6 +71,7 @@ class WriteBufferManager;
class MergeContext; class MergeContext;
class ColumnFamilySet; class ColumnFamilySet;
class MergeIteratorBuilder; class MergeIteratorBuilder;
class SystemClock;
// VersionEdit is always supposed to be valid and it is used to point at // VersionEdit is always supposed to be valid and it is used to point at
// entries in Manifest. Ideally it should not be used as a container to // entries in Manifest. Ideally it should not be used as a container to
@ -779,6 +780,8 @@ class Version {
private: private:
Env* env_; Env* env_;
std::shared_ptr<SystemClock> clock_;
friend class ReactiveVersionSet; friend class ReactiveVersionSet;
friend class VersionSet; friend class VersionSet;
friend class VersionEditHandler; friend class VersionEditHandler;
@ -1346,6 +1349,7 @@ class VersionSet {
Cache* table_cache_; Cache* table_cache_;
Env* const env_; Env* const env_;
FileSystemPtr const fs_; FileSystemPtr const fs_;
const std::shared_ptr<SystemClock> clock_;
const std::string dbname_; const std::string dbname_;
std::string db_id_; std::string db_id_;
const ImmutableDBOptions* const db_options_; const ImmutableDBOptions* const db_options_;
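Version and VersionSet now resolve env_->GetSystemClock() once in their constructors and keep it in a clock_ member, so the Get/MultiGet timing and merge paths above no longer reach through the Env. A sketch of the same member-caching pattern in a hypothetical component (TimedComponent is not part of the PR); it only relies on GetSystemClock() and NowMicros(), both shown elsewhere in this diff.

#include <cstdint>
#include <memory>
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
class TimedComponent {
 public:
  explicit TimedComponent(Env* env)
      : env_(env),
        clock_(env->GetSystemClock()),
        start_micros_(clock_->NowMicros()) {}

  uint64_t ElapsedMicros() const { return clock_->NowMicros() - start_micros_; }

 private:
  Env* env_;                            // still used for file/thread calls
  std::shared_ptr<SystemClock> clock_;  // cached once at construction
  uint64_t start_micros_;
};
}  // namespace ROCKSDB_NAMESPACE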

@ -2786,8 +2786,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
std::unique_ptr<FSWritableFile> file; std::unique_ptr<FSWritableFile> file;
Status s = fs_->NewWritableFile(fname, FileOptions(), &file, nullptr); Status s = fs_->NewWritableFile(fname, FileOptions(), &file, nullptr);
ASSERT_OK(s); ASSERT_OK(s);
std::unique_ptr<WritableFileWriter> fwriter( std::unique_ptr<WritableFileWriter> fwriter(new WritableFileWriter(
new WritableFileWriter(std::move(file), fname, FileOptions(), env_)); std::move(file), fname, FileOptions(), env_->GetSystemClock()));
std::vector<std::unique_ptr<IntTblPropCollectorFactory>> std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;

@ -55,6 +55,7 @@
#include "monitoring/statistics.h" #include "monitoring/statistics.h"
#include "port/lang.h" #include "port/lang.h"
#include "rocksdb/merge_operator.h" #include "rocksdb/merge_operator.h"
#include "rocksdb/system_clock.h"
#include "util/autovector.h" #include "util/autovector.h"
#include "util/cast_util.h" #include "util/cast_util.h"
#include "util/coding.h" #include "util/coding.h"
@ -1804,7 +1805,7 @@ class MemTableInserter : public WriteBatch::Handler {
Status merge_status = MergeHelper::TimedFullMerge( Status merge_status = MergeHelper::TimedFullMerge(
merge_operator, key, &get_value_slice, {value}, &new_value, merge_operator, key, &get_value_slice, {value}, &new_value,
moptions->info_log, moptions->statistics, Env::Default()); moptions->info_log, moptions->statistics, SystemClock::Default());
if (!merge_status.ok()) { if (!merge_status.ok()) {
// Failed to merge! // Failed to merge!

@ -8,7 +8,8 @@
#include <atomic> #include <atomic>
#include <cassert> #include <cassert>
#include <ratio> #include <ratio>
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -42,7 +43,8 @@ bool WriteController::IsStopped() const {
// If it turns out to be a performance issue, we can redesign the thread // If it turns out to be a performance issue, we can redesign the thread
// synchronization model here. // synchronization model here.
// The function trust caller will sleep micros returned. // The function trust caller will sleep micros returned.
uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) { uint64_t WriteController::GetDelay(const std::shared_ptr<SystemClock>& clock,
uint64_t num_bytes) {
if (total_stopped_.load(std::memory_order_relaxed) > 0) { if (total_stopped_.load(std::memory_order_relaxed) > 0) {
return 0; return 0;
} }
@ -59,7 +61,7 @@ uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) {
} }
// The frequency to get time inside DB mutex is less than one per refill // The frequency to get time inside DB mutex is less than one per refill
// interval. // interval.
auto time_now = NowMicrosMonotonic(env); auto time_now = NowMicrosMonotonic(clock);
uint64_t sleep_debt = 0; uint64_t sleep_debt = 0;
uint64_t time_since_last_refill = 0; uint64_t time_since_last_refill = 0;
@ -106,8 +108,9 @@ uint64_t WriteController::GetDelay(Env* env, uint64_t num_bytes) {
return sleep_amount; return sleep_amount;
} }
uint64_t WriteController::NowMicrosMonotonic(Env* env) { uint64_t WriteController::NowMicrosMonotonic(
return env->NowNanos() / std::milli::den; const std::shared_ptr<SystemClock>& clock) {
return clock->NowNanos() / std::milli::den;
} }
StopWriteToken::~StopWriteToken() { StopWriteToken::~StopWriteToken() {

@ -13,7 +13,7 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class Env; class SystemClock;
class WriteControllerToken; class WriteControllerToken;
// WriteController is controlling write stalls in our write code-path. Write // WriteController is controlling write stalls in our write code-path. Write
@ -57,7 +57,8 @@ class WriteController {
// return how many microseconds the caller needs to sleep after the call // return how many microseconds the caller needs to sleep after the call
// num_bytes: how many number of bytes to put into the DB. // num_bytes: how many number of bytes to put into the DB.
// Prerequisite: DB mutex held. // Prerequisite: DB mutex held.
uint64_t GetDelay(Env* env, uint64_t num_bytes); uint64_t GetDelay(const std::shared_ptr<SystemClock>& clock,
uint64_t num_bytes);
void set_delayed_write_rate(uint64_t write_rate) { void set_delayed_write_rate(uint64_t write_rate) {
// avoid divide 0 // avoid divide 0
if (write_rate == 0) { if (write_rate == 0) {
@ -85,7 +86,7 @@ class WriteController {
RateLimiter* low_pri_rate_limiter() { return low_pri_rate_limiter_.get(); } RateLimiter* low_pri_rate_limiter() { return low_pri_rate_limiter_.get(); }
private: private:
uint64_t NowMicrosMonotonic(Env* env); uint64_t NowMicrosMonotonic(const std::shared_ptr<SystemClock>& clock);
friend class WriteControllerToken; friend class WriteControllerToken;
friend class StopWriteToken; friend class StopWriteToken;
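The signature change above moves GetDelay() and NowMicrosMonotonic() from Env* to a shared SystemClock, but the contract is unchanged: GetDelay() only computes the delay and the caller must do the sleeping. A minimal sketch of the calling side, assuming the internal db/write_controller.h header; DelayWrite is illustrative, and the real caller is expected to hold the DB mutex as the comment above notes.

#include <cstdint>
#include "db/write_controller.h"  // internal RocksDB header
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

void DelayWrite(ROCKSDB_NAMESPACE::WriteController& controller,
                ROCKSDB_NAMESPACE::Env* env, uint64_t num_bytes) {
  const auto& clock = env->GetSystemClock();
  // GetDelay() returns how many microseconds the caller should sleep;
  // it does not sleep itself.
  uint64_t delay_micros = controller.GetDelay(clock, num_bytes);
  if (delay_micros > 0) {
    clock->SleepForMicroseconds(static_cast<int>(delay_micros));
  }
}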

@ -3,46 +3,50 @@
// COPYING file in the root directory) and Apache 2.0 License // COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory). // (found in the LICENSE.Apache file in the root directory).
// //
#include <ratio>
#include "db/write_controller.h" #include "db/write_controller.h"
#include "rocksdb/env.h" #include <ratio>
#include "rocksdb/system_clock.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
namespace {
class WriteControllerTest : public testing::Test {}; class TimeSetClock : public SystemClockWrapper {
class TimeSetEnv : public EnvWrapper {
public: public:
explicit TimeSetEnv() : EnvWrapper(nullptr) {} explicit TimeSetClock() : SystemClockWrapper(nullptr) {}
const char* Name() const override { return "TimeSetClock"; }
uint64_t now_micros_ = 6666; uint64_t now_micros_ = 6666;
uint64_t NowNanos() override { return now_micros_ * std::milli::den; } uint64_t NowNanos() override { return now_micros_ * std::milli::den; }
}; };
} // namespace
class WriteControllerTest : public testing::Test {
public:
WriteControllerTest() { clock_ = std::make_shared<TimeSetClock>(); }
std::shared_ptr<TimeSetClock> clock_;
};
TEST_F(WriteControllerTest, ChangeDelayRateTest) { TEST_F(WriteControllerTest, ChangeDelayRateTest) {
TimeSetEnv env;
WriteController controller(40000000u); // also set max delayed rate WriteController controller(40000000u); // also set max delayed rate
controller.set_delayed_write_rate(10000000u); controller.set_delayed_write_rate(10000000u);
auto delay_token_0 = auto delay_token_0 =
controller.GetDelayToken(controller.delayed_write_rate()); controller.GetDelayToken(controller.delayed_write_rate());
ASSERT_EQ(static_cast<uint64_t>(2000000), ASSERT_EQ(static_cast<uint64_t>(2000000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
auto delay_token_1 = controller.GetDelayToken(2000000u); auto delay_token_1 = controller.GetDelayToken(2000000u);
ASSERT_EQ(static_cast<uint64_t>(10000000), ASSERT_EQ(static_cast<uint64_t>(10000000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
auto delay_token_2 = controller.GetDelayToken(1000000u); auto delay_token_2 = controller.GetDelayToken(1000000u);
ASSERT_EQ(static_cast<uint64_t>(20000000), ASSERT_EQ(static_cast<uint64_t>(20000000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
auto delay_token_3 = controller.GetDelayToken(20000000u); auto delay_token_3 = controller.GetDelayToken(20000000u);
ASSERT_EQ(static_cast<uint64_t>(1000000), ASSERT_EQ(static_cast<uint64_t>(1000000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
// This is more than max rate. Max delayed rate will be used. // This is more than max rate. Max delayed rate will be used.
auto delay_token_4 = auto delay_token_4 =
controller.GetDelayToken(controller.delayed_write_rate() * 3); controller.GetDelayToken(controller.delayed_write_rate() * 3);
ASSERT_EQ(static_cast<uint64_t>(500000), ASSERT_EQ(static_cast<uint64_t>(500000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
} }
TEST_F(WriteControllerTest, SanityTest) { TEST_F(WriteControllerTest, SanityTest) {
@ -56,73 +60,71 @@ TEST_F(WriteControllerTest, SanityTest) {
stop_token_2.reset(); stop_token_2.reset();
ASSERT_FALSE(controller.IsStopped()); ASSERT_FALSE(controller.IsStopped());
TimeSetEnv env;
auto delay_token_1 = controller.GetDelayToken(10000000u); auto delay_token_1 = controller.GetDelayToken(10000000u);
ASSERT_EQ(static_cast<uint64_t>(2000000), ASSERT_EQ(static_cast<uint64_t>(2000000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
env.now_micros_ += 1999900u; // sleep debt 1000 clock_->now_micros_ += 1999900u; // sleep debt 1000
auto delay_token_2 = controller.GetDelayToken(10000000u); auto delay_token_2 = controller.GetDelayToken(10000000u);
// Rate reset after changing the token. // Rate reset after changing the token.
ASSERT_EQ(static_cast<uint64_t>(2000000), ASSERT_EQ(static_cast<uint64_t>(2000000),
controller.GetDelay(&env, 20000000u)); controller.GetDelay(clock_, 20000000u));
env.now_micros_ += 1999900u; // sleep debt 1000 clock_->now_micros_ += 1999900u; // sleep debt 1000
// One refill: 10240 bytes allowed, 1000 used, 9240 left // One refill: 10240 bytes allowed, 1000 used, 9240 left
ASSERT_EQ(static_cast<uint64_t>(1124), controller.GetDelay(&env, 1000u)); ASSERT_EQ(static_cast<uint64_t>(1124), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 1124u; // sleep debt 0 clock_->now_micros_ += 1124u; // sleep debt 0
delay_token_2.reset(); delay_token_2.reset();
// 1000 used, 8240 left // 1000 used, 8240 left
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 1000u)); ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 100u; // sleep credit 100 clock_->now_micros_ += 100u; // sleep credit 100
// 1000 used, 7240 left // 1000 used, 7240 left
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 1000u)); ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 100u; // sleep credit 200 clock_->now_micros_ += 100u; // sleep credit 200
// One refill: 10240 fileed, sleep credit generates 2000. 8000 used // One refill: 10240 fileed, sleep credit generates 2000. 8000 used
// 7240 + 10240 + 2000 - 8000 = 11480 left // 7240 + 10240 + 2000 - 8000 = 11480 left
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(&env, 8000u)); ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(clock_, 8000u));
env.now_micros_ += 200u; // sleep debt 824 clock_->now_micros_ += 200u; // sleep debt 824
// 1000 used, 10480 left. // 1000 used, 10480 left.
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 1000u)); ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 1000u));
env.now_micros_ += 200u; // sleep debt 624 clock_->now_micros_ += 200u; // sleep debt 624
// Out of bound sleep, still 10480 left // Out of bound sleep, still 10480 left
ASSERT_EQ(static_cast<uint64_t>(3000624u), ASSERT_EQ(static_cast<uint64_t>(3000624u),
controller.GetDelay(&env, 30000000u)); controller.GetDelay(clock_, 30000000u));
env.now_micros_ += 3000724u; // sleep credit 100 clock_->now_micros_ += 3000724u; // sleep credit 100
// 6000 used, 4480 left. // 6000 used, 4480 left.
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 6000u)); ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 6000u));
env.now_micros_ += 200u; // sleep credit 300 clock_->now_micros_ += 200u; // sleep credit 300
// One refill, credit 4480 balance + 3000 credit + 10240 refill // One refill, credit 4480 balance + 3000 credit + 10240 refill
// Use 8000, 9720 left // Use 8000, 9720 left
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(&env, 8000u)); ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(clock_, 8000u));
env.now_micros_ += 3024u; // sleep credit 2000 clock_->now_micros_ += 3024u; // sleep credit 2000
// 1720 left // 1720 left
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(&env, 8000u)); ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(clock_, 8000u));
// 1720 balance + 20000 credit = 20170 left // 1720 balance + 20000 credit = 20170 left
// Use 8000, 12170 left // Use 8000, 12170 left
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(&env, 8000u)); ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(clock_, 8000u));
// 4170 left // 4170 left
ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(&env, 8000u)); ASSERT_EQ(static_cast<uint64_t>(0u), controller.GetDelay(clock_, 8000u));
// Need a refill // Need a refill
ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(&env, 9000u)); ASSERT_EQ(static_cast<uint64_t>(1024u), controller.GetDelay(clock_, 9000u));
delay_token_1.reset(); delay_token_1.reset();
ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(&env, 30000000u)); ASSERT_EQ(static_cast<uint64_t>(0), controller.GetDelay(clock_, 30000000u));
delay_token_1.reset(); delay_token_1.reset();
ASSERT_FALSE(controller.IsStopped()); ASSERT_FALSE(controller.IsStopped());
} }

env/composite_env.cc

@ -0,0 +1,369 @@
// Copyright (c) 2019-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#include "env/composite_env_wrapper.h"
namespace ROCKSDB_NAMESPACE {
namespace {
// The CompositeEnvWrapper class provides an interface that is compatible
// with the old monolithic Env API, and an implementation that wraps around
// the new Env that provides threading and other OS related functionality, and
// the new FileSystem API that provides storage functionality. By
// providing the old Env interface, it allows the rest of RocksDB code to
// be agnostic of whether the underlying Env implementation is a monolithic
// Env or an Env + FileSystem. In the former case, the user will specify
// Options::env only, whereas in the latter case, the user will specify
// Options::env and Options::file_system.
class CompositeSequentialFileWrapper : public SequentialFile {
public:
explicit CompositeSequentialFileWrapper(
std::unique_ptr<FSSequentialFile>& target)
: target_(std::move(target)) {}
Status Read(size_t n, Slice* result, char* scratch) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(n, io_opts, result, scratch, &dbg);
}
Status Skip(uint64_t n) override { return target_->Skip(n); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status PositionedRead(uint64_t offset, size_t n, Slice* result,
char* scratch) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedRead(offset, n, io_opts, result, scratch, &dbg);
}
private:
std::unique_ptr<FSSequentialFile> target_;
};
class CompositeRandomAccessFileWrapper : public RandomAccessFile {
public:
explicit CompositeRandomAccessFileWrapper(
std::unique_ptr<FSRandomAccessFile>& target)
: target_(std::move(target)) {}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(offset, n, io_opts, result, scratch, &dbg);
}
Status MultiRead(ReadRequest* reqs, size_t num_reqs) override {
IOOptions io_opts;
IODebugContext dbg;
std::vector<FSReadRequest> fs_reqs;
Status status;
fs_reqs.resize(num_reqs);
for (size_t i = 0; i < num_reqs; ++i) {
fs_reqs[i].offset = reqs[i].offset;
fs_reqs[i].len = reqs[i].len;
fs_reqs[i].scratch = reqs[i].scratch;
fs_reqs[i].status = IOStatus::OK();
}
status = target_->MultiRead(fs_reqs.data(), num_reqs, io_opts, &dbg);
for (size_t i = 0; i < num_reqs; ++i) {
reqs[i].result = fs_reqs[i].result;
reqs[i].status = fs_reqs[i].status;
}
return status;
}
Status Prefetch(uint64_t offset, size_t n) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Prefetch(offset, n, io_opts, &dbg);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
void Hint(AccessPattern pattern) override {
target_->Hint((FSRandomAccessFile::AccessPattern)pattern);
}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
private:
std::unique_ptr<FSRandomAccessFile> target_;
};
class CompositeWritableFileWrapper : public WritableFile {
public:
explicit CompositeWritableFileWrapper(std::unique_ptr<FSWritableFile>& t)
: target_(std::move(t)) {}
Status Append(const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Append(data, io_opts, &dbg);
}
Status PositionedAppend(const Slice& data, uint64_t offset) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedAppend(data, offset, io_opts, &dbg);
}
Status Truncate(uint64_t size) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Truncate(size, io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {
target_->SetWriteLifeTimeHint(hint);
}
Env::WriteLifeTimeHint GetWriteLifeTimeHint() override {
return target_->GetWriteLifeTimeHint();
}
uint64_t GetFileSize() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->GetFileSize(io_opts, &dbg);
}
void SetPreallocationBlockSize(size_t size) override {
target_->SetPreallocationBlockSize(size);
}
void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) override {
target_->GetPreallocationStatus(block_size, last_allocated_block);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->RangeSync(offset, nbytes, io_opts, &dbg);
}
void PrepareWrite(size_t offset, size_t len) override {
IOOptions io_opts;
IODebugContext dbg;
target_->PrepareWrite(offset, len, io_opts, &dbg);
}
Status Allocate(uint64_t offset, uint64_t len) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Allocate(offset, len, io_opts, &dbg);
}
std::unique_ptr<FSWritableFile>* target() { return &target_; }
private:
std::unique_ptr<FSWritableFile> target_;
};
class CompositeRandomRWFileWrapper : public RandomRWFile {
public:
explicit CompositeRandomRWFileWrapper(std::unique_ptr<FSRandomRWFile>& target)
: target_(std::move(target)) {}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status Write(uint64_t offset, const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Write(offset, data, io_opts, &dbg);
}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(offset, n, io_opts, result, scratch, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
private:
std::unique_ptr<FSRandomRWFile> target_;
};
class CompositeDirectoryWrapper : public Directory {
public:
explicit CompositeDirectoryWrapper(std::unique_ptr<FSDirectory>& target)
: target_(std::move(target)) {}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
private:
std::unique_ptr<FSDirectory> target_;
};
} // namespace
Status CompositeEnv::NewSequentialFile(const std::string& f,
std::unique_ptr<SequentialFile>* r,
const EnvOptions& options) {
IODebugContext dbg;
std::unique_ptr<FSSequentialFile> file;
Status status;
status =
file_system_->NewSequentialFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeSequentialFileWrapper(file));
}
return status;
}
Status CompositeEnv::NewRandomAccessFile(const std::string& f,
std::unique_ptr<RandomAccessFile>* r,
const EnvOptions& options) {
IODebugContext dbg;
std::unique_ptr<FSRandomAccessFile> file;
Status status;
status =
file_system_->NewRandomAccessFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeRandomAccessFileWrapper(file));
}
return status;
}
Status CompositeEnv::NewWritableFile(const std::string& f,
std::unique_ptr<WritableFile>* r,
const EnvOptions& options) {
IODebugContext dbg;
std::unique_ptr<FSWritableFile> file;
Status status;
status = file_system_->NewWritableFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
Status CompositeEnv::ReopenWritableFile(const std::string& fname,
std::unique_ptr<WritableFile>* result,
const EnvOptions& options) {
IODebugContext dbg;
Status status;
std::unique_ptr<FSWritableFile> file;
status = file_system_->ReopenWritableFile(fname, FileOptions(options), &file,
&dbg);
if (status.ok()) {
result->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
Status CompositeEnv::ReuseWritableFile(const std::string& fname,
const std::string& old_fname,
std::unique_ptr<WritableFile>* r,
const EnvOptions& options) {
IODebugContext dbg;
Status status;
std::unique_ptr<FSWritableFile> file;
status = file_system_->ReuseWritableFile(fname, old_fname,
FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
Status CompositeEnv::NewRandomRWFile(const std::string& fname,
std::unique_ptr<RandomRWFile>* result,
const EnvOptions& options) {
IODebugContext dbg;
std::unique_ptr<FSRandomRWFile> file;
Status status;
status =
file_system_->NewRandomRWFile(fname, FileOptions(options), &file, &dbg);
if (status.ok()) {
result->reset(new CompositeRandomRWFileWrapper(file));
}
return status;
}
Status CompositeEnv::NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result) {
IOOptions io_opts;
IODebugContext dbg;
std::unique_ptr<FSDirectory> dir;
Status status;
status = file_system_->NewDirectory(name, io_opts, &dir, &dbg);
if (status.ok()) {
result->reset(new CompositeDirectoryWrapper(dir));
}
return status;
}
} // namespace ROCKSDB_NAMESPACE

@ -7,6 +7,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/file_system.h" #include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#ifdef _WIN32 #ifdef _WIN32
// Windows API macro interference // Windows API macro interference
@ -17,272 +18,14 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
// The CompositeEnvWrapper class provides an interface that is compatible
// with the old monolithic Env API, and an implementation that wraps around
// the new Env that provides threading and other OS related functionality, and
// the new FileSystem API that provides storage functionality. By
// providing the old Env interface, it allows the rest of RocksDB code to
// be agnostic of whether the underlying Env implementation is a monolithic
// Env or an Env + FileSystem. In the former case, the user will specify
// Options::env only, whereas in the latter case, the user will specify
// Options::env and Options::file_system.
class CompositeSequentialFileWrapper : public SequentialFile {
public:
explicit CompositeSequentialFileWrapper(
std::unique_ptr<FSSequentialFile>& target)
: target_(std::move(target)) {}
Status Read(size_t n, Slice* result, char* scratch) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(n, io_opts, result, scratch, &dbg);
}
Status Skip(uint64_t n) override { return target_->Skip(n); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status PositionedRead(uint64_t offset, size_t n, Slice* result,
char* scratch) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedRead(offset, n, io_opts, result, scratch, &dbg);
}
private:
std::unique_ptr<FSSequentialFile> target_;
};
class CompositeRandomAccessFileWrapper : public RandomAccessFile {
public:
explicit CompositeRandomAccessFileWrapper(
std::unique_ptr<FSRandomAccessFile>& target)
: target_(std::move(target)) {}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(offset, n, io_opts, result, scratch, &dbg);
}
Status MultiRead(ReadRequest* reqs, size_t num_reqs) override {
IOOptions io_opts;
IODebugContext dbg;
std::vector<FSReadRequest> fs_reqs;
Status status;
fs_reqs.resize(num_reqs);
for (size_t i = 0; i < num_reqs; ++i) {
fs_reqs[i].offset = reqs[i].offset;
fs_reqs[i].len = reqs[i].len;
fs_reqs[i].scratch = reqs[i].scratch;
fs_reqs[i].status = IOStatus::OK();
}
status = target_->MultiRead(fs_reqs.data(), num_reqs, io_opts, &dbg);
for (size_t i = 0; i < num_reqs; ++i) {
reqs[i].result = fs_reqs[i].result;
reqs[i].status = fs_reqs[i].status;
}
return status;
}
Status Prefetch(uint64_t offset, size_t n) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Prefetch(offset, n, io_opts, &dbg);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
};
void Hint(AccessPattern pattern) override {
target_->Hint((FSRandomAccessFile::AccessPattern)pattern);
}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
private:
std::unique_ptr<FSRandomAccessFile> target_;
};
class CompositeWritableFileWrapper : public WritableFile {
public:
explicit CompositeWritableFileWrapper(std::unique_ptr<FSWritableFile>& t)
: target_(std::move(t)) {}
Status Append(const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Append(data, io_opts, &dbg);
}
Status PositionedAppend(const Slice& data, uint64_t offset) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->PositionedAppend(data, offset, io_opts, &dbg);
}
Status Truncate(uint64_t size) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Truncate(size, io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
bool IsSyncThreadSafe() const override { return target_->IsSyncThreadSafe(); }
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
void SetWriteLifeTimeHint(Env::WriteLifeTimeHint hint) override {
target_->SetWriteLifeTimeHint(hint);
}
Env::WriteLifeTimeHint GetWriteLifeTimeHint() override {
return target_->GetWriteLifeTimeHint();
}
uint64_t GetFileSize() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->GetFileSize(io_opts, &dbg);
}
void SetPreallocationBlockSize(size_t size) override {
target_->SetPreallocationBlockSize(size);
}
void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) override {
target_->GetPreallocationStatus(block_size, last_allocated_block);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
Status InvalidateCache(size_t offset, size_t length) override {
return target_->InvalidateCache(offset, length);
}
Status RangeSync(uint64_t offset, uint64_t nbytes) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->RangeSync(offset, nbytes, io_opts, &dbg);
}
void PrepareWrite(size_t offset, size_t len) override {
IOOptions io_opts;
IODebugContext dbg;
target_->PrepareWrite(offset, len, io_opts, &dbg);
}
Status Allocate(uint64_t offset, uint64_t len) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Allocate(offset, len, io_opts, &dbg);
}
std::unique_ptr<FSWritableFile>* target() { return &target_; }
private:
std::unique_ptr<FSWritableFile> target_;
};
class CompositeRandomRWFileWrapper : public RandomRWFile {
public:
explicit CompositeRandomRWFileWrapper(std::unique_ptr<FSRandomRWFile>& target)
: target_(std::move(target)) {}
bool use_direct_io() const override { return target_->use_direct_io(); }
size_t GetRequiredBufferAlignment() const override {
return target_->GetRequiredBufferAlignment();
}
Status Write(uint64_t offset, const Slice& data) override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Write(offset, data, io_opts, &dbg);
}
Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Read(offset, n, io_opts, result, scratch, &dbg);
}
Status Flush() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Flush(io_opts, &dbg);
}
Status Sync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Sync(io_opts, &dbg);
}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
Status Close() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Close(io_opts, &dbg);
}
private:
std::unique_ptr<FSRandomRWFile> target_;
};
class CompositeDirectoryWrapper : public Directory {
public:
explicit CompositeDirectoryWrapper(std::unique_ptr<FSDirectory>& target)
: target_(std::move(target)) {}
Status Fsync() override {
IOOptions io_opts;
IODebugContext dbg;
return target_->Fsync(io_opts, &dbg);
}
size_t GetUniqueId(char* id, size_t max_size) const override {
return target_->GetUniqueId(id, max_size);
}
private:
std::unique_ptr<FSDirectory> target_;
};
class CompositeEnv : public Env { class CompositeEnv : public Env {
public: public:
// Initialize a CompositeEnvWrapper that delegates all thread/time related // Initialize a CompositeEnvWrapper that delegates all thread/time related
// calls to env, and all file operations to fs // calls to env, and all file operations to fs
explicit CompositeEnv(const std::shared_ptr<FileSystem>& fs) : Env(fs) {} explicit CompositeEnv(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock)
: Env(fs, clock) {}
Status RegisterDbPaths(const std::vector<std::string>& paths) override { Status RegisterDbPaths(const std::vector<std::string>& paths) override {
return file_system_->RegisterDbPaths(paths); return file_system_->RegisterDbPaths(paths);
@ -294,99 +37,37 @@ class CompositeEnv : public Env {
// The following text is boilerplate that forwards all methods to target() // The following text is boilerplate that forwards all methods to target()
Status NewSequentialFile(const std::string& f, Status NewSequentialFile(const std::string& f,
std::unique_ptr<SequentialFile>* r, std::unique_ptr<SequentialFile>* r,
const EnvOptions& options) override { const EnvOptions& options) override;
IODebugContext dbg;
std::unique_ptr<FSSequentialFile> file;
Status status;
status =
file_system_->NewSequentialFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeSequentialFileWrapper(file));
}
return status;
}
Status NewRandomAccessFile(const std::string& f, Status NewRandomAccessFile(const std::string& f,
std::unique_ptr<RandomAccessFile>* r, std::unique_ptr<RandomAccessFile>* r,
const EnvOptions& options) override { const EnvOptions& options) override;
IODebugContext dbg;
std::unique_ptr<FSRandomAccessFile> file;
Status status;
status =
file_system_->NewRandomAccessFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeRandomAccessFileWrapper(file));
}
return status;
}
Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r, Status NewWritableFile(const std::string& f, std::unique_ptr<WritableFile>* r,
const EnvOptions& options) override { const EnvOptions& options) override;
IODebugContext dbg;
std::unique_ptr<FSWritableFile> file;
Status status;
status =
file_system_->NewWritableFile(f, FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
Status ReopenWritableFile(const std::string& fname, Status ReopenWritableFile(const std::string& fname,
std::unique_ptr<WritableFile>* result, std::unique_ptr<WritableFile>* result,
const EnvOptions& options) override { const EnvOptions& options) override;
IODebugContext dbg;
Status status;
std::unique_ptr<FSWritableFile> file;
status = file_system_->ReopenWritableFile(fname, FileOptions(options),
&file, &dbg);
if (status.ok()) {
result->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
Status ReuseWritableFile(const std::string& fname, Status ReuseWritableFile(const std::string& fname,
const std::string& old_fname, const std::string& old_fname,
std::unique_ptr<WritableFile>* r, std::unique_ptr<WritableFile>* r,
const EnvOptions& options) override { const EnvOptions& options) override;
IODebugContext dbg;
Status status;
std::unique_ptr<FSWritableFile> file;
status = file_system_->ReuseWritableFile(fname, old_fname,
FileOptions(options), &file, &dbg);
if (status.ok()) {
r->reset(new CompositeWritableFileWrapper(file));
}
return status;
}
Status NewRandomRWFile(const std::string& fname, Status NewRandomRWFile(const std::string& fname,
std::unique_ptr<RandomRWFile>* result, std::unique_ptr<RandomRWFile>* result,
const EnvOptions& options) override { const EnvOptions& options) override;
IODebugContext dbg;
std::unique_ptr<FSRandomRWFile> file;
Status status;
status =
file_system_->NewRandomRWFile(fname, FileOptions(options), &file, &dbg);
if (status.ok()) {
result->reset(new CompositeRandomRWFileWrapper(file));
}
return status;
}
Status NewMemoryMappedFileBuffer( Status NewMemoryMappedFileBuffer(
const std::string& fname, const std::string& fname,
std::unique_ptr<MemoryMappedFileBuffer>* result) override { std::unique_ptr<MemoryMappedFileBuffer>* result) override {
return file_system_->NewMemoryMappedFileBuffer(fname, result); return file_system_->NewMemoryMappedFileBuffer(fname, result);
} }
Status NewDirectory(const std::string& name, Status NewDirectory(const std::string& name,
std::unique_ptr<Directory>* result) override { std::unique_ptr<Directory>* result) override;
IOOptions io_opts;
IODebugContext dbg;
std::unique_ptr<FSDirectory> dir;
Status status;
status = file_system_->NewDirectory(name, io_opts, &dir, &dbg);
if (status.ok()) {
result->reset(new CompositeDirectoryWrapper(dir));
}
return status;
}
Status FileExists(const std::string& f) override { Status FileExists(const std::string& f) override {
IOOptions io_opts; IOOptions io_opts;
IODebugContext dbg; IODebugContext dbg;
@ -548,6 +229,21 @@ class CompositeEnv : public Env {
IODebugContext dbg; IODebugContext dbg;
return file_system_->GetFreeSpace(path, io_opts, diskfree, &dbg); return file_system_->GetFreeSpace(path, io_opts, diskfree, &dbg);
} }
uint64_t NowMicros() override { return system_clock_->NowMicros(); }
uint64_t NowNanos() override { return system_clock_->NowNanos(); }
uint64_t NowCPUNanos() override { return system_clock_->CPUNanos(); }
void SleepForMicroseconds(int micros) override {
system_clock_->SleepForMicroseconds(micros);
}
Status GetCurrentTime(int64_t* unix_time) override {
return system_clock_->GetCurrentTime(unix_time);
}
std::string TimeToString(uint64_t time) override {
return system_clock_->TimeToString(time);
}
}; };
class CompositeEnvWrapper : public CompositeEnv { class CompositeEnvWrapper : public CompositeEnv {
@ -555,7 +251,14 @@ class CompositeEnvWrapper : public CompositeEnv {
// Initialize a CompositeEnvWrapper that delegates all thread/time related // Initialize a CompositeEnvWrapper that delegates all thread/time related
// calls to env, and all file operations to fs // calls to env, and all file operations to fs
explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<FileSystem>& fs) explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<FileSystem>& fs)
: CompositeEnv(fs), env_target_(env) {} : CompositeEnvWrapper(env, fs, env->GetSystemClock()) {}
explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<SystemClock>& sc)
: CompositeEnvWrapper(env, env->GetFileSystem(), sc) {}
explicit CompositeEnvWrapper(Env* env, const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& sc)
: CompositeEnv(fs, sc), env_target_(env) {}
// Return the target to which this Env forwards all calls // Return the target to which this Env forwards all calls
Env* env_target() const { return env_target_; } Env* env_target() const { return env_target_; }
@ -585,19 +288,9 @@ class CompositeEnvWrapper : public CompositeEnv {
return env_target_->GetThreadPoolQueueLen(pri); return env_target_->GetThreadPoolQueueLen(pri);
} }
uint64_t NowMicros() override { return env_target_->NowMicros(); }
uint64_t NowNanos() override { return env_target_->NowNanos(); }
uint64_t NowCPUNanos() override { return env_target_->NowCPUNanos(); }
void SleepForMicroseconds(int micros) override {
env_target_->SleepForMicroseconds(micros);
}
Status GetHostName(char* name, uint64_t len) override { Status GetHostName(char* name, uint64_t len) override {
return env_target_->GetHostName(name, len); return env_target_->GetHostName(name, len);
} }
Status GetCurrentTime(int64_t* unix_time) override {
return env_target_->GetCurrentTime(unix_time);
}
void SetBackgroundThreads(int num, Priority pri) override { void SetBackgroundThreads(int num, Priority pri) override {
return env_target_->SetBackgroundThreads(num, pri); return env_target_->SetBackgroundThreads(num, pri);
} }
@ -625,10 +318,6 @@ class CompositeEnvWrapper : public CompositeEnv {
return env_target_->LowerThreadPoolCPUPriority(pool, pri); return env_target_->LowerThreadPoolCPUPriority(pool, pri);
} }
std::string TimeToString(uint64_t time) override {
return env_target_->TimeToString(time);
}
Status GetThreadList(std::vector<ThreadStatus>* thread_list) override { Status GetThreadList(std::vector<ThreadStatus>* thread_list) override {
return env_target_->GetThreadList(thread_list); return env_target_->GetThreadList(thread_list);
} }
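With the constructors above, a caller can keep an existing Env's file system and threading while swapping only the clock, which is how a mock clock (such as the TimeSetClock in the write_controller test) can reach the rest of the code. A sketch under stated assumptions: FrozenClock and UseFrozenClock are illustrative, composite_env_wrapper.h is an internal header, and constructing a SystemClockWrapper with a null target follows the test shown earlier.

#include <cstdint>
#include <memory>
#include "env/composite_env_wrapper.h"  // internal RocksDB header
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// A clock that always reports the same time; useful for deterministic tests.
class FrozenClock : public SystemClockWrapper {
 public:
  FrozenClock() : SystemClockWrapper(nullptr) {}
  const char* Name() const override { return "FrozenClock"; }
  uint64_t NowMicros() override { return 1234567; }
  uint64_t NowNanos() override { return 1234567ULL * 1000; }
};
}  // namespace ROCKSDB_NAMESPACE

void UseFrozenClock() {
  using namespace ROCKSDB_NAMESPACE;
  auto clock = std::make_shared<FrozenClock>();
  // File and thread operations still go to Env::Default(); only the
  // time-related calls are redirected to FrozenClock.
  CompositeEnvWrapper env_with_frozen_time(Env::Default(), clock);
  (void)env_with_frozen_time;  // in real use, keep it alive for Options::env
}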

env/env.cc

@ -10,17 +10,58 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include <thread> #include <thread>
#include "env/composite_env_wrapper.h" #include "env/composite_env_wrapper.h"
#include "logging/env_logger.h" #include "logging/env_logger.h"
#include "memory/arena.h" #include "memory/arena.h"
#include "options/db_options.h" #include "options/db_options.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/utilities/object_registry.h" #include "rocksdb/utilities/object_registry.h"
#include "util/autovector.h" #include "util/autovector.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
namespace { namespace {
class LegacySystemClock : public SystemClock {
private:
Env* env_;
public:
explicit LegacySystemClock(Env* env) : env_(env) {}
const char* Name() const override { return "Legacy System Clock"; }
// Returns the number of micro-seconds since some fixed point in time.
// It is often used as system time such as in GenericRateLimiter
// and other places so a port needs to return system time in order to work.
uint64_t NowMicros() override { return env_->NowMicros(); }
// Returns the number of nano-seconds since some fixed point in time. Only
// useful for computing deltas of time in one run.
// Default implementation simply relies on NowMicros.
// In platform-specific implementations, NowNanos() should return time points
// that are MONOTONIC.
uint64_t NowNanos() override { return env_->NowNanos(); }
uint64_t CPUMicros() override { return CPUNanos() / 1000; }
uint64_t CPUNanos() override { return env_->NowCPUNanos(); }
// Sleep/delay the thread for the prescribed number of micro-seconds.
void SleepForMicroseconds(int micros) override {
env_->SleepForMicroseconds(micros);
}
// Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
// Only overwrites *unix_time on success.
Status GetCurrentTime(int64_t* unix_time) override {
return env_->GetCurrentTime(unix_time);
}
// Converts seconds-since-Jan-01-1970 to a printable string
std::string TimeToString(uint64_t time) override {
return env_->TimeToString(time);
}
};
class LegacyFileSystemWrapper : public FileSystem { class LegacyFileSystemWrapper : public FileSystem {
public: public:
// Initialize an EnvWrapper that delegates all calls to *t // Initialize an EnvWrapper that delegates all calls to *t
@ -265,11 +306,17 @@ class LegacyFileSystemWrapper : public FileSystem {
Env::Env() : thread_status_updater_(nullptr) { Env::Env() : thread_status_updater_(nullptr) {
file_system_ = std::make_shared<LegacyFileSystemWrapper>(this); file_system_ = std::make_shared<LegacyFileSystemWrapper>(this);
system_clock_ = std::make_shared<LegacySystemClock>(this);
} }
Env::Env(std::shared_ptr<FileSystem> fs) Env::Env(const std::shared_ptr<FileSystem>& fs)
: thread_status_updater_(nullptr), : thread_status_updater_(nullptr), file_system_(fs) {
file_system_(fs) {} system_clock_ = std::make_shared<LegacySystemClock>(this);
}
Env::Env(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock)
: thread_status_updater_(nullptr), file_system_(fs), system_clock_(clock) {}
Env::~Env() { Env::~Env() {
} }
@ -730,4 +777,8 @@ Status NewEnvLogger(const std::string& fname, Env* env,
const std::shared_ptr<FileSystem>& Env::GetFileSystem() const { const std::shared_ptr<FileSystem>& Env::GetFileSystem() const {
return file_system_; return file_system_;
} }
const std::shared_ptr<SystemClock>& Env::GetSystemClock() const {
return system_clock_;
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
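The accessor added above is the hand-off point for the rest of this PR: callers ask the Env for its clock instead of calling the Env's time methods directly. A minimal sketch of that usage; for a legacy Env that does not supply its own clock, the object returned is the LegacySystemClock shim defined earlier in this file, which simply forwards back to the Env.

#include <cstdint>
#include <memory>
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

uint64_t CurrentMicros() {
  ROCKSDB_NAMESPACE::Env* env = ROCKSDB_NAMESPACE::Env::Default();
  const std::shared_ptr<ROCKSDB_NAMESPACE::SystemClock>& clock =
      env->GetSystemClock();
  return clock->NowMicros();
}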

env/env_posix.cc

@ -56,8 +56,10 @@
#include "monitoring/iostats_context_imp.h" #include "monitoring/iostats_context_imp.h"
#include "monitoring/thread_status_updater.h" #include "monitoring/thread_status_updater.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/slice.h" #include "rocksdb/slice.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h" #include "test_util/sync_point.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/compression_context_cache.h" #include "util/compression_context_cache.h"
@ -121,6 +123,82 @@ class PosixDynamicLibrary : public DynamicLibrary {
void* handle_; void* handle_;
}; };
#endif // !ROCKSDB_NO_DYNAMIC_EXTENSION #endif // !ROCKSDB_NO_DYNAMIC_EXTENSION
class PosixClock : public SystemClock {
public:
const char* Name() const override { return "PosixClock"; }
uint64_t NowMicros() override {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
uint64_t NowNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX)
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(OS_SOLARIS)
return gethrtime();
#elif defined(__MACH__)
clock_serv_t cclock;
mach_timespec_t ts;
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
clock_get_time(cclock, &ts);
mach_port_deallocate(mach_task_self(), cclock);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch())
.count();
#endif
}
uint64_t CPUMicros() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12))
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
return (static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec) / 1000;
#endif
return 0;
}
uint64_t CPUNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12))
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
return 0;
}
void SleepForMicroseconds(int micros) override { usleep(micros); }
Status GetCurrentTime(int64_t* unix_time) override {
time_t ret = time(nullptr);
if (ret == (time_t)-1) {
return IOError("GetCurrentTime", "", errno);
}
*unix_time = (int64_t)ret;
return Status::OK();
}
std::string TimeToString(uint64_t secondsSince1970) override {
const time_t seconds = (time_t)secondsSince1970;
struct tm t;
int maxsize = 64;
std::string dummy;
dummy.reserve(maxsize);
dummy.resize(maxsize);
char* p = &dummy[0];
localtime_r(&seconds, &t);
snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900,
t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec);
return dummy;
}
};
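PosixClock pins down the SystemClock surface used here: Name(), NowMicros()/NowNanos(), CPUMicros()/CPUNanos(), SleepForMicroseconds(), GetCurrentTime() and TimeToString(). As a hedged sketch of an alternative implementation, a deterministic clock for tests could cover the same interface (FakeSystemClock is hypothetical and assumes these are the only methods that need overriding):

#include <atomic>
#include <string>
#include "rocksdb/system_clock.h"
using namespace ROCKSDB_NAMESPACE;

// Hypothetical deterministic clock for tests; not part of this patch.
class FakeSystemClock : public SystemClock {
 public:
  const char* Name() const override { return "FakeSystemClock"; }
  uint64_t NowMicros() override { return micros_.load(); }
  uint64_t NowNanos() override { return micros_.load() * 1000; }
  uint64_t CPUMicros() override { return 0; }
  uint64_t CPUNanos() override { return 0; }
  void SleepForMicroseconds(int micros) override {
    // Advance virtual time instead of blocking the thread.
    micros_.fetch_add(static_cast<uint64_t>(micros));
  }
  Status GetCurrentTime(int64_t* unix_time) override {
    *unix_time = static_cast<int64_t>(micros_.load() / 1000000);
    return Status::OK();
  }
  std::string TimeToString(uint64_t secs) override {
    return std::to_string(secs);
  }

 private:
  std::atomic<uint64_t> micros_{0};
};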
class PosixEnv : public CompositeEnv {
 public:
@@ -232,45 +310,6 @@ class PosixEnv : public CompositeEnv {
  uint64_t GetThreadID() const override { return gettid(pthread_self()); }
uint64_t NowMicros() override {
struct timeval tv;
gettimeofday(&tv, nullptr);
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
}
uint64_t NowNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX)
struct timespec ts;
clock_gettime(CLOCK_MONOTONIC, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#elif defined(OS_SOLARIS)
return gethrtime();
#elif defined(__MACH__)
clock_serv_t cclock;
mach_timespec_t ts;
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
clock_get_time(cclock, &ts);
mach_port_deallocate(mach_task_self(), cclock);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#else
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch()).count();
#endif
}
uint64_t NowCPUNanos() override {
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_GNU_KFREEBSD) || \
defined(OS_AIX) || (defined(__MACH__) && defined(__MAC_10_12))
struct timespec ts;
clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
return static_cast<uint64_t>(ts.tv_sec) * 1000000000 + ts.tv_nsec;
#endif
return 0;
}
void SleepForMicroseconds(int micros) override { usleep(micros); }
  Status GetHostName(char* name, uint64_t len) override {
    int ret = gethostname(name, static_cast<size_t>(len));
    if (ret < 0) {
@@ -283,15 +322,6 @@ class PosixEnv : public CompositeEnv {
    return Status::OK();
  }
Status GetCurrentTime(int64_t* unix_time) override {
time_t ret = time(nullptr);
if (ret == (time_t) -1) {
return IOError("GetCurrentTime", "", errno);
}
*unix_time = (int64_t) ret;
return Status::OK();
}
  ThreadStatusUpdater* GetThreadStatusUpdater() const override {
    return Env::GetThreadStatusUpdater();
  }
@@ -340,26 +370,6 @@ class PosixEnv : public CompositeEnv {
    return Status::OK();
  }
std::string TimeToString(uint64_t secondsSince1970) override {
const time_t seconds = (time_t)secondsSince1970;
struct tm t;
int maxsize = 64;
std::string dummy;
dummy.reserve(maxsize);
dummy.resize(maxsize);
char* p = &dummy[0];
localtime_r(&seconds, &t);
snprintf(p, maxsize,
"%04d/%02d/%02d-%02d:%02d:%02d ",
t.tm_year + 1900,
t.tm_mon + 1,
t.tm_mday,
t.tm_hour,
t.tm_min,
t.tm_sec);
return dummy;
}
 private:
  friend Env* Env::Default();
  // Constructs the default Env, a singleton
@@ -382,7 +392,7 @@ class PosixEnv : public CompositeEnv {
};
PosixEnv::PosixEnv()
-    : CompositeEnv(FileSystem::Default()),
+    : CompositeEnv(FileSystem::Default(), SystemClock::Default()),
      thread_pools_storage_(Priority::TOTAL),
      allow_non_owner_access_storage_(true),
      thread_pools_(thread_pools_storage_),
@@ -401,7 +411,7 @@ PosixEnv::PosixEnv()
PosixEnv::PosixEnv(const PosixEnv* default_env,
                   const std::shared_ptr<FileSystem>& fs)
-    : CompositeEnv(fs),
+    : CompositeEnv(fs, default_env->GetSystemClock()),
      thread_pools_(default_env->thread_pools_),
      mu_(default_env->mu_),
      threads_to_join_(default_env->threads_to_join_),
@@ -509,6 +519,14 @@ std::unique_ptr<Env> NewCompositeEnv(const std::shared_ptr<FileSystem>& fs) {
  return std::unique_ptr<Env>(new PosixEnv(default_env, fs));
}
//
// Default Posix SystemClock
//
const std::shared_ptr<SystemClock>& SystemClock::Default() {
static std::shared_ptr<SystemClock> default_clock =
std::make_shared<PosixClock>();
return default_clock;
}
}  // namespace ROCKSDB_NAMESPACE
#endif
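With a process-wide default clock, call sites no longer need an Env just to read or advance time. A minimal usage sketch, assuming only the methods shown above:

#include <cinttypes>
#include <cstdio>
#include <memory>
#include "rocksdb/system_clock.h"

int main() {
  const std::shared_ptr<ROCKSDB_NAMESPACE::SystemClock>& clock =
      ROCKSDB_NAMESPACE::SystemClock::Default();
  uint64_t start = clock->NowNanos();
  clock->SleepForMicroseconds(1000);  // sleep ~1ms via the clock, not an Env
  uint64_t elapsed_ns = clock->NowNanos() - start;
  std::printf("slept for about %" PRIu64 " ns\n", elapsed_ns);
  return 0;
}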

env/env_test.cc

@@ -35,6 +35,7 @@
#include "port/malloc.h"
#include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
@@ -2213,7 +2214,8 @@ TEST_F(EnvTest, IsDirectory) {
  ASSERT_OK(s);
  std::unique_ptr<WritableFileWriter> fwriter;
  fwriter.reset(new WritableFileWriter(std::move(wfile), test_file_path,
-                                       FileOptions(), Env::Default()));
+                                       FileOptions(),
+                                       SystemClock::Default()));
  constexpr char buf[] = "test";
  s = fwriter->Append(buf);
  ASSERT_OK(s);

env/file_system_tracer.cc
@@ -5,18 +5,19 @@
#include "env/file_system_tracer.h" #include "env/file_system_tracer.h"
#include "rocksdb/env.h" #include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
IOStatus FileSystemTracingWrapper::NewSequentialFile( IOStatus FileSystemTracingWrapper::NewSequentialFile(
const std::string& fname, const FileOptions& file_opts, const std::string& fname, const FileOptions& file_opts,
std::unique_ptr<FSSequentialFile>* result, IODebugContext* dbg) { std::unique_ptr<FSSequentialFile>* result, IODebugContext* dbg) {
StopWatchNano timer(env_); StopWatchNano timer(clock_);
timer.Start(); timer.Start();
IOStatus s = target()->NewSequentialFile(fname, file_opts, result, dbg); IOStatus s = target()->NewSequentialFile(fname, file_opts, result, dbg);
uint64_t elapsed = timer.ElapsedNanos(); uint64_t elapsed = timer.ElapsedNanos();
IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
0 /*io_op_data*/, __func__, elapsed, s.ToString(), 0 /*io_op_data*/, __func__, elapsed, s.ToString(),
fname.substr(fname.find_last_of("/\\") + 1)); fname.substr(fname.find_last_of("/\\") + 1));
io_tracer_->WriteIOOp(io_record); io_tracer_->WriteIOOp(io_record);
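Every tracing override in this file now follows the same pattern: time the wrapped call with the clock, then stamp the IOTraceRecord with the same clock. A hedged helper sketch of that pattern (TimeAndTrace is hypothetical; it assumes StopWatchNano accepts the clock exactly as used above and relies only on the IOTraceRecord constructor visible in this diff):

#include <string>
#include "rocksdb/system_clock.h"
#include "trace_replay/io_tracer.h"
#include "util/stop_watch.h"  // StopWatchNano; internal header, assumed path

namespace ROCKSDB_NAMESPACE {
// Illustrative only, not part of the patch.
template <typename Op>
IOStatus TimeAndTrace(const std::shared_ptr<SystemClock>& clock,
                      const std::shared_ptr<IOTracer>& tracer,
                      const char* op_name, const std::string& file_name,
                      Op&& op) {
  StopWatchNano timer(clock);
  timer.Start();
  IOStatus s = op();  // run the wrapped file-system call
  uint64_t elapsed = timer.ElapsedNanos();
  IOTraceRecord record(clock->NowNanos(), TraceType::kIOTracer,
                       0 /*io_op_data*/, op_name, elapsed, s.ToString(),
                       file_name);
  tracer->WriteIOOp(record);
  return s;
}
}  // namespace ROCKSDB_NAMESPACE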
@@ -26,11 +27,11 @@ IOStatus FileSystemTracingWrapper::NewSequentialFile(
IOStatus FileSystemTracingWrapper::NewRandomAccessFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -40,11 +41,11 @@ IOStatus FileSystemTracingWrapper::NewRandomAccessFile(
IOStatus FileSystemTracingWrapper::NewWritableFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->NewWritableFile(fname, file_opts, result, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -54,11 +55,11 @@ IOStatus FileSystemTracingWrapper::NewWritableFile(
IOStatus FileSystemTracingWrapper::ReopenWritableFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->ReopenWritableFile(fname, file_opts, result, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -69,12 +70,12 @@ IOStatus FileSystemTracingWrapper::ReuseWritableFile(
    const std::string& fname, const std::string& old_fname,
    const FileOptions& file_opts, std::unique_ptr<FSWritableFile>* result,
    IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s =
      target()->ReuseWritableFile(fname, old_fname, file_opts, result, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -84,11 +85,11 @@ IOStatus FileSystemTracingWrapper::ReuseWritableFile(
IOStatus FileSystemTracingWrapper::NewRandomRWFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSRandomRWFile>* result, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->NewRandomRWFile(fname, file_opts, result, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -98,11 +99,11 @@ IOStatus FileSystemTracingWrapper::NewRandomRWFile(
IOStatus FileSystemTracingWrapper::NewDirectory(
    const std::string& name, const IOOptions& io_opts,
    std::unique_ptr<FSDirectory>* result, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->NewDirectory(name, io_opts, result, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          name.substr(name.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -113,11 +114,11 @@ IOStatus FileSystemTracingWrapper::GetChildren(const std::string& dir,
                                               const IOOptions& io_opts,
                                               std::vector<std::string>* r,
                                               IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->GetChildren(dir, io_opts, r, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          dir.substr(dir.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -127,11 +128,11 @@ IOStatus FileSystemTracingWrapper::GetChildren(const std::string& dir,
IOStatus FileSystemTracingWrapper::DeleteFile(const std::string& fname,
                                              const IOOptions& options,
                                              IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->DeleteFile(fname, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -141,11 +142,11 @@ IOStatus FileSystemTracingWrapper::DeleteFile(const std::string& fname,
IOStatus FileSystemTracingWrapper::CreateDir(const std::string& dirname,
                                             const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->CreateDir(dirname, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          dirname.substr(dirname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -154,11 +155,11 @@ IOStatus FileSystemTracingWrapper::CreateDir(const std::string& dirname,
IOStatus FileSystemTracingWrapper::CreateDirIfMissing(
    const std::string& dirname, const IOOptions& options, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->CreateDirIfMissing(dirname, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          dirname.substr(dirname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -168,11 +169,11 @@ IOStatus FileSystemTracingWrapper::CreateDirIfMissing(
IOStatus FileSystemTracingWrapper::DeleteDir(const std::string& dirname,
                                             const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->DeleteDir(dirname, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          dirname.substr(dirname.find_last_of("/\\") + 1));
  io_tracer_->WriteIOOp(io_record);
@@ -183,14 +184,14 @@ IOStatus FileSystemTracingWrapper::GetFileSize(const std::string& fname,
                                               const IOOptions& options,
                                               uint64_t* file_size,
                                               IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->GetFileSize(fname, options, file_size, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOFileSize);
  IOTraceRecord io_record(
-      env_->NowNanos(), TraceType::kIOTracer, io_op_data, __func__, elapsed,
+      clock_->NowNanos(), TraceType::kIOTracer, io_op_data, __func__, elapsed,
      s.ToString(), fname.substr(fname.find_last_of("/\\") + 1), *file_size);
  io_tracer_->WriteIOOp(io_record);
  return s;
@@ -200,13 +201,13 @@ IOStatus FileSystemTracingWrapper::Truncate(const std::string& fname,
                                            size_t size,
                                            const IOOptions& options,
                                            IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Truncate(fname, size, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOFileSize);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(),
                          fname.substr(fname.find_last_of("/\\") + 1), size);
  io_tracer_->WriteIOOp(io_record);
@@ -217,13 +218,13 @@ IOStatus FSSequentialFileTracingWrapper::Read(size_t n,
                                              const IOOptions& options,
                                              Slice* result, char* scratch,
                                              IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Read(n, options, result, scratch, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_,
                          result->size(), 0 /*Offset*/);
  io_tracer_->WriteIOOp(io_record);
@@ -232,14 +233,14 @@ IOStatus FSSequentialFileTracingWrapper::Read(size_t n,
IOStatus FSSequentialFileTracingWrapper::InvalidateCache(size_t offset,
                                                         size_t length) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->InvalidateCache(offset, length);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, length,
                          offset);
  io_tracer_->WriteIOOp(io_record);
@@ -249,7 +250,7 @@ IOStatus FSSequentialFileTracingWrapper::InvalidateCache(size_t offset,
IOStatus FSSequentialFileTracingWrapper::PositionedRead(
    uint64_t offset, size_t n, const IOOptions& options, Slice* result,
    char* scratch, IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s =
      target()->PositionedRead(offset, n, options, result, scratch, dbg);
@@ -257,7 +258,7 @@ IOStatus FSSequentialFileTracingWrapper::PositionedRead(
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_,
                          result->size(), offset);
  io_tracer_->WriteIOOp(io_record);
@@ -268,14 +269,14 @@ IOStatus FSRandomAccessFileTracingWrapper::Read(uint64_t offset, size_t n,
                                                const IOOptions& options,
                                                Slice* result, char* scratch,
                                                IODebugContext* dbg) const {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Read(offset, n, options, result, scratch, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, n,
                          offset);
  io_tracer_->WriteIOOp(io_record);
@@ -286,7 +287,7 @@ IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs,
                                                     size_t num_reqs,
                                                     const IOOptions& options,
                                                     IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->MultiRead(reqs, num_reqs, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
@@ -295,9 +296,9 @@ IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs,
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
  for (size_t i = 0; i < num_reqs; i++) {
-    IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
-                            __func__, latency, reqs[i].status.ToString(),
-                            file_name_, reqs[i].len, reqs[i].offset);
+    IOTraceRecord io_record(
+        clock_->NowNanos(), TraceType::kIOTracer, io_op_data, __func__, latency,
+        reqs[i].status.ToString(), file_name_, reqs[i].len, reqs[i].offset);
    io_tracer_->WriteIOOp(io_record);
  }
  return s;
@@ -306,14 +307,14 @@ IOStatus FSRandomAccessFileTracingWrapper::MultiRead(FSReadRequest* reqs,
IOStatus FSRandomAccessFileTracingWrapper::Prefetch(uint64_t offset, size_t n,
                                                    const IOOptions& options,
                                                    IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Prefetch(offset, n, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, n,
                          offset);
  io_tracer_->WriteIOOp(io_record);
@@ -322,14 +323,14 @@ IOStatus FSRandomAccessFileTracingWrapper::Prefetch(uint64_t offset, size_t n,
IOStatus FSRandomAccessFileTracingWrapper::InvalidateCache(size_t offset,
                                                           size_t length) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->InvalidateCache(offset, length);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, length,
                          static_cast<uint64_t>(offset));
  io_tracer_->WriteIOOp(io_record);
@@ -339,13 +340,13 @@ IOStatus FSWritableFileTracingWrapper::Append(const Slice& data,
                                              const IOOptions& options,
                                              IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Append(data, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_,
                          data.size(), 0 /*Offset*/);
  io_tracer_->WriteIOOp(io_record);
@@ -355,14 +356,14 @@ IOStatus FSWritableFileTracingWrapper::Append(const Slice& data,
IOStatus FSWritableFileTracingWrapper::PositionedAppend(
    const Slice& data, uint64_t offset, const IOOptions& options,
    IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->PositionedAppend(data, offset, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_,
                          data.size(), offset);
  io_tracer_->WriteIOOp(io_record);
@@ -372,13 +373,13 @@ IOStatus FSWritableFileTracingWrapper::PositionedAppend(
IOStatus FSWritableFileTracingWrapper::Truncate(uint64_t size,
                                                const IOOptions& options,
                                                IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Truncate(size, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, size,
                          0 /*Offset*/);
  io_tracer_->WriteIOOp(io_record);
@@ -387,11 +388,11 @@ IOStatus FSWritableFileTracingWrapper::Truncate(uint64_t size,
IOStatus FSWritableFileTracingWrapper::Close(const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Close(options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          file_name_);
  io_tracer_->WriteIOOp(io_record);
@@ -400,13 +401,13 @@ IOStatus FSWritableFileTracingWrapper::Close(const IOOptions& options,
uint64_t FSWritableFileTracingWrapper::GetFileSize(const IOOptions& options,
                                                   IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  uint64_t file_size = target()->GetFileSize(options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOFileSize);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, "OK", file_name_, file_size);
  io_tracer_->WriteIOOp(io_record);
  return file_size;
@@ -414,14 +415,14 @@ uint64_t FSWritableFileTracingWrapper::GetFileSize(const IOOptions& options,
IOStatus FSWritableFileTracingWrapper::InvalidateCache(size_t offset,
                                                       size_t length) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->InvalidateCache(offset, length);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, length,
                          static_cast<uint64_t>(offset));
  io_tracer_->WriteIOOp(io_record);
@@ -431,14 +432,14 @@ IOStatus FSWritableFileTracingWrapper::InvalidateCache(size_t offset,
IOStatus FSRandomRWFileTracingWrapper::Write(uint64_t offset, const Slice& data,
                                             const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Write(offset, data, options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_,
                          data.size(), offset);
  io_tracer_->WriteIOOp(io_record);
@@ -449,14 +450,14 @@ IOStatus FSRandomRWFileTracingWrapper::Read(uint64_t offset, size_t n,
                                            const IOOptions& options,
                                            Slice* result, char* scratch,
                                            IODebugContext* dbg) const {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Read(offset, n, options, result, scratch, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
  uint64_t io_op_data = 0;
  io_op_data |= (1 << IOTraceOp::kIOLen);
  io_op_data |= (1 << IOTraceOp::kIOOffset);
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer, io_op_data,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
                          __func__, elapsed, s.ToString(), file_name_, n,
                          offset);
  io_tracer_->WriteIOOp(io_record);
@@ -465,11 +466,11 @@ IOStatus FSRandomRWFileTracingWrapper::Read(uint64_t offset, size_t n,
IOStatus FSRandomRWFileTracingWrapper::Flush(const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Flush(options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          file_name_);
  io_tracer_->WriteIOOp(io_record);
@@ -478,11 +479,11 @@ IOStatus FSRandomRWFileTracingWrapper::Flush(const IOOptions& options,
IOStatus FSRandomRWFileTracingWrapper::Close(const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Close(options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          file_name_);
  io_tracer_->WriteIOOp(io_record);
@@ -491,11 +492,11 @@ IOStatus FSRandomRWFileTracingWrapper::Close(const IOOptions& options,
IOStatus FSRandomRWFileTracingWrapper::Sync(const IOOptions& options,
                                            IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Sync(options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          file_name_);
  io_tracer_->WriteIOOp(io_record);
@@ -504,11 +505,11 @@ IOStatus FSRandomRWFileTracingWrapper::Sync(const IOOptions& options,
IOStatus FSRandomRWFileTracingWrapper::Fsync(const IOOptions& options,
                                             IODebugContext* dbg) {
-  StopWatchNano timer(env_);
+  StopWatchNano timer(clock_);
  timer.Start();
  IOStatus s = target()->Fsync(options, dbg);
  uint64_t elapsed = timer.ElapsedNanos();
-  IOTraceRecord io_record(env_->NowNanos(), TraceType::kIOTracer,
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer,
                          0 /*io_op_data*/, __func__, elapsed, s.ToString(),
                          file_name_);
  io_tracer_->WriteIOOp(io_record);

env/file_system_tracer.h
@@ -6,6 +6,7 @@
#pragma once
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "trace_replay/io_tracer.h"
namespace ROCKSDB_NAMESPACE {
@@ -18,9 +19,11 @@ namespace ROCKSDB_NAMESPACE {
// overridden.
class FileSystemTracingWrapper : public FileSystemWrapper {
 public:
-  FileSystemTracingWrapper(std::shared_ptr<FileSystem> t,
-                           std::shared_ptr<IOTracer> io_tracer)
-      : FileSystemWrapper(t), io_tracer_(io_tracer), env_(Env::Default()) {}
+  FileSystemTracingWrapper(const std::shared_ptr<FileSystem>& t,
+                           const std::shared_ptr<IOTracer>& io_tracer)
+      : FileSystemWrapper(t),
+        io_tracer_(io_tracer),
+        clock_(SystemClock::Default()) {}
  ~FileSystemTracingWrapper() override {}
@@ -83,7 +86,7 @@ class FileSystemTracingWrapper : public FileSystemWrapper {
 private:
  std::shared_ptr<IOTracer> io_tracer_;
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
};
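With the clock captured at construction time, wiring the tracing file system only needs a FileSystem and an IOTracer; a brief usage sketch (MakeTracingFs is a hypothetical helper, not part of the patch):

#include <memory>
#include "env/file_system_tracer.h"
#include "rocksdb/file_system.h"
#include "trace_replay/io_tracer.h"
using namespace ROCKSDB_NAMESPACE;

// Wraps the default FileSystem so every IO call is timed with
// SystemClock::Default() and recorded through the shared IOTracer.
std::shared_ptr<FileSystem> MakeTracingFs(
    const std::shared_ptr<IOTracer>& io_tracer) {
  return std::make_shared<FileSystemTracingWrapper>(FileSystem::Default(),
                                                    io_tracer);
}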
// The FileSystemPtr is a wrapper class that takes pointer to storage systems
@@ -135,7 +138,7 @@ class FSSequentialFileTracingWrapper : public FSSequentialFileWrapper {
                                 const std::string& file_name)
      : FSSequentialFileWrapper(t),
        io_tracer_(io_tracer),
-        env_(Env::Default()),
+        clock_(SystemClock::Default()),
        file_name_(file_name) {}
  ~FSSequentialFileTracingWrapper() override {}
@@ -151,7 +154,7 @@ class FSSequentialFileTracingWrapper : public FSSequentialFileWrapper {
 private:
  std::shared_ptr<IOTracer> io_tracer_;
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
  std::string file_name_;
};
@@ -207,7 +210,7 @@ class FSRandomAccessFileTracingWrapper : public FSRandomAccessFileWrapper {
                                   const std::string& file_name)
      : FSRandomAccessFileWrapper(t),
        io_tracer_(io_tracer),
-        env_(Env::Default()),
+        clock_(SystemClock::Default()),
        file_name_(file_name) {}
  ~FSRandomAccessFileTracingWrapper() override {}
@@ -226,7 +229,7 @@ class FSRandomAccessFileTracingWrapper : public FSRandomAccessFileWrapper {
 private:
  std::shared_ptr<IOTracer> io_tracer_;
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
  // Stores file name instead of full path.
  std::string file_name_;
};
@@ -282,7 +285,7 @@ class FSWritableFileTracingWrapper : public FSWritableFileWrapper {
                               const std::string& file_name)
      : FSWritableFileWrapper(t),
        io_tracer_(io_tracer),
-        env_(Env::Default()),
+        clock_(SystemClock::Default()),
        file_name_(file_name) {}
  ~FSWritableFileTracingWrapper() override {}
@@ -316,7 +319,7 @@ class FSWritableFileTracingWrapper : public FSWritableFileWrapper {
 private:
  std::shared_ptr<IOTracer> io_tracer_;
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
  // Stores file name instead of full path.
  std::string file_name_;
};
@@ -379,7 +382,7 @@ class FSRandomRWFileTracingWrapper : public FSRandomRWFileWrapper {
                               const std::string& file_name)
      : FSRandomRWFileWrapper(t),
        io_tracer_(io_tracer),
-        env_(Env::Default()),
+        clock_(SystemClock::Default()),
        file_name_(file_name) {}
  ~FSRandomRWFileTracingWrapper() override {}
@@ -401,7 +404,7 @@ class FSRandomRWFileTracingWrapper : public FSRandomRWFileWrapper {
 private:
  std::shared_ptr<IOTracer> io_tracer_;
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
  // Stores file name instead of full path.
  std::string file_name_;
};

file/delete_scheduler.cc
@@ -15,17 +15,20 @@
#include "logging/logging.h" #include "logging/logging.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h" #include "test_util/sync_point.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
DeleteScheduler::DeleteScheduler(Env* env, FileSystem* fs, DeleteScheduler::DeleteScheduler(const std::shared_ptr<SystemClock>& clock,
int64_t rate_bytes_per_sec, Logger* info_log, FileSystem* fs, int64_t rate_bytes_per_sec,
Logger* info_log,
SstFileManagerImpl* sst_file_manager, SstFileManagerImpl* sst_file_manager,
double max_trash_db_ratio, double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) uint64_t bytes_max_delete_chunk)
: env_(env), : clock_(clock),
fs_(fs), fs_(fs),
total_trash_size_(0), total_trash_size_(0),
rate_bytes_per_sec_(rate_bytes_per_sec), rate_bytes_per_sec_(rate_bytes_per_sec),
@ -223,14 +226,14 @@ void DeleteScheduler::BackgroundEmptyTrash() {
} }
// Delete all files in queue_ // Delete all files in queue_
uint64_t start_time = env_->NowMicros(); uint64_t start_time = clock_->NowMicros();
uint64_t total_deleted_bytes = 0; uint64_t total_deleted_bytes = 0;
int64_t current_delete_rate = rate_bytes_per_sec_.load(); int64_t current_delete_rate = rate_bytes_per_sec_.load();
while (!queue_.empty() && !closing_) { while (!queue_.empty() && !closing_) {
if (current_delete_rate != rate_bytes_per_sec_.load()) { if (current_delete_rate != rate_bytes_per_sec_.load()) {
// User changed the delete rate // User changed the delete rate
current_delete_rate = rate_bytes_per_sec_.load(); current_delete_rate = rate_bytes_per_sec_.load();
start_time = env_->NowMicros(); start_time = clock_->NowMicros();
total_deleted_bytes = 0; total_deleted_bytes = 0;
ROCKS_LOG_INFO(info_log_, "rate_bytes_per_sec is changed to %" PRIi64, ROCKS_LOG_INFO(info_log_, "rate_bytes_per_sec is changed to %" PRIi64,
current_delete_rate); current_delete_rate);

file/delete_scheduler.h
@@ -15,14 +15,15 @@
#include "monitoring/instrumented_mutex.h"
#include "port/port.h"
#include "rocksdb/file_system.h"
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class Env;
class FileSystem;
class Logger;
class SstFileManagerImpl;
class SystemClock;
// DeleteScheduler allows the DB to enforce a rate limit on file deletion,
// Instead of deleteing files immediately, files are marked as trash
@@ -33,8 +34,9 @@ class SstFileManagerImpl;
// case DeleteScheduler will delete files immediately.
class DeleteScheduler {
 public:
-  DeleteScheduler(Env* env, FileSystem* fs, int64_t rate_bytes_per_sec,
-                  Logger* info_log, SstFileManagerImpl* sst_file_manager,
+  DeleteScheduler(const std::shared_ptr<SystemClock>& clock, FileSystem* fs,
+                  int64_t rate_bytes_per_sec, Logger* info_log,
+                  SstFileManagerImpl* sst_file_manager,
                  double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
  ~DeleteScheduler();
@@ -99,7 +101,7 @@ class DeleteScheduler {
  void MaybeCreateBackgroundThread();
-  Env* env_;
+  const std::shared_ptr<SystemClock> clock_;
  FileSystem* fs_;
  // total size of trash files
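A sketch of constructing the scheduler against the new signature (normally this is done by SstFileManagerImpl, as the test fixture further below shows); MakeScheduler and the numeric arguments are illustrative only:

#include "file/delete_scheduler.h"
#include "rocksdb/env.h"
using namespace ROCKSDB_NAMESPACE;

void MakeScheduler(Env* env, Logger* info_log,
                   SstFileManagerImpl* sst_file_manager) {
  // Clock and FileSystem now come from the Env explicitly.
  DeleteScheduler scheduler(env->GetSystemClock(), env->GetFileSystem().get(),
                            1024 * 1024 /* rate_bytes_per_sec */, info_log,
                            sst_file_manager,
                            0.25 /* max_trash_db_ratio */,
                            64 * 1024 * 1024 /* bytes_max_delete_chunk */);
  (void)scheduler;
}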

file/delete_scheduler_test.cc
@@ -95,9 +95,10 @@ class DeleteSchedulerTest : public testing::Test {
    // Tests in this file are for DeleteScheduler component and don't create any
    // DBs, so we need to set max_trash_db_ratio to 100% (instead of default
    // 25%)
-    sst_file_mgr_.reset(new SstFileManagerImpl(
-        env_, env_->GetFileSystem(), nullptr, rate_bytes_per_sec_,
-        /* max_trash_db_ratio= */ 1.1, 128 * 1024));
+    sst_file_mgr_.reset(
+        new SstFileManagerImpl(env_->GetSystemClock(), env_->GetFileSystem(),
+                               nullptr, rate_bytes_per_sec_,
+                               /* max_trash_db_ratio= */ 1.1, 128 * 1024));
    delete_scheduler_ = sst_file_mgr_->delete_scheduler();
    sst_file_mgr_->SetStatisticsPtr(stats_);
  }

file/file_util.h
@@ -12,6 +12,7 @@
#include "rocksdb/file_system.h"
#include "rocksdb/sst_file_writer.h"
#include "rocksdb/status.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/types.h"
#include "trace_replay/io_tracer.h"
@@ -67,14 +68,12 @@ inline IOStatus GenerateOneFileChecksum(
      allow_mmap_reads, io_tracer);
}
-inline IOStatus PrepareIOFromReadOptions(const ReadOptions& ro, Env* env,
-                                         IOOptions& opts) {
-  if (!env) {
-    env = Env::Default();
-  }
+inline IOStatus PrepareIOFromReadOptions(
+    const ReadOptions& ro, const std::shared_ptr<SystemClock>& clock,
+    IOOptions& opts) {
  if (ro.deadline.count()) {
-    std::chrono::microseconds now = std::chrono::microseconds(env->NowMicros());
+    std::chrono::microseconds now =
+        std::chrono::microseconds(clock->NowMicros());
    // Ensure there is atleast 1us available. We don't want to pass a value of
    // 0 as that means no timeout
    if (now >= ro.deadline) {
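A caller-side sketch, assuming nothing beyond the signature above: the deadline in ReadOptions is compared against the clock's NowMicros(), and the remaining time (handled in code not shown in this hunk) is used to populate opts. SetupDeadline is a hypothetical helper:

#include <chrono>
#include "file/file_util.h"
#include "rocksdb/options.h"
#include "rocksdb/system_clock.h"
using namespace ROCKSDB_NAMESPACE;

// Illustrative only: build IOOptions for a read that must finish within 10ms.
IOStatus SetupDeadline(IOOptions* opts) {
  ReadOptions ro;
  ro.deadline = std::chrono::microseconds(
      SystemClock::Default()->NowMicros() + 10000 /* 10ms from "now" */);
  return PrepareIOFromReadOptions(ro, SystemClock::Default(), *opts);
}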

file/filename.cc
@@ -419,15 +419,17 @@ Status SetIdentityFile(Env* env, const std::string& dbname,
  return s;
}
-IOStatus SyncManifest(Env* env, const ImmutableDBOptions* db_options,
+IOStatus SyncManifest(const std::shared_ptr<SystemClock>& clock,
+                      const ImmutableDBOptions* db_options,
                      WritableFileWriter* file) {
  TEST_KILL_RANDOM("SyncManifest:0", rocksdb_kill_odds * REDUCE_ODDS2);
-  StopWatch sw(env, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS);
+  StopWatch sw(clock, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS);
  return file->Sync(db_options->use_fsync);
}
-Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
-                       const std::string& dbname, std::string* parent_dir,
+Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
+                       const std::string& db_log_dir, const std::string& dbname,
+                       std::string* parent_dir,
                       std::vector<std::string>* info_log_list) {
  assert(parent_dir != nullptr);
  assert(info_log_list != nullptr);
@@ -443,7 +445,7 @@ Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
  InfoLogPrefix info_log_prefix(!db_log_dir.empty(), dbname);
  std::vector<std::string> file_names;
-  Status s = env->GetChildren(*parent_dir, &file_names);
+  Status s = fs->GetChildren(*parent_dir, IOOptions(), &file_names, nullptr);
  if (!s.ok()) {
    return s;
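Call sites now hand over the FileSystem or clock directly rather than an Env; a hedged sketch of what updated callers look like (ListInfoLogs and SyncManifestWithClock are hypothetical wrappers built only on the declarations above):

#include <string>
#include <vector>
#include "file/filename.h"
#include "rocksdb/env.h"
using namespace ROCKSDB_NAMESPACE;

Status ListInfoLogs(Env* env, const std::string& dbname,
                    std::vector<std::string>* logs) {
  std::string parent_dir;
  // Pass the Env's FileSystem instead of the Env itself.
  return GetInfoLogFiles(env->GetFileSystem(), /*db_log_dir=*/"", dbname,
                         &parent_dir, logs);
}

IOStatus SyncManifestWithClock(Env* env, const ImmutableDBOptions* db_options,
                               WritableFileWriter* file) {
  // Pass the Env's SystemClock instead of the Env itself.
  return SyncManifest(env->GetSystemClock(), db_options, file);
}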

file/filename.h
@@ -27,6 +27,7 @@ namespace ROCKSDB_NAMESPACE {
class Env;
class Directory;
class SystemClock;
class WritableFileWriter;
#ifdef OS_WIN
@@ -166,14 +167,16 @@ extern Status SetIdentityFile(Env* env, const std::string& dbname,
                              const std::string& db_id = {});
// Sync manifest file `file`.
-extern IOStatus SyncManifest(Env* env, const ImmutableDBOptions* db_options,
+extern IOStatus SyncManifest(const std::shared_ptr<SystemClock>& clock,
+                             const ImmutableDBOptions* db_options,
                             WritableFileWriter* file);
// Return list of file names of info logs in `file_names`.
// The list only contains file name. The parent directory name is stored
// in `parent_dir`.
// `db_log_dir` should be the one as in options.db_log_dir
-extern Status GetInfoLogFiles(Env* env, const std::string& db_log_dir,
+extern Status GetInfoLogFiles(const std::shared_ptr<FileSystem>& fs,
+                              const std::string& db_log_dir,
                              const std::string& dbname,
                              std::string* parent_dir,
                              std::vector<std::string>* file_names);

file/random_access_file_reader.cc
@@ -12,6 +12,7 @@
#include <algorithm>
#include <mutex>
#include "file/file_util.h"
#include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h"
#include "port/port.h"
@@ -32,7 +33,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
  Status s;
  uint64_t elapsed = 0;
  {
-    StopWatch sw(env_, stats_, hist_type_,
+    StopWatch sw(clock_, stats_, hist_type_,
                 (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
                 true /*delay_enabled*/);
    auto prev_perf_level = GetPerfLevel();
@@ -68,7 +69,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
    }
    {
-      IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
+      IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
      // Only user reads are expected to specify a timeout. And user reads
      // are not subjected to rate_limiter and should go through only
      // one iteration of this loop, so we don't need to check and adjust
@@ -128,7 +129,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
#endif
    {
-      IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
+      IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
      // Only user reads are expected to specify a timeout. And user reads
      // are not subjected to rate_limiter and should go through only
      // one iteration of this loop, so we don't need to check and adjust
@@ -205,7 +206,7 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
  Status s;
  uint64_t elapsed = 0;
  {
-    StopWatch sw(env_, stats_, hist_type_,
+    StopWatch sw(clock_, stats_, hist_type_,
                 (stats_ != nullptr) ? &elapsed : nullptr, true /*overwrite*/,
                 true /*delay_enabled*/);
    auto prev_perf_level = GetPerfLevel();
@@ -267,7 +268,7 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
#endif  // ROCKSDB_LITE
  {
-    IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, env_);
+    IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
    s = file_->MultiRead(fs_reqs, num_fs_reqs, opts, nullptr);
  }
@@ -312,4 +313,12 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
  return s;
}
IOStatus RandomAccessFileReader::PrepareIOOptions(const ReadOptions& ro,
IOOptions& opts) {
if (clock_.get() != nullptr) {
return PrepareIOFromReadOptions(ro, clock_, opts);
} else {
return PrepareIOFromReadOptions(ro, SystemClock::Default(), opts);
}
}
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE
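
A minimal sketch (not part of this diff) of the call pattern the new PrepareIOOptions() hook enables; PrepareExampleRead, the file name, and the deadline value are placeholders, and the internal header path is assumed. The reader is constructed with the Env's SystemClock and then derives IOOptions (e.g. a timeout from ReadOptions::deadline) before a read is issued, falling back to SystemClock::Default() when no clock was supplied.

#include <chrono>
#include <string>
#include "file/random_access_file_reader.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/options.h"

namespace ROCKSDB_NAMESPACE {
// Hypothetical helper: open a file through the FileSystem, wrap it in a
// RandomAccessFileReader that carries the Env's clock, and convert
// ReadOptions into IOOptions via the reader.
IOStatus PrepareExampleRead(Env* env, const std::string& fname) {
  const auto& fs = env->GetFileSystem();
  std::unique_ptr<FSRandomAccessFile> file;
  IOStatus ios = fs->NewRandomAccessFile(fname, FileOptions(), &file, nullptr);
  if (!ios.ok()) {
    return ios;
  }
  // The reader now takes a SystemClock instead of an Env.
  RandomAccessFileReader reader(std::move(file), fname, env->GetSystemClock());
  ReadOptions ro;
  ro.deadline = std::chrono::microseconds(1000);  // placeholder 1ms deadline
  IOOptions opts;
  // Translates the deadline into an I/O timeout using the reader's clock.
  return reader.PrepareIOOptions(ro, opts);
}
}  // namespace ROCKSDB_NAMESPACE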

@ -14,7 +14,6 @@
#include "env/file_system_tracer.h" #include "env/file_system_tracer.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h" #include "rocksdb/file_system.h"
#include "rocksdb/listener.h" #include "rocksdb/listener.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
@ -24,6 +23,7 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class Statistics; class Statistics;
class HistogramImpl; class HistogramImpl;
class SystemClock;
using AlignedBuf = std::unique_ptr<char[]>; using AlignedBuf = std::unique_ptr<char[]>;
@ -67,7 +67,7 @@ class RandomAccessFileReader {
FSRandomAccessFilePtr file_; FSRandomAccessFilePtr file_;
std::string file_name_; std::string file_name_;
Env* env_; std::shared_ptr<SystemClock> clock_;
Statistics* stats_; Statistics* stats_;
uint32_t hist_type_; uint32_t hist_type_;
HistogramImpl* file_read_hist_; HistogramImpl* file_read_hist_;
@ -77,14 +77,15 @@ class RandomAccessFileReader {
public: public:
explicit RandomAccessFileReader( explicit RandomAccessFileReader(
std::unique_ptr<FSRandomAccessFile>&& raf, const std::string& _file_name, std::unique_ptr<FSRandomAccessFile>&& raf, const std::string& _file_name,
Env* _env = nullptr, const std::shared_ptr<IOTracer>& io_tracer = nullptr, const std::shared_ptr<SystemClock>& clock = nullptr,
const std::shared_ptr<IOTracer>& io_tracer = nullptr,
Statistics* stats = nullptr, uint32_t hist_type = 0, Statistics* stats = nullptr, uint32_t hist_type = 0,
HistogramImpl* file_read_hist = nullptr, HistogramImpl* file_read_hist = nullptr,
RateLimiter* rate_limiter = nullptr, RateLimiter* rate_limiter = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {}) const std::vector<std::shared_ptr<EventListener>>& listeners = {})
: file_(std::move(raf), io_tracer, _file_name), : file_(std::move(raf), io_tracer, _file_name),
file_name_(std::move(_file_name)), file_name_(std::move(_file_name)),
env_(_env), clock_(clock),
stats_(stats), stats_(stats),
hist_type_(hist_type), hist_type_(hist_type),
file_read_hist_(file_read_hist), file_read_hist_(file_read_hist),
@ -137,6 +138,6 @@ class RandomAccessFileReader {
bool use_direct_io() const { return file_->use_direct_io(); } bool use_direct_io() const { return file_->use_direct_io(); }
Env* env() const { return env_; } IOStatus PrepareIOOptions(const ReadOptions& ro, IOOptions& opts);
}; };
} // namespace ROCKSDB_NAMESPACE } // namespace ROCKSDB_NAMESPACE

@ -42,7 +42,8 @@ class RandomAccessFileReaderTest : public testing::Test {
std::string fpath = Path(fname); std::string fpath = Path(fname);
std::unique_ptr<FSRandomAccessFile> f; std::unique_ptr<FSRandomAccessFile> f;
ASSERT_OK(fs_->NewRandomAccessFile(fpath, opts, &f, nullptr)); ASSERT_OK(fs_->NewRandomAccessFile(fpath, opts, &f, nullptr));
(*reader).reset(new RandomAccessFileReader(std::move(f), fpath, env_)); (*reader).reset(new RandomAccessFileReader(std::move(f), fpath,
env_->GetSystemClock()));
} }
void AssertResult(const std::string& content, void AssertResult(const std::string& content,

@ -18,12 +18,12 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs, SstFileManagerImpl::SstFileManagerImpl(
std::shared_ptr<Logger> logger, const std::shared_ptr<SystemClock>& clock,
int64_t rate_bytes_per_sec, const std::shared_ptr<FileSystem>& fs,
double max_trash_db_ratio, const std::shared_ptr<Logger>& logger, int64_t rate_bytes_per_sec,
uint64_t bytes_max_delete_chunk) double max_trash_db_ratio, uint64_t bytes_max_delete_chunk)
: env_(env), : clock_(clock),
fs_(fs), fs_(fs),
logger_(logger), logger_(logger),
total_files_size_(0), total_files_size_(0),
@ -31,8 +31,8 @@ SstFileManagerImpl::SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs,
compaction_buffer_size_(0), compaction_buffer_size_(0),
cur_compactions_reserved_size_(0), cur_compactions_reserved_size_(0),
max_allowed_space_(0), max_allowed_space_(0),
delete_scheduler_(env, fs_.get(), rate_bytes_per_sec, logger.get(), this, delete_scheduler_(clock_, fs_.get(), rate_bytes_per_sec, logger.get(),
max_trash_db_ratio, bytes_max_delete_chunk), this, max_trash_db_ratio, bytes_max_delete_chunk),
cv_(&mu_), cv_(&mu_),
closing_(false), closing_(false),
bg_thread_(nullptr), bg_thread_(nullptr),
@ -347,7 +347,7 @@ void SstFileManagerImpl::ClearError() {
if (!error_handler_list_.empty()) { if (!error_handler_list_.empty()) {
// If there are more instances to be recovered, reschedule after 5 // If there are more instances to be recovered, reschedule after 5
// seconds // seconds
int64_t wait_until = env_->NowMicros() + 5000000; int64_t wait_until = clock_->NowMicros() + 5000000;
cv_.TimedWait(wait_until); cv_.TimedWait(wait_until);
} }
@ -485,7 +485,6 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<Logger> info_log,
double max_trash_db_ratio, double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) { uint64_t bytes_max_delete_chunk) {
const auto& fs = env->GetFileSystem(); const auto& fs = env->GetFileSystem();
return NewSstFileManager(env, fs, info_log, trash_dir, rate_bytes_per_sec, return NewSstFileManager(env, fs, info_log, trash_dir, rate_bytes_per_sec,
delete_existing_trash, status, max_trash_db_ratio, delete_existing_trash, status, max_trash_db_ratio,
bytes_max_delete_chunk); bytes_max_delete_chunk);
@ -498,8 +497,9 @@ SstFileManager* NewSstFileManager(Env* env, std::shared_ptr<FileSystem> fs,
bool delete_existing_trash, Status* status, bool delete_existing_trash, Status* status,
double max_trash_db_ratio, double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk) { uint64_t bytes_max_delete_chunk) {
const auto& clock = env->GetSystemClock();
SstFileManagerImpl* res = SstFileManagerImpl* res =
new SstFileManagerImpl(env, fs, info_log, rate_bytes_per_sec, new SstFileManagerImpl(clock, fs, info_log, rate_bytes_per_sec,
max_trash_db_ratio, bytes_max_delete_chunk); max_trash_db_ratio, bytes_max_delete_chunk);
// trash_dir is deprecated and not needed anymore, but if user passed it // trash_dir is deprecated and not needed anymore, but if user passed it

@ -12,14 +12,13 @@
#include "port/port.h" #include "port/port.h"
#include "db/compaction/compaction.h" #include "db/compaction/compaction.h"
#include "db/error_handler.h"
#include "file/delete_scheduler.h" #include "file/delete_scheduler.h"
#include "rocksdb/file_system.h"
#include "rocksdb/sst_file_manager.h" #include "rocksdb/sst_file_manager.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class ErrorHandler;
class Env; class FileSystem;
class SystemClock;
class Logger; class Logger;
// SstFileManager is used to track SST files in the DB and control their // SstFileManager is used to track SST files in the DB and control their
@ -27,8 +26,9 @@ class Logger;
// All SstFileManager public functions are thread-safe. // All SstFileManager public functions are thread-safe.
class SstFileManagerImpl : public SstFileManager { class SstFileManagerImpl : public SstFileManager {
public: public:
explicit SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs, explicit SstFileManagerImpl(const std::shared_ptr<SystemClock>& clock,
std::shared_ptr<Logger> logger, const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<Logger>& logger,
int64_t rate_bytes_per_sec, int64_t rate_bytes_per_sec,
double max_trash_db_ratio, double max_trash_db_ratio,
uint64_t bytes_max_delete_chunk); uint64_t bytes_max_delete_chunk);
@ -152,7 +152,7 @@ class SstFileManagerImpl : public SstFileManager {
return bg_err_.severity() == Status::Severity::kSoftError; return bg_err_.severity() == Status::Severity::kSoftError;
} }
Env* env_; std::shared_ptr<SystemClock> clock_;
std::shared_ptr<FileSystem> fs_; std::shared_ptr<FileSystem> fs_;
std::shared_ptr<Logger> logger_; std::shared_ptr<Logger> logger_;
// Mutex to protect tracked_files_, total_files_size_ // Mutex to protect tracked_files_, total_files_size_

@ -16,6 +16,7 @@
#include "monitoring/histogram.h" #include "monitoring/histogram.h"
#include "monitoring/iostats_context_imp.h" #include "monitoring/iostats_context_imp.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h" #include "test_util/sync_point.h"
#include "util/random.h" #include "util/random.h"
#include "util/rate_limiter.h" #include "util/rate_limiter.h"
@ -331,7 +332,7 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
IOSTATS_TIMER_GUARD(fsync_nanos); IOSTATS_TIMER_GUARD(fsync_nanos);
TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0"); TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0");
auto prev_perf_level = GetPerfLevel(); auto prev_perf_level = GetPerfLevel();
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_); IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::StartTimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
@ -406,7 +407,7 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
#endif #endif
{ {
auto prev_perf_level = GetPerfLevel(); auto prev_perf_level = GetPerfLevel();
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_); IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_);
s = writable_file_->Append(Slice(src, allowed), IOOptions(), nullptr); s = writable_file_->Append(Slice(src, allowed), IOOptions(), nullptr);
SetPerfLevel(prev_perf_level); SetPerfLevel(prev_perf_level);
} }

@ -14,7 +14,6 @@
#include "db/version_edit.h" #include "db/version_edit.h"
#include "env/file_system_tracer.h" #include "env/file_system_tracer.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/env.h"
#include "rocksdb/file_checksum.h" #include "rocksdb/file_checksum.h"
#include "rocksdb/file_system.h" #include "rocksdb/file_system.h"
#include "rocksdb/io_status.h" #include "rocksdb/io_status.h"
@ -25,6 +24,7 @@
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class Statistics; class Statistics;
class SystemClock;
// WritableFileWriter is a wrapper on top of Env::WritableFile. It provides // WritableFileWriter is a wrapper on top of Env::WritableFile. It provides
// facilities to: // facilities to:
@ -121,7 +121,7 @@ class WritableFileWriter {
std::string file_name_; std::string file_name_;
FSWritableFilePtr writable_file_; FSWritableFilePtr writable_file_;
Env* env_; std::shared_ptr<SystemClock> clock_;
AlignedBuffer buf_; AlignedBuffer buf_;
size_t max_buffer_size_; size_t max_buffer_size_;
// Actually written data size can be used for truncate // Actually written data size can be used for truncate
@ -145,14 +145,15 @@ class WritableFileWriter {
public: public:
WritableFileWriter( WritableFileWriter(
std::unique_ptr<FSWritableFile>&& file, const std::string& _file_name, std::unique_ptr<FSWritableFile>&& file, const std::string& _file_name,
const FileOptions& options, Env* env = nullptr, const FileOptions& options,
const std::shared_ptr<SystemClock>& clock = nullptr,
const std::shared_ptr<IOTracer>& io_tracer = nullptr, const std::shared_ptr<IOTracer>& io_tracer = nullptr,
Statistics* stats = nullptr, Statistics* stats = nullptr,
const std::vector<std::shared_ptr<EventListener>>& listeners = {}, const std::vector<std::shared_ptr<EventListener>>& listeners = {},
FileChecksumGenFactory* file_checksum_gen_factory = nullptr) FileChecksumGenFactory* file_checksum_gen_factory = nullptr)
: file_name_(_file_name), : file_name_(_file_name),
writable_file_(std::move(file), io_tracer, _file_name), writable_file_(std::move(file), io_tracer, _file_name),
env_(env), clock_(clock),
buf_(), buf_(),
max_buffer_size_(options.writable_file_max_buffer_size), max_buffer_size_(options.writable_file_max_buffer_size),
filesize_(0), filesize_(0),

@ -59,6 +59,7 @@ class RateLimiter;
class ThreadStatusUpdater; class ThreadStatusUpdater;
struct ThreadStatus; struct ThreadStatus;
class FileSystem; class FileSystem;
class SystemClock;
const size_t kDefaultPageSize = 4 * 1024; const size_t kDefaultPageSize = 4 * 1024;
@ -150,8 +151,11 @@ class Env {
}; };
Env(); Env();
// Construct an Env with a separate FileSystem implementation // Construct an Env with a separate FileSystem and/or SystemClock
Env(std::shared_ptr<FileSystem> fs); // implementation
explicit Env(const std::shared_ptr<FileSystem>& fs);
Env(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock);
// No copying allowed // No copying allowed
Env(const Env&) = delete; Env(const Env&) = delete;
void operator=(const Env&) = delete; void operator=(const Env&) = delete;
@ -576,6 +580,10 @@ class Env {
// could be a fully implemented one, or a wrapper class around the Env // could be a fully implemented one, or a wrapper class around the Env
const std::shared_ptr<FileSystem>& GetFileSystem() const; const std::shared_ptr<FileSystem>& GetFileSystem() const;
// Get the SystemClock implementation this Env was constructed with. It
// could be a fully implemented one, or a wrapper class around the Env
const std::shared_ptr<SystemClock>& GetSystemClock() const;
// If you're adding methods here, remember to add them to EnvWrapper too. // If you're adding methods here, remember to add them to EnvWrapper too.
protected: protected:
@ -586,6 +594,9 @@ class Env {
// Pointer to the underlying FileSystem implementation // Pointer to the underlying FileSystem implementation
std::shared_ptr<FileSystem> file_system_; std::shared_ptr<FileSystem> file_system_;
// Pointer to the underlying SystemClock implementation
std::shared_ptr<SystemClock> system_clock_;
private: private:
static const size_t kMaxHostNameLen = 256; static const size_t kMaxHostNameLen = 256;
}; };
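
As a hedged illustration of what this accessor is for (ElapsedMicros is a placeholder name, not part of this change): code that previously asked the Env for the time can now ask the Env's clock, which keeps the time source swappable independently of the rest of the Env.

#include <cstdint>
#include <functional>
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// Placeholder function: time a span of work through the clock attached to
// the Env rather than through Env::NowMicros() directly.
uint64_t ElapsedMicros(Env* env, const std::function<void()>& work) {
  const std::shared_ptr<SystemClock>& clock = env->GetSystemClock();
  const uint64_t start = clock->NowMicros();
  work();
  return clock->NowMicros() - start;
}
}  // namespace ROCKSDB_NAMESPACE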

@ -0,0 +1,102 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include <rocksdb/rocksdb_namespace.h>
#include <rocksdb/status.h>
#include <stdint.h>
#include <memory>
#ifdef _WIN32
// Windows API macro interference
#undef GetCurrentTime
#endif
namespace ROCKSDB_NAMESPACE {
struct ConfigOptions;
// A SystemClock is an interface used by the rocksdb implementation to access
// operating system time-related functionality.
class SystemClock {
public:
virtual ~SystemClock() {}
static const char* Type() { return "SystemClock"; }
// The name of this system clock
virtual const char* Name() const = 0;
// Return a default SystemClock suitable for the current operating
// system.
static const std::shared_ptr<SystemClock>& Default();
// Returns the number of micro-seconds since some fixed point in time.
// It is often used as system time such as in GenericRateLimiter
// and other places so a port needs to return system time in order to work.
virtual uint64_t NowMicros() = 0;
// Returns the number of nano-seconds since some fixed point in time. Only
// useful for computing deltas of time in one run.
// Default implementation simply relies on NowMicros.
// In platform-specific implementations, NowNanos() should return time points
// that are MONOTONIC.
virtual uint64_t NowNanos() { return NowMicros() * 1000; }
// Returns the number of micro-seconds of CPU time used by the current thread.
// 0 indicates not supported.
virtual uint64_t CPUMicros() { return 0; }
// Returns the number of nano-seconds of CPU time used by the current thread.
// Default implementation simply relies on CPUMicros.
// 0 indicates not supported.
virtual uint64_t CPUNanos() { return CPUMicros() * 1000; }
// Sleep/delay the thread for the prescribed number of micro-seconds.
virtual void SleepForMicroseconds(int micros) = 0;
// Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
// Only overwrites *unix_time on success.
virtual Status GetCurrentTime(int64_t* unix_time) = 0;
// Converts seconds-since-Jan-01-1970 to a printable string
virtual std::string TimeToString(uint64_t time) = 0;
};
// Wrapper class for a SystemClock. Redirects all methods (except Name)
// of the SystemClock interface to the target/wrapped class.
class SystemClockWrapper : public SystemClock {
public:
explicit SystemClockWrapper(const std::shared_ptr<SystemClock>& t)
: target_(t) {}
uint64_t NowMicros() override { return target_->NowMicros(); }
uint64_t NowNanos() override { return target_->NowNanos(); }
uint64_t CPUMicros() override { return target_->CPUMicros(); }
uint64_t CPUNanos() override { return target_->CPUNanos(); }
virtual void SleepForMicroseconds(int micros) override {
return target_->SleepForMicroseconds(micros);
}
Status GetCurrentTime(int64_t* unix_time) override {
return target_->GetCurrentTime(unix_time);
}
std::string TimeToString(uint64_t time) override {
return target_->TimeToString(time);
}
protected:
std::shared_ptr<SystemClock> target_;
};
} // end namespace ROCKSDB_NAMESPACE
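
For illustration only (FakeSleepClock is a hypothetical name, not part of this change), the wrapper makes it straightforward to override a single method while forwarding the rest, much like the NoSleepClock used by the auto_roll_logger tests further down:

#include <atomic>
#include "rocksdb/system_clock.h"

namespace ROCKSDB_NAMESPACE {
// Hypothetical clock that never blocks: requested sleeps are accumulated and
// reflected in NowMicros() instead of being performed, which keeps
// time-based tests fast and deterministic.
class FakeSleepClock : public SystemClockWrapper {
 public:
  explicit FakeSleepClock(
      const std::shared_ptr<SystemClock>& base = SystemClock::Default())
      : SystemClockWrapper(base) {}
  const char* Name() const override { return "FakeSleepClock"; }
  void SleepForMicroseconds(int micros) override {
    fake_micros_ += static_cast<uint64_t>(micros);
  }
  uint64_t NowMicros() override { return target_->NowMicros() + fake_micros_; }

 private:
  std::atomic<uint64_t> fake_micros_{0};
};
}  // namespace ROCKSDB_NAMESPACE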

@ -6,8 +6,12 @@
#include "logging/auto_roll_logger.h" #include "logging/auto_roll_logger.h"
#include <algorithm> #include <algorithm>
#include "file/filename.h" #include "file/filename.h"
#include "logging/logging.h" #include "logging/logging.h"
#include "rocksdb/env.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
@ -15,7 +19,9 @@ namespace ROCKSDB_NAMESPACE {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
// -- AutoRollLogger // -- AutoRollLogger
AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname, AutoRollLogger::AutoRollLogger(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& clock,
const std::string& dbname,
const std::string& db_log_dir, const std::string& db_log_dir,
size_t log_max_size, size_t log_max_size,
size_t log_file_time_to_roll, size_t log_file_time_to_roll,
@ -24,24 +30,26 @@ AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname,
: Logger(log_level), : Logger(log_level),
dbname_(dbname), dbname_(dbname),
db_log_dir_(db_log_dir), db_log_dir_(db_log_dir),
env_(env), fs_(fs),
clock_(clock),
status_(Status::OK()), status_(Status::OK()),
kMaxLogFileSize(log_max_size), kMaxLogFileSize(log_max_size),
kLogFileTimeToRoll(log_file_time_to_roll), kLogFileTimeToRoll(log_file_time_to_roll),
kKeepLogFileNum(keep_log_file_num), kKeepLogFileNum(keep_log_file_num),
cached_now(static_cast<uint64_t>(env_->NowMicros() * 1e-6)), cached_now(static_cast<uint64_t>(clock_->NowMicros() * 1e-6)),
ctime_(cached_now), ctime_(cached_now),
cached_now_access_count(0), cached_now_access_count(0),
call_NowMicros_every_N_records_(100), call_NowMicros_every_N_records_(100),
mutex_() { mutex_() {
Status s = env->GetAbsolutePath(dbname, &db_absolute_path_); Status s = fs->GetAbsolutePath(dbname, io_options_, &db_absolute_path_,
&io_context_);
if (s.IsNotSupported()) { if (s.IsNotSupported()) {
db_absolute_path_ = dbname; db_absolute_path_ = dbname;
} else { } else {
status_ = s; status_ = s;
} }
log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_); log_fname_ = InfoLogFileName(dbname_, db_absolute_path_, db_log_dir_);
if (env_->FileExists(log_fname_).ok()) { if (fs_->FileExists(log_fname_, io_options_, &io_context_).ok()) {
RollLogFile(); RollLogFile();
} }
GetExistingFiles(); GetExistingFiles();
@ -53,7 +61,7 @@ AutoRollLogger::AutoRollLogger(Env* env, const std::string& dbname,
Status AutoRollLogger::ResetLogger() { Status AutoRollLogger::ResetLogger() {
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger"); TEST_SYNC_POINT("AutoRollLogger::ResetLogger:BeforeNewLogger");
status_ = env_->NewLogger(log_fname_, &logger_); status_ = fs_->NewLogger(log_fname_, io_options_, &logger_, &io_context_);
TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger"); TEST_SYNC_POINT("AutoRollLogger::ResetLogger:AfterNewLogger");
if (!status_.ok()) { if (!status_.ok()) {
@ -67,7 +75,7 @@ Status AutoRollLogger::ResetLogger() {
"The underlying logger doesn't support GetLogFileSize()"); "The underlying logger doesn't support GetLogFileSize()");
} }
if (status_.ok()) { if (status_.ok()) {
cached_now = static_cast<uint64_t>(env_->NowMicros() * 1e-6); cached_now = static_cast<uint64_t>(clock_->NowMicros() * 1e-6);
ctime_ = cached_now; ctime_ = cached_now;
cached_now_access_count = 0; cached_now_access_count = 0;
} }
@ -79,14 +87,14 @@ void AutoRollLogger::RollLogFile() {
// This function is called when log is rotating. Two rotations // This function is called when log is rotating. Two rotations
// can happen quickly (NowMicros returns the same value). To not overwrite // can happen quickly (NowMicros returns the same value). To not overwrite
// the previous log file, we increment by one microsecond and try again. // the previous log file, we increment by one microsecond and try again.
uint64_t now = env_->NowMicros(); uint64_t now = clock_->NowMicros();
std::string old_fname; std::string old_fname;
do { do {
old_fname = OldInfoLogFileName( old_fname = OldInfoLogFileName(
dbname_, now, db_absolute_path_, db_log_dir_); dbname_, now, db_absolute_path_, db_log_dir_);
now++; now++;
} while (env_->FileExists(old_fname).ok()); } while (fs_->FileExists(old_fname, io_options_, &io_context_).ok());
Status s = env_->RenameFile(log_fname_, old_fname); Status s = fs_->RenameFile(log_fname_, old_fname, io_options_, &io_context_);
if (!s.ok()) { if (!s.ok()) {
// What should we do on error? // What should we do on error?
} }
@ -103,7 +111,7 @@ void AutoRollLogger::GetExistingFiles() {
std::string parent_dir; std::string parent_dir;
std::vector<std::string> info_log_files; std::vector<std::string> info_log_files;
Status s = Status s =
GetInfoLogFiles(env_, db_log_dir_, dbname_, &parent_dir, &info_log_files); GetInfoLogFiles(fs_, db_log_dir_, dbname_, &parent_dir, &info_log_files);
if (status_.ok()) { if (status_.ok()) {
status_ = s; status_ = s;
} }
@ -117,7 +125,7 @@ void AutoRollLogger::GetExistingFiles() {
} }
Status AutoRollLogger::TrimOldLogFiles() { Status AutoRollLogger::TrimOldLogFiles() {
// Here we directly list info files and delete them through Env. // Here we directly list info files and delete them through FileSystem.
// The deletion isn't going through DB, so there are shortcomings: // The deletion isn't going through DB, so there are shortcomings:
// 1. the deletion is not rate limited by SstFileManager // 1. the deletion is not rate limited by SstFileManager
// 2. there is a chance that an I/O will be issued here // 2. there is a chance that an I/O will be issued here
@ -130,7 +138,8 @@ Status AutoRollLogger::TrimOldLogFiles() {
// it's essentially the same thing, and checking empty before accessing // it's essentially the same thing, and checking empty before accessing
// the queue feels safer. // the queue feels safer.
while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) { while (!old_log_files_.empty() && old_log_files_.size() >= kKeepLogFileNum) {
Status s = env_->DeleteFile(old_log_files_.front()); Status s =
fs_->DeleteFile(old_log_files_.front(), io_options_, &io_context_);
// Remove the file from the tracking anyway. It's possible that // Remove the file from the tracking anyway. It's possible that
// DB cleaned up the old log file, or people cleaned it up manually. // DB cleaned up the old log file, or people cleaned it up manually.
old_log_files_.pop(); old_log_files_.pop();
@ -241,7 +250,7 @@ void AutoRollLogger::LogHeader(const char* format, va_list args) {
bool AutoRollLogger::LogExpired() { bool AutoRollLogger::LogExpired() {
if (cached_now_access_count >= call_NowMicros_every_N_records_) { if (cached_now_access_count >= call_NowMicros_every_N_records_) {
cached_now = static_cast<uint64_t>(env_->NowMicros() * 1e-6); cached_now = static_cast<uint64_t>(clock_->NowMicros() * 1e-6);
cached_now_access_count = 0; cached_now_access_count = 0;
} }
@ -267,15 +276,16 @@ Status CreateLoggerFromOptions(const std::string& dbname,
std::string fname = std::string fname =
InfoLogFileName(dbname, db_absolute_path, options.db_log_dir); InfoLogFileName(dbname, db_absolute_path, options.db_log_dir);
const auto& clock = env->GetSystemClock();
env->CreateDirIfMissing(dbname) env->CreateDirIfMissing(dbname)
.PermitUncheckedError(); // In case it does not exist .PermitUncheckedError(); // In case it does not exist
// Currently we only support roll by time-to-roll and log size // Currently we only support roll by time-to-roll and log size
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) { if (options.log_file_time_to_roll > 0 || options.max_log_file_size > 0) {
AutoRollLogger* result = new AutoRollLogger( AutoRollLogger* result = new AutoRollLogger(
env, dbname, options.db_log_dir, options.max_log_file_size, env->GetFileSystem(), clock, dbname, options.db_log_dir,
options.log_file_time_to_roll, options.keep_log_file_num, options.max_log_file_size, options.log_file_time_to_roll,
options.info_log_level); options.keep_log_file_num, options.info_log_level);
s = result->GetStatus(); s = result->GetStatus();
if (!s.ok()) { if (!s.ok()) {
delete result; delete result;
@ -286,9 +296,9 @@ Status CreateLoggerFromOptions(const std::string& dbname,
} }
#endif // !ROCKSDB_LITE #endif // !ROCKSDB_LITE
// Open a log file in the same directory as the db // Open a log file in the same directory as the db
env->RenameFile(fname, env->RenameFile(
OldInfoLogFileName(dbname, env->NowMicros(), db_absolute_path, fname, OldInfoLogFileName(dbname, clock->NowMicros(), db_absolute_path,
options.db_log_dir)) options.db_log_dir))
.PermitUncheckedError(); .PermitUncheckedError();
s = env->NewLogger(fname, logger); s = env->NewLogger(fname, logger);
if (logger->get() != nullptr) { if (logger->get() != nullptr) {

@ -18,14 +18,18 @@
#include "util/mutexlock.h" #include "util/mutexlock.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
class FileSystem;
class SystemClock;
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
// Rolls the log file by size and/or time // Rolls the log file by size and/or time
class AutoRollLogger : public Logger { class AutoRollLogger : public Logger {
public: public:
AutoRollLogger(Env* env, const std::string& dbname, AutoRollLogger(const std::shared_ptr<FileSystem>& fs,
const std::string& db_log_dir, size_t log_max_size, const std::shared_ptr<SystemClock>& clock,
size_t log_file_time_to_roll, size_t keep_log_file_num, const std::string& dbname, const std::string& db_log_dir,
size_t log_max_size, size_t log_file_time_to_roll,
size_t keep_log_file_num,
const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL); const InfoLogLevel log_level = InfoLogLevel::INFO_LEVEL);
using Logger::Logv; using Logger::Logv;
@ -134,7 +138,8 @@ class AutoRollLogger : public Logger {
std::string dbname_; std::string dbname_;
std::string db_log_dir_; std::string db_log_dir_;
std::string db_absolute_path_; std::string db_absolute_path_;
Env* env_; std::shared_ptr<FileSystem> fs_;
std::shared_ptr<SystemClock> clock_;
std::shared_ptr<Logger> logger_; std::shared_ptr<Logger> logger_;
// current status of the logger // current status of the logger
Status status_; Status status_;
@ -148,11 +153,13 @@ class AutoRollLogger : public Logger {
// Full path is stored here. It consumes significantly more memory // Full path is stored here. It consumes significantly more memory
// than only storing file name. Can optimize if it causes a problem. // than only storing file name. Can optimize if it causes a problem.
std::queue<std::string> old_log_files_; std::queue<std::string> old_log_files_;
// to avoid frequent env->NowMicros() calls, we cache the current time // to avoid frequent clock->NowMicros() calls, we cache the current time
uint64_t cached_now; uint64_t cached_now;
uint64_t ctime_; uint64_t ctime_;
uint64_t cached_now_access_count; uint64_t cached_now_access_count;
uint64_t call_NowMicros_every_N_records_; uint64_t call_NowMicros_every_N_records_;
IOOptions io_options_;
IODebugContext io_context_;
mutable port::Mutex mutex_; mutable port::Mutex mutex_;
}; };
#endif // !ROCKSDB_LITE #endif // !ROCKSDB_LITE

@ -7,7 +7,9 @@
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
#include "logging/auto_roll_logger.h" #include "logging/auto_roll_logger.h"
#include <sys/stat.h> #include <sys/stat.h>
#include <algorithm> #include <algorithm>
#include <cmath> #include <cmath>
#include <fstream> #include <fstream>
@ -16,18 +18,24 @@
#include <string> #include <string>
#include <thread> #include <thread>
#include <vector> #include <vector>
#include "logging/logging.h" #include "logging/logging.h"
#include "port/port.h" #include "port/port.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "rocksdb/file_system.h"
#include "rocksdb/system_clock.h"
#include "test_util/sync_point.h" #include "test_util/sync_point.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
namespace { namespace {
class NoSleepEnv : public EnvWrapper { class NoSleepClock : public SystemClockWrapper {
public: public:
NoSleepEnv(Env* base) : EnvWrapper(base) {} NoSleepClock(
const std::shared_ptr<SystemClock>& base = SystemClock::Default())
: SystemClockWrapper(base) {}
const char* Name() const override { return "NoSleepClock"; }
void SleepForMicroseconds(int micros) override { void SleepForMicroseconds(int micros) override {
fake_time_ += static_cast<uint64_t>(micros); fake_time_ += static_cast<uint64_t>(micros);
} }
@ -75,7 +83,9 @@ class AutoRollLoggerTest : public testing::Test {
void RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size, void RollLogFileBySizeTest(AutoRollLogger* logger, size_t log_max_size,
const std::string& log_message); const std::string& log_message);
void RollLogFileByTimeTest(Env*, AutoRollLogger* logger, size_t time, void RollLogFileByTimeTest(const std::shared_ptr<FileSystem>& fs,
const std::shared_ptr<SystemClock>& sc,
AutoRollLogger* logger, size_t time,
const std::string& log_message); const std::string& log_message);
// return list of files under kTestDir that contains "LOG" // return list of files under kTestDir that contains "LOG"
std::vector<std::string> GetLogFiles() { std::vector<std::string> GetLogFiles() {
@ -156,21 +166,22 @@ void AutoRollLoggerTest::RollLogFileBySizeTest(AutoRollLogger* logger,
ASSERT_TRUE(message_size == logger->GetLogFileSize()); ASSERT_TRUE(message_size == logger->GetLogFileSize());
} }
void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger, void AutoRollLoggerTest::RollLogFileByTimeTest(
size_t time, const std::shared_ptr<FileSystem>& fs,
const std::string& log_message) { const std::shared_ptr<SystemClock>& sc, AutoRollLogger* logger, size_t time,
const std::string& log_message) {
uint64_t expected_ctime; uint64_t expected_ctime;
uint64_t actual_ctime; uint64_t actual_ctime;
uint64_t total_log_size; uint64_t total_log_size;
EXPECT_OK(env->GetFileSize(kLogFile, &total_log_size)); EXPECT_OK(fs->GetFileSize(kLogFile, IOOptions(), &total_log_size, nullptr));
expected_ctime = logger->TEST_ctime(); expected_ctime = logger->TEST_ctime();
logger->SetCallNowMicrosEveryNRecords(0); logger->SetCallNowMicrosEveryNRecords(0);
// -- Write to the log for several times, which is supposed // -- Write to the log for several times, which is supposed
// to be finished before time. // to be finished before time.
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
env->SleepForMicroseconds(50000); sc->SleepForMicroseconds(50000);
LogMessage(logger, log_message.c_str()); LogMessage(logger, log_message.c_str());
EXPECT_OK(logger->GetStatus()); EXPECT_OK(logger->GetStatus());
// Make sure we always write to the same log file (by // Make sure we always write to the same log file (by
@ -185,7 +196,7 @@ void AutoRollLoggerTest::RollLogFileByTimeTest(Env* env, AutoRollLogger* logger,
} }
// -- Make the log file expire // -- Make the log file expire
env->SleepForMicroseconds(static_cast<int>(time * 1000000)); sc->SleepForMicroseconds(static_cast<int>(time * 1000000));
LogMessage(logger, log_message.c_str()); LogMessage(logger, log_message.c_str());
// At this time, the new log file should be created. // At this time, the new log file should be created.
@ -199,15 +210,15 @@ TEST_F(AutoRollLoggerTest, RollLogFileBySize) {
size_t log_max_size = 1024 * 5; size_t log_max_size = 1024 * 5;
size_t keep_log_file_num = 10; size_t keep_log_file_num = 10;
AutoRollLogger logger(Env::Default(), kTestDir, "", log_max_size, 0, AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
keep_log_file_num); kTestDir, "", log_max_size, 0, keep_log_file_num);
RollLogFileBySizeTest(&logger, log_max_size, RollLogFileBySizeTest(&logger, log_max_size,
kSampleMessage + ":RollLogFileBySize"); kSampleMessage + ":RollLogFileBySize");
} }
TEST_F(AutoRollLoggerTest, RollLogFileByTime) { TEST_F(AutoRollLoggerTest, RollLogFileByTime) {
NoSleepEnv nse(Env::Default()); auto nsc = std::make_shared<NoSleepClock>();
size_t time = 2; size_t time = 2;
size_t log_size = 1024 * 5; size_t log_size = 1024 * 5;
@ -216,10 +227,11 @@ TEST_F(AutoRollLoggerTest, RollLogFileByTime) {
InitTestDb(); InitTestDb();
// -- Test the existence of file during the server restart. // -- Test the existence of file during the server restart.
ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile)); ASSERT_EQ(Status::NotFound(), default_env->FileExists(kLogFile));
AutoRollLogger logger(&nse, kTestDir, "", log_size, time, keep_log_file_num); AutoRollLogger logger(default_env->GetFileSystem(), nsc, kTestDir, "",
log_size, time, keep_log_file_num);
ASSERT_OK(default_env->FileExists(kLogFile)); ASSERT_OK(default_env->FileExists(kLogFile));
RollLogFileByTimeTest(&nse, &logger, time, RollLogFileByTimeTest(default_env->GetFileSystem(), nsc, &logger, time,
kSampleMessage + ":RollLogFileByTime"); kSampleMessage + ":RollLogFileByTime");
} }
@ -254,15 +266,17 @@ TEST_F(AutoRollLoggerTest, OpenLogFilesMultipleTimesWithOptionLog_max_size) {
size_t log_size = 1024; size_t log_size = 1024;
size_t keep_log_file_num = 10; size_t keep_log_file_num = 10;
AutoRollLogger* logger = new AutoRollLogger(Env::Default(), kTestDir, "", AutoRollLogger* logger =
log_size, 0, keep_log_file_num); new AutoRollLogger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_size, 0, keep_log_file_num);
LogMessage(logger, kSampleMessage.c_str()); LogMessage(logger, kSampleMessage.c_str());
ASSERT_GT(logger->GetLogFileSize(), kZero); ASSERT_GT(logger->GetLogFileSize(), kZero);
delete logger; delete logger;
// reopens the log file and an empty log file will be created. // reopens the log file and an empty log file will be created.
logger = new AutoRollLogger(Env::Default(), kTestDir, "", log_size, 0, 10); logger = new AutoRollLogger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_size, 0, 10);
ASSERT_EQ(logger->GetLogFileSize(), kZero); ASSERT_EQ(logger->GetLogFileSize(), kZero);
delete logger; delete logger;
} }
@ -273,16 +287,16 @@ TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) {
InitTestDb(); InitTestDb();
NoSleepEnv nse(Env::Default()); auto nsc = std::make_shared<NoSleepClock>();
AutoRollLogger logger(&nse, kTestDir, "", log_max_size, time, AutoRollLogger logger(FileSystem::Default(), nsc, kTestDir, "", log_max_size,
keep_log_file_num); time, keep_log_file_num);
// Test the ability to roll by size // Test the ability to roll by size
RollLogFileBySizeTest(&logger, log_max_size, RollLogFileBySizeTest(&logger, log_max_size,
kSampleMessage + ":CompositeRollByTimeAndSizeLogger"); kSampleMessage + ":CompositeRollByTimeAndSizeLogger");
// Test the ability to roll by Time // Test the ability to roll by Time
RollLogFileByTimeTest(&nse, &logger, time, RollLogFileByTimeTest(FileSystem::Default(), nsc, &logger, time,
kSampleMessage + ":CompositeRollByTimeAndSizeLogger"); kSampleMessage + ":CompositeRollByTimeAndSizeLogger");
} }
@ -291,7 +305,9 @@ TEST_F(AutoRollLoggerTest, CompositeRollByTimeAndSizeLogger) {
// port // port
TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) { TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
DBOptions options; DBOptions options;
NoSleepEnv nse(Env::Default()); auto nsc = std::make_shared<NoSleepClock>();
std::unique_ptr<Env> nse(new CompositeEnvWrapper(Env::Default(), nsc));
std::shared_ptr<Logger> logger; std::shared_ptr<Logger> logger;
// Normal logger // Normal logger
@ -310,14 +326,15 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
kSampleMessage + ":CreateLoggerFromOptions - size"); kSampleMessage + ":CreateLoggerFromOptions - size");
// Only roll by Time // Only roll by Time
options.env = &nse; options.env = nse.get();
InitTestDb(); InitTestDb();
options.max_log_file_size = 0; options.max_log_file_size = 0;
options.log_file_time_to_roll = 2; options.log_file_time_to_roll = 2;
ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger)); ASSERT_OK(CreateLoggerFromOptions(kTestDir, options, &logger));
auto_roll_logger = auto_roll_logger =
dynamic_cast<AutoRollLogger*>(logger.get()); dynamic_cast<AutoRollLogger*>(logger.get());
RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll, RollLogFileByTimeTest(options.env->GetFileSystem(), nsc, auto_roll_logger,
options.log_file_time_to_roll,
kSampleMessage + ":CreateLoggerFromOptions - time"); kSampleMessage + ":CreateLoggerFromOptions - time");
// roll by both Time and size // roll by both Time and size
@ -329,7 +346,8 @@ TEST_F(AutoRollLoggerTest, CreateLoggerFromOptions) {
dynamic_cast<AutoRollLogger*>(logger.get()); dynamic_cast<AutoRollLogger*>(logger.get());
RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size, RollLogFileBySizeTest(auto_roll_logger, options.max_log_file_size,
kSampleMessage + ":CreateLoggerFromOptions - both"); kSampleMessage + ":CreateLoggerFromOptions - both");
RollLogFileByTimeTest(&nse, auto_roll_logger, options.log_file_time_to_roll, RollLogFileByTimeTest(options.env->GetFileSystem(), nsc, auto_roll_logger,
options.log_file_time_to_roll,
kSampleMessage + ":CreateLoggerFromOptions - both"); kSampleMessage + ":CreateLoggerFromOptions - both");
// Set keep_log_file_num // Set keep_log_file_num
@ -402,8 +420,8 @@ TEST_F(AutoRollLoggerTest, AutoDeleting) {
const size_t kMaxFileSize = 512; const size_t kMaxFileSize = 512;
{ {
size_t log_num = 8; size_t log_num = 8;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
log_num); dbname, db_log_dir, kMaxFileSize, 0, log_num);
RollNTimesBySize(&logger, log_num, kMaxFileSize); RollNTimesBySize(&logger, log_num, kMaxFileSize);
ASSERT_EQ(log_num, GetLogFiles().size()); ASSERT_EQ(log_num, GetLogFiles().size());
@ -411,8 +429,8 @@ TEST_F(AutoRollLoggerTest, AutoDeleting) {
// Shrink number of files // Shrink number of files
{ {
size_t log_num = 5; size_t log_num = 5;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
log_num); dbname, db_log_dir, kMaxFileSize, 0, log_num);
ASSERT_EQ(log_num, GetLogFiles().size()); ASSERT_EQ(log_num, GetLogFiles().size());
RollNTimesBySize(&logger, 3, kMaxFileSize); RollNTimesBySize(&logger, 3, kMaxFileSize);
@ -422,8 +440,8 @@ TEST_F(AutoRollLoggerTest, AutoDeleting) {
// Increase number of files again. // Increase number of files again.
{ {
size_t log_num = 7; size_t log_num = 7;
AutoRollLogger logger(Env::Default(), dbname, db_log_dir, kMaxFileSize, 0, AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
log_num); dbname, db_log_dir, kMaxFileSize, 0, log_num);
ASSERT_EQ(6, GetLogFiles().size()); ASSERT_EQ(6, GetLogFiles().size());
RollNTimesBySize(&logger, 3, kMaxFileSize); RollNTimesBySize(&logger, 3, kMaxFileSize);
@ -485,7 +503,8 @@ TEST_F(AutoRollLoggerTest, InfoLogLevel) {
// an extra-scope to force the AutoRollLogger to flush the log file when it // an extra-scope to force the AutoRollLogger to flush the log file when it
// becomes out of scope. // becomes out of scope.
{ {
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10); AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
kTestDir, "", log_size, 0, 10);
for (int log_level = InfoLogLevel::HEADER_LEVEL; for (int log_level = InfoLogLevel::HEADER_LEVEL;
log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) {
logger.SetInfoLogLevel((InfoLogLevel)log_level); logger.SetInfoLogLevel((InfoLogLevel)log_level);
@ -523,7 +542,8 @@ TEST_F(AutoRollLoggerTest, Close) {
size_t log_size = 8192; size_t log_size = 8192;
size_t log_lines = 0; size_t log_lines = 0;
AutoRollLogger logger(Env::Default(), kTestDir, "", log_size, 0, 10); AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(), kTestDir,
"", log_size, 0, 10);
for (int log_level = InfoLogLevel::HEADER_LEVEL; for (int log_level = InfoLogLevel::HEADER_LEVEL;
log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) { log_level >= InfoLogLevel::DEBUG_LEVEL; log_level--) {
logger.SetInfoLogLevel((InfoLogLevel)log_level); logger.SetInfoLogLevel((InfoLogLevel)log_level);
@ -590,8 +610,9 @@ TEST_F(AutoRollLoggerTest, LogHeaderTest) {
InitTestDb(); InitTestDb();
AutoRollLogger logger(Env::Default(), kTestDir, /*db_log_dir=*/"", AutoRollLogger logger(FileSystem::Default(), SystemClock::Default(),
LOG_MAX_SIZE, /*log_file_time_to_roll=*/0, kTestDir, /*db_log_dir=*/"", LOG_MAX_SIZE,
/*log_file_time_to_roll=*/0,
/*keep_log_file_num=*/10); /*keep_log_file_num=*/10);
if (test_num == 0) { if (test_num == 0) {

@ -31,7 +31,7 @@ class EnvLogger : public Logger {
const std::string& fname, const EnvOptions& options, Env* env, const std::string& fname, const EnvOptions& options, Env* env,
InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL) InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL)
: Logger(log_level), : Logger(log_level),
file_(std::move(writable_file), fname, options, env), file_(std::move(writable_file), fname, options, env->GetSystemClock()),
last_flush_micros_(0), last_flush_micros_(0),
env_(env), env_(env),
flush_pending_(false) {} flush_pending_(false) {}

@ -31,6 +31,7 @@ int main() {
#include "rocksdb/memtablerep.h" #include "rocksdb/memtablerep.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/slice_transform.h" #include "rocksdb/slice_transform.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/write_buffer_manager.h" #include "rocksdb/write_buffer_manager.h"
#include "test_util/testutil.h" #include "test_util/testutil.h"
#include "util/gflags_compat.h" #include "util/gflags_compat.h"
@ -417,7 +418,7 @@ class Benchmark {
uint64_t bytes_written = 0; uint64_t bytes_written = 0;
uint64_t bytes_read = 0; uint64_t bytes_read = 0;
uint64_t read_hits = 0; uint64_t read_hits = 0;
StopWatchNano timer(Env::Default(), true); StopWatchNano timer(SystemClock::Default(), true);
RunThreads(&threads, &bytes_written, &bytes_read, true, &read_hits); RunThreads(&threads, &bytes_written, &bytes_read, true, &read_hits);
auto elapsed_time = static_cast<double>(timer.ElapsedNanos() / 1000); auto elapsed_time = static_cast<double>(timer.ElapsedNanos() / 1000);
std::cout << "Elapsed time: " << static_cast<int>(elapsed_time) << " us" std::cout << "Elapsed time: " << static_cast<int>(elapsed_time) << " us"

@ -11,10 +11,11 @@
#include "test_util/testharness.h" #include "test_util/testharness.h"
namespace ROCKSDB_NAMESPACE { namespace ROCKSDB_NAMESPACE {
const size_t kSizeDummyEntry = 256 * 1024;
class WriteBufferManagerTest : public testing::Test {}; class WriteBufferManagerTest : public testing::Test {};
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
const size_t kSizeDummyEntry = 256 * 1024;
TEST_F(WriteBufferManagerTest, ShouldFlush) { TEST_F(WriteBufferManagerTest, ShouldFlush) {
// A write buffer manager of size 10MB // A write buffer manager of size 10MB
std::unique_ptr<WriteBufferManager> wbf( std::unique_ptr<WriteBufferManager> wbf(

@ -8,6 +8,7 @@
#include <cmath> #include <cmath>
#include "monitoring/histogram_windowing.h" #include "monitoring/histogram_windowing.h"
#include "rocksdb/system_clock.h"
#include "test_util/mock_time_env.h" #include "test_util/mock_time_env.h"
#include "test_util/testharness.h" #include "test_util/testharness.h"
#include "util/random.h" #include "util/random.h"
@ -19,7 +20,8 @@ class HistogramTest : public testing::Test {};
namespace { namespace {
const double kIota = 0.1; const double kIota = 0.1;
const HistogramBucketMapper bucketMapper; const HistogramBucketMapper bucketMapper;
MockTimeEnv* env = new MockTimeEnv(Env::Default()); std::shared_ptr<MockSystemClock> clock =
std::make_shared<MockSystemClock>(SystemClock::Default());
} }
void PopulateHistogram(Histogram& histogram, void PopulateHistogram(Histogram& histogram,
@ -29,11 +31,11 @@ void PopulateHistogram(Histogram& histogram,
for (uint64_t i = low; i <= high; i++) { for (uint64_t i = low; i <= high; i++) {
histogram.Add(i); histogram.Add(i);
// sleep a random microseconds [0-10) // sleep a random microseconds [0-10)
env->MockSleepForMicroseconds(rnd.Uniform(10)); clock->MockSleepForMicroseconds(rnd.Uniform(10));
} }
} }
// make sure each data population at least take some time // make sure each data population at least take some time
env->MockSleepForMicroseconds(1); clock->MockSleepForMicroseconds(1);
} }
void BasicOperation(Histogram& histogram) { void BasicOperation(Histogram& histogram) {
@ -139,23 +141,23 @@ TEST_F(HistogramTest, HistogramWindowingExpire) {
HistogramWindowingImpl HistogramWindowingImpl
histogramWindowing(num_windows, micros_per_window, min_num_per_window); histogramWindowing(num_windows, micros_per_window, min_num_per_window);
histogramWindowing.TEST_UpdateEnv(env); histogramWindowing.TEST_UpdateClock(clock);
PopulateHistogram(histogramWindowing, 1, 1, 100); PopulateHistogram(histogramWindowing, 1, 1, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 100); ASSERT_EQ(histogramWindowing.num(), 100);
ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.min(), 1);
ASSERT_EQ(histogramWindowing.max(), 1); ASSERT_EQ(histogramWindowing.max(), 1);
ASSERT_EQ(histogramWindowing.Average(), 1); ASSERT_EQ(histogramWindowing.Average(), 1);
PopulateHistogram(histogramWindowing, 2, 2, 100); PopulateHistogram(histogramWindowing, 2, 2, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 200); ASSERT_EQ(histogramWindowing.num(), 200);
ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.min(), 1);
ASSERT_EQ(histogramWindowing.max(), 2); ASSERT_EQ(histogramWindowing.max(), 2);
ASSERT_EQ(histogramWindowing.Average(), 1.5); ASSERT_EQ(histogramWindowing.Average(), 1.5);
PopulateHistogram(histogramWindowing, 3, 3, 100); PopulateHistogram(histogramWindowing, 3, 3, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 300); ASSERT_EQ(histogramWindowing.num(), 300);
ASSERT_EQ(histogramWindowing.min(), 1); ASSERT_EQ(histogramWindowing.min(), 1);
ASSERT_EQ(histogramWindowing.max(), 3); ASSERT_EQ(histogramWindowing.max(), 3);
@ -163,7 +165,7 @@ TEST_F(HistogramTest, HistogramWindowingExpire) {
// dropping oldest window with value 1, remaining 2 ~ 4 // dropping oldest window with value 1, remaining 2 ~ 4
PopulateHistogram(histogramWindowing, 4, 4, 100); PopulateHistogram(histogramWindowing, 4, 4, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 300); ASSERT_EQ(histogramWindowing.num(), 300);
ASSERT_EQ(histogramWindowing.min(), 2); ASSERT_EQ(histogramWindowing.min(), 2);
ASSERT_EQ(histogramWindowing.max(), 4); ASSERT_EQ(histogramWindowing.max(), 4);
@ -171,7 +173,7 @@ TEST_F(HistogramTest, HistogramWindowingExpire) {
// dropping oldest window with value 2, remaining 3 ~ 5 // dropping oldest window with value 2, remaining 3 ~ 5
PopulateHistogram(histogramWindowing, 5, 5, 100); PopulateHistogram(histogramWindowing, 5, 5, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 300); ASSERT_EQ(histogramWindowing.num(), 300);
ASSERT_EQ(histogramWindowing.min(), 3); ASSERT_EQ(histogramWindowing.min(), 3);
ASSERT_EQ(histogramWindowing.max(), 5); ASSERT_EQ(histogramWindowing.max(), 5);
@ -187,20 +189,20 @@ TEST_F(HistogramTest, HistogramWindowingMerge) {
histogramWindowing(num_windows, micros_per_window, min_num_per_window); histogramWindowing(num_windows, micros_per_window, min_num_per_window);
HistogramWindowingImpl HistogramWindowingImpl
otherWindowing(num_windows, micros_per_window, min_num_per_window); otherWindowing(num_windows, micros_per_window, min_num_per_window);
histogramWindowing.TEST_UpdateEnv(env); histogramWindowing.TEST_UpdateClock(clock);
otherWindowing.TEST_UpdateEnv(env); otherWindowing.TEST_UpdateClock(clock);
PopulateHistogram(histogramWindowing, 1, 1, 100); PopulateHistogram(histogramWindowing, 1, 1, 100);
PopulateHistogram(otherWindowing, 1, 1, 100); PopulateHistogram(otherWindowing, 1, 1, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
PopulateHistogram(histogramWindowing, 2, 2, 100); PopulateHistogram(histogramWindowing, 2, 2, 100);
PopulateHistogram(otherWindowing, 2, 2, 100); PopulateHistogram(otherWindowing, 2, 2, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
PopulateHistogram(histogramWindowing, 3, 3, 100); PopulateHistogram(histogramWindowing, 3, 3, 100);
PopulateHistogram(otherWindowing, 3, 3, 100); PopulateHistogram(otherWindowing, 3, 3, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
histogramWindowing.Merge(otherWindowing); histogramWindowing.Merge(otherWindowing);
ASSERT_EQ(histogramWindowing.num(), 600); ASSERT_EQ(histogramWindowing.num(), 600);
@ -210,14 +212,14 @@ TEST_F(HistogramTest, HistogramWindowingMerge) {
// dropping oldest window with value 1, remaining 2 ~ 4 // dropping oldest window with value 1, remaining 2 ~ 4
PopulateHistogram(histogramWindowing, 4, 4, 100); PopulateHistogram(histogramWindowing, 4, 4, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 500); ASSERT_EQ(histogramWindowing.num(), 500);
ASSERT_EQ(histogramWindowing.min(), 2); ASSERT_EQ(histogramWindowing.min(), 2);
ASSERT_EQ(histogramWindowing.max(), 4); ASSERT_EQ(histogramWindowing.max(), 4);
// dropping oldest window with value 2, remaining 3 ~ 5 // dropping oldest window with value 2, remaining 3 ~ 5
PopulateHistogram(histogramWindowing, 5, 5, 100); PopulateHistogram(histogramWindowing, 5, 5, 100);
env->MockSleepForMicroseconds(micros_per_window); clock->MockSleepForMicroseconds(micros_per_window);
ASSERT_EQ(histogramWindowing.num(), 400); ASSERT_EQ(histogramWindowing.num(), 400);
ASSERT_EQ(histogramWindowing.min(), 3); ASSERT_EQ(histogramWindowing.min(), 3);
ASSERT_EQ(histogramWindowing.max(), 5); ASSERT_EQ(histogramWindowing.max(), 5);

monitoring/histogram_windowing.cc

@@ -8,15 +8,17 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #include "monitoring/histogram_windowing.h"
-#include "monitoring/histogram.h"
-#include "util/cast_util.h"
 #include <algorithm>
+#include "monitoring/histogram.h"
+#include "rocksdb/system_clock.h"
+#include "util/cast_util.h"
 namespace ROCKSDB_NAMESPACE {
 HistogramWindowingImpl::HistogramWindowingImpl() {
-  env_ = Env::Default();
+  clock_ = SystemClock::Default();
   window_stats_.reset(new HistogramStat[static_cast<size_t>(num_windows_)]);
   Clear();
 }
@@ -28,7 +30,7 @@ HistogramWindowingImpl::HistogramWindowingImpl(
       num_windows_(num_windows),
       micros_per_window_(micros_per_window),
       min_num_per_window_(min_num_per_window) {
-  env_ = Env::Default();
+  clock_ = SystemClock::Default();
   window_stats_.reset(new HistogramStat[static_cast<size_t>(num_windows_)]);
   Clear();
 }
@@ -44,7 +46,7 @@ void HistogramWindowingImpl::Clear() {
     window_stats_[i].Clear();
   }
   current_window_.store(0, std::memory_order_relaxed);
-  last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed);
+  last_swap_time_.store(clock_->NowMicros(), std::memory_order_relaxed);
 }
 bool HistogramWindowingImpl::Empty() const { return stats_.Empty(); }
@@ -129,7 +131,7 @@ void HistogramWindowingImpl::Data(HistogramData * const data) const {
 }
 void HistogramWindowingImpl::TimerTick() {
-  uint64_t curr_time = env_->NowMicros();
+  uint64_t curr_time = clock_->NowMicros();
   size_t curr_window_ = static_cast<size_t>(current_window());
   if (curr_time - last_swap_time() > micros_per_window_ &&
       window_stats_[curr_window_].num() >= min_num_per_window_) {
@@ -144,7 +146,7 @@ void HistogramWindowingImpl::SwapHistoryBucket() {
   // If mutex is held by Merge() or Clear(), next Add() will take care of the
   // swap, if needed.
   if (mutex_.try_lock()) {
-    last_swap_time_.store(env_->NowMicros(), std::memory_order_relaxed);
+    last_swap_time_.store(clock_->NowMicros(), std::memory_order_relaxed);
     uint64_t curr_window = current_window();
     uint64_t next_window = (curr_window == num_windows_ - 1) ?

monitoring/histogram_windowing.h

@@ -10,9 +10,9 @@
 #pragma once
 #include "monitoring/histogram.h"
-#include "rocksdb/env.h"
 namespace ROCKSDB_NAMESPACE {
+class SystemClock;
 class HistogramWindowingImpl : public Histogram
 {
@@ -45,7 +45,9 @@ public:
   virtual void Data(HistogramData* const data) const override;
 #ifndef NDEBUG
-  void TEST_UpdateEnv(Env* env) { env_ = env; }
+  void TEST_UpdateClock(const std::shared_ptr<SystemClock>& clock) {
+    clock_ = clock;
+  }
 #endif // NDEBUG
 private:
@@ -58,7 +60,7 @@ public:
     return last_swap_time_.load(std::memory_order_relaxed);
   }
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
   std::mutex mutex_;
   // Aggregated stats over windows_stats_, all the computation is done
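
Illustration only, not part of this diff: a minimal sketch of how a test could drive the windowed histogram against an injected clock. The MockSystemClock wrapper, its constructor argument, and the window configuration below are assumptions based on the hunks in this PR, and TEST_UpdateClock is only compiled in non-NDEBUG builds.

#include <memory>
#include "monitoring/histogram_windowing.h"
#include "rocksdb/system_clock.h"
#include "test_util/mock_time_env.h"  // MockSystemClock (test-only helper)

void ExerciseWindowedHistogram() {
  using namespace ROCKSDB_NAMESPACE;
  auto mock_clock = std::make_shared<MockSystemClock>(SystemClock::Default());
  // Hypothetical configuration: 3 windows of 1 second each, min 1 sample.
  HistogramWindowingImpl histogram(/*num_windows=*/3,
                                   /*micros_per_window=*/1000000,
                                   /*min_num_per_window=*/1);
  histogram.TEST_UpdateClock(mock_clock);  // redirect NowMicros() to the mock
  histogram.Add(2);
  mock_clock->MockSleepForMicroseconds(1000000);  // advance past one window
  histogram.Add(4);  // recorded after the window boundary
}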

monitoring/instrumented_mutex.cc

@@ -4,15 +4,18 @@
 // (found in the LICENSE.Apache file in the root directory).
 #include "monitoring/instrumented_mutex.h"
 #include "monitoring/perf_context_imp.h"
 #include "monitoring/thread_status_util.h"
+#include "rocksdb/system_clock.h"
 #include "test_util/sync_point.h"
 namespace ROCKSDB_NAMESPACE {
 namespace {
 #ifndef NPERF_CONTEXT
-Statistics* stats_for_report(Env* env, Statistics* stats) {
-  if (env != nullptr && stats != nullptr &&
+Statistics* stats_for_report(const std::shared_ptr<SystemClock>& clock,
+                             Statistics* stats) {
+  if (clock.get() != nullptr && stats != nullptr &&
       stats->get_stats_level() > kExceptTimeForMutex) {
     return stats;
   } else {
@@ -25,7 +28,7 @@ Statistics* stats_for_report(Env* env, Statistics* stats) {
 void InstrumentedMutex::Lock() {
   PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
       db_mutex_lock_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
-      stats_for_report(env_, stats_), stats_code_);
+      stats_for_report(clock_, stats_), stats_code_);
   LockInternal();
 }
@@ -39,7 +42,7 @@ void InstrumentedMutex::LockInternal() {
 void InstrumentedCondVar::Wait() {
   PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
       db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
-      stats_for_report(env_, stats_), stats_code_);
+      stats_for_report(clock_, stats_), stats_code_);
   WaitInternal();
 }
@@ -53,7 +56,7 @@ void InstrumentedCondVar::WaitInternal() {
 bool InstrumentedCondVar::TimedWait(uint64_t abs_time_us) {
   PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(
       db_condition_wait_nanos, stats_code_ == DB_MUTEX_WAIT_MICROS,
-      stats_for_report(env_, stats_), stats_code_);
+      stats_for_report(clock_, stats_), stats_code_);
   return TimedWaitInternal(abs_time_us);
 }

monitoring/instrumented_mutex.h

@@ -7,8 +7,8 @@
 #include "monitoring/statistics.h"
 #include "port/port.h"
-#include "rocksdb/env.h"
 #include "rocksdb/statistics.h"
+#include "rocksdb/system_clock.h"
 #include "rocksdb/thread_status.h"
 #include "util/stop_watch.h"
@@ -20,13 +20,18 @@ class InstrumentedCondVar;
 class InstrumentedMutex {
  public:
   explicit InstrumentedMutex(bool adaptive = false)
-      : mutex_(adaptive), stats_(nullptr), env_(nullptr),
-        stats_code_(0) {}
+      : mutex_(adaptive), stats_(nullptr), clock_(nullptr), stats_code_(0) {}
+
+  explicit InstrumentedMutex(const std::shared_ptr<SystemClock>& clock,
+                             bool adaptive = false)
+      : mutex_(adaptive), stats_(nullptr), clock_(clock), stats_code_(0) {}

-  InstrumentedMutex(
-      Statistics* stats, Env* env,
-      int stats_code, bool adaptive = false)
-      : mutex_(adaptive), stats_(stats), env_(env),
+  InstrumentedMutex(Statistics* stats,
+                    const std::shared_ptr<SystemClock>& clock, int stats_code,
+                    bool adaptive = false)
+      : mutex_(adaptive),
+        stats_(stats),
+        clock_(clock),
         stats_code_(stats_code) {}
   void Lock();
@@ -44,7 +49,7 @@ class InstrumentedMutex {
   friend class InstrumentedCondVar;
   port::Mutex mutex_;
   Statistics* stats_;
-  Env* env_;
+  std::shared_ptr<SystemClock> clock_;
   int stats_code_;
 };
@@ -71,7 +76,7 @@ class InstrumentedCondVar {
   explicit InstrumentedCondVar(InstrumentedMutex* instrumented_mutex)
       : cond_(&(instrumented_mutex->mutex_)),
         stats_(instrumented_mutex->stats_),
-        env_(instrumented_mutex->env_),
+        clock_(instrumented_mutex->clock_),
         stats_code_(instrumented_mutex->stats_code_) {}
   void Wait();
@@ -91,7 +96,7 @@ class InstrumentedCondVar {
   bool TimedWaitInternal(uint64_t abs_time_us);
   port::CondVar cond_;
   Statistics* stats_;
-  Env* env_;
+  const std::shared_ptr<SystemClock> clock_;
   int stats_code_;
 };
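
Illustration only, not from the PR: a rough sketch of exercising the new SystemClock-taking constructor. DB_MUTEX_WAIT_MICROS is the ticker already referenced by the .cc hunks above; the bare Lock()/Unlock() pairing is assumed to work as in the existing class.

#include <memory>
#include "monitoring/instrumented_mutex.h"
#include "rocksdb/statistics.h"
#include "rocksdb/system_clock.h"

void LockWithClock(ROCKSDB_NAMESPACE::Statistics* stats) {
  using namespace ROCKSDB_NAMESPACE;
  std::shared_ptr<SystemClock> clock = SystemClock::Default();
  // Previously: InstrumentedMutex mutex(stats, env, DB_MUTEX_WAIT_MICROS);
  InstrumentedMutex mutex(stats, clock, DB_MUTEX_WAIT_MICROS);
  mutex.Lock();  // wait time is charged to db_mutex_lock_nanos / the ticker
  // ... critical section ...
  mutex.Unlock();
}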

monitoring/iostats_context_imp.h

@@ -38,9 +38,9 @@ extern __thread IOStatsContext iostats_context;
   iostats_step_timer_##metric.Start();
 // Declare and set start time of the timer
-#define IOSTATS_CPU_TIMER_GUARD(metric, env) \
+#define IOSTATS_CPU_TIMER_GUARD(metric, clock) \
   PerfStepTimer iostats_step_timer_##metric( \
-      &(iostats_context.metric), env, true, \
+      &(iostats_context.metric), clock, true, \
       PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
   iostats_step_timer_##metric.Start();
@@ -55,6 +55,6 @@ extern __thread IOStatsContext iostats_context;
 #define IOSTATS(metric) 0
 #define IOSTATS_TIMER_GUARD(metric)
-#define IOSTATS_CPU_TIMER_GUARD(metric, env) static_cast<void>(env)
+#define IOSTATS_CPU_TIMER_GUARD(metric, clock) static_cast<void>(clock)
 #endif // ROCKSDB_SUPPORT_THREAD_LOCAL

monitoring/perf_context_imp.h

@@ -25,8 +25,8 @@ extern thread_local PerfContext perf_context;
 #define PERF_TIMER_STOP(metric)
 #define PERF_TIMER_START(metric)
 #define PERF_TIMER_GUARD(metric)
-#define PERF_TIMER_GUARD_WITH_ENV(metric, env)
-#define PERF_CPU_TIMER_GUARD(metric, env)
+#define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock)
+#define PERF_CPU_TIMER_GUARD(metric, clock)
 #define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition, stats, \
                                                ticker_type)
 #define PERF_TIMER_MEASURE(metric)
@@ -46,14 +46,14 @@ extern thread_local PerfContext perf_context;
   perf_step_timer_##metric.Start();
 // Declare and set start time of the timer
-#define PERF_TIMER_GUARD_WITH_ENV(metric, env) \
-  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), env); \
+#define PERF_TIMER_GUARD_WITH_CLOCK(metric, clock) \
+  PerfStepTimer perf_step_timer_##metric(&(perf_context.metric), clock); \
   perf_step_timer_##metric.Start();
 // Declare and set start time of the timer
-#define PERF_CPU_TIMER_GUARD(metric, env) \
+#define PERF_CPU_TIMER_GUARD(metric, clock) \
   PerfStepTimer perf_step_timer_##metric( \
-      &(perf_context.metric), env, true, \
+      &(perf_context.metric), clock, true, \
       PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
   perf_step_timer_##metric.Start();
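
A hedged usage sketch of the renamed macro, not part of this diff; get_from_memtable_time is simply an existing PerfContext counter picked for illustration.

#include <memory>
#include "monitoring/perf_context_imp.h"
#include "rocksdb/system_clock.h"

void TimedSection(
    const std::shared_ptr<ROCKSDB_NAMESPACE::SystemClock>& clock) {
  // Formerly spelled PERF_TIMER_GUARD_WITH_ENV(metric, env).
  PERF_TIMER_GUARD_WITH_CLOCK(get_from_memtable_time, clock);
  // ... work measured into perf_context.get_from_memtable_time; the guard's
  // timer is started here and stopped when it goes out of scope.
}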

monitoring/perf_step_timer.h

@@ -5,22 +5,23 @@
 //
 #pragma once
 #include "monitoring/perf_level_imp.h"
-#include "rocksdb/env.h"
-#include "util/stop_watch.h"
+#include "monitoring/statistics.h"
+#include "rocksdb/system_clock.h"
 namespace ROCKSDB_NAMESPACE {
 class PerfStepTimer {
  public:
   explicit PerfStepTimer(
-      uint64_t* metric, Env* env = nullptr, bool use_cpu_time = false,
+      uint64_t* metric, const std::shared_ptr<SystemClock>& clock = nullptr,
+      bool use_cpu_time = false,
       PerfLevel enable_level = PerfLevel::kEnableTimeExceptForMutex,
       Statistics* statistics = nullptr, uint32_t ticker_type = 0)
       : perf_counter_enabled_(perf_level >= enable_level),
         use_cpu_time_(use_cpu_time),
-        env_((perf_counter_enabled_ || statistics != nullptr)
-                 ? ((env != nullptr) ? env : Env::Default())
+        clock_((perf_counter_enabled_ || statistics != nullptr)
+                   ? ((clock.get() != nullptr) ? clock : SystemClock::Default())
                    : nullptr),
         start_(0),
         metric_(metric),
         statistics_(statistics),
@@ -36,14 +37,6 @@ class PerfStepTimer {
     }
   }
-  uint64_t time_now() {
-    if (!use_cpu_time_) {
-      return env_->NowNanos();
-    } else {
-      return env_->NowCPUNanos();
-    }
-  }
   void Measure() {
     if (start_) {
       uint64_t now = time_now();
@@ -67,9 +60,17 @@ class PerfStepTimer {
   }
  private:
+  uint64_t time_now() {
+    if (!use_cpu_time_) {
+      return clock_->NowNanos();
+    } else {
+      return clock_->CPUNanos();
+    }
+  }
+
   const bool perf_counter_enabled_;
   const bool use_cpu_time_;
-  Env* const env_;
+  std::shared_ptr<SystemClock> clock_;
   uint64_t start_;
   uint64_t* metric_;
   Statistics* statistics_;
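
A direct construction against the signature shown above, as a rough sketch only; whether anything is recorded still depends on the active perf level, exactly as before.

#include <memory>
#include "monitoring/perf_step_timer.h"
#include "rocksdb/system_clock.h"

void MeasureInto(uint64_t* nanos_metric) {
  ROCKSDB_NAMESPACE::PerfStepTimer timer(
      nanos_metric, ROCKSDB_NAMESPACE::SystemClock::Default());
  timer.Start();
  // ... work to be timed ...
  timer.Measure();  // accumulates elapsed nanoseconds into *nanos_metric
                    // (only when the perf level enables the timer)
}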

monitoring/stats_history_test.cc

@@ -22,6 +22,7 @@
 #include "rocksdb/cache.h"
 #include "rocksdb/convenience.h"
 #include "rocksdb/rate_limiter.h"
+#include "test_util/mock_time_env.h"
 #include "test_util/sync_point.h"
 #include "test_util/testutil.h"
 #include "util/random.h"
@@ -33,20 +34,22 @@ class StatsHistoryTest : public DBTestBase {
  public:
   StatsHistoryTest()
       : DBTestBase("/stats_history_test", /*env_do_fsync=*/true) {
-    mock_env_.reset(new MockTimeEnv(env_));
+    mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
+    mock_env_.reset(new CompositeEnvWrapper(env_, mock_clock_));
   }
  protected:
-  std::unique_ptr<MockTimeEnv> mock_env_;
+  std::shared_ptr<MockSystemClock> mock_clock_;
+  std::unique_ptr<Env> mock_env_;
   void SetUp() override {
-    mock_env_->InstallTimedWaitFixCallback();
+    mock_clock_->InstallTimedWaitFixCallback();
     SyncPoint::GetInstance()->SetCallBack(
         "DBImpl::StartPeriodicWorkScheduler:Init", [&](void* arg) {
           auto* periodic_work_scheduler_ptr =
               reinterpret_cast<PeriodicWorkScheduler**>(arg);
           *periodic_work_scheduler_ptr =
-              PeriodicWorkTestScheduler::Default(mock_env_.get());
+              PeriodicWorkTestScheduler::Default(mock_clock_);
         });
   }
 };
@@ -66,17 +69,17 @@ TEST_F(StatsHistoryTest, RunStatsDumpPeriodSec) {
   // Wait for the first stats persist to finish, as the initial delay could be
   // different.
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   ASSERT_GE(counter, 1);
   // Test cancel job through SetOptions
   ASSERT_OK(dbfull()->SetDBOptions({{"stats_dump_period_sec", "0"}}));
   int old_val = counter;
   for (int i = 1; i < 20; ++i) {
-    mock_env_->MockSleepForSeconds(kPeriodSec);
+    mock_clock_->MockSleepForSeconds(kPeriodSec);
   }
   ASSERT_EQ(counter, old_val);
   Close();
@@ -98,17 +101,17 @@ TEST_F(StatsHistoryTest, StatsPersistScheduling) {
   // Wait for the first stats persist to finish, as the initial delay could be
   // different.
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   ASSERT_GE(counter, 1);
   // Test cancel job through SetOptions
   ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));
   int old_val = counter;
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec * 2); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec * 2); });
   ASSERT_EQ(counter, old_val);
   Close();
@@ -130,7 +133,7 @@ TEST_F(StatsHistoryTest, PersistentStatsFreshInstall) {
   ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec);
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   ASSERT_GE(counter, 1);
   Close();
 }
@@ -149,30 +152,31 @@ TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) {
   // make sure the first stats persist to finish
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   // Wait for stats persist to finish
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   std::unique_ptr<StatsHistoryIterator> stats_iter;
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   // disabled stats snapshots
   ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));
   size_t stats_count = 0;
   for (; stats_iter->Valid(); stats_iter->Next()) {
     auto stats_map = stats_iter->GetStatsMap();
-    ASSERT_EQ(stats_iter->GetStatsTime(), mock_env_->NowSeconds());
+    ASSERT_EQ(stats_iter->GetStatsTime(), mock_clock_->NowSeconds());
     stats_count += stats_map.size();
   }
   ASSERT_GT(stats_count, 0);
   // Wait a bit and verify no more stats are found
   for (int i = 0; i < 10; ++i) {
     dbfull()->TEST_WaitForStatsDumpRun(
-        [&] { mock_env_->MockSleepForSeconds(1); });
+        [&] { mock_clock_->MockSleepForSeconds(1); });
   }
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds(), &stats_iter));
+  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   size_t stats_count_new = 0;
   for (; stats_iter->Valid(); stats_iter->Next()) {
@@ -225,11 +229,12 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
   const int kIterations = 10;
   for (int i = 0; i < kIterations; ++i) {
     dbfull()->TEST_WaitForStatsDumpRun(
-        [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+        [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   }
   std::unique_ptr<StatsHistoryIterator> stats_iter;
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   size_t stats_count = 0;
   int slice_count = 0;
@@ -248,10 +253,11 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
   // Wait for stats persist to finish
   for (int i = 0; i < kIterations; ++i) {
     dbfull()->TEST_WaitForStatsDumpRun(
-        [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+        [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   }
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   size_t stats_count_reopen = 0;
   slice_count = 0;
@@ -296,11 +302,11 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
   // Wait for the first stats persist to finish, as the initial delay could be
   // different.
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   // Wait for stats persist to finish
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   auto iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
@@ -308,14 +314,14 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
   delete iter;
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
   int key_count2 = countkeys(iter);
   delete iter;
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
   int key_count3 = countkeys(iter);
@@ -324,7 +330,8 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
   ASSERT_GE(key_count3, key_count2);
   ASSERT_EQ(key_count3 - key_count2, key_count2 - key_count1);
   std::unique_ptr<StatsHistoryIterator> stats_iter;
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   size_t stats_count = 0;
   int slice_count = 0;
@@ -345,7 +352,8 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
   ASSERT_EQ(stats_count, key_count3 - 2);
   // verify reopen will not cause data loss
   ReopenWithColumnFamilies({"default", "pikachu"}, options);
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   size_t stats_count_reopen = 0;
   int slice_count_reopen = 0;
@@ -387,37 +395,38 @@ TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) {
   // Wait for the first stats persist to finish, as the initial delay could be
   // different.
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   // Wait for stats persist to finish
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   auto iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
   countkeys(iter);
   delete iter;
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
   countkeys(iter);
   delete iter;
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
   countkeys(iter);
   delete iter;
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   std::map<std::string, uint64_t> stats_map_after;
   ASSERT_TRUE(options.statistics->getTickerMap(&stats_map_after));
   std::unique_ptr<StatsHistoryIterator> stats_iter;
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   std::string sample = "rocksdb.num.iterator.deleted";
   uint64_t recovered_value = 0;
@@ -434,7 +443,8 @@ TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) {
   // test stats value retains after recovery
   ReopenWithColumnFamilies({"default", "pikachu"}, options);
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds() + 1, &stats_iter));
+  ASSERT_OK(
+      db_->GetStatsHistory(0, mock_clock_->NowSeconds() + 1, &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   uint64_t new_recovered_value = 0;
   for (int i = 2; stats_iter->Valid(); stats_iter->Next(), i++) {
@@ -474,10 +484,10 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
   // make sure the first stats persist to finish
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   auto iter =
       db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
   int key_count = countkeys(iter);
@@ -486,7 +496,7 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
   uint64_t num_write_wal = 0;
   std::string sample = "rocksdb.write.wal";
   std::unique_ptr<StatsHistoryIterator> stats_iter;
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds(), &stats_iter));
+  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   for (; stats_iter->Valid(); stats_iter->Next()) {
     auto stats_map = stats_iter->GetStatsMap();
@@ -522,7 +532,7 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
   ASSERT_NOK(db_->CreateColumnFamily(cf_opts, kPersistentStatsColumnFamilyName,
                                      &handle));
   // verify stats is not affected by prior failed CF creation
-  ASSERT_OK(db_->GetStatsHistory(0, mock_env_->NowSeconds(), &stats_iter));
+  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
   ASSERT_TRUE(stats_iter != nullptr);
   num_write_wal = 0;
   for (; stats_iter->Valid(); stats_iter->Next()) {
@@ -574,7 +584,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
   // Wait for the first stats persist to finish, as the initial delay could be
   // different.
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec - 1); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
   ColumnFamilyData* cfd_default =
       static_cast<ColumnFamilyHandleImpl*>(dbfull()->DefaultColumnFamily())
@@ -593,7 +603,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
   ASSERT_EQ("v0", Get(1, "Eevee"));
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   // writing to all three cf, flush default cf
   // LogNumbers: default: 14, stats: 4, pikachu: 4
   ASSERT_OK(Flush());
@@ -618,7 +628,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
   ASSERT_EQ("v2", Get("foo2"));
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   // writing to default and stats cf, flushing default cf
   // LogNumbers: default: 19, stats: 19, pikachu: 19
   ASSERT_OK(Flush());
@@ -633,7 +643,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
   ASSERT_EQ("v3", Get(1, "Jolteon"));
   dbfull()->TEST_WaitForStatsDumpRun(
-      [&] { mock_env_->MockSleepForSeconds(kPeriodSec); });
+      [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   // writing to all three cf, flushing test cf
   // LogNumbers: default: 19, stats: 19, pikachu: 22
   ASSERT_OK(Flush(1));

monitoring/thread_status_updater.cc

@@ -4,9 +4,12 @@
 // (found in the LICENSE.Apache file in the root directory).
 #include "monitoring/thread_status_updater.h"
 #include <memory>
 #include "port/likely.h"
 #include "rocksdb/env.h"
+#include "rocksdb/system_clock.h"
 #include "util/mutexlock.h"
 namespace ROCKSDB_NAMESPACE {
@@ -159,7 +162,7 @@ Status ThreadStatusUpdater::GetThreadList(
     std::vector<ThreadStatus>* thread_list) {
   thread_list->clear();
   std::vector<std::shared_ptr<ThreadStatusData>> valid_list;
-  uint64_t now_micros = Env::Default()->NowMicros();
+  uint64_t now_micros = SystemClock::Default()->NowMicros();
   std::lock_guard<std::mutex> lck(thread_list_mutex_);
   for (auto* thread_data : thread_data_set_) {
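
The same one-line substitution pattern applies anywhere a wall-clock read is needed; a trivial sketch of the replacement call path shown in the hunk above:

#include "rocksdb/system_clock.h"

uint64_t NowMicrosExample() {
  // Equivalent to the old Env::Default()->NowMicros() call.
  return ROCKSDB_NAMESPACE::SystemClock::Default()->NowMicros();
}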

Some files were not shown because too many files have changed in this diff.
