Use steady_clock instead of system_clock in FileOperationInfo::TimePoint (#7153)

Summary:
Issue https://github.com/facebook/rocksdb/issues/7133 reported that using `system_clock` in `FileOperationInfo::TimePoint` causes the duration of file flush operation (which can be a noop on MacOS in some scenarios) appears to be 0 and fail an assertion in listener_test. Using `steady_clock` supposedly fixed the problem.
`steady_clock` actually fits better into the use cases of `FileOperationInfo::TimePoint` as all usages care about durations but not wall clock time.

Pull Request resolved: https://github.com/facebook/rocksdb/pull/7153

Test Plan: make check.

Reviewed By: riversand963

Differential Revision: D22654136

Pulled By: roghnin

fbshipit-source-id: 5980b1080734bdae496a18071a2c2b5887c67d85
main
Haosen Wen 5 years ago committed by Facebook GitHub Bot
parent b923dc720b
commit dbc51adbac
  1. 1
      HISTORY.md
  2. 4
      db/listener_test.cc
  3. 18
      file/random_access_file_reader.cc
  4. 7
      file/random_access_file_reader.h
  5. 48
      file/writable_file_writer.cc
  6. 31
      file/writable_file_writer.h
  7. 29
      include/rocksdb/listener.h

@ -3,6 +3,7 @@
### Public API Change ### Public API Change
* Encryption file classes now exposed for inheritance in env_encryption.h * Encryption file classes now exposed for inheritance in env_encryption.h
* File I/O listener is extended to cover more I/O operations. Now class `EventListener` in listener.h contains new callback functions: `OnFileFlushFinish()`, `OnFileSyncFinish()`, `OnFileRangeSyncFinish()`, `OnFileTruncateFinish()`, and ``OnFileCloseFinish()``. * File I/O listener is extended to cover more I/O operations. Now class `EventListener` in listener.h contains new callback functions: `OnFileFlushFinish()`, `OnFileSyncFinish()`, `OnFileRangeSyncFinish()`, `OnFileTruncateFinish()`, and ``OnFileCloseFinish()``.
* `FileOperationInfo` now reports `duration` measured by `std::chrono::steady_clock` and `start_ts` measured by `std::chrono::system_clock` instead of start and finish timestamps measured by `system_clock`. Note that `system_clock` is called before `steady_clock` in program order at operation starts.
### Behavior Changes ### Behavior Changes
* Best-efforts recovery ignores CURRENT file completely. If CURRENT file is missing during recovery, best-efforts recovery still proceeds with MANIFEST file(s). * Best-efforts recovery ignores CURRENT file completely. If CURRENT file is missing during recovery, best-efforts recovery still proceeds with MANIFEST file(s).

@ -1052,9 +1052,7 @@ class TestFileOperationListener : public EventListener {
private: private:
void ReportDuration(const FileOperationInfo& info) const { void ReportDuration(const FileOperationInfo& info) const {
auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>( ASSERT_GT(info.duration.count(), 0);
info.finish_timestamp - info.start_timestamp);
ASSERT_GT(duration.count(), 0);
} }
}; };

@ -60,10 +60,10 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
} }
Slice tmp; Slice tmp;
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
uint64_t orig_offset = 0; uint64_t orig_offset = 0;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
orig_offset = aligned_offset + buf.CurrentSize(); orig_offset = aligned_offset + buf.CurrentSize();
} }
@ -78,7 +78,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
&tmp, buf.Destination(), nullptr); &tmp, buf.Destination(), nullptr);
} }
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = FileOperationInfo::FinishNow();
NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, finish_ts, NotifyOnFileReadFinish(orig_offset, tmp.size(), start_ts, finish_ts,
s); s);
} }
@ -121,9 +121,9 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
Slice tmp_result; Slice tmp_result;
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
@ -139,7 +139,7 @@ Status RandomAccessFileReader::Read(const IOOptions& opts, uint64_t offset,
} }
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = FileOperationInfo::FinishNow();
NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts, NotifyOnFileReadFinish(offset + pos, tmp_result.size(), start_ts,
finish_ts, s); finish_ts, s);
} }
@ -256,9 +256,9 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
@ -292,7 +292,7 @@ Status RandomAccessFileReader::MultiRead(const IOOptions& opts,
for (size_t i = 0; i < num_reqs; ++i) { for (size_t i = 0; i < num_reqs; ++i) {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = FileOperationInfo::FinishNow();
NotifyOnFileReadFinish(read_reqs[i].offset, read_reqs[i].result.size(), NotifyOnFileReadFinish(read_reqs[i].offset, read_reqs[i].result.size(),
start_ts, finish_ts, read_reqs[i].status); start_ts, finish_ts, read_reqs[i].status);
} }

@ -35,9 +35,10 @@ using AlignedBuf = std::unique_ptr<char[]>;
class RandomAccessFileReader { class RandomAccessFileReader {
private: private:
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
void NotifyOnFileReadFinish(uint64_t offset, size_t length, void NotifyOnFileReadFinish(
const FileOperationInfo::TimePoint& start_ts, uint64_t offset, size_t length,
const FileOperationInfo::TimePoint& finish_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::FinishTimePoint& finish_ts,
const Status& status) const { const Status& status) const {
FileOperationInfo info(FileOperationType::kRead, file_name_, start_ts, FileOperationInfo info(FileOperationType::kRead, file_name_, start_ts,
finish_ts, status); finish_ts, status);

@ -141,15 +141,15 @@ IOStatus WritableFileWriter::Close() {
if (use_direct_io()) { if (use_direct_io()) {
{ {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
interim = writable_file_->Truncate(filesize_, IOOptions(), nullptr); interim = writable_file_->Truncate(filesize_, IOOptions(), nullptr);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = FileOperationInfo::FinishNow();
NotifyOnFileTruncateFinish(start_ts, finish_ts, s); NotifyOnFileTruncateFinish(start_ts, finish_ts, s);
} }
#endif #endif
@ -157,15 +157,15 @@ IOStatus WritableFileWriter::Close() {
if (interim.ok()) { if (interim.ok()) {
{ {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
interim = writable_file_->Fsync(IOOptions(), nullptr); interim = writable_file_->Fsync(IOOptions(), nullptr);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = FileOperationInfo::FinishNow();
NotifyOnFileSyncFinish(start_ts, finish_ts, s, NotifyOnFileSyncFinish(start_ts, finish_ts, s,
FileOperationType::kFsync); FileOperationType::kFsync);
} }
@ -180,15 +180,15 @@ IOStatus WritableFileWriter::Close() {
TEST_KILL_RANDOM("WritableFileWriter::Close:0", rocksdb_kill_odds); TEST_KILL_RANDOM("WritableFileWriter::Close:0", rocksdb_kill_odds);
{ {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
interim = writable_file_->Close(IOOptions(), nullptr); interim = writable_file_->Close(IOOptions(), nullptr);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = FileOperationInfo::FinishNow();
NotifyOnFileCloseFinish(start_ts, finish_ts, s); NotifyOnFileCloseFinish(start_ts, finish_ts, s);
} }
#endif #endif
@ -232,15 +232,15 @@ IOStatus WritableFileWriter::Flush() {
{ {
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
s = writable_file_->Flush(IOOptions(), nullptr); s = writable_file_->Flush(IOOptions(), nullptr);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = std::chrono::steady_clock::now();
NotifyOnFileFlushFinish(start_ts, finish_ts, s); NotifyOnFileFlushFinish(start_ts, finish_ts, s);
} }
#endif #endif
@ -333,9 +333,9 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
auto prev_perf_level = GetPerfLevel(); auto prev_perf_level = GetPerfLevel();
IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_); IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, env_);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
if (use_fsync) { if (use_fsync) {
@ -345,7 +345,7 @@ IOStatus WritableFileWriter::SyncInternal(bool use_fsync) {
} }
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = std::chrono::steady_clock::now();
NotifyOnFileSyncFinish( NotifyOnFileSyncFinish(
start_ts, finish_ts, s, start_ts, finish_ts, s,
use_fsync ? FileOperationType::kFsync : FileOperationType::kSync); use_fsync ? FileOperationType::kFsync : FileOperationType::kSync);
@ -359,15 +359,15 @@ IOStatus WritableFileWriter::RangeSync(uint64_t offset, uint64_t nbytes) {
IOSTATS_TIMER_GUARD(range_sync_nanos); IOSTATS_TIMER_GUARD(range_sync_nanos);
TEST_SYNC_POINT("WritableFileWriter::RangeSync:0"); TEST_SYNC_POINT("WritableFileWriter::RangeSync:0");
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
#endif #endif
IOStatus s = writable_file_->RangeSync(offset, nbytes, IOOptions(), nullptr); IOStatus s = writable_file_->RangeSync(offset, nbytes, IOOptions(), nullptr);
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = std::chrono::steady_clock::now();
NotifyOnFileRangeSyncFinish(offset, nbytes, start_ts, finish_ts, s); NotifyOnFileRangeSyncFinish(offset, nbytes, start_ts, finish_ts, s);
} }
#endif #endif
@ -397,10 +397,10 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
uint64_t old_size = writable_file_->GetFileSize(IOOptions(), nullptr); uint64_t old_size = writable_file_->GetFileSize(IOOptions(), nullptr);
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
old_size = next_write_offset_; old_size = next_write_offset_;
} }
#endif #endif
@ -412,7 +412,7 @@ IOStatus WritableFileWriter::WriteBuffered(const char* data, size_t size) {
} }
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = std::chrono::steady_clock::now();
NotifyOnFileWriteFinish(old_size, allowed, start_ts, finish_ts, s); NotifyOnFileWriteFinish(old_size, allowed, start_ts, finish_ts, s);
} }
#endif #endif
@ -482,15 +482,15 @@ IOStatus WritableFileWriter::WriteDirect() {
{ {
IOSTATS_TIMER_GUARD(write_nanos); IOSTATS_TIMER_GUARD(write_nanos);
TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend"); TEST_SYNC_POINT("WritableFileWriter::Flush:BeforeAppend");
FileOperationInfo::TimePoint start_ts; FileOperationInfo::StartTimePoint start_ts;
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
start_ts = std::chrono::system_clock::now(); start_ts = FileOperationInfo::StartNow();
} }
// direct writes must be positional // direct writes must be positional
s = writable_file_->PositionedAppend(Slice(src, size), write_offset, s = writable_file_->PositionedAppend(Slice(src, size), write_offset,
IOOptions(), nullptr); IOOptions(), nullptr);
if (ShouldNotifyListeners()) { if (ShouldNotifyListeners()) {
auto finish_ts = std::chrono::system_clock::now(); auto finish_ts = std::chrono::steady_clock::now();
NotifyOnFileWriteFinish(write_offset, size, start_ts, finish_ts, s); NotifyOnFileWriteFinish(write_offset, size, start_ts, finish_ts, s);
} }
if (!s.ok()) { if (!s.ok()) {

@ -34,9 +34,10 @@ class Statistics;
class WritableFileWriter { class WritableFileWriter {
private: private:
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
void NotifyOnFileWriteFinish(uint64_t offset, size_t length, void NotifyOnFileWriteFinish(
const FileOperationInfo::TimePoint& start_ts, uint64_t offset, size_t length,
const FileOperationInfo::TimePoint& finish_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::FinishTimePoint& finish_ts,
const IOStatus& io_status) { const IOStatus& io_status) {
FileOperationInfo info(FileOperationType::kWrite, file_name_, start_ts, FileOperationInfo info(FileOperationType::kWrite, file_name_, start_ts,
finish_ts, io_status); finish_ts, io_status);
@ -48,8 +49,9 @@ class WritableFileWriter {
} }
info.status.PermitUncheckedError(); info.status.PermitUncheckedError();
} }
void NotifyOnFileFlushFinish(const FileOperationInfo::TimePoint& start_ts, void NotifyOnFileFlushFinish(
const FileOperationInfo::TimePoint& finish_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::FinishTimePoint& finish_ts,
const IOStatus& io_status) { const IOStatus& io_status) {
FileOperationInfo info(FileOperationType::kFlush, file_name_, start_ts, FileOperationInfo info(FileOperationType::kFlush, file_name_, start_ts,
finish_ts, io_status); finish_ts, io_status);
@ -60,8 +62,9 @@ class WritableFileWriter {
info.status.PermitUncheckedError(); info.status.PermitUncheckedError();
} }
void NotifyOnFileSyncFinish( void NotifyOnFileSyncFinish(
const FileOperationInfo::TimePoint& start_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::TimePoint& finish_ts, const IOStatus& io_status, const FileOperationInfo::FinishTimePoint& finish_ts,
const IOStatus& io_status,
FileOperationType type = FileOperationType::kSync) { FileOperationType type = FileOperationType::kSync) {
FileOperationInfo info(type, file_name_, start_ts, finish_ts, io_status); FileOperationInfo info(type, file_name_, start_ts, finish_ts, io_status);
@ -72,8 +75,8 @@ class WritableFileWriter {
} }
void NotifyOnFileRangeSyncFinish( void NotifyOnFileRangeSyncFinish(
uint64_t offset, size_t length, uint64_t offset, size_t length,
const FileOperationInfo::TimePoint& start_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::TimePoint& finish_ts, const FileOperationInfo::FinishTimePoint& finish_ts,
const IOStatus& io_status) { const IOStatus& io_status) {
FileOperationInfo info(FileOperationType::kRangeSync, file_name_, start_ts, FileOperationInfo info(FileOperationType::kRangeSync, file_name_, start_ts,
finish_ts, io_status); finish_ts, io_status);
@ -85,8 +88,9 @@ class WritableFileWriter {
} }
info.status.PermitUncheckedError(); info.status.PermitUncheckedError();
} }
void NotifyOnFileTruncateFinish(const FileOperationInfo::TimePoint& start_ts, void NotifyOnFileTruncateFinish(
const FileOperationInfo::TimePoint& finish_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::FinishTimePoint& finish_ts,
const IOStatus& io_status) { const IOStatus& io_status) {
FileOperationInfo info(FileOperationType::kTruncate, file_name_, start_ts, FileOperationInfo info(FileOperationType::kTruncate, file_name_, start_ts,
finish_ts, io_status); finish_ts, io_status);
@ -96,8 +100,9 @@ class WritableFileWriter {
} }
info.status.PermitUncheckedError(); info.status.PermitUncheckedError();
} }
void NotifyOnFileCloseFinish(const FileOperationInfo::TimePoint& start_ts, void NotifyOnFileCloseFinish(
const FileOperationInfo::TimePoint& finish_ts, const FileOperationInfo::StartTimePoint& start_ts,
const FileOperationInfo::FinishTimePoint& finish_ts,
const IOStatus& io_status) { const IOStatus& io_status) {
FileOperationInfo info(FileOperationType::kClose, file_name_, start_ts, FileOperationInfo info(FileOperationType::kClose, file_name_, start_ts,
finish_ts, io_status); finish_ts, io_status);

@ -161,24 +161,37 @@ enum class FileOperationType {
}; };
struct FileOperationInfo { struct FileOperationInfo {
using TimePoint = std::chrono::time_point<std::chrono::system_clock, using Duration = std::chrono::nanoseconds;
std::chrono::nanoseconds>; using SteadyTimePoint =
std::chrono::time_point<std::chrono::steady_clock, Duration>;
using SystemTimePoint =
std::chrono::time_point<std::chrono::system_clock, Duration>;
using StartTimePoint = std::pair<SystemTimePoint, SteadyTimePoint>;
using FinishTimePoint = SteadyTimePoint;
FileOperationType type; FileOperationType type;
const std::string& path; const std::string& path;
uint64_t offset; uint64_t offset;
size_t length; size_t length;
const TimePoint& start_timestamp; const Duration duration;
const TimePoint& finish_timestamp; const SystemTimePoint& start_ts;
Status status; Status status;
FileOperationInfo(const FileOperationType _type, const std::string& _path, FileOperationInfo(const FileOperationType _type, const std::string& _path,
const TimePoint& start, const TimePoint& finish, const StartTimePoint& _start_ts,
const Status& _status) const FinishTimePoint& _finish_ts, const Status& _status)
: type(_type), : type(_type),
path(_path), path(_path),
start_timestamp(start), duration(std::chrono::duration_cast<std::chrono::nanoseconds>(
finish_timestamp(finish), _finish_ts - _start_ts.second)),
start_ts(_start_ts.first),
status(_status) {} status(_status) {}
static StartTimePoint StartNow() {
return std::make_pair<SystemTimePoint, SteadyTimePoint>(
std::chrono::system_clock::now(), std::chrono::steady_clock::now());
}
static FinishTimePoint FinishNow() {
return std::chrono::steady_clock::now();
}
}; };
struct FlushJobInfo { struct FlushJobInfo {

Loading…
Cancel
Save