From 3653029dda238692564773552c55cc29fea10310 Mon Sep 17 00:00:00 2001 From: Akanksha Mahajan Date: Mon, 25 Apr 2022 21:58:22 -0700 Subject: [PATCH] Add stats related to async prefetching (#9845) Summary: Add stats PREFETCHED_BYTES_DISCARDED and POLL_WAIT_MICROS. PREFETCHED_BYTES_DISCARDED records number of prefetched bytes discarded by FilePrefetchBuffer. POLL_WAIT_MICROS records the time taken by underling file_system Poll API. Pull Request resolved: https://github.com/facebook/rocksdb/pull/9845 Test Plan: Update existing tests Reviewed By: anand1976 Differential Revision: D35909694 Pulled By: akankshamahajan15 fbshipit-source-id: e009ef940bb9ed72c9446f5529095caabb8a1e36 --- HISTORY.md | 1 + file/file_prefetch_buffer.cc | 1 + file/file_prefetch_buffer.h | 23 +++++++++++++++++--- file/prefetch_test.cc | 5 +++++ include/rocksdb/statistics.h | 5 +++++ java/rocksjni/portal.h | 8 +++++++ monitoring/statistics.cc | 3 +++ table/block_based/block_based_table_reader.h | 3 ++- 8 files changed, 45 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index cf387ca46..f4e2da635 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -5,6 +5,7 @@ ### New Features * DB::GetLiveFilesStorageInfo is ready for production use. +* Add new stats PREFETCHED_BYTES_DISCARDED which records number of prefetched bytes discarded by RocksDB FilePrefetchBuffer on destruction and POLL_WAIT_MICROS records wait time for FS::Poll API completion. ### Public API changes * Add rollback_deletion_type_callback to TransactionDBOptions so that write-prepared transactions know whether to issue a Delete or SingleDelete to cancel a previous key written during prior prepare phase. The PR aims to prevent mixing SingleDeletes and Deletes for the same key that can lead to undefined behaviors for write-prepared transactions. diff --git a/file/file_prefetch_buffer.cc b/file/file_prefetch_buffer.cc index 6d98f3b53..ec92758fe 100644 --- a/file/file_prefetch_buffer.cc +++ b/file/file_prefetch_buffer.cc @@ -229,6 +229,7 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts, // second buffer. std::vector handles; handles.emplace_back(io_handle_); + StopWatch sw(clock_, stats_, POLL_WAIT_MICROS); fs_->Poll(handles, 1).PermitUncheckedError(); } diff --git a/file/file_prefetch_buffer.h b/file/file_prefetch_buffer.h index b56ef8614..94d09bba4 100644 --- a/file/file_prefetch_buffer.h +++ b/file/file_prefetch_buffer.h @@ -14,6 +14,7 @@ #include #include "file/readahead_file_info.h" +#include "monitoring/statistics.h" #include "port/port.h" #include "rocksdb/env.h" #include "rocksdb/file_system.h" @@ -64,7 +65,8 @@ class FilePrefetchBuffer { FilePrefetchBuffer(size_t readahead_size = 0, size_t max_readahead_size = 0, bool enable = true, bool track_min_offset = false, bool implicit_auto_readahead = false, - bool async_io = false, FileSystem* fs = nullptr) + bool async_io = false, FileSystem* fs = nullptr, + SystemClock* clock = nullptr, Statistics* stats = nullptr) : curr_(0), readahead_size_(readahead_size), initial_auto_readahead_size_(readahead_size), @@ -80,7 +82,9 @@ class FilePrefetchBuffer { del_fn_(nullptr), async_read_in_progress_(false), async_io_(async_io), - fs_(fs) { + fs_(fs), + clock_(clock), + stats_(stats) { // If async_io_ is enabled, data is asynchronously filled in second buffer // while curr_ is being consumed. If data is overlapping in two buffers, // data is copied to third buffer to return continuous buffer. @@ -88,13 +92,24 @@ class FilePrefetchBuffer { } ~FilePrefetchBuffer() { - // Wait for any pending async job before destroying the class object. + // Abort any pending async read request before destroying the class object. if (async_read_in_progress_ && fs_ != nullptr) { std::vector handles; handles.emplace_back(io_handle_); Status s = fs_->AbortIO(handles); assert(s.ok()); } + + // Prefetch buffer bytes discarded. + uint64_t bytes_discarded = 0; + if (bufs_[curr_].buffer_.CurrentSize() != 0) { + bytes_discarded = bufs_[curr_].buffer_.CurrentSize(); + } + if (bufs_[curr_ ^ 1].buffer_.CurrentSize() != 0) { + bytes_discarded += bufs_[curr_ ^ 1].buffer_.CurrentSize(); + } + RecordInHistogram(stats_, PREFETCHED_BYTES_DISCARDED, bytes_discarded); + // Release io_handle_. if (io_handle_ != nullptr && del_fn_ != nullptr) { del_fn_(io_handle_); @@ -273,5 +288,7 @@ class FilePrefetchBuffer { bool async_read_in_progress_; bool async_io_; FileSystem* fs_; + SystemClock* clock_; + Statistics* stats_; }; } // namespace ROCKSDB_NAMESPACE diff --git a/file/prefetch_test.cc b/file/prefetch_test.cc index 20e569568..c6287961c 100644 --- a/file/prefetch_test.cc +++ b/file/prefetch_test.cc @@ -1288,6 +1288,10 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) { { HistogramData async_read_bytes; options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes); + HistogramData prefetched_bytes_discarded; + options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED, + &prefetched_bytes_discarded); + // Not all platforms support iouring. In that case, ReadAsync in posix // won't submit async requests. if (read_async_called) { @@ -1295,6 +1299,7 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) { } else { ASSERT_EQ(async_read_bytes.count, 0); } + ASSERT_GT(prefetched_bytes_discarded.count, 0); } } diff --git a/include/rocksdb/statistics.h b/include/rocksdb/statistics.h index dcd3fc333..9bc7ab196 100644 --- a/include/rocksdb/statistics.h +++ b/include/rocksdb/statistics.h @@ -536,7 +536,12 @@ enum Histograms : uint32_t { // Error handler statistics ERROR_HANDLER_AUTORESUME_RETRY_COUNT, + // Stats related to asynchronous read requests. ASYNC_READ_BYTES, + POLL_WAIT_MICROS, + + // Number of prefetched bytes discarded by RocksDB. + PREFETCHED_BYTES_DISCARDED, HISTOGRAM_ENUM_MAX, }; diff --git a/java/rocksjni/portal.h b/java/rocksjni/portal.h index a94b39065..4d0d55c12 100644 --- a/java/rocksjni/portal.h +++ b/java/rocksjni/portal.h @@ -5589,6 +5589,10 @@ class HistogramTypeJni { return 0x32; case ROCKSDB_NAMESPACE::Histograms::ASYNC_READ_BYTES: return 0x33; + case ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS: + return 0x34; + case ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED: + return 0x35; case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX: // 0x1F for backwards compatibility on current minor version. return 0x1F; @@ -5708,6 +5712,10 @@ class HistogramTypeJni { ERROR_HANDLER_AUTORESUME_RETRY_COUNT; case 0x33: return ROCKSDB_NAMESPACE::Histograms::ASYNC_READ_BYTES; + case 0x34: + return ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS; + case 0x35: + return ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED; case 0x1F: // 0x1F for backwards compatibility on current minor version. return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX; diff --git a/monitoring/statistics.cc b/monitoring/statistics.cc index 566feb189..388acaf4d 100644 --- a/monitoring/statistics.cc +++ b/monitoring/statistics.cc @@ -284,6 +284,9 @@ const std::vector> HistogramsNameMap = { {ERROR_HANDLER_AUTORESUME_RETRY_COUNT, "rocksdb.error.handler.autoresume.retry.count"}, {ASYNC_READ_BYTES, "rocksdb.async.read.bytes"}, + {POLL_WAIT_MICROS, "rocksdb.poll.wait.micros"}, + {PREFETCHED_BYTES_DISCARDED, "rocksdb.prefetched.bytes.discarded"}, + }; std::shared_ptr CreateDBStatistics() { diff --git a/table/block_based/block_based_table_reader.h b/table/block_based/block_based_table_reader.h index 0671fc09f..c9c656c3e 100644 --- a/table/block_based/block_based_table_reader.h +++ b/table/block_based/block_based_table_reader.h @@ -666,7 +666,8 @@ struct BlockBasedTable::Rep { fpb->reset(new FilePrefetchBuffer( readahead_size, max_readahead_size, !ioptions.allow_mmap_reads /* enable */, false /* track_min_offset */, - implicit_auto_readahead, async_io, ioptions.fs.get())); + implicit_auto_readahead, async_io, ioptions.fs.get(), ioptions.clock, + ioptions.stats)); } void CreateFilePrefetchBufferIfNotExists(