Add statistics support to integrated BlobDB (#8667)

Summary:
The patch adds statistics support to the integrated BlobDB implementation,
namely the tickers `BLOB_DB_BLOB_FILE_BYTES_READ` and
`BLOB_DB_GC_{NUM_KEYS,BYTES}_RELOCATED`, and the histograms
`BLOB_DB_(DE)COMPRESSION_MICROS`. (Some other statistics, like
`BLOB_DB_BLOB_FILE_BYTES_WRITTEN`, `BLOB_DB_BLOB_FILE_SYNCED`,
`BLOB_DB_BLOB_FILE_{READ,WRITE,SYNC}_MICROS` were already supported.)
Note that the vast majority of the legacy BlobDB's tickers/histograms are not
applicable to the new implementation, since they, for example, pertain to
dedicated BlobDB APIs (which the integrated BlobDB does not have) or are tied
to the legacy BlobDB's design of writing blob files synchronously when a write
API is called. Such statistics are marked "legacy BlobDB only" in
`statistics.h`.
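
For illustration, here is a minimal sketch (not part of the patch) of how an
application could enable statistics with the integrated BlobDB and read the new
tickers/histograms; the helper `PrintBlobStats` and the database path are
hypothetical:

```cpp
#include <iostream>
#include <memory>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

using namespace ROCKSDB_NAMESPACE;

// Hypothetical helper (not part of this patch): dumps the newly supported
// integrated BlobDB tickers/histograms from a Statistics object.
void PrintBlobStats(const std::shared_ptr<Statistics>& stats) {
  std::cout << "Blob file bytes read: "
            << stats->getTickerCount(BLOB_DB_BLOB_FILE_BYTES_READ) << '\n'
            << "GC keys relocated:    "
            << stats->getTickerCount(BLOB_DB_GC_NUM_KEYS_RELOCATED) << '\n'
            << "GC bytes relocated:   "
            << stats->getTickerCount(BLOB_DB_GC_BYTES_RELOCATED) << '\n';

  HistogramData compression;
  stats->histogramData(BLOB_DB_COMPRESSION_MICROS, &compression);
  std::cout << "Compression micros p99:   " << compression.percentile99 << '\n';

  HistogramData decompression;
  stats->histogramData(BLOB_DB_DECOMPRESSION_MICROS, &decompression);
  std::cout << "Decompression micros p99: " << decompression.percentile99
            << '\n';
}

int main() {
  Options options;
  options.create_if_missing = true;
  options.enable_blob_files = true;               // integrated BlobDB
  options.blob_compression_type = kSnappyCompression;
  options.enable_blob_garbage_collection = true;  // populates the GC tickers
  options.statistics = CreateDBStatistics();      // enable stats collection

  DB* db = nullptr;
  Status s = DB::Open(options, "/tmp/blob_stats_example", &db);  // example path
  if (!s.ok()) {
    return 1;
  }

  // ... perform writes, reads, flushes, and compactions here ...

  PrintBlobStats(options.statistics);

  delete db;
  return 0;
}
```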

Fixes https://github.com/facebook/rocksdb/issues/8645 .

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8667

Test Plan: Ran `make check` and tested the new statistics using `db_bench`.
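(One illustrative way to exercise these statistics with `db_bench` is to run a
mix such as `--benchmarks=fillrandom,readrandom` with `--enable_blob_files`,
`--blob_compression_type=snappy`, `--enable_blob_garbage_collection`, and
`--statistics`, then inspect the statistics dump; the exact flags used for
testing are not recorded here.)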

Reviewed By: riversand963

Differential Revision: D30356884

Pulled By: ltamasi

fbshipit-source-id: 5f8a833faee60401c5643c2f0a6c0415488190a4
parent 0729b287e9
commit 6878cedcc3

Files changed:

  1. HISTORY.md (1 line changed)
  2. db/blob/blob_file_builder.cc (12 lines changed)
  3. db/blob/blob_file_reader.cc (60 lines changed)
  4. db/blob/blob_file_reader.h (16 lines changed)
  5. db/compaction/compaction_iteration_stats.h (2 lines changed)
  6. db/compaction/compaction_iterator.cc (3 lines changed)
  7. db/compaction/compaction_job.cc (10 lines changed)
  8. include/rocksdb/statistics.h (80 lines changed)

HISTORY.md
@@ -18,6 +18,7 @@
* Fast forward option in Trace replay changed to double type to allow replaying at a lower speed, by settings the value between 0 and 1. This option can be set via `ReplayOptions` in `Replayer::Replay()`, or via `--trace_replay_fast_forward` in db_bench.
* Add property `LiveSstFilesSizeAtTemperature` to retrieve sst file size at different temperature.
* Added a stat rocksdb.secondary.cache.hits
* The integrated BlobDB implementation now supports the tickers `BLOB_DB_BLOB_FILE_BYTES_READ`, `BLOB_DB_GC_NUM_KEYS_RELOCATED`, and `BLOB_DB_GC_BYTES_RELOCATED`, as well as the histograms `BLOB_DB_COMPRESSION_MICROS` and `BLOB_DB_DECOMPRESSION_MICROS`.
## Public API change
* Added APIs to decode and replay trace file via Replayer class. Added `DB::NewDefaultReplayer()` to create a default Replayer instance. Added `TraceReader::Reset()` to restart reading a trace file. Created trace_record.h and utilities/replayer.h files to access decoded Trace records and replay them.

db/blob/blob_file_builder.cc
@@ -228,6 +228,7 @@ Status BlobFileBuilder::CompressBlobIfNeeded(
assert(blob);
assert(compressed_blob);
assert(compressed_blob->empty());
assert(immutable_options_);
if (blob_compression_type_ == kNoCompression) {
return Status::OK();
@@ -242,7 +243,16 @@ Status BlobFileBuilder::CompressBlobIfNeeded(
constexpr uint32_t compression_format_version = 2;
if (!CompressData(*blob, info, compression_format_version, compressed_blob)) {
bool success = false;
{
StopWatch stop_watch(immutable_options_->clock, immutable_options_->stats,
BLOB_DB_COMPRESSION_MICROS);
success =
CompressData(*blob, info, compression_format_version, compressed_blob);
}
if (!success) {
return Status::Corruption("Error compressing blob");
}

db/blob/blob_file_reader.cc
@@ -10,6 +10,7 @@
#include "db/blob/blob_log_format.h"
#include "file/filename.h"
#include "monitoring/statistics.h"
#include "options/cf_options.h"
#include "rocksdb/file_system.h"
#include "rocksdb/slice.h"
@@ -17,6 +18,7 @@
#include "test_util/sync_point.h"
#include "util/compression.h"
#include "util/crc32c.h"
#include "util/stop_watch.h"
namespace ROCKSDB_NAMESPACE {
@@ -42,25 +44,28 @@ Status BlobFileReader::Create(
assert(file_reader);
Statistics* const statistics = immutable_options.stats;
CompressionType compression_type = kNoCompression;
{
const Status s =
ReadHeader(file_reader.get(), column_family_id, &compression_type);
const Status s = ReadHeader(file_reader.get(), column_family_id, statistics,
&compression_type);
if (!s.ok()) {
return s;
}
}
{
const Status s = ReadFooter(file_size, file_reader.get());
const Status s = ReadFooter(file_reader.get(), file_size, statistics);
if (!s.ok()) {
return s;
}
}
blob_file_reader->reset(
new BlobFileReader(std::move(file_reader), file_size, compression_type));
new BlobFileReader(std::move(file_reader), file_size, compression_type,
immutable_options.clock, statistics));
return Status::OK();
}
@@ -127,6 +132,7 @@ Status BlobFileReader::OpenFile(
Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
uint32_t column_family_id,
Statistics* statistics,
CompressionType* compression_type) {
assert(file_reader);
assert(compression_type);
@@ -141,8 +147,9 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
constexpr uint64_t read_offset = 0;
constexpr size_t read_size = BlobLogHeader::kSize;
const Status s = ReadFromFile(file_reader, read_offset, read_size,
&header_slice, &buf, &aligned_buf);
const Status s =
ReadFromFile(file_reader, read_offset, read_size, statistics,
&header_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
@@ -175,8 +182,8 @@ Status BlobFileReader::ReadHeader(const RandomAccessFileReader* file_reader,
return Status::OK();
}
Status BlobFileReader::ReadFooter(uint64_t file_size,
const RandomAccessFileReader* file_reader) {
Status BlobFileReader::ReadFooter(const RandomAccessFileReader* file_reader,
uint64_t file_size, Statistics* statistics) {
assert(file_size >= BlobLogHeader::kSize + BlobLogFooter::kSize);
assert(file_reader);
@@ -190,8 +197,9 @@ Status BlobFileReader::ReadFooter(uint64_t file_size,
const uint64_t read_offset = file_size - BlobLogFooter::kSize;
constexpr size_t read_size = BlobLogFooter::kSize;
const Status s = ReadFromFile(file_reader, read_offset, read_size,
&footer_slice, &buf, &aligned_buf);
const Status s =
ReadFromFile(file_reader, read_offset, read_size, statistics,
&footer_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
}
@@ -220,14 +228,16 @@ Status BlobFileReader::ReadFooter(uint64_t file_size,
Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
Slice* slice, Buffer* buf,
AlignedBuf* aligned_buf) {
Statistics* statistics, Slice* slice,
Buffer* buf, AlignedBuf* aligned_buf) {
assert(slice);
assert(buf);
assert(aligned_buf);
assert(file_reader);
RecordTick(statistics, BLOB_DB_BLOB_FILE_BYTES_READ, read_size);
Status s;
if (file_reader->use_direct_io()) {
@@ -256,10 +266,13 @@ Status BlobFileReader::ReadFromFile(const RandomAccessFileReader* file_reader,
BlobFileReader::BlobFileReader(
std::unique_ptr<RandomAccessFileReader>&& file_reader, uint64_t file_size,
CompressionType compression_type)
CompressionType compression_type, SystemClock* clock,
Statistics* statistics)
: file_reader_(std::move(file_reader)),
file_size_(file_size),
compression_type_(compression_type) {
compression_type_(compression_type),
clock_(clock),
statistics_(statistics) {
assert(file_reader_);
}
@@ -304,7 +317,7 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
TEST_SYNC_POINT("BlobFileReader::GetBlob:ReadFromFile");
const Status s = ReadFromFile(file_reader_.get(), record_offset,
static_cast<size_t>(record_size),
static_cast<size_t>(record_size), statistics_,
&record_slice, &buf, &aligned_buf);
if (!s.ok()) {
return s;
@@ -324,8 +337,8 @@ Status BlobFileReader::GetBlob(const ReadOptions& read_options,
const Slice value_slice(record_slice.data() + adjustment, value_size);
{
const Status s =
UncompressBlobIfNeeded(value_slice, compression_type, value);
const Status s = UncompressBlobIfNeeded(value_slice, compression_type,
clock_, statistics_, value);
if (!s.ok()) {
return s;
}
@@ -382,6 +395,8 @@ Status BlobFileReader::VerifyBlob(const Slice& record_slice,
Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
CompressionType compression_type,
SystemClock* clock,
Statistics* statistics,
PinnableSlice* value) {
assert(value);
@@ -399,9 +414,14 @@ Status BlobFileReader::UncompressBlobIfNeeded(const Slice& value_slice,
constexpr uint32_t compression_format_version = 2;
constexpr MemoryAllocator* allocator = nullptr;
CacheAllocationPtr output =
UncompressData(info, value_slice.data(), value_slice.size(),
&uncompressed_size, compression_format_version, allocator);
CacheAllocationPtr output;
{
StopWatch stop_watch(clock, statistics, BLOB_DB_DECOMPRESSION_MICROS);
output = UncompressData(info, value_slice.data(), value_slice.size(),
&uncompressed_size, compression_format_version,
allocator);
}
TEST_SYNC_POINT_CALLBACK(
"BlobFileReader::UncompressBlobIfNeeded:TamperWithResult", &output);

db/blob/blob_file_reader.h
@@ -21,6 +21,7 @@ class HistogramImpl;
struct ReadOptions;
class Slice;
class PinnableSlice;
class Statistics;
class BlobFileReader {
public:
@@ -44,7 +45,8 @@ class BlobFileReader {
private:
BlobFileReader(std::unique_ptr<RandomAccessFileReader>&& file_reader,
uint64_t file_size, CompressionType compression_type);
uint64_t file_size, CompressionType compression_type,
SystemClock* clock, Statistics* statistics);
static Status OpenFile(const ImmutableOptions& immutable_options,
const FileOptions& file_opts,
@@ -55,17 +57,17 @@ class BlobFileReader {
std::unique_ptr<RandomAccessFileReader>* file_reader);
static Status ReadHeader(const RandomAccessFileReader* file_reader,
uint32_t column_family_id,
uint32_t column_family_id, Statistics* statistics,
CompressionType* compression_type);
static Status ReadFooter(uint64_t file_size,
const RandomAccessFileReader* file_reader);
static Status ReadFooter(const RandomAccessFileReader* file_reader,
uint64_t file_size, Statistics* statistics);
using Buffer = std::unique_ptr<char[]>;
static Status ReadFromFile(const RandomAccessFileReader* file_reader,
uint64_t read_offset, size_t read_size,
Slice* slice, Buffer* buf,
Statistics* statistics, Slice* slice, Buffer* buf,
AlignedBuf* aligned_buf);
static Status VerifyBlob(const Slice& record_slice, const Slice& user_key,
@@ -73,6 +75,8 @@ class BlobFileReader {
static Status UncompressBlobIfNeeded(const Slice& value_slice,
CompressionType compression_type,
SystemClock* clock,
Statistics* statistics,
PinnableSlice* value);
static void SaveValue(const Slice& src, PinnableSlice* dst);
@@ -80,6 +84,8 @@ class BlobFileReader {
std::unique_ptr<RandomAccessFileReader> file_reader_;
uint64_t file_size_;
CompressionType compression_type_;
SystemClock* clock_;
Statistics* statistics_;
};
} // namespace ROCKSDB_NAMESPACE

db/compaction/compaction_iteration_stats.h
@@ -38,4 +38,6 @@ struct CompactionIterationStats {
// Blob related statistics
uint64_t num_blobs_read = 0;
uint64_t total_blob_bytes_read = 0;
uint64_t num_blobs_relocated = 0;
uint64_t total_blob_bytes_relocated = 0;
};

db/compaction/compaction_iterator.cc
@@ -898,6 +898,9 @@ void CompactionIterator::GarbageCollectBlobIfNeeded() {
++iter_stats_.num_blobs_read;
iter_stats_.total_blob_bytes_read += bytes_read;
++iter_stats_.num_blobs_relocated;
iter_stats_.total_blob_bytes_relocated += blob_index.size();
value_ = blob_value_;
if (ExtractLargeValueIfNeededImpl()) {

db/compaction/compaction_job.cc
@@ -1369,6 +1369,16 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
RecordTick(stats_, FILTER_OPERATION_TOTAL_TIME,
c_iter_stats.total_filter_time);
if (c_iter_stats.num_blobs_relocated > 0) {
RecordTick(stats_, BLOB_DB_GC_NUM_KEYS_RELOCATED,
c_iter_stats.num_blobs_relocated);
}
if (c_iter_stats.total_blob_bytes_relocated > 0) {
RecordTick(stats_, BLOB_DB_GC_BYTES_RELOCATED,
c_iter_stats.total_blob_bytes_relocated);
}
RecordDroppedKeys(c_iter_stats, &sub_compact->compaction_job_stats);
RecordCompactionIOStats();

include/rocksdb/statistics.h
@@ -247,35 +247,42 @@ enum Tickers : uint32_t {
NUMBER_ITER_SKIP,
// BlobDB specific stats
// # of Put/PutTTL/PutUntil to BlobDB.
// # of Put/PutTTL/PutUntil to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_PUT,
// # of Write to BlobDB.
// # of Write to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_WRITE,
// # of Get to BlobDB.
// # of Get to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_GET,
// # of MultiGet to BlobDB.
// # of MultiGet to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_MULTIGET,
// # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator.
// # of Seek/SeekToFirst/SeekToLast/SeekForPrev to BlobDB iterator. Only
// applicable to legacy BlobDB.
BLOB_DB_NUM_SEEK,
// # of Next to BlobDB iterator.
// # of Next to BlobDB iterator. Only applicable to legacy BlobDB.
BLOB_DB_NUM_NEXT,
// # of Prev to BlobDB iterator.
// # of Prev to BlobDB iterator. Only applicable to legacy BlobDB.
BLOB_DB_NUM_PREV,
// # of keys written to BlobDB.
// # of keys written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_KEYS_WRITTEN,
// # of keys read from BlobDB.
// # of keys read from BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_NUM_KEYS_READ,
// # of bytes (key + value) written to BlobDB.
// # of bytes (key + value) written to BlobDB. Only applicable to legacy
// BlobDB.
BLOB_DB_BYTES_WRITTEN,
// # of bytes (keys + value) read from BlobDB.
// # of bytes (keys + value) read from BlobDB. Only applicable to legacy
// BlobDB.
BLOB_DB_BYTES_READ,
// # of keys written by BlobDB as non-TTL inlined value.
// # of keys written by BlobDB as non-TTL inlined value. Only applicable to
// legacy BlobDB.
BLOB_DB_WRITE_INLINED,
// # of keys written by BlobDB as TTL inlined value.
// # of keys written by BlobDB as TTL inlined value. Only applicable to legacy
// BlobDB.
BLOB_DB_WRITE_INLINED_TTL,
// # of keys written by BlobDB as non-TTL blob value.
// # of keys written by BlobDB as non-TTL blob value. Only applicable to
// legacy BlobDB.
BLOB_DB_WRITE_BLOB,
// # of keys written by BlobDB as TTL blob value.
// # of keys written by BlobDB as TTL blob value. Only applicable to legacy
// BlobDB.
BLOB_DB_WRITE_BLOB_TTL,
// # of bytes written to blob file.
BLOB_DB_BLOB_FILE_BYTES_WRITTEN,
@@ -284,22 +291,24 @@ enum Tickers : uint32_t {
// # of times a blob files being synced.
BLOB_DB_BLOB_FILE_SYNCED,
// # of blob index evicted from base DB by BlobDB compaction filter because
// of expiration.
// of expiration. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EXPIRED_COUNT,
// size of blob index evicted from base DB by BlobDB compaction filter
// because of expiration.
// because of expiration. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EXPIRED_SIZE,
// # of blob index evicted from base DB by BlobDB compaction filter because
// of corresponding file deleted.
// of corresponding file deleted. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EVICTED_COUNT,
// size of blob index evicted from base DB by BlobDB compaction filter
// because of corresponding file deleted.
// because of corresponding file deleted. Only applicable to legacy BlobDB.
BLOB_DB_BLOB_INDEX_EVICTED_SIZE,
// # of blob files that were obsoleted by garbage collection.
// # of blob files that were obsoleted by garbage collection. Only applicable
// to legacy BlobDB.
BLOB_DB_GC_NUM_FILES,
// # of blob files generated by garbage collection.
// # of blob files generated by garbage collection. Only applicable to legacy
// BlobDB.
BLOB_DB_GC_NUM_NEW_FILES,
// # of BlobDB garbage collection failures.
// # of BlobDB garbage collection failures. Only applicable to legacy BlobDB.
BLOB_DB_GC_FAILURES,
// # of keys dropped by BlobDB garbage collection because they had been
// overwritten. DEPRECATED.
@@ -317,11 +326,14 @@ enum Tickers : uint32_t {
BLOB_DB_GC_BYTES_EXPIRED,
// # of bytes relocated to new blob file by garbage collection.
BLOB_DB_GC_BYTES_RELOCATED,
// # of blob files evicted because of BlobDB is full.
// # of blob files evicted because of BlobDB is full. Only applicable to
// legacy BlobDB.
BLOB_DB_FIFO_NUM_FILES_EVICTED,
// # of keys in the blob files evicted because of BlobDB is full.
// # of keys in the blob files evicted because of BlobDB is full. Only
// applicable to legacy BlobDB.
BLOB_DB_FIFO_NUM_KEYS_EVICTED,
// # of bytes in the blob files evicted because of BlobDB is full.
// # of bytes in the blob files evicted because of BlobDB is full. Only
// applicable to legacy BlobDB.
BLOB_DB_FIFO_BYTES_EVICTED,
// These counters indicate a performance issue in WritePrepared transactions.
@@ -450,21 +462,23 @@ enum Histograms : uint32_t {
READ_NUM_MERGE_OPERANDS,
// BlobDB specific stats
// Size of keys written to BlobDB.
// Size of keys written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_KEY_SIZE,
// Size of values written to BlobDB.
// Size of values written to BlobDB. Only applicable to legacy BlobDB.
BLOB_DB_VALUE_SIZE,
// BlobDB Put/PutWithTTL/PutUntil/Write latency.
// BlobDB Put/PutWithTTL/PutUntil/Write latency. Only applicable to legacy
// BlobDB.
BLOB_DB_WRITE_MICROS,
// BlobDB Get latency.
// BlobDB Get latency. Only applicable to legacy BlobDB.
BLOB_DB_GET_MICROS,
// BlobDB MultiGet latency.
// BlobDB MultiGet latency. Only applicable to legacy BlobDB.
BLOB_DB_MULTIGET_MICROS,
// BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency.
// BlobDB Seek/SeekToFirst/SeekToLast/SeekForPrev latency. Only applicable to
// legacy BlobDB.
BLOB_DB_SEEK_MICROS,
// BlobDB Next latency.
// BlobDB Next latency. Only applicable to legacy BlobDB.
BLOB_DB_NEXT_MICROS,
// BlobDB Prev latency.
// BlobDB Prev latency. Only applicable to legacy BlobDB.
BLOB_DB_PREV_MICROS,
// Blob file write latency.
BLOB_DB_BLOB_FILE_WRITE_MICROS,
