fork of https://github.com/oxigraph/rocksdb and https://github.com/facebook/rocksdb for nextgraph and oxigraph
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
174 lines
5.8 KiB
174 lines
5.8 KiB
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
// This source code is licensed under both the GPLv2 (found in the
|
|
// COPYING file in the root directory) and Apache 2.0 License
|
|
// (found in the LICENSE.Apache file in the root directory).
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
#include <fstream>
|
|
|
|
#include "monitoring/instrumented_mutex.h"
|
|
#include "rocksdb/env.h"
|
|
#include "rocksdb/options.h"
|
|
#include "rocksdb/trace_reader_writer.h"
|
|
#include "trace_replay/trace_replay.h"
|
|
|
|
namespace ROCKSDB_NAMESPACE {
|
|
|
|
struct IOTraceRecord {
|
|
// Required fields for all accesses.
|
|
uint64_t access_timestamp = 0;
|
|
TraceType trace_type = TraceType::kTraceMax;
|
|
std::string file_operation;
|
|
uint64_t latency = 0;
|
|
std::string io_status;
|
|
// Required fields for read.
|
|
std::string file_name;
|
|
uint64_t len = 0;
|
|
uint64_t offset = 0;
|
|
uint64_t file_size = 0;
|
|
|
|
IOTraceRecord() {}
|
|
|
|
IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
|
|
const std::string& _file_operation, const uint64_t& _latency,
|
|
const std::string& _io_status, const std::string& _file_name)
|
|
: access_timestamp(_access_timestamp),
|
|
trace_type(_trace_type),
|
|
file_operation(_file_operation),
|
|
latency(_latency),
|
|
io_status(_io_status),
|
|
file_name(_file_name) {}
|
|
|
|
IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
|
|
const std::string& _file_operation, const uint64_t& _latency,
|
|
const std::string& _io_status, const std::string& _file_name,
|
|
const uint64_t& _file_size)
|
|
: access_timestamp(_access_timestamp),
|
|
trace_type(_trace_type),
|
|
file_operation(_file_operation),
|
|
latency(_latency),
|
|
io_status(_io_status),
|
|
file_name(_file_name),
|
|
file_size(_file_size) {}
|
|
|
|
IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
|
|
const std::string& _file_operation, const uint64_t& _latency,
|
|
const std::string& _io_status, const uint64_t& _len = 0,
|
|
const uint64_t& _offset = 0)
|
|
: access_timestamp(_access_timestamp),
|
|
trace_type(_trace_type),
|
|
file_operation(_file_operation),
|
|
latency(_latency),
|
|
io_status(_io_status),
|
|
len(_len),
|
|
offset(_offset) {}
|
|
};
|
|
|
|
// Metadata stored at the beginning of an IO trace. Every field is
// zero-initialized so that a default-constructed header never holds
// indeterminate values (e.g. when it is handed to a reader as an out-param
// and the read fails before all fields are filled in).
struct IOTraceHeader {
  // Time at which the trace was started.
  // NOTE(review): the unit is not visible in this header - confirm against
  // the writer implementation.
  uint64_t start_time = 0;
  // RocksDB version recorded in the trace.
  uint32_t rocksdb_major_version = 0;
  uint32_t rocksdb_minor_version = 0;
};
|
|
|
|
// IOTraceWriter writes IO operation as a single trace. Each trace will have a
|
|
// timestamp and type, followed by the trace payload.
|
|
class IOTraceWriter {
|
|
public:
|
|
IOTraceWriter(Env* env, const TraceOptions& trace_options,
|
|
std::unique_ptr<TraceWriter>&& trace_writer);
|
|
~IOTraceWriter() = default;
|
|
// No copy and move.
|
|
IOTraceWriter(const IOTraceWriter&) = delete;
|
|
IOTraceWriter& operator=(const IOTraceWriter&) = delete;
|
|
IOTraceWriter(IOTraceWriter&&) = delete;
|
|
IOTraceWriter& operator=(IOTraceWriter&&) = delete;
|
|
|
|
Status WriteIOOp(const IOTraceRecord& record);
|
|
|
|
// Write a trace header at the beginning, typically on initiating a trace,
|
|
// with some metadata like a magic number and RocksDB version.
|
|
Status WriteHeader();
|
|
|
|
private:
|
|
Env* env_;
|
|
TraceOptions trace_options_;
|
|
std::unique_ptr<TraceWriter> trace_writer_;
|
|
};
|
|
|
|
// IOTraceReader helps read the trace file generated by IOTraceWriter.
|
|
class IOTraceReader {
|
|
public:
|
|
explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader);
|
|
~IOTraceReader() = default;
|
|
// No copy and move.
|
|
IOTraceReader(const IOTraceReader&) = delete;
|
|
IOTraceReader& operator=(const IOTraceReader&) = delete;
|
|
IOTraceReader(IOTraceReader&&) = delete;
|
|
IOTraceReader& operator=(IOTraceReader&&) = delete;
|
|
|
|
Status ReadHeader(IOTraceHeader* header);
|
|
|
|
Status ReadIOOp(IOTraceRecord* record);
|
|
|
|
private:
|
|
std::unique_ptr<TraceReader> trace_reader_;
|
|
};
|
|
|
|
// An IO tracer. It uses IOTraceWriter to write the access record to the
|
|
// trace file.
|
|
class IOTracer {
|
|
public:
|
|
IOTracer();
|
|
~IOTracer();
|
|
// No copy and move.
|
|
IOTracer(const IOTracer&) = delete;
|
|
IOTracer& operator=(const IOTracer&) = delete;
|
|
IOTracer(IOTracer&&) = delete;
|
|
IOTracer& operator=(IOTracer&&) = delete;
|
|
|
|
// no_sanitize is added for tracing_enabled. writer_ is protected under mutex
|
|
// so even if user call Start/EndIOTrace and tracing_enabled is not updated in
|
|
// the meanwhile, WriteIOOp will anyways check the writer_ protected under
|
|
// mutex and ignore the operation if writer_is null. So its ok if
|
|
// tracing_enabled shows non updated value.
|
|
|
|
#if defined(__clang__)
|
|
#if defined(__has_feature) && __has_feature(thread_sanitizer)
|
|
#define TSAN_SUPPRESSION __attribute__((no_sanitize("thread")))
|
|
#endif // __has_feature(thread_sanitizer)
|
|
#else // __clang__
|
|
#ifdef __SANITIZE_THREAD__
|
|
#define TSAN_SUPPRESSION __attribute__((no_sanitize("thread")))
|
|
#endif // __SANITIZE_THREAD__
|
|
#endif // __clang__
|
|
|
|
#ifndef TSAN_SUPPRESSION
|
|
#define TSAN_SUPPRESSION
|
|
#endif // TSAN_SUPPRESSION
|
|
|
|
// Start writing IO operations to the trace_writer.
|
|
TSAN_SUPPRESSION Status
|
|
StartIOTrace(Env* env, const TraceOptions& trace_options,
|
|
std::unique_ptr<TraceWriter>&& trace_writer);
|
|
|
|
// Stop writing IO operations to the trace_writer.
|
|
TSAN_SUPPRESSION void EndIOTrace();
|
|
|
|
TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; }
|
|
|
|
Status WriteIOOp(const IOTraceRecord& record);
|
|
|
|
private:
|
|
TraceOptions trace_options_;
|
|
// A mutex protects the writer_.
|
|
InstrumentedMutex trace_writer_mutex_;
|
|
std::atomic<IOTraceWriter*> writer_;
|
|
// bool tracing_enabled is added to avoid costly operation of checking atomic
|
|
// variable 'writer_' is nullptr or not in is_tracing_enabled().
|
|
// is_tracing_enabled() is invoked multiple times by FileSystem classes.
|
|
bool tracing_enabled;
|
|
};
|
|
|
|
} // namespace ROCKSDB_NAMESPACE
|
|
|