// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "monitoring/instrumented_mutex.h" #include "rocksdb/env.h" #include "rocksdb/options.h" #include "rocksdb/trace_reader_writer.h" #include "trace_replay/trace_replay.h" namespace ROCKSDB_NAMESPACE { struct IOTraceRecord { // Required fields for all accesses. uint64_t access_timestamp = 0; TraceType trace_type = TraceType::kTraceMax; std::string file_operation; uint64_t latency = 0; std::string io_status; // Required fields for read. std::string file_name; size_t len = 0; uint64_t offset = 0; uint64_t file_size = 0; IOTraceRecord() {} IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, const std::string& _file_operation, const uint64_t& _latency, const std::string& _io_status, const std::string& _file_name) : access_timestamp(_access_timestamp), trace_type(_trace_type), file_operation(_file_operation), latency(_latency), io_status(_io_status), file_name(_file_name) {} IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, const std::string& _file_operation, const uint64_t& _latency, const std::string& _io_status, const std::string& _file_name, const uint64_t& _file_size) : access_timestamp(_access_timestamp), trace_type(_trace_type), file_operation(_file_operation), latency(_latency), io_status(_io_status), file_name(_file_name), file_size(_file_size) {} IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, const std::string& _file_operation, const uint64_t& _latency, const std::string& _io_status, const uint64_t& _len = 0, const uint64_t& _offset = 0) : access_timestamp(_access_timestamp), trace_type(_trace_type), file_operation(_file_operation), latency(_latency), io_status(_io_status), len(_len), offset(_offset) {} }; struct IOTraceHeader { uint64_t start_time; uint32_t rocksdb_major_version; uint32_t rocksdb_minor_version; }; // IOTraceWriter writes IO operation as a single trace. Each trace will have a // timestamp and type, followed by the trace payload. class IOTraceWriter { public: IOTraceWriter(Env* env, const TraceOptions& trace_options, std::unique_ptr&& trace_writer); ~IOTraceWriter() = default; // No copy and move. IOTraceWriter(const IOTraceWriter&) = delete; IOTraceWriter& operator=(const IOTraceWriter&) = delete; IOTraceWriter(IOTraceWriter&&) = delete; IOTraceWriter& operator=(IOTraceWriter&&) = delete; Status WriteIOOp(const IOTraceRecord& record); // Write a trace header at the beginning, typically on initiating a trace, // with some metadata like a magic number and RocksDB version. Status WriteHeader(); private: Env* env_; TraceOptions trace_options_; std::unique_ptr trace_writer_; }; // IOTraceReader helps read the trace file generated by IOTraceWriter. class IOTraceReader { public: explicit IOTraceReader(std::unique_ptr&& reader); ~IOTraceReader() = default; // No copy and move. IOTraceReader(const IOTraceReader&) = delete; IOTraceReader& operator=(const IOTraceReader&) = delete; IOTraceReader(IOTraceReader&&) = delete; IOTraceReader& operator=(IOTraceReader&&) = delete; Status ReadHeader(IOTraceHeader* header); Status ReadIOOp(IOTraceRecord* record); private: std::unique_ptr trace_reader_; }; // An IO tracer. It uses IOTraceWriter to write the access record to the // trace file. class IOTracer { public: IOTracer(); ~IOTracer(); // No copy and move. IOTracer(const IOTracer&) = delete; IOTracer& operator=(const IOTracer&) = delete; IOTracer(IOTracer&&) = delete; IOTracer& operator=(IOTracer&&) = delete; // Start writing IO operations to the trace_writer. Status StartIOTrace(Env* env, const TraceOptions& trace_options, std::unique_ptr&& trace_writer); // Stop writing IO operations to the trace_writer. void EndIOTrace(); bool is_tracing_enabled() const { return tracing_enabled; } Status WriteIOOp(const IOTraceRecord& record); private: TraceOptions trace_options_; // A mutex protects the writer_. InstrumentedMutex trace_writer_mutex_; std::atomic writer_; // bool tracing_enabled is added to avoid costly operation of checking atomic // variable 'writer_' is nullptr or not in is_tracing_enabled(). // is_tracing_enabled() is invoked multiple times by FileSystem classes. bool tracing_enabled; }; } // namespace ROCKSDB_NAMESPACE