// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include #include #include "monitoring/instrumented_mutex.h" #include "port/lang.h" #include "rocksdb/file_system.h" #include "rocksdb/options.h" #include "rocksdb/trace_record.h" #include "trace_replay/trace_replay.h" namespace ROCKSDB_NAMESPACE { class SystemClock; class TraceReader; class TraceWriter; /* In order to log new data in trace record for specified operations, do following: 1. Add new data in IOTraceOP (say kIONewData= 3) 2. Log it in IOTraceWriter::WriteIOOp, and read that in IOTraceReader::ReadIOOp and IOTraceRecordParser::PrintHumanReadableIOTraceRecord in the switch case. 3. In the FileSystemTracer APIs where this data will be logged with, update io_op_data |= (1 << IOTraceOp::kIONewData). */ enum IOTraceOp : char { // The value of each enum represents the bitwise position for // IOTraceRecord.io_op_data. kIOFileSize = 0, kIOLen = 1, kIOOffset = 2, }; struct IOTraceRecord { // Required fields for all accesses. uint64_t access_timestamp = 0; TraceType trace_type = TraceType::kTraceMax; // Each bit in io_op_data stores which corresponding info from IOTraceOp will // be added in the trace. Foreg, if bit at position 1 is set then // IOTraceOp::kIOLen (length) will be logged in the record. uint64_t io_op_data = 0; std::string file_operation; uint64_t latency = 0; std::string io_status; // Stores file name instead of full path. std::string file_name; // Fields added to record based on IO operation. uint64_t len = 0; uint64_t offset = 0; uint64_t file_size = 0; // Additional information passed in IODebugContext. uint64_t trace_data = 0; std::string request_id; IOTraceRecord() {} IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, const uint64_t& _io_op_data, const std::string& _file_operation, const uint64_t& _latency, const std::string& _io_status, const std::string& _file_name, const uint64_t& _file_size = 0) : access_timestamp(_access_timestamp), trace_type(_trace_type), io_op_data(_io_op_data), file_operation(_file_operation), latency(_latency), io_status(_io_status), file_name(_file_name), file_size(_file_size) {} IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, const uint64_t& _io_op_data, const std::string& _file_operation, const uint64_t& _latency, const std::string& _io_status, const std::string& _file_name, const uint64_t& _len, const uint64_t& _offset) : access_timestamp(_access_timestamp), trace_type(_trace_type), io_op_data(_io_op_data), file_operation(_file_operation), latency(_latency), io_status(_io_status), file_name(_file_name), len(_len), offset(_offset) {} }; struct IOTraceHeader { uint64_t start_time; uint32_t rocksdb_major_version; uint32_t rocksdb_minor_version; }; // IOTraceWriter writes IO operation as a single trace. Each trace will have a // timestamp and type, followed by the trace payload. class IOTraceWriter { public: IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options, std::unique_ptr&& trace_writer); ~IOTraceWriter() = default; // No copy and move. IOTraceWriter(const IOTraceWriter&) = delete; IOTraceWriter& operator=(const IOTraceWriter&) = delete; IOTraceWriter(IOTraceWriter&&) = delete; IOTraceWriter& operator=(IOTraceWriter&&) = delete; Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg); // Write a trace header at the beginning, typically on initiating a trace, // with some metadata like a magic number and RocksDB version. Status WriteHeader(); private: SystemClock* clock_; TraceOptions trace_options_; std::unique_ptr trace_writer_; }; // IOTraceReader helps read the trace file generated by IOTraceWriter. class IOTraceReader { public: explicit IOTraceReader(std::unique_ptr&& reader); ~IOTraceReader() = default; // No copy and move. IOTraceReader(const IOTraceReader&) = delete; IOTraceReader& operator=(const IOTraceReader&) = delete; IOTraceReader(IOTraceReader&&) = delete; IOTraceReader& operator=(IOTraceReader&&) = delete; Status ReadHeader(IOTraceHeader* header); Status ReadIOOp(IOTraceRecord* record); private: std::unique_ptr trace_reader_; }; // An IO tracer. It uses IOTraceWriter to write the access record to the // trace file. class IOTracer { public: IOTracer(); ~IOTracer(); // No copy and move. IOTracer(const IOTracer&) = delete; IOTracer& operator=(const IOTracer&) = delete; IOTracer(IOTracer&&) = delete; IOTracer& operator=(IOTracer&&) = delete; // no_sanitize is added for tracing_enabled. writer_ is protected under mutex // so even if user call Start/EndIOTrace and tracing_enabled is not updated in // the meanwhile, WriteIOOp will anyways check the writer_ protected under // mutex and ignore the operation if writer_is null. So its ok if // tracing_enabled shows non updated value. // Start writing IO operations to the trace_writer. TSAN_SUPPRESSION Status StartIOTrace(SystemClock* clock, const TraceOptions& trace_options, std::unique_ptr&& trace_writer); // Stop writing IO operations to the trace_writer. TSAN_SUPPRESSION void EndIOTrace(); TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; } void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg); private: TraceOptions trace_options_; // A mutex protects the writer_. InstrumentedMutex trace_writer_mutex_; std::atomic writer_; // bool tracing_enabled is added to avoid costly operation of checking atomic // variable 'writer_' is nullptr or not in is_tracing_enabled(). // is_tracing_enabled() is invoked multiple times by FileSystem classes. bool tracing_enabled; }; } // namespace ROCKSDB_NAMESPACE