Make TraceRecord and Replayer public (#8611)
Summary: New public interfaces: `TraceRecord` and `TraceRecord::Handler`, available in `rocksdb/trace_record.h`; `Replayer`, available in `rocksdb/utilities/replayer.h`. Users can call `DB::NewDefaultReplayer()` to create a Replayer that replays a trace file either automatically or manually. Unit tests: - `./db_test2 --gtest_filter="DBTest2.TraceAndReplay"`: Updated with the internal API changes. - `./db_test2 --gtest_filter="DBTest2.TraceAndManualReplay"`: New, covers manual replay. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8611 Reviewed By: ajkr Differential Revision: D30266329 Pulled By: autopear fbshipit-source-id: 1ecb3cbbedae0f6a67c18f0cc82e002b4d81b6f8 (branch: main)
parent
a53563d86e
commit
f58d276764
@ -0,0 +1,205 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
|
||||
#include <string> |
||||
#include <vector> |
||||
|
||||
#include "rocksdb/rocksdb_namespace.h" |
||||
#include "rocksdb/slice.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
class ColumnFamilyHandle; |
||||
class DB; |
||||
class Status; |
||||
|
||||
// Every kind of record that can appear in a trace. The numeric values are
// spelled out explicitly; kTraceMax is the only implicitly numbered entry.
enum TraceType : char {
  kTraceNone = 0,
  kTraceBegin = 1,
  kTraceEnd = 2,

  // Query level tracing.
  kTraceWrite = 3,
  kTraceGet = 4,
  kTraceIteratorSeek = 5,
  kTraceIteratorSeekForPrev = 6,

  // Block cache tracing.
  kBlockTraceIndexBlock = 7,
  kBlockTraceFilterBlock = 8,
  kBlockTraceDataBlock = 9,
  kBlockTraceUncompressionDictBlock = 10,
  kBlockTraceRangeDeletionBlock = 11,

  // IO tracing.
  kIOTracer = 12,

  // Query level tracing (continued).
  kTraceMultiGet = 13,

  // Sentinel: new trace types must be inserted above this enumerator.
  kTraceMax,
};
||||
|
||||
class WriteQueryTraceRecord; |
||||
class GetQueryTraceRecord; |
||||
class IteratorSeekQueryTraceRecord; |
||||
class MultiGetQueryTraceRecord; |
||||
|
||||
// Base class for all types of trace records.
|
||||
class TraceRecord { |
||||
public: |
||||
TraceRecord(); |
||||
explicit TraceRecord(uint64_t timestamp); |
||||
virtual ~TraceRecord(); |
||||
|
||||
virtual TraceType GetTraceType() const = 0; |
||||
|
||||
virtual uint64_t GetTimestamp() const; |
||||
|
||||
class Handler { |
||||
public: |
||||
virtual ~Handler() {} |
||||
|
||||
virtual Status Handle(const WriteQueryTraceRecord& record) = 0; |
||||
virtual Status Handle(const GetQueryTraceRecord& record) = 0; |
||||
virtual Status Handle(const IteratorSeekQueryTraceRecord& record) = 0; |
||||
virtual Status Handle(const MultiGetQueryTraceRecord& record) = 0; |
||||
}; |
||||
|
||||
virtual Status Accept(Handler* handler) = 0; |
||||
|
||||
// Create a handler for the exeution of TraceRecord.
|
||||
static Handler* NewExecutionHandler( |
||||
DB* db, const std::vector<ColumnFamilyHandle*>& handles); |
||||
|
||||
private: |
||||
// Timestamp (in microseconds) of this trace.
|
||||
uint64_t timestamp_; |
||||
}; |
||||
|
||||
// Base class for all query types of trace records.
|
||||
class QueryTraceRecord : public TraceRecord { |
||||
public: |
||||
explicit QueryTraceRecord(uint64_t timestamp); |
||||
|
||||
virtual ~QueryTraceRecord() override; |
||||
}; |
||||
|
||||
// Trace record for DB::Write() operation.
|
||||
class WriteQueryTraceRecord : public QueryTraceRecord { |
||||
public: |
||||
WriteQueryTraceRecord(PinnableSlice&& write_batch_rep, uint64_t timestamp); |
||||
|
||||
WriteQueryTraceRecord(const std::string& write_batch_rep, uint64_t timestamp); |
||||
|
||||
virtual ~WriteQueryTraceRecord() override; |
||||
|
||||
TraceType GetTraceType() const override { return kTraceWrite; }; |
||||
|
||||
virtual Slice GetWriteBatchRep() const; |
||||
|
||||
virtual Status Accept(Handler* handler) override; |
||||
|
||||
private: |
||||
PinnableSlice rep_; |
||||
}; |
||||
|
||||
// Trace record for DB::Get() operation
|
||||
class GetQueryTraceRecord : public QueryTraceRecord { |
||||
public: |
||||
GetQueryTraceRecord(uint32_t column_family_id, PinnableSlice&& key, |
||||
uint64_t timestamp); |
||||
|
||||
GetQueryTraceRecord(uint32_t column_family_id, const std::string& key, |
||||
uint64_t timestamp); |
||||
|
||||
virtual ~GetQueryTraceRecord() override; |
||||
|
||||
TraceType GetTraceType() const override { return kTraceGet; }; |
||||
|
||||
virtual uint32_t GetColumnFamilyID() const; |
||||
|
||||
virtual Slice GetKey() const; |
||||
|
||||
virtual Status Accept(Handler* handler) override; |
||||
|
||||
private: |
||||
// Column family ID.
|
||||
uint32_t cf_id_; |
||||
// Key to get.
|
||||
PinnableSlice key_; |
||||
}; |
||||
|
||||
// Base class for all Iterator related operations.
|
||||
class IteratorQueryTraceRecord : public QueryTraceRecord { |
||||
public: |
||||
explicit IteratorQueryTraceRecord(uint64_t timestamp); |
||||
|
||||
virtual ~IteratorQueryTraceRecord() override; |
||||
}; |
||||
|
||||
// Trace record for Iterator::Seek() and Iterator::SeekForPrev() operation.
|
||||
class IteratorSeekQueryTraceRecord : public IteratorQueryTraceRecord { |
||||
public: |
||||
// Currently we only support Seek() and SeekForPrev().
|
||||
enum SeekType { |
||||
kSeek = kTraceIteratorSeek, |
||||
kSeekForPrev = kTraceIteratorSeekForPrev |
||||
}; |
||||
|
||||
IteratorSeekQueryTraceRecord(SeekType seekType, uint32_t column_family_id, |
||||
PinnableSlice&& key, uint64_t timestamp); |
||||
|
||||
IteratorSeekQueryTraceRecord(SeekType seekType, uint32_t column_family_id, |
||||
const std::string& key, uint64_t timestamp); |
||||
|
||||
virtual ~IteratorSeekQueryTraceRecord() override; |
||||
|
||||
TraceType GetTraceType() const override; |
||||
|
||||
virtual SeekType GetSeekType() const; |
||||
|
||||
virtual uint32_t GetColumnFamilyID() const; |
||||
|
||||
virtual Slice GetKey() const; |
||||
|
||||
virtual Status Accept(Handler* handler) override; |
||||
|
||||
private: |
||||
SeekType type_; |
||||
// Column family ID.
|
||||
uint32_t cf_id_; |
||||
// Key to seek to.
|
||||
PinnableSlice key_; |
||||
}; |
||||
|
||||
// Trace record for DB::MultiGet() operation.
|
||||
class MultiGetQueryTraceRecord : public QueryTraceRecord { |
||||
public: |
||||
MultiGetQueryTraceRecord(std::vector<uint32_t> column_family_ids, |
||||
std::vector<PinnableSlice>&& keys, |
||||
uint64_t timestamp); |
||||
|
||||
MultiGetQueryTraceRecord(std::vector<uint32_t> column_family_ids, |
||||
const std::vector<std::string>& keys, |
||||
uint64_t timestamp); |
||||
|
||||
virtual ~MultiGetQueryTraceRecord() override; |
||||
|
||||
TraceType GetTraceType() const override { return kTraceMultiGet; }; |
||||
|
||||
virtual std::vector<uint32_t> GetColumnFamilyIDs() const; |
||||
|
||||
virtual std::vector<Slice> GetKeys() const; |
||||
|
||||
virtual Status Accept(Handler* handler) override; |
||||
|
||||
private: |
||||
// Column familiy IDs.
|
||||
std::vector<uint32_t> cf_ids_; |
||||
// Keys to get.
|
||||
std::vector<PinnableSlice> keys_; |
||||
}; |
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,74 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
#include <memory> |
||||
|
||||
#include "rocksdb/rocksdb_namespace.h" |
||||
#include "rocksdb/status.h" |
||||
#include "rocksdb/trace_record.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// Tunables for Replayer::Replay().
struct ReplayOptions {
  // How many threads replay the trace. Values 0 and 1 both mean
  // single-threaded replay.
  uint32_t num_threads;

  // Scales the delay between replayed traces:
  //   > 0.0 and < 1.0 : replay slower than recorded, by this factor;
  //   == 1.0          : replay at the recorded rate;
  //   > 1.0           : replay faster than recorded, by this factor.
  double fast_forward;

  // Defaults: one thread, recorded speed.
  ReplayOptions() : ReplayOptions(1, 1.0) {}

  ReplayOptions(uint32_t num_of_threads, double fast_forward_ratio)
      : num_threads(num_of_threads), fast_forward(fast_forward_ratio) {}
};
||||
|
||||
// Replayer helps to replay the captured RocksDB query level operations.
|
||||
// The Replayer can either be created from DB::NewReplayer method, or be
|
||||
// instantiated via db_bench today, on using "replay" benchmark.
|
||||
class Replayer { |
||||
public: |
||||
virtual ~Replayer() {} |
||||
|
||||
// Make some preparation before replaying the trace. This will also reset the
|
||||
// replayer in order to restart replaying.
|
||||
virtual Status Prepare() = 0; |
||||
|
||||
// Return the timestamp when the trace recording was started.
|
||||
virtual uint64_t GetHeaderTimestamp() const = 0; |
||||
|
||||
// Atomically read one trace into a TraceRecord (excluding the header and
|
||||
// footer traces).
|
||||
// Return Status::OK() on success;
|
||||
// Status::Incomplete() if Prepare() was not called or no more available
|
||||
// trace;
|
||||
// Status::NotSupported() if the read trace type is not supported.
|
||||
virtual Status Next(std::unique_ptr<TraceRecord>* record) = 0; |
||||
|
||||
// Execute one TraceRecord.
|
||||
// Return Status::OK() if the execution was successful. Get/MultiGet traces
|
||||
// will still return Status::OK() even if they got Status::NotFound()
|
||||
// from DB::Get() or DB::MultiGet();
|
||||
// Status::Incomplete() if Prepare() was not called or no more available
|
||||
// trace;
|
||||
// Status::NotSupported() if the operation is not supported;
|
||||
// Otherwise, return the corresponding error status.
|
||||
virtual Status Execute(const std::unique_ptr<TraceRecord>& record) = 0; |
||||
virtual Status Execute(std::unique_ptr<TraceRecord>&& record) = 0; |
||||
|
||||
// Replay all the traces from the provided trace stream, taking the delay
|
||||
// between the traces into consideration.
|
||||
virtual Status Replay(const ReplayOptions& options) = 0; |
||||
virtual Status Replay() { return Replay(ReplayOptions()); } |
||||
}; |
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
#endif // ROCKSDB_LITE
|
@ -0,0 +1,163 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include "rocksdb/trace_record.h" |
||||
|
||||
#include <utility> |
||||
|
||||
#include "rocksdb/db.h" |
||||
#include "rocksdb/iterator.h" |
||||
#include "rocksdb/options.h" |
||||
#include "rocksdb/status.h" |
||||
#include "trace_replay/trace_record_handler.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// TraceRecord
|
||||
TraceRecord::TraceRecord(uint64_t timestamp) : timestamp_(timestamp) {} |
||||
|
||||
TraceRecord::~TraceRecord() {} |
||||
|
||||
uint64_t TraceRecord::GetTimestamp() const { return timestamp_; } |
||||
|
||||
TraceRecord::Handler* TraceRecord::NewExecutionHandler( |
||||
DB* db, const std::vector<ColumnFamilyHandle*>& handles) { |
||||
return new TraceExecutionHandler(db, handles); |
||||
} |
||||
|
||||
// QueryTraceRecord
|
||||
QueryTraceRecord::QueryTraceRecord(uint64_t timestamp) |
||||
: TraceRecord(timestamp) {} |
||||
|
||||
QueryTraceRecord::~QueryTraceRecord() {} |
||||
|
||||
// WriteQueryTraceRecord
|
||||
WriteQueryTraceRecord::WriteQueryTraceRecord(PinnableSlice&& write_batch_rep, |
||||
uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp), rep_(std::move(write_batch_rep)) {} |
||||
|
||||
WriteQueryTraceRecord::WriteQueryTraceRecord(const std::string& write_batch_rep, |
||||
uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp) { |
||||
rep_.PinSelf(write_batch_rep); |
||||
} |
||||
|
||||
WriteQueryTraceRecord::~WriteQueryTraceRecord() {} |
||||
|
||||
Slice WriteQueryTraceRecord::GetWriteBatchRep() const { return Slice(rep_); } |
||||
|
||||
Status WriteQueryTraceRecord::Accept(Handler* handler) { |
||||
assert(handler != nullptr); |
||||
return handler->Handle(*this); |
||||
} |
||||
|
||||
// GetQueryTraceRecord
|
||||
GetQueryTraceRecord::GetQueryTraceRecord(uint32_t column_family_id, |
||||
PinnableSlice&& key, |
||||
uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp), |
||||
cf_id_(column_family_id), |
||||
key_(std::move(key)) {} |
||||
|
||||
GetQueryTraceRecord::GetQueryTraceRecord(uint32_t column_family_id, |
||||
const std::string& key, |
||||
uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp), cf_id_(column_family_id) { |
||||
key_.PinSelf(key); |
||||
} |
||||
|
||||
GetQueryTraceRecord::~GetQueryTraceRecord() {} |
||||
|
||||
uint32_t GetQueryTraceRecord::GetColumnFamilyID() const { return cf_id_; } |
||||
|
||||
Slice GetQueryTraceRecord::GetKey() const { return Slice(key_); } |
||||
|
||||
Status GetQueryTraceRecord::Accept(Handler* handler) { |
||||
assert(handler != nullptr); |
||||
return handler->Handle(*this); |
||||
} |
||||
|
||||
// IteratorQueryTraceRecord
|
||||
IteratorQueryTraceRecord::IteratorQueryTraceRecord(uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp) {} |
||||
|
||||
IteratorQueryTraceRecord::~IteratorQueryTraceRecord() {} |
||||
|
||||
// IteratorSeekQueryTraceRecord
|
||||
IteratorSeekQueryTraceRecord::IteratorSeekQueryTraceRecord( |
||||
SeekType seek_type, uint32_t column_family_id, PinnableSlice&& key, |
||||
uint64_t timestamp) |
||||
: IteratorQueryTraceRecord(timestamp), |
||||
type_(seek_type), |
||||
cf_id_(column_family_id), |
||||
key_(std::move(key)) {} |
||||
|
||||
IteratorSeekQueryTraceRecord::IteratorSeekQueryTraceRecord( |
||||
SeekType seek_type, uint32_t column_family_id, const std::string& key, |
||||
uint64_t timestamp) |
||||
: IteratorQueryTraceRecord(timestamp), |
||||
type_(seek_type), |
||||
cf_id_(column_family_id) { |
||||
key_.PinSelf(key); |
||||
} |
||||
|
||||
IteratorSeekQueryTraceRecord::~IteratorSeekQueryTraceRecord() {} |
||||
|
||||
TraceType IteratorSeekQueryTraceRecord::GetTraceType() const { |
||||
return static_cast<TraceType>(type_); |
||||
} |
||||
|
||||
IteratorSeekQueryTraceRecord::SeekType |
||||
IteratorSeekQueryTraceRecord::GetSeekType() const { |
||||
return type_; |
||||
} |
||||
|
||||
uint32_t IteratorSeekQueryTraceRecord::GetColumnFamilyID() const { |
||||
return cf_id_; |
||||
} |
||||
|
||||
Slice IteratorSeekQueryTraceRecord::GetKey() const { return Slice(key_); } |
||||
|
||||
Status IteratorSeekQueryTraceRecord::Accept(Handler* handler) { |
||||
assert(handler != nullptr); |
||||
return handler->Handle(*this); |
||||
} |
||||
|
||||
// MultiGetQueryTraceRecord
|
||||
MultiGetQueryTraceRecord::MultiGetQueryTraceRecord( |
||||
std::vector<uint32_t> column_family_ids, std::vector<PinnableSlice>&& keys, |
||||
uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp), |
||||
cf_ids_(column_family_ids), |
||||
keys_(std::move(keys)) {} |
||||
|
||||
MultiGetQueryTraceRecord::MultiGetQueryTraceRecord( |
||||
std::vector<uint32_t> column_family_ids, |
||||
const std::vector<std::string>& keys, uint64_t timestamp) |
||||
: QueryTraceRecord(timestamp), cf_ids_(column_family_ids) { |
||||
keys_.reserve(keys.size()); |
||||
for (const std::string& key : keys) { |
||||
PinnableSlice ps; |
||||
ps.PinSelf(key); |
||||
keys_.push_back(std::move(ps)); |
||||
} |
||||
} |
||||
|
||||
MultiGetQueryTraceRecord::~MultiGetQueryTraceRecord() {} |
||||
|
||||
std::vector<uint32_t> MultiGetQueryTraceRecord::GetColumnFamilyIDs() const { |
||||
return cf_ids_; |
||||
} |
||||
|
||||
std::vector<Slice> MultiGetQueryTraceRecord::GetKeys() const { |
||||
return std::vector<Slice>(keys_.begin(), keys_.end()); |
||||
} |
||||
|
||||
Status MultiGetQueryTraceRecord::Accept(Handler* handler) { |
||||
assert(handler != nullptr); |
||||
return handler->Handle(*this); |
||||
} |
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,108 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#include "trace_replay/trace_record_handler.h" |
||||
|
||||
#include "rocksdb/iterator.h" |
||||
#include "rocksdb/write_batch.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// TraceExecutionHandler
|
||||
// Binds the handler to `db` and indexes `handles` by column family ID so the
// Handle() overloads can look a handle up from a traced ID. Reads and writes
// use default-constructed options.
TraceExecutionHandler::TraceExecutionHandler(
    DB* db, const std::vector<ColumnFamilyHandle*>& handles)
    : TraceRecord::Handler(),
      db_(db),
      write_opts_(WriteOptions()),
      read_opts_(ReadOptions()) {
  assert(db != nullptr);
  assert(!handles.empty());

  cf_map_.reserve(handles.size());
  for (ColumnFamilyHandle* cf_handle : handles) {
    assert(cf_handle != nullptr);
    // A repeated ID keeps the first handle registered for it.
    cf_map_.emplace(cf_handle->GetID(), cf_handle);
  }
}
||||
|
||||
// Drops the ID-to-handle index. The handles themselves are not deleted here.
TraceExecutionHandler::~TraceExecutionHandler() {
  cf_map_.clear();
}
||||
|
||||
// Rebuilds a WriteBatch from the traced representation and writes it to the
// DB, returning the Status of DB::Write().
Status TraceExecutionHandler::Handle(const WriteQueryTraceRecord& record) {
  WriteBatch replayed_batch(record.GetWriteBatchRep().ToString());
  Status write_status = db_->Write(write_opts_, &replayed_batch);
  return write_status;
}
||||
|
||||
// Replays a traced Get(). Returns Corruption when the traced column family ID
// is unknown to this handler; a NotFound result from the DB counts as success.
Status TraceExecutionHandler::Handle(const GetQueryTraceRecord& record) {
  const auto cf_entry = cf_map_.find(record.GetColumnFamilyID());
  if (cf_entry == cf_map_.end()) {
    return Status::Corruption("Invalid Column Family ID.");
  }
  ColumnFamilyHandle* const cf_handle = cf_entry->second;
  assert(cf_handle != nullptr);

  std::string value;
  Status s = db_->Get(read_opts_, cf_handle, record.GetKey(), &value);

  // Treat not found as ok and return other errors.
  if (s.IsNotFound()) {
    return Status::OK();
  }
  return s;
}
||||
|
||||
// Replays a traced iterator seek on a freshly created iterator and returns
// the iterator's status afterwards. Returns Corruption when the traced column
// family ID is unknown to this handler.
Status TraceExecutionHandler::Handle(
    const IteratorSeekQueryTraceRecord& record) {
  const auto cf_entry = cf_map_.find(record.GetColumnFamilyID());
  if (cf_entry == cf_map_.end()) {
    return Status::Corruption("Invalid Column Family ID.");
  }
  ColumnFamilyHandle* const cf_handle = cf_entry->second;
  assert(cf_handle != nullptr);

  Iterator* iter = db_->NewIterator(read_opts_, cf_handle);

  // Anything other than kSeekForPrev is replayed as a plain Seek().
  if (record.GetSeekType() == IteratorSeekQueryTraceRecord::kSeekForPrev) {
    iter->SeekForPrev(record.GetKey());
  } else {
    iter->Seek(record.GetKey());
  }

  // Capture the status before the iterator is destroyed.
  Status seek_status = iter->status();
  delete iter;
  return seek_status;
}
||||
|
||||
// Replays a traced MultiGet().
// Returns:
//   Corruption if any traced column family ID is unknown to this handler;
//   InvalidArgument if the ID list or key list is empty, or their sizes
//     differ;
//   the first per-key error other than NotFound reported by DB::MultiGet();
//   OK otherwise (NotFound results count as success).
Status TraceExecutionHandler::Handle(const MultiGetQueryTraceRecord& record) {
  // GetColumnFamilyIDs() returns the list by value, so fetch it once instead
  // of copying the vector for both the reserve() and the loop.
  const std::vector<uint32_t> cf_ids = record.GetColumnFamilyIDs();

  std::vector<ColumnFamilyHandle*> handles;
  handles.reserve(cf_ids.size());
  for (uint32_t cf_id : cf_ids) {
    auto it = cf_map_.find(cf_id);
    if (it == cf_map_.end()) {
      return Status::Corruption("Invalid Column Family ID.");
    }
    assert(it->second != nullptr);
    handles.push_back(it->second);
  }

  std::vector<Slice> keys = record.GetKeys();

  if (handles.empty() || keys.empty()) {
    return Status::InvalidArgument("Empty MultiGet cf_ids or keys.");
  }
  if (handles.size() != keys.size()) {
    return Status::InvalidArgument("MultiGet cf_ids and keys size mismatch.");
  }

  std::vector<std::string> values;
  std::vector<Status> ss = db_->MultiGet(read_opts_, handles, keys, &values);

  // Treat not found as ok, return other errors. Iterate by reference so each
  // Status is not copied just to be inspected.
  for (const Status& s : ss) {
    if (!s.ok() && !s.IsNotFound()) {
      return s;
    }
  }
  return Status::OK();
}
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,39 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
|
||||
#include <unordered_map> |
||||
#include <vector> |
||||
|
||||
#include "rocksdb/db.h" |
||||
#include "rocksdb/options.h" |
||||
#include "rocksdb/status.h" |
||||
#include "rocksdb/trace_record.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// Handler to execute TraceRecord.
|
||||
class TraceExecutionHandler : public TraceRecord::Handler { |
||||
public: |
||||
TraceExecutionHandler(DB* db, |
||||
const std::vector<ColumnFamilyHandle*>& handles); |
||||
virtual ~TraceExecutionHandler() override; |
||||
|
||||
virtual Status Handle(const WriteQueryTraceRecord& record) override; |
||||
virtual Status Handle(const GetQueryTraceRecord& record) override; |
||||
virtual Status Handle(const IteratorSeekQueryTraceRecord& record) override; |
||||
virtual Status Handle(const MultiGetQueryTraceRecord& record) override; |
||||
|
||||
private: |
||||
DB* db_; |
||||
std::unordered_map<uint32_t, ColumnFamilyHandle*> cf_map_; |
||||
WriteOptions write_opts_; |
||||
ReadOptions read_opts_; |
||||
}; |
||||
|
||||
// To do: Handler for trace_analyzer.
|
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
@ -0,0 +1,305 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
#include "utilities/trace/replayer_impl.h" |
||||
|
||||
#include <cmath> |
||||
#include <thread> |
||||
|
||||
#include "rocksdb/db.h" |
||||
#include "rocksdb/env.h" |
||||
#include "rocksdb/options.h" |
||||
#include "rocksdb/slice.h" |
||||
#include "rocksdb/status.h" |
||||
#include "rocksdb/system_clock.h" |
||||
#include "rocksdb/trace_reader_writer.h" |
||||
#include "util/threadpool_imp.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// Takes ownership of `reader` and creates the execution handler for `db` and
// `handles`. The replayer starts unprepared; Prepare() must run before the
// trace can be read.
ReplayerImpl::ReplayerImpl(DB* db,
                           const std::vector<ColumnFamilyHandle*>& handles,
                           std::unique_ptr<TraceReader>&& reader)
    : Replayer(),
      env_(db->GetEnv()),
      trace_reader_(std::move(reader)),
      prepared_(false),
      trace_end_(false),
      header_ts_(0),
      exec_handler_(TraceRecord::NewExecutionHandler(db, handles)) {
}
||||
|
||||
// Releases the execution handler first, then the trace reader.
ReplayerImpl::~ReplayerImpl() {
  exec_handler_.reset(nullptr);
  trace_reader_.reset(nullptr);
}
||||
|
||||
// Rewinds the reader, reads and parses the trace header, and on success
// records the header timestamp and marks the replayer as prepared and not at
// the trace end. On failure the replayer state is left as it was.
Status ReplayerImpl::Prepare() {
  Trace header;
  Status s = ReadHeader(&header);
  if (s.ok()) {
    // The DB version is parsed but not used by the replayer.
    int db_version = 0;
    s = TracerHelper::ParseTraceHeader(header, &trace_file_version_,
                                       &db_version);
  }
  if (!s.ok()) {
    return s;
  }

  header_ts_ = header.ts;
  prepared_ = true;
  trace_end_ = false;
  return Status::OK();
}
||||
|
||||
// Reads the next trace and decodes it into `record`.
// Returns Incomplete when the replayer is not prepared or the trace end has
// been reached, the read error if reading fails, and otherwise the result of
// decoding. With a null `record` the trace is read and then discarded.
Status ReplayerImpl::Next(std::unique_ptr<TraceRecord>* record) {
  if (!prepared_) {
    return Status::Incomplete("Not prepared!");
  }
  if (trace_end_) {
    return Status::Incomplete("Trace end.");
  }

  Trace trace;
  Status s = ReadTrace(&trace);  // ReadTrace is atomic
  if (!s.ok()) {
    return s;
  }

  // Reached the trace end.
  if (trace.type == kTraceEnd) {
    trace_end_ = true;
    return Status::Incomplete("Trace end.");
  }

  // Nowhere to store the decoded record: report the successful read.
  if (record == nullptr) {
    return s;
  }
  return DecodeTraceRecord(&trace, trace_file_version_, record);
}
||||
|
||||
// Executes `record` through the execution handler and returns the handler's
// Status. An empty `record` is rejected with InvalidArgument instead of being
// dereferenced.
Status ReplayerImpl::Execute(const std::unique_ptr<TraceRecord>& record) {
  if (record == nullptr) {
    return Status::InvalidArgument("TraceRecord is nullptr.");
  }
  return record->Accept(exec_handler_.get());
}
||||
|
||||
// Executes `record` through the execution handler, then destroys the record.
// An empty `record` is rejected with InvalidArgument instead of being
// dereferenced.
Status ReplayerImpl::Execute(std::unique_ptr<TraceRecord>&& record) {
  if (record == nullptr) {
    return Status::InvalidArgument("TraceRecord is nullptr.");
  }
  Status s = record->Accept(exec_handler_.get());
  record.reset();
  return s;
}
||||
|
||||
// Replays the remaining traces, pacing them by their recorded timestamps
// scaled by options.fast_forward.
//
// Returns InvalidArgument for a non-positive fast_forward, and Incomplete if
// the replayer is not prepared or already at the trace end. Otherwise replays
// until the end marker, end of input, or the first error. An Incomplete
// status raised while replaying (end marker or end of input) is reported as
// OK; other errors are returned. Unsupported traces are skipped.
Status ReplayerImpl::Replay(const ReplayOptions& options) {
  if (options.fast_forward <= 0.0) {
    return Status::InvalidArgument("Wrong fast forward speed!");
  }

  if (!prepared_) {
    return Status::Incomplete("Not prepared!");
  }
  if (trace_end_) {
    return Status::Incomplete("Trace end.");
  }

  Status s = Status::OK();

  if (options.num_threads <= 1) {
    // num_threads == 0 or num_threads == 1 uses single thread.
    std::chrono::system_clock::time_point replay_epoch =
        std::chrono::system_clock::now();

    while (s.ok()) {
      Trace trace;
      s = ReadTrace(&trace);
      // If already at trace end, ReadTrace should return Status::Incomplete().
      if (!s.ok()) {
        break;
      }

      // No need to sleep before breaking the loop if at the trace end.
      if (trace.type == kTraceEnd) {
        trace_end_ = true;
        s = Status::Incomplete("Trace end.");
        break;
      }

      // In single-threaded replay, decode first then sleep.
      std::unique_ptr<TraceRecord> record;
      s = DecodeTraceRecord(&trace, trace_file_version_, &record);
      // Skip unsupported traces, stop for other errors.
      if (s.IsNotSupported()) {
        // `continue` re-evaluates the loop condition, so the status has to be
        // cleared first; otherwise the NotSupported status would end the
        // replay instead of skipping this trace.
        s = Status::OK();
        continue;
      } else if (!s.ok()) {
        break;
      }

      std::this_thread::sleep_until(
          replay_epoch +
          std::chrono::microseconds(static_cast<uint64_t>(std::llround(
              1.0 * (trace.ts - header_ts_) / options.fast_forward))));

      s = Execute(std::move(record));
    }
  } else {
    // Multi-threaded replay.
    ThreadPoolImpl thread_pool;
    thread_pool.SetHostEnv(env_);
    thread_pool.SetBackgroundThreads(static_cast<int>(options.num_threads));

    std::mutex mtx;
    // Background decoding and execution status.
    Status bg_s = Status::OK();
    uint64_t last_err_ts = static_cast<uint64_t>(-1);
    // Callback function used in background work to update bg_s at the first
    // execution error (with the smallest Trace timestamp).
    auto error_cb = [&mtx, &bg_s, &last_err_ts](Status err, uint64_t err_ts) {
      std::lock_guard<std::mutex> gd(mtx);
      // Only record the first error.
      if (!err.ok() && !err.IsNotSupported() && err_ts < last_err_ts) {
        bg_s = err;
        last_err_ts = err_ts;
      }
    };
    // Workers write bg_s under mtx, so this thread must also hold mtx while
    // polling it; an unlocked read would race with those writes.
    auto bg_ok = [&mtx, &bg_s]() {
      std::lock_guard<std::mutex> gd(mtx);
      return bg_s.ok();
    };

    std::chrono::system_clock::time_point replay_epoch =
        std::chrono::system_clock::now();

    while (bg_ok() && s.ok()) {
      Trace trace;
      s = ReadTrace(&trace);
      // If already at trace end, ReadTrace should return Status::Incomplete().
      if (!s.ok()) {
        break;
      }

      TraceType trace_type = trace.type;

      // No need to sleep before breaking the loop if at the trace end.
      if (trace_type == kTraceEnd) {
        trace_end_ = true;
        s = Status::Incomplete("Trace end.");
        break;
      }

      // In multi-threaded replay, sleep first, then start decoding and
      // execution in a thread.
      std::this_thread::sleep_until(
          replay_epoch +
          std::chrono::microseconds(static_cast<uint64_t>(std::llround(
              1.0 * (trace.ts - header_ts_) / options.fast_forward))));

      if (trace_type == kTraceWrite || trace_type == kTraceGet ||
          trace_type == kTraceIteratorSeek ||
          trace_type == kTraceIteratorSeekForPrev ||
          trace_type == kTraceMultiGet) {
        // BackgroundWork takes ownership of the argument once scheduled.
        std::unique_ptr<ReplayerWorkerArg> ra(new ReplayerWorkerArg);
        ra->trace_entry = std::move(trace);
        ra->handler = exec_handler_.get();
        ra->trace_file_version = trace_file_version_;
        ra->error_cb = error_cb;
        thread_pool.Schedule(&ReplayerImpl::BackgroundWork, ra.release(),
                             nullptr, nullptr);
      }
      // Skip unsupported traces.
    }

    // All workers have finished after this call, so bg_s can be read without
    // the lock from here on.
    thread_pool.WaitForJobsAndJoinAllThreads();
    if (!bg_s.ok()) {
      s = bg_s;
    }
  }

  if (s.IsIncomplete()) {
    // Reaching eof returns Incomplete status at the moment.
    // Could happen when killing a process without calling EndTrace() API.
    // TODO: Add better error handling.
    trace_end_ = true;
    return Status::OK();
  }
  return s;
}
||||
|
||||
// Timestamp taken from the trace header by the last successful Prepare();
// zero before that.
uint64_t ReplayerImpl::GetHeaderTimestamp() const {
  return header_ts_;
}
||||
|
||||
// Resets the trace reader, reads the first encoded entry and decodes it as
// the trace header. Returns the first failing Status.
Status ReplayerImpl::ReadHeader(Trace* header) {
  assert(header != nullptr);

  Status s = trace_reader_->Reset();
  std::string encoded_header;
  if (s.ok()) {
    // Read the trace head
    s = trace_reader_->Read(&encoded_header);
  }
  if (!s.ok()) {
    return s;
  }

  return TracerHelper::DecodeHeader(encoded_header, header);
}
||||
|
||||
// Reads the next trace into `footer` and checks that it is the end marker.
// Returns the read error, or Corruption if the trace is not of type
// kTraceEnd.
Status ReplayerImpl::ReadFooter(Trace* footer) {
  assert(footer != nullptr);

  Status s = ReadTrace(footer);
  if (s.ok() && footer->type != kTraceEnd) {
    return Status::Corruption("Corrupted trace file. Incorrect footer.");
  }

  // TODO: Add more validations later
  return s;
}
||||
|
||||
// Reads one encoded entry from the trace reader and decodes it into `trace`.
Status ReplayerImpl::ReadTrace(Trace* trace) {
  assert(trace != nullptr);

  std::string encoded_trace;
  Status read_status;
  // We don't know if TraceReader is implemented thread-safe, so only the read
  // itself is done under the mutex. Decoding works on local data and needs no
  // protection.
  {
    std::lock_guard<std::mutex> guard(mutex_);
    read_status = trace_reader_->Read(&encoded_trace);
  }
  if (!read_status.ok()) {
    return read_status;
  }

  return TracerHelper::DecodeTrace(encoded_trace, trace);
}
||||
|
||||
// Converts `trace` into the TraceRecord subclass matching its type by calling
// the corresponding TracerHelper decoder. Returns Incomplete for the end
// marker and NotSupported for every other type.
Status ReplayerImpl::DecodeTraceRecord(Trace* trace, int trace_file_version,
                                       std::unique_ptr<TraceRecord>* record) {
  const TraceType type = trace->type;

  if (type == kTraceWrite) {
    return TracerHelper::DecodeWriteRecord(trace, trace_file_version, record);
  }
  if (type == kTraceGet) {
    return TracerHelper::DecodeGetRecord(trace, trace_file_version, record);
  }
  // Both seek flavors share one decoder.
  if (type == kTraceIteratorSeek || type == kTraceIteratorSeekForPrev) {
    return TracerHelper::DecodeIterRecord(trace, trace_file_version, record);
  }
  if (type == kTraceMultiGet) {
    return TracerHelper::DecodeMultiGetRecord(trace, trace_file_version,
                                              record);
  }
  if (type == kTraceEnd) {
    return Status::Incomplete("Trace end.");
  }
  return Status::NotSupported("Unsupported trace type.");
}
||||
|
||||
void ReplayerImpl::BackgroundWork(void* arg) { |
||||
std::unique_ptr<ReplayerWorkerArg> ra( |
||||
reinterpret_cast<ReplayerWorkerArg*>(arg)); |
||||
assert(ra != nullptr); |
||||
|
||||
std::unique_ptr<TraceRecord> record; |
||||
Status s = |
||||
DecodeTraceRecord(&(ra->trace_entry), ra->trace_file_version, &record); |
||||
if (s.ok()) { |
||||
s = record->Accept(ra->handler); |
||||
record.reset(); |
||||
} |
||||
if (!s.ok() && ra->error_cb) { |
||||
ra->error_cb(s, ra->trace_entry.ts); |
||||
} |
||||
} |
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
#endif // ROCKSDB_LITE
|
@ -0,0 +1,90 @@ |
||||
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
#include <atomic> |
||||
#include <functional> |
||||
#include <memory> |
||||
#include <mutex> |
||||
#include <unordered_map> |
||||
|
||||
#include "rocksdb/rocksdb_namespace.h" |
||||
#include "rocksdb/trace_record.h" |
||||
#include "rocksdb/utilities/replayer.h" |
||||
#include "trace_replay/trace_replay.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
class ColumnFamilyHandle; |
||||
class DB; |
||||
class Env; |
||||
class TraceReader; |
||||
class TraceRecord; |
||||
class Status; |
||||
|
||||
struct ReplayOptions; |
||||
|
||||
class ReplayerImpl : public Replayer { |
||||
public: |
||||
ReplayerImpl(DB* db, const std::vector<ColumnFamilyHandle*>& handles, |
||||
std::unique_ptr<TraceReader>&& reader); |
||||
~ReplayerImpl() override; |
||||
|
||||
using Replayer::Prepare; |
||||
Status Prepare() override; |
||||
|
||||
using Replayer::Next; |
||||
Status Next(std::unique_ptr<TraceRecord>* record) override; |
||||
|
||||
using Replayer::Execute; |
||||
Status Execute(const std::unique_ptr<TraceRecord>& record) override; |
||||
Status Execute(std::unique_ptr<TraceRecord>&& record) override; |
||||
|
||||
using Replayer::Replay; |
||||
Status Replay(const ReplayOptions& options) override; |
||||
|
||||
using Replayer::GetHeaderTimestamp; |
||||
uint64_t GetHeaderTimestamp() const override; |
||||
|
||||
private: |
||||
Status ReadHeader(Trace* header); |
||||
Status ReadFooter(Trace* footer); |
||||
Status ReadTrace(Trace* trace); |
||||
|
||||
// Generic function to convert a Trace to TraceRecord.
|
||||
static Status DecodeTraceRecord(Trace* trace, int trace_file_version, |
||||
std::unique_ptr<TraceRecord>* record); |
||||
|
||||
// Generic function to execute a Trace in a thread pool.
|
||||
static void BackgroundWork(void* arg); |
||||
|
||||
Env* env_; |
||||
std::unique_ptr<TraceReader> trace_reader_; |
||||
// When reading the trace header, the trace file version can be parsed.
|
||||
// Replayer will use different decode method to get the trace content based
|
||||
// on different trace file version.
|
||||
int trace_file_version_; |
||||
std::mutex mutex_; |
||||
std::atomic<bool> prepared_; |
||||
std::atomic<bool> trace_end_; |
||||
uint64_t header_ts_; |
||||
std::unique_ptr<TraceRecord::Handler> exec_handler_; |
||||
}; |
||||
|
||||
// The passin arg of MultiThreadRepkay for each trace record.
|
||||
struct ReplayerWorkerArg { |
||||
Trace trace_entry; |
||||
int trace_file_version; |
||||
// Handler to execute TraceRecord.
|
||||
TraceRecord::Handler* handler; |
||||
// Callback function to report the error status and the timestamp of the
|
||||
// TraceRecord.
|
||||
std::function<void(Status, uint64_t)> error_cb; |
||||
}; |
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
#endif // ROCKSDB_LITE
|
Loading…
Reference in new issue