Hint temperature of bottommost level files to FileSystem (#8222)
Summary: As the first part of the effort to place different files on different storage types, this change introduces several things: (1) An experimental interface in FileSystem that specifies the temperature of a newly created file. (2) A test FileSystemWrapper, SimulatedHybridFileSystem, that simulates an HDD for files of "warm" temperature. (3) A simple experimental feature, ColumnFamilyOptions.bottommost_temperature. RocksDB passes this value to FileSystem when creating any bottommost file. (4) A db_bench parameter that applies (2) and (3) to db_bench. The motivation of the change is to introduce minimal changes that allow us to evolve tiered storage development. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8222 Test Plan: ./db_bench --benchmarks=fillrandom --write_buffer_size=2000000 -max_bytes_for_level_base=20000000 -level_compaction_dynamic_level_bytes --reads=100 -compaction_readahead_size=20000000 --reads=100000 -num=10000000 followed by ./db_bench --benchmarks=readrandom,stats --write_buffer_size=2000000 -max_bytes_for_level_base=20000000 -simulate_hybrid_fs_file=/tmp/warm_file_list -level_compaction_dynamic_level_bytes -compaction_readahead_size=20000000 --reads=500 --threads=16 -use_existing_db --num=10000000 and see results as expected. Reviewed By: ajkr Differential Revision: D28003028 fbshipit-source-id: 4724896d5205730227ba2f17c3fecb11261744ce main
parent
d2ca04e3ed
commit
c3ff14e2c1
@ -0,0 +1,145 @@ |
||||
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
#include "tools/simulated_hybrid_file_system.h" |
||||
|
||||
#include <sstream> |
||||
#include <string> |
||||
|
||||
#include "rocksdb/rate_limiter.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// Extra latency, in microseconds, added to each simulated request against a
// warm file.
constexpr int kLatencyAddedPerRequestUs = 15000;
// Request rate (per second) that the shared rate limiter is sized for.
constexpr int64_t kRequestPerSec = 100;
// The rate limiter counts bytes, not requests, so every simulated request is
// charged this fixed number of bytes.
constexpr int64_t kDummyBytesPerRequest = int64_t{1024} * 1024;
||||
|
||||
// The metadata file format: each line is a full filename of a file which is
|
||||
// warm
|
||||
SimulatedHybridFileSystem::SimulatedHybridFileSystem( |
||||
const std::shared_ptr<FileSystem>& base, |
||||
const std::string& metadata_file_name) |
||||
: FileSystemWrapper(base), |
||||
// Limit to 100 requests per second.
|
||||
rate_limiter_(NewGenericRateLimiter( |
||||
kDummyBytesPerRequest * kRequestPerSec /* rate_bytes_per_sec */, |
||||
1000 /* refill_period_us */)), |
||||
metadata_file_name_(metadata_file_name), |
||||
name_("SimulatedHybridFileSystem: " + std::string(target()->Name())) { |
||||
IOStatus s = base->FileExists(metadata_file_name, IOOptions(), nullptr); |
||||
if (s.IsNotFound()) { |
||||
return; |
||||
} |
||||
std::string metadata; |
||||
s = ReadFileToString(base.get(), metadata_file_name, &metadata); |
||||
if (!s.ok()) { |
||||
fprintf(stderr, "Error reading from file %s: %s", |
||||
metadata_file_name.c_str(), s.ToString().c_str()); |
||||
// Exit rather than assert as this file system is built to run with
|
||||
// benchmarks, which usually run on release mode.
|
||||
std::exit(1); |
||||
} |
||||
std::istringstream input; |
||||
input.str(metadata); |
||||
std::string line; |
||||
while (std::getline(input, line)) { |
||||
fprintf(stderr, "Warm file %s\n", line.c_str()); |
||||
warm_file_set_.insert(line); |
||||
} |
||||
} |
||||
|
||||
// Need to write out the metadata file to file. See comment of
|
||||
// SimulatedHybridFileSystem::SimulatedHybridFileSystem() for format of the
|
||||
// file.
|
||||
SimulatedHybridFileSystem::~SimulatedHybridFileSystem() { |
||||
std::string metadata; |
||||
for (const auto& f : warm_file_set_) { |
||||
metadata += f; |
||||
metadata += "\n"; |
||||
} |
||||
IOStatus s = WriteStringToFile(target(), metadata, metadata_file_name_, true); |
||||
if (!s.ok()) { |
||||
fprintf(stderr, "Error writing to file %s: %s", metadata_file_name_.c_str(), |
||||
s.ToString().c_str()); |
||||
} |
||||
} |
||||
|
||||
// Opens `fname` through the wrapped file system and, on success, wraps the
// returned file in a SimulatedHybridRaf so that reads of warm files are slowed
// down.
//
// fname:     file to open; it is warm iff it is currently in warm_file_set_.
// file_opts: forwarded unchanged to the wrapped file system.
// result:    receives the wrapped file on success. On failure it is left
//            exactly as the wrapped file system left it.
// dbg:       forwarded unchanged to the wrapped file system.
//
// Returns the status reported by the wrapped file system.
IOStatus SimulatedHybridFileSystem::NewRandomAccessFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
  // Decide the temperature up front; the mutex is held only for the lookup.
  Temperature temperature = Temperature::kUnknown;
  {
    const std::lock_guard<std::mutex> lock(mutex_);
    if (warm_file_set_.find(fname) != warm_file_set_.end()) {
      temperature = Temperature::kWarm;
    }
  }

  IOStatus s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
  if (!s.ok()) {
    // Do not wrap on failure: that would hand the caller a non-null wrapper
    // whose target is null.
    return s;
  }

  // NOTE(review): the underlying file is released into a raw pointer here.
  // Confirm that the wrapper takes ownership of it; otherwise it is leaked.
  result->reset(
      new SimulatedHybridRaf(result->release(), rate_limiter_, temperature));
  return s;
}
||||
|
||||
// Creates a writable file through the wrapped file system. When the caller
// hints warm temperature in `file_opts`, the file name is first recorded in
// the warm set (and logged to stderr) so that later random-access opens of
// the same name are throttled.
//
// Returns the status reported by the wrapped file system.
IOStatus SimulatedHybridFileSystem::NewWritableFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
  const bool hinted_warm = (file_opts.temperature == Temperature::kWarm);
  if (hinted_warm) {
    // The mutex protects warm_file_set_ against concurrent opens/deletes.
    const std::lock_guard<std::mutex> guard(mutex_);
    fprintf(stderr, "warm file %s\n", fname.c_str());
    warm_file_set_.insert(fname);
  }
  return target()->NewWritableFile(fname, file_opts, result, dbg);
}
||||
|
||||
// Removes `fname` from the warm set and then deletes the file through the
// wrapped file system. The name is dropped from the set regardless of whether
// the deletion itself succeeds.
//
// Returns the status reported by the wrapped file system.
IOStatus SimulatedHybridFileSystem::DeleteFile(const std::string& fname,
                                               const IOOptions& options,
                                               IODebugContext* dbg) {
  std::unique_lock<std::mutex> guard(mutex_);
  warm_file_set_.erase(fname);
  // Release the mutex before doing the actual file system call.
  guard.unlock();

  return target()->DeleteFile(fname, options, dbg);
}
||||
|
||||
// Reads from the wrapped file. For a warm file the call is first delayed by
// kLatencyAddedPerRequestUs and then charged one request
// (kDummyBytesPerRequest bytes) against the shared rate limiter.
//
// All arguments are forwarded unchanged to the wrapped file, whose status is
// returned.
IOStatus SimulatedHybridRaf::Read(uint64_t offset, size_t n,
                                  const IOOptions& options, Slice* result,
                                  char* scratch, IODebugContext* dbg) const {
  const bool is_warm = (temperature_ == Temperature::kWarm);
  if (is_warm) {
    // Fixed per-request latency first, then the IOPs cap.
    // NOTE(review): confirm that a single Request() of kDummyBytesPerRequest
    // is within what this rate limiter accepts in one call.
    Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs);
    rate_limiter_->Request(kDummyBytesPerRequest, Env::IOPriority::IO_LOW,
                           nullptr);
  }
  return target()->Read(offset, n, options, result, scratch, dbg);
}
||||
|
||||
// Issues a batch of reads against the wrapped file. For a warm file the
// simulated cost scales with the batch size: the per-request latency and the
// per-request rate-limiter charge are both multiplied by `num_reqs`.
//
// All arguments are forwarded unchanged to the wrapped file, whose status is
// returned.
IOStatus SimulatedHybridRaf::MultiRead(FSReadRequest* reqs, size_t num_reqs,
                                       const IOOptions& options,
                                       IODebugContext* dbg) {
  if (temperature_ == Temperature::kWarm) {
    const int batch_latency_us =
        kLatencyAddedPerRequestUs * static_cast<int>(num_reqs);
    const int64_t batch_bytes =
        static_cast<int64_t>(num_reqs) * kDummyBytesPerRequest;

    Env::Default()->SleepForMicroseconds(batch_latency_us);
    rate_limiter_->Request(batch_bytes, Env::IOPriority::IO_LOW, nullptr);
  }
  return target()->MultiRead(reqs, num_reqs, options, dbg);
}
||||
|
||||
// Prefetches from the wrapped file. For a warm file the call is charged one
// request (kDummyBytesPerRequest bytes) against the shared rate limiter and
// then delayed by kLatencyAddedPerRequestUs. Note the order: rate limiting
// happens before the sleep here.
//
// All arguments are forwarded unchanged to the wrapped file, whose status is
// returned.
IOStatus SimulatedHybridRaf::Prefetch(uint64_t offset, size_t n,
                                      const IOOptions& options,
                                      IODebugContext* dbg) {
  const bool is_warm = (temperature_ == Temperature::kWarm);
  if (is_warm) {
    rate_limiter_->Request(kDummyBytesPerRequest, Env::IOPriority::IO_LOW,
                           nullptr);
    Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs);
  }
  return target()->Prefetch(offset, n, options, dbg);
}
||||
|
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
||||
#endif // ROCKSDB_LITE
|
@ -0,0 +1,89 @@ |
||||
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||
// This source code is licensed under both the GPLv2 (found in the
|
||||
// COPYING file in the root directory) and Apache 2.0 License
|
||||
// (found in the LICENSE.Apache file in the root directory).
|
||||
|
||||
#pragma once |
||||
|
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
#include <utility> |
||||
|
||||
#include "rocksdb/file_system.h" |
||||
|
||||
namespace ROCKSDB_NAMESPACE { |
||||
|
||||
// A FileSystem simulates hybrid file system by ingesting latency and limit
|
||||
// IOPs.
|
||||
// This class is only used for development purpose and should not be used
|
||||
// in production.
|
||||
// Right now we ingest 15ms latency and allow 100 requests per second when
|
||||
// the file is for warm temperature.
|
||||
// When the object is destroyed, the list of warm files are written to a
|
||||
// file, which can be used to reopen a FileSystem and still recover the
|
||||
// list. This is to allow the information to preserve between db_bench
|
||||
// runs.
|
||||
class SimulatedHybridFileSystem : public FileSystemWrapper { |
||||
public: |
||||
// metadata_file_name stores metadata of the files, so that it can be
|
||||
// loaded after process restarts. If the file doesn't exist, create
|
||||
// one. The file is written when the class is destroyed.
|
||||
explicit SimulatedHybridFileSystem(const std::shared_ptr<FileSystem>& base, |
||||
const std::string& metadata_file_name); |
||||
|
||||
~SimulatedHybridFileSystem() override; |
||||
|
||||
public: |
||||
IOStatus NewRandomAccessFile(const std::string& fname, |
||||
const FileOptions& file_opts, |
||||
std::unique_ptr<FSRandomAccessFile>* result, |
||||
IODebugContext* dbg) override; |
||||
IOStatus NewWritableFile(const std::string& fname, |
||||
const FileOptions& file_opts, |
||||
std::unique_ptr<FSWritableFile>* result, |
||||
IODebugContext* dbg) override; |
||||
IOStatus DeleteFile(const std::string& fname, const IOOptions& options, |
||||
IODebugContext* dbg) override; |
||||
|
||||
const char* Name() const override { return name_.c_str(); } |
||||
|
||||
private: |
||||
// Limit 100 requests per second. Rate limiter is designed to byte but
|
||||
// we use it as fixed bytes is one request.
|
||||
std::shared_ptr<RateLimiter> rate_limiter_; |
||||
std::mutex mutex_; |
||||
std::unordered_set<std::string> warm_file_set_; |
||||
std::string metadata_file_name_; |
||||
std::string name_; |
||||
}; |
||||
|
||||
// Simulated random access file that can control IOPs and latency to simulate
|
||||
// specific storage media
|
||||
class SimulatedHybridRaf : public FSRandomAccessFileWrapper { |
||||
public: |
||||
SimulatedHybridRaf(FSRandomAccessFile* t, |
||||
std::shared_ptr<RateLimiter> rate_limiter, |
||||
Temperature temperature) |
||||
: FSRandomAccessFileWrapper(t), |
||||
rate_limiter_(rate_limiter), |
||||
temperature_(temperature) {} |
||||
|
||||
~SimulatedHybridRaf() override {} |
||||
|
||||
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, |
||||
Slice* result, char* scratch, |
||||
IODebugContext* dbg) const override; |
||||
|
||||
IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, |
||||
const IOOptions& options, IODebugContext* dbg) override; |
||||
|
||||
IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options, |
||||
IODebugContext* dbg) override; |
||||
|
||||
private: |
||||
std::shared_ptr<RateLimiter> rate_limiter_; |
||||
Temperature temperature_; |
||||
}; |
||||
} // namespace ROCKSDB_NAMESPACE
|
||||
|
||||
#endif // ROCKSDB_LITE
|
Loading…
Reference in new issue