Hint temperature of bottommost level files to FileSystem (#8222)
Summary: As the first part of the effort of placing different files on different storage types, this change introduces several things: (1) An experimental interface in FileSystem that specifies the temperature of a newly created file. (2) A test FileSystemWrapper, SimulatedHybridFileSystem, that simulates HDD for a file of "warm" temperature. (3) A simple experimental feature, ColumnFamilyOptions.bottommost_temperature. RocksDB passes this value to FileSystem when creating any bottommost file. (4) A db_bench parameter that applies (2) and (3) to db_bench. The motivation of the change is to introduce minimal changes that allow us to evolve tiered storage development. Pull Request resolved: https://github.com/facebook/rocksdb/pull/8222 Test Plan: ./db_bench --benchmarks=fillrandom --write_buffer_size=2000000 -max_bytes_for_level_base=20000000 -level_compaction_dynamic_level_bytes --reads=100 -compaction_readahead_size=20000000 --reads=100000 -num=10000000 followed by ./db_bench --benchmarks=readrandom,stats --write_buffer_size=2000000 -max_bytes_for_level_base=20000000 -simulate_hybrid_fs_file=/tmp/warm_file_list -level_compaction_dynamic_level_bytes -compaction_readahead_size=20000000 --reads=500 --threads=16 -use_existing_db --num=10000000 and see results as expected. Reviewed By: ajkr Differential Revision: D28003028 fbshipit-source-id: 4724896d5205730227ba2f17c3fecb11261744ce main
parent
d2ca04e3ed
commit
c3ff14e2c1
@ -0,0 +1,145 @@ |
|||||||
|
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE |
||||||
|
|
||||||
|
#include "tools/simulated_hybrid_file_system.h" |
||||||
|
|
||||||
|
#include <sstream> |
||||||
|
#include <string> |
||||||
|
|
||||||
|
#include "rocksdb/rate_limiter.h" |
||||||
|
|
||||||
|
namespace ROCKSDB_NAMESPACE { |
||||||
|
|
||||||
|
// Extra latency, in microseconds, slept for each simulated request against a
// warm file.
constexpr int kLatencyAddedPerRequestUs = 15000;
// Number of simulated requests the rate limiter admits per second.
constexpr int64_t kRequestPerSec = 100;
// Bytes charged to the rate limiter for one simulated request. The limiter
// counts bytes, so every request is billed this fixed amount.
constexpr int64_t kDummyBytesPerRequest = 1 << 20;
||||||
|
|
||||||
|
// The metadata file format: each line is a full filename of a file which is
|
||||||
|
// warm
|
||||||
|
SimulatedHybridFileSystem::SimulatedHybridFileSystem( |
||||||
|
const std::shared_ptr<FileSystem>& base, |
||||||
|
const std::string& metadata_file_name) |
||||||
|
: FileSystemWrapper(base), |
||||||
|
// Limit to 100 requests per second.
|
||||||
|
rate_limiter_(NewGenericRateLimiter( |
||||||
|
kDummyBytesPerRequest * kRequestPerSec /* rate_bytes_per_sec */, |
||||||
|
1000 /* refill_period_us */)), |
||||||
|
metadata_file_name_(metadata_file_name), |
||||||
|
name_("SimulatedHybridFileSystem: " + std::string(target()->Name())) { |
||||||
|
IOStatus s = base->FileExists(metadata_file_name, IOOptions(), nullptr); |
||||||
|
if (s.IsNotFound()) { |
||||||
|
return; |
||||||
|
} |
||||||
|
std::string metadata; |
||||||
|
s = ReadFileToString(base.get(), metadata_file_name, &metadata); |
||||||
|
if (!s.ok()) { |
||||||
|
fprintf(stderr, "Error reading from file %s: %s", |
||||||
|
metadata_file_name.c_str(), s.ToString().c_str()); |
||||||
|
// Exit rather than assert as this file system is built to run with
|
||||||
|
// benchmarks, which usually run on release mode.
|
||||||
|
std::exit(1); |
||||||
|
} |
||||||
|
std::istringstream input; |
||||||
|
input.str(metadata); |
||||||
|
std::string line; |
||||||
|
while (std::getline(input, line)) { |
||||||
|
fprintf(stderr, "Warm file %s\n", line.c_str()); |
||||||
|
warm_file_set_.insert(line); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Need to write out the metadata file to file. See comment of
|
||||||
|
// SimulatedHybridFileSystem::SimulatedHybridFileSystem() for format of the
|
||||||
|
// file.
|
||||||
|
SimulatedHybridFileSystem::~SimulatedHybridFileSystem() { |
||||||
|
std::string metadata; |
||||||
|
for (const auto& f : warm_file_set_) { |
||||||
|
metadata += f; |
||||||
|
metadata += "\n"; |
||||||
|
} |
||||||
|
IOStatus s = WriteStringToFile(target(), metadata, metadata_file_name_, true); |
||||||
|
if (!s.ok()) { |
||||||
|
fprintf(stderr, "Error writing to file %s: %s", metadata_file_name_.c_str(), |
||||||
|
s.ToString().c_str()); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Opens `fname` through the wrapped file system and, on success, wraps the
// returned file in a SimulatedHybridRaf.
//
// The wrapper is tagged Temperature::kWarm when `fname` is in the warm-file
// set at the time of the call, and Temperature::kUnknown otherwise.
//
// On failure the status from the wrapped file system is returned and
// `*result` is left exactly as that call left it; no wrapper is created.
IOStatus SimulatedHybridFileSystem::NewRandomAccessFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSRandomAccessFile>* result, IODebugContext* dbg) {
  Temperature temperature = Temperature::kUnknown;
  {
    // Keep the critical section to the set lookup only.
    const std::lock_guard<std::mutex> lock(mutex_);
    if (warm_file_set_.find(fname) != warm_file_set_.end()) {
      temperature = Temperature::kWarm;
    }
  }
  IOStatus s = target()->NewRandomAccessFile(fname, file_opts, result, dbg);
  if (!s.ok()) {
    // Do not build a wrapper around a file that failed to open.
    return s;
  }
  result->reset(
      new SimulatedHybridRaf(result->release(), rate_limiter_, temperature));
  return s;
}
||||||
|
|
||||||
|
// Creates a writable file through the wrapped file system. When the caller
// asks for Temperature::kWarm, the file name is logged and added to the
// warm-file set before the creation request is forwarded.
IOStatus SimulatedHybridFileSystem::NewWritableFile(
    const std::string& fname, const FileOptions& file_opts,
    std::unique_ptr<FSWritableFile>* result, IODebugContext* dbg) {
  const bool wants_warm = (file_opts.temperature == Temperature::kWarm);
  if (wants_warm) {
    const std::lock_guard<std::mutex> guard(mutex_);
    fprintf(stderr, "warm file %s\n", fname.c_str());
    warm_file_set_.insert(fname);
  }
  IOStatus status = target()->NewWritableFile(fname, file_opts, result, dbg);
  return status;
}
||||||
|
|
||||||
|
// Drops `fname` from the warm-file set (a no-op if it is not there) and then
// forwards the deletion to the wrapped file system.
IOStatus SimulatedHybridFileSystem::DeleteFile(const std::string& fname,
                                               const IOOptions& options,
                                               IODebugContext* dbg) {
  {
    // Only the set update needs the lock; the delete itself runs unlocked.
    const std::lock_guard<std::mutex> guard(mutex_);
    warm_file_set_.erase(fname);
  }
  IOStatus status = target()->DeleteFile(fname, options, dbg);
  return status;
}
||||||
|
|
||||||
|
// Forwards the read to the wrapped file. For a warm file, first sleeps for
// kLatencyAddedPerRequestUs and charges one request's worth of bytes to the
// shared rate limiter.
IOStatus SimulatedHybridRaf::Read(uint64_t offset, size_t n,
                                  const IOOptions& options, Slice* result,
                                  char* scratch, IODebugContext* dbg) const {
  const bool is_warm = (temperature_ == Temperature::kWarm);
  if (is_warm) {
    Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs);
    rate_limiter_->Request(kDummyBytesPerRequest, Env::IOPriority::IO_LOW,
                           nullptr);
  }
  IOStatus status = target()->Read(offset, n, options, result, scratch, dbg);
  return status;
}
||||||
|
|
||||||
|
// Forwards a batch of reads to the wrapped file. For a warm file, the added
// latency and the bytes charged to the rate limiter are both scaled by
// `num_reqs`, i.e. each request in the batch is billed as one request.
IOStatus SimulatedHybridRaf::MultiRead(FSReadRequest* reqs, size_t num_reqs,
                                       const IOOptions& options,
                                       IODebugContext* dbg) {
  if (temperature_ == Temperature::kWarm) {
    const int request_count = static_cast<int>(num_reqs);
    Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs *
                                         request_count);

    const int64_t charged_bytes =
        static_cast<int64_t>(num_reqs) * kDummyBytesPerRequest;
    rate_limiter_->Request(charged_bytes, Env::IOPriority::IO_LOW, nullptr);
  }
  IOStatus status = target()->MultiRead(reqs, num_reqs, options, dbg);
  return status;
}
||||||
|
|
||||||
|
// Forwards the prefetch to the wrapped file. For a warm file, first charges
// one request's worth of bytes to the shared rate limiter and then sleeps for
// kLatencyAddedPerRequestUs.
IOStatus SimulatedHybridRaf::Prefetch(uint64_t offset, size_t n,
                                      const IOOptions& options,
                                      IODebugContext* dbg) {
  const bool is_warm = (temperature_ == Temperature::kWarm);
  if (is_warm) {
    rate_limiter_->Request(kDummyBytesPerRequest, Env::IOPriority::IO_LOW,
                           nullptr);
    Env::Default()->SleepForMicroseconds(kLatencyAddedPerRequestUs);
  }
  IOStatus status = target()->Prefetch(offset, n, options, dbg);
  return status;
}
||||||
|
|
||||||
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
|
#endif // ROCKSDB_LITE
|
@ -0,0 +1,89 @@ |
|||||||
|
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
|
||||||
|
// This source code is licensed under both the GPLv2 (found in the
|
||||||
|
// COPYING file in the root directory) and Apache 2.0 License
|
||||||
|
// (found in the LICENSE.Apache file in the root directory).
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#ifndef ROCKSDB_LITE |
||||||
|
|
||||||
|
#include <utility> |
||||||
|
|
||||||
|
#include "rocksdb/file_system.h" |
||||||
|
|
||||||
|
namespace ROCKSDB_NAMESPACE { |
||||||
|
|
||||||
|
// A FileSystem simulates hybrid file system by ingesting latency and limit
|
||||||
|
// IOPs.
|
||||||
|
// This class is only used for development purpose and should not be used
|
||||||
|
// in production.
|
||||||
|
// Right now we ingest 15ms latency and allow 100 requests per second when
|
||||||
|
// the file is for warm temperature.
|
||||||
|
// When the object is destroyed, the list of warm files are written to a
|
||||||
|
// file, which can be used to reopen a FileSystem and still recover the
|
||||||
|
// list. This is to allow the information to preserve between db_bench
|
||||||
|
// runs.
|
||||||
|
class SimulatedHybridFileSystem : public FileSystemWrapper { |
||||||
|
public: |
||||||
|
// metadata_file_name stores metadata of the files, so that it can be
|
||||||
|
// loaded after process restarts. If the file doesn't exist, create
|
||||||
|
// one. The file is written when the class is destroyed.
|
||||||
|
explicit SimulatedHybridFileSystem(const std::shared_ptr<FileSystem>& base, |
||||||
|
const std::string& metadata_file_name); |
||||||
|
|
||||||
|
~SimulatedHybridFileSystem() override; |
||||||
|
|
||||||
|
public: |
||||||
|
IOStatus NewRandomAccessFile(const std::string& fname, |
||||||
|
const FileOptions& file_opts, |
||||||
|
std::unique_ptr<FSRandomAccessFile>* result, |
||||||
|
IODebugContext* dbg) override; |
||||||
|
IOStatus NewWritableFile(const std::string& fname, |
||||||
|
const FileOptions& file_opts, |
||||||
|
std::unique_ptr<FSWritableFile>* result, |
||||||
|
IODebugContext* dbg) override; |
||||||
|
IOStatus DeleteFile(const std::string& fname, const IOOptions& options, |
||||||
|
IODebugContext* dbg) override; |
||||||
|
|
||||||
|
const char* Name() const override { return name_.c_str(); } |
||||||
|
|
||||||
|
private: |
||||||
|
// Limit 100 requests per second. Rate limiter is designed to byte but
|
||||||
|
// we use it as fixed bytes is one request.
|
||||||
|
std::shared_ptr<RateLimiter> rate_limiter_; |
||||||
|
std::mutex mutex_; |
||||||
|
std::unordered_set<std::string> warm_file_set_; |
||||||
|
std::string metadata_file_name_; |
||||||
|
std::string name_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Simulated random access file that can control IOPs and latency to simulate
|
||||||
|
// specific storage media
|
||||||
|
class SimulatedHybridRaf : public FSRandomAccessFileWrapper { |
||||||
|
public: |
||||||
|
SimulatedHybridRaf(FSRandomAccessFile* t, |
||||||
|
std::shared_ptr<RateLimiter> rate_limiter, |
||||||
|
Temperature temperature) |
||||||
|
: FSRandomAccessFileWrapper(t), |
||||||
|
rate_limiter_(rate_limiter), |
||||||
|
temperature_(temperature) {} |
||||||
|
|
||||||
|
~SimulatedHybridRaf() override {} |
||||||
|
|
||||||
|
IOStatus Read(uint64_t offset, size_t n, const IOOptions& options, |
||||||
|
Slice* result, char* scratch, |
||||||
|
IODebugContext* dbg) const override; |
||||||
|
|
||||||
|
IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs, |
||||||
|
const IOOptions& options, IODebugContext* dbg) override; |
||||||
|
|
||||||
|
IOStatus Prefetch(uint64_t offset, size_t n, const IOOptions& options, |
||||||
|
IODebugContext* dbg) override; |
||||||
|
|
||||||
|
private: |
||||||
|
std::shared_ptr<RateLimiter> rate_limiter_; |
||||||
|
Temperature temperature_; |
||||||
|
}; |
||||||
|
} // namespace ROCKSDB_NAMESPACE
|
||||||
|
|
||||||
|
#endif // ROCKSDB_LITE
|
Loading…
Reference in new issue