Added boolean variable to guard fallocate() calls

Summary:
Added a boolean option, allow_fallocate, to guard fallocate() calls.
Set it to false to prevent space leaks when tests fail.

Test Plan:
Compiles
Set allow_fallocate to false and ran log device tests

Reviewers: sdong, lovro, igor

Reviewed By: igor

Subscribers: dhruba

Differential Revision: https://reviews.facebook.net/D48027
main
Lakshmi Narayanan 9 years ago
parent aadf49fe6b
commit 4049bcde39
  1. 3
      include/rocksdb/env.h
  2. 7
      include/rocksdb/options.h
  3. 2
      util/env.cc
  4. 46
      util/env_posix.cc
  5. 3
      util/options.cc
  6. 3
      util/options_helper.h

@ -68,6 +68,9 @@ struct EnvOptions {
// If true, then use mmap to write data // If true, then use mmap to write data
bool use_mmap_writes = true; bool use_mmap_writes = true;
// If false, fallocate() calls are bypassed
bool allow_fallocate = true;
// If true, set the FD_CLOEXEC on open fd. // If true, set the FD_CLOEXEC on open fd.
bool set_fd_cloexec = true; bool set_fd_cloexec = true;

@ -1006,6 +1006,9 @@ struct DBOptions {
// Default: false // Default: false
bool allow_mmap_writes; bool allow_mmap_writes;
// If false, fallocate() calls are bypassed
bool allow_fallocate;
// Disable child process inherit open files. Default: true // Disable child process inherit open files. Default: true
bool is_fd_close_on_exec; bool is_fd_close_on_exec;
@ -1137,9 +1140,7 @@ struct DBOptions {
// Options to control the behavior of a database (passed to DB::Open) // Options to control the behavior of a database (passed to DB::Open)
struct Options : public DBOptions, public ColumnFamilyOptions { struct Options : public DBOptions, public ColumnFamilyOptions {
// Create an Options object with default values for all fields. // Create an Options object with default values for all fields.
Options() : Options() : DBOptions(), ColumnFamilyOptions() {}
DBOptions(),
ColumnFamilyOptions() {}
Options(const DBOptions& db_options, Options(const DBOptions& db_options,
const ColumnFamilyOptions& column_family_options) const ColumnFamilyOptions& column_family_options)

@ -12,7 +12,6 @@
#include <thread> #include <thread>
#include "port/port.h" #include "port/port.h"
#include "port/sys_time.h" #include "port/sys_time.h"
#include "port/port.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "util/arena.h" #include "util/arena.h"
@ -283,6 +282,7 @@ void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) {
env_options->set_fd_cloexec = options.is_fd_close_on_exec; env_options->set_fd_cloexec = options.is_fd_close_on_exec;
env_options->bytes_per_sync = options.bytes_per_sync; env_options->bytes_per_sync = options.bytes_per_sync;
env_options->rate_limiter = options.rate_limiter.get(); env_options->rate_limiter = options.rate_limiter.get();
env_options->allow_fallocate = options.allow_fallocate;
} }
} }

@ -351,6 +351,7 @@ class PosixMmapFile : public WritableFile {
char* dst_; // Where to write next (in range [base_,limit_]) char* dst_; // Where to write next (in range [base_,limit_])
char* last_sync_; // Where have we synced up to char* last_sync_; // Where have we synced up to
uint64_t file_offset_; // Offset of base_ in file uint64_t file_offset_; // Offset of base_ in file
bool allow_fallocate_; // If false, fallocate calls are bypassed
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
bool fallocate_with_keep_size_; bool fallocate_with_keep_size_;
#endif #endif
@ -393,7 +394,7 @@ class PosixMmapFile : public WritableFile {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
// we can't fallocate with FALLOC_FL_KEEP_SIZE here // we can't fallocate with FALLOC_FL_KEEP_SIZE here
{ if (allow_fallocate_) {
IOSTATS_TIMER_GUARD(allocate_nanos); IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = fallocate(fd_, 0, file_offset_, map_size_); int alloc_status = fallocate(fd_, 0, file_offset_, map_size_);
if (alloc_status != 0) { if (alloc_status != 0) {
@ -451,7 +452,8 @@ class PosixMmapFile : public WritableFile {
limit_(nullptr), limit_(nullptr),
dst_(nullptr), dst_(nullptr),
last_sync_(nullptr), last_sync_(nullptr),
file_offset_(0) { file_offset_(0),
allow_fallocate_(options.allow_fallocate) {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size; fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif #endif
@ -575,8 +577,12 @@ class PosixMmapFile : public WritableFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
int alloc_status = fallocate( int alloc_status = 0;
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); if (allow_fallocate_) {
alloc_status =
fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
offset, len);
}
if (alloc_status == 0) { if (alloc_status == 0) {
return Status::OK(); return Status::OK();
} else { } else {
@ -592,13 +598,17 @@ class PosixWritableFile : public WritableFile {
const std::string filename_; const std::string filename_;
int fd_; int fd_;
uint64_t filesize_; uint64_t filesize_;
bool allow_fallocate_;
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
bool fallocate_with_keep_size_; bool fallocate_with_keep_size_;
#endif #endif
public: public:
PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options) PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options)
: filename_(fname), fd_(fd), filesize_(0) { : filename_(fname),
fd_(fd),
filesize_(0),
allow_fallocate_(options.allow_fallocate) {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size; fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif #endif
@ -660,8 +670,10 @@ class PosixWritableFile : public WritableFile {
// We ignore error since failure of this operation does not affect // We ignore error since failure of this operation does not affect
// correctness. // correctness.
IOSTATS_TIMER_GUARD(allocate_nanos); IOSTATS_TIMER_GUARD(allocate_nanos);
fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, if (allow_fallocate_) {
filesize_, block_size * last_allocated_block - filesize_); fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, filesize_,
block_size * last_allocated_block - filesize_);
}
#endif #endif
} }
@ -714,9 +726,12 @@ class PosixWritableFile : public WritableFile {
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos); IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status; int alloc_status = 0;
alloc_status = fallocate( if (allow_fallocate_) {
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); alloc_status =
fallocate(fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0,
offset, len);
}
if (alloc_status == 0) { if (alloc_status == 0) {
return Status::OK(); return Status::OK();
} else { } else {
@ -1146,7 +1161,7 @@ class PosixEnv : public Env {
} else { } else {
int fd = fileno(f); int fd = fileno(f);
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024 * 1024); fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 4 * 1024);
#endif #endif
SetFD_CLOEXEC(fd, nullptr); SetFD_CLOEXEC(fd, nullptr);
result->reset(new PosixLogger(f, &PosixEnv::gettid, this)); result->reset(new PosixLogger(f, &PosixEnv::gettid, this));
@ -1609,10 +1624,11 @@ class PosixEnv : public Env {
}; };
PosixEnv::PosixEnv() : checkedDiskForMmap_(false), PosixEnv::PosixEnv()
forceMmapOff(false), : checkedDiskForMmap_(false),
page_size_(getpagesize()), forceMmapOff(false),
thread_pools_(Priority::TOTAL) { page_size_(getpagesize()),
thread_pools_(Priority::TOTAL) {
PthreadCall("mutex_init", pthread_mutex_init(&mu_, nullptr)); PthreadCall("mutex_init", pthread_mutex_init(&mu_, nullptr));
for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) { for (int pool_id = 0; pool_id < Env::Priority::TOTAL; ++pool_id) {
thread_pools_[pool_id].SetThreadPriority( thread_pools_[pool_id].SetThreadPriority(

@ -239,6 +239,7 @@ DBOptions::DBOptions()
allow_os_buffer(true), allow_os_buffer(true),
allow_mmap_reads(false), allow_mmap_reads(false),
allow_mmap_writes(false), allow_mmap_writes(false),
allow_fallocate(true),
is_fd_close_on_exec(true), is_fd_close_on_exec(true),
skip_log_error_on_recovery(false), skip_log_error_on_recovery(false),
stats_dump_period_sec(600), stats_dump_period_sec(600),
@ -292,6 +293,7 @@ DBOptions::DBOptions(const Options& options)
allow_os_buffer(options.allow_os_buffer), allow_os_buffer(options.allow_os_buffer),
allow_mmap_reads(options.allow_mmap_reads), allow_mmap_reads(options.allow_mmap_reads),
allow_mmap_writes(options.allow_mmap_writes), allow_mmap_writes(options.allow_mmap_writes),
allow_fallocate(options.allow_fallocate),
is_fd_close_on_exec(options.is_fd_close_on_exec), is_fd_close_on_exec(options.is_fd_close_on_exec),
skip_log_error_on_recovery(options.skip_log_error_on_recovery), skip_log_error_on_recovery(options.skip_log_error_on_recovery),
stats_dump_period_sec(options.stats_dump_period_sec), stats_dump_period_sec(options.stats_dump_period_sec),
@ -338,6 +340,7 @@ void DBOptions::Dump(Logger* log) const {
keep_log_file_num); keep_log_file_num);
Header(log, " Options.allow_os_buffer: %d", allow_os_buffer); Header(log, " Options.allow_os_buffer: %d", allow_os_buffer);
Header(log, " Options.allow_mmap_reads: %d", allow_mmap_reads); Header(log, " Options.allow_mmap_reads: %d", allow_mmap_reads);
Header(log, " Options.allow_fallocate: %d", allow_fallocate);
Header(log, " Options.allow_mmap_writes: %d", allow_mmap_writes); Header(log, " Options.allow_mmap_writes: %d", allow_mmap_writes);
Header(log, " Options.create_missing_column_families: %d", Header(log, " Options.create_missing_column_families: %d",
create_missing_column_families); create_missing_column_families);

@ -122,6 +122,9 @@ static std::unordered_map<std::string, OptionTypeInfo> db_options_type_info = {
{"allow_mmap_reads", {"allow_mmap_reads",
{offsetof(struct DBOptions, allow_mmap_reads), OptionType::kBoolean, {offsetof(struct DBOptions, allow_mmap_reads), OptionType::kBoolean,
OptionVerificationType::kNormal}}, OptionVerificationType::kNormal}},
{"allow_fallocate",
{offsetof(struct DBOptions, allow_fallocate), OptionType::kBoolean,
OptionVerificationType::kNormal}},
{"allow_mmap_writes", {"allow_mmap_writes",
{offsetof(struct DBOptions, allow_mmap_writes), OptionType::kBoolean, {offsetof(struct DBOptions, allow_mmap_writes), OptionType::kBoolean,
OptionVerificationType::kNormal}}, OptionVerificationType::kNormal}},

Loading…
Cancel
Save