[RocksDB] Option for incremental sync

Summary: This diff added an option to control the incremenal sync frequency. db_bench has a new flag bytes_per_sync for easy tuning exercise.

Test Plan: make check; db_bench

Reviewers: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D11295
main
Haobo Xu 13 years ago
parent 79f4fd2b62
commit 3cc1af2062
  1. 9
      db/db_bench.cc
  2. 6
      include/leveldb/env.h
  3. 6
      include/leveldb/options.h
  4. 1
      util/env.cc
  5. 12
      util/env_posix.cc
  6. 5
      util/options.cc

@ -327,6 +327,12 @@ static bool FLAGS_warn_missing_keys = true;
static auto FLAGS_use_adaptive_mutex = static auto FLAGS_use_adaptive_mutex =
leveldb::Options().use_adaptive_mutex; leveldb::Options().use_adaptive_mutex;
// Allows OS to incrementally sync files to disk while they are being
// written, in the background. Issue one request for every bytes_per_sync
// written. 0 turns it off.
static auto FLAGS_bytes_per_sync =
leveldb::Options().bytes_per_sync;
namespace leveldb { namespace leveldb {
// Helper for quickly generating random data. // Helper for quickly generating random data.
@ -1183,6 +1189,7 @@ unique_ptr<char []> GenerateKeyFromInt(int v, const char* suffix = "")
options.access_hint_on_compaction_start = FLAGS_compaction_fadvice; options.access_hint_on_compaction_start = FLAGS_compaction_fadvice;
options.use_adaptive_mutex = FLAGS_use_adaptive_mutex; options.use_adaptive_mutex = FLAGS_use_adaptive_mutex;
options.bytes_per_sync = FLAGS_bytes_per_sync;
Status s; Status s;
if(FLAGS_read_only) { if(FLAGS_read_only) {
@ -2244,6 +2251,8 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--keys_per_multiget=%d%c", } else if (sscanf(argv[i], "--keys_per_multiget=%d%c",
&n, &junk) == 1) { &n, &junk) == 1) {
FLAGS_keys_per_multiget = n; FLAGS_keys_per_multiget = n;
} else if (sscanf(argv[i], "--bytes_per_sync=%ld%c", &l, &junk) == 1) {
FLAGS_bytes_per_sync = l;
} else { } else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]); fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1); exit(1);

@ -55,6 +55,12 @@ struct EnvOptions {
// If true, set the FD_CLOEXEC on open fd. // If true, set the FD_CLOEXEC on open fd.
bool set_fd_cloexec; bool set_fd_cloexec;
// Allows OS to incrementally sync files to disk while they are being
// written, in the background. Issue one request for every bytes_per_sync
// written. 0 turns it off.
// Default: 0
uint64_t bytes_per_sync;
}; };
class Env { class Env {

@ -470,6 +470,12 @@ struct Options {
// Default: false // Default: false
bool use_adaptive_mutex; bool use_adaptive_mutex;
// Allows OS to incrementally sync files to disk while they are being
// written, asynchronously, in the background.
// Issue one request for every bytes_per_sync written. 0 turns it off.
// Default: 0
uint64_t bytes_per_sync;
}; };
// Options that control read operations // Options that control read operations

@ -107,6 +107,7 @@ void AssignEnvOptions(EnvOptions* env_options, const Options& options) {
env_options->use_mmap_reads = options.allow_mmap_reads; env_options->use_mmap_reads = options.allow_mmap_reads;
env_options->use_mmap_writes = options.allow_mmap_writes; env_options->use_mmap_writes = options.allow_mmap_writes;
env_options->set_fd_cloexec = options.is_fd_close_on_exec; env_options->set_fd_cloexec = options.is_fd_close_on_exec;
env_options->bytes_per_sync = options.bytes_per_sync;
} }
} }

@ -505,6 +505,7 @@ class PosixWritableFile : public WritableFile {
bool pending_sync_; bool pending_sync_;
bool pending_fsync_; bool pending_fsync_;
uint64_t last_sync_size_; uint64_t last_sync_size_;
uint64_t bytes_per_sync_;
public: public:
PosixWritableFile(const std::string& fname, int fd, size_t capacity, PosixWritableFile(const std::string& fname, int fd, size_t capacity,
@ -517,7 +518,8 @@ class PosixWritableFile : public WritableFile {
filesize_(0), filesize_(0),
pending_sync_(false), pending_sync_(false),
pending_fsync_(false), pending_fsync_(false),
last_sync_size_(0) { last_sync_size_(0),
bytes_per_sync_(options.bytes_per_sync) {
assert(!options.use_mmap_writes); assert(!options.use_mmap_writes);
} }
@ -605,8 +607,12 @@ class PosixWritableFile : public WritableFile {
} }
cursize_ = 0; cursize_ = 0;
// sync OS cache to disk for every 2MB written // sync OS cache to disk for every bytes_per_sync_
if (filesize_ - last_sync_size_ >= 2 * 1024 * 1024) { // TODO: give log file and sst file different options (log
// files could be potentially cached in OS for their whole
// life time, thus we might not want to flush at all).
if (bytes_per_sync_ &&
filesize_ - last_sync_size_ >= bytes_per_sync_) {
RangeSync(last_sync_size_, filesize_ - last_sync_size_); RangeSync(last_sync_size_, filesize_ - last_sync_size_);
last_sync_size_ = filesize_; last_sync_size_ = filesize_;
} }

@ -75,7 +75,8 @@ Options::Options()
block_size_deviation (10), block_size_deviation (10),
advise_random_on_open(true), advise_random_on_open(true),
access_hint_on_compaction_start(NORMAL), access_hint_on_compaction_start(NORMAL),
use_adaptive_mutex(false) { use_adaptive_mutex(false),
bytes_per_sync(0) {
} }
static const char* const access_hints[] = { static const char* const access_hints[] = {
@ -214,6 +215,8 @@ Options::Dump(Logger* log) const
access_hints[access_hint_on_compaction_start]); access_hints[access_hint_on_compaction_start]);
Log(log," Options.use_adaptive_mutex: %d", Log(log," Options.use_adaptive_mutex: %d",
use_adaptive_mutex); use_adaptive_mutex);
Log(log," Options.bytes_per_sync: %ld",
bytes_per_sync);
} // Options::Dump } // Options::Dump
// //

Loading…
Cancel
Save