You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
rocksdb/util/log_write_bench.cc

89 lines
2.8 KiB

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef GFLAGS
#include <cstdio>
int main() {
fprintf(stderr, "Please install gflags to run rocksdb tools\n");
return 1;
}
#else
#include "file/writable_file_writer.h"
#include "monitoring/histogram.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/gflags_compat.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::SetUsageMessage;
// A simple benchmark to simulate transactional logs
DEFINE_int32(num_records, 6000, "Number of records.");
DEFINE_int32(record_size, 249, "Size of each record.");
DEFINE_int32(record_interval, 10000, "Interval between records (microSec)");
DEFINE_int32(bytes_per_sync, 0, "bytes_per_sync parameter in EnvOptions");
DEFINE_bool(enable_sync, false, "sync after each write.");
namespace ROCKSDB_NAMESPACE {
void RunBenchmark() {
std::string file_name = test::PerThreadDBPath("log_write_benchmark.log");
DBOptions options;
Env* env = Env::Default();
const auto& clock = env->GetSystemClock();
EnvOptions env_options = env->OptimizeForLogWrite(EnvOptions(), options);
env_options.bytes_per_sync = FLAGS_bytes_per_sync;
std::unique_ptr<WritableFile> file;
env->NewWritableFile(file_name, &file, env_options);
std::unique_ptr<WritableFileWriter> writer;
writer.reset(new WritableFileWriter(std::move(file), file_name, env_options,
clock, nullptr /* stats */,
options.listeners));
std::string record;
Fix record_size in log_write_bench, swap args to std::string::assign. (#1373) Hello and thank you for RocksDB, I noticed when using log_write_bench that writes were always 88 bytes: > strace -e trace=write ./log_write_bench -num_records 2 2>&1 | head -n 2 write(3, "\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371"..., 88) = 88 write(3, "\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371\371"..., 88) = 88 > strace -e trace=write ./log_write_bench -record_size 4096 -num_records 2 2>&1 | head -n 2 write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 88) = 88 write(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 88) = 88 I think this should be: << record.assign('X', FLAGS_record_size); >> record.assign(FLAGS_record_size, 'X'); So fill and not buffer. Otherwise I always see writes of size 88 (the decimal value for chr "X"). string& assign (const char* s, size_t n); buffer - Copies the first n characters from the array of characters pointed by s. string& assign (size_t n, char c); fill - Replaces the current value by n consecutive copies of character c. perl -le 'print ord "X"' 88 With the change: > strace -e trace=write ./log_write_bench -record_size 4096 -num_records 2 2>&1 | head -n 2 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 4096) = 4096 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 4096) = 4096 > strace -e trace=write ./log_write_bench -num_records 2 2>&1 | head -n 2 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 249) = 249 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 249) = 249 Thanks. https://github.com/facebook/rocksdb/commit/01c27be5fb42524c5052b4b4a23e05501e1d1421 https://reviews.facebook.net/D16239
8 years ago
record.assign(FLAGS_record_size, 'X');
HistogramImpl hist;
uint64_t start_time = clock->NowMicros();
for (int i = 0; i < FLAGS_num_records; i++) {
uint64_t start_nanos = clock->NowNanos();
Fix log_write_bench -bytes_per_sync option. (#1375) Hello and thanks for RocksDB, When log_write_bench is run with the -bytes_per_sync option, the option does not influence any *sync* behaviour. > strace -e trace=write,sync_file_range ./log_write_bench -record_interval 0 -record_size 1048576 -num_records 11 -bytes_per_sync 2097152 2>&1 | egrep '^(sync|write.*XXXX)' write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 I suspect that this is because the bytes_per_sync option now needs to be using a `WritableFileWriter` and not a `WritableFile`. With the diff below applied, it changes to: > strace -e trace=write,sync_file_range ./log_write_bench -record_interval 0 -record_size 1048576 -num_records 11 -bytes_per_sync 2097152 2>&1 | egrep '^(sync|write.*XXXX)' write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x200000, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x400000, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x600000, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x800000, 0x200000, 0x2) = 0 ( Note that the first 1MB is not synced as mentioned in util/file_reader_writer.cc::WritableFileWriter::Flush() ) This diff also includes the fix from https://github.com/facebook/rocksdb/pull/1373 > diff -du util/log_write_bench.cc.orig util/log_write_bench.cc --- util/log_write_bench.cc.orig 2016-10-04 12:06:29.115122580 -0400 +++ util/log_write_bench.cc 2016-10-05 07:24:09.677037576 -0400 @@ -14,6 +14,7 @@ #include <gflags/gflags.h> #include "rocksdb/env.h" +#include "util/file_reader_writer.h" #include "util/histogram.h" #include "util/testharness.h" #include "util/testutil.h" @@ -38,19 +39,21 @@ env_options.bytes_per_sync = FLAGS_bytes_per_sync; unique_ptr<WritableFile> file; env->NewWritableFile(file_name, &file, env_options); + unique_ptr<WritableFileWriter> writer; + writer.reset(new WritableFileWriter(std::move(file), env_options)); std::string record; - record.assign('X', FLAGS_record_size); + record.assign(FLAGS_record_size, 'X'); HistogramImpl hist; uint64_t start_time = env->NowMicros(); for (int i = 0; i < FLAGS_num_records; i++) { uint64_t start_nanos = env->NowNanos(); - file->Append(record); - file->Flush(); + writer->Append(record); + writer->Flush(); if (FLAGS_enable_sync) { - file->Sync(); + writer->Sync(false); } hist.Add(env->NowNanos() - start_nanos);
8 years ago
writer->Append(record);
writer->Flush();
if (FLAGS_enable_sync) {
Fix log_write_bench -bytes_per_sync option. (#1375) Hello and thanks for RocksDB, When log_write_bench is run with the -bytes_per_sync option, the option does not influence any *sync* behaviour. > strace -e trace=write,sync_file_range ./log_write_bench -record_interval 0 -record_size 1048576 -num_records 11 -bytes_per_sync 2097152 2>&1 | egrep '^(sync|write.*XXXX)' write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 I suspect that this is because the bytes_per_sync option now needs to be using a `WritableFileWriter` and not a `WritableFile`. With the diff below applied, it changes to: > strace -e trace=write,sync_file_range ./log_write_bench -record_interval 0 -record_size 1048576 -num_records 11 -bytes_per_sync 2097152 2>&1 | egrep '^(sync|write.*XXXX)' write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x200000, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x400000, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x600000, 0x200000, 0x2) = 0 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 write(3, "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"..., 1048576) = 1048576 sync_file_range(0x3, 0x800000, 0x200000, 0x2) = 0 ( Note that the first 1MB is not synced as mentioned in util/file_reader_writer.cc::WritableFileWriter::Flush() ) This diff also includes the fix from https://github.com/facebook/rocksdb/pull/1373 > diff -du util/log_write_bench.cc.orig util/log_write_bench.cc --- util/log_write_bench.cc.orig 2016-10-04 12:06:29.115122580 -0400 +++ util/log_write_bench.cc 2016-10-05 07:24:09.677037576 -0400 @@ -14,6 +14,7 @@ #include <gflags/gflags.h> #include "rocksdb/env.h" +#include "util/file_reader_writer.h" #include "util/histogram.h" #include "util/testharness.h" #include "util/testutil.h" @@ -38,19 +39,21 @@ env_options.bytes_per_sync = FLAGS_bytes_per_sync; unique_ptr<WritableFile> file; env->NewWritableFile(file_name, &file, env_options); + unique_ptr<WritableFileWriter> writer; + writer.reset(new WritableFileWriter(std::move(file), env_options)); std::string record; - record.assign('X', FLAGS_record_size); + record.assign(FLAGS_record_size, 'X'); HistogramImpl hist; uint64_t start_time = env->NowMicros(); for (int i = 0; i < FLAGS_num_records; i++) { uint64_t start_nanos = env->NowNanos(); - file->Append(record); - file->Flush(); + writer->Append(record); + writer->Flush(); if (FLAGS_enable_sync) { - file->Sync(); + writer->Sync(false); } hist.Add(env->NowNanos() - start_nanos);
8 years ago
writer->Sync(false);
}
hist.Add(clock->NowNanos() - start_nanos);
if (i % 1000 == 1) {
fprintf(stderr, "Wrote %d records...\n", i);
}
int time_to_sleep =
(i + 1) * FLAGS_record_interval - (clock->NowMicros() - start_time);
if (time_to_sleep > 0) {
clock->SleepForMicroseconds(time_to_sleep);
}
}
fprintf(stderr, "Distribution of latency of append+flush: \n%s",
hist.ToString().c_str());
}
} // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
" [OPTIONS]...");
ParseCommandLineFlags(&argc, &argv, true);
ROCKSDB_NAMESPACE::RunBenchmark();
return 0;
}
#endif // GFLAGS