Move rate_limiter, write buffering, most perf context instrumentation and most random kill out of Env

Summary: We want to keep Env a think layer for better portability. Less platform dependent codes should be moved out of Env. In this patch, I create a wrapper of file readers and writers, and put rate limiting, write buffering, as well as most perf context instrumentation and random kill out of Env. It will make it easier to maintain multiple Env in the future.

Test Plan: Run all existing unit tests.

Reviewers: anthony, kradhakrishnan, IslamAbdelRahman, yhchiang, igor

Reviewed By: igor

Subscribers: leveldb, dhruba

Differential Revision: https://reviews.facebook.net/D42321
main
sdong 9 years ago
parent 5ec829bc4f
commit 6e9fbeb27c
  1. 38
      db/builder.cc
  2. 4
      db/builder.h
  3. 22
      db/compaction_job.cc
  4. 5
      db/compaction_job_test.cc
  5. 84
      db/db_impl.cc
  6. 13
      db/db_test.cc
  7. 4
      db/fault_injection_test.cc
  8. 9
      db/filename.cc
  9. 4
      db/filename.h
  10. 5
      db/flush_job_test.cc
  11. 3
      db/log_reader.cc
  12. 8
      db/log_reader.h
  13. 52
      db/log_test.cc
  14. 6
      db/log_writer.cc
  15. 10
      db/log_writer.h
  16. 8
      db/plain_table_db_test.cc
  17. 15
      db/repair.cc
  18. 5
      db/table_cache.cc
  19. 42
      db/table_properties_collector_test.cc
  20. 19
      db/transaction_log_impl.cc
  21. 3
      db/transaction_log_impl.h
  22. 88
      db/version_set.cc
  23. 5
      db/wal_manager.cc
  24. 9
      db/wal_manager_test.cc
  25. 23
      include/rocksdb/env.h
  26. 8
      include/rocksdb/table.h
  27. 12
      port/port_posix.cc
  28. 3
      port/port_posix.h
  29. 39
      port/win/env_win.cc
  30. 1
      src.mk
  31. 7
      table/adaptive_table_factory.cc
  32. 13
      table/adaptive_table_factory.h
  33. 8
      table/block_based_table_builder.cc
  34. 2
      table/block_based_table_builder.h
  35. 4
      table/block_based_table_factory.cc
  36. 8
      table/block_based_table_factory.h
  37. 20
      table/block_based_table_reader.cc
  38. 5
      table/block_based_table_reader.h
  39. 3
      table/cuckoo_table_builder.cc
  40. 15
      table/cuckoo_table_builder.h
  41. 114
      table/cuckoo_table_builder_test.cc
  42. 9
      table/cuckoo_table_factory.cc
  43. 13
      table/cuckoo_table_factory.h
  44. 3
      table/cuckoo_table_reader.cc
  45. 14
      table/cuckoo_table_reader.h
  46. 66
      table/cuckoo_table_reader_test.cc
  47. 11
      table/format.cc
  48. 5
      table/format.h
  49. 12
      table/meta_blocks.cc
  50. 13
      table/meta_blocks.h
  51. 9
      table/mock_table.cc
  52. 13
      table/mock_table.h
  53. 10
      table/plain_table_builder.cc
  54. 11
      table/plain_table_builder.h
  55. 13
      table/plain_table_factory.cc
  56. 13
      table/plain_table_factory.h
  57. 6
      table/plain_table_key_coding.cc
  58. 2
      table/plain_table_key_coding.h
  59. 4
      table/plain_table_reader.cc
  60. 9
      table/plain_table_reader.h
  61. 15
      table/table_reader_bench.cc
  62. 48
      table/table_test.cc
  63. 3
      util/db_test_util.h
  64. 238
      util/env_posix.cc
  65. 7
      util/env_test.cc
  66. 225
      util/file_reader_writer.cc
  67. 109
      util/file_reader_writer.h
  68. 14
      util/file_util.cc
  69. 14
      util/ldb_cmd.cc
  70. 8
      util/sst_dump_test.cc
  71. 18
      util/sst_dump_tool.cc
  72. 7
      util/sst_dump_tool_imp.h
  73. 15
      util/sync_point.cc
  74. 28
      util/sync_point.h
  75. 16
      util/testutil.cc
  76. 8
      util/testutil.h
  77. 32
      utilities/backupable/backupable_db.cc
  78. 6
      utilities/backupable/backupable_db_test.cc

@ -21,6 +21,7 @@
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/table.h" #include "rocksdb/table.h"
#include "table/block_based_table_builder.h" #include "table/block_based_table_builder.h"
#include "util/file_reader_writer.h"
#include "util/iostats_context_imp.h" #include "util/iostats_context_imp.h"
#include "util/thread_status_util.h" #include "util/thread_status_util.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
@ -34,7 +35,7 @@ TableBuilder* NewTableBuilder(
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* file, const CompressionType compression_type, WritableFileWriter* file, const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters) { const CompressionOptions& compression_opts, const bool skip_filters) {
return ioptions.table_factory->NewTableBuilder( return ioptions.table_factory->NewTableBuilder(
TableBuilderOptions(ioptions, internal_comparator, TableBuilderOptions(ioptions, internal_comparator,
@ -72,16 +73,22 @@ Status BuildTable(
std::string fname = TableFileName(ioptions.db_paths, meta->fd.GetNumber(), std::string fname = TableFileName(ioptions.db_paths, meta->fd.GetNumber(),
meta->fd.GetPathId()); meta->fd.GetPathId());
if (iter->Valid()) { if (iter->Valid()) {
unique_ptr<WritableFile> file; TableBuilder* builder;
s = env->NewWritableFile(fname, &file, env_options); unique_ptr<WritableFileWriter> file_writer;
if (!s.ok()) { {
return s; unique_ptr<WritableFile> file;
} s = env->NewWritableFile(fname, &file, env_options);
file->SetIOPriority(io_priority); if (!s.ok()) {
return s;
}
file->SetIOPriority(io_priority);
TableBuilder* builder = NewTableBuilder( file_writer.reset(new WritableFileWriter(std::move(file), env_options));
ioptions, internal_comparator, int_tbl_prop_collector_factories,
file.get(), compression, compression_opts); builder = NewTableBuilder(
ioptions, internal_comparator, int_tbl_prop_collector_factories,
file_writer.get(), compression, compression_opts);
}
{ {
// the first key is the smallest key // the first key is the smallest key
@ -232,16 +239,11 @@ Status BuildTable(
// Finish and check for file errors // Finish and check for file errors
if (s.ok() && !ioptions.disable_data_sync) { if (s.ok() && !ioptions.disable_data_sync) {
if (ioptions.use_fsync) { StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS); file_writer->Sync(ioptions.use_fsync);
s = file->Fsync();
} else {
StopWatch sw(env, ioptions.statistics, TABLE_SYNC_MICROS);
s = file->Sync();
}
} }
if (s.ok()) { if (s.ok()) {
s = file->Close(); s = file_writer->Close();
} }
if (s.ok()) { if (s.ok()) {

@ -29,14 +29,14 @@ class Iterator;
class TableCache; class TableCache;
class VersionEdit; class VersionEdit;
class TableBuilder; class TableBuilder;
class WritableFile; class WritableFileWriter;
TableBuilder* NewTableBuilder( TableBuilder* NewTableBuilder(
const ImmutableCFOptions& options, const ImmutableCFOptions& options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* file, const CompressionType compression_type, WritableFileWriter* file, const CompressionType compression_type,
const CompressionOptions& compression_opts, const CompressionOptions& compression_opts,
const bool skip_filters = false); const bool skip_filters = false);

@ -44,6 +44,7 @@
#include "table/table_builder.h" #include "table/table_builder.h"
#include "table/two_level_iterator.h" #include "table/two_level_iterator.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/log_buffer.h" #include "util/log_buffer.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
@ -71,7 +72,7 @@ struct CompactionJob::CompactionState {
std::vector<Output> outputs; std::vector<Output> outputs;
// State kept for output being generated // State kept for output being generated
std::unique_ptr<WritableFile> outfile; std::unique_ptr<WritableFileWriter> outfile;
std::unique_ptr<TableBuilder> builder; std::unique_ptr<TableBuilder> builder;
uint64_t total_bytes; uint64_t total_bytes;
@ -662,13 +663,8 @@ Status CompactionJob::FinishCompactionOutputFile(const Status& input_status) {
// Finish and check for file errors // Finish and check for file errors
if (s.ok() && !db_options_.disableDataSync) { if (s.ok() && !db_options_.disableDataSync) {
if (db_options_.use_fsync) { StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS); s = compact_->outfile->Sync(db_options_.use_fsync);
s = compact_->outfile->Fsync();
} else {
StopWatch sw(env_, stats_, COMPACTION_OUTFILE_SYNC_MICROS);
s = compact_->outfile->Sync();
}
} }
if (s.ok()) { if (s.ok()) {
s = compact_->outfile->Close(); s = compact_->outfile->Close();
@ -799,10 +795,10 @@ Status CompactionJob::OpenCompactionOutputFile() {
// no need to lock because VersionSet::next_file_number_ is atomic // no need to lock because VersionSet::next_file_number_ is atomic
uint64_t file_number = versions_->NewFileNumber(); uint64_t file_number = versions_->NewFileNumber();
// Make the output file // Make the output file
unique_ptr<WritableFile> writable_file;
std::string fname = TableFileName(db_options_.db_paths, file_number, std::string fname = TableFileName(db_options_.db_paths, file_number,
compact_->compaction->output_path_id()); compact_->compaction->output_path_id());
Status s = env_->NewWritableFile(fname, &compact_->outfile, env_options_); Status s = env_->NewWritableFile(fname, &writable_file, env_options_);
if (!s.ok()) { if (!s.ok()) {
Log(InfoLogLevel::ERROR_LEVEL, db_options_.info_log, Log(InfoLogLevel::ERROR_LEVEL, db_options_.info_log,
"[%s] [JOB %d] OpenCompactionOutputFiles for table #%" PRIu64 "[%s] [JOB %d] OpenCompactionOutputFiles for table #%" PRIu64
@ -820,9 +816,11 @@ Status CompactionJob::OpenCompactionOutputFile() {
out.smallest_seqno = out.largest_seqno = 0; out.smallest_seqno = out.largest_seqno = 0;
compact_->outputs.push_back(out); compact_->outputs.push_back(out);
compact_->outfile->SetIOPriority(Env::IO_LOW); writable_file->SetIOPriority(Env::IO_LOW);
compact_->outfile->SetPreallocationBlockSize( writable_file->SetPreallocationBlockSize(
static_cast<size_t>(compact_->compaction->OutputFilePreallocationSize())); static_cast<size_t>(compact_->compaction->OutputFilePreallocationSize()));
compact_->outfile.reset(
new WritableFileWriter(std::move(writable_file), env_options_));
ColumnFamilyData* cfd = compact_->compaction->column_family_data(); ColumnFamilyData* cfd = compact_->compaction->column_family_data();
bool skip_filters = false; bool skip_filters = false;

@ -13,6 +13,7 @@
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/db.h" #include "rocksdb/db.h"
#include "util/file_reader_writer.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -166,8 +167,10 @@ class CompactionJobTest : public testing::Test {
Status s = env_->NewWritableFile( Status s = env_->NewWritableFile(
manifest, &file, env_->OptimizeForManifestWrite(env_options_)); manifest, &file, env_->OptimizeForManifestWrite(env_options_));
ASSERT_OK(s); ASSERT_OK(s);
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), EnvOptions()));
{ {
log::Writer log(std::move(file)); log::Writer log(std::move(file_writer));
std::string record; std::string record;
new_db.EncodeTo(&record); new_db.EncodeTo(&record);
s = log.AddRecord(record); s = log.AddRecord(record);

@ -72,6 +72,7 @@
#include "util/compression.h" #include "util/compression.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/db_info_dumper.h" #include "util/db_info_dumper.h"
#include "util/file_reader_writer.h"
#include "util/file_util.h" #include "util/file_util.h"
#include "util/hash_skiplist_rep.h" #include "util/hash_skiplist_rep.h"
#include "util/hash_linklist_rep.h" #include "util/hash_linklist_rep.h"
@ -384,18 +385,22 @@ Status DBImpl::NewDB() {
new_db.SetNextFile(2); new_db.SetNextFile(2);
new_db.SetLastSequence(0); new_db.SetLastSequence(0);
Status s;
Log(InfoLogLevel::INFO_LEVEL, Log(InfoLogLevel::INFO_LEVEL,
db_options_.info_log, "Creating manifest 1 \n"); db_options_.info_log, "Creating manifest 1 \n");
const std::string manifest = DescriptorFileName(dbname_, 1); const std::string manifest = DescriptorFileName(dbname_, 1);
unique_ptr<WritableFile> file;
Status s = env_->NewWritableFile(
manifest, &file, env_->OptimizeForManifestWrite(env_options_));
if (!s.ok()) {
return s;
}
file->SetPreallocationBlockSize(db_options_.manifest_preallocation_size);
{ {
log::Writer log(std::move(file)); unique_ptr<WritableFile> file;
EnvOptions env_options = env_->OptimizeForManifestWrite(env_options_);
s = env_->NewWritableFile(manifest, &file, env_options);
if (!s.ok()) {
return s;
}
file->SetPreallocationBlockSize(db_options_.manifest_preallocation_size);
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), env_options));
log::Writer log(std::move(file_writer));
std::string record; std::string record;
new_db.EncodeTo(&record); new_db.EncodeTo(&record);
s = log.AddRecord(record); s = log.AddRecord(record);
@ -1013,17 +1018,21 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
versions_->MarkFileNumberUsedDuringRecovery(log_number); versions_->MarkFileNumberUsedDuringRecovery(log_number);
// Open the log file // Open the log file
std::string fname = LogFileName(db_options_.wal_dir, log_number); std::string fname = LogFileName(db_options_.wal_dir, log_number);
unique_ptr<SequentialFile> file; unique_ptr<SequentialFileReader> file_reader;
status = env_->NewSequentialFile(fname, &file, env_options_); {
if (!status.ok()) { unique_ptr<SequentialFile> file;
MaybeIgnoreError(&status); status = env_->NewSequentialFile(fname, &file, env_options_);
if (!status.ok()) { if (!status.ok()) {
return status; MaybeIgnoreError(&status);
} else { if (!status.ok()) {
// Fail with one log file, but that's ok. return status;
// Try next one. } else {
continue; // Fail with one log file, but that's ok.
// Try next one.
continue;
}
} }
file_reader.reset(new SequentialFileReader(std::move(file)));
} }
// Create the log reader. // Create the log reader.
@ -1042,7 +1051,7 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
// paranoid_checks==false so that corruptions cause entire commits // paranoid_checks==false so that corruptions cause entire commits
// to be skipped instead of propagating bad information (like overly // to be skipped instead of propagating bad information (like overly
// large sequence numbers). // large sequence numbers).
log::Reader reader(std::move(file), &reporter, true /*checksum*/, log::Reader reader(std::move(file_reader), &reporter, true /*checksum*/,
0 /*initial_offset*/); 0 /*initial_offset*/);
Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log, Log(InfoLogLevel::INFO_LEVEL, db_options_.info_log,
"Recovering log #%" PRIu64 " mode %d skip-recovery %d", log_number, "Recovering log #%" PRIu64 " mode %d skip-recovery %d", log_number,
@ -3490,11 +3499,7 @@ Status DBImpl::WriteImpl(const WriteOptions& write_options,
if (status.ok() && write_options.sync) { if (status.ok() && write_options.sync) {
RecordTick(stats_, WAL_FILE_SYNCED); RecordTick(stats_, WAL_FILE_SYNCED);
StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS); StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS);
if (db_options_.use_fsync) { status = log_->file()->Sync(db_options_.use_fsync);
status = log_->file()->Fsync();
} else {
status = log_->file()->Sync();
}
if (status.ok() && !log_dir_synced_) { if (status.ok() && !log_dir_synced_) {
// We only sync WAL directory the first time WAL syncing is // We only sync WAL directory the first time WAL syncing is
// requested, so that in case users never turn on WAL sync, // requested, so that in case users never turn on WAL sync,
@ -3624,15 +3629,19 @@ Status DBImpl::SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context) {
Status s; Status s;
{ {
if (creating_new_log) { if (creating_new_log) {
EnvOptions opt_env_opt =
env_->OptimizeForLogWrite(env_options_, db_options_);
s = env_->NewWritableFile( s = env_->NewWritableFile(
LogFileName(db_options_.wal_dir, new_log_number), &lfile, LogFileName(db_options_.wal_dir, new_log_number), &lfile,
env_->OptimizeForLogWrite(env_options_, db_options_)); opt_env_opt);
if (s.ok()) { if (s.ok()) {
// Our final size should be less than write_buffer_size // Our final size should be less than write_buffer_size
// (compression, etc) but err on the side of caution. // (compression, etc) but err on the side of caution.
lfile->SetPreallocationBlockSize( lfile->SetPreallocationBlockSize(
1.1 * mutable_cf_options.write_buffer_size); 1.1 * mutable_cf_options.write_buffer_size);
new_log = new log::Writer(std::move(lfile)); unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(lfile), opt_env_opt));
new_log = new log::Writer(std::move(file_writer));
log_dir_synced_ = false; log_dir_synced_ = false;
} }
} }
@ -4031,12 +4040,18 @@ Status DBImpl::CheckConsistency() {
Status DBImpl::GetDbIdentity(std::string& identity) const { Status DBImpl::GetDbIdentity(std::string& identity) const {
std::string idfilename = IdentityFileName(dbname_); std::string idfilename = IdentityFileName(dbname_);
unique_ptr<SequentialFile> idfile;
const EnvOptions soptions; const EnvOptions soptions;
Status s = env_->NewSequentialFile(idfilename, &idfile, soptions); unique_ptr<SequentialFileReader> id_file_reader;
if (!s.ok()) { Status s;
return s; {
unique_ptr<SequentialFile> idfile;
s = env_->NewSequentialFile(idfilename, &idfile, soptions);
if (!s.ok()) {
return s;
}
id_file_reader.reset(new SequentialFileReader(std::move(idfile)));
} }
uint64_t file_size; uint64_t file_size;
s = env_->GetFileSize(idfilename, &file_size); s = env_->GetFileSize(idfilename, &file_size);
if (!s.ok()) { if (!s.ok()) {
@ -4044,7 +4059,7 @@ Status DBImpl::GetDbIdentity(std::string& identity) const {
} }
char* buffer = reinterpret_cast<char*>(alloca(file_size)); char* buffer = reinterpret_cast<char*>(alloca(file_size));
Slice id; Slice id;
s = idfile->Read(static_cast<size_t>(file_size), &id, buffer); s = id_file_reader->Read(static_cast<size_t>(file_size), &id, buffer);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -4176,14 +4191,17 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
uint64_t new_log_number = impl->versions_->NewFileNumber(); uint64_t new_log_number = impl->versions_->NewFileNumber();
unique_ptr<WritableFile> lfile; unique_ptr<WritableFile> lfile;
EnvOptions soptions(db_options); EnvOptions soptions(db_options);
EnvOptions opt_env_options =
impl->db_options_.env->OptimizeForLogWrite(soptions, impl->db_options_);
s = impl->db_options_.env->NewWritableFile( s = impl->db_options_.env->NewWritableFile(
LogFileName(impl->db_options_.wal_dir, new_log_number), &lfile, LogFileName(impl->db_options_.wal_dir, new_log_number), &lfile,
impl->db_options_.env->OptimizeForLogWrite(soptions, opt_env_options);
impl->db_options_));
if (s.ok()) { if (s.ok()) {
lfile->SetPreallocationBlockSize(1.1 * max_write_buffer_size); lfile->SetPreallocationBlockSize(1.1 * max_write_buffer_size);
impl->logfile_number_ = new_log_number; impl->logfile_number_ = new_log_number;
impl->log_.reset(new log::Writer(std::move(lfile))); unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(lfile), opt_env_options));
impl->log_.reset(new log::Writer(std::move(file_writer)));
// set column family handles // set column family handles
for (auto cf : column_families) { for (auto cf : column_families) {

@ -52,6 +52,7 @@
#include "table/mock_table.h" #include "table/mock_table.h"
#include "table/plain_table_factory.h" #include "table/plain_table_factory.h"
#include "util/db_test_util.h" #include "util/db_test_util.h"
#include "util/file_reader_writer.h"
#include "util/hash.h" #include "util/hash.h"
#include "util/hash_linklist_rep.h" #include "util/hash_linklist_rep.h"
#include "utilities/merge_operators.h" #include "utilities/merge_operators.h"
@ -6008,7 +6009,9 @@ class RecoveryTestHelper {
std::string fname = LogFileName(test->dbname_, current_log_number); std::string fname = LogFileName(test->dbname_, current_log_number);
unique_ptr<WritableFile> file; unique_ptr<WritableFile> file;
ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options)); ASSERT_OK(db_options.env->NewWritableFile(fname, &file, env_options));
current_log_writer.reset(new log::Writer(std::move(file))); unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), env_options));
current_log_writer.reset(new log::Writer(std::move(file_writer)));
for (int i = 0; i < kKeysPerWALFile; i++) { for (int i = 0; i < kKeysPerWALFile; i++) {
std::string key = "key" + ToString(count++); std::string key = "key" + ToString(count++);
@ -7231,8 +7234,7 @@ TEST_F(DBTest, RateLimitingTest) {
} }
elapsed = env_->NowMicros() - start; elapsed = env_->NowMicros() - start;
Close(); Close();
ASSERT_TRUE(options.rate_limiter->GetTotalBytesThrough() == ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
env_->bytes_written_);
double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate; double ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio); fprintf(stderr, "write rate ratio = %.2lf, expected 0.7\n", ratio);
ASSERT_TRUE(ratio < 0.8); ASSERT_TRUE(ratio < 0.8);
@ -7251,11 +7253,10 @@ TEST_F(DBTest, RateLimitingTest) {
} }
elapsed = env_->NowMicros() - start; elapsed = env_->NowMicros() - start;
Close(); Close();
ASSERT_TRUE(options.rate_limiter->GetTotalBytesThrough() == ASSERT_EQ(options.rate_limiter->GetTotalBytesThrough(), env_->bytes_written_);
env_->bytes_written_);
ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate; ratio = env_->bytes_written_ * 1000000 / elapsed / raw_rate;
fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio); fprintf(stderr, "write rate ratio = %.2lf, expected 0.5\n", ratio);
ASSERT_TRUE(ratio < 0.6); ASSERT_LT(ratio, 0.6);
} }
namespace { namespace {

@ -130,7 +130,7 @@ struct FileState {
} // anonymous namespace } // anonymous namespace
// A wrapper around WritableFile which informs another Env whenever this file // A wrapper around WritableFileWriter* file
// is written to or sync'ed. // is written to or sync'ed.
class TestWritableFile : public WritableFile { class TestWritableFile : public WritableFile {
public: public:
@ -197,7 +197,7 @@ class FaultInjectionTestEnv : public EnvWrapper {
Status s = target()->NewWritableFile(fname, result, soptions); Status s = target()->NewWritableFile(fname, result, soptions);
if (s.ok()) { if (s.ok()) {
result->reset(new TestWritableFile(fname, std::move(*result), this)); result->reset(new TestWritableFile(fname, std::move(*result), this));
// WritableFile doesn't append to files, so if the same file is opened // WritableFileWriter* file is opened
// again then it will be truncated - so forget our saved state. // again then it will be truncated - so forget our saved state.
UntrackFile(fname); UntrackFile(fname);
MutexLock l(&mutex_); MutexLock l(&mutex_);

@ -18,6 +18,7 @@
#include <vector> #include <vector>
#include "db/dbformat.h" #include "db/dbformat.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "util/file_reader_writer.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
@ -328,15 +329,13 @@ Status SetIdentityFile(Env* env, const std::string& dbname) {
return s; return s;
} }
Status SyncManifest(Env* env, const DBOptions* db_options, WritableFile* file) { Status SyncManifest(Env* env, const DBOptions* db_options,
WritableFileWriter* file) {
if (db_options->disableDataSync) { if (db_options->disableDataSync) {
return Status::OK(); return Status::OK();
} else if (db_options->use_fsync) {
StopWatch sw(env, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS);
return file->Fsync();
} else { } else {
StopWatch sw(env, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS); StopWatch sw(env, db_options->statistics.get(), MANIFEST_FILE_SYNC_MICROS);
return file->Sync(); return file->Sync(db_options->use_fsync);
} }
} }

@ -25,7 +25,7 @@ namespace rocksdb {
class Env; class Env;
class Directory; class Directory;
class WritableFile; class WritableFileWriter;
enum FileType { enum FileType {
kLogFile, kLogFile,
@ -140,6 +140,6 @@ extern Status SetIdentityFile(Env* env, const std::string& dbname);
// Sync manifest file `file`. // Sync manifest file `file`.
extern Status SyncManifest(Env* env, const DBOptions* db_options, extern Status SyncManifest(Env* env, const DBOptions* db_options,
WritableFile* file); WritableFileWriter* file);
} // namespace rocksdb } // namespace rocksdb

@ -11,6 +11,7 @@
#include "db/version_set.h" #include "db/version_set.h"
#include "db/writebuffer.h" #include "db/writebuffer.h"
#include "rocksdb/cache.h" #include "rocksdb/cache.h"
#include "util/file_reader_writer.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -56,8 +57,10 @@ class FlushJobTest : public testing::Test {
Status s = env_->NewWritableFile( Status s = env_->NewWritableFile(
manifest, &file, env_->OptimizeForManifestWrite(env_options_)); manifest, &file, env_->OptimizeForManifestWrite(env_options_));
ASSERT_OK(s); ASSERT_OK(s);
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), EnvOptions()));
{ {
log::Writer log(std::move(file)); log::Writer log(std::move(file_writer));
std::string record; std::string record;
new_db.EncodeTo(&record); new_db.EncodeTo(&record);
s = log.AddRecord(record); s = log.AddRecord(record);

@ -13,6 +13,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
namespace log { namespace log {
@ -20,7 +21,7 @@ namespace log {
Reader::Reporter::~Reporter() { Reader::Reporter::~Reporter() {
} }
Reader::Reader(unique_ptr<SequentialFile>&& _file, Reporter* reporter, Reader::Reader(unique_ptr<SequentialFileReader>&& _file, Reporter* reporter,
bool checksum, uint64_t initial_offset) bool checksum, uint64_t initial_offset)
: file_(std::move(_file)), : file_(std::move(_file)),
reporter_(reporter), reporter_(reporter),

@ -17,7 +17,7 @@
namespace rocksdb { namespace rocksdb {
class SequentialFile; class SequentialFileReader;
using std::unique_ptr; using std::unique_ptr;
namespace log { namespace log {
@ -51,7 +51,7 @@ class Reader {
// //
// The Reader will start reading at the first record located at physical // The Reader will start reading at the first record located at physical
// position >= initial_offset within the file. // position >= initial_offset within the file.
Reader(unique_ptr<SequentialFile>&& file, Reporter* reporter, Reader(unique_ptr<SequentialFileReader>&& file, Reporter* reporter,
bool checksum, uint64_t initial_offset); bool checksum, uint64_t initial_offset);
~Reader(); ~Reader();
@ -81,10 +81,10 @@ class Reader {
// block that was partially read. // block that was partially read.
void UnmarkEOF(); void UnmarkEOF();
SequentialFile* file() { return file_.get(); } SequentialFileReader* file() { return file_.get(); }
private: private:
const unique_ptr<SequentialFile> file_; const unique_ptr<SequentialFileReader> file_;
Reporter* const reporter_; Reporter* const reporter_;
bool const checksum_; bool const checksum_;
char* const backing_store_; char* const backing_store_;

@ -12,8 +12,10 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_reader_writer.h"
#include "util/random.h" #include "util/random.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h"
namespace rocksdb { namespace rocksdb {
namespace log { namespace log {
@ -163,26 +165,27 @@ class LogTest : public testing::Test {
}; };
std::string& dest_contents() { std::string& dest_contents() {
auto dest = dynamic_cast<StringDest*>(writer_.file()); auto dest = dynamic_cast<StringDest*>(writer_.file()->writable_file());
assert(dest); assert(dest);
return dest->contents_; return dest->contents_;
} }
const std::string& dest_contents() const { const std::string& dest_contents() const {
auto dest = dynamic_cast<const StringDest*>(writer_.file()); auto dest =
dynamic_cast<const StringDest*>(writer_.file()->writable_file());
assert(dest); assert(dest);
return dest->contents_; return dest->contents_;
} }
void reset_source_contents() { void reset_source_contents() {
auto src = dynamic_cast<StringSource*>(reader_.file()); auto src = dynamic_cast<StringSource*>(reader_.file()->file());
assert(src); assert(src);
src->contents_ = dest_contents(); src->contents_ = dest_contents();
} }
Slice reader_contents_; Slice reader_contents_;
unique_ptr<StringDest> dest_holder_; unique_ptr<WritableFileWriter> dest_holder_;
unique_ptr<StringSource> source_holder_; unique_ptr<SequentialFileReader> source_holder_;
ReportCollector report_; ReportCollector report_;
Writer writer_; Writer writer_;
Reader reader_; Reader reader_;
@ -192,13 +195,15 @@ class LogTest : public testing::Test {
static uint64_t initial_offset_last_record_offsets_[]; static uint64_t initial_offset_last_record_offsets_[];
public: public:
LogTest() : reader_contents_(), LogTest()
dest_holder_(new StringDest(reader_contents_)), : reader_contents_(),
source_holder_(new StringSource(reader_contents_)), dest_holder_(
writer_(std::move(dest_holder_)), test::GetWritableFileWriter(new StringDest(reader_contents_))),
reader_(std::move(source_holder_), &report_, true/*checksum*/, source_holder_(
0/*initial_offset*/) { test::GetSequentialFileReader(new StringSource(reader_contents_))),
} writer_(std::move(dest_holder_)),
reader_(std::move(source_holder_), &report_, true /*checksum*/,
0 /*initial_offset*/) {}
void Write(const std::string& msg) { void Write(const std::string& msg) {
writer_.AddRecord(Slice(msg)); writer_.AddRecord(Slice(msg));
@ -227,7 +232,7 @@ class LogTest : public testing::Test {
} }
void ShrinkSize(int bytes) { void ShrinkSize(int bytes) {
auto dest = dynamic_cast<StringDest*>(writer_.file()); auto dest = dynamic_cast<StringDest*>(writer_.file()->writable_file());
assert(dest); assert(dest);
dest->Drop(bytes); dest->Drop(bytes);
} }
@ -240,7 +245,7 @@ class LogTest : public testing::Test {
} }
void ForceError(size_t position = 0) { void ForceError(size_t position = 0) {
auto src = dynamic_cast<StringSource*>(reader_.file()); auto src = dynamic_cast<StringSource*>(reader_.file()->file());
src->force_error_ = true; src->force_error_ = true;
src->force_error_position_ = position; src->force_error_position_ = position;
} }
@ -254,13 +259,13 @@ class LogTest : public testing::Test {
} }
void ForceEOF(size_t position = 0) { void ForceEOF(size_t position = 0) {
auto src = dynamic_cast<StringSource*>(reader_.file()); auto src = dynamic_cast<StringSource*>(reader_.file()->file());
src->force_eof_ = true; src->force_eof_ = true;
src->force_eof_position_ = position; src->force_eof_position_ = position;
} }
void UnmarkEOF() { void UnmarkEOF() {
auto src = dynamic_cast<StringSource*>(reader_.file()); auto src = dynamic_cast<StringSource*>(reader_.file()->file());
src->returned_partial_ = false; src->returned_partial_ = false;
reader_.UnmarkEOF(); reader_.UnmarkEOF();
} }
@ -288,10 +293,11 @@ class LogTest : public testing::Test {
void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) { void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
WriteInitialOffsetLog(); WriteInitialOffsetLog();
unique_ptr<StringSource> source(new StringSource(reader_contents_)); unique_ptr<SequentialFileReader> file_reader(
test::GetSequentialFileReader(new StringSource(reader_contents_)));
unique_ptr<Reader> offset_reader( unique_ptr<Reader> offset_reader(
new Reader(std::move(source), &report_, true/*checksum*/, new Reader(std::move(file_reader), &report_, true /*checksum*/,
WrittenBytes() + offset_past_end)); WrittenBytes() + offset_past_end));
Slice record; Slice record;
std::string scratch; std::string scratch;
ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch)); ASSERT_TRUE(!offset_reader->ReadRecord(&record, &scratch));
@ -300,10 +306,10 @@ class LogTest : public testing::Test {
void CheckInitialOffsetRecord(uint64_t initial_offset, void CheckInitialOffsetRecord(uint64_t initial_offset,
int expected_record_offset) { int expected_record_offset) {
WriteInitialOffsetLog(); WriteInitialOffsetLog();
unique_ptr<StringSource> source(new StringSource(reader_contents_)); unique_ptr<SequentialFileReader> file_reader(
unique_ptr<Reader> offset_reader( test::GetSequentialFileReader(new StringSource(reader_contents_)));
new Reader(std::move(source), &report_, true/*checksum*/, unique_ptr<Reader> offset_reader(new Reader(
initial_offset)); std::move(file_reader), &report_, true /*checksum*/, initial_offset));
Slice record; Slice record;
std::string scratch; std::string scratch;
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch)); ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));

@ -13,13 +13,13 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
namespace log { namespace log {
Writer::Writer(unique_ptr<WritableFile>&& dest) Writer::Writer(unique_ptr<WritableFileWriter>&& dest)
: dest_(std::move(dest)), : dest_(std::move(dest)), block_offset_(0) {
block_offset_(0) {
for (int i = 0; i <= kMaxRecordType; i++) { for (int i = 0; i <= kMaxRecordType; i++) {
char t = static_cast<char>(i); char t = static_cast<char>(i);
type_crc_[i] = crc32c::Value(&t, 1); type_crc_[i] = crc32c::Value(&t, 1);

@ -16,7 +16,7 @@
namespace rocksdb { namespace rocksdb {
class WritableFile; class WritableFileWriter;
using std::unique_ptr; using std::unique_ptr;
@ -61,16 +61,16 @@ class Writer {
// Create a writer that will append data to "*dest". // Create a writer that will append data to "*dest".
// "*dest" must be initially empty. // "*dest" must be initially empty.
// "*dest" must remain live while this Writer is in use. // "*dest" must remain live while this Writer is in use.
explicit Writer(unique_ptr<WritableFile>&& dest); explicit Writer(unique_ptr<WritableFileWriter>&& dest);
~Writer(); ~Writer();
Status AddRecord(const Slice& slice); Status AddRecord(const Slice& slice);
WritableFile* file() { return dest_.get(); } WritableFileWriter* file() { return dest_.get(); }
const WritableFile* file() const { return dest_.get(); } const WritableFileWriter* file() const { return dest_.get(); }
private: private:
unique_ptr<WritableFile> dest_; unique_ptr<WritableFileWriter> dest_;
int block_offset_; // Current offset in block int block_offset_; // Current offset in block
// crc32c values for all supported record types. These are // crc32c values for all supported record types. These are

@ -198,10 +198,9 @@ class TestPlainTableReader : public PlainTableReader {
int bloom_bits_per_key, double hash_table_ratio, int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness, size_t index_sparseness,
const TableProperties* table_properties, const TableProperties* table_properties,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFileReader>&& file,
const ImmutableCFOptions& ioptions, const ImmutableCFOptions& ioptions,
bool* expect_bloom_not_match, bool* expect_bloom_not_match, bool store_index_in_file)
bool store_index_in_file)
: PlainTableReader(ioptions, std::move(file), env_options, icomparator, : PlainTableReader(ioptions, std::move(file), env_options, icomparator,
encoding_type, file_size, table_properties), encoding_type, file_size, table_properties),
expect_bloom_not_match_(expect_bloom_not_match) { expect_bloom_not_match_(expect_bloom_not_match) {
@ -257,7 +256,8 @@ class TestPlainTableFactory : public PlainTableFactory {
Status NewTableReader(const ImmutableCFOptions& ioptions, Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size,
unique_ptr<TableReader>* table) const override { unique_ptr<TableReader>* table) const override {
TableProperties* props = nullptr; TableProperties* props = nullptr;
auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber, auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,

@ -81,6 +81,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/immutable_options.h" #include "rocksdb/immutable_options.h"
#include "util/file_reader_writer.h"
#include "util/scoped_arena_iterator.h" #include "util/scoped_arena_iterator.h"
namespace rocksdb { namespace rocksdb {
@ -236,6 +237,8 @@ class Repairer {
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
unique_ptr<SequentialFileReader> lfile_reader(
new SequentialFileReader(std::move(lfile)));
// Create the log reader. // Create the log reader.
LogReporter reporter; LogReporter reporter;
@ -246,8 +249,8 @@ class Repairer {
// corruptions cause entire commits to be skipped instead of // corruptions cause entire commits to be skipped instead of
// propagating bad information (like overly large sequence // propagating bad information (like overly large sequence
// numbers). // numbers).
log::Reader reader(std::move(lfile), &reporter, true /*enable checksum*/, log::Reader reader(std::move(lfile_reader), &reporter,
0/*initial_offset*/); true /*enable checksum*/, 0 /*initial_offset*/);
// Read all the records and add to a memtable // Read all the records and add to a memtable
std::string scratch; std::string scratch;
@ -378,8 +381,8 @@ class Repairer {
Status WriteDescriptor() { Status WriteDescriptor() {
std::string tmp = TempFileName(dbname_, 1); std::string tmp = TempFileName(dbname_, 1);
unique_ptr<WritableFile> file; unique_ptr<WritableFile> file;
Status status = env_->NewWritableFile( EnvOptions env_options = env_->OptimizeForManifestWrite(env_options_);
tmp, &file, env_->OptimizeForManifestWrite(env_options_)); Status status = env_->NewWritableFile(tmp, &file, env_options);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
} }
@ -407,7 +410,9 @@ class Repairer {
//fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str()); //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
{ {
log::Writer log(std::move(file)); unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), env_options));
log::Writer log(std::move(file_writer));
std::string record; std::string record;
edit_->EncodeTo(&record); edit_->EncodeTo(&record);
status = log.AddRecord(record); status = log.AddRecord(record);

@ -18,6 +18,7 @@
#include "table/table_reader.h" #include "table/table_reader.h"
#include "table/get_context.h" #include "table/get_context.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/perf_context_imp.h" #include "util/perf_context_imp.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
@ -99,8 +100,10 @@ Status TableCache::FindTable(const EnvOptions& env_options,
file->Hint(RandomAccessFile::RANDOM); file->Hint(RandomAccessFile::RANDOM);
} }
StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS); StopWatch sw(ioptions_.env, ioptions_.statistics, TABLE_OPEN_IO_MICROS);
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(file)));
s = ioptions_.table_factory->NewTableReader( s = ioptions_.table_factory->NewTableReader(
ioptions_, env_options, internal_comparator, std::move(file), ioptions_, env_options, internal_comparator, std::move(file_reader),
fd.GetFileSize(), &table_reader); fd.GetFileSize(), &table_reader);
} }

@ -18,6 +18,7 @@
#include "table/plain_table_factory.h" #include "table/plain_table_factory.h"
#include "table/table_builder.h" #include "table/table_builder.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -33,7 +34,7 @@ class TablePropertiesTest : public testing::Test,
// TODO(kailiu) the following classes should be moved to some more general // TODO(kailiu) the following classes should be moved to some more general
// places, so that other tests can also make use of them. // places, so that other tests can also make use of them.
// `FakeWritableFile` and `FakeRandomeAccessFile` bypass the real file system // `FakeWritableFileWriter* file system
// and therefore enable us to quickly setup the tests. // and therefore enable us to quickly setup the tests.
class FakeWritableFile : public WritableFile { class FakeWritableFile : public WritableFile {
public: public:
@ -96,9 +97,11 @@ void MakeBuilder(const Options& options, const ImmutableCFOptions& ioptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
std::unique_ptr<FakeWritableFile>* writable, std::unique_ptr<WritableFileWriter>* writable,
std::unique_ptr<TableBuilder>* builder) { std::unique_ptr<TableBuilder>* builder) {
writable->reset(new FakeWritableFile); unique_ptr<WritableFile> wf(new FakeWritableFile);
writable->reset(new WritableFileWriter(std::move(wf), EnvOptions()));
builder->reset(NewTableBuilder( builder->reset(NewTableBuilder(
ioptions, internal_comparator, int_tbl_prop_collector_factories, ioptions, internal_comparator, int_tbl_prop_collector_factories,
writable->get(), options.compression, options.compression_opts)); writable->get(), options.compression, options.compression_opts));
@ -289,7 +292,7 @@ void TestCustomizedTablePropertiesCollector(
// -- Step 1: build table // -- Step 1: build table
std::unique_ptr<TableBuilder> builder; std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable; std::unique_ptr<WritableFileWriter> writer;
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
std::vector<std::unique_ptr<IntTblPropCollectorFactory>> std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;
@ -300,7 +303,7 @@ void TestCustomizedTablePropertiesCollector(
GetIntTblPropCollectorFactory(options, &int_tbl_prop_collector_factories); GetIntTblPropCollectorFactory(options, &int_tbl_prop_collector_factories);
} }
MakeBuilder(options, ioptions, internal_comparator, MakeBuilder(options, ioptions, internal_comparator,
&int_tbl_prop_collector_factories, &writable, &builder); &int_tbl_prop_collector_factories, &writer, &builder);
SequenceNumber seqNum = 0U; SequenceNumber seqNum = 0U;
for (const auto& kv : kvs) { for (const auto& kv : kvs) {
@ -310,18 +313,17 @@ void TestCustomizedTablePropertiesCollector(
builder->Add(ikey.Encode(), kv.second); builder->Add(ikey.Encode(), kv.second);
} }
ASSERT_OK(builder->Finish()); ASSERT_OK(builder->Finish());
writer->Flush();
// -- Step 2: Read properties // -- Step 2: Read properties
FakeRandomeAccessFile readable(writable->contents()); FakeWritableFile* fwf =
static_cast<FakeWritableFile*>(writer->writable_file());
std::unique_ptr<RandomAccessFileReader> fake_file_reader(
test::GetRandomAccessFileReader(
new FakeRandomeAccessFile(fwf->contents())));
TableProperties* props; TableProperties* props;
Status s = ReadTableProperties( Status s = ReadTableProperties(fake_file_reader.get(), fwf->contents().size(),
&readable, magic_number, Env::Default(), nullptr, &props);
writable->contents().size(),
magic_number,
Env::Default(),
nullptr,
&props
);
std::unique_ptr<TableProperties> props_guard(props); std::unique_ptr<TableProperties> props_guard(props);
ASSERT_OK(s); ASSERT_OK(s);
@ -414,7 +416,7 @@ void TestInternalKeyPropertiesCollector(
}; };
std::unique_ptr<TableBuilder> builder; std::unique_ptr<TableBuilder> builder;
std::unique_ptr<FakeWritableFile> writable; std::unique_ptr<WritableFileWriter> writable;
Options options; Options options;
test::PlainInternalKeyComparator pikc(options.comparator); test::PlainInternalKeyComparator pikc(options.comparator);
@ -449,12 +451,16 @@ void TestInternalKeyPropertiesCollector(
} }
ASSERT_OK(builder->Finish()); ASSERT_OK(builder->Finish());
writable->Flush();
FakeRandomeAccessFile readable(writable->contents()); FakeWritableFile* fwf =
static_cast<FakeWritableFile*>(writable->writable_file());
unique_ptr<RandomAccessFileReader> reader(test::GetRandomAccessFileReader(
new FakeRandomeAccessFile(fwf->contents())));
TableProperties* props; TableProperties* props;
Status s = Status s =
ReadTableProperties(&readable, writable->contents().size(), ReadTableProperties(reader.get(), fwf->contents().size(), magic_number,
magic_number, Env::Default(), nullptr, &props); Env::Default(), nullptr, &props);
ASSERT_OK(s); ASSERT_OK(s);
std::unique_ptr<TableProperties> props_guard(props); std::unique_ptr<TableProperties> props_guard(props);

@ -11,6 +11,7 @@
#include <inttypes.h> #include <inttypes.h>
#include "db/transaction_log_impl.h" #include "db/transaction_log_impl.h"
#include "db/write_batch_internal.h" #include "db/write_batch_internal.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
@ -40,23 +41,27 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl(
} }
Status TransactionLogIteratorImpl::OpenLogFile( Status TransactionLogIteratorImpl::OpenLogFile(
const LogFile* logFile, const LogFile* logFile, unique_ptr<SequentialFileReader>* file_reader) {
unique_ptr<SequentialFile>* file) {
Env* env = options_->env; Env* env = options_->env;
unique_ptr<SequentialFile> file;
Status s;
if (logFile->Type() == kArchivedLogFile) { if (logFile->Type() == kArchivedLogFile) {
std::string fname = ArchivedLogFileName(dir_, logFile->LogNumber()); std::string fname = ArchivedLogFileName(dir_, logFile->LogNumber());
return env->NewSequentialFile(fname, file, soptions_); s = env->NewSequentialFile(fname, &file, soptions_);
} else { } else {
std::string fname = LogFileName(dir_, logFile->LogNumber()); std::string fname = LogFileName(dir_, logFile->LogNumber());
Status s = env->NewSequentialFile(fname, file, soptions_); s = env->NewSequentialFile(fname, &file, soptions_);
if (!s.ok()) { if (!s.ok()) {
// If cannot open file in DB directory. // If cannot open file in DB directory.
// Try the archive dir, as it could have moved in the meanwhile. // Try the archive dir, as it could have moved in the meanwhile.
fname = ArchivedLogFileName(dir_, logFile->LogNumber()); fname = ArchivedLogFileName(dir_, logFile->LogNumber());
s = env->NewSequentialFile(fname, file, soptions_); s = env->NewSequentialFile(fname, &file, soptions_);
} }
return s;
} }
if (s.ok()) {
file_reader->reset(new SequentialFileReader(std::move(file)));
}
return s;
} }
BatchResult TransactionLogIteratorImpl::GetBatch() { BatchResult TransactionLogIteratorImpl::GetBatch() {
@ -251,7 +256,7 @@ void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) {
} }
Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* logFile) { Status TransactionLogIteratorImpl::OpenLogReader(const LogFile* logFile) {
unique_ptr<SequentialFile> file; unique_ptr<SequentialFileReader> file;
Status s = OpenLogFile(logFile, &file); Status s = OpenLogFile(logFile, &file);
if (!s.ok()) { if (!s.ok()) {
return s; return s;

@ -84,7 +84,8 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
size_t currentFileIndex_; size_t currentFileIndex_;
std::unique_ptr<WriteBatch> currentBatch_; std::unique_ptr<WriteBatch> currentBatch_;
unique_ptr<log::Reader> currentLogReader_; unique_ptr<log::Reader> currentLogReader_;
Status OpenLogFile(const LogFile* logFile, unique_ptr<SequentialFile>* file); Status OpenLogFile(const LogFile* logFile,
unique_ptr<SequentialFileReader>* file);
struct LogReporter : public log::Reader::Reporter { struct LogReporter : public log::Reader::Reporter {
Env* env; Env* env;

@ -42,6 +42,7 @@
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "table/get_context.h" #include "table/get_context.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/sync_point.h" #include "util/sync_point.h"
@ -567,10 +568,12 @@ Status Version::GetTableProperties(std::shared_ptr<const TableProperties>* tp,
TableProperties* raw_table_properties; TableProperties* raw_table_properties;
// By setting the magic number to kInvalidTableMagicNumber, we can by // By setting the magic number to kInvalidTableMagicNumber, we can by
// pass the magic number check in the footer. // pass the magic number check in the footer.
std::unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(file)));
s = ReadTableProperties( s = ReadTableProperties(
file.get(), file_meta->fd.GetFileSize(), file_reader.get(), file_meta->fd.GetFileSize(),
Footer::kInvalidTableMagicNumber /* table's magic number */, Footer::kInvalidTableMagicNumber /* table's magic number */, vset_->env_,
vset_->env_, ioptions->info_log, &raw_table_properties); ioptions->info_log, &raw_table_properties);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
@ -1912,13 +1915,17 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log,
"Creating manifest %" PRIu64 "\n", pending_manifest_file_number_); "Creating manifest %" PRIu64 "\n", pending_manifest_file_number_);
unique_ptr<WritableFile> descriptor_file; unique_ptr<WritableFile> descriptor_file;
EnvOptions opt_env_opts = env_->OptimizeForManifestWrite(env_options_);
s = env_->NewWritableFile( s = env_->NewWritableFile(
DescriptorFileName(dbname_, pending_manifest_file_number_), DescriptorFileName(dbname_, pending_manifest_file_number_),
&descriptor_file, env_->OptimizeForManifestWrite(env_options_)); &descriptor_file, opt_env_opts);
if (s.ok()) { if (s.ok()) {
descriptor_file->SetPreallocationBlockSize( descriptor_file->SetPreallocationBlockSize(
db_options_->manifest_preallocation_size); db_options_->manifest_preallocation_size);
descriptor_log_.reset(new log::Writer(std::move(descriptor_file)));
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(descriptor_file), opt_env_opts));
descriptor_log_.reset(new log::Writer(std::move(file_writer)));
s = WriteSnapshot(descriptor_log_.get()); s = WriteSnapshot(descriptor_log_.get());
} }
} }
@ -2132,11 +2139,16 @@ Status VersionSet::Recover(
manifest_filename.c_str()); manifest_filename.c_str());
manifest_filename = dbname_ + "/" + manifest_filename; manifest_filename = dbname_ + "/" + manifest_filename;
unique_ptr<SequentialFile> manifest_file; unique_ptr<SequentialFileReader> manifest_file_reader;
s = env_->NewSequentialFile(manifest_filename, &manifest_file, {
env_options_); unique_ptr<SequentialFile> manifest_file;
if (!s.ok()) { s = env_->NewSequentialFile(manifest_filename, &manifest_file,
return s; env_options_);
if (!s.ok()) {
return s;
}
manifest_file_reader.reset(
new SequentialFileReader(std::move(manifest_file)));
} }
uint64_t current_manifest_file_size; uint64_t current_manifest_file_size;
s = env_->GetFileSize(manifest_filename, &current_manifest_file_size); s = env_->GetFileSize(manifest_filename, &current_manifest_file_size);
@ -2170,8 +2182,8 @@ Status VersionSet::Recover(
{ {
VersionSet::LogReporter reporter; VersionSet::LogReporter reporter;
reporter.status = &s; reporter.status = &s;
log::Reader reader(std::move(manifest_file), &reporter, true /*checksum*/, log::Reader reader(std::move(manifest_file_reader), &reporter,
0 /*initial_offset*/); true /*checksum*/, 0 /*initial_offset*/);
Slice record; Slice record;
std::string scratch; std::string scratch;
while (reader.ReadRecord(&record, &scratch) && s.ok()) { while (reader.ReadRecord(&record, &scratch) && s.ok()) {
@ -2405,18 +2417,23 @@ Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
current.resize(current.size() - 1); current.resize(current.size() - 1);
std::string dscname = dbname + "/" + current; std::string dscname = dbname + "/" + current;
unique_ptr<SequentialFileReader> file_reader;
{
unique_ptr<SequentialFile> file; unique_ptr<SequentialFile> file;
s = env->NewSequentialFile(dscname, &file, soptions); s = env->NewSequentialFile(dscname, &file, soptions);
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
file_reader.reset(new SequentialFileReader(std::move(file)));
}
std::map<uint32_t, std::string> column_family_names; std::map<uint32_t, std::string> column_family_names;
// default column family is always implicitly there // default column family is always implicitly there
column_family_names.insert({0, kDefaultColumnFamilyName}); column_family_names.insert({0, kDefaultColumnFamilyName});
VersionSet::LogReporter reporter; VersionSet::LogReporter reporter;
reporter.status = &s; reporter.status = &s;
log::Reader reader(std::move(file), &reporter, true /*checksum*/, log::Reader reader(std::move(file_reader), &reporter, true /*checksum*/,
0 /*initial_offset*/); 0 /*initial_offset*/);
Slice record; Slice record;
std::string scratch; std::string scratch;
@ -2542,10 +2559,15 @@ Status VersionSet::ReduceNumberOfLevels(const std::string& dbname,
Status VersionSet::DumpManifest(Options& options, std::string& dscname, Status VersionSet::DumpManifest(Options& options, std::string& dscname,
bool verbose, bool hex, bool json) { bool verbose, bool hex, bool json) {
// Open the specified manifest file. // Open the specified manifest file.
unique_ptr<SequentialFile> file; unique_ptr<SequentialFileReader> file_reader;
Status s = options.env->NewSequentialFile(dscname, &file, env_options_); Status s;
if (!s.ok()) { {
return s; unique_ptr<SequentialFile> file;
s = options.env->NewSequentialFile(dscname, &file, env_options_);
if (!s.ok()) {
return s;
}
file_reader.reset(new SequentialFileReader(std::move(file)));
} }
bool have_prev_log_number = false; bool have_prev_log_number = false;
@ -2569,8 +2591,8 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
{ {
VersionSet::LogReporter reporter; VersionSet::LogReporter reporter;
reporter.status = &s; reporter.status = &s;
log::Reader reader(std::move(file), &reporter, true/*checksum*/, log::Reader reader(std::move(file_reader), &reporter, true /*checksum*/,
0/*initial_offset*/); 0 /*initial_offset*/);
Slice record; Slice record;
std::string scratch; std::string scratch;
while (reader.ReadRecord(&record, &scratch) && s.ok()) { while (reader.ReadRecord(&record, &scratch) && s.ok()) {
@ -2664,7 +2686,7 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
} }
} }
} }
file.reset(); file_reader.reset();
if (s.ok()) { if (s.ok()) {
if (!have_next_file) { if (!have_next_file) {
@ -2806,17 +2828,23 @@ bool VersionSet::ManifestContains(uint64_t manifest_file_num,
std::string fname = DescriptorFileName(dbname_, manifest_file_num); std::string fname = DescriptorFileName(dbname_, manifest_file_num);
Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log,
"ManifestContains: checking %s\n", fname.c_str()); "ManifestContains: checking %s\n", fname.c_str());
unique_ptr<SequentialFile> file;
Status s = env_->NewSequentialFile(fname, &file, env_options_); unique_ptr<SequentialFileReader> file_reader;
if (!s.ok()) { Status s;
Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, {
"ManifestContains: %s\n", s.ToString().c_str()); unique_ptr<SequentialFile> file;
Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log, s = env_->NewSequentialFile(fname, &file, env_options_);
"ManifestContains: is unable to reopen the manifest file %s", if (!s.ok()) {
fname.c_str()); Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log,
return false; "ManifestContains: %s\n", s.ToString().c_str());
Log(InfoLogLevel::INFO_LEVEL, db_options_->info_log,
"ManifestContains: is unable to reopen the manifest file %s",
fname.c_str());
return false;
}
file_reader.reset(new SequentialFileReader(std::move(file)));
} }
log::Reader reader(std::move(file), nullptr, true/*checksum*/, 0); log::Reader reader(std::move(file_reader), nullptr, true /*checksum*/, 0);
Slice r; Slice r;
std::string scratch; std::string scratch;
bool result = false; bool result = false;

@ -28,6 +28,7 @@
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/write_batch.h" #include "rocksdb/write_batch.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
#include "util/sync_point.h" #include "util/sync_point.h"
@ -430,6 +431,8 @@ Status WalManager::ReadFirstLine(const std::string& fname,
std::unique_ptr<SequentialFile> file; std::unique_ptr<SequentialFile> file;
Status status = env_->NewSequentialFile(fname, &file, env_options_); Status status = env_->NewSequentialFile(fname, &file, env_options_);
unique_ptr<SequentialFileReader> file_reader(
new SequentialFileReader(std::move(file)));
if (!status.ok()) { if (!status.ok()) {
return status; return status;
@ -441,7 +444,7 @@ Status WalManager::ReadFirstLine(const std::string& fname,
reporter.fname = fname.c_str(); reporter.fname = fname.c_str();
reporter.status = &status; reporter.status = &status;
reporter.ignore_error = !db_options_.paranoid_checks; reporter.ignore_error = !db_options_.paranoid_checks;
log::Reader reader(std::move(file), &reporter, true /*checksum*/, log::Reader reader(std::move(file_reader), &reporter, true /*checksum*/,
0 /*initial_offset*/); 0 /*initial_offset*/);
std::string scratch; std::string scratch;
Slice record; Slice record;

@ -14,6 +14,7 @@
#include "db/column_family.h" #include "db/column_family.h"
#include "db/version_set.h" #include "db/version_set.h"
#include "db/writebuffer.h" #include "db/writebuffer.h"
#include "util/file_reader_writer.h"
#include "util/mock_env.h" #include "util/mock_env.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "util/testharness.h" #include "util/testharness.h"
@ -72,7 +73,9 @@ class WalManagerTest : public testing::Test {
std::string fname = ArchivedLogFileName(dbname_, current_log_number_); std::string fname = ArchivedLogFileName(dbname_, current_log_number_);
unique_ptr<WritableFile> file; unique_ptr<WritableFile> file;
ASSERT_OK(env_->NewWritableFile(fname, &file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &file, env_options_));
current_log_writer_.reset(new log::Writer(std::move(file))); unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), env_options_));
current_log_writer_.reset(new log::Writer(std::move(file_writer)));
} }
void CreateArchiveLogs(int num_logs, int entries_per_log) { void CreateArchiveLogs(int num_logs, int entries_per_log) {
@ -120,7 +123,9 @@ TEST_F(WalManagerTest, ReadFirstRecordCache) {
ASSERT_OK(wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1, &s)); ASSERT_OK(wal_manager_->TEST_ReadFirstRecord(kAliveLogFile, 1, &s));
ASSERT_EQ(s, 0U); ASSERT_EQ(s, 0U);
log::Writer writer(std::move(file)); unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), EnvOptions()));
log::Writer writer(std::move(file_writer));
WriteBatch batch; WriteBatch batch;
batch.Put("foo", "bar"); batch.Put("foo", "bar");
WriteBatchInternal::SetSequence(&batch, 10); WriteBatchInternal::SetSequence(&batch, 10);

@ -465,6 +465,8 @@ class WritableFile {
io_priority_ = pri; io_priority_ = pri;
} }
virtual Env::IOPriority GetIOPriority() { return io_priority_; }
/* /*
* Get the size of valid data in the file. * Get the size of valid data in the file.
*/ */
@ -501,7 +503,14 @@ class WritableFile {
return Status::NotSupported("InvalidateCache not supported."); return Status::NotSupported("InvalidateCache not supported.");
} }
protected: // Sync a file range with disk.
// offset is the starting byte of the file range to be synchronized.
// nbytes specifies the length of the range to be synchronized.
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(off_t offset, off_t nbytes) { return Status::OK(); }
// PrepareWrite performs any necessary preparation for a write // PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation // before the write actually occurs. This allows for pre-allocation
// of space on devices where it can result in less file // of space on devices where it can result in less file
@ -526,6 +535,7 @@ class WritableFile {
} }
} }
protected:
/* /*
* Pre-allocate space for a file. * Pre-allocate space for a file.
*/ */
@ -533,16 +543,6 @@ class WritableFile {
return Status::OK(); return Status::OK();
} }
// Sync a file range with disk.
// offset is the starting byte of the file range to be synchronized.
// nbytes specifies the length of the range to be synchronized.
// This asks the OS to initiate flushing the cached data to disk,
// without waiting for completion.
// Default implementation does nothing.
virtual Status RangeSync(off_t offset, off_t nbytes) {
return Status::OK();
}
size_t preallocation_block_size() { return preallocation_block_size_; } size_t preallocation_block_size() { return preallocation_block_size_; }
private: private:
@ -893,6 +893,7 @@ class WritableFileWrapper : public WritableFile {
void SetIOPriority(Env::IOPriority pri) override { void SetIOPriority(Env::IOPriority pri) override {
target_->SetIOPriority(pri); target_->SetIOPriority(pri);
} }
Env::IOPriority GetIOPriority() override { return target_->GetIOPriority(); }
uint64_t GetFileSize() override { return target_->GetFileSize(); } uint64_t GetFileSize() override { return target_->GetFileSize(); }
void GetPreallocationStatus(size_t* block_size, void GetPreallocationStatus(size_t* block_size,
size_t* last_allocated_block) override { size_t* last_allocated_block) override {

@ -34,7 +34,7 @@ class RandomAccessFile;
struct TableBuilderOptions; struct TableBuilderOptions;
class TableBuilder; class TableBuilder;
class TableReader; class TableReader;
class WritableFile; class WritableFileWriter;
struct EnvOptions; struct EnvOptions;
struct Options; struct Options;
@ -315,6 +315,8 @@ extern TableFactory* NewCuckooTableFactory(
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
class RandomAccessFileReader;
// A base class for table factories. // A base class for table factories.
class TableFactory { class TableFactory {
public: public:
@ -348,7 +350,7 @@ class TableFactory {
virtual Status NewTableReader( virtual Status NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const = 0; unique_ptr<TableReader>* table_reader) const = 0;
// Return a table builder to write to a file for this table type. // Return a table builder to write to a file for this table type.
@ -372,7 +374,7 @@ class TableFactory {
// to use in this table. // to use in this table.
virtual TableBuilder* NewTableBuilder( virtual TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const = 0; WritableFileWriter* file) const = 0;
// Sanitizes the specified DB Options and ColumnFamilyOptions. // Sanitizes the specified DB Options and ColumnFamilyOptions.
// //

@ -9,11 +9,13 @@
#include "port/port_posix.h" #include "port/port_posix.h"
#include <stdio.h>
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <sys/time.h> #include <signal.h>
#include <stdio.h>
#include <string.h> #include <string.h>
#include <sys/time.h>
#include <unistd.h>
#include <cstdlib> #include <cstdlib>
#include "util/logging.h" #include "util/logging.h"
@ -133,5 +135,11 @@ void InitOnce(OnceType* once, void (*initializer)()) {
PthreadCall("once", pthread_once(once, initializer)); PthreadCall("once", pthread_once(once, initializer));
} }
void Crash(const std::string& srcfile, int srcline) {
fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline);
fflush(stdout);
kill(getpid(), SIGTERM);
}
} // namespace port } // namespace port
} // namespace rocksdb } // namespace rocksdb

@ -53,7 +53,7 @@
#if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\ #if defined(OS_MACOSX) || defined(OS_SOLARIS) || defined(OS_FREEBSD) ||\
defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\ defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLYBSD) ||\
defined(OS_ANDROID) || defined(OS_CYGWIN) defined(OS_ANDROID) || defined(CYGWIN)
// Use fread/fwrite/fflush on platforms without _unlocked variants // Use fread/fwrite/fflush on platforms without _unlocked variants
#define fread_unlocked fread #define fread_unlocked fread
#define fwrite_unlocked fwrite #define fwrite_unlocked fwrite
@ -150,6 +150,7 @@ extern void InitOnce(OnceType* once, void (*initializer)());
#define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) #define PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
extern void Crash(const std::string& srcfile, int srcline);
} // namespace port } // namespace port
} // namespace rocksdb } // namespace rocksdb

@ -29,6 +29,7 @@
#include "util/random.h" #include "util/random.h"
#include "util/iostats_context_imp.h" #include "util/iostats_context_imp.h"
#include "util/rate_limiter.h" #include "util/rate_limiter.h"
#include "util/sync_point.h"
#include "util/thread_status_updater.h" #include "util/thread_status_updater.h"
#include "util/thread_status_util.h" #include "util/thread_status_util.h"
@ -36,10 +37,6 @@
#include <Rpc.h> // For UUID generation #include <Rpc.h> // For UUID generation
#include <Windows.h> #include <Windows.h>
// This is only set from db_stress.cc and for testing only.
// If non-zero, kill at various points in source code with probability 1/this
int rocksdb_kill_odds = 0;
namespace rocksdb { namespace rocksdb {
std::string GetWindowsErrSz(DWORD err) { std::string GetWindowsErrSz(DWORD err) {
@ -90,40 +87,6 @@ inline void PrintThreadInfo(size_t thread_id, size_t terminatingId) {
// returns the ID of the current process // returns the ID of the current process
inline int current_process_id() { return _getpid(); } inline int current_process_id() { return _getpid(); }
#ifdef NDEBUG
// empty in release build
#define TEST_KILL_RANDOM(rocksdb_kill_odds)
#else
// Kill the process with probablity 1/odds for testing.
void TestKillRandom(int odds, const std::string& srcfile, int srcline) {
time_t curtime = time(nullptr);
Random r((uint32_t)curtime);
assert(odds > 0);
bool crash = r.OneIn(odds);
if (crash) {
fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline);
fflush(stdout);
std::string* p_str = nullptr;
p_str->c_str();
}
}
// To avoid crashing always at some frequently executed codepaths (during
// kill random test), use this factor to reduce odds
#define REDUCE_ODDS 2
#define REDUCE_ODDS2 4
#define TEST_KILL_RANDOM(rocksdb_kill_odds) \
{ \
if (rocksdb_kill_odds > 0) { \
TestKillRandom(rocksdb_kill_odds, __FILE__, __LINE__); \
} \
}
#endif
// RAII helpers for HANDLEs // RAII helpers for HANDLEs
const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); }; const auto CloseHandleFunc = [](HANDLE h) { ::CloseHandle(h); };
typedef std::unique_ptr<void, decltype(CloseHandleFunc)> UniqueCloseHandlePtr; typedef std::unique_ptr<void, decltype(CloseHandleFunc)> UniqueCloseHandlePtr;

@ -88,6 +88,7 @@ LIB_SOURCES = \
util/env_hdfs.cc \ util/env_hdfs.cc \
util/env_posix.cc \ util/env_posix.cc \
util/file_util.cc \ util/file_util.cc \
util/file_reader_writer.cc \
util/filter_policy.cc \ util/filter_policy.cc \
util/hash.cc \ util/hash.cc \
util/hash_cuckoo_rep.cc \ util/hash_cuckoo_rep.cc \

@ -41,8 +41,9 @@ extern const uint64_t kCuckooTableMagicNumber;
Status AdaptiveTableFactory::NewTableReader( Status AdaptiveTableFactory::NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& icomp, unique_ptr<RandomAccessFile>&& file, const InternalKeyComparator& icomp,
uint64_t file_size, unique_ptr<TableReader>* table) const { unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
unique_ptr<TableReader>* table) const {
Footer footer; Footer footer;
auto s = ReadFooterFromFile(file.get(), file_size, &footer); auto s = ReadFooterFromFile(file.get(), file_size, &footer);
if (!s.ok()) { if (!s.ok()) {
@ -66,7 +67,7 @@ Status AdaptiveTableFactory::NewTableReader(
TableBuilder* AdaptiveTableFactory::NewTableBuilder( TableBuilder* AdaptiveTableFactory::NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const { WritableFileWriter* file) const {
return table_factory_to_write_->NewTableBuilder(table_builder_options, file); return table_factory_to_write_->NewTableBuilder(table_builder_options, file);
} }

@ -33,15 +33,16 @@ class AdaptiveTableFactory : public TableFactory {
const char* Name() const override { return "AdaptiveTableFactory"; } const char* Name() const override { return "AdaptiveTableFactory"; }
Status NewTableReader( Status NewTableReader(const ImmutableCFOptions& ioptions,
const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
unique_ptr<TableReader>* table) const override; uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder( TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const override; WritableFileWriter* file) const override;
// Sanitizes the specified DB Options. // Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts, Status SanitizeOptions(const DBOptions& db_opts,

@ -437,7 +437,7 @@ struct BlockBasedTableBuilder::Rep {
const ImmutableCFOptions ioptions; const ImmutableCFOptions ioptions;
const BlockBasedTableOptions table_options; const BlockBasedTableOptions table_options;
const InternalKeyComparator& internal_comparator; const InternalKeyComparator& internal_comparator;
WritableFile* file; WritableFileWriter* file;
uint64_t offset = 0; uint64_t offset = 0;
Status status; Status status;
BlockBuilder data_block; BlockBuilder data_block;
@ -467,7 +467,7 @@ struct BlockBasedTableBuilder::Rep {
const InternalKeyComparator& icomparator, const InternalKeyComparator& icomparator,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* f, const CompressionType _compression_type, WritableFileWriter* f, const CompressionType _compression_type,
const CompressionOptions& _compression_opts, const bool skip_filters) const CompressionOptions& _compression_opts, const bool skip_filters)
: ioptions(_ioptions), : ioptions(_ioptions),
table_options(table_opt), table_options(table_opt),
@ -502,7 +502,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* file, const CompressionType compression_type, WritableFileWriter* file, const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters) { const CompressionOptions& compression_opts, const bool skip_filters) {
BlockBasedTableOptions sanitized_table_options(table_options); BlockBasedTableOptions sanitized_table_options(table_options);
if (sanitized_table_options.format_version == 0 && if (sanitized_table_options.format_version == 0 &&
@ -524,7 +524,7 @@ BlockBasedTableBuilder::BlockBasedTableBuilder(
} }
if (table_options.block_cache_compressed.get() != nullptr) { if (table_options.block_cache_compressed.get() != nullptr) {
BlockBasedTable::GenerateCachePrefix( BlockBasedTable::GenerateCachePrefix(
table_options.block_cache_compressed.get(), file, table_options.block_cache_compressed.get(), file->writable_file(),
&rep_->compressed_cache_key_prefix[0], &rep_->compressed_cache_key_prefix[0],
&rep_->compressed_cache_key_prefix_size); &rep_->compressed_cache_key_prefix_size);
} }

@ -40,7 +40,7 @@ class BlockBasedTableBuilder : public TableBuilder {
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* file, const CompressionType compression_type, WritableFileWriter* file, const CompressionType compression_type,
const CompressionOptions& compression_opts, const bool skip_filters); const CompressionOptions& compression_opts, const bool skip_filters);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.

@ -44,7 +44,7 @@ BlockBasedTableFactory::BlockBasedTableFactory(
Status BlockBasedTableFactory::NewTableReader( Status BlockBasedTableFactory::NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& soptions, const ImmutableCFOptions& ioptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader, const bool prefetch_enabled) const { unique_ptr<TableReader>* table_reader, const bool prefetch_enabled) const {
return BlockBasedTable::Open(ioptions, soptions, table_options_, return BlockBasedTable::Open(ioptions, soptions, table_options_,
internal_comparator, std::move(file), file_size, internal_comparator, std::move(file), file_size,
@ -53,7 +53,7 @@ Status BlockBasedTableFactory::NewTableReader(
TableBuilder* BlockBasedTableFactory::NewTableBuilder( TableBuilder* BlockBasedTableFactory::NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const { WritableFileWriter* file) const {
auto table_builder = new BlockBasedTableBuilder( auto table_builder = new BlockBasedTableBuilder(
table_builder_options.ioptions, table_options_, table_builder_options.ioptions, table_options_,
table_builder_options.internal_comparator, table_builder_options.internal_comparator,

@ -36,7 +36,8 @@ class BlockBasedTableFactory : public TableFactory {
Status NewTableReader(const ImmutableCFOptions& ioptions, Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& soptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override { unique_ptr<TableReader>* table_reader) const override {
return NewTableReader(ioptions, soptions, internal_comparator, return NewTableReader(ioptions, soptions, internal_comparator,
std::move(file), file_size, table_reader, std::move(file), file_size, table_reader,
@ -48,13 +49,14 @@ class BlockBasedTableFactory : public TableFactory {
Status NewTableReader(const ImmutableCFOptions& ioptions, Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& soptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size,
unique_ptr<TableReader>* table_reader, unique_ptr<TableReader>* table_reader,
bool prefetch_index_and_filter) const; bool prefetch_index_and_filter) const;
TableBuilder* NewTableBuilder( TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const override; WritableFileWriter* file) const override;
// Sanitizes the specified DB Options. // Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts, Status SanitizeOptions(const DBOptions& db_opts,

@ -37,6 +37,7 @@
#include "table/get_context.h" #include "table/get_context.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/perf_context_imp.h" #include "util/perf_context_imp.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
#include "util/string_util.h" #include "util/string_util.h"
@ -62,7 +63,7 @@ const size_t kMaxCacheKeyPrefixSize __attribute__((unused)) =
// The only relevant option is options.verify_checksums for now. // The only relevant option is options.verify_checksums for now.
// On failure return non-OK. // On failure return non-OK.
// On success fill *result and return OK - caller owns *result // On success fill *result and return OK - caller owns *result
Status ReadBlockFromFile(RandomAccessFile* file, const Footer& footer, Status ReadBlockFromFile(RandomAccessFileReader* file, const Footer& footer,
const ReadOptions& options, const BlockHandle& handle, const ReadOptions& options, const BlockHandle& handle,
std::unique_ptr<Block>* result, Env* env, std::unique_ptr<Block>* result, Env* env,
bool do_uncompress = true) { bool do_uncompress = true) {
@ -167,7 +168,7 @@ class BinarySearchIndexReader : public IndexReader {
// `BinarySearchIndexReader`. // `BinarySearchIndexReader`.
// On success, index_reader will be populated; otherwise it will remain // On success, index_reader will be populated; otherwise it will remain
// unmodified. // unmodified.
static Status Create(RandomAccessFile* file, const Footer& footer, static Status Create(RandomAccessFileReader* file, const Footer& footer,
const BlockHandle& index_handle, Env* env, const BlockHandle& index_handle, Env* env,
const Comparator* comparator, const Comparator* comparator,
IndexReader** index_reader) { IndexReader** index_reader) {
@ -212,8 +213,8 @@ class BinarySearchIndexReader : public IndexReader {
class HashIndexReader : public IndexReader { class HashIndexReader : public IndexReader {
public: public:
static Status Create(const SliceTransform* hash_key_extractor, static Status Create(const SliceTransform* hash_key_extractor,
const Footer& footer, RandomAccessFile* file, Env* env, const Footer& footer, RandomAccessFileReader* file,
const Comparator* comparator, Env* env, const Comparator* comparator,
const BlockHandle& index_handle, const BlockHandle& index_handle,
Iterator* meta_index_iter, IndexReader** index_reader, Iterator* meta_index_iter, IndexReader** index_reader,
bool hash_index_allow_collision) { bool hash_index_allow_collision) {
@ -347,7 +348,7 @@ struct BlockBasedTable::Rep {
const FilterPolicy* const filter_policy; const FilterPolicy* const filter_policy;
const InternalKeyComparator& internal_comparator; const InternalKeyComparator& internal_comparator;
Status status; Status status;
unique_ptr<RandomAccessFile> file; unique_ptr<RandomAccessFileReader> file;
char cache_key_prefix[kMaxCacheKeyPrefixSize]; char cache_key_prefix[kMaxCacheKeyPrefixSize];
size_t cache_key_prefix_size = 0; size_t cache_key_prefix_size = 0;
char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize]; char compressed_cache_key_prefix[kMaxCacheKeyPrefixSize];
@ -405,13 +406,12 @@ void BlockBasedTable::SetupCacheKeyPrefix(Rep* rep) {
rep->cache_key_prefix_size = 0; rep->cache_key_prefix_size = 0;
rep->compressed_cache_key_prefix_size = 0; rep->compressed_cache_key_prefix_size = 0;
if (rep->table_options.block_cache != nullptr) { if (rep->table_options.block_cache != nullptr) {
GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file.get(), GenerateCachePrefix(rep->table_options.block_cache.get(), rep->file->file(),
&rep->cache_key_prefix[0], &rep->cache_key_prefix[0], &rep->cache_key_prefix_size);
&rep->cache_key_prefix_size);
} }
if (rep->table_options.block_cache_compressed != nullptr) { if (rep->table_options.block_cache_compressed != nullptr) {
GenerateCachePrefix(rep->table_options.block_cache_compressed.get(), GenerateCachePrefix(rep->table_options.block_cache_compressed.get(),
rep->file.get(), &rep->compressed_cache_key_prefix[0], rep->file->file(), &rep->compressed_cache_key_prefix[0],
&rep->compressed_cache_key_prefix_size); &rep->compressed_cache_key_prefix_size);
} }
} }
@ -469,7 +469,7 @@ Status BlockBasedTable::Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const EnvOptions& env_options,
const BlockBasedTableOptions& table_options, const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size, uint64_t file_size,
unique_ptr<TableReader>* table_reader, unique_ptr<TableReader>* table_reader,
const bool prefetch_index_and_filter) { const bool prefetch_index_and_filter) {

@ -21,6 +21,7 @@
#include "table/table_reader.h" #include "table/table_reader.h"
#include "table/table_properties_internal.h" #include "table/table_properties_internal.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
@ -69,8 +70,8 @@ class BlockBasedTable : public TableReader {
const EnvOptions& env_options, const EnvOptions& env_options,
const BlockBasedTableOptions& table_options, const BlockBasedTableOptions& table_options,
const InternalKeyComparator& internal_key_comparator, const InternalKeyComparator& internal_key_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
unique_ptr<TableReader>* table_reader, uint64_t file_size, unique_ptr<TableReader>* table_reader,
bool prefetch_index_and_filter = true); bool prefetch_index_and_filter = true);
bool PrefixMayMatch(const Slice& internal_key); bool PrefixMayMatch(const Slice& internal_key);

@ -20,6 +20,7 @@
#include "table/format.h" #include "table/format.h"
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "util/autovector.h" #include "util/autovector.h"
#include "util/file_reader_writer.h"
#include "util/random.h" #include "util/random.h"
#include "util/string_util.h" #include "util/string_util.h"
@ -47,7 +48,7 @@ const std::string CuckooTablePropertyNames::kUserKeyLength =
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull; extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder( CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, double max_hash_table_ratio, WritableFileWriter* file, double max_hash_table_ratio,
uint32_t max_num_hash_table, uint32_t max_search_depth, uint32_t max_num_hash_table, uint32_t max_search_depth,
const Comparator* user_comparator, uint32_t cuckoo_block_size, const Comparator* user_comparator, uint32_t cuckoo_block_size,
bool use_module_hash, bool identity_as_first_hash, bool use_module_hash, bool identity_as_first_hash,

@ -21,12 +21,13 @@ namespace rocksdb {
class CuckooTableBuilder: public TableBuilder { class CuckooTableBuilder: public TableBuilder {
public: public:
CuckooTableBuilder( CuckooTableBuilder(WritableFileWriter* file, double max_hash_table_ratio,
WritableFile* file, double max_hash_table_ratio, uint32_t max_num_hash_func, uint32_t max_search_depth,
uint32_t max_num_hash_func, uint32_t max_search_depth, const Comparator* user_comparator,
const Comparator* user_comparator, uint32_t cuckoo_block_size, uint32_t cuckoo_block_size, bool use_module_hash,
bool use_module_hash, bool identity_as_first_hash, bool identity_as_first_hash,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)); uint64_t (*get_slice_hash)(const Slice&, uint32_t,
uint64_t));
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~CuckooTableBuilder() {} ~CuckooTableBuilder() {}
@ -82,7 +83,7 @@ class CuckooTableBuilder: public TableBuilder {
inline Slice GetValue(uint64_t idx) const; inline Slice GetValue(uint64_t idx) const;
uint32_t num_hash_func_; uint32_t num_hash_func_;
WritableFile* file_; WritableFileWriter* file_;
const double max_hash_table_ratio_; const double max_hash_table_ratio_;
const uint32_t max_num_hash_func_; const uint32_t max_num_hash_func_;
const uint32_t max_search_depth_; const uint32_t max_search_depth_;

@ -10,6 +10,7 @@
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "table/cuckoo_table_builder.h" #include "table/cuckoo_table_builder.h"
#include "util/file_reader_writer.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -48,8 +49,11 @@ class CuckooBuilderTest : public testing::Test {
// Assert Table Properties. // Assert Table Properties.
TableProperties* props = nullptr; TableProperties* props = nullptr;
ASSERT_OK(ReadTableProperties(read_file.get(), read_file_size, unique_ptr<RandomAccessFileReader> file_reader(
kCuckooTableMagicNumber, env_, nullptr, &props)); new RandomAccessFileReader(std::move(read_file)));
ASSERT_OK(ReadTableProperties(file_reader.get(), read_file_size,
kCuckooTableMagicNumber, env_, nullptr,
&props));
// Check unused bucket. // Check unused bucket.
std::string unused_key = props->user_collected_properties[ std::string unused_key = props->user_collected_properties[
CuckooTablePropertyNames::kEmptyKey]; CuckooTablePropertyNames::kEmptyKey];
@ -90,8 +94,8 @@ class CuckooBuilderTest : public testing::Test {
size_t bucket_size = expected_unused_bucket.size(); size_t bucket_size = expected_unused_bucket.size();
for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) { for (uint32_t i = 0; i < table_size + cuckoo_block_size - 1; ++i) {
Slice read_slice; Slice read_slice;
ASSERT_OK(read_file->Read(i*bucket_size, bucket_size, ASSERT_OK(file_reader->Read(i * bucket_size, bucket_size, &read_slice,
&read_slice, nullptr)); nullptr));
size_t key_idx = size_t key_idx =
std::find(expected_locations.begin(), expected_locations.end(), i) - std::find(expected_locations.begin(), expected_locations.end(), i) -
expected_locations.begin(); expected_locations.begin();
@ -104,7 +108,7 @@ class CuckooBuilderTest : public testing::Test {
} }
} }
for (auto key_found : keys_found) { for (auto key_found : keys_found) {
// Check that all keys were found. // Check that all keys wereReader found.
ASSERT_TRUE(key_found); ASSERT_TRUE(key_found);
} }
} }
@ -133,12 +137,15 @@ TEST_F(CuckooBuilderTest, SuccessWithEmptyFile) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/EmptyFile"; fname = test::TmpDir() + "/EmptyFile";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
4, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, 4, 100,
BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
ASSERT_EQ(0UL, builder.FileSize()); ASSERT_EQ(0UL, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
CheckFileContents({}, {}, {}, "", 2, 2, false); CheckFileContents({}, {}, {}, "", 2, 2, false);
} }
@ -165,8 +172,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionFullKey"; fname = test::TmpDir() + "/NoCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -176,7 +186,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionFullKey) {
size_t bucket_size = keys[0].size() + values[0].size(); size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
@ -209,8 +219,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionFullKey"; fname = test::TmpDir() + "/WithCollisionFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -220,7 +233,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionFullKey) {
size_t bucket_size = keys[0].size() + values[0].size(); size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
@ -254,9 +267,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
uint32_t cuckoo_block_size = 2; uint32_t cuckoo_block_size = 2;
fname = test::TmpDir() + "/WithCollisionFullKey2"; fname = test::TmpDir() + "/WithCollisionFullKey2";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), cuckoo_block_size, new WritableFileWriter(std::move(writable_file), EnvOptions()));
false, false, GetSliceHash); CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), cuckoo_block_size,
false, false, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -266,7 +281,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionAndCuckooBlock) {
size_t bucket_size = keys[0].size() + values[0].size(); size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
@ -304,8 +319,11 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKey"; fname = test::TmpDir() + "/WithCollisionPathFullKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -315,7 +333,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKey) {
size_t bucket_size = keys[0].size() + values[0].size(); size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
@ -350,8 +368,11 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock"; fname = test::TmpDir() + "/WithCollisionPathFullKeyAndCuckooBlock";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 2, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 2, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(keys[i]), Slice(values[i])); builder.Add(Slice(keys[i]), Slice(values[i]));
@ -361,7 +382,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathFullKeyAndCuckooBlock) {
size_t bucket_size = keys[0].size() + values[0].size(); size_t bucket_size = keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = GetInternalKey("key00", true); std::string expected_unused_bucket = GetInternalKey("key00", true);
@ -389,8 +410,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/NoCollisionUserKey"; fname = test::TmpDir() + "/NoCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -400,7 +424,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessNoCollisionUserKey) {
size_t bucket_size = user_keys[0].size() + values[0].size(); size_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
@ -429,8 +453,11 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionUserKey"; fname = test::TmpDir() + "/WithCollisionUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -440,7 +467,7 @@ TEST_F(CuckooBuilderTest, WriteSuccessWithCollisionUserKey) {
size_t bucket_size = user_keys[0].size() + values[0].size(); size_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
@ -471,8 +498,11 @@ TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathUserKey"; fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
2, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i])); builder.Add(Slice(GetInternalKey(user_keys[i], true)), Slice(values[i]));
@ -482,7 +512,7 @@ TEST_F(CuckooBuilderTest, WithCollisionPathUserKey) {
size_t bucket_size = user_keys[0].size() + values[0].size(); size_t bucket_size = user_keys[0].size() + values[0].size();
ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize()); ASSERT_EQ(expected_table_size * bucket_size - 1, builder.FileSize());
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
ASSERT_LE(expected_table_size * bucket_size, builder.FileSize()); ASSERT_LE(expected_table_size * bucket_size, builder.FileSize());
std::string expected_unused_bucket = "key00"; std::string expected_unused_bucket = "key00";
@ -512,8 +542,11 @@ TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/WithCollisionPathUserKey"; fname = test::TmpDir() + "/WithCollisionPathUserKey";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 2, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
2, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t i = 0; i < user_keys.size(); i++) { for (uint32_t i = 0; i < user_keys.size(); i++) {
builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value")); builder.Add(Slice(GetInternalKey(user_keys[i], false)), Slice("value"));
@ -521,7 +554,7 @@ TEST_F(CuckooBuilderTest, FailWhenCollisionPathTooLong) {
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
} }
ASSERT_TRUE(builder.Finish().IsNotSupported()); ASSERT_TRUE(builder.Finish().IsNotSupported());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
} }
TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) { TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
@ -536,8 +569,11 @@ TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
unique_ptr<WritableFile> writable_file; unique_ptr<WritableFile> writable_file;
fname = test::TmpDir() + "/FailWhenSameKeyInserted"; fname = test::TmpDir() + "/FailWhenSameKeyInserted";
ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_)); ASSERT_OK(env_->NewWritableFile(fname, &writable_file, env_options_));
CuckooTableBuilder builder(writable_file.get(), kHashTableRatio, unique_ptr<WritableFileWriter> file_writer(
num_hash_fun, 100, BytewiseComparator(), 1, false, false, GetSliceHash); new WritableFileWriter(std::move(writable_file), EnvOptions()));
CuckooTableBuilder builder(file_writer.get(), kHashTableRatio, num_hash_fun,
100, BytewiseComparator(), 1, false, false,
GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1")); builder.Add(Slice(GetInternalKey(user_key, false)), Slice("value1"));
@ -548,7 +584,7 @@ TEST_F(CuckooBuilderTest, FailWhenSameKeyInserted) {
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
ASSERT_TRUE(builder.Finish().IsNotSupported()); ASSERT_TRUE(builder.Finish().IsNotSupported());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
} }
} // namespace rocksdb } // namespace rocksdb

@ -12,9 +12,10 @@
namespace rocksdb { namespace rocksdb {
Status CuckooTableFactory::NewTableReader(const ImmutableCFOptions& ioptions, Status CuckooTableFactory::NewTableReader(
const EnvOptions& env_options, const InternalKeyComparator& icomp, const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
std::unique_ptr<RandomAccessFile>&& file, uint64_t file_size, const InternalKeyComparator& icomp,
std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
std::unique_ptr<TableReader>* table) const { std::unique_ptr<TableReader>* table) const {
std::unique_ptr<CuckooTableReader> new_reader(new CuckooTableReader(ioptions, std::unique_ptr<CuckooTableReader> new_reader(new CuckooTableReader(ioptions,
std::move(file), file_size, icomp.user_comparator(), nullptr)); std::move(file), file_size, icomp.user_comparator(), nullptr));
@ -27,7 +28,7 @@ Status CuckooTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
TableBuilder* CuckooTableFactory::NewTableBuilder( TableBuilder* CuckooTableFactory::NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const { WritableFileWriter* file) const {
// Ignore the skipFIlters flag. Does not apply to this file format // Ignore the skipFIlters flag. Does not apply to this file format
// //

@ -55,15 +55,16 @@ class CuckooTableFactory : public TableFactory {
const char* Name() const override { return "CuckooTable"; } const char* Name() const override { return "CuckooTable"; }
Status NewTableReader( Status NewTableReader(const ImmutableCFOptions& ioptions,
const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
unique_ptr<TableReader>* table) const override; uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder( TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const override; WritableFileWriter* file) const override;
// Sanitizes the specified DB Options. // Sanitizes the specified DB Options.
Status SanitizeOptions(const DBOptions& db_opts, Status SanitizeOptions(const DBOptions& db_opts,

@ -33,8 +33,7 @@ extern const uint64_t kCuckooTableMagicNumber;
CuckooTableReader::CuckooTableReader( CuckooTableReader::CuckooTableReader(
const ImmutableCFOptions& ioptions, const ImmutableCFOptions& ioptions,
std::unique_ptr<RandomAccessFile>&& file, std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
uint64_t file_size,
const Comparator* comparator, const Comparator* comparator,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t)) uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: file_(std::move(file)), : file_(std::move(file)),

@ -18,6 +18,7 @@
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "table/table_reader.h" #include "table/table_reader.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
@ -26,12 +27,11 @@ class TableReader;
class CuckooTableReader: public TableReader { class CuckooTableReader: public TableReader {
public: public:
CuckooTableReader( CuckooTableReader(const ImmutableCFOptions& ioptions,
const ImmutableCFOptions& ioptions, std::unique_ptr<RandomAccessFileReader>&& file,
std::unique_ptr<RandomAccessFile>&& file, uint64_t file_size, const Comparator* user_comparator,
uint64_t file_size, uint64_t (*get_slice_hash)(const Slice&, uint32_t,
const Comparator* user_comparator, uint64_t));
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
~CuckooTableReader() {} ~CuckooTableReader() {}
std::shared_ptr<const TableProperties> GetTableProperties() const override { std::shared_ptr<const TableProperties> GetTableProperties() const override {
@ -57,7 +57,7 @@ class CuckooTableReader: public TableReader {
private: private:
friend class CuckooTableIterator; friend class CuckooTableIterator;
void LoadAllKeys(std::vector<std::pair<Slice, uint32_t>>* key_to_bucket_id); void LoadAllKeys(std::vector<std::pair<Slice, uint32_t>>* key_to_bucket_id);
std::unique_ptr<RandomAccessFile> file_; std::unique_ptr<RandomAccessFileReader> file_;
Slice file_data_; Slice file_data_;
bool is_last_level_; bool is_last_level_;
bool identity_as_first_hash_; bool identity_as_first_hash_;

@ -62,7 +62,6 @@ uint64_t GetSliceHash(const Slice& s, uint32_t index,
uint64_t max_num_buckets) { uint64_t max_num_buckets) {
return hash_map[s.ToString()][index]; return hash_map[s.ToString()][index];
} }
} // namespace } // namespace
class CuckooReaderTest : public testing::Test { class CuckooReaderTest : public testing::Test {
@ -94,9 +93,11 @@ class CuckooReaderTest : public testing::Test {
const Comparator* ucomp = BytewiseComparator()) { const Comparator* ucomp = BytewiseComparator()) {
std::unique_ptr<WritableFile> writable_file; std::unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder( unique_ptr<WritableFileWriter> file_writer(
writable_file.get(), 0.9, kNumHashFunc, 100, ucomp, 2, new WritableFileWriter(std::move(writable_file), env_options));
false, false, GetSliceHash);
CuckooTableBuilder builder(file_writer.get(), 0.9, kNumHashFunc, 100, ucomp,
2, false, false, GetSliceHash);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) { for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx])); builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
@ -106,18 +107,16 @@ class CuckooReaderTest : public testing::Test {
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_EQ(num_items, builder.NumEntries()); ASSERT_EQ(num_items, builder.NumEntries());
file_size = builder.FileSize(); file_size = builder.FileSize();
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
// Check reader now. // Check reader now.
std::unique_ptr<RandomAccessFile> read_file; std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(read_file)));
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
CuckooTableReader reader( CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp,
ioptions, GetSliceHash);
std::move(read_file),
file_size,
ucomp,
GetSliceHash);
ASSERT_OK(reader.status()); ASSERT_OK(reader.status());
// Assume no merge/deletion // Assume no merge/deletion
for (uint32_t i = 0; i < num_items; ++i) { for (uint32_t i = 0; i < num_items; ++i) {
@ -141,13 +140,11 @@ class CuckooReaderTest : public testing::Test {
void CheckIterator(const Comparator* ucomp = BytewiseComparator()) { void CheckIterator(const Comparator* ucomp = BytewiseComparator()) {
std::unique_ptr<RandomAccessFile> read_file; std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(read_file)));
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
CuckooTableReader reader( CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucomp,
ioptions, GetSliceHash);
std::move(read_file),
file_size,
ucomp,
GetSliceHash);
ASSERT_OK(reader.status()); ASSERT_OK(reader.status());
Iterator* it = reader.NewIterator(ReadOptions(), nullptr); Iterator* it = reader.NewIterator(ReadOptions(), nullptr);
ASSERT_OK(it->status()); ASSERT_OK(it->status());
@ -321,13 +318,11 @@ TEST_F(CuckooReaderTest, WhenKeyNotFound) {
CreateCuckooFileAndCheckReader(); CreateCuckooFileAndCheckReader();
std::unique_ptr<RandomAccessFile> read_file; std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(read_file)));
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
CuckooTableReader reader( CuckooTableReader reader(ioptions, std::move(file_reader), file_size, ucmp,
ioptions, GetSliceHash);
std::move(read_file),
file_size,
ucmp,
GetSliceHash);
ASSERT_OK(reader.status()); ASSERT_OK(reader.status());
// Search for a key with colliding hash values. // Search for a key with colliding hash values.
std::string not_found_user_key = "key" + NumToStr(num_items); std::string not_found_user_key = "key" + NumToStr(num_items);
@ -406,10 +401,11 @@ void WriteFile(const std::vector<std::string>& keys,
std::unique_ptr<WritableFile> writable_file; std::unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options)); ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder( unique_ptr<WritableFileWriter> file_writer(
writable_file.get(), hash_ratio, new WritableFileWriter(std::move(writable_file), env_options));
64, 1000, test::Uint64Comparator(), 5, CuckooTableBuilder builder(file_writer.get(), hash_ratio, 64, 1000,
false, FLAGS_identity_as_first_hash, nullptr); test::Uint64Comparator(), 5, false,
FLAGS_identity_as_first_hash, nullptr);
ASSERT_OK(builder.status()); ASSERT_OK(builder.status());
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) { for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
// Value is just a part of key. // Value is just a part of key.
@ -419,17 +415,18 @@ void WriteFile(const std::vector<std::string>& keys,
} }
ASSERT_OK(builder.Finish()); ASSERT_OK(builder.Finish());
ASSERT_EQ(num, builder.NumEntries()); ASSERT_EQ(num, builder.NumEntries());
ASSERT_OK(writable_file->Close()); ASSERT_OK(file_writer->Close());
uint64_t file_size; uint64_t file_size;
env->GetFileSize(fname, &file_size); env->GetFileSize(fname, &file_size);
std::unique_ptr<RandomAccessFile> read_file; std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(read_file)));
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
CuckooTableReader reader( CuckooTableReader reader(ioptions, std::move(file_reader), file_size,
ioptions, std::move(read_file), file_size, test::Uint64Comparator(), nullptr);
test::Uint64Comparator(), nullptr);
ASSERT_OK(reader.status()); ASSERT_OK(reader.status());
ReadOptions r_options; ReadOptions r_options;
std::string value; std::string value;
@ -455,11 +452,12 @@ void ReadKeys(uint64_t num, uint32_t batch_size) {
env->GetFileSize(fname, &file_size); env->GetFileSize(fname, &file_size);
std::unique_ptr<RandomAccessFile> read_file; std::unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options)); ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
unique_ptr<RandomAccessFileReader> file_reader(
new RandomAccessFileReader(std::move(read_file)));
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
CuckooTableReader reader( CuckooTableReader reader(ioptions, std::move(file_reader), file_size,
ioptions, std::move(read_file), file_size, test::Uint64Comparator(), test::Uint64Comparator(), nullptr);
nullptr);
ASSERT_OK(reader.status()); ASSERT_OK(reader.status());
const UserCollectedProperties user_props = const UserCollectedProperties user_props =
reader.GetTableProperties()->user_collected_properties; reader.GetTableProperties()->user_collected_properties;

@ -17,6 +17,7 @@
#include "util/coding.h" #include "util/coding.h"
#include "util/compression.h" #include "util/compression.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_reader_writer.h"
#include "util/perf_context_imp.h" #include "util/perf_context_imp.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "util/xxhash.h" #include "util/xxhash.h"
@ -210,7 +211,7 @@ std::string Footer::ToString() const {
return result; return result;
} }
Status ReadFooterFromFile(RandomAccessFile* file, uint64_t file_size, Status ReadFooterFromFile(RandomAccessFileReader* file, uint64_t file_size,
Footer* footer, uint64_t enforce_table_magic_number) { Footer* footer, uint64_t enforce_table_magic_number) {
if (file_size < Footer::kMinEncodedLength) { if (file_size < Footer::kMinEncodedLength) {
return Status::Corruption("file is too short to be an sstable"); return Status::Corruption("file is too short to be an sstable");
@ -249,9 +250,9 @@ namespace {
// Read a block and check its CRC // Read a block and check its CRC
// contents is the result of reading. // contents is the result of reading.
// According to the implementation of file->Read, contents may not point to buf // According to the implementation of file->Read, contents may not point to buf
Status ReadBlock(RandomAccessFile* file, const Footer& footer, Status ReadBlock(RandomAccessFileReader* file, const Footer& footer,
const ReadOptions& options, const BlockHandle& handle, const ReadOptions& options, const BlockHandle& handle,
Slice* contents, /* result of reading */ char* buf) { Slice* contents, /* result of reading */ char* buf) {
size_t n = static_cast<size_t>(handle.size()); size_t n = static_cast<size_t>(handle.size());
Status s; Status s;
@ -299,7 +300,7 @@ Status ReadBlock(RandomAccessFile* file, const Footer& footer,
} // namespace } // namespace
Status ReadBlockContents(RandomAccessFile* file, const Footer& footer, Status ReadBlockContents(RandomAccessFileReader* file, const Footer& footer,
const ReadOptions& options, const BlockHandle& handle, const ReadOptions& options, const BlockHandle& handle,
BlockContents* contents, Env* env, BlockContents* contents, Env* env,
bool decompression_requested) { bool decompression_requested) {

@ -166,7 +166,7 @@ class Footer {
// Read the footer from file // Read the footer from file
// If enforce_table_magic_number != 0, ReadFooterFromFile() will return // If enforce_table_magic_number != 0, ReadFooterFromFile() will return
// corruption if table_magic number is not equal to enforce_table_magic_number // corruption if table_magic number is not equal to enforce_table_magic_number
Status ReadFooterFromFile(RandomAccessFile* file, uint64_t file_size, Status ReadFooterFromFile(RandomAccessFileReader* file, uint64_t file_size,
Footer* footer, Footer* footer,
uint64_t enforce_table_magic_number = 0); uint64_t enforce_table_magic_number = 0);
@ -205,7 +205,8 @@ struct BlockContents {
// Read the block identified by "handle" from "file". On failure // Read the block identified by "handle" from "file". On failure
// return non-OK. On success fill *result and return OK. // return non-OK. On success fill *result and return OK.
extern Status ReadBlockContents(RandomAccessFile* file, const Footer& footer, extern Status ReadBlockContents(RandomAccessFileReader* file,
const Footer& footer,
const ReadOptions& options, const ReadOptions& options,
const BlockHandle& handle, const BlockHandle& handle,
BlockContents* contents, Env* env, BlockContents* contents, Env* env,

@ -129,9 +129,9 @@ bool NotifyCollectTableCollectorsOnFinish(
return all_succeeded; return all_succeeded;
} }
Status ReadProperties(const Slice &handle_value, RandomAccessFile *file, Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
const Footer &footer, Env *env, Logger *logger, const Footer& footer, Env* env, Logger* logger,
TableProperties **table_properties) { TableProperties** table_properties) {
assert(table_properties); assert(table_properties);
Slice v = handle_value; Slice v = handle_value;
@ -217,7 +217,7 @@ Status ReadProperties(const Slice &handle_value, RandomAccessFile *file,
return s; return s;
} }
Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size, Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number, Env* env, uint64_t table_magic_number, Env* env,
Logger* info_log, TableProperties** properties) { Logger* info_log, TableProperties** properties) {
// -- Read metaindex block // -- Read metaindex block
@ -271,7 +271,7 @@ Status FindMetaBlock(Iterator* meta_index_iter,
} }
} }
Status FindMetaBlock(RandomAccessFile* file, uint64_t file_size, Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number, Env* env, uint64_t table_magic_number, Env* env,
const std::string& meta_block_name, const std::string& meta_block_name,
BlockHandle* block_handle) { BlockHandle* block_handle) {
@ -298,7 +298,7 @@ Status FindMetaBlock(RandomAccessFile* file, uint64_t file_size,
return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle); return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
} }
Status ReadMetaBlock(RandomAccessFile* file, uint64_t file_size, Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number, Env* env, uint64_t table_magic_number, Env* env,
const std::string& meta_block_name, const std::string& meta_block_name,
BlockContents* contents) { BlockContents* contents) {

@ -107,26 +107,25 @@ bool NotifyCollectTableCollectorsOnFinish(
// @returns a status to indicate if the operation succeeded. On success, // @returns a status to indicate if the operation succeeded. On success,
// *table_properties will point to a heap-allocated TableProperties // *table_properties will point to a heap-allocated TableProperties
// object, otherwise value of `table_properties` will not be modified. // object, otherwise value of `table_properties` will not be modified.
Status ReadProperties(const Slice &handle_value, RandomAccessFile *file, Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
const Footer &footer, Env *env, Logger *logger, const Footer& footer, Env* env, Logger* logger,
TableProperties **table_properties); TableProperties** table_properties);
// Directly read the properties from the properties block of a plain table. // Directly read the properties from the properties block of a plain table.
// @returns a status to indicate if the operation succeeded. On success, // @returns a status to indicate if the operation succeeded. On success,
// *table_properties will point to a heap-allocated TableProperties // *table_properties will point to a heap-allocated TableProperties
// object, otherwise value of `table_properties` will not be modified. // object, otherwise value of `table_properties` will not be modified.
Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size, Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number, Env* env, uint64_t table_magic_number, Env* env,
Logger* info_log, TableProperties** properties); Logger* info_log, TableProperties** properties);
// Find the meta block from the meta index block. // Find the meta block from the meta index block.
Status FindMetaBlock(Iterator* meta_index_iter, Status FindMetaBlock(Iterator* meta_index_iter,
const std::string& meta_block_name, const std::string& meta_block_name,
BlockHandle* block_handle); BlockHandle* block_handle);
// Find the meta block // Find the meta block
Status FindMetaBlock(RandomAccessFile* file, uint64_t file_size, Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number, Env* env, uint64_t table_magic_number, Env* env,
const std::string& meta_block_name, const std::string& meta_block_name,
BlockHandle* block_handle); BlockHandle* block_handle);
@ -134,7 +133,7 @@ Status FindMetaBlock(RandomAccessFile* file, uint64_t file_size,
// Read the specified meta block with name meta_block_name // Read the specified meta block with name meta_block_name
// from `file` and initialize `contents` with contents of this block. // from `file` and initialize `contents` with contents of this block.
// Return Status::OK in case of success. // Return Status::OK in case of success.
Status ReadMetaBlock(RandomAccessFile* file, uint64_t file_size, Status ReadMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
uint64_t table_magic_number, Env* env, uint64_t table_magic_number, Env* env,
const std::string& meta_block_name, const std::string& meta_block_name,
BlockContents* contents); BlockContents* contents);

@ -11,6 +11,7 @@
#include "db/dbformat.h" #include "db/dbformat.h"
#include "port/port.h" #include "port/port.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
namespace mock { namespace mock {
@ -45,7 +46,7 @@ MockTableFactory::MockTableFactory() : next_id_(1) {}
Status MockTableFactory::NewTableReader( Status MockTableFactory::NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& env_options, const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& internal_key, const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) const { unique_ptr<TableReader>* table_reader) const {
uint32_t id = GetIDFromFile(file.get()); uint32_t id = GetIDFromFile(file.get());
@ -63,8 +64,8 @@ Status MockTableFactory::NewTableReader(
TableBuilder* MockTableFactory::NewTableBuilder( TableBuilder* MockTableFactory::NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const { WritableFileWriter* file) const {
uint32_t id = GetAndWriteNextID(file); uint32_t id = GetAndWriteNextID(file->writable_file());
return new MockTableBuilder(id, &file_system_); return new MockTableBuilder(id, &file_system_);
} }
@ -90,7 +91,7 @@ uint32_t MockTableFactory::GetAndWriteNextID(WritableFile* file) const {
return next_id; return next_id;
} }
uint32_t MockTableFactory::GetIDFromFile(RandomAccessFile* file) const { uint32_t MockTableFactory::GetIDFromFile(RandomAccessFileReader* file) const {
char buf[4]; char buf[4];
Slice result; Slice result;
file->Read(0, 4, &result, buf); file->Read(0, 4, &result, buf);

@ -140,13 +140,14 @@ class MockTableFactory : public TableFactory {
MockTableFactory(); MockTableFactory();
const char* Name() const override { return "MockTable"; } const char* Name() const override { return "MockTable"; }
Status NewTableReader(const ImmutableCFOptions& ioptions, Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_key, const InternalKeyComparator& internal_key,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
unique_ptr<TableReader>* table_reader) const override; uint64_t file_size,
unique_ptr<TableReader>* table_reader) const override;
TableBuilder* NewTableBuilder( TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const override; WritableFileWriter* file) const override;
// This function will directly create mock table instead of going through // This function will directly create mock table instead of going through
// MockTableBuilder. MockFileContents has to have a format of <internal_key, // MockTableBuilder. MockFileContents has to have a format of <internal_key,
@ -171,7 +172,7 @@ class MockTableFactory : public TableFactory {
private: private:
uint32_t GetAndWriteNextID(WritableFile* file) const; uint32_t GetAndWriteNextID(WritableFile* file) const;
uint32_t GetIDFromFile(RandomAccessFile* file) const; uint32_t GetIDFromFile(RandomAccessFileReader* file) const;
mutable MockTableFileSystem file_system_; mutable MockTableFileSystem file_system_;
mutable std::atomic<uint32_t> next_id_; mutable std::atomic<uint32_t> next_id_;

@ -26,6 +26,7 @@
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_reader_writer.h"
#include "util/stop_watch.h" #include "util/stop_watch.h"
namespace rocksdb { namespace rocksdb {
@ -35,11 +36,8 @@ namespace {
// a utility that helps writing block content to the file // a utility that helps writing block content to the file
// @offset will advance if @block_contents was successfully written. // @offset will advance if @block_contents was successfully written.
// @block_handle the block handle this particular block. // @block_handle the block handle this particular block.
Status WriteBlock( Status WriteBlock(const Slice& block_contents, WritableFileWriter* file,
const Slice& block_contents, uint64_t* offset, BlockHandle* block_handle) {
WritableFile* file,
uint64_t* offset,
BlockHandle* block_handle) {
block_handle->set_offset(*offset); block_handle->set_offset(*offset);
block_handle->set_size(block_contents.size()); block_handle->set_size(block_contents.size());
Status s = file->Append(block_contents); Status s = file->Append(block_contents);
@ -62,7 +60,7 @@ PlainTableBuilder::PlainTableBuilder(
const ImmutableCFOptions& ioptions, const ImmutableCFOptions& ioptions,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* file, uint32_t user_key_len, EncodingType encoding_type, WritableFileWriter* file, uint32_t user_key_len, EncodingType encoding_type,
size_t index_sparseness, uint32_t bloom_bits_per_key, uint32_t num_probes, size_t index_sparseness, uint32_t bloom_bits_per_key, uint32_t num_probes,
size_t huge_page_tlb_size, double hash_table_ratio, size_t huge_page_tlb_size, double hash_table_ratio,
bool store_index_in_file) bool store_index_in_file)

@ -34,10 +34,11 @@ class PlainTableBuilder: public TableBuilder {
const ImmutableCFOptions& ioptions, const ImmutableCFOptions& ioptions,
const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>* const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
int_tbl_prop_collector_factories, int_tbl_prop_collector_factories,
WritableFile* file, uint32_t user_key_size, EncodingType encoding_type, WritableFileWriter* file, uint32_t user_key_size,
size_t index_sparseness, uint32_t bloom_bits_per_key, EncodingType encoding_type, size_t index_sparseness,
uint32_t num_probes = 6, size_t huge_page_tlb_size = 0, uint32_t bloom_bits_per_key, uint32_t num_probes = 6,
double hash_table_ratio = 0, bool store_index_in_file = false); size_t huge_page_tlb_size = 0, double hash_table_ratio = 0,
bool store_index_in_file = false);
// REQUIRES: Either Finish() or Abandon() has been called. // REQUIRES: Either Finish() or Abandon() has been called.
~PlainTableBuilder(); ~PlainTableBuilder();
@ -82,7 +83,7 @@ class PlainTableBuilder: public TableBuilder {
BloomBlockBuilder bloom_block_; BloomBlockBuilder bloom_block_;
std::unique_ptr<PlainTableIndexBuilder> index_builder_; std::unique_ptr<PlainTableIndexBuilder> index_builder_;
WritableFile* file_; WritableFileWriter* file_;
uint64_t offset_ = 0; uint64_t offset_ = 0;
uint32_t bloom_bits_per_key_; uint32_t bloom_bits_per_key_;
size_t huge_page_tlb_size_; size_t huge_page_tlb_size_;

@ -14,12 +14,11 @@
namespace rocksdb { namespace rocksdb {
Status PlainTableFactory::NewTableReader(const ImmutableCFOptions& ioptions, Status PlainTableFactory::NewTableReader(
const EnvOptions& env_options, const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
const InternalKeyComparator& icomp, const InternalKeyComparator& icomp,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
uint64_t file_size, unique_ptr<TableReader>* table) const {
unique_ptr<TableReader>* table) const {
return PlainTableReader::Open(ioptions, env_options, icomp, std::move(file), return PlainTableReader::Open(ioptions, env_options, icomp, std::move(file),
file_size, table, bloom_bits_per_key_, file_size, table, bloom_bits_per_key_,
hash_table_ratio_, index_sparseness_, hash_table_ratio_, index_sparseness_,
@ -28,7 +27,7 @@ Status PlainTableFactory::NewTableReader(const ImmutableCFOptions& ioptions,
TableBuilder* PlainTableFactory::NewTableBuilder( TableBuilder* PlainTableFactory::NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const { WritableFileWriter* file) const {
// Ignore the skip_filters flag. PlainTable format is optimized for small // Ignore the skip_filters flag. PlainTable format is optimized for small
// in-memory dbs. The skip_filters optimization is not useful for plain // in-memory dbs. The skip_filters optimization is not useful for plain
// tables // tables

@ -153,14 +153,15 @@ class PlainTableFactory : public TableFactory {
full_scan_mode_(options.full_scan_mode), full_scan_mode_(options.full_scan_mode),
store_index_in_file_(options.store_index_in_file) {} store_index_in_file_(options.store_index_in_file) {}
const char* Name() const override { return "PlainTable"; } const char* Name() const override { return "PlainTable"; }
Status NewTableReader( Status NewTableReader(const ImmutableCFOptions& options,
const ImmutableCFOptions& options, const EnvOptions& soptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
unique_ptr<TableReader>* table) const override; uint64_t file_size,
unique_ptr<TableReader>* table) const override;
TableBuilder* NewTableBuilder( TableBuilder* NewTableBuilder(
const TableBuilderOptions& table_builder_options, const TableBuilderOptions& table_builder_options,
WritableFile* file) const override; WritableFileWriter* file) const override;
std::string GetPrintableTableOptions() const override; std::string GetPrintableTableOptions() const override;

@ -6,8 +6,9 @@
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
#include "table/plain_table_key_coding.h" #include "table/plain_table_key_coding.h"
#include "table/plain_table_factory.h"
#include "db/dbformat.h" #include "db/dbformat.h"
#include "table/plain_table_factory.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
@ -64,7 +65,8 @@ const char* DecodeSize(const char* offset, const char* limit,
} }
} // namespace } // namespace
Status PlainTableKeyEncoder::AppendKey(const Slice& key, WritableFile* file, Status PlainTableKeyEncoder::AppendKey(const Slice& key,
WritableFileWriter* file,
uint64_t* offset, char* meta_bytes_buf, uint64_t* offset, char* meta_bytes_buf,
size_t* meta_bytes_buf_size) { size_t* meta_bytes_buf_size) {
ParsedInternalKey parsed_key; ParsedInternalKey parsed_key;

@ -34,7 +34,7 @@ class PlainTableKeyEncoder {
// meta_bytes_buf: buffer for extra meta bytes // meta_bytes_buf: buffer for extra meta bytes
// meta_bytes_buf_size: offset to append extra meta bytes. Will be updated // meta_bytes_buf_size: offset to append extra meta bytes. Will be updated
// if meta_bytes_buf is updated. // if meta_bytes_buf is updated.
Status AppendKey(const Slice& key, WritableFile* file, uint64_t* offset, Status AppendKey(const Slice& key, WritableFileWriter* file, uint64_t* offset,
char* meta_bytes_buf, size_t* meta_bytes_buf_size); char* meta_bytes_buf, size_t* meta_bytes_buf_size);
// Return actual encoding type to be picked // Return actual encoding type to be picked

@ -90,7 +90,7 @@ class PlainTableIterator : public Iterator {
extern const uint64_t kPlainTableMagicNumber; extern const uint64_t kPlainTableMagicNumber;
PlainTableReader::PlainTableReader(const ImmutableCFOptions& ioptions, PlainTableReader::PlainTableReader(const ImmutableCFOptions& ioptions,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFileReader>&& file,
const EnvOptions& storage_options, const EnvOptions& storage_options,
const InternalKeyComparator& icomparator, const InternalKeyComparator& icomparator,
EncodingType encoding_type, EncodingType encoding_type,
@ -115,7 +115,7 @@ PlainTableReader::~PlainTableReader() {
Status PlainTableReader::Open(const ImmutableCFOptions& ioptions, Status PlainTableReader::Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFileReader>&& file,
uint64_t file_size, uint64_t file_size,
unique_ptr<TableReader>* table_reader, unique_ptr<TableReader>* table_reader,
const int bloom_bits_per_key, const int bloom_bits_per_key,

@ -22,6 +22,7 @@
#include "table/plain_table_index.h" #include "table/plain_table_index.h"
#include "util/arena.h" #include "util/arena.h"
#include "util/dynamic_bloom.h" #include "util/dynamic_bloom.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
@ -56,8 +57,8 @@ class PlainTableReader: public TableReader {
static Status Open(const ImmutableCFOptions& ioptions, static Status Open(const ImmutableCFOptions& ioptions,
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, unique_ptr<RandomAccessFileReader>&& file,
unique_ptr<TableReader>* table, uint64_t file_size, unique_ptr<TableReader>* table,
const int bloom_bits_per_key, double hash_table_ratio, const int bloom_bits_per_key, double hash_table_ratio,
size_t index_sparseness, size_t huge_page_tlb_size, size_t index_sparseness, size_t huge_page_tlb_size,
bool full_scan_mode); bool full_scan_mode);
@ -83,7 +84,7 @@ class PlainTableReader: public TableReader {
} }
PlainTableReader(const ImmutableCFOptions& ioptions, PlainTableReader(const ImmutableCFOptions& ioptions,
unique_ptr<RandomAccessFile>&& file, unique_ptr<RandomAccessFileReader>&& file,
const EnvOptions& env_options, const EnvOptions& env_options,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
EncodingType encoding_type, uint64_t file_size, EncodingType encoding_type, uint64_t file_size,
@ -134,7 +135,7 @@ class PlainTableReader: public TableReader {
Arena arena_; Arena arena_;
const ImmutableCFOptions& ioptions_; const ImmutableCFOptions& ioptions_;
unique_ptr<RandomAccessFile> file_; unique_ptr<RandomAccessFileReader> file_;
uint64_t file_size_; uint64_t file_size_;
std::shared_ptr<const TableProperties> table_properties_; std::shared_ptr<const TableProperties> table_properties_;

@ -22,6 +22,7 @@ int main() {
#include "table/plain_table_factory.h" #include "table/plain_table_factory.h"
#include "table/table_builder.h" #include "table/table_builder.h"
#include "table/get_context.h" #include "table/get_context.h"
#include "util/file_reader_writer.h"
#include "util/histogram.h" #include "util/histogram.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -90,11 +91,14 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
std::vector<std::unique_ptr<IntTblPropCollectorFactory> > std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), env_options));
tb = opts.table_factory->NewTableBuilder( tb = opts.table_factory->NewTableBuilder(
TableBuilderOptions(ioptions, ikc, &int_tbl_prop_collector_factories, TableBuilderOptions(ioptions, ikc, &int_tbl_prop_collector_factories,
CompressionType::kNoCompression, CompressionType::kNoCompression,
CompressionOptions(), false), CompressionOptions(), false),
file.get()); file_writer.get());
} else { } else {
s = DB::Open(opts, dbname, &db); s = DB::Open(opts, dbname, &db);
ASSERT_OK(s); ASSERT_OK(s);
@ -119,13 +123,16 @@ void TableReaderBenchmark(Options& opts, EnvOptions& env_options,
} }
unique_ptr<TableReader> table_reader; unique_ptr<TableReader> table_reader;
unique_ptr<RandomAccessFile> raf;
if (!through_db) { if (!through_db) {
unique_ptr<RandomAccessFile> raf;
s = env->NewRandomAccessFile(file_name, &raf, env_options); s = env->NewRandomAccessFile(file_name, &raf, env_options);
uint64_t file_size; uint64_t file_size;
env->GetFileSize(file_name, &file_size); env->GetFileSize(file_name, &file_size);
s = opts.table_factory->NewTableReader( unique_ptr<RandomAccessFileReader> file_reader(
ioptions, env_options, ikc, std::move(raf), file_size, &table_reader); new RandomAccessFileReader(std::move(raf)));
s = opts.table_factory->NewTableReader(ioptions, env_options, ikc,
std::move(file_reader), file_size,
&table_reader);
} }
Random rnd(301); Random rnd(301);

@ -347,7 +347,7 @@ class TableConstructor: public Constructor {
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
const KVMap& kv_map) override { const KVMap& kv_map) override {
Reset(); Reset();
sink_.reset(new StringSink()); file_writer_.reset(test::GetWritableFileWriter(new StringSink()));
unique_ptr<TableBuilder> builder; unique_ptr<TableBuilder> builder;
std::vector<std::unique_ptr<IntTblPropCollectorFactory>> std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;
@ -355,7 +355,7 @@ class TableConstructor: public Constructor {
TableBuilderOptions(ioptions, internal_comparator, TableBuilderOptions(ioptions, internal_comparator,
&int_tbl_prop_collector_factories, &int_tbl_prop_collector_factories,
options.compression, CompressionOptions(), false), options.compression, CompressionOptions(), false),
sink_.get())); file_writer_.get()));
for (const auto kv : kv_map) { for (const auto kv : kv_map) {
if (convert_to_internal_key_) { if (convert_to_internal_key_) {
@ -369,17 +369,18 @@ class TableConstructor: public Constructor {
EXPECT_TRUE(builder->status().ok()); EXPECT_TRUE(builder->status().ok());
} }
Status s = builder->Finish(); Status s = builder->Finish();
file_writer_->Flush();
EXPECT_TRUE(s.ok()) << s.ToString(); EXPECT_TRUE(s.ok()) << s.ToString();
EXPECT_EQ(sink_->contents().size(), builder->FileSize()); EXPECT_EQ(GetSink()->contents().size(), builder->FileSize());
// Open the table // Open the table
uniq_id_ = cur_uniq_id_++; uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_, file_reader_.reset(test::GetRandomAccessFileReader(new StringSource(
ioptions.allow_mmap_reads)); GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
return ioptions.table_factory->NewTableReader( return ioptions.table_factory->NewTableReader(
ioptions, soptions, internal_comparator, std::move(source_), ioptions, soptions, internal_comparator, std::move(file_reader_),
sink_->contents().size(), &table_reader_); GetSink()->contents().size(), &table_reader_);
} }
virtual Iterator* NewIterator() const override { virtual Iterator* NewIterator() const override {
@ -397,12 +398,11 @@ class TableConstructor: public Constructor {
} }
virtual Status Reopen(const ImmutableCFOptions& ioptions) { virtual Status Reopen(const ImmutableCFOptions& ioptions) {
source_.reset( file_reader_.reset(test::GetRandomAccessFileReader(new StringSource(
new StringSource(sink_->contents(), uniq_id_, GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
ioptions.allow_mmap_reads));
return ioptions.table_factory->NewTableReader( return ioptions.table_factory->NewTableReader(
ioptions, soptions, *last_internal_key_, std::move(source_), ioptions, soptions, *last_internal_key_, std::move(file_reader_),
sink_->contents().size(), &table_reader_); GetSink()->contents().size(), &table_reader_);
} }
virtual TableReader* GetTableReader() { virtual TableReader* GetTableReader() {
@ -417,13 +417,17 @@ class TableConstructor: public Constructor {
void Reset() { void Reset() {
uniq_id_ = 0; uniq_id_ = 0;
table_reader_.reset(); table_reader_.reset();
sink_.reset(); file_writer_.reset();
source_.reset(); file_reader_.reset();
}
StringSink* GetSink() {
return static_cast<StringSink*>(file_writer_->writable_file());
} }
uint64_t uniq_id_; uint64_t uniq_id_;
unique_ptr<StringSink> sink_; unique_ptr<WritableFileWriter> file_writer_;
unique_ptr<StringSource> source_; unique_ptr<RandomAccessFileReader> file_reader_;
unique_ptr<TableReader> table_reader_; unique_ptr<TableReader> table_reader_;
bool convert_to_internal_key_; bool convert_to_internal_key_;
@ -1766,6 +1770,8 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) {
PlainTableFactory factory(plain_table_options); PlainTableFactory factory(plain_table_options);
StringSink sink; StringSink sink;
unique_ptr<WritableFileWriter> file_writer(
test::GetWritableFileWriter(new StringSink()));
Options options; Options options;
const ImmutableCFOptions ioptions(options); const ImmutableCFOptions ioptions(options);
InternalKeyComparator ikc(options.comparator); InternalKeyComparator ikc(options.comparator);
@ -1774,7 +1780,7 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) {
std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder( std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
TableBuilderOptions(ioptions, ikc, &int_tbl_prop_collector_factories, TableBuilderOptions(ioptions, ikc, &int_tbl_prop_collector_factories,
kNoCompression, CompressionOptions(), false), kNoCompression, CompressionOptions(), false),
&sink)); file_writer.get()));
for (char c = 'a'; c <= 'z'; ++c) { for (char c = 'a'; c <= 'z'; ++c) {
std::string key(8, c); std::string key(8, c);
@ -1783,11 +1789,15 @@ TEST_F(PlainTableTest, BasicPlainTableProperties) {
builder->Add(key, value); builder->Add(key, value);
} }
ASSERT_OK(builder->Finish()); ASSERT_OK(builder->Finish());
file_writer->Flush();
StringSource source(sink.contents(), 72242, true); StringSink* ss = static_cast<StringSink*>(file_writer->writable_file());
unique_ptr<RandomAccessFileReader> file_reader(
test::GetRandomAccessFileReader(
new StringSource(ss->contents(), 72242, true)));
TableProperties* props = nullptr; TableProperties* props = nullptr;
auto s = ReadTableProperties(&source, sink.contents().size(), auto s = ReadTableProperties(file_reader.get(), ss->contents().size(),
kPlainTableMagicNumber, Env::Default(), nullptr, kPlainTableMagicNumber, Env::Default(), nullptr,
&props); &props);
std::unique_ptr<TableProperties> props_guard(props); std::unique_ptr<TableProperties> props_guard(props);

@ -170,6 +170,9 @@ class SpecialEnv : public EnvWrapper {
void SetIOPriority(Env::IOPriority pri) override { void SetIOPriority(Env::IOPriority pri) override {
base_->SetIOPriority(pri); base_->SetIOPriority(pri);
} }
Env::IOPriority GetIOPriority() override {
return base_->GetIOPriority();
}
}; };
class ManifestFile : public WritableFile { class ManifestFile : public WritableFile {
public: public:

@ -40,7 +40,6 @@
#include "util/posix_logger.h" #include "util/posix_logger.h"
#include "util/random.h" #include "util/random.h"
#include "util/iostats_context_imp.h" #include "util/iostats_context_imp.h"
#include "util/rate_limiter.h"
#include "util/sync_point.h" #include "util/sync_point.h"
#include "util/thread_status_updater.h" #include "util/thread_status_updater.h"
#include "util/thread_status_util.h" #include "util/thread_status_util.h"
@ -74,9 +73,6 @@
#define POSIX_FADV_DONTNEED 4 /* [MC1] dont need these pages */ #define POSIX_FADV_DONTNEED 4 /* [MC1] dont need these pages */
#endif #endif
// This is only set from db_stress.cc and for testing only.
// If non-zero, kill at various points in source code with probability 1/this
int rocksdb_kill_odds = 0;
namespace rocksdb { namespace rocksdb {
@ -104,39 +100,6 @@ static Status IOError(const std::string& context, int err_number) {
return Status::IOError(context, strerror(err_number)); return Status::IOError(context, strerror(err_number));
} }
#ifdef NDEBUG
// empty in release build
#define TEST_KILL_RANDOM(rocksdb_kill_odds)
#else
// Kill the process with probablity 1/odds for testing.
static void TestKillRandom(int odds, const std::string& srcfile,
int srcline) {
time_t curtime = time(nullptr);
Random r((uint32_t)curtime);
assert(odds > 0);
bool crash = r.OneIn(odds);
if (crash) {
fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline);
fflush(stdout);
kill(getpid(), SIGTERM);
}
}
// To avoid crashing always at some frequently executed codepaths (during
// kill random test), use this factor to reduce odds
#define REDUCE_ODDS 2
#define REDUCE_ODDS2 4
#define TEST_KILL_RANDOM(rocksdb_kill_odds) { \
if (rocksdb_kill_odds > 0) { \
TestKillRandom(rocksdb_kill_odds, __FILE__, __LINE__); \
} \
}
#endif
#if defined(OS_LINUX) #if defined(OS_LINUX)
namespace { namespace {
static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size) { static size_t GetUniqueIdFromFile(int fd, char* id, size_t max_size) {
@ -188,7 +151,6 @@ class PosixSequentialFile: public SequentialFile {
do { do {
r = fread_unlocked(scratch, 1, n, file_); r = fread_unlocked(scratch, 1, n, file_);
} while (r == 0 && ferror(file_) && errno == EINTR); } while (r == 0 && ferror(file_) && errno == EINTR);
IOSTATS_ADD(bytes_read, r);
*result = Slice(scratch, r); *result = Slice(scratch, r);
if (r < n) { if (r < n) {
if (feof(file_)) { if (feof(file_)) {
@ -252,10 +214,7 @@ class PosixRandomAccessFile: public RandomAccessFile {
size_t left = n; size_t left = n;
char* ptr = scratch; char* ptr = scratch;
while (left > 0) { while (left > 0) {
{ r = pread(fd_, ptr, left, static_cast<off_t>(offset));
IOSTATS_TIMER_GUARD(read_nanos);
r = pread(fd_, ptr, left, static_cast<off_t>(offset));
}
if (r <= 0) { if (r <= 0) {
if (errno == EINTR) { if (errno == EINTR) {
@ -268,7 +227,6 @@ class PosixRandomAccessFile: public RandomAccessFile {
left -= r; left -= r;
} }
IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left);
*result = Slice(scratch, (r < 0) ? 0 : n - left); *result = Slice(scratch, (r < 0) ? 0 : n - left);
if (r < 0) { if (r < 0) {
// An error: return a non-ok status // An error: return a non-ok status
@ -458,7 +416,6 @@ class PosixMmapFile : public WritableFile {
if (ptr == MAP_FAILED) { if (ptr == MAP_FAILED) {
return Status::IOError("MMap failed on " + filename_); return Status::IOError("MMap failed on " + filename_);
} }
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
base_ = reinterpret_cast<char*>(ptr); base_ = reinterpret_cast<char*>(ptr);
@ -482,8 +439,7 @@ class PosixMmapFile : public WritableFile {
limit_(nullptr), limit_(nullptr),
dst_(nullptr), dst_(nullptr),
last_sync_(nullptr), last_sync_(nullptr),
file_offset_(0), file_offset_(0) {
pending_sync_(false) {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size; fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif #endif
@ -501,8 +457,6 @@ class PosixMmapFile : public WritableFile {
virtual Status Append(const Slice& data) override { virtual Status Append(const Slice& data) override {
const char* src = data.data(); const char* src = data.data();
size_t left = data.size(); size_t left = data.size();
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS);
PrepareWrite(static_cast<size_t>(GetFileSize()), left);
while (left > 0) { while (left > 0) {
assert(base_ <= dst_); assert(base_ <= dst_);
assert(dst_ <= limit_); assert(dst_ <= limit_);
@ -521,12 +475,10 @@ class PosixMmapFile : public WritableFile {
size_t n = (left <= avail) ? left : avail; size_t n = (left <= avail) ? left : avail;
memcpy(dst_, src, n); memcpy(dst_, src, n);
IOSTATS_ADD(bytes_written, n);
dst_ += n; dst_ += n;
src += n; src += n;
left -= n; left -= n;
} }
TEST_KILL_RANDOM(rocksdb_kill_odds);
return Status::OK(); return Status::OK();
} }
@ -534,8 +486,6 @@ class PosixMmapFile : public WritableFile {
Status s; Status s;
size_t unused = limit_ - dst_; size_t unused = limit_ - dst_;
TEST_KILL_RANDOM(rocksdb_kill_odds);
s = UnmapCurrentRegion(); s = UnmapCurrentRegion();
if (!s.ok()) { if (!s.ok()) {
s = IOError(filename_, errno); s = IOError(filename_, errno);
@ -546,8 +496,6 @@ class PosixMmapFile : public WritableFile {
} }
} }
TEST_KILL_RANDOM(rocksdb_kill_odds);
if (close(fd_) < 0) { if (close(fd_) < 0) {
if (s.ok()) { if (s.ok()) {
s = IOError(filename_, errno); s = IOError(filename_, errno);
@ -561,22 +509,15 @@ class PosixMmapFile : public WritableFile {
} }
virtual Status Flush() override { virtual Status Flush() override {
TEST_KILL_RANDOM(rocksdb_kill_odds);
return Status::OK(); return Status::OK();
} }
virtual Status Sync() override { virtual Status Sync() override {
Status s; Status s;
if (pending_sync_) {
// Some unmapped data was not synced
TEST_KILL_RANDOM(rocksdb_kill_odds);
pending_sync_ = false;
if (fdatasync(fd_) < 0) { if (fdatasync(fd_) < 0) {
s = IOError(filename_, errno); s = IOError(filename_, errno);
} }
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS);
}
if (dst_ > last_sync_) { if (dst_ > last_sync_) {
// Find the beginnings of the pages that contain the first and last // Find the beginnings of the pages that contain the first and last
@ -588,7 +529,6 @@ class PosixMmapFile : public WritableFile {
if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) { if (msync(base_ + p1, p2 - p1 + page_size_, MS_SYNC) < 0) {
s = IOError(filename_, errno); s = IOError(filename_, errno);
} }
TEST_KILL_RANDOM(rocksdb_kill_odds);
} }
return s; return s;
@ -598,15 +538,10 @@ class PosixMmapFile : public WritableFile {
* Flush data as well as metadata to stable storage. * Flush data as well as metadata to stable storage.
*/ */
virtual Status Fsync() override { virtual Status Fsync() override {
if (pending_sync_) {
// Some unmapped data was not synced // Some unmapped data was not synced
TEST_KILL_RANDOM(rocksdb_kill_odds);
pending_sync_ = false;
if (fsync(fd_) < 0) { if (fsync(fd_) < 0) {
return IOError(filename_, errno); return IOError(filename_, errno);
} }
TEST_KILL_RANDOM(rocksdb_kill_odds);
}
// This invocation to Sync will not issue the call to // This invocation to Sync will not issue the call to
// fdatasync because pending_sync_ has already been cleared. // fdatasync because pending_sync_ has already been cleared.
return Sync(); return Sync();
@ -638,7 +573,6 @@ class PosixMmapFile : public WritableFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = fallocate( int alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
if (alloc_status == 0) { if (alloc_status == 0) {
@ -655,33 +589,14 @@ class PosixWritableFile : public WritableFile {
private: private:
const std::string filename_; const std::string filename_;
int fd_; int fd_;
size_t cursize_; // current size of cached data in buf_
size_t capacity_; // max size of buf_
unique_ptr<char[]> buf_; // a buffer to cache writes
uint64_t filesize_; uint64_t filesize_;
bool pending_sync_;
bool pending_fsync_;
uint64_t last_sync_size_;
uint64_t bytes_per_sync_;
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
bool fallocate_with_keep_size_; bool fallocate_with_keep_size_;
#endif #endif
RateLimiter* rate_limiter_;
public: public:
PosixWritableFile(const std::string& fname, int fd, size_t capacity, PosixWritableFile(const std::string& fname, int fd, const EnvOptions& options)
const EnvOptions& options) : filename_(fname), fd_(fd), filesize_(0) {
: filename_(fname),
fd_(fd),
cursize_(0),
capacity_(capacity),
buf_(new char[capacity]),
filesize_(0),
pending_sync_(false),
pending_fsync_(false),
last_sync_size_(0),
bytes_per_sync_(options.bytes_per_sync),
rate_limiter_(options.rate_limiter) {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size; fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif #endif
@ -698,64 +613,23 @@ class PosixWritableFile : public WritableFile {
const char* src = data.data(); const char* src = data.data();
size_t left = data.size(); size_t left = data.size();
Status s; Status s;
pending_sync_ = true;
pending_fsync_ = true;
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2);
PrepareWrite(static_cast<size_t>(GetFileSize()), left);
// if there is no space in the cache, then flush
if (cursize_ + left > capacity_) {
s = Flush();
if (!s.ok()) {
return s;
}
// Increase the buffer size, but capped at 1MB
if (capacity_ < (1<<20)) {
capacity_ *= 2;
buf_.reset(new char[capacity_]);
}
assert(cursize_ == 0);
}
// if the write fits into the cache, then write to cache
// otherwise do a write() syscall to write to OS buffers.
if (cursize_ + left <= capacity_) {
memcpy(buf_.get()+cursize_, src, left);
cursize_ += left;
} else {
while (left != 0) { while (left != 0) {
ssize_t done; ssize_t done = write(fd_, src, left);
size_t size = RequestToken(left);
{
IOSTATS_TIMER_GUARD(write_nanos);
done = write(fd_, src, size);
}
if (done < 0) { if (done < 0) {
if (errno == EINTR) { if (errno == EINTR) {
continue; continue;
} }
return IOError(filename_, errno); return IOError(filename_, errno);
} }
IOSTATS_ADD(bytes_written, done);
TEST_KILL_RANDOM(rocksdb_kill_odds);
left -= done; left -= done;
src += done; src += done;
} }
} filesize_ += data.size();
filesize_ += data.size();
return Status::OK(); return Status::OK();
} }
virtual Status Close() override { virtual Status Close() override {
Status s; Status s;
s = Flush(); // flush cache to OS
if (!s.ok()) {
return s;
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
size_t block_size; size_t block_size;
size_t last_allocated_block; size_t last_allocated_block;
@ -793,68 +667,20 @@ class PosixWritableFile : public WritableFile {
// write out the cached data to the OS cache // write out the cached data to the OS cache
virtual Status Flush() override { virtual Status Flush() override {
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2);
size_t left = cursize_;
char* src = buf_.get();
while (left != 0) {
ssize_t done;
size_t size = RequestToken(left);
{
IOSTATS_TIMER_GUARD(write_nanos);
done = write(fd_, src, size);
}
if (done < 0) {
if (errno == EINTR) {
continue;
}
return IOError(filename_, errno);
}
IOSTATS_ADD(bytes_written, done);
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2);
left -= done;
src += done;
}
cursize_ = 0;
// sync OS cache to disk for every bytes_per_sync_
// TODO: give log file and sst file different options (log
// files could be potentially cached in OS for their whole
// life time, thus we might not want to flush at all).
if (bytes_per_sync_ &&
filesize_ - last_sync_size_ >= bytes_per_sync_) {
RangeSync(last_sync_size_, filesize_ - last_sync_size_);
last_sync_size_ = filesize_;
}
return Status::OK(); return Status::OK();
} }
virtual Status Sync() override { virtual Status Sync() override {
Status s = Flush(); if (fdatasync(fd_) < 0) {
if (!s.ok()) {
return s;
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
if (pending_sync_ && fdatasync(fd_) < 0) {
return IOError(filename_, errno); return IOError(filename_, errno);
} }
TEST_KILL_RANDOM(rocksdb_kill_odds);
pending_sync_ = false;
return Status::OK(); return Status::OK();
} }
virtual Status Fsync() override { virtual Status Fsync() override {
Status s = Flush(); if (fsync(fd_) < 0) {
if (!s.ok()) {
return s;
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
if (pending_fsync_ && fsync(fd_) < 0) {
return IOError(filename_, errno); return IOError(filename_, errno);
} }
TEST_KILL_RANDOM(rocksdb_kill_odds);
pending_fsync_ = false;
pending_sync_ = false;
return Status::OK(); return Status::OK();
} }
@ -876,8 +702,8 @@ class PosixWritableFile : public WritableFile {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds); TEST_KILL_RANDOM(rocksdb_kill_odds);
int alloc_status;
IOSTATS_TIMER_GUARD(allocate_nanos); IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status;
alloc_status = fallocate( alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
if (alloc_status == 0) { if (alloc_status == 0) {
@ -888,7 +714,6 @@ class PosixWritableFile : public WritableFile {
} }
virtual Status RangeSync(off_t offset, off_t nbytes) override { virtual Status RangeSync(off_t offset, off_t nbytes) override {
IOSTATS_TIMER_GUARD(range_sync_nanos);
if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) { if (sync_file_range(fd_, offset, nbytes, SYNC_FILE_RANGE_WRITE) == 0) {
return Status::OK(); return Status::OK();
} else { } else {
@ -899,34 +724,19 @@ class PosixWritableFile : public WritableFile {
return GetUniqueIdFromFile(fd_, id, max_size); return GetUniqueIdFromFile(fd_, id, max_size);
} }
#endif #endif
private:
inline size_t RequestToken(size_t bytes) {
if (rate_limiter_ && io_priority_ < Env::IO_TOTAL) {
bytes = std::min(bytes,
static_cast<size_t>(rate_limiter_->GetSingleBurstBytes()));
rate_limiter_->Request(bytes, io_priority_);
}
return bytes;
}
}; };
class PosixRandomRWFile : public RandomRWFile { class PosixRandomRWFile : public RandomRWFile {
private: private:
const std::string filename_; const std::string filename_;
int fd_; int fd_;
bool pending_sync_;
bool pending_fsync_;
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
bool fallocate_with_keep_size_; bool fallocate_with_keep_size_;
#endif #endif
public: public:
PosixRandomRWFile(const std::string& fname, int fd, const EnvOptions& options) PosixRandomRWFile(const std::string& fname, int fd, const EnvOptions& options)
: filename_(fname), : filename_(fname), fd_(fd) {
fd_(fd),
pending_sync_(false),
pending_fsync_(false) {
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
fallocate_with_keep_size_ = options.fallocate_with_keep_size; fallocate_with_keep_size_ = options.fallocate_with_keep_size;
#endif #endif
@ -943,22 +753,15 @@ class PosixRandomRWFile : public RandomRWFile {
const char* src = data.data(); const char* src = data.data();
size_t left = data.size(); size_t left = data.size();
Status s; Status s;
pending_sync_ = true;
pending_fsync_ = true;
while (left != 0) { while (left != 0) {
ssize_t done; ssize_t done = pwrite(fd_, src, left, offset);
{
IOSTATS_TIMER_GUARD(write_nanos);
done = pwrite(fd_, src, left, offset);
}
if (done < 0) { if (done < 0) {
if (errno == EINTR) { if (errno == EINTR) {
continue; continue;
} }
return IOError(filename_, errno); return IOError(filename_, errno);
} }
IOSTATS_ADD(bytes_written, done);
left -= done; left -= done;
src += done; src += done;
@ -975,11 +778,7 @@ class PosixRandomRWFile : public RandomRWFile {
size_t left = n; size_t left = n;
char* ptr = scratch; char* ptr = scratch;
while (left > 0) { while (left > 0) {
{ r = pread(fd_, ptr, left, static_cast<off_t>(offset));
IOSTATS_TIMER_GUARD(read_nanos);
r = pread(fd_, ptr, left, static_cast<off_t>(offset));
}
if (r <= 0) { if (r <= 0) {
if (errno == EINTR) { if (errno == EINTR) {
continue; continue;
@ -990,7 +789,6 @@ class PosixRandomRWFile : public RandomRWFile {
offset += r; offset += r;
left -= r; left -= r;
} }
IOSTATS_ADD_IF_POSITIVE(bytes_read, n - left);
*result = Slice(scratch, (r < 0) ? 0 : n - left); *result = Slice(scratch, (r < 0) ? 0 : n - left);
if (r < 0) { if (r < 0) {
s = IOError(filename_, errno); s = IOError(filename_, errno);
@ -1008,25 +806,21 @@ class PosixRandomRWFile : public RandomRWFile {
} }
virtual Status Sync() override { virtual Status Sync() override {
if (pending_sync_ && fdatasync(fd_) < 0) { if (fdatasync(fd_) < 0) {
return IOError(filename_, errno); return IOError(filename_, errno);
} }
pending_sync_ = false;
return Status::OK(); return Status::OK();
} }
virtual Status Fsync() override { virtual Status Fsync() override {
if (pending_fsync_ && fsync(fd_) < 0) { if (fsync(fd_) < 0) {
return IOError(filename_, errno); return IOError(filename_, errno);
} }
pending_fsync_ = false;
pending_sync_ = false;
return Status::OK(); return Status::OK();
} }
#ifdef ROCKSDB_FALLOCATE_PRESENT #ifdef ROCKSDB_FALLOCATE_PRESENT
virtual Status Allocate(off_t offset, off_t len) override { virtual Status Allocate(off_t offset, off_t len) override {
TEST_KILL_RANDOM(rocksdb_kill_odds);
IOSTATS_TIMER_GUARD(allocate_nanos); IOSTATS_TIMER_GUARD(allocate_nanos);
int alloc_status = fallocate( int alloc_status = fallocate(
fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len); fd_, fallocate_with_keep_size_ ? FALLOC_FL_KEEP_SIZE : 0, offset, len);
@ -1216,9 +1010,7 @@ class PosixEnv : public Env {
EnvOptions no_mmap_writes_options = options; EnvOptions no_mmap_writes_options = options;
no_mmap_writes_options.use_mmap_writes = false; no_mmap_writes_options.use_mmap_writes = false;
result->reset( result->reset(new PosixWritableFile(fname, fd, no_mmap_writes_options));
new PosixWritableFile(fname, fd, 65536, no_mmap_writes_options)
);
} }
} }
return s; return s;

@ -664,6 +664,7 @@ TEST_F(EnvPosixTest, AllocateTest) {
size_t kPageSize = 4096; size_t kPageSize = 4096;
std::string data(1024 * 1024, 'a'); std::string data(1024 * 1024, 'a');
wfile->SetPreallocationBlockSize(kPreallocateSize); wfile->SetPreallocationBlockSize(kPreallocateSize);
wfile->PrepareWrite(wfile->GetFileSize(), data.size());
ASSERT_OK(wfile->Append(Slice(data))); ASSERT_OK(wfile->Append(Slice(data)));
ASSERT_OK(wfile->Flush()); ASSERT_OK(wfile->Flush());
@ -974,18 +975,22 @@ TEST_F(EnvPosixTest, Preallocation) {
ASSERT_EQ(last_allocated_block, 0UL); ASSERT_EQ(last_allocated_block, 0UL);
// Small write should preallocate one block // Small write should preallocate one block
srcfile->Append("test"); std::string str = "test";
srcfile->PrepareWrite(srcfile->GetFileSize(), str.size());
srcfile->Append(str);
srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
ASSERT_EQ(last_allocated_block, 1UL); ASSERT_EQ(last_allocated_block, 1UL);
// Write an entire preallocation block, make sure we increased by two. // Write an entire preallocation block, make sure we increased by two.
std::string buf(block_size, ' '); std::string buf(block_size, ' ');
srcfile->PrepareWrite(srcfile->GetFileSize(), buf.size());
srcfile->Append(buf); srcfile->Append(buf);
srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
ASSERT_EQ(last_allocated_block, 2UL); ASSERT_EQ(last_allocated_block, 2UL);
// Write five more blocks at once, ensure we're where we need to be. // Write five more blocks at once, ensure we're where we need to be.
buf = std::string(block_size * 5, ' '); buf = std::string(block_size * 5, ' ');
srcfile->PrepareWrite(srcfile->GetFileSize(), buf.size());
srcfile->Append(buf); srcfile->Append(buf);
srcfile->GetPreallocationStatus(&block_size, &last_allocated_block); srcfile->GetPreallocationStatus(&block_size, &last_allocated_block);
ASSERT_EQ(last_allocated_block, 7UL); ASSERT_EQ(last_allocated_block, 7UL);

@ -0,0 +1,225 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "util/file_reader_writer.h"
#include <algorithm>
#include "port/port.h"
#include "util/iostats_context_imp.h"
#include "util/random.h"
#include "util/rate_limiter.h"
#include "util/sync_point.h"
namespace rocksdb {
Status SequentialFileReader::Read(size_t n, Slice* result, char* scratch) {
Status s = file_->Read(n, result, scratch);
IOSTATS_ADD(bytes_read, result->size());
return s;
}
Status SequentialFileReader::Skip(uint64_t n) { return file_->Skip(n); }
Status RandomAccessFileReader::Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
IOSTATS_TIMER_GUARD(read_nanos);
Status s = file_->Read(offset, n, result, scratch);
IOSTATS_ADD_IF_POSITIVE(bytes_read, result->size());
return s;
}
Status WritableFileWriter::Append(const Slice& data) {
const char* src = data.data();
size_t left = data.size();
Status s;
pending_sync_ = true;
pending_fsync_ = true;
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2);
writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left);
// if there is no space in the cache, then flush
if (cursize_ + left > capacity_) {
s = Flush();
if (!s.ok()) {
return s;
}
// Increase the buffer size, but capped at 1MB
if (capacity_ < (1 << 20)) {
capacity_ *= 2;
buf_.reset(new char[capacity_]);
}
assert(cursize_ == 0);
}
// if the write fits into the cache, then write to cache
// otherwise do a write() syscall to write to OS buffers.
if (cursize_ + left <= capacity_) {
memcpy(buf_.get() + cursize_, src, left);
cursize_ += left;
} else {
while (left != 0) {
size_t size = RequestToken(left);
{
IOSTATS_TIMER_GUARD(write_nanos);
s = writable_file_->Append(Slice(src, size));
if (!s.ok()) {
return s;
}
}
IOSTATS_ADD(bytes_written, size);
TEST_KILL_RANDOM(rocksdb_kill_odds);
left -= size;
src += size;
}
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
filesize_ += data.size();
return Status::OK();
}
Status WritableFileWriter::Close() {
Status s;
s = Flush(); // flush cache to OS
if (!s.ok()) {
return s;
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
return writable_file_->Close();
}
// write out the cached data to the OS cache
Status WritableFileWriter::Flush() {
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2);
size_t left = cursize_;
char* src = buf_.get();
while (left != 0) {
size_t size = RequestToken(left);
{
IOSTATS_TIMER_GUARD(write_nanos);
Status s = writable_file_->Append(Slice(src, size));
if (!s.ok()) {
return s;
}
}
IOSTATS_ADD(bytes_written, size);
TEST_KILL_RANDOM(rocksdb_kill_odds * REDUCE_ODDS2);
left -= size;
src += size;
}
cursize_ = 0;
writable_file_->Flush();
// sync OS cache to disk for every bytes_per_sync_
// TODO: give log file and sst file different options (log
// files could be potentially cached in OS for their whole
// life time, thus we might not want to flush at all).
if (bytes_per_sync_ && filesize_ - last_sync_size_ >= bytes_per_sync_) {
writable_file_->RangeSync(last_sync_size_, filesize_ - last_sync_size_);
last_sync_size_ = filesize_;
}
return Status::OK();
}
Status WritableFileWriter::Sync(bool use_fsync) {
Status s = Flush();
if (!s.ok()) {
return s;
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
if (pending_sync_) {
if (use_fsync) {
s = writable_file_->Fsync();
} else {
s = writable_file_->Sync();
}
if (!s.ok()) {
return s;
}
}
TEST_KILL_RANDOM(rocksdb_kill_odds);
pending_sync_ = false;
if (use_fsync) {
pending_fsync_ = false;
}
return Status::OK();
}
Status WritableFileWriter::RangeSync(off_t offset, off_t nbytes) {
IOSTATS_TIMER_GUARD(range_sync_nanos);
return writable_file_->RangeSync(offset, nbytes);
}
size_t WritableFileWriter::RequestToken(size_t bytes) {
Env::IOPriority io_priority;
if (rate_limiter_&&(io_priority = writable_file_->GetIOPriority()) <
Env::IO_TOTAL) {
bytes = std::min(bytes,
static_cast<size_t>(rate_limiter_->GetSingleBurstBytes()));
rate_limiter_->Request(bytes, io_priority);
}
return bytes;
}
Status RandomRWFileAccessor::Write(uint64_t offset, const Slice& data) {
Status s;
pending_sync_ = true;
pending_fsync_ = true;
{
IOSTATS_TIMER_GUARD(write_nanos);
s = random_rw_file_->Write(offset, data);
if (!s.ok()) {
return s;
}
}
IOSTATS_ADD(bytes_written, data.size());
return s;
}
Status RandomRWFileAccessor::Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
Status s;
{
IOSTATS_TIMER_GUARD(read_nanos);
s = random_rw_file_->Read(offset, n, result, scratch);
if (!s.ok()) {
return s;
}
}
IOSTATS_ADD_IF_POSITIVE(bytes_read, result->size());
return s;
}
Status RandomRWFileAccessor::Close() { return random_rw_file_->Close(); }
Status RandomRWFileAccessor::Sync(bool use_fsync) {
Status s;
if (pending_sync_) {
if (use_fsync) {
s = random_rw_file_->Fsync();
} else {
s = random_rw_file_->Sync();
}
if (!s.ok()) {
return s;
}
}
if (use_fsync) {
pending_fsync_ = false;
}
pending_sync_ = false;
return s;
}
} // namespace rocksdb

@ -0,0 +1,109 @@
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "rocksdb/env.h"
namespace rocksdb {
class SequentialFileReader {
private:
std::unique_ptr<SequentialFile> file_;
public:
explicit SequentialFileReader(std::unique_ptr<SequentialFile>&& _file)
: file_(std::move(_file)) {}
Status Read(size_t n, Slice* result, char* scratch);
Status Skip(uint64_t n);
SequentialFile* file() { return file_.get(); }
};
class RandomAccessFileReader : public RandomAccessFile {
private:
std::unique_ptr<RandomAccessFile> file_;
public:
explicit RandomAccessFileReader(std::unique_ptr<RandomAccessFile>&& raf)
: file_(std::move(raf)) {}
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
RandomAccessFile* file() { return file_.get(); }
};
// Use posix write to write data to a file.
class WritableFileWriter {
private:
std::unique_ptr<WritableFile> writable_file_;
size_t cursize_; // current size of cached data in buf_
size_t capacity_; // max size of buf_
unique_ptr<char[]> buf_; // a buffer to cache writes
uint64_t filesize_;
bool pending_sync_;
bool pending_fsync_;
uint64_t last_sync_size_;
uint64_t bytes_per_sync_;
RateLimiter* rate_limiter_;
public:
explicit WritableFileWriter(std::unique_ptr<WritableFile>&& file,
const EnvOptions& options)
: writable_file_(std::move(file)),
cursize_(0),
capacity_(65536),
buf_(new char[capacity_]),
filesize_(0),
pending_sync_(false),
pending_fsync_(false),
last_sync_size_(0),
bytes_per_sync_(options.bytes_per_sync),
rate_limiter_(options.rate_limiter) {}
~WritableFileWriter() { Flush(); }
Status Append(const Slice& data);
Status Flush();
Status Close();
Status Sync(bool use_fsync);
uint64_t GetFileSize() { return filesize_; }
Status InvalidateCache(size_t offset, size_t length) {
return writable_file_->InvalidateCache(offset, length);
}
WritableFile* writable_file() const { return writable_file_.get(); }
private:
Status RangeSync(off_t offset, off_t nbytes);
size_t RequestToken(size_t bytes);
};
class RandomRWFileAccessor {
private:
std::unique_ptr<RandomRWFile> random_rw_file_;
bool pending_sync_;
bool pending_fsync_;
public:
explicit RandomRWFileAccessor(std::unique_ptr<RandomRWFile>&& f)
: random_rw_file_(std::move(f)),
pending_sync_(false),
pending_fsync_(false) {}
Status Write(uint64_t offset, const Slice& data);
Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const;
Status Close();
Status Sync(bool use_fsync);
};
} // namespace rocksdb

@ -8,6 +8,7 @@
#include "util/file_util.h" #include "util/file_util.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "db/filename.h" #include "db/filename.h"
#include "util/file_reader_writer.h"
namespace rocksdb { namespace rocksdb {
@ -15,8 +16,12 @@ namespace rocksdb {
Status CopyFile(Env* env, const std::string& source, Status CopyFile(Env* env, const std::string& source,
const std::string& destination, uint64_t size) { const std::string& destination, uint64_t size) {
const EnvOptions soptions; const EnvOptions soptions;
unique_ptr<SequentialFile> srcfile;
Status s; Status s;
unique_ptr<SequentialFileReader> src_reader;
unique_ptr<WritableFileWriter> dest_writer;
{
unique_ptr<SequentialFile> srcfile;
s = env->NewSequentialFile(source, &srcfile, soptions); s = env->NewSequentialFile(source, &srcfile, soptions);
unique_ptr<WritableFile> destfile; unique_ptr<WritableFile> destfile;
if (s.ok()) { if (s.ok()) {
@ -33,6 +38,9 @@ Status CopyFile(Env* env, const std::string& source,
return s; return s;
} }
} }
src_reader.reset(new SequentialFileReader(std::move(srcfile)));
dest_writer.reset(new WritableFileWriter(std::move(destfile), soptions));
}
char buffer[4096]; char buffer[4096];
Slice slice; Slice slice;
@ -40,13 +48,13 @@ Status CopyFile(Env* env, const std::string& source,
uint64_t bytes_to_read = uint64_t bytes_to_read =
std::min(static_cast<uint64_t>(sizeof(buffer)), size); std::min(static_cast<uint64_t>(sizeof(buffer)), size);
if (s.ok()) { if (s.ok()) {
s = srcfile->Read(bytes_to_read, &slice, buffer); s = src_reader->Read(bytes_to_read, &slice, buffer);
} }
if (s.ok()) { if (s.ok()) {
if (slice.size() == 0) { if (slice.size() == 0) {
return Status::Corruption("file too small"); return Status::Corruption("file too small");
} }
s = destfile->Append(slice); s = dest_writer->Append(slice);
} }
if (!s.ok()) { if (!s.ok()) {
return s; return s;

@ -1408,10 +1408,18 @@ class InMemoryHandler : public WriteBatch::Handler {
void DumpWalFile(std::string wal_file, bool print_header, bool print_values, void DumpWalFile(std::string wal_file, bool print_header, bool print_values,
LDBCommandExecuteResult* exec_state) { LDBCommandExecuteResult* exec_state) {
unique_ptr<SequentialFile> file;
Env* env_ = Env::Default(); Env* env_ = Env::Default();
EnvOptions soptions; EnvOptions soptions;
Status status = env_->NewSequentialFile(wal_file, &file, soptions); unique_ptr<SequentialFileReader> wal_file_reader;
Status status;
{
unique_ptr<SequentialFile> file;
status = env_->NewSequentialFile(wal_file, &file, soptions);
if (status.ok()) {
wal_file_reader.reset(new SequentialFileReader(std::move(file)));
}
}
if (!status.ok()) { if (!status.ok()) {
if (exec_state) { if (exec_state) {
*exec_state = LDBCommandExecuteResult::Failed("Failed to open WAL file " + *exec_state = LDBCommandExecuteResult::Failed("Failed to open WAL file " +
@ -1422,7 +1430,7 @@ void DumpWalFile(std::string wal_file, bool print_header, bool print_values,
} }
} else { } else {
StdErrReporter reporter; StdErrReporter reporter;
log::Reader reader(move(file), &reporter, true, 0); log::Reader reader(move(wal_file_reader), &reporter, true, 0);
string scratch; string scratch;
WriteBatch batch; WriteBatch batch;
Slice record; Slice record;

@ -13,6 +13,7 @@
#include "rocksdb/filter_policy.h" #include "rocksdb/filter_policy.h"
#include "table/block_based_table_factory.h" #include "table/block_based_table_factory.h"
#include "table/table_builder.h" #include "table/table_builder.h"
#include "util/file_reader_writer.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/testutil.h" #include "util/testutil.h"
@ -53,12 +54,13 @@ void createSST(const std::string& file_name,
opts.table_factory = tf; opts.table_factory = tf;
std::vector<std::unique_ptr<IntTblPropCollectorFactory> > std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
int_tbl_prop_collector_factories; int_tbl_prop_collector_factories;
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), EnvOptions()));
tb.reset(opts.table_factory->NewTableBuilder( tb.reset(opts.table_factory->NewTableBuilder(
TableBuilderOptions(imoptions, ikc, &int_tbl_prop_collector_factories, TableBuilderOptions(imoptions, ikc, &int_tbl_prop_collector_factories,
CompressionType::kNoCompression, CompressionOptions(), CompressionType::kNoCompression, CompressionOptions(),
false), false),
file.get())); file_writer.get()));
// Populate slightly more than 1K keys // Populate slightly more than 1K keys
uint32_t num_keys = 1024; uint32_t num_keys = 1024;
@ -66,7 +68,7 @@ void createSST(const std::string& file_name,
tb->Add(MakeKey(i), MakeValue(i)); tb->Add(MakeKey(i), MakeValue(i));
} }
tb->Finish(); tb->Finish();
file->Close(); file_writer->Close();
} }
void cleanup(const std::string& file_name) { void cleanup(const std::string& file_name) {

@ -24,7 +24,6 @@ SstFileReader::SstFileReader(const std::string& file_path,
output_hex_(output_hex), ioptions_(options_), output_hex_(output_hex), ioptions_(options_),
internal_comparator_(BytewiseComparator()) { internal_comparator_(BytewiseComparator()) {
fprintf(stdout, "Process %s\n", file_path.c_str()); fprintf(stdout, "Process %s\n", file_path.c_str());
init_result_ = GetTableReader(file_name_); init_result_ = GetTableReader(file_name_);
} }
@ -41,10 +40,13 @@ Status SstFileReader::GetTableReader(const std::string& file_path) {
unique_ptr<RandomAccessFile> file; unique_ptr<RandomAccessFile> file;
uint64_t file_size; uint64_t file_size;
Status s = options_.env->NewRandomAccessFile(file_path, &file_, soptions_); Status s = options_.env->NewRandomAccessFile(file_path, &file, soptions_);
if (s.ok()) { if (s.ok()) {
s = options_.env->GetFileSize(file_path, &file_size); s = options_.env->GetFileSize(file_path, &file_size);
} }
file_.reset(new RandomAccessFileReader(std::move(file)));
if (s.ok()) { if (s.ok()) {
s = ReadFooterFromFile(file_.get(), file_size, &footer); s = ReadFooterFromFile(file_.get(), file_size, &footer);
} }
@ -56,7 +58,8 @@ Status SstFileReader::GetTableReader(const std::string& file_path) {
if (magic_number == kPlainTableMagicNumber || if (magic_number == kPlainTableMagicNumber ||
magic_number == kLegacyPlainTableMagicNumber) { magic_number == kLegacyPlainTableMagicNumber) {
soptions_.use_mmap_reads = true; soptions_.use_mmap_reads = true;
options_.env->NewRandomAccessFile(file_path, &file_, soptions_); options_.env->NewRandomAccessFile(file_path, &file, soptions_);
file_.reset(new RandomAccessFileReader(std::move(file)));
} }
options_.comparator = &internal_comparator_; options_.comparator = &internal_comparator_;
// For old sst format, ReadTableProperties might fail but file can be read // For old sst format, ReadTableProperties might fail but file can be read
@ -68,16 +71,15 @@ Status SstFileReader::GetTableReader(const std::string& file_path) {
} }
if (s.ok()) { if (s.ok()) {
s = NewTableReader(ioptions_, soptions_, internal_comparator_, s = NewTableReader(ioptions_, soptions_, internal_comparator_, file_size,
std::move(file_), file_size, &table_reader_); &table_reader_);
} }
return s; return s;
} }
Status SstFileReader::NewTableReader( Status SstFileReader::NewTableReader(
const ImmutableCFOptions& ioptions, const EnvOptions& soptions, const ImmutableCFOptions& ioptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator, uint64_t file_size,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size,
unique_ptr<TableReader>* table_reader) { unique_ptr<TableReader>* table_reader) {
// We need to turn off pre-fetching of index and filter nodes for // We need to turn off pre-fetching of index and filter nodes for
// BlockBasedTable // BlockBasedTable
@ -108,7 +110,7 @@ Status SstFileReader::DumpTable(const std::string& out_filename) {
} }
Status SstFileReader::ReadTableProperties(uint64_t table_magic_number, Status SstFileReader::ReadTableProperties(uint64_t table_magic_number,
RandomAccessFile* file, RandomAccessFileReader* file,
uint64_t file_size) { uint64_t file_size) {
TableProperties* table_properties = nullptr; TableProperties* table_properties = nullptr;
Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number, Status s = rocksdb::ReadTableProperties(file, file_size, table_magic_number,

@ -28,6 +28,7 @@
#include "table/format.h" #include "table/format.h"
#include "table/meta_blocks.h" #include "table/meta_blocks.h"
#include "table/plain_table_factory.h" #include "table/plain_table_factory.h"
#include "util/file_reader_writer.h"
#include "util/ldb_cmd.h" #include "util/ldb_cmd.h"
#include "util/random.h" #include "util/random.h"
#include "util/testharness.h" #include "util/testharness.h"
@ -56,7 +57,7 @@ class SstFileReader {
// Get the TableReader implementation for the sst file // Get the TableReader implementation for the sst file
Status GetTableReader(const std::string& file_path); Status GetTableReader(const std::string& file_path);
Status ReadTableProperties(uint64_t table_magic_number, Status ReadTableProperties(uint64_t table_magic_number,
RandomAccessFile* file, uint64_t file_size); RandomAccessFileReader* file, uint64_t file_size);
Status SetTableOptionsByMagicNumber(uint64_t table_magic_number); Status SetTableOptionsByMagicNumber(uint64_t table_magic_number);
Status SetOldTableOptions(); Status SetOldTableOptions();
@ -65,7 +66,7 @@ class SstFileReader {
Status NewTableReader(const ImmutableCFOptions& ioptions, Status NewTableReader(const ImmutableCFOptions& ioptions,
const EnvOptions& soptions, const EnvOptions& soptions,
const InternalKeyComparator& internal_comparator, const InternalKeyComparator& internal_comparator,
unique_ptr<RandomAccessFile>&& file, uint64_t file_size, uint64_t file_size,
unique_ptr<TableReader>* table_reader); unique_ptr<TableReader>* table_reader);
std::string file_name_; std::string file_name_;
@ -76,7 +77,7 @@ class SstFileReader {
Status init_result_; Status init_result_;
unique_ptr<TableReader> table_reader_; unique_ptr<TableReader> table_reader_;
unique_ptr<RandomAccessFile> file_; unique_ptr<RandomAccessFileReader> file_;
// options_ and internal_comparator_ will also be used in // options_ and internal_comparator_ will also be used in
// ReadSequential internally (specifically, seek-related operations) // ReadSequential internally (specifically, seek-related operations)
Options options_; Options options_;

@ -4,10 +4,25 @@
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
#include "util/sync_point.h" #include "util/sync_point.h"
#include "port/port.h"
#include "util/random.h"
int rocksdb_kill_odds = 0;
#ifndef NDEBUG #ifndef NDEBUG
namespace rocksdb { namespace rocksdb {
void TestKillRandom(int odds, const std::string& srcfile, int srcline) {
time_t curtime = time(nullptr);
Random r((uint32_t)curtime);
assert(odds > 0);
bool crash = r.OneIn(odds);
if (crash) {
port::Crash(srcfile, srcline);
}
}
SyncPoint* SyncPoint::GetInstance() { SyncPoint* SyncPoint::GetInstance() {
static SyncPoint sync_point; static SyncPoint sync_point;
return &sync_point; return &sync_point;

@ -4,6 +4,7 @@
// of patent rights can be found in the PATENTS file in the same directory. // of patent rights can be found in the PATENTS file in the same directory.
#pragma once #pragma once
#include <assert.h>
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
#include <string> #include <string>
@ -11,6 +12,33 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
// This is only set from db_stress.cc and for testing only.
// If non-zero, kill at various points in source code with probability 1/this
extern int rocksdb_kill_odds;
#ifdef NDEBUG
// empty in release build
#define TEST_KILL_RANDOM(rocksdb_kill_odds)
#else
namespace rocksdb {
// Kill the process with probablity 1/odds for testing.
extern void TestKillRandom(int odds, const std::string& srcfile, int srcline);
// To avoid crashing always at some frequently executed codepaths (during
// kill random test), use this factor to reduce odds
#define REDUCE_ODDS 2
#define REDUCE_ODDS2 4
#define TEST_KILL_RANDOM(rocksdb_kill_odds) \
{ \
if (rocksdb_kill_odds > 0) { \
TestKillRandom(rocksdb_kill_odds, __FILE__, __LINE__); \
} \
}
} // namespace rocksdb
#endif
#ifdef NDEBUG #ifdef NDEBUG
#define TEST_SYNC_POINT(x) #define TEST_SYNC_POINT(x)
#define TEST_SYNC_POINT_CALLBACK(x, y) #define TEST_SYNC_POINT_CALLBACK(x, y)

@ -10,6 +10,7 @@
#include "util/testutil.h" #include "util/testutil.h"
#include "port/port.h" #include "port/port.h"
#include "util/file_reader_writer.h"
#include "util/random.h" #include "util/random.h"
namespace rocksdb { namespace rocksdb {
@ -107,5 +108,20 @@ const Comparator* Uint64Comparator() {
return uint64comp; return uint64comp;
} }
WritableFileWriter* GetWritableFileWriter(WritableFile* wf) {
unique_ptr<WritableFile> file(wf);
return new WritableFileWriter(std::move(file), EnvOptions());
}
RandomAccessFileReader* GetRandomAccessFileReader(RandomAccessFile* raf) {
unique_ptr<RandomAccessFile> file(raf);
return new RandomAccessFileReader(std::move(file));
}
SequentialFileReader* GetSequentialFileReader(SequentialFile* se) {
unique_ptr<SequentialFile> file(se);
return new SequentialFileReader(std::move(file));
}
} // namespace test } // namespace test
} // namespace rocksdb } // namespace rocksdb

@ -19,6 +19,9 @@
#include "util/random.h" #include "util/random.h"
namespace rocksdb { namespace rocksdb {
class SequentialFile;
class SequentialFileReader;
namespace test { namespace test {
// Store in *dst a random string of length "len" and return a Slice that // Store in *dst a random string of length "len" and return a Slice that
@ -159,6 +162,11 @@ class VectorIterator : public Iterator {
std::vector<std::string> values_; std::vector<std::string> values_;
size_t current_; size_t current_;
}; };
extern WritableFileWriter* GetWritableFileWriter(WritableFile* wf);
extern RandomAccessFileReader* GetRandomAccessFileReader(RandomAccessFile* raf);
extern SequentialFileReader* GetSequentialFileReader(SequentialFile* se);
} // namespace test } // namespace test
} // namespace rocksdb } // namespace rocksdb

@ -14,6 +14,7 @@
#include "util/channel.h" #include "util/channel.h"
#include "util/coding.h" #include "util/coding.h"
#include "util/crc32c.h" #include "util/crc32c.h"
#include "util/file_reader_writer.h"
#include "util/logging.h" #include "util/logging.h"
#include "util/string_util.h" #include "util/string_util.h"
#include "rocksdb/transaction_log.h" #include "rocksdb/transaction_log.h"
@ -1105,7 +1106,10 @@ Status BackupEngineImpl::GetLatestBackupFileContents(uint32_t* latest_backup) {
char buf[11]; char buf[11];
Slice data; Slice data;
s = file->Read(10, &data, buf); unique_ptr<SequentialFileReader> file_reader(
new SequentialFileReader(std::move(file)));
s = file_reader->Read(10, &data, buf);
if (!s.ok() || data.size() == 0) { if (!s.ok() || data.size() == 0) {
return s.ok() ? Status::Corruption("Latest backup file corrupted") : s; return s.ok() ? Status::Corruption("Latest backup file corrupted") : s;
} }
@ -1137,14 +1141,16 @@ Status BackupEngineImpl::PutLatestBackupFileContents(uint32_t latest_backup) {
return s; return s;
} }
unique_ptr<WritableFileWriter> file_writer(
new WritableFileWriter(std::move(file), env_options));
char file_contents[10]; char file_contents[10];
int len = sprintf(file_contents, "%u\n", latest_backup); int len = sprintf(file_contents, "%u\n", latest_backup);
s = file->Append(Slice(file_contents, len)); s = file_writer->Append(Slice(file_contents, len));
if (s.ok() && options_.sync) { if (s.ok() && options_.sync) {
file->Sync(); file_writer->Sync(false);
} }
if (s.ok()) { if (s.ok()) {
s = file->Close(); s = file_writer->Close();
} }
if (s.ok()) { if (s.ok()) {
// atomically replace real file with new tmp // atomically replace real file with new tmp
@ -1187,6 +1193,10 @@ Status BackupEngineImpl::CopyFile(
return s; return s;
} }
unique_ptr<WritableFileWriter> dest_writer(
new WritableFileWriter(std::move(dst_file), env_options));
unique_ptr<SequentialFileReader> src_reader(
new SequentialFileReader(std::move(src_file)));
unique_ptr<char[]> buf(new char[copy_file_buffer_size_]); unique_ptr<char[]> buf(new char[copy_file_buffer_size_]);
Slice data; Slice data;
@ -1196,7 +1206,7 @@ Status BackupEngineImpl::CopyFile(
} }
size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ? size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ?
copy_file_buffer_size_ : size_limit; copy_file_buffer_size_ : size_limit;
s = src_file->Read(buffer_to_read, &data, buf.get()); s = src_reader->Read(buffer_to_read, &data, buf.get());
size_limit -= data.size(); size_limit -= data.size();
if (!s.ok()) { if (!s.ok()) {
@ -1210,14 +1220,14 @@ Status BackupEngineImpl::CopyFile(
*checksum_value = crc32c::Extend(*checksum_value, data.data(), *checksum_value = crc32c::Extend(*checksum_value, data.data(),
data.size()); data.size());
} }
s = dst_file->Append(data); s = dest_writer->Append(data);
if (rate_limiter != nullptr) { if (rate_limiter != nullptr) {
rate_limiter->ReportAndWait(data.size()); rate_limiter->ReportAndWait(data.size());
} }
} while (s.ok() && data.size() > 0 && size_limit > 0); } while (s.ok() && data.size() > 0 && size_limit > 0);
if (s.ok() && sync) { if (s.ok() && sync) {
s = dst_file->Sync(); s = dest_writer->Sync(false);
} }
return s; return s;
@ -1358,6 +1368,8 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
return s; return s;
} }
unique_ptr<SequentialFileReader> src_reader(
new SequentialFileReader(std::move(src_file)));
std::unique_ptr<char[]> buf(new char[copy_file_buffer_size_]); std::unique_ptr<char[]> buf(new char[copy_file_buffer_size_]);
Slice data; Slice data;
@ -1367,7 +1379,7 @@ Status BackupEngineImpl::CalculateChecksum(const std::string& src, Env* src_env,
} }
size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ? size_t buffer_to_read = (copy_file_buffer_size_ < size_limit) ?
copy_file_buffer_size_ : size_limit; copy_file_buffer_size_ : size_limit;
s = src_file->Read(buffer_to_read, &data, buf.get()); s = src_reader->Read(buffer_to_read, &data, buf.get());
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -1522,9 +1534,11 @@ Status BackupEngineImpl::BackupMeta::LoadFromFile(
return s; return s;
} }
unique_ptr<SequentialFileReader> backup_meta_reader(
new SequentialFileReader(std::move(backup_meta_file)));
unique_ptr<char[]> buf(new char[max_backup_meta_file_size_ + 1]); unique_ptr<char[]> buf(new char[max_backup_meta_file_size_ + 1]);
Slice data; Slice data;
s = backup_meta_file->Read(max_backup_meta_file_size_, &data, buf.get()); s = backup_meta_reader->Read(max_backup_meta_file_size_, &data, buf.get());
if (!s.ok() || data.size() == max_backup_meta_file_size_) { if (!s.ok() || data.size() == max_backup_meta_file_size_) {
return s.ok() ? Status::Corruption("File size too big") : s; return s.ok() ? Status::Corruption("File size too big") : s;

@ -16,6 +16,7 @@
#include "rocksdb/types.h" #include "rocksdb/types.h"
#include "rocksdb/transaction_log.h" #include "rocksdb/transaction_log.h"
#include "rocksdb/utilities/backupable_db.h" #include "rocksdb/utilities/backupable_db.h"
#include "util/file_reader_writer.h"
#include "util/testharness.h" #include "util/testharness.h"
#include "util/random.h" #include "util/random.h"
#include "util/mutexlock.h" #include "util/mutexlock.h"
@ -292,11 +293,12 @@ class FileManager : public EnvWrapper {
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
RandomRWFileAccessor accessor(std::move(file));
for (uint64_t i = 0; s.ok() && i < bytes_to_corrupt; ++i) { for (uint64_t i = 0; s.ok() && i < bytes_to_corrupt; ++i) {
std::string tmp; std::string tmp;
// write one random byte to a random position // write one random byte to a random position
s = file->Write(rnd_.Next() % size, test::RandomString(&rnd_, 1, &tmp)); s = accessor.Write(rnd_.Next() % size,
test::RandomString(&rnd_, 1, &tmp));
} }
return s; return s;
} }

Loading…
Cancel
Save