Allow SstFileWriter to Fadvise the file away from page cache

Summary:
Add `fadvise_trigger` option to `SstFileWriter`

If fadvise_trigger is passed with a non-zero value, SstFileWriter will invalidate the os page cache every `fadvise_trigger` bytes for the sst file
Closes https://github.com/facebook/rocksdb/pull/1731

Differential Revision: D4371246

Pulled By: IslamAbdelRahman

fbshipit-source-id: 91caff1
main
Islam AbdelRahman 8 years ago committed by Facebook Github Bot
parent 17a4b75cc3
commit d58ef52ba6
  1. 41
      db/external_sst_file_test.cc
  2. 7
      include/rocksdb/sst_file_writer.h
  3. 57
      table/sst_file_writer.cc

@ -1947,6 +1947,47 @@ TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) {
db_->ReleaseSnapshot(snap); db_->ReleaseSnapshot(snap);
} }
TEST_F(ExternalSSTFileTest, FadviseTrigger) {
Options options = CurrentOptions();
const int kNumKeys = 10000;
size_t total_fadvised_bytes = 0;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"SstFileWriter::InvalidatePageCache", [&](void* arg) {
size_t fadvise_size = *(reinterpret_cast<size_t*>(arg));
total_fadvised_bytes += fadvise_size;
});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
std::unique_ptr<SstFileWriter> sst_file_writer;
std::string sst_file_path = sst_files_dir_ + "file_fadvise_disable.sst";
sst_file_writer.reset(new SstFileWriter(EnvOptions(), options,
options.comparator, nullptr, false));
ASSERT_OK(sst_file_writer->Open(sst_file_path));
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(sst_file_writer->Add(Key(i), Key(i)));
}
ASSERT_OK(sst_file_writer->Finish());
// fadvise disabled
ASSERT_EQ(total_fadvised_bytes, 0);
sst_file_path = sst_files_dir_ + "file_fadvise_enable.sst";
sst_file_writer.reset(new SstFileWriter(EnvOptions(), options,
options.comparator, nullptr, true));
ASSERT_OK(sst_file_writer->Open(sst_file_path));
for (int i = 0; i < kNumKeys; i++) {
ASSERT_OK(sst_file_writer->Add(Key(i), Key(i)));
}
ASSERT_OK(sst_file_writer->Finish());
// fadvise enabled
ASSERT_EQ(total_fadvised_bytes, sst_file_writer->FileSize());
ASSERT_GT(total_fadvised_bytes, 0);
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}
#endif // ROCKSDB_LITE #endif // ROCKSDB_LITE
} // namespace rocksdb } // namespace rocksdb

@ -47,9 +47,12 @@ class SstFileWriter {
// User can pass `column_family` to specify that the the generated file will // User can pass `column_family` to specify that the the generated file will
// be ingested into this column_family, note that passing nullptr means that // be ingested into this column_family, note that passing nullptr means that
// the column_family is unknown. // the column_family is unknown.
// If invalidate_page_cache is set to true, SstFileWriter will give the OS a
// hint that this file pages is not needed everytime we write 1MB to the file
SstFileWriter(const EnvOptions& env_options, const Options& options, SstFileWriter(const EnvOptions& env_options, const Options& options,
const Comparator* user_comparator, const Comparator* user_comparator,
ColumnFamilyHandle* column_family = nullptr); ColumnFamilyHandle* column_family = nullptr,
bool invalidate_page_cache = true);
~SstFileWriter(); ~SstFileWriter();
@ -70,6 +73,8 @@ class SstFileWriter {
uint64_t FileSize(); uint64_t FileSize();
private: private:
void InvalidatePageCache(bool closing);
struct Rep; struct Rep;
Rep* rep_; Rep* rep_;
}; };

@ -11,6 +11,7 @@
#include "table/block_based_table_builder.h" #include "table/block_based_table_builder.h"
#include "table/sst_file_writer_collectors.h" #include "table/sst_file_writer_collectors.h"
#include "util/file_reader_writer.h" #include "util/file_reader_writer.h"
#include "util/sync_point.h"
namespace rocksdb { namespace rocksdb {
@ -18,15 +19,19 @@ const std::string ExternalSstFilePropertyNames::kVersion =
"rocksdb.external_sst_file.version"; "rocksdb.external_sst_file.version";
const std::string ExternalSstFilePropertyNames::kGlobalSeqno = const std::string ExternalSstFilePropertyNames::kGlobalSeqno =
"rocksdb.external_sst_file.global_seqno"; "rocksdb.external_sst_file.global_seqno";
const size_t kFadviseTrigger = 1024 * 1024; // 1MB
struct SstFileWriter::Rep { struct SstFileWriter::Rep {
Rep(const EnvOptions& _env_options, const Options& options, Rep(const EnvOptions& _env_options, const Options& options,
const Comparator* _user_comparator, ColumnFamilyHandle* _cfh) const Comparator* _user_comparator, ColumnFamilyHandle* _cfh,
bool _invalidate_page_cache)
: env_options(_env_options), : env_options(_env_options),
ioptions(options), ioptions(options),
mutable_cf_options(options), mutable_cf_options(options),
internal_comparator(_user_comparator), internal_comparator(_user_comparator),
cfh(_cfh) {} cfh(_cfh),
invalidate_page_cache(_invalidate_page_cache),
last_fadvise_size(0) {}
std::unique_ptr<WritableFileWriter> file_writer; std::unique_ptr<WritableFileWriter> file_writer;
std::unique_ptr<TableBuilder> builder; std::unique_ptr<TableBuilder> builder;
@ -38,15 +43,23 @@ struct SstFileWriter::Rep {
InternalKey ikey; InternalKey ikey;
std::string column_family_name; std::string column_family_name;
ColumnFamilyHandle* cfh; ColumnFamilyHandle* cfh;
// If true, We will give the OS a hint that this file pages is not needed
// everytime we write 1MB to the file
bool invalidate_page_cache;
// the size of the file during the last time we called Fadvise to remove
// cached pages from page cache.
uint64_t last_fadvise_size;
}; };
SstFileWriter::SstFileWriter(const EnvOptions& env_options, SstFileWriter::SstFileWriter(const EnvOptions& env_options,
const Options& options, const Options& options,
const Comparator* user_comparator, const Comparator* user_comparator,
ColumnFamilyHandle* column_family) ColumnFamilyHandle* column_family,
: rep_(new Rep(env_options, options, user_comparator, column_family)) { bool invalidate_page_cache)
: rep_(new Rep(env_options, options, user_comparator, column_family,
invalidate_page_cache)) {
rep_->file_info.file_size = 0; rep_->file_info.file_size = 0;
} }
SstFileWriter::~SstFileWriter() { SstFileWriter::~SstFileWriter() {
if (rep_->builder) { if (rep_->builder) {
@ -143,15 +156,17 @@ Status SstFileWriter::Add(const Slice& user_key, const Slice& value) {
} }
} }
// TODO(tec) : For external SST files we could omit the seqno and type.
r->ikey.Set(user_key, 0 /* Sequence Number */,
ValueType::kTypeValue /* Put */);
r->builder->Add(r->ikey.Encode(), value);
// update file info // update file info
r->file_info.num_entries++; r->file_info.num_entries++;
r->file_info.largest_key.assign(user_key.data(), user_key.size()); r->file_info.largest_key.assign(user_key.data(), user_key.size());
r->file_info.file_size = r->builder->FileSize(); r->file_info.file_size = r->builder->FileSize();
// TODO(tec) : For external SST files we could omit the seqno and type. InvalidatePageCache(false /* closing */);
r->ikey.Set(user_key, 0 /* Sequence Number */,
ValueType::kTypeValue /* Put */);
r->builder->Add(r->ikey.Encode(), value);
return Status::OK(); return Status::OK();
} }
@ -166,10 +181,13 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
} }
Status s = r->builder->Finish(); Status s = r->builder->Finish();
r->file_info.file_size = r->builder->FileSize();
if (s.ok()) { if (s.ok()) {
if (!r->ioptions.disable_data_sync) { if (!r->ioptions.disable_data_sync) {
s = r->file_writer->Sync(r->ioptions.use_fsync); s = r->file_writer->Sync(r->ioptions.use_fsync);
} }
InvalidatePageCache(true /* closing */);
if (s.ok()) { if (s.ok()) {
s = r->file_writer->Close(); s = r->file_writer->Close();
} }
@ -181,8 +199,7 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
r->ioptions.env->DeleteFile(r->file_info.file_path); r->ioptions.env->DeleteFile(r->file_info.file_path);
} }
if (s.ok() && file_info != nullptr) { if (file_info != nullptr) {
r->file_info.file_size = r->builder->FileSize();
*file_info = r->file_info; *file_info = r->file_info;
} }
@ -190,6 +207,24 @@ Status SstFileWriter::Finish(ExternalSstFileInfo* file_info) {
return s; return s;
} }
void SstFileWriter::InvalidatePageCache(bool closing) {
Rep* r = rep_;
if (r->invalidate_page_cache == false) {
// Fadvise disabled
return;
}
uint64_t bytes_since_last_fadvise =
r->builder->FileSize() - r->last_fadvise_size;
if (bytes_since_last_fadvise > kFadviseTrigger || closing) {
TEST_SYNC_POINT_CALLBACK("SstFileWriter::InvalidatePageCache",
&(bytes_since_last_fadvise));
// Tell the OS that we dont need this file in page cache
r->file_writer->InvalidateCache(0, 0);
r->last_fadvise_size = r->builder->FileSize();
}
}
uint64_t SstFileWriter::FileSize() { uint64_t SstFileWriter::FileSize() {
return rep_->file_info.file_size; return rep_->file_info.file_size;
} }

Loading…
Cancel
Save