Redesign pending_outputs_

Summary:
Here's a prototype of redesigning pending_outputs_. This way, we don't have to expose pending_outputs_ to other classes (CompactionJob, FlushJob, MemtableList). DBImpl takes care of it.

Still have to write some comments, but should be good enough to start the discussion.

Test Plan: make check, will also run stress test

Reviewers: ljin, sdong, rven, yhchiang

Reviewed By: yhchiang

Subscribers: dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D28353
main
Igor Canadi 10 years ago
parent ec101cd49a
commit 53af5d877d
  1. 28
      db/compaction_job.cc
  2. 9
      db/compaction_job.h
  3. 56
      db/db_impl.cc
  4. 33
      db/db_impl.h
  5. 39
      db/db_test.cc
  6. 3
      db/filename.h
  7. 17
      db/flush_job.cc
  8. 8
      db/flush_job.h
  9. 9
      db/flush_job_test.cc
  10. 8
      db/job_context.h
  11. 15
      db/memtable_list.cc
  12. 8
      db/memtable_list.h
  13. 3
      db/version_set.h

@ -205,10 +205,9 @@ CompactionJob::CompactionJob(
Compaction* compaction, const DBOptions& db_options, Compaction* compaction, const DBOptions& db_options,
const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options, const MutableCFOptions& mutable_cf_options, const EnvOptions& env_options,
VersionSet* versions, port::Mutex* db_mutex, VersionSet* versions, port::Mutex* db_mutex,
std::atomic<bool>* shutting_down, FileNumToPathIdMap* pending_outputs, std::atomic<bool>* shutting_down, LogBuffer* log_buffer,
LogBuffer* log_buffer, Directory* db_directory, Statistics* stats, Directory* db_directory, Statistics* stats, SnapshotList* snapshots,
SnapshotList* snapshots, bool is_snapshot_supported, bool is_snapshot_supported, std::shared_ptr<Cache> table_cache,
std::shared_ptr<Cache> table_cache,
std::function<uint64_t()> yield_callback) std::function<uint64_t()> yield_callback)
: compact_(new CompactionState(compaction)), : compact_(new CompactionState(compaction)),
compaction_stats_(1), compaction_stats_(1),
@ -219,7 +218,6 @@ CompactionJob::CompactionJob(
versions_(versions), versions_(versions),
db_mutex_(db_mutex), db_mutex_(db_mutex),
shutting_down_(shutting_down), shutting_down_(shutting_down),
pending_outputs_(pending_outputs),
log_buffer_(log_buffer), log_buffer_(log_buffer),
db_directory_(db_directory), db_directory_(db_directory),
stats_(stats), stats_(stats),
@ -469,10 +467,6 @@ Status CompactionJob::Install(Status status) {
cfd->internal_stats()->AddCompactionStats( cfd->internal_stats()->AddCompactionStats(
compact_->compaction->output_level(), compaction_stats_); compact_->compaction->output_level(), compaction_stats_);
// if there were any unused file number (mostly in case of
// compaction error), free up the entry from pending_putputs
ReleaseCompactionUnusedFileNumbers();
if (status.ok()) { if (status.ok()) {
status = InstallCompactionResults(); status = InstallCompactionResults();
} }
@ -511,8 +505,6 @@ void CompactionJob::AllocateCompactionOutputFileNumbers() {
int filesNeeded = compact_->compaction->num_input_files(1); int filesNeeded = compact_->compaction->num_input_files(1);
for (int i = 0; i < std::max(filesNeeded, 1); i++) { for (int i = 0; i < std::max(filesNeeded, 1); i++) {
uint64_t file_number = versions_->NewFileNumber(); uint64_t file_number = versions_->NewFileNumber();
pending_outputs_->insert(
{file_number, compact_->compaction->GetOutputPathId()});
compact_->allocated_file_numbers.push_back(file_number); compact_->allocated_file_numbers.push_back(file_number);
} }
} }
@ -1041,14 +1033,6 @@ void CompactionJob::RecordCompactionIOStats() {
IOSTATS_RESET(bytes_written); IOSTATS_RESET(bytes_written);
} }
// Frees up unused file number.
void CompactionJob::ReleaseCompactionUnusedFileNumbers() {
db_mutex_->AssertHeld();
for (const auto file_number : compact_->allocated_file_numbers) {
pending_outputs_->erase(file_number);
}
}
Status CompactionJob::OpenCompactionOutputFile() { Status CompactionJob::OpenCompactionOutputFile() {
assert(compact_ != nullptr); assert(compact_ != nullptr);
assert(compact_->builder == nullptr); assert(compact_->builder == nullptr);
@ -1061,9 +1045,10 @@ Status CompactionJob::OpenCompactionOutputFile() {
compact_->allocated_file_numbers.pop_front(); compact_->allocated_file_numbers.pop_front();
} else { } else {
db_mutex_->Lock(); db_mutex_->Lock();
// TODO(icanadi) make Versions::next_file_number_ atomic and remove db_lock
// around here. Once we do that, AllocateCompactionOutputFileNumbers() will
// not be needed.
file_number = versions_->NewFileNumber(); file_number = versions_->NewFileNumber();
pending_outputs_->insert(
{file_number, compact_->compaction->GetOutputPathId()});
db_mutex_->Unlock(); db_mutex_->Unlock();
} }
// Make the output file // Make the output file
@ -1112,7 +1097,6 @@ void CompactionJob::CleanupCompaction(Status status) {
} }
for (size_t i = 0; i < compact_->outputs.size(); i++) { for (size_t i = 0; i < compact_->outputs.size(); i++) {
const CompactionState::Output& out = compact_->outputs[i]; const CompactionState::Output& out = compact_->outputs[i];
pending_outputs_->erase(out.number);
// If this file was inserted into the table cache then remove // If this file was inserted into the table cache then remove
// them here because this compaction was not committed. // them here because this compaction was not committed.

@ -57,10 +57,9 @@ class CompactionJob {
const MutableCFOptions& mutable_cf_options, const MutableCFOptions& mutable_cf_options,
const EnvOptions& env_options, VersionSet* versions, const EnvOptions& env_options, VersionSet* versions,
port::Mutex* db_mutex, std::atomic<bool>* shutting_down, port::Mutex* db_mutex, std::atomic<bool>* shutting_down,
FileNumToPathIdMap* pending_outputs, LogBuffer* log_buffer, LogBuffer* log_buffer, Directory* db_directory,
Directory* db_directory, Statistics* stats, Statistics* stats, SnapshotList* snapshot_list,
SnapshotList* snapshot_list, bool is_snapshot_supported, bool is_snapshot_supported, std::shared_ptr<Cache> table_cache,
std::shared_ptr<Cache> table_cache,
std::function<uint64_t()> yield_callback); std::function<uint64_t()> yield_callback);
~CompactionJob() { assert(compact_ == nullptr); } ~CompactionJob() { assert(compact_ == nullptr); }
@ -92,7 +91,6 @@ class CompactionJob {
SequenceNumber in, const std::vector<SequenceNumber>& snapshots, SequenceNumber in, const std::vector<SequenceNumber>& snapshots,
SequenceNumber* prev_snapshot); SequenceNumber* prev_snapshot);
void RecordCompactionIOStats(); void RecordCompactionIOStats();
void ReleaseCompactionUnusedFileNumbers();
Status OpenCompactionOutputFile(); Status OpenCompactionOutputFile();
void CleanupCompaction(Status status); void CleanupCompaction(Status status);
@ -115,7 +113,6 @@ class CompactionJob {
VersionSet* versions_; VersionSet* versions_;
port::Mutex* db_mutex_; port::Mutex* db_mutex_;
std::atomic<bool>* shutting_down_; std::atomic<bool>* shutting_down_;
FileNumToPathIdMap* pending_outputs_;
LogBuffer* log_buffer_; LogBuffer* log_buffer_;
Directory* db_directory_; Directory* db_directory_;
Statistics* stats_; Statistics* stats_;

@ -443,8 +443,11 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
} }
// don't delete live files // don't delete live files
for (auto pair : pending_outputs_) { if (pending_outputs_.size()) {
job_context->sst_live.emplace_back(pair.first, pair.second, 0); job_context->min_pending_output = *pending_outputs_.begin();
} else {
// delete all of them
job_context->min_pending_output = std::numeric_limits<uint64_t>::max();
} }
versions_->AddLiveFiles(&job_context->sst_live); versions_->AddLiveFiles(&job_context->sst_live);
@ -567,7 +570,10 @@ void DBImpl::PurgeObsoleteFiles(const JobContext& state) {
keep = (number >= state.manifest_file_number); keep = (number >= state.manifest_file_number);
break; break;
case kTableFile: case kTableFile:
keep = (sst_live_map.find(number) != sst_live_map.end()); // If the second condition is not there, this makes
// DontDeletePendingOutputs fail
keep = (sst_live_map.find(number) != sst_live_map.end()) ||
number >= state.min_pending_output;
break; break;
case kTempFile: case kTempFile:
// Any temp files that are currently being written to must // Any temp files that are currently being written to must
@ -981,7 +987,8 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
const uint64_t start_micros = env_->NowMicros(); const uint64_t start_micros = env_->NowMicros();
FileMetaData meta; FileMetaData meta;
meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0); meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0);
pending_outputs_[meta.fd.GetNumber()] = 0; // path 0 for level 0 file. auto pending_outputs_inserted_elem =
CaptureCurrentFileNumberInPendingOutputs();
ReadOptions ro; ReadOptions ro;
ro.total_order_seek = true; ro.total_order_seek = true;
Arena arena; Arena arena;
@ -1013,7 +1020,7 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
cfd->GetName().c_str(), meta.fd.GetNumber(), meta.fd.GetFileSize(), cfd->GetName().c_str(), meta.fd.GetNumber(), meta.fd.GetFileSize(),
s.ToString().c_str()); s.ToString().c_str());
} }
pending_outputs_.erase(meta.fd.GetNumber()); ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
// Note that if file_size is zero, the file has been deleted and // Note that if file_size is zero, the file has been deleted and
// should not be added to the manifest. // should not be added to the manifest.
@ -1044,9 +1051,9 @@ Status DBImpl::FlushMemTableToOutputFile(
FlushJob flush_job(dbname_, cfd, db_options_, mutable_cf_options, FlushJob flush_job(dbname_, cfd, db_options_, mutable_cf_options,
env_options_, versions_.get(), &mutex_, &shutting_down_, env_options_, versions_.get(), &mutex_, &shutting_down_,
&pending_outputs_, snapshots_.GetNewest(), job_context, snapshots_.GetNewest(), job_context, log_buffer,
log_buffer, db_directory_.get(), db_directory_.get(), GetCompressionFlush(*cfd->ioptions()),
GetCompressionFlush(*cfd->ioptions()), stats_); stats_);
Status s = flush_job.Run(); Status s = flush_job.Run();
@ -1550,6 +1557,9 @@ void DBImpl::BackgroundCallFlush() {
{ {
MutexLock l(&mutex_); MutexLock l(&mutex_);
auto pending_outputs_inserted_elem =
CaptureCurrentFileNumberInPendingOutputs();
Status s; Status s;
if (!shutting_down_.load(std::memory_order_acquire)) { if (!shutting_down_.load(std::memory_order_acquire)) {
s = BackgroundFlush(&madeProgress, &job_context, &log_buffer); s = BackgroundFlush(&madeProgress, &job_context, &log_buffer);
@ -1573,6 +1583,8 @@ void DBImpl::BackgroundCallFlush() {
} }
} }
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
// If !s.ok(), this means that Flush failed. In that case, we want // If !s.ok(), this means that Flush failed. In that case, we want
// to delete all obsolete files and we force FindObsoleteFiles() // to delete all obsolete files and we force FindObsoleteFiles()
FindObsoleteFiles(&job_context, !s.ok()); FindObsoleteFiles(&job_context, !s.ok());
@ -1616,6 +1628,10 @@ void DBImpl::BackgroundCallCompaction() {
LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get()); LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, db_options_.info_log.get());
{ {
MutexLock l(&mutex_); MutexLock l(&mutex_);
auto pending_outputs_inserted_elem =
CaptureCurrentFileNumberInPendingOutputs();
assert(bg_compaction_scheduled_); assert(bg_compaction_scheduled_);
Status s; Status s;
if (!shutting_down_.load(std::memory_order_acquire)) { if (!shutting_down_.load(std::memory_order_acquire)) {
@ -1640,6 +1656,8 @@ void DBImpl::BackgroundCallCompaction() {
} }
} }
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
// If !s.ok(), this means that Compaction failed. In that case, we want // If !s.ok(), this means that Compaction failed. In that case, we want
// to delete all obsolete files we might have created and we force // to delete all obsolete files we might have created and we force
// FindObsoleteFiles(). This is because job_context does not // FindObsoleteFiles(). This is because job_context does not
@ -1848,9 +1866,9 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, JobContext* job_context,
}; };
CompactionJob compaction_job( CompactionJob compaction_job(
c.get(), db_options_, *c->mutable_cf_options(), env_options_, c.get(), db_options_, *c->mutable_cf_options(), env_options_,
versions_.get(), &mutex_, &shutting_down_, &pending_outputs_, versions_.get(), &mutex_, &shutting_down_, log_buffer,
log_buffer, db_directory_.get(), stats_, &snapshots_, db_directory_.get(), stats_, &snapshots_, IsSnapshotSupported(),
IsSnapshotSupported(), table_cache_, std::move(yield_callback)); table_cache_, std::move(yield_callback));
compaction_job.Prepare(); compaction_job.Prepare();
mutex_.Unlock(); mutex_.Unlock();
status = compaction_job.Run(); status = compaction_job.Run();
@ -2968,6 +2986,22 @@ void DBImpl::GetApproximateSizes(ColumnFamilyHandle* column_family,
} }
} }
std::list<uint64_t>::iterator
DBImpl::CaptureCurrentFileNumberInPendingOutputs() {
// We need to remember the iterator of our insert, because after the
// background job is done, we need to remove that element from
// pending_outputs_.
pending_outputs_.push_back(versions_->current_next_file_number());
auto pending_outputs_inserted_elem = pending_outputs_.end();
--pending_outputs_inserted_elem;
return pending_outputs_inserted_elem;
}
void DBImpl::ReleaseFileNumberFromPendingOutputs(
std::list<uint64_t>::iterator v) {
pending_outputs_.erase(v);
}
#ifndef ROCKSDB_LITE #ifndef ROCKSDB_LITE
Status DBImpl::GetUpdatesSince( Status DBImpl::GetUpdatesSince(
SequenceNumber seq, unique_ptr<TransactionLogIterator>* iter, SequenceNumber seq, unique_ptr<TransactionLogIterator>* iter,

@ -12,6 +12,7 @@
#include <deque> #include <deque>
#include <limits> #include <limits>
#include <set> #include <set>
#include <list>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <string> #include <string>
@ -265,6 +266,24 @@ class DBImpl : public DB {
// Delete any unneeded files and stale in-memory entries. // Delete any unneeded files and stale in-memory entries.
void DeleteObsoleteFiles(); void DeleteObsoleteFiles();
// Background process needs to call
// auto x = CaptureCurrentFileNumberInPendingOutputs()
// <do something>
// ReleaseFileNumberFromPendingOutputs(x)
// This will protect any temporary files created while <do something> is
// executing from being deleted.
// -----------
// This function will capture current file number and append it to
// pending_outputs_. This will prevent any background process to delete any
// file created after this point.
std::list<uint64_t>::iterator CaptureCurrentFileNumberInPendingOutputs();
// This function should be called with the result of
// CaptureCurrentFileNumberInPendingOutputs(). It then marks that any file
// created between the calls CaptureCurrentFileNumberInPendingOutputs() and
// ReleaseFileNumberFromPendingOutputs() can now be deleted (if it's not live
// and blocked by any other pending_outputs_ calls)
void ReleaseFileNumberFromPendingOutputs(std::list<uint64_t>::iterator v);
// Flush the in-memory write buffer to storage. Switches to a new // Flush the in-memory write buffer to storage. Switches to a new
// log-file/memtable and writes a new descriptor iff successful. // log-file/memtable and writes a new descriptor iff successful.
Status FlushMemTableToOutputFile(ColumnFamilyData* cfd, Status FlushMemTableToOutputFile(ColumnFamilyData* cfd,
@ -390,10 +409,16 @@ class DBImpl : public DB {
SnapshotList snapshots_; SnapshotList snapshots_;
// Set of table files to protect from deletion because they are // For each background job, pending_outputs_ keeps the current file number at
// part of ongoing compactions. // the time that background job started.
// map from pending file number ID to their path IDs. // FindObsoleteFiles()/PurgeObsoleteFiles() never deletes any file that has
FileNumToPathIdMap pending_outputs_; // number bigger than any of the file number in pending_outputs_. Since file
// numbers grow monotonically, this also means that pending_outputs_ is always
// sorted. After a background job is done executing, its file number is
// deleted from pending_outputs_, which allows PurgeObsoleteFiles() to clean
// it up.
// State is protected with db mutex.
std::list<uint64_t> pending_outputs_;
// At least one compaction or flush job is pending but not yet scheduled // At least one compaction or flush job is pending but not yet scheduled
// because of the max background thread limit. // because of the max background thread limit.

@ -165,6 +165,8 @@ class SpecialEnv : public EnvWrapper {
std::atomic<uint32_t> non_writable_count_; std::atomic<uint32_t> non_writable_count_;
std::function<void()>* table_write_callback_;
explicit SpecialEnv(Env* base) : EnvWrapper(base), rnd_(301) { explicit SpecialEnv(Env* base) : EnvWrapper(base), rnd_(301) {
delay_sstable_sync_.store(false, std::memory_order_release); delay_sstable_sync_.store(false, std::memory_order_release);
drop_writes_.store(false, std::memory_order_release); drop_writes_.store(false, std::memory_order_release);
@ -181,6 +183,8 @@ class SpecialEnv : public EnvWrapper {
non_writeable_rate_ = 0; non_writeable_rate_ = 0;
new_writable_count_ = 0; new_writable_count_ = 0;
non_writable_count_ = 0; non_writable_count_ = 0;
periodic_non_writable_ = 0;
table_write_callback_ = nullptr;
} }
Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r, Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
@ -196,6 +200,9 @@ class SpecialEnv : public EnvWrapper {
base_(std::move(base)) { base_(std::move(base)) {
} }
Status Append(const Slice& data) { Status Append(const Slice& data) {
if (env_->table_write_callback_) {
(*env_->table_write_callback_)();
}
if (env_->drop_writes_.load(std::memory_order_acquire)) { if (env_->drop_writes_.load(std::memory_order_acquire)) {
// Drop writes on the floor // Drop writes on the floor
return Status::OK(); return Status::OK();
@ -9042,6 +9049,38 @@ TEST(DBTest, DynamicMiscOptions) {
assert_reseek_count(300, 1); assert_reseek_count(300, 1);
} }
TEST(DBTest, DontDeletePendingOutputs) {
Options options;
options.env = env_;
options.create_if_missing = true;
DestroyAndReopen(options);
// Every time we write to a table file, call FOF/POF with full DB scan. This
// will make sure our pending_outputs_ protection work correctly
std::function<void()> purge_obsolete_files_function = [&]() {
JobContext job_context;
dbfull()->TEST_LockMutex();
dbfull()->FindObsoleteFiles(&job_context, true /*force*/);
dbfull()->TEST_UnlockMutex();
dbfull()->PurgeObsoleteFiles(job_context);
};
env_->table_write_callback_ = &purge_obsolete_files_function;
for (int i = 0; i < 2; ++i) {
ASSERT_OK(Put("a", "begin"));
ASSERT_OK(Put("z", "end"));
ASSERT_OK(Flush());
}
// If pending output guard does not work correctly, PurgeObsoleteFiles() will
// delete the file that Compaction is trying to create, causing this: error
// db/db_test.cc:975: IO error:
// /tmp/rocksdbtest-1552237650/db_test/000009.sst: No such file or directory
Compact("a", "b");
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -36,9 +36,6 @@ enum FileType {
kIdentityFile kIdentityFile
}; };
// map from file number to path ID.
typedef std::unordered_map<uint64_t, uint32_t> FileNumToPathIdMap;
// Return the name of the log file with the specified number // Return the name of the log file with the specified number
// in the db named by "dbname". The result will be prefixed with // in the db named by "dbname". The result will be prefixed with
// "dbname". // "dbname".

@ -55,7 +55,6 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
const MutableCFOptions& mutable_cf_options, const MutableCFOptions& mutable_cf_options,
const EnvOptions& env_options, VersionSet* versions, const EnvOptions& env_options, VersionSet* versions,
port::Mutex* db_mutex, std::atomic<bool>* shutting_down, port::Mutex* db_mutex, std::atomic<bool>* shutting_down,
FileNumToPathIdMap* pending_outputs,
SequenceNumber newest_snapshot, JobContext* job_context, SequenceNumber newest_snapshot, JobContext* job_context,
LogBuffer* log_buffer, Directory* db_directory, LogBuffer* log_buffer, Directory* db_directory,
CompressionType output_compression, Statistics* stats) CompressionType output_compression, Statistics* stats)
@ -67,7 +66,6 @@ FlushJob::FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
versions_(versions), versions_(versions),
db_mutex_(db_mutex), db_mutex_(db_mutex),
shutting_down_(shutting_down), shutting_down_(shutting_down),
pending_outputs_(pending_outputs),
newest_snapshot_(newest_snapshot), newest_snapshot_(newest_snapshot),
job_context_(job_context), job_context_(job_context),
log_buffer_(log_buffer), log_buffer_(log_buffer),
@ -107,13 +105,12 @@ Status FlushJob::Run() {
} }
if (!s.ok()) { if (!s.ok()) {
cfd_->imm()->RollbackMemtableFlush(mems, file_number, pending_outputs_); cfd_->imm()->RollbackMemtableFlush(mems, file_number);
} else { } else {
// Replace immutable memtable with the generated Table // Replace immutable memtable with the generated Table
s = cfd_->imm()->InstallMemtableFlushResults( s = cfd_->imm()->InstallMemtableFlushResults(
cfd_, mutable_cf_options_, mems, versions_, db_mutex_, file_number, cfd_, mutable_cf_options_, mems, versions_, db_mutex_, file_number,
pending_outputs_, &job_context_->memtables_to_free, db_directory_, &job_context_->memtables_to_free, db_directory_, log_buffer_);
log_buffer_);
} }
return s; return s;
@ -128,7 +125,6 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0); meta.fd = FileDescriptor(versions_->NewFileNumber(), 0, 0);
*filenumber = meta.fd.GetNumber(); *filenumber = meta.fd.GetNumber();
// path 0 for level 0 file. // path 0 for level 0 file.
pending_outputs_->insert({meta.fd.GetNumber(), 0});
const SequenceNumber earliest_seqno_in_memtable = const SequenceNumber earliest_seqno_in_memtable =
mems[0]->GetFirstSequenceNumber(); mems[0]->GetFirstSequenceNumber();
@ -180,15 +176,6 @@ Status FlushJob::WriteLevel0Table(const autovector<MemTable*>& mems,
// re-acquire the most current version // re-acquire the most current version
base = cfd_->current(); base = cfd_->current();
// There could be multiple threads writing to its own level-0 file.
// The pending_outputs cannot be cleared here, otherwise this newly
// created file might not be considered as a live-file by another
// compaction thread that is concurrently deleting obselete files.
// The pending_outputs can be cleared only after the new version is
// committed so that other threads can recognize this file as a
// valid one.
// pending_outputs_.erase(meta.number);
// Note that if file_size is zero, the file has been deleted and // Note that if file_size is zero, the file has been deleted and
// should not be added to the manifest. // should not be added to the manifest.
int level = 0; int level = 0;

@ -55,10 +55,9 @@ class FlushJob {
const MutableCFOptions& mutable_cf_options, const MutableCFOptions& mutable_cf_options,
const EnvOptions& env_options, VersionSet* versions, const EnvOptions& env_options, VersionSet* versions,
port::Mutex* db_mutex, std::atomic<bool>* shutting_down, port::Mutex* db_mutex, std::atomic<bool>* shutting_down,
FileNumToPathIdMap* pending_outputs, SequenceNumber newest_snapshot, SequenceNumber newest_snapshot, JobContext* job_context,
JobContext* job_context, LogBuffer* log_buffer, LogBuffer* log_buffer, Directory* db_directory,
Directory* db_directory, CompressionType output_compression, CompressionType output_compression, Statistics* stats);
Statistics* stats);
~FlushJob() {} ~FlushJob() {}
Status Run(); Status Run();
@ -74,7 +73,6 @@ class FlushJob {
VersionSet* versions_; VersionSet* versions_;
port::Mutex* db_mutex_; port::Mutex* db_mutex_;
std::atomic<bool>* shutting_down_; std::atomic<bool>* shutting_down_;
FileNumToPathIdMap* pending_outputs_;
SequenceNumber newest_snapshot_; SequenceNumber newest_snapshot_;
JobContext* job_context_; JobContext* job_context_;
LogBuffer* log_buffer_; LogBuffer* log_buffer_;

@ -73,7 +73,6 @@ class FlushJobTest {
std::unique_ptr<VersionSet> versions_; std::unique_ptr<VersionSet> versions_;
port::Mutex mutex_; port::Mutex mutex_;
std::atomic<bool> shutting_down_; std::atomic<bool> shutting_down_;
FileNumToPathIdMap pending_outputs_;
std::shared_ptr<MockTableFactory> mock_table_factory_; std::shared_ptr<MockTableFactory> mock_table_factory_;
}; };
@ -83,8 +82,8 @@ TEST(FlushJobTest, Empty) {
FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(),
db_options_, *cfd->GetLatestMutableCFOptions(), db_options_, *cfd->GetLatestMutableCFOptions(),
env_options_, versions_.get(), &mutex_, &shutting_down_, env_options_, versions_.get(), &mutex_, &shutting_down_,
&pending_outputs_, SequenceNumber(), &job_context, nullptr, SequenceNumber(), &job_context, nullptr, nullptr,
nullptr, kNoCompression, nullptr); kNoCompression, nullptr);
ASSERT_OK(flush_job.Run()); ASSERT_OK(flush_job.Run());
} }
@ -108,8 +107,8 @@ TEST(FlushJobTest, NonEmpty) {
FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(), FlushJob flush_job(dbname_, versions_->GetColumnFamilySet()->GetDefault(),
db_options_, *cfd->GetLatestMutableCFOptions(), db_options_, *cfd->GetLatestMutableCFOptions(),
env_options_, versions_.get(), &mutex_, &shutting_down_, env_options_, versions_.get(), &mutex_, &shutting_down_,
&pending_outputs_, SequenceNumber(), &job_context, nullptr, SequenceNumber(), &job_context, nullptr, nullptr,
nullptr, kNoCompression, nullptr); kNoCompression, nullptr);
mutex_.Lock(); mutex_.Lock();
ASSERT_OK(flush_job.Run()); ASSERT_OK(flush_job.Run());
mutex_.Unlock(); mutex_.Unlock();

@ -58,8 +58,12 @@ struct JobContext {
// the current manifest_file_number, log_number and prev_log_number // the current manifest_file_number, log_number and prev_log_number
// that corresponds to the set of files in 'live'. // that corresponds to the set of files in 'live'.
uint64_t manifest_file_number, pending_manifest_file_number, log_number, uint64_t manifest_file_number;
prev_log_number; uint64_t pending_manifest_file_number;
uint64_t log_number;
uint64_t prev_log_number;
uint64_t min_pending_output = 0;
explicit JobContext(bool create_superversion = false) { explicit JobContext(bool create_superversion = false) {
manifest_file_number = 0; manifest_file_number = 0;

@ -144,8 +144,7 @@ void MemTableList::PickMemtablesToFlush(autovector<MemTable*>* ret) {
} }
void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems, void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
uint64_t file_number, uint64_t file_number) {
FileNumToPathIdMap* pending_outputs) {
assert(!mems.empty()); assert(!mems.empty());
// If the flush was not successful, then just reset state. // If the flush was not successful, then just reset state.
@ -159,7 +158,6 @@ void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
m->edit_.Clear(); m->edit_.Clear();
num_flush_not_started_++; num_flush_not_started_++;
} }
pending_outputs->erase(file_number);
imm_flush_needed.store(true, std::memory_order_release); imm_flush_needed.store(true, std::memory_order_release);
} }
@ -167,9 +165,8 @@ void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
Status MemTableList::InstallMemtableFlushResults( Status MemTableList::InstallMemtableFlushResults(
ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
const autovector<MemTable*>& mems, VersionSet* vset, port::Mutex* mu, const autovector<MemTable*>& mems, VersionSet* vset, port::Mutex* mu,
uint64_t file_number, FileNumToPathIdMap* pending_outputs, uint64_t file_number, autovector<MemTable*>* to_delete,
autovector<MemTable*>* to_delete, Directory* db_directory, Directory* db_directory, LogBuffer* log_buffer) {
LogBuffer* log_buffer) {
mu->AssertHeld(); mu->AssertHeld();
// flush was sucessful // flush was sucessful
@ -220,11 +217,6 @@ Status MemTableList::InstallMemtableFlushResults(
current_->Remove(m); current_->Remove(m);
assert(m->file_number_ > 0); assert(m->file_number_ > 0);
// pending_outputs can be cleared only after the newly created file
// has been written to a committed version so that other concurrently
// executing compaction threads do not mistakenly assume that this
// file is not live.
pending_outputs->erase(m->file_number_);
if (m->Unref() != nullptr) { if (m->Unref() != nullptr) {
to_delete->push_back(m); to_delete->push_back(m);
} }
@ -237,7 +229,6 @@ Status MemTableList::InstallMemtableFlushResults(
m->flush_in_progress_ = false; m->flush_in_progress_ = false;
m->edit_.Clear(); m->edit_.Clear();
num_flush_not_started_++; num_flush_not_started_++;
pending_outputs->erase(m->file_number_);
m->file_number_ = 0; m->file_number_ = 0;
imm_flush_needed.store(true, std::memory_order_release); imm_flush_needed.store(true, std::memory_order_release);
} }

@ -108,16 +108,14 @@ class MemTableList {
// Reset status of the given memtable list back to pending state so that // Reset status of the given memtable list back to pending state so that
// they can get picked up again on the next round of flush. // they can get picked up again on the next round of flush.
void RollbackMemtableFlush(const autovector<MemTable*>& mems, void RollbackMemtableFlush(const autovector<MemTable*>& mems,
uint64_t file_number, uint64_t file_number);
FileNumToPathIdMap* pending_outputs);
// Commit a successful flush in the manifest file // Commit a successful flush in the manifest file
Status InstallMemtableFlushResults( Status InstallMemtableFlushResults(
ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options, ColumnFamilyData* cfd, const MutableCFOptions& mutable_cf_options,
const autovector<MemTable*>& m, VersionSet* vset, port::Mutex* mu, const autovector<MemTable*>& m, VersionSet* vset, port::Mutex* mu,
uint64_t file_number, FileNumToPathIdMap* pending_outputs, uint64_t file_number, autovector<MemTable*>* to_delete,
autovector<MemTable*>* to_delete, Directory* db_directory, Directory* db_directory, LogBuffer* log_buffer);
LogBuffer* log_buffer);
// New memtables are inserted at the front of the list. // New memtables are inserted at the front of the list.
// Takes ownership of the referenced held on *m by the caller of Add(). // Takes ownership of the referenced held on *m by the caller of Add().

@ -530,6 +530,9 @@ class VersionSet {
return pending_manifest_file_number_; return pending_manifest_file_number_;
} }
// REQUIRED: mutex locked
uint64_t current_next_file_number() const { return next_file_number_; }
// Allocate and return a new file number // Allocate and return a new file number
uint64_t NewFileNumber() { return next_file_number_++; } uint64_t NewFileNumber() { return next_file_number_++; }

Loading…
Cancel
Save