[CF] Rething LogAndApply for column families

Summary:
I though I might get away with as little changes to LogAndApply() as possible. It turns out this is not the case.

This diff introduces different behavior of LogAndApply() for three cases:
1. column family add
2. column family drop
3. no-column family manipulation

(1) and (2) don't support group commit yet.

There were a lot of problems with old version od LogAndApply, detected by db_stress. The biggest was non-atomicity of manifest writes and metadata changes (i.e. if column family add is in manifest, it also has to be in in-memory data structure).

Test Plan: db_stress

Reviewers: dhruba, haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16491
main
Igor Canadi 11 years ago
parent 12966ec1bb
commit 8ea21a778b
  1. 9
      db/column_family.cc
  2. 1
      db/column_family.h
  3. 15
      db/db_impl.cc
  4. 4
      db/version_edit.h
  5. 113
      db/version_set.cc
  6. 11
      db/version_set.h

@ -333,6 +333,15 @@ ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
} }
} }
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name)
const {
auto cfd_iter = column_families_.find(name);
if (cfd_iter == column_families_.end()) {
return nullptr;
}
return GetColumnFamily(cfd_iter->second);
}
bool ColumnFamilySet::Exists(uint32_t id) { bool ColumnFamilySet::Exists(uint32_t id) {
return column_family_data_.find(id) != column_family_data_.end(); return column_family_data_.find(id) != column_family_data_.end();
} }

@ -264,6 +264,7 @@ class ColumnFamilySet {
ColumnFamilyData* GetDefault() const; ColumnFamilyData* GetDefault() const;
// GetColumnFamily() calls return nullptr if column family is not found // GetColumnFamily() calls return nullptr if column family is not found
ColumnFamilyData* GetColumnFamily(uint32_t id) const; ColumnFamilyData* GetColumnFamily(uint32_t id) const;
ColumnFamilyData* GetColumnFamily(const std::string& name) const;
bool Exists(uint32_t id); bool Exists(uint32_t id);
bool Exists(const std::string& name); bool Exists(const std::string& name);
uint32_t GetID(const std::string& name); uint32_t GetID(const std::string& name);

@ -3068,9 +3068,12 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
edit.SetLogNumber(logfile_number_); edit.SetLogNumber(logfile_number_);
edit.SetComparatorName(options.comparator->Name()); edit.SetComparatorName(options.comparator->Name());
Status s = versions_->LogAndApply(default_cf_handle_->cfd(), &edit, &mutex_); Status s = versions_->LogAndApply(nullptr, &edit, &mutex_,
db_directory_.get(), false, &options);
if (s.ok()) { if (s.ok()) {
auto cfd = versions_->CreateColumnFamily(options, &edit); auto cfd =
versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name);
assert(cfd != nullptr);
*handle = new ColumnFamilyHandleImpl(cfd, this, &mutex_); *handle = new ColumnFamilyHandleImpl(cfd, this, &mutex_);
Log(options_.info_log, "Created column family \"%s\" (ID %u)", Log(options_.info_log, "Created column family \"%s\" (ID %u)",
column_family_name.c_str(), (unsigned)cfd->GetID()); column_family_name.c_str(), (unsigned)cfd->GetID());
@ -3098,16 +3101,8 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) {
s = Status::InvalidArgument("Column family already dropped!\n"); s = Status::InvalidArgument("Column family already dropped!\n");
} }
if (s.ok()) { if (s.ok()) {
cfd->SetDropped();
s = versions_->LogAndApply(cfd, &edit, &mutex_); s = versions_->LogAndApply(cfd, &edit, &mutex_);
} }
if (s.ok()) {
// DB is holding one reference to each column family when it's alive,
// need to drop it now
if (cfd->Unref()) {
delete cfd;
}
}
if (s.ok()) { if (s.ok()) {
Log(options_.info_log, "Dropped column family with id %u\n", cfd->GetID()); Log(options_.info_log, "Dropped column family with id %u\n", cfd->GetID());

@ -101,6 +101,10 @@ class VersionEdit {
return new_files_.size() + deleted_files_.size(); return new_files_.size() + deleted_files_.size();
} }
bool IsColumnFamilyManipulation() {
return is_column_family_add_ || is_column_family_drop_;
}
void SetColumnFamily(uint32_t column_family_id) { void SetColumnFamily(uint32_t column_family_id) {
column_family_ = column_family_id; column_family_ = column_family_id;
} }

@ -1485,15 +1485,20 @@ void VersionSet::AppendVersion(ColumnFamilyData* column_family_data,
Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data, Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
VersionEdit* edit, port::Mutex* mu, VersionEdit* edit, port::Mutex* mu,
Directory* db_directory, Directory* db_directory, bool new_descriptor_log,
bool new_descriptor_log) { const ColumnFamilyOptions* options) {
mu->AssertHeld(); mu->AssertHeld();
if (column_family_data->IsDropped() && !edit->is_column_family_drop_) { assert(column_family_data != nullptr || edit->is_column_family_add_);
if (column_family_data != nullptr && column_family_data->IsDropped()) {
// if column family is dropped no need to write anything to the manifest // if column family is dropped no need to write anything to the manifest
// (unless, of course, thit is the drop column family write) // (unless, of course, thit is the drop column family write)
return Status::OK(); return Status::OK();
} }
if (edit->is_column_family_drop_) {
column_family_data->SetDropped();
}
// queue our request // queue our request
ManifestWriter w(mu, column_family_data, edit); ManifestWriter w(mu, column_family_data, edit);
@ -1506,23 +1511,36 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
} }
std::vector<VersionEdit*> batch_edits; std::vector<VersionEdit*> batch_edits;
Version* v = new Version(column_family_data, this, current_version_number_++); Version* v = nullptr;
Builder builder(column_family_data); std::unique_ptr<Builder> builder(nullptr);
// process all requests in the queue // process all requests in the queue
ManifestWriter* last_writer = &w; ManifestWriter* last_writer = &w;
assert(!manifest_writers_.empty()); assert(!manifest_writers_.empty());
assert(manifest_writers_.front() == &w); assert(manifest_writers_.front() == &w);
for (const auto& writer : manifest_writers_) { if (edit->IsColumnFamilyManipulation()) {
if (writer->cfd->GetID() != column_family_data->GetID()) { // no group commits for column family add or drop
// group commits across column families are not yet supported last_writer = &w;
break; edit->SetNextFile(next_file_number_);
edit->SetLastSequence(last_sequence_);
batch_edits.push_back(edit);
} else {
v = new Version(column_family_data, this, current_version_number_++);
builder.reset(new Builder(column_family_data));
for (const auto& writer : manifest_writers_) {
if (writer->edit->IsColumnFamilyManipulation() ||
writer->cfd->GetID() != column_family_data->GetID()) {
// no group commits for column family add or drop
// also, group commits across column families are not supported
break;
}
last_writer = writer;
LogAndApplyHelper(column_family_data, builder.get(), v, last_writer->edit,
mu);
batch_edits.push_back(last_writer->edit);
} }
last_writer = writer; builder->SaveTo(v);
LogAndApplyHelper(column_family_data, &builder, v, last_writer->edit, mu);
batch_edits.push_back(last_writer->edit);
} }
builder.SaveTo(v);
// Initialize new descriptor log file if necessary by creating // Initialize new descriptor log file if necessary by creating
// a temporary file that contains a snapshot of the current version. // a temporary file that contains a snapshot of the current version.
@ -1547,17 +1565,20 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
// Unlock during expensive operations. New writes cannot get here // Unlock during expensive operations. New writes cannot get here
// because &w is ensuring that all new writes get queued. // because &w is ensuring that all new writes get queued.
{ {
// calculate the amount of data being compacted at every level std::vector<uint64_t> size_being_compacted;
std::vector<uint64_t> size_being_compacted(v->NumberLevels() - 1); if (!edit->IsColumnFamilyManipulation()) {
column_family_data->compaction_picker()->SizeBeingCompacted( size_being_compacted.resize(v->NumberLevels() - 1);
size_being_compacted); // calculate the amount of data being compacted at every level
column_family_data->compaction_picker()->SizeBeingCompacted(
size_being_compacted);
}
mu->Unlock(); mu->Unlock();
if (options_->max_open_files == -1) { if (!edit->IsColumnFamilyManipulation() && options_->max_open_files == -1) {
// unlimited table cache. Pre-load table handle now. // unlimited table cache. Pre-load table handle now.
// Need to do it out of the mutex. // Need to do it out of the mutex.
builder.LoadTableHandlers(); builder->LoadTableHandlers();
} }
// This is fine because everything inside of this block is serialized -- // This is fine because everything inside of this block is serialized --
@ -1573,10 +1594,12 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
} }
} }
// The calls to Finalize and UpdateFilesBySize are cpu-heavy if (!edit->IsColumnFamilyManipulation()) {
// and is best called outside the mutex. // The calls to Finalize and UpdateFilesBySize are cpu-heavy
v->Finalize(size_being_compacted); // and is best called outside the mutex.
v->UpdateFilesBySize(); v->Finalize(size_being_compacted);
v->UpdateFilesBySize();
}
// Write new record to MANIFEST log // Write new record to MANIFEST log
if (s.ok()) { if (s.ok()) {
@ -1650,11 +1673,23 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
// Install the new version // Install the new version
if (s.ok()) { if (s.ok()) {
if (edit->is_column_family_add_) {
// no group commit on column family add
assert(batch_edits.size() == 1);
assert(options != nullptr);
CreateColumnFamily(*options, edit);
} else if (edit->is_column_family_drop_) {
assert(batch_edits.size() == 1);
if (column_family_data->Unref()) {
delete column_family_data;
}
} else {
column_family_data->SetLogNumber(batch_edits.back()->log_number_);
AppendVersion(column_family_data, v);
}
manifest_file_size_ = new_manifest_file_size; manifest_file_size_ = new_manifest_file_size;
AppendVersion(column_family_data, v);
column_family_data->SetLogNumber(edit->log_number_);
prev_log_number_ = edit->prev_log_number_; prev_log_number_ = edit->prev_log_number_;
} else { } else {
Log(options_->info_log, "Error in committing version %lu", Log(options_->info_log, "Error in committing version %lu",
(unsigned long)v->GetVersionNumber()); (unsigned long)v->GetVersionNumber());
@ -1694,19 +1729,14 @@ void VersionSet::LogAndApplyHelper(ColumnFamilyData* cfd, Builder* builder,
edit->SetNextFile(next_file_number_); edit->SetNextFile(next_file_number_);
edit->SetLastSequence(last_sequence_); edit->SetLastSequence(last_sequence_);
if (edit->is_column_family_add_) { if (edit->has_log_number_) {
assert(edit->has_log_number_); assert(edit->log_number_ >= cfd->GetLogNumber());
} else { } else {
if (edit->has_log_number_) { edit->SetLogNumber(cfd->GetLogNumber());
assert(edit->log_number_ >= cfd->GetLogNumber());
} else {
edit->SetLogNumber(cfd->GetLogNumber());
}
builder->Apply(edit);
} }
assert(edit->log_number_ < next_file_number_); assert(edit->log_number_ < next_file_number_);
builder->Apply(edit);
} }
Status VersionSet::Recover( Status VersionSet::Recover(
@ -2013,9 +2043,20 @@ Status VersionSet::ListColumnFamilies(std::vector<std::string>* column_families,
break; break;
} }
if (edit.is_column_family_add_) { if (edit.is_column_family_add_) {
if (column_family_names.find(edit.column_family_) !=
column_family_names.end()) {
s = Status::Corruption("Manifest adding the same column family twice");
break;
}
column_family_names.insert( column_family_names.insert(
{edit.column_family_, edit.column_family_name_}); {edit.column_family_, edit.column_family_name_});
} else if (edit.is_column_family_drop_) { } else if (edit.is_column_family_drop_) {
if (column_family_names.find(edit.column_family_) ==
column_family_names.end()) {
s = Status::Corruption(
"Manifest - dropping non-existing column family");
break;
}
column_family_names.erase(edit.column_family_); column_family_names.erase(edit.column_family_);
} }
} }

@ -296,11 +296,14 @@ class VersionSet {
// Apply *edit to the current version to form a new descriptor that // Apply *edit to the current version to form a new descriptor that
// is both saved to persistent state and installed as the new // is both saved to persistent state and installed as the new
// current version. Will release *mu while actually writing to the file. // current version. Will release *mu while actually writing to the file.
// column_family_options has to be set if edit is column family add
// REQUIRES: *mu is held on entry. // REQUIRES: *mu is held on entry.
// REQUIRES: no other thread concurrently calls LogAndApply() // REQUIRES: no other thread concurrently calls LogAndApply()
Status LogAndApply(ColumnFamilyData* column_family_data, VersionEdit* edit, Status LogAndApply(ColumnFamilyData* column_family_data, VersionEdit* edit,
port::Mutex* mu, Directory* db_directory = nullptr, port::Mutex* mu, Directory* db_directory = nullptr,
bool new_descriptor_log = false); bool new_descriptor_log = false,
const ColumnFamilyOptions* column_family_options =
nullptr);
// Recover the last saved descriptor from persistent storage. // Recover the last saved descriptor from persistent storage.
Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families); Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families);
@ -401,9 +404,6 @@ class VersionSet {
void GetObsoleteFiles(std::vector<FileMetaData*>* files); void GetObsoleteFiles(std::vector<FileMetaData*>* files);
ColumnFamilyData* CreateColumnFamily(const ColumnFamilyOptions& options,
VersionEdit* edit);
ColumnFamilySet* GetColumnFamilySet() { return column_family_set_.get(); } ColumnFamilySet* GetColumnFamilySet() { return column_family_set_.get(); }
private: private:
@ -426,6 +426,9 @@ class VersionSet {
bool ManifestContains(const std::string& record) const; bool ManifestContains(const std::string& record) const;
ColumnFamilyData* CreateColumnFamily(const ColumnFamilyOptions& options,
VersionEdit* edit);
std::unique_ptr<ColumnFamilySet> column_family_set_; std::unique_ptr<ColumnFamilySet> column_family_set_;
Env* const env_; Env* const env_;

Loading…
Cancel
Save