[CF] Don't reuse dropped column family IDs

Summary:
Column family IDs should be unique, even after a column family is dropped. To achieve this, we save the maximum column family ID in the manifest.

Note that the diff is still not ready. I'm only using Differential to move the patch to my Mac.

Test Plan: added a test to column_family_test

Reviewers: dhruba, haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D16581
main
Igor Canadi 11 years ago
parent e21d5b8bbc
commit 9625acbf70
  1. 6
      db/column_family.cc
  2. 11
      db/column_family.h
  3. 26
      db/column_family_test.cc
  4. 15
      db/version_edit.cc
  5. 8
      db/version_edit.h
  6. 1
      db/version_edit_test.cc
  7. 37
      db/version_set.cc

@ -399,6 +399,12 @@ uint32_t ColumnFamilySet::GetNextColumnFamilyID() {
return ++max_column_family_; return ++max_column_family_;
} }
uint32_t ColumnFamilySet::GetMaxColumnFamily() { return max_column_family_; }
void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) {
max_column_family_ = std::max(new_max_column_family, max_column_family_);
}
// under a DB mutex // under a DB mutex
ColumnFamilyData* ColumnFamilySet::CreateColumnFamily( ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
const std::string& name, uint32_t id, Version* dummy_versions, const std::string& name, uint32_t id, Version* dummy_versions,

@ -290,11 +290,11 @@ class ColumnFamilySet {
uint32_t GetID(const std::string& name); uint32_t GetID(const std::string& name);
// this call will return the next available column family ID. it guarantees // this call will return the next available column family ID. it guarantees
// that there is no column family with id greater than or equal to the // that there is no column family with id greater than or equal to the
// returned value in the current running instance. It does not, however, // returned value in the current running instance or anytime in RocksDB
// guarantee that the returned ID is unique accross RocksDB restarts. // instance history.
// For example, if a client adds a column family 6 and then drops it,
// after a restart, we might reuse column family 6 ID.
uint32_t GetNextColumnFamilyID(); uint32_t GetNextColumnFamilyID();
uint32_t GetMaxColumnFamily();
void UpdateMaxColumnFamily(uint32_t new_max_column_family);
ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id, ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id,
Version* dummy_version, Version* dummy_version,
@ -314,7 +314,8 @@ class ColumnFamilySet {
// family might get dropped when you release the DB mutex. // family might get dropped when you release the DB mutex.
// * GetDefault(), GetColumnFamily(), Exists(), GetID() -- either inside of DB // * GetDefault(), GetColumnFamily(), Exists(), GetID() -- either inside of DB
// mutex or call Lock() // mutex or call Lock()
// * GetNextColumnFamilyID() -- inside of DB mutex // * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily() --
// inside of DB mutex
void Lock(); void Lock();
void Unlock(); void Unlock();

@ -284,6 +284,32 @@ class ColumnFamilyTest {
Random rnd_; Random rnd_;
}; };
TEST(ColumnFamilyTest, DontReuseColumnFamilyID) {
for (int iter = 0; iter < 3; ++iter) {
Open();
CreateColumnFamilies({"one", "two", "three"});
for (size_t i = 0; i < handles_.size(); ++i) {
ASSERT_EQ(i, handles_[i]->GetID());
}
if (iter == 1) {
Reopen();
}
DropColumnFamilies({3});
Reopen();
if (iter == 2) {
// this tests if max_column_family is correctly persisted with
// WriteSnapshot()
Reopen();
}
CreateColumnFamilies({"three2"});
// ID 3 that was used for dropped column family "three" should not be reused
ASSERT_EQ(4, handles_[3]->GetID());
Close();
Destroy();
}
}
TEST(ColumnFamilyTest, AddDrop) { TEST(ColumnFamilyTest, AddDrop) {
Open(); Open();
CreateColumnFamilies({"one", "two", "three"}); CreateColumnFamilies({"one", "two", "three"});

@ -34,6 +34,7 @@ enum Tag {
kColumnFamily = 200, // specify column family for version edit kColumnFamily = 200, // specify column family for version edit
kColumnFamilyAdd = 201, kColumnFamilyAdd = 201,
kColumnFamilyDrop = 202, kColumnFamilyDrop = 202,
kMaxColumnFamily = 203,
}; };
void VersionEdit::Clear() { void VersionEdit::Clear() {
@ -43,11 +44,13 @@ void VersionEdit::Clear() {
prev_log_number_ = 0; prev_log_number_ = 0;
last_sequence_ = 0; last_sequence_ = 0;
next_file_number_ = 0; next_file_number_ = 0;
max_column_family_ = 0;
has_comparator_ = false; has_comparator_ = false;
has_log_number_ = false; has_log_number_ = false;
has_prev_log_number_ = false; has_prev_log_number_ = false;
has_next_file_number_ = false; has_next_file_number_ = false;
has_last_sequence_ = false; has_last_sequence_ = false;
has_max_column_family_ = false;
deleted_files_.clear(); deleted_files_.clear();
new_files_.clear(); new_files_.clear();
column_family_ = 0; column_family_ = 0;
@ -77,6 +80,10 @@ void VersionEdit::EncodeTo(std::string* dst) const {
PutVarint32(dst, kLastSequence); PutVarint32(dst, kLastSequence);
PutVarint64(dst, last_sequence_); PutVarint64(dst, last_sequence_);
} }
if (has_max_column_family_) {
PutVarint32(dst, kMaxColumnFamily);
PutVarint32(dst, max_column_family_);
}
for (const auto& deleted : deleted_files_) { for (const auto& deleted : deleted_files_) {
PutVarint32(dst, kDeletedFile); PutVarint32(dst, kDeletedFile);
@ -191,6 +198,14 @@ Status VersionEdit::DecodeFrom(const Slice& src) {
} }
break; break;
case kMaxColumnFamily:
if (GetVarint32(&input, &max_column_family_)) {
has_max_column_family_ = true;
} else {
msg = "max column family";
}
break;
case kCompactPointer: case kCompactPointer:
if (GetLevel(&input, &level, &msg) && if (GetLevel(&input, &level, &msg) &&
GetInternalKey(&input, &key)) { GetInternalKey(&input, &key)) {

@ -70,6 +70,10 @@ class VersionEdit {
has_last_sequence_ = true; has_last_sequence_ = true;
last_sequence_ = seq; last_sequence_ = seq;
} }
void SetMaxColumnFamily(uint32_t max_column_family) {
has_max_column_family_ = true;
max_column_family_ = max_column_family;
}
// Add the specified file at the specified number. // Add the specified file at the specified number.
// REQUIRES: This version has not been saved (see VersionSet::SaveTo) // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
@ -143,15 +147,17 @@ class VersionEdit {
uint64_t log_number_; uint64_t log_number_;
uint64_t prev_log_number_; uint64_t prev_log_number_;
uint64_t next_file_number_; uint64_t next_file_number_;
uint32_t max_column_family_;
SequenceNumber last_sequence_; SequenceNumber last_sequence_;
bool has_comparator_; bool has_comparator_;
bool has_log_number_; bool has_log_number_;
bool has_prev_log_number_; bool has_prev_log_number_;
bool has_next_file_number_; bool has_next_file_number_;
bool has_last_sequence_; bool has_last_sequence_;
bool has_max_column_family_;
DeletedFileSet deleted_files_; DeletedFileSet deleted_files_;
std::vector< std::pair<int, FileMetaData> > new_files_; std::vector<std::pair<int, FileMetaData>> new_files_;
// Each version edit record should have column_family_id set // Each version edit record should have column_family_id set
// If it's not set, it is default (0) // If it's not set, it is default (0)

@ -49,6 +49,7 @@ TEST(VersionEditTest, ColumnFamilyTest) {
VersionEdit edit; VersionEdit edit;
edit.SetColumnFamily(2); edit.SetColumnFamily(2);
edit.AddColumnFamily("column_family"); edit.AddColumnFamily("column_family");
edit.SetMaxColumnFamily(5);
TestEncodeDecode(edit); TestEncodeDecode(edit);
edit.Clear(); edit.Clear();

@ -1497,6 +1497,9 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
return Status::OK(); return Status::OK();
} }
if (edit->is_column_family_drop_) { if (edit->is_column_family_drop_) {
// if we drop column family, we have to make sure to save max column family,
// so that we don't reuse existing ID
edit->SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily());
column_family_data->SetDropped(); column_family_data->SetDropped();
} }
@ -1789,6 +1792,7 @@ Status VersionSet::Recover(
uint64_t last_sequence = 0; uint64_t last_sequence = 0;
uint64_t log_number = 0; uint64_t log_number = 0;
uint64_t prev_log_number = 0; uint64_t prev_log_number = 0;
uint32_t max_column_family = 0;
std::unordered_map<uint32_t, Builder*> builders; std::unordered_map<uint32_t, Builder*> builders;
// add default column family // add default column family
@ -1918,6 +1922,10 @@ Status VersionSet::Recover(
have_next_file = true; have_next_file = true;
} }
if (edit.has_max_column_family_) {
max_column_family = edit.max_column_family_;
}
if (edit.has_last_sequence_) { if (edit.has_last_sequence_) {
last_sequence = edit.last_sequence_; last_sequence = edit.last_sequence_;
have_last_sequence = true; have_last_sequence = true;
@ -1938,6 +1946,8 @@ Status VersionSet::Recover(
prev_log_number = 0; prev_log_number = 0;
} }
column_family_set_->UpdateMaxColumnFamily(max_column_family);
MarkFileNumberUsed(prev_log_number); MarkFileNumberUsed(prev_log_number);
MarkFileNumberUsed(log_number); MarkFileNumberUsed(log_number);
} }
@ -1981,13 +1991,15 @@ Status VersionSet::Recover(
Log(options_->info_log, "Recovered from manifest file:%s succeeded," Log(options_->info_log, "Recovered from manifest file:%s succeeded,"
"manifest_file_number is %lu, next_file_number is %lu, " "manifest_file_number is %lu, next_file_number is %lu, "
"last_sequence is %lu, log_number is %lu," "last_sequence is %lu, log_number is %lu,"
"prev_log_number is %lu\n", "prev_log_number is %lu,"
"max_column_family is %u\n",
manifest_filename.c_str(), manifest_filename.c_str(),
(unsigned long)manifest_file_number_, (unsigned long)manifest_file_number_,
(unsigned long)next_file_number_, (unsigned long)next_file_number_,
(unsigned long)last_sequence_, (unsigned long)last_sequence_,
(unsigned long)log_number, (unsigned long)log_number,
(unsigned long)prev_log_number_); (unsigned long)prev_log_number_,
column_family_set_->GetMaxColumnFamily());
for (auto cfd : *column_family_set_) { for (auto cfd : *column_family_set_) {
Log(options_->info_log, Log(options_->info_log,
@ -2267,6 +2279,10 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
last_sequence = edit.last_sequence_; last_sequence = edit.last_sequence_;
have_last_sequence = true; have_last_sequence = true;
} }
if (edit.has_max_column_family_) {
column_family_set_->UpdateMaxColumnFamily(edit.max_column_family_);
}
} }
} }
file.reset(); file.reset();
@ -2315,9 +2331,10 @@ Status VersionSet::DumpManifest(Options& options, std::string& dscname,
printf( printf(
"manifest_file_number %lu next_file_number %lu last_sequence " "manifest_file_number %lu next_file_number %lu last_sequence "
"%lu prev_log_number %lu\n", "%lu prev_log_number %lu max_column_family %u\n",
(unsigned long)manifest_file_number_, (unsigned long)next_file_number_, (unsigned long)manifest_file_number_, (unsigned long)next_file_number_,
(unsigned long)last_sequence, (unsigned long)prev_log_number); (unsigned long)last_sequence, (unsigned long)prev_log_number,
column_family_set_->GetMaxColumnFamily());
} }
return s; return s;
@ -2378,6 +2395,18 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
} }
} }
// save max column family to avoid reusing the same column
// family ID for two different column families
if (column_family_set_->GetMaxColumnFamily() > 0) {
VersionEdit edit;
edit.SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily());
std::string record;
edit.EncodeTo(&record);
Status s = log->AddRecord(record);
if (!s.ok()) {
return s;
}
}
return Status::OK(); return Status::OK();
} }

Loading…
Cancel
Save