[CF] Code cleanup part 1

Summary:
I'm cleaning up some code preparing for the big diff review tomorrow. This is the first part of the cleanup.

Changes are mostly cosmetic. The goal is to decrease amount of code difference between columnfamilies and master branch.

This diff also fixes race condition when dropping column family.

Test Plan: Ran db_stress with variety of parameters

Reviewers: dhruba, haobo

Differential Revision: https://reviews.facebook.net/D16833
main
Igor Canadi 11 years ago
parent 56ca8338e5
commit fb2346fc1f
  1. 3
      HISTORY.md
  2. 124
      db/column_family.cc
  3. 131
      db/column_family.h
  4. 2
      db/column_family_test.cc
  5. 16
      db/compaction_picker.cc
  6. 18
      db/compaction_picker.h
  7. 146
      db/db_impl.cc
  8. 7
      db/db_impl_readonly.cc
  9. 2
      db/memtable.cc
  10. 2
      db/memtable.h
  11. 1
      db/repair.cc
  12. 37
      db/version_set.cc
  13. 6
      db/write_batch.cc
  14. 4
      db/write_batch_internal.h
  15. 2
      db/write_batch_test.cc
  16. 3
      table/table_test.cc

@ -4,6 +4,9 @@
* By default, max_background_flushes is 1 and flush process is * By default, max_background_flushes is 1 and flush process is
removed from background compaction process. Flush process is now always removed from background compaction process. Flush process is now always
executed in high priority thread pool. executed in high priority thread pool.
* Column family support
* If you write something to the non-default column family with disableWAL = true,
you need to Flush the column family before exiting if you want your data to be persistent
## Unreleased (will be relased in 2.8) ## Unreleased (will be relased in 2.8)

@ -185,9 +185,8 @@ ColumnFamilyData::ColumnFamilyData(const std::string& dbname, uint32_t id,
dropped_(false), dropped_(false),
internal_comparator_(options.comparator), internal_comparator_(options.comparator),
internal_filter_policy_(options.filter_policy), internal_filter_policy_(options.filter_policy),
options_(SanitizeOptions(&internal_comparator_, &internal_filter_policy_, options_(*db_options, SanitizeOptions(&internal_comparator_,
options)), &internal_filter_policy_, options)),
full_options_(*db_options, options_),
mem_(nullptr), mem_(nullptr),
imm_(options.min_write_buffer_number_to_merge), imm_(options.min_write_buffer_number_to_merge),
super_version_(nullptr), super_version_(nullptr),
@ -205,18 +204,19 @@ ColumnFamilyData::ColumnFamilyData(const std::string& dbname, uint32_t id,
internal_stats_.reset(new InternalStats(options.num_levels, db_options->env, internal_stats_.reset(new InternalStats(options.num_levels, db_options->env,
db_options->statistics.get())); db_options->statistics.get()));
table_cache_.reset( table_cache_.reset(
new TableCache(dbname, &full_options_, storage_options, table_cache)); new TableCache(dbname, &options_, storage_options, table_cache));
if (options_.compaction_style == kCompactionStyleUniversal) { if (options_.compaction_style == kCompactionStyleUniversal) {
compaction_picker_.reset(new UniversalCompactionPicker( compaction_picker_.reset(
&options_, &internal_comparator_, db_options->info_log.get())); new UniversalCompactionPicker(&options_, &internal_comparator_));
} else { } else {
compaction_picker_.reset(new LevelCompactionPicker( compaction_picker_.reset(
&options_, &internal_comparator_, db_options->info_log.get())); new LevelCompactionPicker(&options_, &internal_comparator_));
} }
Log(full_options_.info_log, "Options for column family \"%s\":\n", Log(options_.info_log, "Options for column family \"%s\":\n",
name.c_str()); name.c_str());
options_.Dump(full_options_.info_log.get()); const ColumnFamilyOptions* cf_options = &options_;
cf_options->Dump(options_.info_log.get());
} }
} }
@ -232,14 +232,27 @@ ColumnFamilyData::~ColumnFamilyData() {
// it's nullptr for dummy CFD // it's nullptr for dummy CFD
if (column_family_set_ != nullptr) { if (column_family_set_ != nullptr) {
// remove from column_family_set // remove from column_family_set
column_family_set_->DropColumnFamily(this); column_family_set_->RemoveColumnFamily(this);
} }
if (current_ != nullptr) { if (current_ != nullptr) {
current_->Unref(); current_->Unref();
} }
DeleteSuperVersion(); if (super_version_ != nullptr) {
// Release SuperVersion reference kept in ThreadLocalPtr.
// This must be done outside of mutex_ since unref handler can lock mutex.
super_version_->db_mutex->Unlock();
local_sv_.reset();
super_version_->db_mutex->Lock();
bool is_last_reference __attribute__((unused));
is_last_reference = super_version_->Unref();
assert(is_last_reference);
super_version_->Cleanup();
delete super_version_;
super_version_ = nullptr;
}
if (dummy_versions_ != nullptr) { if (dummy_versions_ != nullptr) {
// List must be empty // List must be empty
@ -257,10 +270,6 @@ ColumnFamilyData::~ColumnFamilyData() {
} }
} }
InternalStats* ColumnFamilyData::internal_stats() {
return internal_stats_.get();
}
void ColumnFamilyData::SetCurrent(Version* current) { void ColumnFamilyData::SetCurrent(Version* current) {
current_ = current; current_ = current;
need_slowdown_for_num_level0_files_ = need_slowdown_for_num_level0_files_ =
@ -320,23 +329,6 @@ void ColumnFamilyData::ResetThreadLocalSuperVersions() {
} }
} }
void ColumnFamilyData::DeleteSuperVersion() {
if (super_version_ != nullptr) {
// Release SuperVersion reference kept in ThreadLocalPtr.
// This must be done outside of mutex_ since unref handler can lock mutex.
super_version_->db_mutex->Unlock();
local_sv_.reset();
super_version_->db_mutex->Lock();
bool is_last_reference __attribute__((unused));
is_last_reference = super_version_->Unref();
assert(is_last_reference);
super_version_->Cleanup();
delete super_version_;
super_version_ = nullptr;
}
}
ColumnFamilySet::ColumnFamilySet(const std::string& dbname, ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
const DBOptions* db_options, const DBOptions* db_options,
const EnvOptions& storage_options, const EnvOptions& storage_options,
@ -345,6 +337,7 @@ ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
dummy_cfd_(new ColumnFamilyData(dbname, 0, "", nullptr, nullptr, dummy_cfd_(new ColumnFamilyData(dbname, 0, "", nullptr, nullptr,
ColumnFamilyOptions(), db_options, ColumnFamilyOptions(), db_options,
storage_options_, nullptr)), storage_options_, nullptr)),
default_cfd_cache_(nullptr),
db_name_(dbname), db_name_(dbname),
db_options_(db_options), db_options_(db_options),
storage_options_(storage_options), storage_options_(storage_options),
@ -367,10 +360,8 @@ ColumnFamilySet::~ColumnFamilySet() {
} }
ColumnFamilyData* ColumnFamilySet::GetDefault() const { ColumnFamilyData* ColumnFamilySet::GetDefault() const {
auto cfd = GetColumnFamily(0); assert(default_cfd_cache_ != nullptr);
// default column family should always exist return default_cfd_cache_;
assert(cfd != nullptr);
return cfd;
} }
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const { ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
@ -385,24 +376,13 @@ ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name) ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name)
const { const {
auto cfd_iter = column_families_.find(name); auto cfd_iter = column_families_.find(name);
if (cfd_iter == column_families_.end()) { if (cfd_iter != column_families_.end()) {
auto cfd = GetColumnFamily(cfd_iter->second);
assert(cfd != nullptr);
return cfd;
} else {
return nullptr; return nullptr;
} }
return GetColumnFamily(cfd_iter->second);
}
bool ColumnFamilySet::Exists(uint32_t id) {
return column_family_data_.find(id) != column_family_data_.end();
}
bool ColumnFamilySet::Exists(const std::string& name) {
return column_families_.find(name) != column_families_.end();
}
uint32_t ColumnFamilySet::GetID(const std::string& name) {
auto cfd_iter = column_families_.find(name);
assert(cfd_iter != column_families_.end());
return cfd_iter->second;
} }
uint32_t ColumnFamilySet::GetNextColumnFamilyID() { uint32_t ColumnFamilySet::GetNextColumnFamilyID() {
@ -434,11 +414,22 @@ ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
new_cfd->prev_ = prev; new_cfd->prev_ = prev;
prev->next_ = new_cfd; prev->next_ = new_cfd;
dummy_cfd_->prev_ = new_cfd; dummy_cfd_->prev_ = new_cfd;
if (id == 0) {
default_cfd_cache_ = new_cfd;
}
return new_cfd; return new_cfd;
} }
void ColumnFamilySet::Lock() {
// spin lock
while (spin_lock_.test_and_set(std::memory_order_acquire)) {
}
}
void ColumnFamilySet::Unlock() { spin_lock_.clear(std::memory_order_release); }
// under a DB mutex // under a DB mutex
void ColumnFamilySet::DropColumnFamily(ColumnFamilyData* cfd) { void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) {
auto cfd_iter = column_family_data_.find(cfd->GetID()); auto cfd_iter = column_family_data_.find(cfd->GetID());
assert(cfd_iter != column_family_data_.end()); assert(cfd_iter != column_family_data_.end());
Lock(); Lock();
@ -447,19 +438,16 @@ void ColumnFamilySet::DropColumnFamily(ColumnFamilyData* cfd) {
Unlock(); Unlock();
} }
void ColumnFamilySet::Lock() {
// spin lock
while (spin_lock_.test_and_set(std::memory_order_acquire)) {
}
}
void ColumnFamilySet::Unlock() { spin_lock_.clear(std::memory_order_release); }
bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) { bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) {
// maybe outside of db mutex, should lock if (column_family_id == 0) {
column_family_set_->Lock(); // optimization for common case
current_ = column_family_set_->GetColumnFamily(column_family_id); current_ = column_family_set_->GetDefault();
column_family_set_->Unlock(); } else {
// maybe outside of db mutex, should lock
column_family_set_->Lock();
current_ = column_family_set_->GetColumnFamily(column_family_id);
column_family_set_->Unlock();
}
handle_.SetCFD(current_); handle_.SetCFD(current_);
return current_ != nullptr; return current_ != nullptr;
} }
@ -474,9 +462,9 @@ MemTable* ColumnFamilyMemTablesImpl::GetMemTable() const {
return current_->mem(); return current_->mem();
} }
const Options* ColumnFamilyMemTablesImpl::GetFullOptions() const { const Options* ColumnFamilyMemTablesImpl::GetOptions() const {
assert(current_ != nullptr); assert(current_ != nullptr);
return current_->full_options(); return current_->options();
} }
ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() { ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() {

@ -15,6 +15,7 @@
#include <atomic> #include <atomic>
#include "rocksdb/options.h" #include "rocksdb/options.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h" #include "rocksdb/env.h"
#include "db/memtable_list.h" #include "db/memtable_list.h"
#include "db/write_batch_internal.h" #include "db/write_batch_internal.h"
@ -35,6 +36,9 @@ class ColumnFamilyData;
class DBImpl; class DBImpl;
class LogBuffer; class LogBuffer;
// ColumnFamilyHandleImpl is the class that clients use to access different
// column families. It has non-trivial destructor, which gets called when client
// is done using the column family
class ColumnFamilyHandleImpl : public ColumnFamilyHandle { class ColumnFamilyHandleImpl : public ColumnFamilyHandle {
public: public:
// create while holding the mutex // create while holding the mutex
@ -51,7 +55,12 @@ class ColumnFamilyHandleImpl : public ColumnFamilyHandle {
port::Mutex* mutex_; port::Mutex* mutex_;
}; };
// does not ref-count cfd_ // Does not ref-count ColumnFamilyData
// We use this dummy ColumnFamilyHandleImpl because sometimes MemTableInserter
// calls DBImpl methods. When this happens, MemTableInserter need access to
// ColumnFamilyHandle (same as the client would need). In that case, we feed
// MemTableInserter dummy ColumnFamilyHandle and enable it to call DBImpl
// methods
class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl { class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl {
public: public:
ColumnFamilyHandleInternal() ColumnFamilyHandleInternal()
@ -110,15 +119,18 @@ extern ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp,
class ColumnFamilySet; class ColumnFamilySet;
// column family metadata. not thread-safe. should be protected by db_mutex // This class keeps all the data that a column family needs. It's mosly dumb and
// used just to provide access to metadata.
// Most methods require DB mutex held, unless otherwise noted
class ColumnFamilyData { class ColumnFamilyData {
public: public:
~ColumnFamilyData(); ~ColumnFamilyData();
// thread-safe
uint32_t GetID() const { return id_; } uint32_t GetID() const { return id_; }
const std::string& GetName() { return name_; } // thread-safe
const std::string& GetName() const { return name_; }
// DB mutex held for all these
void Ref() { ++refs_; } void Ref() { ++refs_; }
// will just decrease reference count to 0, but will not delete it. returns // will just decrease reference count to 0, but will not delete it. returns
// true if the ref count was decreased to zero and needs to be cleaned up by // true if the ref count was decreased to zero and needs to be cleaned up by
@ -127,24 +139,37 @@ class ColumnFamilyData {
assert(refs_ > 0); assert(refs_ > 0);
return --refs_ == 0; return --refs_ == 0;
} }
bool Dead() { return refs_ == 0; }
// SetDropped() and IsDropped() are thread-safe // This can only be called from single-threaded VersionSet::LogAndApply()
// After dropping column family no other operation on that column family
// will be executed. All the files and memory will be, however, kept around
// until client drops the column family handle. That way, client can still
// access data from dropped column family.
// Column family can be dropped and still alive. In that state:
// *) Column family is not included in the iteration.
// *) Compaction and flush is not executed on the dropped column family.
// *) Client can continue writing and reading from column family. However, all
// writes stay in the current memtable.
// When the dropped column family is unreferenced, then we:
// *) delete all memory associated with that column family
// *) delete all the files associated with that column family
void SetDropped() { void SetDropped() {
// can't drop default CF // can't drop default CF
assert(id_ != 0); assert(id_ != 0);
dropped_.store(true); dropped_ = true;
} }
bool IsDropped() const { return dropped_.load(); } bool IsDropped() const { return dropped_; }
// thread-safe
int NumberLevels() const { return options_.num_levels; } int NumberLevels() const { return options_.num_levels; }
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; } void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
uint64_t GetLogNumber() const { return log_number_; } uint64_t GetLogNumber() const { return log_number_; }
const ColumnFamilyOptions* options() const { return &options_; } // thread-safe
const Options* full_options() const { return &full_options_; } const Options* options() const { return &options_; }
InternalStats* internal_stats();
InternalStats* internal_stats() { return internal_stats_.get(); }
MemTableList* imm() { return &imm_; } MemTableList* imm() { return &imm_; }
MemTable* mem() { return mem_; } MemTable* mem() { return mem_; }
@ -154,7 +179,7 @@ class ColumnFamilyData {
void SetCurrent(Version* current); void SetCurrent(Version* current);
void CreateNewMemtable(); void CreateNewMemtable();
TableCache* table_cache() const { return table_cache_.get(); } TableCache* table_cache() { return table_cache_.get(); }
// See documentation in compaction_picker.h // See documentation in compaction_picker.h
Compaction* PickCompaction(LogBuffer* log_buffer); Compaction* PickCompaction(LogBuffer* log_buffer);
@ -162,18 +187,20 @@ class ColumnFamilyData {
const InternalKey* begin, const InternalKey* end, const InternalKey* begin, const InternalKey* end,
InternalKey** compaction_end); InternalKey** compaction_end);
CompactionPicker* compaction_picker() const { CompactionPicker* compaction_picker() { return compaction_picker_.get(); }
return compaction_picker_.get(); // thread-safe
}
const Comparator* user_comparator() const { const Comparator* user_comparator() const {
return internal_comparator_.user_comparator(); return internal_comparator_.user_comparator();
} }
// thread-safe
const InternalKeyComparator& internal_comparator() const { const InternalKeyComparator& internal_comparator() const {
return internal_comparator_; return internal_comparator_;
} }
SuperVersion* GetSuperVersion() const { return super_version_; } SuperVersion* GetSuperVersion() { return super_version_; }
// thread-safe
ThreadLocalPtr* GetThreadLocalSuperVersion() const { return local_sv_.get(); } ThreadLocalPtr* GetThreadLocalSuperVersion() const { return local_sv_.get(); }
// thread-safe
uint64_t GetSuperVersionNumber() const { uint64_t GetSuperVersionNumber() const {
return super_version_number_.load(); return super_version_number_.load();
} }
@ -185,9 +212,6 @@ class ColumnFamilyData {
port::Mutex* db_mutex); port::Mutex* db_mutex);
void ResetThreadLocalSuperVersions(); void ResetThreadLocalSuperVersions();
// REQUIRED: db mutex held
// Do not access column family after calling this method
void DeleteSuperVersion();
// A Flag indicating whether write needs to slowdown because of there are // A Flag indicating whether write needs to slowdown because of there are
// too many number of level0 files. // too many number of level0 files.
@ -204,21 +228,18 @@ class ColumnFamilyData {
const EnvOptions& storage_options, const EnvOptions& storage_options,
ColumnFamilySet* column_family_set); ColumnFamilySet* column_family_set);
ColumnFamilyData* next() { return next_; }
uint32_t id_; uint32_t id_;
const std::string name_; const std::string name_;
Version* dummy_versions_; // Head of circular doubly-linked list of versions. Version* dummy_versions_; // Head of circular doubly-linked list of versions.
Version* current_; // == dummy_versions->prev_ Version* current_; // == dummy_versions->prev_
int refs_; // outstanding references to ColumnFamilyData int refs_; // outstanding references to ColumnFamilyData
std::atomic<bool> dropped_; // true if client dropped it bool dropped_; // true if client dropped it
const InternalKeyComparator internal_comparator_; const InternalKeyComparator internal_comparator_;
const InternalFilterPolicy internal_filter_policy_; const InternalFilterPolicy internal_filter_policy_;
ColumnFamilyOptions const options_; Options const options_;
Options const full_options_;
std::unique_ptr<TableCache> table_cache_; std::unique_ptr<TableCache> table_cache_;
@ -258,19 +279,33 @@ class ColumnFamilyData {
ColumnFamilySet* column_family_set_; ColumnFamilySet* column_family_set_;
}; };
// Thread safe only for reading without a writer. All access should be // ColumnFamilySet has interesting thread-safety requirements
// locked when adding or dropping column family // * CreateColumnFamily() or RemoveColumnFamily() -- need to protect by DB
// mutex. Inside, column_family_data_ and column_families_ will be protected
// by Lock() and Unlock(). CreateColumnFamily() should ONLY be called from
// VersionSet::LogAndApply() in the normal runtime. It is also called
// during Recovery and in DumpManifest(). RemoveColumnFamily() is called
// from ColumnFamilyData destructor
// * Iteration -- hold DB mutex, but you can release it in the body of
// iteration. If you release DB mutex in body, reference the column
// family before the mutex and unreference after you unlock, since the column
// family might get dropped when the DB mutex is released
// * GetDefault() -- thread safe
// * GetColumnFamily() -- either inside of DB mutex or call Lock() <-> Unlock()
// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily() --
// inside of DB mutex
class ColumnFamilySet { class ColumnFamilySet {
public: public:
// ColumnFamilySet supports iteration
class iterator { class iterator {
public: public:
explicit iterator(ColumnFamilyData* cfd) explicit iterator(ColumnFamilyData* cfd)
: current_(cfd) {} : current_(cfd) {}
iterator& operator++() { iterator& operator++() {
// dummy is never dead, so this will never be infinite // dummy is never dead or dropped, so this will never be infinite
do { do {
current_ = current_->next(); current_ = current_->next_;
} while (current_->Dead()); } while (current_->refs_ == 0 || current_->IsDropped());
return *this; return *this;
} }
bool operator!=(const iterator& other) { bool operator!=(const iterator& other) {
@ -290,9 +325,6 @@ class ColumnFamilySet {
// GetColumnFamily() calls return nullptr if column family is not found // GetColumnFamily() calls return nullptr if column family is not found
ColumnFamilyData* GetColumnFamily(uint32_t id) const; ColumnFamilyData* GetColumnFamily(uint32_t id) const;
ColumnFamilyData* GetColumnFamily(const std::string& name) const; ColumnFamilyData* GetColumnFamily(const std::string& name) const;
bool Exists(uint32_t id);
bool Exists(const std::string& name);
uint32_t GetID(const std::string& name);
// this call will return the next available column family ID. it guarantees // this call will return the next available column family ID. it guarantees
// that there is no column family with id greater than or equal to the // that there is no column family with id greater than or equal to the
// returned value in the current running instance or anytime in RocksDB // returned value in the current running instance or anytime in RocksDB
@ -304,34 +336,33 @@ class ColumnFamilySet {
ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id, ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id,
Version* dummy_version, Version* dummy_version,
const ColumnFamilyOptions& options); const ColumnFamilyOptions& options);
void DropColumnFamily(ColumnFamilyData* cfd);
iterator begin() { return iterator(dummy_cfd_->next()); } iterator begin() { return iterator(dummy_cfd_->next_); }
iterator end() { return iterator(dummy_cfd_); } iterator end() { return iterator(dummy_cfd_); }
// ColumnFamilySet has interesting thread-safety requirements
// * CreateColumnFamily() or DropColumnFamily() -- need to protect by DB
// mutex. Inside, column_family_data_ and column_families_ will be protected
// by Lock() and Unlock()
// * Iterate -- hold DB mutex, but you can release it in the body of
// iteration. If you release DB mutex in body, reference the column
// family before the mutex and unreference after you unlock, since the column
// family might get dropped when you release the DB mutex.
// * GetDefault(), GetColumnFamily(), Exists(), GetID() -- either inside of DB
// mutex or call Lock()
// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily() --
// inside of DB mutex
void Lock(); void Lock();
void Unlock(); void Unlock();
private: private:
// when mutating: 1. DB mutex locked first, 2. spinlock locked second friend class ColumnFamilyData;
// when reading, either: 1. lock DB mutex, or 2. lock spinlock // helper function that gets called from cfd destructor
// REQUIRES: DB mutex held
void RemoveColumnFamily(ColumnFamilyData* cfd);
// column_families_ and column_family_data_ need to be protected:
// * when mutating: 1. DB mutex locked first, 2. spinlock locked second
// * when reading, either: 1. lock DB mutex, or 2. lock spinlock
// (if both, respect the ordering to avoid deadlock!) // (if both, respect the ordering to avoid deadlock!)
std::unordered_map<std::string, uint32_t> column_families_; std::unordered_map<std::string, uint32_t> column_families_;
std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_; std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_;
uint32_t max_column_family_; uint32_t max_column_family_;
ColumnFamilyData* dummy_cfd_; ColumnFamilyData* dummy_cfd_;
// We don't hold the refcount here, since default column family always exists
// We are also not responsible for cleaning up default_cfd_cache_. This is
// just a cache that makes common case (accessing default column family)
// faster
ColumnFamilyData* default_cfd_cache_;
const std::string db_name_; const std::string db_name_;
const DBOptions* const db_options_; const DBOptions* const db_options_;
@ -340,6 +371,8 @@ class ColumnFamilySet {
std::atomic_flag spin_lock_; std::atomic_flag spin_lock_;
}; };
// We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access
// memtables of different column families (specified by ID in the write batch)
class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables { class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables {
public: public:
explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set) explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set)
@ -357,7 +390,7 @@ class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables {
// Returns options for selected column family // Returns options for selected column family
// REQUIRES: Seek() called first // REQUIRES: Seek() called first
virtual const Options* GetFullOptions() const override; virtual const Options* GetOptions() const override;
// Returns column family handle for the selected column family // Returns column family handle for the selected column family
virtual ColumnFamilyHandle* GetColumnFamilyHandle() override; virtual ColumnFamilyHandle* GetColumnFamilyHandle() override;

@ -797,7 +797,7 @@ std::string IterStatus(Iterator* iter) {
} }
return result; return result;
} }
} // namespace anonymous } // anonymous namespace
TEST(ColumnFamilyTest, NewIteratorsTest) { TEST(ColumnFamilyTest, NewIteratorsTest) {
// iter == 0 -- no tailing // iter == 0 -- no tailing

@ -42,11 +42,9 @@ uint64_t MultiplyCheckOverflow(uint64_t op1, int op2) {
} // anonymous namespace } // anonymous namespace
CompactionPicker::CompactionPicker(const ColumnFamilyOptions* options, CompactionPicker::CompactionPicker(const Options* options,
const InternalKeyComparator* icmp, const InternalKeyComparator* icmp)
Logger* logger)
: compactions_in_progress_(options->num_levels), : compactions_in_progress_(options->num_levels),
logger_(logger),
options_(options), options_(options),
num_levels_(options->num_levels), num_levels_(options->num_levels),
icmp_(icmp) { icmp_(icmp) {
@ -272,7 +270,7 @@ void CompactionPicker::SetupOtherInputs(Compaction* c) {
&c->parent_index_); &c->parent_index_);
if (expanded1.size() == c->inputs_[1].size() && if (expanded1.size() == c->inputs_[1].size() &&
!FilesInCompaction(expanded1)) { !FilesInCompaction(expanded1)) {
Log(logger_, Log(options_->info_log,
"Expanding@%lu %lu+%lu (%lu+%lu bytes) to %lu+%lu (%lu+%lu bytes)" "Expanding@%lu %lu+%lu (%lu+%lu bytes) to %lu+%lu (%lu+%lu bytes)"
"\n", "\n",
(unsigned long)level, (unsigned long)(c->inputs_[0].size()), (unsigned long)level, (unsigned long)(c->inputs_[0].size()),
@ -343,7 +341,7 @@ Compaction* CompactionPicker::CompactRange(Version* version, int input_level,
c->inputs_[0] = inputs; c->inputs_[0] = inputs;
if (ExpandWhileOverlapping(c) == false) { if (ExpandWhileOverlapping(c) == false) {
delete c; delete c;
Log(logger_, "Could not compact due to expansion failure.\n"); Log(options_->info_log, "Could not compact due to expansion failure.\n");
return nullptr; return nullptr;
} }
@ -514,7 +512,7 @@ Compaction* LevelCompactionPicker::PickCompactionBySize(Version* version,
} }
//if (i > Version::number_of_files_to_sort_) { //if (i > Version::number_of_files_to_sort_) {
// Log(logger_, "XXX Looking at index %d", i); // Log(options_->info_log, "XXX Looking at index %d", i);
//} //}
// Do not pick this file if its parents at level+1 are being compacted. // Do not pick this file if its parents at level+1 are being compacted.
@ -610,6 +608,10 @@ Compaction* UniversalCompactionPicker::PickCompaction(Version* version,
c->bottommost_level_ = true; c->bottommost_level_ = true;
} }
// update statistics
MeasureTime(options_->statistics.get(), NUM_FILES_IN_SINGLE_COMPACTION,
c->inputs_[0].size());
// mark all the files that are being compacted // mark all the files that are being compacted
c->MarkFilesBeingCompacted(true); c->MarkFilesBeingCompacted(true);

@ -26,8 +26,7 @@ class Version;
class CompactionPicker { class CompactionPicker {
public: public:
CompactionPicker(const ColumnFamilyOptions* options, CompactionPicker(const Options* options, const InternalKeyComparator* icmp);
const InternalKeyComparator* icmp, Logger* logger);
virtual ~CompactionPicker(); virtual ~CompactionPicker();
// Pick level and inputs for a new compaction. // Pick level and inputs for a new compaction.
@ -119,8 +118,7 @@ class CompactionPicker {
// Per-level max bytes // Per-level max bytes
std::unique_ptr<uint64_t[]> level_max_bytes_; std::unique_ptr<uint64_t[]> level_max_bytes_;
Logger* logger_; const Options* const options_;
const ColumnFamilyOptions* const options_;
private: private:
int num_levels_; int num_levels_;
@ -130,9 +128,9 @@ class CompactionPicker {
class UniversalCompactionPicker : public CompactionPicker { class UniversalCompactionPicker : public CompactionPicker {
public: public:
UniversalCompactionPicker(const ColumnFamilyOptions* options, UniversalCompactionPicker(const Options* options,
const InternalKeyComparator* icmp, Logger* logger) const InternalKeyComparator* icmp)
: CompactionPicker(options, icmp, logger) {} : CompactionPicker(options, icmp) {}
virtual Compaction* PickCompaction(Version* version, virtual Compaction* PickCompaction(Version* version,
LogBuffer* log_buffer) override; LogBuffer* log_buffer) override;
@ -150,9 +148,9 @@ class UniversalCompactionPicker : public CompactionPicker {
class LevelCompactionPicker : public CompactionPicker { class LevelCompactionPicker : public CompactionPicker {
public: public:
LevelCompactionPicker(const ColumnFamilyOptions* options, LevelCompactionPicker(const Options* options,
const InternalKeyComparator* icmp, Logger* logger) const InternalKeyComparator* icmp)
: CompactionPicker(options, icmp, logger) {} : CompactionPicker(options, icmp) {}
virtual Compaction* PickCompaction(Version* version, virtual Compaction* PickCompaction(Version* version,
LogBuffer* log_buffer) override; LogBuffer* log_buffer) override;

@ -276,26 +276,17 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname)
} }
DBImpl::~DBImpl() { DBImpl::~DBImpl() {
// Wait for background work to finish // only the default CFD is alive at this point
mutex_.Lock(); if (default_cf_handle_ != nullptr) {
if (flush_on_destroy_) { auto default_cfd = default_cf_handle_->cfd();
autovector<ColumnFamilyData*> to_delete; if (flush_on_destroy_ &&
for (auto cfd : *versions_->GetColumnFamilySet()) { default_cfd->mem()->GetFirstSequenceNumber() != 0) {
// TODO(icanadi) do this in ColumnFamilyData destructor FlushMemTable(default_cfd, FlushOptions());
if (!cfd->IsDropped() && cfd->mem()->GetFirstSequenceNumber() != 0) {
cfd->Ref();
mutex_.Unlock();
FlushMemTable(cfd, FlushOptions());
mutex_.Lock();
if (cfd->Unref()) {
to_delete.push_back(cfd);
}
}
}
for (auto cfd : to_delete) {
delete cfd;
} }
} }
mutex_.Lock();
// Wait for background work to finish
shutting_down_.Release_Store(this); // Any non-nullptr value is ok shutting_down_.Release_Store(this); // Any non-nullptr value is ok
while (bg_compaction_scheduled_ || while (bg_compaction_scheduled_ ||
bg_flush_scheduled_ || bg_flush_scheduled_ ||
@ -303,8 +294,11 @@ DBImpl::~DBImpl() {
bg_cv_.Wait(); bg_cv_.Wait();
} }
for (auto cfd : *versions_->GetColumnFamilySet()) { if (default_cf_handle_ != nullptr) {
cfd->DeleteSuperVersion(); // we need to delete handle outside of lock because it does its own locking
mutex_.Unlock();
delete default_cf_handle_;
mutex_.Lock();
} }
if (options_.allow_thread_local) { if (options_.allow_thread_local) {
@ -328,21 +322,13 @@ DBImpl::~DBImpl() {
} }
} }
mutex_.Unlock();
if (default_cf_handle_ != nullptr) {
// we need to delete handle outside of lock because it does its own locking
delete default_cf_handle_;
}
if (db_lock_ != nullptr) {
env_->UnlockFile(db_lock_);
}
mutex_.Lock();
// versions need to be destroyed before table_cache since it can hold // versions need to be destroyed before table_cache since it can hold
// references to table_cache. // references to table_cache.
versions_.reset(); versions_.reset();
mutex_.Unlock(); mutex_.Unlock();
if (db_lock_ != nullptr) {
env_->UnlockFile(db_lock_);
}
LogFlush(options_.info_log); LogFlush(options_.info_log);
} }
@ -876,14 +862,8 @@ Status DBImpl::Recover(
versions_->MarkFileNumberUsed(log); versions_->MarkFileNumberUsed(log);
s = RecoverLogFile(log, &max_sequence, read_only); s = RecoverLogFile(log, &max_sequence, read_only);
} }
SetTickerCount(options_.statistics.get(), SEQUENCE_NUMBER,
if (s.ok()) { versions_->LastSequence());
if (versions_->LastSequence() < max_sequence) {
versions_->SetLastSequence(max_sequence);
}
SetTickerCount(options_.statistics.get(), SEQUENCE_NUMBER,
versions_->LastSequence());
}
} }
return s; return s;
@ -1029,9 +1009,11 @@ Status DBImpl::RecoverLogFile(uint64_t log_number, SequenceNumber* max_sequence,
// we must mark the next log number as used, even though it's // we must mark the next log number as used, even though it's
// not actually used. that is because VersionSet assumes // not actually used. that is because VersionSet assumes
// VersionSet::next_file_number_ always to be strictly greater than any // VersionSet::next_file_number_ always to be strictly greater than any
// log // log number
// number
versions_->MarkFileNumberUsed(log_number + 1); versions_->MarkFileNumberUsed(log_number + 1);
if (versions_->LastSequence() < *max_sequence) {
versions_->SetLastSequence(*max_sequence);
}
status = versions_->LogAndApply(cfd, edit, &mutex_); status = versions_->LogAndApply(cfd, edit, &mutex_);
if (!status.ok()) { if (!status.ok()) {
return status; return status;
@ -1059,10 +1041,10 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
Status s; Status s;
{ {
mutex_.Unlock(); mutex_.Unlock();
s = BuildTable(dbname_, env_, *cfd->full_options(), storage_options_, s = BuildTable(dbname_, env_, *cfd->options(), storage_options_,
cfd->table_cache(), iter, &meta, cfd->internal_comparator(), cfd->table_cache(), iter, &meta, cfd->internal_comparator(),
newest_snapshot, earliest_seqno_in_memtable, newest_snapshot, earliest_seqno_in_memtable,
GetCompressionFlush(*cfd->full_options())); GetCompressionFlush(*cfd->options()));
LogFlush(options_.info_log); LogFlush(options_.info_log);
mutex_.Lock(); mutex_.Lock();
} }
@ -1124,10 +1106,10 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
Log(options_.info_log, "Level-0 flush table #%lu: started", Log(options_.info_log, "Level-0 flush table #%lu: started",
(unsigned long)meta.number); (unsigned long)meta.number);
s = BuildTable(dbname_, env_, *cfd->full_options(), storage_options_, s = BuildTable(dbname_, env_, *cfd->options(), storage_options_,
cfd->table_cache(), iter, &meta, cfd->internal_comparator(), cfd->table_cache(), iter, &meta, cfd->internal_comparator(),
newest_snapshot, earliest_seqno_in_memtable, newest_snapshot, earliest_seqno_in_memtable,
GetCompressionFlush(*cfd->full_options())); GetCompressionFlush(*cfd->options()));
LogFlush(options_.info_log); LogFlush(options_.info_log);
delete iter; delete iter;
Log(options_.info_log, "Level-0 flush table #%lu: %lu bytes %s", Log(options_.info_log, "Level-0 flush table #%lu: %lu bytes %s",
@ -1758,7 +1740,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
bool is_flush_pending = false; bool is_flush_pending = false;
// no need to refcount since we're under a mutex // no need to refcount since we're under a mutex
for (auto cfd : *versions_->GetColumnFamilySet()) { for (auto cfd : *versions_->GetColumnFamilySet()) {
if (!cfd->IsDropped() && cfd->imm()->IsFlushPending()) { if (cfd->imm()->IsFlushPending()) {
is_flush_pending = true; is_flush_pending = true;
} }
} }
@ -1775,7 +1757,7 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
bool is_compaction_needed = false; bool is_compaction_needed = false;
// no need to refcount since we're under a mutex // no need to refcount since we're under a mutex
for (auto cfd : *versions_->GetColumnFamilySet()) { for (auto cfd : *versions_->GetColumnFamilySet()) {
if (!cfd->IsDropped() && cfd->current()->NeedsCompaction()) { if (cfd->current()->NeedsCompaction()) {
is_compaction_needed = true; is_compaction_needed = true;
break; break;
} }
@ -1813,9 +1795,6 @@ Status DBImpl::BackgroundFlush(bool* madeProgress,
autovector<ColumnFamilyData*> to_delete; autovector<ColumnFamilyData*> to_delete;
// refcounting in iteration // refcounting in iteration
for (auto cfd : *versions_->GetColumnFamilySet()) { for (auto cfd : *versions_->GetColumnFamilySet()) {
if (cfd->IsDropped()) {
continue;
}
cfd->Ref(); cfd->Ref();
Status flush_status; Status flush_status;
while (flush_status.ok() && cfd->imm()->IsFlushPending()) { while (flush_status.ok() && cfd->imm()->IsFlushPending()) {
@ -1988,7 +1967,7 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress,
} else { } else {
// no need to refcount in iteration since it's always under a mutex // no need to refcount in iteration since it's always under a mutex
for (auto cfd : *versions_->GetColumnFamilySet()) { for (auto cfd : *versions_->GetColumnFamilySet()) {
if (!cfd->options()->disable_auto_compactions && !cfd->IsDropped()) { if (!cfd->options()->disable_auto_compactions) {
c.reset(cfd->PickCompaction(log_buffer)); c.reset(cfd->PickCompaction(log_buffer));
if (c != nullptr) { if (c != nullptr) {
// update statistics // update statistics
@ -2166,12 +2145,12 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
1.1 * cfd->compaction_picker()->MaxFileSizeForLevel( 1.1 * cfd->compaction_picker()->MaxFileSizeForLevel(
compact->compaction->output_level())); compact->compaction->output_level()));
CompressionType compression_type = GetCompressionType( CompressionType compression_type =
*cfd->full_options(), compact->compaction->output_level(), GetCompressionType(*cfd->options(), compact->compaction->output_level(),
compact->compaction->enable_compression()); compact->compaction->enable_compression());
compact->builder.reset( compact->builder.reset(
NewTableBuilder(*cfd->full_options(), cfd->internal_comparator(), NewTableBuilder(*cfd->options(), cfd->internal_comparator(),
compact->outfile.get(), compression_type)); compact->outfile.get(), compression_type));
} }
LogFlush(options_.info_log); LogFlush(options_.info_log);
@ -2788,8 +2767,8 @@ std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
Iterator* mutable_iter = super_version->mem->NewIterator(options); Iterator* mutable_iter = super_version->mem->NewIterator(options);
// create a DBIter that only uses memtable content; see NewIterator() // create a DBIter that only uses memtable content; see NewIterator()
mutable_iter = mutable_iter =
NewDBIterator(&dbname_, env_, *cfd->full_options(), NewDBIterator(&dbname_, env_, *cfd->options(), cfd->user_comparator(),
cfd->user_comparator(), mutable_iter, kMaxSequenceNumber); mutable_iter, kMaxSequenceNumber);
std::vector<Iterator*> list; std::vector<Iterator*> list;
super_version->imm->AddIterators(options, &list); super_version->imm->AddIterators(options, &list);
@ -2799,8 +2778,8 @@ std::pair<Iterator*, Iterator*> DBImpl::GetTailingIteratorPair(
// create a DBIter that only uses memtable content; see NewIterator() // create a DBIter that only uses memtable content; see NewIterator()
immutable_iter = immutable_iter =
NewDBIterator(&dbname_, env_, *cfd->full_options(), NewDBIterator(&dbname_, env_, *cfd->options(), cfd->user_comparator(),
cfd->user_comparator(), immutable_iter, kMaxSequenceNumber); immutable_iter, kMaxSequenceNumber);
// register cleanups // register cleanups
mutable_iter->RegisterCleanup(CleanupIteratorState, mutable_iter->RegisterCleanup(CleanupIteratorState,
@ -2937,11 +2916,10 @@ Status DBImpl::GetImpl(const ReadOptions& options,
// merge_operands will contain the sequence of merges in the latter case. // merge_operands will contain the sequence of merges in the latter case.
LookupKey lkey(key, snapshot); LookupKey lkey(key, snapshot);
BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer); BumpPerfTime(&perf_context.get_snapshot_time, &snapshot_timer);
if (sv->mem->Get(lkey, value, &s, merge_context, *cfd->full_options())) { if (sv->mem->Get(lkey, value, &s, merge_context, *cfd->options())) {
// Done // Done
RecordTick(options_.statistics.get(), MEMTABLE_HIT); RecordTick(options_.statistics.get(), MEMTABLE_HIT);
} else if (sv->imm->Get(lkey, value, &s, merge_context, } else if (sv->imm->Get(lkey, value, &s, merge_context, *cfd->options())) {
*cfd->full_options())) {
// Done // Done
RecordTick(options_.statistics.get(), MEMTABLE_HIT); RecordTick(options_.statistics.get(), MEMTABLE_HIT);
} else { } else {
@ -2950,7 +2928,7 @@ Status DBImpl::GetImpl(const ReadOptions& options,
StartPerfTimer(&from_files_timer); StartPerfTimer(&from_files_timer);
sv->current->Get(options, lkey, value, &s, &merge_context, &stats, sv->current->Get(options, lkey, value, &s, &merge_context, &stats,
*cfd->full_options(), value_found); *cfd->options(), value_found);
have_stat_update = true; have_stat_update = true;
BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer); BumpPerfTime(&perf_context.get_from_output_files_time, &from_files_timer);
RecordTick(options_.statistics.get(), MEMTABLE_MISS); RecordTick(options_.statistics.get(), MEMTABLE_MISS);
@ -3072,14 +3050,14 @@ std::vector<Status> DBImpl::MultiGet(
auto super_version = mgd->super_version; auto super_version = mgd->super_version;
auto cfd = mgd->cfd; auto cfd = mgd->cfd;
if (super_version->mem->Get(lkey, value, &s, merge_context, if (super_version->mem->Get(lkey, value, &s, merge_context,
*cfd->full_options())) { *cfd->options())) {
// Done // Done
} else if (super_version->imm->Get(lkey, value, &s, merge_context, } else if (super_version->imm->Get(lkey, value, &s, merge_context,
*cfd->full_options())) { *cfd->options())) {
// Done // Done
} else { } else {
super_version->current->Get(options, lkey, value, &s, &merge_context, super_version->current->Get(options, lkey, value, &s, &merge_context,
&mgd->stats, *cfd->full_options()); &mgd->stats, *cfd->options());
mgd->have_stat_update = true; mgd->have_stat_update = true;
} }
@ -3134,7 +3112,8 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
*handle = nullptr; *handle = nullptr;
MutexLock l(&mutex_); MutexLock l(&mutex_);
if (versions_->GetColumnFamilySet()->Exists(column_family_name)) { if (versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name) !=
nullptr) {
return Status::InvalidArgument("Column family already exists"); return Status::InvalidArgument("Column family already exists");
} }
VersionEdit edit; VersionEdit edit;
@ -3144,6 +3123,8 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options,
edit.SetLogNumber(logfile_number_); edit.SetLogNumber(logfile_number_);
edit.SetComparatorName(options.comparator->Name()); edit.SetComparatorName(options.comparator->Name());
// LogAndApply will both write the creation in MANIFEST and create
// ColumnFamilyData object
Status s = versions_->LogAndApply(nullptr, &edit, &mutex_, Status s = versions_->LogAndApply(nullptr, &edit, &mutex_,
db_directory_.get(), false, &options); db_directory_.get(), false, &options);
if (s.ok()) { if (s.ok()) {
@ -3184,9 +3165,10 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) {
} }
if (s.ok()) { if (s.ok()) {
assert(cfd->IsDropped());
Log(options_.info_log, "Dropped column family with id %u\n", cfd->GetID()); Log(options_.info_log, "Dropped column family with id %u\n", cfd->GetID());
// Flush the memtables. This will make all WAL files referencing dropped // Flush the memtables. This will make all WAL files referencing dropped
// column family to be obsolete. They will be deleted when user deletes // column family to be obsolete. They will be deleted once user deletes
// column family handle // column family handle
Write(WriteOptions(), nullptr); // ignore error Write(WriteOptions(), nullptr); // ignore error
} else { } else {
@ -3237,7 +3219,7 @@ Iterator* DBImpl::NewIterator(const ReadOptions& options,
options.snapshot != nullptr options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_ ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
: latest_snapshot; : latest_snapshot;
iter = NewDBIterator(&dbname_, env_, *cfd->full_options(), iter = NewDBIterator(&dbname_, env_, *cfd->options(),
cfd->user_comparator(), iter, snapshot); cfd->user_comparator(), iter, snapshot);
} }
@ -3292,7 +3274,7 @@ Status DBImpl::NewIterators(
: latest_snapshot; : latest_snapshot;
auto iter = NewInternalIterator(options, cfd, super_versions[i]); auto iter = NewInternalIterator(options, cfd, super_versions[i]);
iter = NewDBIterator(&dbname_, env_, *cfd->full_options(), iter = NewDBIterator(&dbname_, env_, *cfd->options(),
cfd->user_comparator(), iter, snapshot); cfd->user_comparator(), iter, snapshot);
iterators->push_back(iter); iterators->push_back(iter);
} }
@ -3364,9 +3346,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
autovector<ColumnFamilyData*> to_delete; autovector<ColumnFamilyData*> to_delete;
// refcounting cfd in iteration // refcounting cfd in iteration
for (auto cfd : *versions_->GetColumnFamilySet()) { for (auto cfd : *versions_->GetColumnFamilySet()) {
if (cfd->IsDropped()) {
continue;
}
cfd->Ref(); cfd->Ref();
// May temporarily unlock and wait. // May temporarily unlock and wait.
status = MakeRoomForWrite(cfd, my_batch == nullptr); status = MakeRoomForWrite(cfd, my_batch == nullptr);
@ -3586,6 +3565,8 @@ Status DBImpl::MakeRoomForWrite(ColumnFamilyData* cfd, bool force) {
// Yield previous error // Yield previous error
s = bg_error_; s = bg_error_;
break; break;
} else if (cfd->IsDropped()) {
break;
} else if (allow_delay && cfd->NeedSlowdownForNumLevel0Files()) { } else if (allow_delay && cfd->NeedSlowdownForNumLevel0Files()) {
// We are getting close to hitting a hard limit on the number of // We are getting close to hitting a hard limit on the number of
// L0 files. Rather than delaying a single write by several // L0 files. Rather than delaying a single write by several
@ -3786,7 +3767,7 @@ Env* DBImpl::GetEnv() const {
const Options& DBImpl::GetOptions(ColumnFamilyHandle* column_family) const { const Options& DBImpl::GetOptions(ColumnFamilyHandle* column_family) const {
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family); auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
return *cfh->cfd()->full_options(); return *cfh->cfd()->options();
} }
bool DBImpl::GetProperty(ColumnFamilyHandle* column_family, bool DBImpl::GetProperty(ColumnFamilyHandle* column_family,
@ -4058,24 +4039,15 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname,
VersionEdit edit; VersionEdit edit;
impl->logfile_number_ = new_log_number; impl->logfile_number_ = new_log_number;
impl->log_.reset(new log::Writer(std::move(lfile))); impl->log_.reset(new log::Writer(std::move(lfile)));
// We use this LogAndApply just to store the next file number, the one
// that we used by calling impl->versions_->NewFileNumber()
// The used log number are already written to manifest in RecoverLogFile()
// method
s = impl->versions_->LogAndApply(impl->default_cf_handle_->cfd(), &edit,
&impl->mutex_,
impl->db_directory_.get());
}
if (s.ok()) {
// set column family handles // set column family handles
for (auto cf : column_families) { for (auto cf : column_families) {
if (!impl->versions_->GetColumnFamilySet()->Exists(cf.name)) { auto cfd =
impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
if (cfd == nullptr) {
s = Status::InvalidArgument("Column family not found: ", cf.name); s = Status::InvalidArgument("Column family not found: ", cf.name);
break; break;
} }
uint32_t id = impl->versions_->GetColumnFamilySet()->GetID(cf.name);
auto cfd = impl->versions_->GetColumnFamilySet()->GetColumnFamily(id);
assert(cfd != nullptr);
handles->push_back( handles->push_back(
new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_)); new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
} }

@ -63,11 +63,11 @@ Status DBImplReadOnly::Get(const ReadOptions& options,
MergeContext merge_context; MergeContext merge_context;
LookupKey lkey(key, snapshot); LookupKey lkey(key, snapshot);
if (super_version->mem->Get(lkey, value, &s, merge_context, if (super_version->mem->Get(lkey, value, &s, merge_context,
*cfd->full_options())) { *cfd->options())) {
} else { } else {
Version::GetStats stats; Version::GetStats stats;
super_version->current->Get(options, lkey, value, &s, &merge_context, super_version->current->Get(options, lkey, value, &s, &merge_context,
&stats, *cfd->full_options()); &stats, *cfd->options());
} }
return s; return s;
} }
@ -80,8 +80,7 @@ Iterator* DBImplReadOnly::NewIterator(const ReadOptions& options,
SequenceNumber latest_snapshot = versions_->LastSequence(); SequenceNumber latest_snapshot = versions_->LastSequence();
Iterator* internal_iter = NewInternalIterator(options, cfd, super_version); Iterator* internal_iter = NewInternalIterator(options, cfd, super_version);
return NewDBIterator( return NewDBIterator(
&dbname_, env_, *cfd->full_options(), cfd->user_comparator(), &dbname_, env_, *cfd->options(), cfd->user_comparator(), internal_iter,
internal_iter,
(options.snapshot != nullptr (options.snapshot != nullptr
? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_ ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
: latest_snapshot)); : latest_snapshot));

@ -29,7 +29,7 @@
namespace rocksdb { namespace rocksdb {
MemTable::MemTable(const InternalKeyComparator& cmp, MemTable::MemTable(const InternalKeyComparator& cmp,
const ColumnFamilyOptions& options) const Options& options)
: comparator_(cmp), : comparator_(cmp),
refs_(0), refs_(0),
arena_(options.arena_block_size), arena_(options.arena_block_size),

@ -39,7 +39,7 @@ class MemTable {
// MemTables are reference counted. The initial reference count // MemTables are reference counted. The initial reference count
// is zero and the caller must call Ref() at least once. // is zero and the caller must call Ref() at least once.
explicit MemTable(const InternalKeyComparator& comparator, explicit MemTable(const InternalKeyComparator& comparator,
const ColumnFamilyOptions& options); const Options& options);
~MemTable(); ~MemTable();

@ -108,7 +108,6 @@ class Repairer {
InternalKeyComparator const icmp_; InternalKeyComparator const icmp_;
InternalFilterPolicy const ipolicy_; InternalFilterPolicy const ipolicy_;
Options const options_; Options const options_;
ColumnFamilyOptions const cf_options_;
std::shared_ptr<Cache> raw_table_cache_; std::shared_ptr<Cache> raw_table_cache_;
TableCache* table_cache_; TableCache* table_cache_;
VersionEdit* edit_; VersionEdit* edit_;

@ -249,7 +249,7 @@ bool Version::PrefixMayMatch(const ReadOptions& options,
Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) { Status Version::GetPropertiesOfAllTables(TablePropertiesCollection* props) {
auto table_cache = cfd_->table_cache(); auto table_cache = cfd_->table_cache();
auto options = cfd_->full_options(); auto options = cfd_->options();
for (int level = 0; level < num_levels_; level++) { for (int level = 0; level < num_levels_; level++) {
for (const auto& file_meta : files_[level]) { for (const auto& file_meta : files_[level]) {
auto fname = TableFileName(vset_->dbname_, file_meta->number); auto fname = TableFileName(vset_->dbname_, file_meta->number);
@ -1491,11 +1491,6 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
assert(column_family_data != nullptr || edit->is_column_family_add_); assert(column_family_data != nullptr || edit->is_column_family_add_);
if (column_family_data != nullptr && column_family_data->IsDropped()) {
// if column family is dropped no need to write anything to the manifest
// (unless, of course, thit is the drop column family write)
return Status::OK();
}
if (edit->is_column_family_drop_) { if (edit->is_column_family_drop_) {
// if we drop column family, we have to make sure to save max column family, // if we drop column family, we have to make sure to save max column family,
// so that we don't reuse existing ID // so that we don't reuse existing ID
@ -1511,6 +1506,16 @@ Status VersionSet::LogAndApply(ColumnFamilyData* column_family_data,
if (w.done) { if (w.done) {
return w.status; return w.status;
} }
if (column_family_data != nullptr && column_family_data->IsDropped()) {
// if column family is dropped by the time we get here, no need to write
// anything to the manifest
manifest_writers_.pop_front();
// Notify new head of write queue
if (!manifest_writers_.empty()) {
manifest_writers_.front()->cv.Signal();
}
return Status::OK();
}
std::vector<VersionEdit*> batch_edits; std::vector<VersionEdit*> batch_edits;
Version* v = nullptr; Version* v = nullptr;
@ -2353,9 +2358,6 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
// LogAndApply. Column family manipulations can only happen within LogAndApply // LogAndApply. Column family manipulations can only happen within LogAndApply
// (the same single thread), so we're safe // (the same single thread), so we're safe
for (auto cfd : *column_family_set_) { for (auto cfd : *column_family_set_) {
if (cfd->IsDropped()) {
continue;
}
{ {
// Store column family info // Store column family info
VersionEdit edit; VersionEdit edit;
@ -2401,19 +2403,18 @@ Status VersionSet::WriteSnapshot(log::Writer* log) {
} }
} }
// save max column family to avoid reusing the same column {
// family ID for two different column families // persist max column family, last sequence and next file
if (column_family_set_->GetMaxColumnFamily() > 0) {
VersionEdit edit; VersionEdit edit;
edit.SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily()); if (column_family_set_->GetMaxColumnFamily() > 0) {
edit.SetMaxColumnFamily(column_family_set_->GetMaxColumnFamily());
}
edit.SetLastSequence(last_sequence_);
edit.SetNextFile(next_file_number_);
std::string record; std::string record;
edit.EncodeTo(&record); edit.EncodeTo(&record);
Status s = log->AddRecord(record); return log->AddRecord(record);
if (!s.ok()) {
return s;
}
} }
return Status::OK();
} }
// Opens the mainfest file and reads all records // Opens the mainfest file and reads all records

@ -289,7 +289,7 @@ class MemTableInserter : public WriteBatch::Handler {
return seek_status; return seek_status;
} }
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
const Options* options = cf_mems_->GetFullOptions(); const Options* options = cf_mems_->GetOptions();
if (!options->inplace_update_support) { if (!options->inplace_update_support) {
mem->Add(sequence_, kTypeValue, key, value); mem->Add(sequence_, kTypeValue, key, value);
} else if (options->inplace_callback == nullptr) { } else if (options->inplace_callback == nullptr) {
@ -344,7 +344,7 @@ class MemTableInserter : public WriteBatch::Handler {
return seek_status; return seek_status;
} }
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
const Options* options = cf_mems_->GetFullOptions(); const Options* options = cf_mems_->GetOptions();
bool perform_merge = false; bool perform_merge = false;
if (options->max_successive_merges > 0 && db_ != nullptr) { if (options->max_successive_merges > 0 && db_ != nullptr) {
@ -413,7 +413,7 @@ class MemTableInserter : public WriteBatch::Handler {
return seek_status; return seek_status;
} }
MemTable* mem = cf_mems_->GetMemTable(); MemTable* mem = cf_mems_->GetMemTable();
const Options* options = cf_mems_->GetFullOptions(); const Options* options = cf_mems_->GetOptions();
if (!dont_filter_deletes_ && options->filter_deletes) { if (!dont_filter_deletes_ && options->filter_deletes) {
SnapshotImpl read_from_snapshot; SnapshotImpl read_from_snapshot;
read_from_snapshot.number_ = sequence_; read_from_snapshot.number_ = sequence_;

@ -26,7 +26,7 @@ class ColumnFamilyMemTables {
// been processed) // been processed)
virtual uint64_t GetLogNumber() const = 0; virtual uint64_t GetLogNumber() const = 0;
virtual MemTable* GetMemTable() const = 0; virtual MemTable* GetMemTable() const = 0;
virtual const Options* GetFullOptions() const = 0; virtual const Options* GetOptions() const = 0;
virtual ColumnFamilyHandle* GetColumnFamilyHandle() = 0; virtual ColumnFamilyHandle* GetColumnFamilyHandle() = 0;
}; };
@ -47,7 +47,7 @@ class ColumnFamilyMemTablesDefault : public ColumnFamilyMemTables {
return mem_; return mem_;
} }
const Options* GetFullOptions() const override { const Options* GetOptions() const override {
assert(ok_); assert(ok_);
return options_; return options_;
} }

@ -24,7 +24,7 @@ static std::string PrintContents(WriteBatch* b) {
auto factory = std::make_shared<SkipListFactory>(); auto factory = std::make_shared<SkipListFactory>();
Options options; Options options;
options.memtable_factory = factory; options.memtable_factory = factory;
MemTable* mem = new MemTable(cmp, ColumnFamilyOptions(options)); MemTable* mem = new MemTable(cmp, options);
mem->Ref(); mem->Ref();
std::string state; std::string state;
ColumnFamilyMemTablesDefault cf_mems_default(mem, &options); ColumnFamilyMemTablesDefault cf_mems_default(mem, &options);

@ -410,8 +410,7 @@ class MemTableConstructor: public Constructor {
table_factory_(new SkipListFactory) { table_factory_(new SkipListFactory) {
Options options; Options options;
options.memtable_factory = table_factory_; options.memtable_factory = table_factory_;
memtable_ = memtable_ = new MemTable(internal_comparator_, options);
new MemTable(internal_comparator_, ColumnFamilyOptions(options));
memtable_->Ref(); memtable_->Ref();
} }
~MemTableConstructor() { ~MemTableConstructor() {

Loading…
Cancel
Save