commit
c65448f95a
@ -0,0 +1,489 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/column_family.h" |
||||
|
||||
#include <vector> |
||||
#include <string> |
||||
#include <algorithm> |
||||
|
||||
#include "db/db_impl.h" |
||||
#include "db/version_set.h" |
||||
#include "db/internal_stats.h" |
||||
#include "db/compaction_picker.h" |
||||
#include "db/table_properties_collector.h" |
||||
#include "util/autovector.h" |
||||
#include "util/hash_skiplist_rep.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// Takes a reference on cfd so the column family stays alive for as long as
// the client holds this handle. cfd may be nullptr (used by the internal
// dummy handle, which does not ref-count — see ColumnFamilyHandleInternal).
// REQUIRES: DB mutex held (per the declaration comment in the header).
ColumnFamilyHandleImpl::ColumnFamilyHandleImpl(ColumnFamilyData* cfd,
                                               DBImpl* db, port::Mutex* mutex)
    : cfd_(cfd), db_(db), mutex_(mutex) {
  if (cfd_ != nullptr) {
    cfd_->Ref();
  }
}
||||
|
||||
// Releases the handle's reference. Dropping the last reference can make
// files of this column family obsolete, so obsolete files are located while
// the DB mutex is held, but actually purged only after the mutex is
// released (file deletion may do slow filesystem I/O).
ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
  if (cfd_ != nullptr) {
    DBImpl::DeletionState deletion_state;
    mutex_->Lock();
    if (cfd_->Unref()) {
      // last reference — safe to destroy the column family's data
      delete cfd_;
    }
    db_->FindObsoleteFiles(deletion_state, false, true);
    mutex_->Unlock();
    if (deletion_state.HaveSomethingToDelete()) {
      db_->PurgeObsoleteFiles(deletion_state);
    }
  }
}
||||
|
||||
// Forwards to the underlying ColumnFamilyData's numeric ID.
uint32_t ColumnFamilyHandleImpl::GetID() const {
  return cfd()->GetID();
}
||||
|
||||
namespace {
// Fix user-supplied options to be reasonable: clamp *ptr into
// [minvalue, maxvalue]. If minvalue > maxvalue, the lower bound wins,
// matching the original check order.
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  T& value = *ptr;
  if (static_cast<V>(value) > maxvalue) {
    value = maxvalue;
  }
  if (static_cast<V>(value) < minvalue) {
    value = minvalue;
  }
}
}  // anonymous namespace
|
||||
|
||||
// Produces a sanitized copy of the user-supplied column family options:
// installs the internal (internal-key aware) comparator and filter policy,
// clamps out-of-range numeric values, and wraps user table-properties
// collectors so they keep seeing user keys.
ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp,
                                    const InternalFilterPolicy* ipolicy,
                                    const ColumnFamilyOptions& src) {
  ColumnFamilyOptions result = src;
  result.comparator = icmp;
  result.filter_policy = (src.filter_policy != nullptr) ? ipolicy : nullptr;
  // NOTE(review): ((size_t)64) << 30 overflows a 32-bit size_t — confirm
  // whether 32-bit builds need a smaller upper bound here.
  ClipToRange(&result.write_buffer_size,
              ((size_t)64) << 10, ((size_t)64) << 30);
  // if user sets arena_block_size, we trust user to use this value. Otherwise,
  // calculate a proper value from write_buffer_size;
  if (result.arena_block_size <= 0) {
    result.arena_block_size = result.write_buffer_size / 10;
  }
  // Cannot wait for more memtables to merge than can ever exist at once.
  result.min_write_buffer_number_to_merge =
      std::min(result.min_write_buffer_number_to_merge,
               result.max_write_buffer_number - 1);
  if (result.block_cache == nullptr && !result.no_block_cache) {
    // default block cache: 8MB LRU
    result.block_cache = NewLRUCache(8 << 20);
  }
  result.compression_per_level = src.compression_per_level;
  // block_size_deviation is a percentage; out-of-range values fall back to 0
  if (result.block_size_deviation < 0 || result.block_size_deviation > 100) {
    result.block_size_deviation = 0;
  }
  if (result.max_mem_compaction_level >= result.num_levels) {
    result.max_mem_compaction_level = result.num_levels - 1;
  }
  if (result.soft_rate_limit > result.hard_rate_limit) {
    result.soft_rate_limit = result.hard_rate_limit;
  }
  // Hash-based memtable factories need a prefix extractor; without one,
  // fall back to the plain skip-list memtable.
  if (!result.prefix_extractor) {
    assert(result.memtable_factory);
    Slice name = result.memtable_factory->Name();
    if (name.compare("HashSkipListRepFactory") == 0 ||
        name.compare("HashLinkListRepFactory") == 0) {
      result.memtable_factory = std::make_shared<SkipListFactory>();
    }
  }

  // -- Sanitize the table properties collector
  // All user defined properties collectors will be wrapped by
  // UserKeyTablePropertiesCollector since for them they only have the
  // knowledge of the user keys; internal keys are invisible to them.
  auto& collectors = result.table_properties_collectors;
  for (size_t i = 0; i < result.table_properties_collectors.size(); ++i) {
    assert(collectors[i]);
    collectors[i] =
        std::make_shared<UserKeyTablePropertiesCollector>(collectors[i]);
  }
  // Add collector to collect internal key statistics
  collectors.push_back(std::make_shared<InternalKeyPropertiesCollector>());

  return result;
}
||||
|
||||
// kSVInUse marks a thread-local slot as "SuperVersion currently borrowed by
// this thread". It points at `dummy` so the sentinel can never collide with
// a real SuperVersion address. kSVObsolete (nullptr) means "nothing cached".
int SuperVersion::dummy = 0;
void* const SuperVersion::kSVInUse = &SuperVersion::dummy;
void* const SuperVersion::kSVObsolete = nullptr;
||||
|
||||
// Frees the memtables that Cleanup() collected; per the header contract the
// destructor runs outside the DB mutex, so freeing here avoids doing the
// (potentially expensive) deletes while the mutex is held.
SuperVersion::~SuperVersion() {
  for (auto* memtable : to_delete) {
    delete memtable;
  }
}
||||
|
||||
// Bumps the reference count and returns `this` for call chaining.
// NOTE(review): relaxed ordering assumes the SuperVersion pointer itself is
// always published via a synchronized path (DB mutex / TLS) — confirm.
SuperVersion* SuperVersion::Ref() {
  refs.fetch_add(1, std::memory_order_relaxed);
  return this;
}
||||
|
||||
// Drops one reference. Returns true iff this call removed the last
// reference; the caller is then responsible for calling Cleanup() (with the
// DB mutex held) and deleting this object.
bool SuperVersion::Unref() {
  // fetch_sub returns the previous value of ref
  uint32_t previous_refs = refs.fetch_sub(1, std::memory_order_relaxed);
  assert(previous_refs > 0);
  return previous_refs == 1;
}
||||
|
||||
// Unrefs mem, imm and current. Must run with the DB mutex held and only
// after the last reference is gone. Memtables whose refcount drops to zero
// are collected into to_delete and freed later, outside the mutex, by the
// destructor.
void SuperVersion::Cleanup() {
  assert(refs.load(std::memory_order_relaxed) == 0);
  imm->Unref(&to_delete);
  MemTable* m = mem->Unref();
  if (m != nullptr) {
    to_delete.push_back(m);
  }
  current->Unref();
}
||||
|
||||
// Points this SuperVersion at the given memtable, immutable-memtable list
// and version, taking one reference on each, and resets the refcount to 1.
// Per the header contract, call with the DB mutex held.
void SuperVersion::Init(MemTable* new_mem, MemTableListVersion* new_imm,
                        Version* new_current) {
  mem = new_mem;
  mem->Ref();
  imm = new_imm;
  imm->Ref();
  current = new_current;
  current->Ref();
  refs.store(1, std::memory_order_relaxed);
}
||||
|
||||
namespace {
void SuperVersionUnrefHandle(void* ptr) {
  // UnrefHandle is called when a thread exists or a ThreadLocalPtr gets
  // destroyed. When former happens, the thread shouldn't see kSVInUse.
  // When latter happens, we are in ~ColumnFamilyData(), no get should happen as
  // well.
  SuperVersion* sv = static_cast<SuperVersion*>(ptr);
  if (sv->Unref()) {
    // Last reference: Cleanup() must run under the DB mutex (stored on the
    // SuperVersion for exactly this purpose), while delete happens outside.
    sv->db_mutex->Lock();
    sv->Cleanup();
    sv->db_mutex->Unlock();
    delete sv;
  }
}
}  // anonymous namespace
|
||||
|
||||
// Constructs a column family. When dummy_versions is nullptr this is the
// dummy CFD that heads ColumnFamilySet's circular linked list, and the
// stats / table cache / compaction picker are not created.
ColumnFamilyData::ColumnFamilyData(const std::string& dbname, uint32_t id,
                                   const std::string& name,
                                   Version* dummy_versions, Cache* table_cache,
                                   const ColumnFamilyOptions& options,
                                   const DBOptions* db_options,
                                   const EnvOptions& storage_options,
                                   ColumnFamilySet* column_family_set)
    : id_(id),
      name_(name),
      dummy_versions_(dummy_versions),
      current_(nullptr),
      refs_(0),
      dropped_(false),
      internal_comparator_(options.comparator),
      internal_filter_policy_(options.filter_policy),
      // SanitizeOptions swaps in the internal comparator/filter policy and
      // clamps out-of-range values before merging with db_options.
      options_(*db_options, SanitizeOptions(&internal_comparator_,
                                            &internal_filter_policy_, options)),
      mem_(nullptr),
      imm_(options.min_write_buffer_number_to_merge),
      super_version_(nullptr),
      super_version_number_(0),
      local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)),
      next_(nullptr),
      prev_(nullptr),
      log_number_(0),
      need_slowdown_for_num_level0_files_(false),
      column_family_set_(column_family_set) {
  // The CFD holds one reference to itself; released by the owning set.
  Ref();

  // if dummy_versions is nullptr, then this is a dummy column family.
  if (dummy_versions != nullptr) {
    internal_stats_.reset(new InternalStats(options.num_levels, db_options->env,
                                            db_options->statistics.get()));
    table_cache_.reset(
        new TableCache(dbname, &options_, storage_options, table_cache));
    // Choose the compaction strategy implementation from the (sanitized)
    // options; everything else goes through the CompactionPicker interface.
    if (options_.compaction_style == kCompactionStyleUniversal) {
      compaction_picker_.reset(
          new UniversalCompactionPicker(&options_, &internal_comparator_));
    } else {
      compaction_picker_.reset(
          new LevelCompactionPicker(&options_, &internal_comparator_));
    }

    Log(options_.info_log, "Options for column family \"%s\":\n",
        name.c_str());
    const ColumnFamilyOptions* cf_options = &options_;
    cf_options->Dump(options_.info_log.get());
  }
}
||||
|
||||
// DB mutex held
ColumnFamilyData::~ColumnFamilyData() {
  assert(refs_ == 0);
  // remove from linked list
  auto prev = prev_;
  auto next = next_;
  prev->next_ = next;
  next->prev_ = prev;

  // it's nullptr for dummy CFD
  if (column_family_set_ != nullptr) {
    // remove from column_family_set
    column_family_set_->RemoveColumnFamily(this);
  }

  if (current_ != nullptr) {
    current_->Unref();
  }

  if (super_version_ != nullptr) {
    // Release SuperVersion reference kept in ThreadLocalPtr.
    // This must be done outside of mutex_ since unref handler can lock mutex.
    // (SuperVersionUnrefHandle takes db_mutex, so holding it here would
    // self-deadlock.)
    super_version_->db_mutex->Unlock();
    local_sv_.reset();
    super_version_->db_mutex->Lock();

    bool is_last_reference __attribute__((unused));
    is_last_reference = super_version_->Unref();
    assert(is_last_reference);
    super_version_->Cleanup();
    delete super_version_;
    super_version_ = nullptr;
  }

  if (dummy_versions_ != nullptr) {
    // List must be empty
    assert(dummy_versions_->next_ == dummy_versions_);
    delete dummy_versions_;
  }

  if (mem_ != nullptr) {
    delete mem_->Unref();
  }
  // Free any immutable memtables whose refcount drops to zero here.
  autovector<MemTable*> to_delete;
  imm_.current()->Unref(&to_delete);
  for (MemTable* m : to_delete) {
    delete m;
  }
}
||||
|
||||
// Installs a new current version and recomputes whether writes should be
// slowed down because level 0 has accumulated too many files. A negative
// slowdown trigger disables the check entirely.
void ColumnFamilyData::SetCurrent(Version* current) {
  current_ = current;
  const int trigger = options_.level0_slowdown_writes_trigger;
  need_slowdown_for_num_level0_files_ =
      (trigger >= 0 && current_->NumLevelFiles(0) >= trigger);
}
||||
|
||||
void ColumnFamilyData::CreateNewMemtable() { |
||||
assert(current_ != nullptr); |
||||
if (mem_ != nullptr) { |
||||
delete mem_->Unref(); |
||||
} |
||||
mem_ = new MemTable(internal_comparator_, options_); |
||||
mem_->Ref(); |
||||
} |
||||
|
||||
// See documentation in compaction_picker.h. Delegates to the compaction
// picker (level-style or universal) chosen when this CFD was constructed.
Compaction* ColumnFamilyData::PickCompaction(LogBuffer* log_buffer) {
  CompactionPicker* picker = compaction_picker_.get();
  return picker->PickCompaction(current_, log_buffer);
}
||||
|
||||
// See documentation in compaction_picker.h. Forwards the manual-compaction
// request on the current version to the compaction picker.
Compaction* ColumnFamilyData::CompactRange(int input_level, int output_level,
                                           const InternalKey* begin,
                                           const InternalKey* end,
                                           InternalKey** compaction_end) {
  CompactionPicker* picker = compaction_picker_.get();
  return picker->CompactRange(current_, input_level, output_level, begin, end,
                              compaction_end);
}
||||
|
||||
// Makes new_superversion (allocated by the caller outside the mutex) the
// current SuperVersion, capturing the present mem/imm/current state.
// Returns the old SuperVersion when this call dropped its last reference —
// the caller must delete it outside the mutex — or nullptr otherwise.
// REQUIRES: DB mutex held.
SuperVersion* ColumnFamilyData::InstallSuperVersion(
    SuperVersion* new_superversion, port::Mutex* db_mutex) {
  new_superversion->db_mutex = db_mutex;
  new_superversion->Init(mem_, imm_.current(), current_);
  SuperVersion* old_superversion = super_version_;
  super_version_ = new_superversion;
  ++super_version_number_;
  super_version_->version_number = super_version_number_;
  if (old_superversion != nullptr && old_superversion->Unref()) {
    old_superversion->Cleanup();
    return old_superversion;  // will let caller delete outside of mutex
  }
  return nullptr;
}
||||
|
||||
// Invalidates every thread's cached SuperVersion pointer (replacing it with
// kSVObsolete) and drops the reference each cached copy held. Slots marked
// kSVInUse are skipped: the owning thread is actively using that
// SuperVersion and will release it itself (see SuperVersionUnrefHandle).
void ColumnFamilyData::ResetThreadLocalSuperVersions() {
  autovector<void*> sv_ptrs;
  local_sv_->Scrape(&sv_ptrs, SuperVersion::kSVObsolete);
  for (auto ptr : sv_ptrs) {
    assert(ptr);
    if (ptr == SuperVersion::kSVInUse) {
      continue;
    }
    auto sv = static_cast<SuperVersion*>(ptr);
    if (sv->Unref()) {
      sv->Cleanup();
      delete sv;
    }
  }
}
||||
|
||||
// Builds the set with a dummy CFD that heads the circular linked list of
// column families. The dummy has id 0, an empty name, no versions and no
// table cache, and is never exposed through lookups.
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
                                 const DBOptions* db_options,
                                 const EnvOptions& storage_options,
                                 Cache* table_cache)
    : max_column_family_(0),
      // Pass the `storage_options` parameter here, NOT the storage_options_
      // member: members are initialized in declaration order, and
      // storage_options_ is declared after dummy_cfd_, so it would still be
      // uninitialized at this point.
      dummy_cfd_(new ColumnFamilyData(dbname, 0, "", nullptr, nullptr,
                                      ColumnFamilyOptions(), db_options,
                                      storage_options, nullptr)),
      default_cfd_cache_(nullptr),
      db_name_(dbname),
      db_options_(db_options),
      storage_options_(storage_options),
      table_cache_(table_cache),
      spin_lock_(ATOMIC_FLAG_INIT) {
  // initialize linked list (circular; dummy_cfd_ is the sentinel node)
  dummy_cfd_->prev_ = dummy_cfd_;
  dummy_cfd_->next_ = dummy_cfd_;
}
||||
|
||||
// Destroys all remaining column families, then the dummy sentinel.
ColumnFamilySet::~ColumnFamilySet() {
  while (column_family_data_.size() > 0) {
    // cfd destructor will delete itself from column_family_data_
    auto cfd = column_family_data_.begin()->second;
    cfd->Unref();
    delete cfd;
  }
  // dummy_cfd_ is not in column_family_data_; drop the self-reference it
  // took in its constructor and free it directly.
  dummy_cfd_->Unref();
  delete dummy_cfd_;
}
||||
|
||||
// Returns the cached default (ID 0) column family; the cache is filled when
// the default family is created, so it must exist by the time this is called.
ColumnFamilyData* ColumnFamilySet::GetDefault() const {
  ColumnFamilyData* default_cfd = default_cfd_cache_;
  assert(default_cfd != nullptr);
  return default_cfd;
}
||||
|
||||
// Looks up a column family by numeric ID; returns nullptr when not found.
// Callers must hold either the DB mutex or the spinlock (see header notes).
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
  auto cfd_iter = column_family_data_.find(id);
  if (cfd_iter == column_family_data_.end()) {
    return nullptr;
  }
  return cfd_iter->second;
}
||||
|
||||
// Looks up a column family by name; returns nullptr when not found.
// Resolves the name to an ID first, then reuses the ID-based lookup.
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name)
    const {
  auto name_iter = column_families_.find(name);
  if (name_iter == column_families_.end()) {
    return nullptr;
  }
  // The two maps are kept in sync, so the ID lookup must succeed.
  ColumnFamilyData* cfd = GetColumnFamily(name_iter->second);
  assert(cfd != nullptr);
  return cfd;
}
||||
|
||||
// Reserves and returns a fresh column family ID, strictly greater than any
// ID handed out before. REQUIRES: DB mutex held (see header notes).
uint32_t ColumnFamilySet::GetNextColumnFamilyID() {
  max_column_family_ += 1;
  return max_column_family_;
}
||||
|
||||
// Returns the largest column family ID seen so far. REQUIRES: DB mutex held.
uint32_t ColumnFamilySet::GetMaxColumnFamily() {
  return max_column_family_;
}
||||
|
||||
// Raises the max-column-family watermark; never lowers it.
// REQUIRES: DB mutex held (see header notes).
void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) {
  if (new_max_column_family > max_column_family_) {
    max_column_family_ = new_max_column_family;
  }
}
||||
|
||||
// under a DB mutex
|
||||
// Creates a new ColumnFamilyData, registers it in both lookup maps (under
// the spinlock, since those maps may be read outside the DB mutex — see
// ColumnFamilyMemTablesImpl::Seek) and links it into the circular list.
// REQUIRES: DB mutex held; `name` must not already exist.
ColumnFamilyData* ColumnFamilySet::CreateColumnFamily(
    const std::string& name, uint32_t id, Version* dummy_versions,
    const ColumnFamilyOptions& options) {
  assert(column_families_.find(name) == column_families_.end());
  ColumnFamilyData* new_cfd =
      new ColumnFamilyData(db_name_, id, name, dummy_versions, table_cache_,
                           options, db_options_, storage_options_, this);
  Lock();
  column_families_.insert({name, id});
  column_family_data_.insert({id, new_cfd});
  Unlock();
  max_column_family_ = std::max(max_column_family_, id);
  // add to linked list (insert just before the dummy sentinel)
  new_cfd->next_ = dummy_cfd_;
  auto prev = dummy_cfd_->prev_;
  new_cfd->prev_ = prev;
  prev->next_ = new_cfd;
  dummy_cfd_->prev_ = new_cfd;
  if (id == 0) {
    // cache the default column family for fast GetDefault()
    default_cfd_cache_ = new_cfd;
  }
  return new_cfd;
}
||||
|
||||
// Acquires the spinlock guarding the lookup maps.
// Busy-waits; the guarded critical sections are tiny (map insert/erase/
// lookup), so spinning is acceptable here.
void ColumnFamilySet::Lock() {
  // spin lock
  while (spin_lock_.test_and_set(std::memory_order_acquire)) {
  }
}
||||
|
||||
// Releases the spinlock taken by Lock().
void ColumnFamilySet::Unlock() {
  spin_lock_.clear(std::memory_order_release);
}
||||
|
||||
// REQUIRES: DB mutex held
|
||||
// REQUIRES: DB mutex held. Deletes every column family whose refcount
// already dropped to zero (see ColumnFamilyData::Unref()).
void ColumnFamilySet::FreeDeadColumnFamilies() {
  autovector<ColumnFamilyData*> to_delete;
  // Collect first, then delete: ~ColumnFamilyData unlinks itself from the
  // list being walked, so deleting inside the walk would invalidate cfd.
  for (auto cfd = dummy_cfd_->next_; cfd != dummy_cfd_; cfd = cfd->next_) {
    if (cfd->refs_ == 0) {
      to_delete.push_back(cfd);
    }
  }
  for (auto cfd : to_delete) {
    // this is very rare, so it's not a problem that we do it under a mutex
    delete cfd;
  }
}
||||
|
||||
// under a DB mutex
|
||||
// Called from ~ColumnFamilyData (DB mutex held). Erases cfd from both
// lookup maps under the spinlock so readers that only hold the spinlock
// never observe a half-removed entry.
void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) {
  auto cfd_iter = column_family_data_.find(cfd->GetID());
  assert(cfd_iter != column_family_data_.end());
  Lock();
  column_family_data_.erase(cfd_iter);
  column_families_.erase(cfd->GetName());
  Unlock();
}
||||
|
||||
// Selects the column family to which subsequent GetMemTable()/GetOptions()/
// GetLogNumber() calls refer. Returns false if no column family has the
// given ID (e.g. it was dropped and freed).
bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) {
  if (column_family_id == 0) {
    // optimization for common case
    current_ = column_family_set_->GetDefault();
  } else {
    // maybe outside of db mutex, should lock
    column_family_set_->Lock();
    current_ = column_family_set_->GetColumnFamily(column_family_id);
    column_family_set_->Unlock();
  }
  // Point the reusable internal handle at the selected family (or nullptr
  // when the lookup failed).
  handle_.SetCFD(current_);
  return current_ != nullptr;
}
||||
|
||||
// Returns the log number of the column family selected by the last Seek().
// REQUIRES: Seek() was called and returned true.
uint64_t ColumnFamilyMemTablesImpl::GetLogNumber() const {
  assert(current_ != nullptr);
  const uint64_t log_number = current_->GetLogNumber();
  return log_number;
}
||||
|
||||
// Returns the active memtable of the selected column family.
// REQUIRES: Seek() was called and returned true.
MemTable* ColumnFamilyMemTablesImpl::GetMemTable() const {
  assert(current_ != nullptr);
  MemTable* memtable = current_->mem();
  return memtable;
}
||||
|
||||
// Returns the options of the selected column family.
// REQUIRES: Seek() was called and returned true.
const Options* ColumnFamilyMemTablesImpl::GetOptions() const {
  assert(current_ != nullptr);
  const Options* opts = current_->options();
  return opts;
}
||||
|
||||
// Returns a handle for the selected column family. The handle is the
// reusable internal one owned by this object (does not add a reference),
// so it is only valid until the next Seek().
ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() {
  assert(current_ != nullptr);
  return &handle_;
}
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,408 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#pragma once |
||||
|
||||
#include <unordered_map> |
||||
#include <string> |
||||
#include <vector> |
||||
#include <atomic> |
||||
|
||||
#include "rocksdb/options.h" |
||||
#include "rocksdb/db.h" |
||||
#include "rocksdb/env.h" |
||||
#include "db/memtable_list.h" |
||||
#include "db/write_batch_internal.h" |
||||
#include "db/table_cache.h" |
||||
#include "util/thread_local.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
class Version; |
||||
class VersionSet; |
||||
class MemTable; |
||||
class MemTableListVersion; |
||||
class CompactionPicker; |
||||
class Compaction; |
||||
class InternalKey; |
||||
class InternalStats; |
||||
class ColumnFamilyData; |
||||
class DBImpl; |
||||
class LogBuffer; |
||||
|
||||
// ColumnFamilyHandleImpl is the class that clients use to access different
|
||||
// column families. It has non-trivial destructor, which gets called when client
|
||||
// is done using the column family
|
||||
class ColumnFamilyHandleImpl : public ColumnFamilyHandle {
 public:
  // create while holding the mutex
  ColumnFamilyHandleImpl(ColumnFamilyData* cfd, DBImpl* db, port::Mutex* mutex);
  // destroy without mutex
  virtual ~ColumnFamilyHandleImpl();
  virtual ColumnFamilyData* cfd() const { return cfd_; }

  virtual uint32_t GetID() const;

 private:
  ColumnFamilyData* cfd_;  // ref-counted in ctor, released in dtor
  DBImpl* db_;             // used to purge obsolete files on destruction
  port::Mutex* mutex_;     // DB mutex; taken by the destructor
};
||||
|
||||
// Does not ref-count ColumnFamilyData
|
||||
// We use this dummy ColumnFamilyHandleImpl because sometimes MemTableInserter
|
||||
// calls DBImpl methods. When this happens, MemTableInserter need access to
|
||||
// ColumnFamilyHandle (same as the client would need). In that case, we feed
|
||||
// MemTableInserter dummy ColumnFamilyHandle and enable it to call DBImpl
|
||||
// methods
|
||||
class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl {
 public:
  // Base is constructed with nullptr cfd, so no reference is ever taken.
  ColumnFamilyHandleInternal()
      : ColumnFamilyHandleImpl(nullptr, nullptr, nullptr) {}

  // Re-points the handle at cfd without taking a reference.
  void SetCFD(ColumnFamilyData* cfd) { internal_cfd_ = cfd; }
  virtual ColumnFamilyData* cfd() const override { return internal_cfd_; }

 private:
  ColumnFamilyData* internal_cfd_;  // not owned, not ref-counted
};
||||
|
||||
// holds references to memtable, all immutable memtables and version
|
||||
struct SuperVersion {
  MemTable* mem;             // active memtable; Ref()-ed by Init()
  MemTableListVersion* imm;  // immutable memtables; Ref()-ed by Init()
  Version* current;          // current version; Ref()-ed by Init()
  std::atomic<uint32_t> refs;
  // We need to_delete because during Cleanup(), imm->Unref() returns
  // all memtables that we need to free through this vector. We then
  // delete all those memtables outside of mutex, during destruction
  autovector<MemTable*> to_delete;
  // Version number of the current SuperVersion
  uint64_t version_number;
  // DB mutex; stored so the thread-local unref handler can lock it
  port::Mutex* db_mutex;

  // should be called outside the mutex
  SuperVersion() = default;
  ~SuperVersion();
  SuperVersion* Ref();

  // Returns true when this call dropped the last reference; the caller is
  // then responsible for Cleanup() (under the mutex) and deletion.
  bool Unref();

  // call these two methods with db mutex held
  // Cleanup unrefs mem, imm and current. Also, it stores all memtables
  // that needs to be deleted in to_delete vector. Unrefing those
  // objects needs to be done in the mutex
  void Cleanup();
  void Init(MemTable* new_mem, MemTableListVersion* new_imm,
            Version* new_current);

  // The value of dummy is not actually used. kSVInUse takes its address as a
  // mark in the thread local storage to indicate the SuperVersion is in use
  // by thread. This way, the value of kSVInUse is guaranteed to have no
  // conflict with SuperVersion object address and portable on different
  // platform.
  static int dummy;
  static void* const kSVInUse;
  static void* const kSVObsolete;
};
||||
|
||||
// Fixes user-supplied column family options into a safe, consistent form:
// installs the internal comparator/filter policy and clamps out-of-range
// values (see the definition in column_family.cc for specifics).
extern ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp,
                                           const InternalFilterPolicy* ipolicy,
                                           const ColumnFamilyOptions& src);
||||
|
||||
class ColumnFamilySet; |
||||
|
||||
// This class keeps all the data that a column family needs. It's mostly dumb and
|
||||
// used just to provide access to metadata.
|
||||
// Most methods require DB mutex held, unless otherwise noted
|
||||
class ColumnFamilyData {
 public:
  ~ColumnFamilyData();

  // thread-safe
  uint32_t GetID() const { return id_; }
  // thread-safe
  const std::string& GetName() const { return name_; }

  // Not thread-safe by itself; callers hold the DB mutex (refs_ is a plain
  // int, not an atomic).
  void Ref() { ++refs_; }
  // will just decrease reference count to 0, but will not delete it. returns
  // true if the ref count was decreased to zero. in that case, it can be
  // deleted by the caller immediately, or later, by calling
  // FreeDeadColumnFamilies()
  bool Unref() {
    assert(refs_ > 0);
    return --refs_ == 0;
  }

  // This can only be called from single-threaded VersionSet::LogAndApply()
  // After dropping column family no other operation on that column family
  // will be executed. All the files and memory will be, however, kept around
  // until client drops the column family handle. That way, client can still
  // access data from dropped column family.
  // Column family can be dropped and still alive. In that state:
  // *) Column family is not included in the iteration.
  // *) Compaction and flush is not executed on the dropped column family.
  // *) Client can continue writing and reading from column family. However, all
  // writes stay in the current memtable.
  // When the dropped column family is unreferenced, then we:
  // *) delete all memory associated with that column family
  // *) delete all the files associated with that column family
  void SetDropped() {
    // can't drop default CF
    assert(id_ != 0);
    dropped_ = true;
  }
  bool IsDropped() const { return dropped_; }

  // thread-safe
  int NumberLevels() const { return options_.num_levels; }

  void SetLogNumber(uint64_t log_number) { log_number_ = log_number; }
  uint64_t GetLogNumber() const { return log_number_; }

  // thread-safe
  const Options* options() const { return &options_; }

  InternalStats* internal_stats() { return internal_stats_.get(); }

  MemTableList* imm() { return &imm_; }
  MemTable* mem() { return mem_; }
  Version* current() { return current_; }
  Version* dummy_versions() { return dummy_versions_; }
  void SetMemtable(MemTable* new_mem) { mem_ = new_mem; }
  void SetCurrent(Version* current);
  void CreateNewMemtable();

  TableCache* table_cache() { return table_cache_.get(); }

  // See documentation in compaction_picker.h
  Compaction* PickCompaction(LogBuffer* log_buffer);
  Compaction* CompactRange(int input_level, int output_level,
                           const InternalKey* begin, const InternalKey* end,
                           InternalKey** compaction_end);

  CompactionPicker* compaction_picker() { return compaction_picker_.get(); }
  // thread-safe
  const Comparator* user_comparator() const {
    return internal_comparator_.user_comparator();
  }
  // thread-safe
  const InternalKeyComparator& internal_comparator() const {
    return internal_comparator_;
  }

  SuperVersion* GetSuperVersion() { return super_version_; }
  // thread-safe
  ThreadLocalPtr* GetThreadLocalSuperVersion() const { return local_sv_.get(); }
  // thread-safe
  uint64_t GetSuperVersionNumber() const {
    return super_version_number_.load();
  }
  // will return a pointer to SuperVersion* if previous SuperVersion
  // if its reference count is zero and needs deletion or nullptr if not
  // As argument takes a pointer to allocated SuperVersion to enable
  // the clients to allocate SuperVersion outside of mutex.
  SuperVersion* InstallSuperVersion(SuperVersion* new_superversion,
                                    port::Mutex* db_mutex);

  // Drops all thread-local cached SuperVersion references (see .cc).
  void ResetThreadLocalSuperVersions();

  // A Flag indicating whether write needs to slowdown because of there are
  // too many number of level0 files.
  bool NeedSlowdownForNumLevel0Files() const {
    return need_slowdown_for_num_level0_files_;
  }

 private:
  friend class ColumnFamilySet;
  // Constructed only by ColumnFamilySet (hence the friendship).
  ColumnFamilyData(const std::string& dbname, uint32_t id,
                   const std::string& name, Version* dummy_versions,
                   Cache* table_cache, const ColumnFamilyOptions& options,
                   const DBOptions* db_options,
                   const EnvOptions& storage_options,
                   ColumnFamilySet* column_family_set);

  uint32_t id_;
  const std::string name_;
  Version* dummy_versions_;  // Head of circular doubly-linked list of versions.
  Version* current_;         // == dummy_versions->prev_

  int refs_;      // outstanding references to ColumnFamilyData
  bool dropped_;  // true if client dropped it

  const InternalKeyComparator internal_comparator_;
  const InternalFilterPolicy internal_filter_policy_;

  // Sanitized, merged (DBOptions + ColumnFamilyOptions) options; immutable.
  Options const options_;

  std::unique_ptr<TableCache> table_cache_;

  std::unique_ptr<InternalStats> internal_stats_;

  MemTable* mem_;            // active memtable
  MemTableList imm_;         // immutable memtables
  SuperVersion* super_version_;

  // An ordinal representing the current SuperVersion. Updated by
  // InstallSuperVersion(), i.e. incremented every time super_version_
  // changes.
  std::atomic<uint64_t> super_version_number_;

  // Thread's local copy of SuperVersion pointer
  // This needs to be destructed before mutex_
  std::unique_ptr<ThreadLocalPtr> local_sv_;

  // pointers for a circular linked list. we use it to support iterations
  // that can be concurrent with writes
  ColumnFamilyData* next_;
  ColumnFamilyData* prev_;

  // This is the earliest log file number that contains data from this
  // Column Family. All earlier log files must be ignored and not
  // recovered from
  uint64_t log_number_;

  // A flag indicating whether we should delay writes because
  // we have too many level 0 files
  bool need_slowdown_for_num_level0_files_;

  // An object that keeps all the compaction stats
  // and picks the next compaction
  std::unique_ptr<CompactionPicker> compaction_picker_;

  ColumnFamilySet* column_family_set_;  // nullptr for the dummy CFD
};
||||
|
||||
// ColumnFamilySet has interesting thread-safety requirements
|
||||
// * CreateColumnFamily() or RemoveColumnFamily() -- need to protect by DB
|
||||
// mutex. Inside, column_family_data_ and column_families_ will be protected
|
||||
// by Lock() and Unlock(). CreateColumnFamily() should ONLY be called from
|
||||
// VersionSet::LogAndApply() in the normal runtime. It is also called
|
||||
// during Recovery and in DumpManifest(). RemoveColumnFamily() is called
|
||||
// from ColumnFamilyData destructor
|
||||
// * Iteration -- hold DB mutex, but you can release it in the body of
|
||||
// iteration. If you release DB mutex in body, reference the column
|
||||
// family before the mutex and unreference after you unlock, since the column
|
||||
// family might get dropped when the DB mutex is released
|
||||
// * GetDefault() -- thread safe
|
||||
// * GetColumnFamily() -- either inside of DB mutex or call Lock() <-> Unlock()
|
||||
// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily() --
|
||||
// inside of DB mutex
|
||||
class ColumnFamilySet {
 public:
  // ColumnFamilySet supports iteration
  // Iteration skips dead (refs_ == 0) and dropped column families.
  class iterator {
   public:
    explicit iterator(ColumnFamilyData* cfd)
        : current_(cfd) {}
    iterator& operator++() {
      // dummy is never dead or dropped, so this will never be infinite
      do {
        current_ = current_->next_;
      } while (current_->refs_ == 0 || current_->IsDropped());
      return *this;
    }
    bool operator!=(const iterator& other) {
      return this->current_ != other.current_;
    }
    ColumnFamilyData* operator*() { return current_; }

   private:
    ColumnFamilyData* current_;
  };

  ColumnFamilySet(const std::string& dbname, const DBOptions* db_options,
                  const EnvOptions& storage_options, Cache* table_cache);
  ~ColumnFamilySet();

  ColumnFamilyData* GetDefault() const;
  // GetColumnFamily() calls return nullptr if column family is not found
  ColumnFamilyData* GetColumnFamily(uint32_t id) const;
  ColumnFamilyData* GetColumnFamily(const std::string& name) const;
  // this call will return the next available column family ID. it guarantees
  // that there is no column family with id greater than or equal to the
  // returned value in the current running instance or anytime in RocksDB
  // instance history.
  uint32_t GetNextColumnFamilyID();
  uint32_t GetMaxColumnFamily();
  void UpdateMaxColumnFamily(uint32_t new_max_column_family);

  // Creates and registers a new column family; see the thread-safety notes
  // above the class. REQUIRES: DB mutex held, `name` not already present.
  ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id,
                                       Version* dummy_version,
                                       const ColumnFamilyOptions& options);

  iterator begin() { return iterator(dummy_cfd_->next_); }
  iterator end() { return iterator(dummy_cfd_); }

  // Spinlock protecting column_families_ / column_family_data_ (see below).
  void Lock();
  void Unlock();

  // REQUIRES: DB mutex held
  // Don't call while iterating over ColumnFamilySet
  void FreeDeadColumnFamilies();

 private:
  friend class ColumnFamilyData;
  // helper function that gets called from cfd destructor
  // REQUIRES: DB mutex held
  void RemoveColumnFamily(ColumnFamilyData* cfd);

  // column_families_ and column_family_data_ need to be protected:
  // * when mutating: 1. DB mutex locked first, 2. spinlock locked second
  // * when reading, either: 1. lock DB mutex, or 2. lock spinlock
  //   (if both, respect the ordering to avoid deadlock!)
  std::unordered_map<std::string, uint32_t> column_families_;
  std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_;

  uint32_t max_column_family_;
  // Sentinel head of the circular linked list of column families.
  ColumnFamilyData* dummy_cfd_;
  // We don't hold the refcount here, since default column family always exists
  // We are also not responsible for cleaning up default_cfd_cache_. This is
  // just a cache that makes common case (accessing default column family)
  // faster
  ColumnFamilyData* default_cfd_cache_;

  const std::string db_name_;
  const DBOptions* const db_options_;
  const EnvOptions storage_options_;
  Cache* table_cache_;
  std::atomic_flag spin_lock_;  // backs Lock()/Unlock()
};
||||
|
||||
// We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access
|
||||
// memtables of different column families (specified by ID in the write batch)
|
||||
class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables {
 public:
  explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set)
      : column_family_set_(column_family_set), current_(nullptr) {}

  // sets current_ to ColumnFamilyData with column_family_id
  // returns false if column family doesn't exist
  bool Seek(uint32_t column_family_id) override;

  // Returns log number of the selected column family
  uint64_t GetLogNumber() const override;

  // REQUIRES: Seek() called first
  virtual MemTable* GetMemTable() const override;

  // Returns options for selected column family
  // REQUIRES: Seek() called first
  virtual const Options* GetOptions() const override;

  // Returns column family handle for the selected column family
  virtual ColumnFamilyHandle* GetColumnFamilyHandle() override;

 private:
  // raw pointer; presumably not owned by this object -- TODO confirm
  ColumnFamilySet* column_family_set_;
  // column family selected by the last successful Seek(); nullptr before
  // the first Seek()
  ColumnFamilyData* current_;
  ColumnFamilyHandleInternal handle_;
};
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,857 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include <algorithm> |
||||
#include <vector> |
||||
#include <string> |
||||
|
||||
#include "db/db_impl.h" |
||||
#include "rocksdb/env.h" |
||||
#include "rocksdb/db.h" |
||||
#include "util/testharness.h" |
||||
#include "util/testutil.h" |
||||
#include "util/coding.h" |
||||
#include "utilities/merge_operators.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
namespace { |
||||
std::string RandomString(Random* rnd, int len) { |
||||
std::string r; |
||||
test::RandomString(rnd, len, &r); |
||||
return r; |
||||
} |
||||
} // anonymous namespace
|
||||
|
||||
class ColumnFamilyTest { |
||||
public: |
||||
ColumnFamilyTest() : rnd_(139) { |
||||
env_ = Env::Default(); |
||||
dbname_ = test::TmpDir() + "/column_family_test"; |
||||
db_options_.create_if_missing = true; |
||||
DestroyDB(dbname_, Options(db_options_, column_family_options_)); |
||||
} |
||||
|
||||
void Close() { |
||||
for (auto h : handles_) { |
||||
delete h; |
||||
} |
||||
handles_.clear(); |
||||
names_.clear(); |
||||
delete db_; |
||||
db_ = nullptr; |
||||
} |
||||
|
||||
Status TryOpen(std::vector<std::string> cf, |
||||
std::vector<ColumnFamilyOptions> options = {}) { |
||||
std::vector<ColumnFamilyDescriptor> column_families; |
||||
names_.clear(); |
||||
for (size_t i = 0; i < cf.size(); ++i) { |
||||
column_families.push_back(ColumnFamilyDescriptor( |
||||
cf[i], options.size() == 0 ? column_family_options_ : options[i])); |
||||
names_.push_back(cf[i]); |
||||
} |
||||
return DB::Open(db_options_, dbname_, column_families, &handles_, &db_); |
||||
} |
||||
|
||||
void Open(std::vector<std::string> cf, |
||||
std::vector<ColumnFamilyOptions> options = {}) { |
||||
ASSERT_OK(TryOpen(cf, options)); |
||||
} |
||||
|
||||
void Open() { |
||||
Open({"default"}); |
||||
} |
||||
|
||||
DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); } |
||||
|
||||
int GetProperty(int cf, std::string property) { |
||||
std::string value; |
||||
ASSERT_TRUE(dbfull()->GetProperty(handles_[cf], property, &value)); |
||||
return std::stoi(value); |
||||
} |
||||
|
||||
void Destroy() { |
||||
for (auto h : handles_) { |
||||
delete h; |
||||
} |
||||
handles_.clear(); |
||||
names_.clear(); |
||||
delete db_; |
||||
db_ = nullptr; |
||||
ASSERT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_))); |
||||
} |
||||
|
||||
void CreateColumnFamilies( |
||||
const std::vector<std::string>& cfs, |
||||
const std::vector<ColumnFamilyOptions> options = {}) { |
||||
int cfi = handles_.size(); |
||||
handles_.resize(cfi + cfs.size()); |
||||
names_.resize(cfi + cfs.size()); |
||||
for (size_t i = 0; i < cfs.size(); ++i) { |
||||
ASSERT_OK(db_->CreateColumnFamily( |
||||
options.size() == 0 ? column_family_options_ : options[i], cfs[i], |
||||
&handles_[cfi])); |
||||
names_[cfi] = cfs[i]; |
||||
cfi++; |
||||
} |
||||
} |
||||
|
||||
void Reopen(const std::vector<ColumnFamilyOptions> options = {}) { |
||||
std::vector<std::string> names; |
||||
for (auto name : names_) { |
||||
if (name != "") { |
||||
names.push_back(name); |
||||
} |
||||
} |
||||
Close(); |
||||
assert(options.size() == 0 || names.size() == options.size()); |
||||
Open(names, options); |
||||
} |
||||
|
||||
void CreateColumnFamiliesAndReopen(const std::vector<std::string>& cfs) { |
||||
CreateColumnFamilies(cfs); |
||||
Reopen(); |
||||
} |
||||
|
||||
void DropColumnFamilies(const std::vector<int>& cfs) { |
||||
for (auto cf : cfs) { |
||||
ASSERT_OK(db_->DropColumnFamily(handles_[cf])); |
||||
delete handles_[cf]; |
||||
handles_[cf] = nullptr; |
||||
names_[cf] = ""; |
||||
} |
||||
} |
||||
|
||||
void PutRandomData(int cf, int num, int key_value_size) { |
||||
for (int i = 0; i < num; ++i) { |
||||
// 10 bytes for key, rest is value
|
||||
ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 10), |
||||
RandomString(&rnd_, key_value_size - 10))); |
||||
} |
||||
} |
||||
|
||||
void WaitForFlush(int cf) { |
||||
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); |
||||
} |
||||
|
||||
void WaitForCompaction() { ASSERT_OK(dbfull()->TEST_WaitForCompact()); } |
||||
|
||||
Status Put(int cf, const std::string& key, const std::string& value) { |
||||
return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(value)); |
||||
} |
||||
Status Merge(int cf, const std::string& key, const std::string& value) { |
||||
return db_->Merge(WriteOptions(), handles_[cf], Slice(key), Slice(value)); |
||||
} |
||||
Status Flush(int cf) { |
||||
return db_->Flush(FlushOptions(), handles_[cf]); |
||||
} |
||||
|
||||
std::string Get(int cf, const std::string& key) { |
||||
ReadOptions options; |
||||
options.verify_checksums = true; |
||||
std::string result; |
||||
Status s = db_->Get(options, handles_[cf], Slice(key), &result); |
||||
if (s.IsNotFound()) { |
||||
result = "NOT_FOUND"; |
||||
} else if (!s.ok()) { |
||||
result = s.ToString(); |
||||
} |
||||
return result; |
||||
} |
||||
|
||||
void CompactAll(int cf) { |
||||
ASSERT_OK(db_->CompactRange(handles_[cf], nullptr, nullptr)); |
||||
} |
||||
|
||||
void Compact(int cf, const Slice& start, const Slice& limit) { |
||||
ASSERT_OK(db_->CompactRange(handles_[cf], &start, &limit)); |
||||
} |
||||
|
||||
int NumTableFilesAtLevel(int level, int cf) { |
||||
return GetProperty(cf, |
||||
"rocksdb.num-files-at-level" + std::to_string(level)); |
||||
} |
||||
|
||||
// Return spread of files per level
|
||||
std::string FilesPerLevel(int cf) { |
||||
std::string result; |
||||
int last_non_zero_offset = 0; |
||||
for (int level = 0; level < dbfull()->NumberLevels(handles_[cf]); level++) { |
||||
int f = NumTableFilesAtLevel(level, cf); |
||||
char buf[100]; |
||||
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); |
||||
result += buf; |
||||
if (f > 0) { |
||||
last_non_zero_offset = result.size(); |
||||
} |
||||
} |
||||
result.resize(last_non_zero_offset); |
||||
return result; |
||||
} |
||||
|
||||
int CountLiveFiles(int cf) { |
||||
std::vector<LiveFileMetaData> metadata; |
||||
db_->GetLiveFilesMetaData(&metadata); |
||||
return static_cast<int>(metadata.size()); |
||||
} |
||||
|
||||
// Do n memtable flushes, each of which produces an sstable
|
||||
// covering the range [small,large].
|
||||
void MakeTables(int cf, int n, const std::string& small, |
||||
const std::string& large) { |
||||
for (int i = 0; i < n; i++) { |
||||
ASSERT_OK(Put(cf, small, "begin")); |
||||
ASSERT_OK(Put(cf, large, "end")); |
||||
ASSERT_OK(db_->Flush(FlushOptions(), handles_[cf])); |
||||
} |
||||
} |
||||
|
||||
int CountLiveLogFiles() { |
||||
int micros_wait_for_log_deletion = 20000; |
||||
env_->SleepForMicroseconds(micros_wait_for_log_deletion); |
||||
int ret = 0; |
||||
VectorLogPtr wal_files; |
||||
Status s; |
||||
// GetSortedWalFiles is a flakey function -- it gets all the wal_dir
|
||||
// children files and then later checks for their existance. if some of the
|
||||
// log files doesn't exist anymore, it reports an error. it does all of this
|
||||
// without DB mutex held, so if a background process deletes the log file
|
||||
// while the function is being executed, it returns an error. We retry the
|
||||
// function 10 times to avoid the error failing the test
|
||||
for (int retries = 0; retries < 10; ++retries) { |
||||
wal_files.clear(); |
||||
s = db_->GetSortedWalFiles(wal_files); |
||||
if (s.ok()) { |
||||
break; |
||||
} |
||||
} |
||||
ASSERT_OK(s); |
||||
for (const auto& wal : wal_files) { |
||||
if (wal->Type() == kAliveLogFile) { |
||||
++ret; |
||||
} |
||||
} |
||||
return ret; |
||||
} |
||||
|
||||
void AssertNumberOfImmutableMemtables(std::vector<int> num_per_cf) { |
||||
assert(num_per_cf.size() == handles_.size()); |
||||
|
||||
for (size_t i = 0; i < num_per_cf.size(); ++i) { |
||||
ASSERT_EQ(num_per_cf[i], |
||||
GetProperty(i, "rocksdb.num-immutable-mem-table")); |
||||
} |
||||
} |
||||
|
||||
void CopyFile(const std::string& source, const std::string& destination, |
||||
uint64_t size = 0) { |
||||
const EnvOptions soptions; |
||||
unique_ptr<SequentialFile> srcfile; |
||||
ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions)); |
||||
unique_ptr<WritableFile> destfile; |
||||
ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions)); |
||||
|
||||
if (size == 0) { |
||||
// default argument means copy everything
|
||||
ASSERT_OK(env_->GetFileSize(source, &size)); |
||||
} |
||||
|
||||
char buffer[4096]; |
||||
Slice slice; |
||||
while (size > 0) { |
||||
uint64_t one = std::min(uint64_t(sizeof(buffer)), size); |
||||
ASSERT_OK(srcfile->Read(one, &slice, buffer)); |
||||
ASSERT_OK(destfile->Append(slice)); |
||||
size -= slice.size(); |
||||
} |
||||
ASSERT_OK(destfile->Close()); |
||||
} |
||||
|
||||
std::vector<ColumnFamilyHandle*> handles_; |
||||
std::vector<std::string> names_; |
||||
ColumnFamilyOptions column_family_options_; |
||||
DBOptions db_options_; |
||||
std::string dbname_; |
||||
DB* db_ = nullptr; |
||||
Env* env_; |
||||
Random rnd_; |
||||
}; |
||||
|
||||
// Verifies that a dropped column family's ID is never reused, including
// across DB reopens.
TEST(ColumnFamilyTest, DontReuseColumnFamilyID) {
  for (int iter = 0; iter < 3; ++iter) {
    Open();
    CreateColumnFamilies({"one", "two", "three"});
    // freshly created families get sequential IDs matching their index
    for (size_t i = 0; i < handles_.size(); ++i) {
      auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(handles_[i]);
      ASSERT_EQ(i, cfh->GetID());
    }
    if (iter == 1) {
      Reopen();
    }
    DropColumnFamilies({3});
    Reopen();
    if (iter == 2) {
      // this tests if max_column_family is correctly persisted with
      // WriteSnapshot()
      Reopen();
    }
    CreateColumnFamilies({"three2"});
    // ID 3 that was used for dropped column family "three" should not be reused
    auto cfh3 = reinterpret_cast<ColumnFamilyHandleImpl*>(handles_[3]);
    ASSERT_EQ(4, cfh3->GetID());
    Close();
    Destroy();
  }
}
||||
|
||||
|
||||
// Exercises create/drop of column families, reads against missing keys,
// reopen with the wrong family set, and ListColumnFamilies.
TEST(ColumnFamilyTest, AddDrop) {
  Open();
  CreateColumnFamilies({"one", "two", "three"});
  ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
  ASSERT_EQ("NOT_FOUND", Get(2, "fodor"));
  DropColumnFamilies({2});
  ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
  CreateColumnFamilies({"four"});
  ASSERT_EQ("NOT_FOUND", Get(3, "fodor"));
  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  ASSERT_EQ("NOT_FOUND", Get(3, "fodor"));
  Close();
  // opening with only "default" must fail: other families still exist
  ASSERT_TRUE(TryOpen({"default"}).IsInvalidArgument());
  Open({"default", "one", "three", "four"});
  DropColumnFamilies({1});
  Reopen();
  Close();

  std::vector<std::string> families;
  ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &families));
  sort(families.begin(), families.end());
  ASSERT_TRUE(families ==
              std::vector<std::string>({"default", "four", "three"}));
}
||||
|
||||
// Verifies that dropping a column family deletes all of its files.
TEST(ColumnFamilyTest, DropTest) {
  // first iteration - dont reopen DB before dropping
  // second iteration - reopen DB before dropping
  for (int iter = 0; iter < 2; ++iter) {
    Open({"default"});
    CreateColumnFamiliesAndReopen({"pikachu"});
    for (int i = 0; i < 100; ++i) {
      ASSERT_OK(Put(1, std::to_string(i), "bar" + std::to_string(i)));
    }
    ASSERT_OK(Flush(1));

    if (iter == 1) {
      Reopen();
    }
    ASSERT_EQ("bar1", Get(1, "1"));

    ASSERT_EQ(CountLiveFiles(1), 1);
    DropColumnFamilies({1});
    // make sure that all files are deleted when we drop the column family
    ASSERT_EQ(CountLiveFiles(1), 0);
    Destroy();
  }
}
||||
|
||||
// A write batch referencing a dropped column family must be rejected
// with InvalidArgument.
TEST(ColumnFamilyTest, WriteBatchFailure) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});
  WriteBatch batch;
  batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"));
  ASSERT_OK(db_->Write(WriteOptions(), &batch));
  DropColumnFamilies({1});
  // same batch, but the column family no longer exists
  Status s = db_->Write(WriteOptions(), &batch);
  ASSERT_TRUE(s.IsInvalidArgument());
  Close();
}
||||
|
||||
// Basic read/write isolation between column families, checked before and
// after reopens (data still in memtables/WAL, not flushed).
TEST(ColumnFamilyTest, ReadWrite) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});
  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));
  ASSERT_OK(Put(2, "fodor", "v5"));

  for (int iter = 0; iter <= 3; ++iter) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    // keys written to one family must not leak into another
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));
    if (iter <= 1) {
      Reopen();
    }
  }
  Close();
}
||||
|
||||
// Verifies that already-recovered WAL files are not replayed a second
// time: replaying a merge twice would produce a different (wrong) value.
TEST(ColumnFamilyTest, IgnoreRecoveredLog) {
  std::string backup_logs = dbname_ + "/backup_logs";

  // delete old files in backup_logs directory
  ASSERT_OK(env_->CreateDirIfMissing(dbname_));
  ASSERT_OK(env_->CreateDirIfMissing(backup_logs));
  std::vector<std::string> old_files;
  env_->GetChildren(backup_logs, &old_files);
  for (auto& file : old_files) {
    if (file != "." && file != "..") {
      env_->DeleteFile(backup_logs + "/" + file);
    }
  }

  // uint64-add merges make double-replay observable (value would double)
  column_family_options_.merge_operator =
      MergeOperators::CreateUInt64AddOperator();
  db_options_.wal_dir = dbname_ + "/logs";
  Destroy();
  Open();
  CreateColumnFamilies({"cf1", "cf2"});

  // fill up the DB
  std::string one, two, three;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);
  ASSERT_OK(Merge(0, "foo", one));
  ASSERT_OK(Merge(1, "mirko", one));
  ASSERT_OK(Merge(0, "foo", one));
  ASSERT_OK(Merge(2, "bla", one));
  ASSERT_OK(Merge(2, "fodor", one));
  ASSERT_OK(Merge(0, "bar", one));
  ASSERT_OK(Merge(2, "bla", one));
  ASSERT_OK(Merge(1, "mirko", two));
  ASSERT_OK(Merge(1, "franjo", one));

  // copy the logs to backup
  std::vector<std::string> logs;
  env_->GetChildren(db_options_.wal_dir, &logs);
  for (auto& log : logs) {
    if (log != ".." && log != ".") {
      CopyFile(db_options_.wal_dir + "/" + log, backup_logs + "/" + log);
    }
  }

  // recover the DB
  Close();

  // 1. check consistency
  // 2. copy the logs from backup back to WAL dir. if the recovery happens
  // again on the same log files, this should lead to incorrect results
  // due to applying merge operator twice
  // 3. check consistency
  for (int iter = 0; iter < 2; ++iter) {
    // assert consistency
    Open({"default", "cf1", "cf2"});
    ASSERT_EQ(two, Get(0, "foo"));
    ASSERT_EQ(one, Get(0, "bar"));
    ASSERT_EQ(three, Get(1, "mirko"));
    ASSERT_EQ(one, Get(1, "franjo"));
    ASSERT_EQ(one, Get(2, "fodor"));
    ASSERT_EQ(two, Get(2, "bla"));
    Close();

    if (iter == 0) {
      // copy the logs from backup back to wal dir
      for (auto& log : logs) {
        if (log != ".." && log != ".") {
          CopyFile(backup_logs + "/" + log, db_options_.wal_dir + "/" + log);
        }
      }
    }
  }
}
||||
|
||||
// Same checks as ReadWrite, but with every column family explicitly
// flushed to sstables before the reopens.
TEST(ColumnFamilyTest, FlushTest) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});
  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));
  ASSERT_OK(Put(2, "fodor", "v5"));
  for (int i = 0; i < 3; ++i) {
    Flush(i);
  }
  Reopen();

  for (int iter = 0; iter <= 2; ++iter) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));
    if (iter <= 1) {
      Reopen();
    }
  }
  Close();
}
||||
|
||||
// Makes sure that obsolete log files get deleted
|
||||
// Tracks the expected set of live WAL files as different column families
// are written and flushed; a log can only be deleted once every family
// with data in it has been flushed.
TEST(ColumnFamilyTest, LogDeletionTest) {
  column_family_options_.write_buffer_size = 100000;  // 100KB
  Open();
  CreateColumnFamilies({"one", "two", "three", "four"});
  // Each bracket is one log file. if number is in (), it means
  // we don't need it anymore (it's been flushed)
  // []
  ASSERT_EQ(CountLiveLogFiles(), 0);
  PutRandomData(0, 1, 100);
  // [0]
  PutRandomData(1, 1, 100);
  // [0, 1]
  PutRandomData(1, 1000, 100);
  WaitForFlush(1);
  // [0, (1)] [1]
  ASSERT_EQ(CountLiveLogFiles(), 2);
  PutRandomData(0, 1, 100);
  // [0, (1)] [0, 1]
  ASSERT_EQ(CountLiveLogFiles(), 2);
  PutRandomData(2, 1, 100);
  // [0, (1)] [0, 1, 2]
  PutRandomData(2, 1000, 100);
  WaitForFlush(2);
  // [0, (1)] [0, 1, (2)] [2]
  ASSERT_EQ(CountLiveLogFiles(), 3);
  PutRandomData(2, 1000, 100);
  WaitForFlush(2);
  // [0, (1)] [0, 1, (2)] [(2)] [2]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  PutRandomData(3, 1, 100);
  // [0, (1)] [0, 1, (2)] [(2)] [2, 3]
  PutRandomData(1, 1, 100);
  // [0, (1)] [0, 1, (2)] [(2)] [1, 2, 3]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  PutRandomData(1, 1000, 100);
  WaitForFlush(1);
  // [0, (1)] [0, (1), (2)] [(2)] [(1), 2, 3] [1]
  ASSERT_EQ(CountLiveLogFiles(), 5);
  PutRandomData(0, 1000, 100);
  WaitForFlush(0);
  // [(0), (1)] [(0), (1), (2)] [(2)] [(1), 2, 3] [1, (0)] [0]
  // delete obsolete logs -->
  // [(1), 2, 3] [1, (0)] [0]
  ASSERT_EQ(CountLiveLogFiles(), 3);
  PutRandomData(0, 1000, 100);
  WaitForFlush(0);
  // [(1), 2, 3] [1, (0)], [(0)] [0]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  PutRandomData(1, 1000, 100);
  WaitForFlush(1);
  // [(1), 2, 3] [(1), (0)] [(0)] [0, (1)] [1]
  ASSERT_EQ(CountLiveLogFiles(), 5);
  PutRandomData(2, 1000, 100);
  WaitForFlush(2);
  // [(1), (2), 3] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2]
  ASSERT_EQ(CountLiveLogFiles(), 6);
  PutRandomData(3, 1000, 100);
  WaitForFlush(3);
  // [(1), (2), (3)] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2, (3)] [3]
  // delete obsolete logs -->
  // [0, (1)] [1, (2)], [2, (3)] [3]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  Close();
}
||||
|
||||
// Makes sure that obsolete log files get deleted
|
||||
// Gives each column family a different write buffer size and flush
// trigger, then checks immutable-memtable counts and live WAL counts at
// each step.
TEST(ColumnFamilyTest, DifferentWriteBufferSizes) {
  Open();
  CreateColumnFamilies({"one", "two", "three"});
  ColumnFamilyOptions default_cf, one, two, three;
  // setup options. all column families have max_write_buffer_number setup to 10
  // "default" -> 100KB memtable, start flushing immediatelly
  // "one" -> 200KB memtable, start flushing with two immutable memtables
  // "two" -> 1MB memtable, start flushing with three immutable memtables
  // "three" -> 90KB memtable, start flushing with four immutable memtables
  default_cf.write_buffer_size = 100000;
  default_cf.max_write_buffer_number = 10;
  default_cf.min_write_buffer_number_to_merge = 1;
  one.write_buffer_size = 200000;
  one.max_write_buffer_number = 10;
  one.min_write_buffer_number_to_merge = 2;
  two.write_buffer_size = 1000000;
  two.max_write_buffer_number = 10;
  two.min_write_buffer_number_to_merge = 3;
  three.write_buffer_size = 90000;
  three.max_write_buffer_number = 10;
  three.min_write_buffer_number_to_merge = 4;

  Reopen({default_cf, one, two, three});

  int micros_wait_for_flush = 10000;
  PutRandomData(0, 100, 1000);
  WaitForFlush(0);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 1);
  PutRandomData(1, 200, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 2);
  PutRandomData(2, 1000, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 1, 0});
  ASSERT_EQ(CountLiveLogFiles(), 3);
  PutRandomData(2, 1000, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 2, 0});
  ASSERT_EQ(CountLiveLogFiles(), 4);
  PutRandomData(3, 90, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 2, 1});
  ASSERT_EQ(CountLiveLogFiles(), 5);
  PutRandomData(3, 90, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 2, 2});
  ASSERT_EQ(CountLiveLogFiles(), 6);
  PutRandomData(3, 90, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 2, 3});
  ASSERT_EQ(CountLiveLogFiles(), 7);
  PutRandomData(0, 100, 1000);
  WaitForFlush(0);
  AssertNumberOfImmutableMemtables({0, 1, 2, 3});
  ASSERT_EQ(CountLiveLogFiles(), 8);
  PutRandomData(2, 100, 10000);
  WaitForFlush(2);
  AssertNumberOfImmutableMemtables({0, 1, 0, 3});
  ASSERT_EQ(CountLiveLogFiles(), 9);
  PutRandomData(3, 90, 1000);
  WaitForFlush(3);
  AssertNumberOfImmutableMemtables({0, 1, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 10);
  PutRandomData(3, 90, 1000);
  env_->SleepForMicroseconds(micros_wait_for_flush);
  AssertNumberOfImmutableMemtables({0, 1, 0, 1});
  ASSERT_EQ(CountLiveLogFiles(), 11);
  PutRandomData(1, 200, 1000);
  WaitForFlush(1);
  AssertNumberOfImmutableMemtables({0, 0, 0, 1});
  ASSERT_EQ(CountLiveLogFiles(), 5);
  PutRandomData(3, 90*6, 1000);
  WaitForFlush(3);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 12);
  PutRandomData(0, 100, 1000);
  WaitForFlush(0);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 12);
  PutRandomData(2, 3*100, 10000);
  WaitForFlush(2);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 12);
  PutRandomData(1, 2*200, 1000);
  WaitForFlush(1);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 7);
  Close();
}
||||
|
||||
// Each column family may have its own merge operator (or none): no
// operator -> Merge() is NotSupported; uint64-add and string-append
// produce their respective results.
TEST(ColumnFamilyTest, DifferentMergeOperators) {
  Open();
  CreateColumnFamilies({"first", "second"});
  ColumnFamilyOptions default_cf, first, second;
  first.merge_operator = MergeOperators::CreateUInt64AddOperator();
  second.merge_operator = MergeOperators::CreateStringAppendOperator();
  Reopen({default_cf, first, second});

  std::string one, two, three;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);

  // cf 0 has no merge operator -> Merge must fail and leave value intact
  ASSERT_OK(Put(0, "foo", two));
  ASSERT_OK(Put(0, "foo", one));
  ASSERT_TRUE(Merge(0, "foo", two).IsNotSupported());
  ASSERT_EQ(Get(0, "foo"), one);

  // cf 1: uint64 addition (1 + 2 == 3)
  ASSERT_OK(Put(1, "foo", two));
  ASSERT_OK(Put(1, "foo", one));
  ASSERT_OK(Merge(1, "foo", two));
  ASSERT_EQ(Get(1, "foo"), three);

  // cf 2: string append with "," separator
  ASSERT_OK(Put(2, "foo", two));
  ASSERT_OK(Put(2, "foo", one));
  ASSERT_OK(Merge(2, "foo", two));
  ASSERT_EQ(Get(2, "foo"), one + "," + two);
  Close();
}
||||
|
||||
// Runs level-style, universal-style, and 4-level level-style compactions
// side by side in three column families of the same DB.
TEST(ColumnFamilyTest, DifferentCompactionStyles) {
  Open();
  CreateColumnFamilies({"one", "two"});
  ColumnFamilyOptions default_cf, one, two;
  db_options_.max_open_files = 20;  // only 10 files in file cache
  db_options_.disableDataSync = true;

  default_cf.compaction_style = kCompactionStyleLevel;
  default_cf.num_levels = 3;
  default_cf.write_buffer_size = 64 << 10;  // 64KB
  default_cf.target_file_size_base = 30 << 10;
  default_cf.filter_policy = nullptr;
  default_cf.no_block_cache = true;
  default_cf.source_compaction_factor = 100;
  default_cf.disable_seek_compaction = false;

  one.compaction_style = kCompactionStyleUniversal;
  // trigger compaction if there are >= 4 files
  one.level0_file_num_compaction_trigger = 4;
  one.write_buffer_size = 100000;

  two.compaction_style = kCompactionStyleLevel;
  two.num_levels = 4;
  two.max_mem_compaction_level = 0;
  two.level0_file_num_compaction_trigger = 3;
  two.write_buffer_size = 100000;

  Reopen({default_cf, one, two});

  // SETUP column family "default" - test read compaction
  ASSERT_EQ("", FilesPerLevel(0));
  PutRandomData(0, 1, 4096);
  ASSERT_OK(Flush(0));
  ASSERT_EQ("0,0,1", FilesPerLevel(0));
  // write 8MB
  PutRandomData(0, 2000, 4096);
  ASSERT_OK(Flush(0));
  // clear levels 0 and 1
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[0]);
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[0]);
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);
  // write some new keys into level 0 and 1
  PutRandomData(0, 1024, 512);
  ASSERT_OK(Flush(0));
  WaitForCompaction();
  PutRandomData(0, 10, 512);
  ASSERT_OK(Flush(0));
  // remember number of files in each level
  int l1 = NumTableFilesAtLevel(0, 0);
  int l2 = NumTableFilesAtLevel(1, 0);
  int l3 = NumTableFilesAtLevel(2, 0);
  ASSERT_NE(l1, 0);
  ASSERT_NE(l2, 0);
  ASSERT_NE(l3, 0);

  // SETUP column family "one" -- universal style
  for (int i = 0; i < one.level0_file_num_compaction_trigger - 1; ++i) {
    PutRandomData(1, 11, 10000);
    WaitForFlush(1);
    ASSERT_EQ(std::to_string(i + 1), FilesPerLevel(1));
  }

  // SETUP column family "two" -- level style with 4 levels
  for (int i = 0; i < two.level0_file_num_compaction_trigger - 1; ++i) {
    PutRandomData(2, 15, 10000);
    WaitForFlush(2);
    ASSERT_EQ(std::to_string(i + 1), FilesPerLevel(2));
  }

  // TRIGGER compaction "default"
  // read a bunch of times, trigger read compaction
  for (int i = 0; i < 200000; ++i) {
    Get(0, std::to_string(i));
  }

  // TRIGGER compaction "one"
  PutRandomData(1, 12, 10000);

  // TRIGGER compaction "two"
  PutRandomData(2, 10, 10000);

  // WAIT for compactions
  WaitForCompaction();

  // VERIFY compaction "default"
  // verify that the number of files have decreased
  // in some level, indicating that there was a compaction
  ASSERT_TRUE(NumTableFilesAtLevel(0, 0) < l1 ||
              NumTableFilesAtLevel(1, 0) < l2 ||
              NumTableFilesAtLevel(2, 0) < l3);

  // VERIFY compaction "one"
  ASSERT_EQ("1", FilesPerLevel(1));

  // VERIFY compaction "two"
  ASSERT_EQ("0,1", FilesPerLevel(2));
  CompactAll(2);
  ASSERT_EQ("0,1", FilesPerLevel(2));

  Close();
}
||||
|
||||
namespace { |
||||
std::string IterStatus(Iterator* iter) { |
||||
std::string result; |
||||
if (iter->Valid()) { |
||||
result = iter->key().ToString() + "->" + iter->value().ToString(); |
||||
} else { |
||||
result = "(invalid)"; |
||||
} |
||||
return result; |
||||
} |
||||
} // anonymous namespace
|
||||
|
||||
// NewIterators() returns one iterator per column family; tailing
// iterators observe writes made after creation, regular ones do not.
TEST(ColumnFamilyTest, NewIteratorsTest) {
  // iter == 0 -- no tailing
  // iter == 1 -- tailing
  for (int iter = 0; iter < 2; ++iter) {
    Open();
    CreateColumnFamiliesAndReopen({"one", "two"});
    ASSERT_OK(Put(0, "a", "b"));
    ASSERT_OK(Put(1, "b", "a"));
    ASSERT_OK(Put(2, "c", "m"));
    ASSERT_OK(Put(2, "v", "t"));
    std::vector<Iterator*> iterators;
    ReadOptions options;
    options.tailing = (iter == 1);
    ASSERT_OK(db_->NewIterators(options, handles_, &iterators));

    for (auto it : iterators) {
      it->SeekToFirst();
    }
    ASSERT_EQ(IterStatus(iterators[0]), "a->b");
    ASSERT_EQ(IterStatus(iterators[1]), "b->a");
    ASSERT_EQ(IterStatus(iterators[2]), "c->m");

    // written AFTER the iterators were created
    ASSERT_OK(Put(1, "x", "x"));

    for (auto it : iterators) {
      it->Next();
    }

    ASSERT_EQ(IterStatus(iterators[0]), "(invalid)");
    if (iter == 0) {
      // no tailing
      ASSERT_EQ(IterStatus(iterators[1]), "(invalid)");
    } else {
      // tailing: the new write is visible
      ASSERT_EQ(IterStatus(iterators[1]), "x->x");
    }
    ASSERT_EQ(IterStatus(iterators[2]), "v->t");

    for (auto it : iterators) {
      delete it;
    }
    Destroy();
  }
}
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
int main(int argc, char** argv) {
  // Run every TEST() registered in this file via the rocksdb test harness.
  return rocksdb::test::RunAllTests();
}
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,71 @@ |
||||
# Builds db_sanity_test twice (labeled with a "new" and an "old" commit hash)
# and verifies that a database created by each build can be opened and
# verified by the other — a forward/backward compatibility smoke test.
# Exit codes: 1 = build failure, 2 = verification failure.
TMP_DIR="/tmp/rocksdb-sanity-test"

if [ "$#" -lt 2 ]; then
  echo "usage: ./auto_sanity_test.sh [new_commit] [old_commit]"
  echo "Missing either [new_commit] or [old_commit], perform sanity check with the latest and 10th latest commits."
  # Take the newest and the 10th-newest commit hashes from git log.
  recent_commits=`git log | grep -e "^commit [a-z0-9]\+$"| head -n10 | sed -e 's/commit //g'`
  commit_new=`echo "$recent_commits" | head -n1`
  commit_old=`echo "$recent_commits" | tail -n1`
  echo "the most recent commits are:"
  echo "$recent_commits"
else
  commit_new="$1"
  commit_old="$2"
fi

# -p avoids the racy test-then-create and is a no-op if the dir exists.
mkdir -p "$TMP_DIR"
dir_new="${TMP_DIR}/${commit_new}"
dir_old="${TMP_DIR}/${commit_old}"

# Rebuild db_sanity_test from the current tree; abort the run on failure.
function makestuff() {
  echo "make clean"
  make clean > /dev/null
  echo "make db_sanity_test -j32"
  make db_sanity_test -j32 > /dev/null
  if [ $? -ne 0 ]; then
    echo "[ERROR] Failed to perform 'make db_sanity_test'"
    exit 1
  fi
}

rm -r -f "$dir_new"
rm -r -f "$dir_old"

echo "Running db sanity check with commits $commit_new and $commit_old."

echo "============================================================="
echo "Making build $commit_new"
# NOTE(review): the working tree is never checked out at $commit_new or
# $commit_old, so both builds come from the current tree; the hashes only
# label the output directories/binaries. Confirm whether a 'git checkout'
# step was intended here.
makestuff
mv db_sanity_test new_db_sanity_test
echo "Creating db based on the new commit --- $commit_new"
./new_db_sanity_test "$dir_new" create

echo "============================================================="
echo "Making build $commit_old"
makestuff
mv db_sanity_test old_db_sanity_test
echo "Creating db based on the old commit --- $commit_old"
./old_db_sanity_test "$dir_old" create

echo "============================================================="
echo "Verifying new db $dir_new using the old commit --- $commit_old"
./old_db_sanity_test "$dir_new" verify
if [ $? -ne 0 ]; then
  echo "[ERROR] Verification of $dir_new using commit $commit_old failed."
  exit 2
fi

echo "============================================================="
echo "Verifying old db $dir_old using the new commit --- $commit_new"
./new_db_sanity_test "$dir_old" verify
if [ $? -ne 0 ]; then
  echo "[ERROR] Verification of $dir_old using commit $commit_new failed."
  exit 2
fi

rm old_db_sanity_test
rm new_db_sanity_test

echo "Auto sanity test passed!"
@ -0,0 +1,62 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include "util/sync_point.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
SyncPoint* SyncPoint::GetInstance() {
  // Meyers singleton: constructed on first use, shared process-wide.
  static SyncPoint instance;
  return &instance;
}
||||
|
||||
void SyncPoint::LoadDependency(const std::vector<Dependency>& dependencies) { |
||||
successors_.clear(); |
||||
predecessors_.clear(); |
||||
cleared_points_.clear(); |
||||
for (const auto& dependency : dependencies) { |
||||
successors_[dependency.predecessor].push_back(dependency.successor); |
||||
predecessors_[dependency.successor].push_back(dependency.predecessor); |
||||
} |
||||
} |
||||
|
||||
bool SyncPoint::PredecessorsAllCleared(const std::string& point) { |
||||
for (const auto& pred : predecessors_[point]) { |
||||
if (cleared_points_.count(pred) == 0) { |
||||
return false; |
||||
} |
||||
} |
||||
return true; |
||||
} |
||||
|
||||
void SyncPoint::EnableProcessing() { |
||||
std::unique_lock<std::mutex> lock(mutex_); |
||||
enabled_ = true; |
||||
} |
||||
|
||||
void SyncPoint::DisableProcessing() { |
||||
std::unique_lock<std::mutex> lock(mutex_); |
||||
enabled_ = false; |
||||
} |
||||
|
||||
void SyncPoint::ClearTrace() { |
||||
std::unique_lock<std::mutex> lock(mutex_); |
||||
cleared_points_.clear(); |
||||
} |
||||
|
||||
void SyncPoint::Process(const std::string& point) { |
||||
std::unique_lock<std::mutex> lock(mutex_); |
||||
|
||||
if (!enabled_) return; |
||||
|
||||
while (!PredecessorsAllCleared(point)) { |
||||
cv_.wait(lock); |
||||
} |
||||
|
||||
cleared_points_.insert(point); |
||||
cv_.notify_all(); |
||||
} |
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,79 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
#pragma once |
||||
|
||||
#include <condition_variable> |
||||
#include <mutex> |
||||
#include <string> |
||||
#include <unordered_set> |
||||
#include <unordered_map> |
||||
#include <vector> |
||||
|
||||
namespace rocksdb { |
||||
|
||||
// This class provides facility to reproduce race conditions deterministically
|
||||
// in unit tests.
|
||||
// Developer could specify sync points in the codebase via TEST_SYNC_POINT.
|
||||
// Each sync point represents a position in the execution stream of a thread.
|
||||
// In the unit test, 'Happens After' relationship among sync points could be
|
||||
// setup via SyncPoint::LoadDependency, to reproduce a desired interleave of
|
||||
// threads execution.
|
||||
// Refer to (DBTest,TransactionLogIteratorRace) for an example use case.
||||
|
||||
class SyncPoint {
 public:
  // Returns the process-wide singleton instance.
  static SyncPoint* GetInstance();

  // A "happens-after" edge: the point named `successor` may not proceed
  // until the point named `predecessor` has been processed.
  struct Dependency {
    std::string predecessor;
    std::string successor;
  };
  // call once at the beginning of a test to setup the dependency between
  // sync points
  void LoadDependency(const std::vector<Dependency>& dependencies);

  // enable sync point processing (disabled on startup)
  void EnableProcessing();

  // disable sync point processing
  void DisableProcessing();

  // remove the execution trace of all sync points
  void ClearTrace();

  // triggered by TEST_SYNC_POINT, blocking execution until all predecessors
  // are executed.
  void Process(const std::string& point);

  // TODO: it might be useful to provide a function that blocks until all
  // sync points are cleared.

 private:
  // Returns true iff every predecessor of `point` is already in
  // cleared_points_.
  bool PredecessorsAllCleared(const std::string& point);

  // successor/predecessor map loaded from LoadDependency
  std::unordered_map<std::string, std::vector<std::string>> successors_;
  std::unordered_map<std::string, std::vector<std::string>> predecessors_;

  // mutex_ guards all mutable state; cv_ blocks threads in Process() until
  // their predecessors clear.
  std::mutex mutex_;
  std::condition_variable cv_;
  // sync points that have been passed through
  std::unordered_set<std::string> cleared_points_;
  // when false, Process() is a no-op (the startup default)
  bool enabled_ = false;
};
||||
|
||||
} // namespace rocksdb
|
||||
|
||||
// Use TEST_SYNC_POINT to specify sync points inside code base.
|
||||
// Sync points can have happens-after dependency on other sync points,
||||
// configured at runtime via SyncPoint::LoadDependency. This could be
|
||||
// utilized to re-produce race conditions between threads.
|
||||
// See TransactionLogIteratorRace in db_test.cc for an example use case.
|
||||
// TEST_SYNC_POINT is no op in release build.
|
||||
#ifdef NDEBUG
// Release builds compile sync points out entirely (zero overhead).
#define TEST_SYNC_POINT(x)
#else
#define TEST_SYNC_POINT(x) rocksdb::SyncPoint::GetInstance()->Process(x)
#endif
Loading…
Reference in new issue