commit
c65448f95a
@ -0,0 +1,489 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include "db/column_family.h" |
||||||
|
|
||||||
|
#include <vector> |
||||||
|
#include <string> |
||||||
|
#include <algorithm> |
||||||
|
|
||||||
|
#include "db/db_impl.h" |
||||||
|
#include "db/version_set.h" |
||||||
|
#include "db/internal_stats.h" |
||||||
|
#include "db/compaction_picker.h" |
||||||
|
#include "db/table_properties_collector.h" |
||||||
|
#include "util/autovector.h" |
||||||
|
#include "util/hash_skiplist_rep.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Wraps `cfd` in a client-facing handle and remembers the owning DB and its
// mutex. A non-null column family gets one extra reference, which the
// destructor releases again.
ColumnFamilyHandleImpl::ColumnFamilyHandleImpl(ColumnFamilyData* cfd,
                                               DBImpl* db, port::Mutex* mutex)
    : cfd_(cfd), db_(db), mutex_(mutex) {
  if (cfd_ == nullptr) {
    // Nothing to reference (e.g. the internal dummy handle).
    return;
  }
  cfd_->Ref();
}
||||||
|
|
||||||
|
// Releases the reference taken in the constructor. If that was the last
// reference the column family is deleted, and any files that became obsolete
// are collected under the mutex and purged after it has been released.
ColumnFamilyHandleImpl::~ColumnFamilyHandleImpl() {
  if (cfd_ == nullptr) {
    return;
  }

  DBImpl::DeletionState deletion_state;

  mutex_->Lock();
  const bool was_last_reference = cfd_->Unref();
  if (was_last_reference) {
    delete cfd_;
  }
  db_->FindObsoleteFiles(deletion_state, false, true);
  mutex_->Unlock();

  // The purge itself runs without the mutex held.
  if (deletion_state.HaveSomethingToDelete()) {
    db_->PurgeObsoleteFiles(deletion_state);
  }
}
||||||
|
|
||||||
|
uint32_t ColumnFamilyHandleImpl::GetID() const { return cfd()->GetID(); } |
||||||
|
|
||||||
|
namespace { |
||||||
|
// Fix user-supplied options to be reasonable
|
||||||
|
// Clamps *ptr into [minvalue, maxvalue]. The upper bound is applied first and
// the lower bound second, with comparisons done in type V.
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  T& value = *ptr;
  if (static_cast<V>(value) > maxvalue) {
    value = maxvalue;
  }
  if (static_cast<V>(value) < minvalue) {
    value = minvalue;
  }
}
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// Returns a copy of `src` with user-supplied values adjusted to be usable:
//  * the comparator is replaced by `icmp`, and the filter policy by `ipolicy`
//    when the user supplied one;
//  * numeric options are clamped or reset when they are out of range;
//  * a default block cache is created unless block caching is disabled;
//  * hash-based memtable factories fall back to a skip list when no prefix
//    extractor is configured;
//  * user table-properties collectors are wrapped and an internal-key
//    collector is appended.
ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp,
                                    const InternalFilterPolicy* ipolicy,
                                    const ColumnFamilyOptions& src) {
  ColumnFamilyOptions result = src;
  result.comparator = icmp;
  if (src.filter_policy != nullptr) {
    result.filter_policy = ipolicy;
  } else {
    result.filter_policy = nullptr;
  }

  // Keep the write buffer between 64KB and 64GB.
  ClipToRange(&result.write_buffer_size, ((size_t)64) << 10,
              ((size_t)64) << 30);

  // A user-provided arena_block_size is trusted as is; otherwise derive one
  // from write_buffer_size.
  if (result.arena_block_size <= 0) {
    result.arena_block_size = result.write_buffer_size / 10;
  }

  const auto merge_limit = result.max_write_buffer_number - 1;
  if (merge_limit < result.min_write_buffer_number_to_merge) {
    result.min_write_buffer_number_to_merge = merge_limit;
  }

  if (result.block_cache == nullptr && !result.no_block_cache) {
    result.block_cache = NewLRUCache(8 << 20);
  }
  result.compression_per_level = src.compression_per_level;

  const bool deviation_out_of_range =
      result.block_size_deviation < 0 || result.block_size_deviation > 100;
  if (deviation_out_of_range) {
    result.block_size_deviation = 0;
  }
  if (result.max_mem_compaction_level >= result.num_levels) {
    result.max_mem_compaction_level = result.num_levels - 1;
  }
  if (result.soft_rate_limit > result.hard_rate_limit) {
    result.soft_rate_limit = result.hard_rate_limit;
  }

  if (!result.prefix_extractor) {
    // Without a prefix extractor the hash-based memtable factories are
    // replaced by a plain skip list.
    assert(result.memtable_factory);
    Slice factory_name = result.memtable_factory->Name();
    const bool is_hash_factory =
        factory_name.compare("HashSkipListRepFactory") == 0 ||
        factory_name.compare("HashLinkListRepFactory") == 0;
    if (is_hash_factory) {
      result.memtable_factory = std::make_shared<SkipListFactory>();
    }
  }

  // -- Sanitize the table properties collectors.
  // User-defined collectors only know about user keys, so each one is wrapped
  // in a UserKeyTablePropertiesCollector that hides the internal keys.
  auto& collectors = result.table_properties_collectors;
  for (auto& collector : collectors) {
    assert(collector);
    collector = std::make_shared<UserKeyTablePropertiesCollector>(collector);
  }
  // Add the collector that gathers internal key statistics.
  collectors.push_back(std::make_shared<InternalKeyPropertiesCollector>());

  return result;
}
||||||
|
|
||||||
|
// Only the address of `dummy` matters; its value is never read.
int SuperVersion::dummy = 0;
// Marker stored in thread-local storage while a thread is using its
// SuperVersion. It points at `dummy`, so it cannot collide with the address of
// a real SuperVersion object.
void* const SuperVersion::kSVInUse = &SuperVersion::dummy;
// Marker written into thread-local slots when they are scraped.
void* const SuperVersion::kSVObsolete = nullptr;
||||||
|
|
||||||
|
// Frees every memtable that Cleanup() queued in to_delete.
SuperVersion::~SuperVersion() {
  for (MemTable* doomed : to_delete) {
    delete doomed;
  }
}
||||||
|
|
||||||
|
// Takes one more reference and returns this object so calls can be chained.
SuperVersion* SuperVersion::Ref() {
  refs.fetch_add(1, std::memory_order_relaxed);
  SuperVersion* const self = this;
  return self;
}
||||||
|
|
||||||
|
bool SuperVersion::Unref() { |
||||||
|
// fetch_sub returns the previous value of ref
|
||||||
|
uint32_t previous_refs = refs.fetch_sub(1, std::memory_order_relaxed); |
||||||
|
assert(previous_refs > 0); |
||||||
|
return previous_refs == 1; |
||||||
|
} |
||||||
|
|
||||||
|
void SuperVersion::Cleanup() { |
||||||
|
assert(refs.load(std::memory_order_relaxed) == 0); |
||||||
|
imm->Unref(&to_delete); |
||||||
|
MemTable* m = mem->Unref(); |
||||||
|
if (m != nullptr) { |
||||||
|
to_delete.push_back(m); |
||||||
|
} |
||||||
|
current->Unref(); |
||||||
|
} |
||||||
|
|
||||||
|
// Points this SuperVersion at the given memtable, immutable memtable list
// version and Version, takes a reference on each of them, and sets this
// object's own reference count to one.
void SuperVersion::Init(MemTable* new_mem, MemTableListVersion* new_imm,
                        Version* new_current) {
  // Record the three components first ...
  mem = new_mem;
  imm = new_imm;
  current = new_current;

  // ... then pin each of them.
  mem->Ref();
  imm->Ref();
  current->Ref();

  refs.store(1, std::memory_order_relaxed);
}
||||||
|
|
||||||
|
namespace { |
||||||
|
void SuperVersionUnrefHandle(void* ptr) { |
||||||
|
// UnrefHandle is called when a thread exists or a ThreadLocalPtr gets
|
||||||
|
// destroyed. When former happens, the thread shouldn't see kSVInUse.
|
||||||
|
// When latter happens, we are in ~ColumnFamilyData(), no get should happen as
|
||||||
|
// well.
|
||||||
|
SuperVersion* sv = static_cast<SuperVersion*>(ptr); |
||||||
|
if (sv->Unref()) { |
||||||
|
sv->db_mutex->Lock(); |
||||||
|
sv->Cleanup(); |
||||||
|
sv->db_mutex->Unlock(); |
||||||
|
delete sv; |
||||||
|
} |
||||||
|
} |
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// Builds the state for one column family.
//
// dbname            - database name, forwarded to the TableCache.
// id, name          - identity of the column family.
// dummy_versions    - head of the version list; nullptr marks a dummy column
//                     family, for which no stats, table cache or compaction
//                     picker are created.
// table_cache       - cache forwarded to the TableCache.
// options           - user-supplied options; sanitized into options_.
// db_options        - DB-wide options merged into options_.
// storage_options   - forwarded to the TableCache.
// column_family_set - owning set, or nullptr for the dummy column family.
//
// The object starts with one reference (see the Ref() call in the body).
ColumnFamilyData::ColumnFamilyData(const std::string& dbname, uint32_t id,
                                   const std::string& name,
                                   Version* dummy_versions, Cache* table_cache,
                                   const ColumnFamilyOptions& options,
                                   const DBOptions* db_options,
                                   const EnvOptions& storage_options,
                                   ColumnFamilySet* column_family_set)
    : id_(id),
      name_(name),
      dummy_versions_(dummy_versions),
      current_(nullptr),
      refs_(0),
      dropped_(false),
      // The internal comparator and filter policy wrap the user's originals,
      // so they are built from the unsanitized options.
      internal_comparator_(options.comparator),
      internal_filter_policy_(options.filter_policy),
      options_(*db_options, SanitizeOptions(&internal_comparator_,
                                            &internal_filter_policy_, options)),
      mem_(nullptr),
      // Use the sanitized value: SanitizeOptions() caps
      // min_write_buffer_number_to_merge at max_write_buffer_number - 1.
      // options_ is declared before imm_, so it is initialized by now.
      imm_(options_.min_write_buffer_number_to_merge),
      super_version_(nullptr),
      super_version_number_(0),
      local_sv_(new ThreadLocalPtr(&SuperVersionUnrefHandle)),
      next_(nullptr),
      prev_(nullptr),
      log_number_(0),
      need_slowdown_for_num_level0_files_(false),
      column_family_set_(column_family_set) {
  Ref();

  // if dummy_versions is nullptr, then this is a dummy column family.
  if (dummy_versions != nullptr) {
    internal_stats_.reset(new InternalStats(options_.num_levels,
                                            db_options->env,
                                            db_options->statistics.get()));
    table_cache_.reset(
        new TableCache(dbname, &options_, storage_options, table_cache));
    if (options_.compaction_style == kCompactionStyleUniversal) {
      compaction_picker_.reset(
          new UniversalCompactionPicker(&options_, &internal_comparator_));
    } else {
      compaction_picker_.reset(
          new LevelCompactionPicker(&options_, &internal_comparator_));
    }

    Log(options_.info_log, "Options for column family \"%s\":\n",
        name.c_str());
    // Dump through the ColumnFamilyOptions view of options_.
    const ColumnFamilyOptions* cf_options = &options_;
    cf_options->Dump(options_.info_log.get());
  }
}
||||||
|
|
||||||
|
// DB mutex held
|
||||||
|
ColumnFamilyData::~ColumnFamilyData() { |
||||||
|
assert(refs_ == 0); |
||||||
|
// remove from linked list
|
||||||
|
auto prev = prev_; |
||||||
|
auto next = next_; |
||||||
|
prev->next_ = next; |
||||||
|
next->prev_ = prev; |
||||||
|
|
||||||
|
// it's nullptr for dummy CFD
|
||||||
|
if (column_family_set_ != nullptr) { |
||||||
|
// remove from column_family_set
|
||||||
|
column_family_set_->RemoveColumnFamily(this); |
||||||
|
} |
||||||
|
|
||||||
|
if (current_ != nullptr) { |
||||||
|
current_->Unref(); |
||||||
|
} |
||||||
|
|
||||||
|
if (super_version_ != nullptr) { |
||||||
|
// Release SuperVersion reference kept in ThreadLocalPtr.
|
||||||
|
// This must be done outside of mutex_ since unref handler can lock mutex.
|
||||||
|
super_version_->db_mutex->Unlock(); |
||||||
|
local_sv_.reset(); |
||||||
|
super_version_->db_mutex->Lock(); |
||||||
|
|
||||||
|
bool is_last_reference __attribute__((unused)); |
||||||
|
is_last_reference = super_version_->Unref(); |
||||||
|
assert(is_last_reference); |
||||||
|
super_version_->Cleanup(); |
||||||
|
delete super_version_; |
||||||
|
super_version_ = nullptr; |
||||||
|
} |
||||||
|
|
||||||
|
if (dummy_versions_ != nullptr) { |
||||||
|
// List must be empty
|
||||||
|
assert(dummy_versions_->next_ == dummy_versions_); |
||||||
|
delete dummy_versions_; |
||||||
|
} |
||||||
|
|
||||||
|
if (mem_ != nullptr) { |
||||||
|
delete mem_->Unref(); |
||||||
|
} |
||||||
|
autovector<MemTable*> to_delete; |
||||||
|
imm_.current()->Unref(&to_delete); |
||||||
|
for (MemTable* m : to_delete) { |
||||||
|
delete m; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Installs `current` as the current version and recomputes whether writes
// should slow down because level 0 holds at least
// level0_slowdown_writes_trigger files. A negative trigger never sets the
// flag.
void ColumnFamilyData::SetCurrent(Version* current) {
  current_ = current;
  const auto slowdown_trigger = options_.level0_slowdown_writes_trigger;
  need_slowdown_for_num_level0_files_ =
      slowdown_trigger >= 0 && current_->NumLevelFiles(0) >= slowdown_trigger;
}
||||||
|
|
||||||
|
void ColumnFamilyData::CreateNewMemtable() { |
||||||
|
assert(current_ != nullptr); |
||||||
|
if (mem_ != nullptr) { |
||||||
|
delete mem_->Unref(); |
||||||
|
} |
||||||
|
mem_ = new MemTable(internal_comparator_, options_); |
||||||
|
mem_->Ref(); |
||||||
|
} |
||||||
|
|
||||||
|
// Asks the compaction picker to choose a compaction for the current version.
// See compaction_picker.h for the contract.
Compaction* ColumnFamilyData::PickCompaction(LogBuffer* log_buffer) {
  Compaction* const picked =
      compaction_picker_->PickCompaction(current_, log_buffer);
  return picked;
}
||||||
|
|
||||||
|
// Forwards a manual range-compaction request for the current version to the
// compaction picker. See compaction_picker.h for the contract.
Compaction* ColumnFamilyData::CompactRange(int input_level, int output_level,
                                           const InternalKey* begin,
                                           const InternalKey* end,
                                           InternalKey** compaction_end) {
  Compaction* const compaction = compaction_picker_->CompactRange(
      current_, input_level, output_level, begin, end, compaction_end);
  return compaction;
}
||||||
|
|
||||||
|
// Makes `new_superversion` the current SuperVersion, initialized from the
// present mem_, imm_ and current_, and bumps the SuperVersion number.
// Returns the previous SuperVersion when this call dropped its last
// reference, so the caller can delete it outside of the mutex; returns
// nullptr otherwise.
SuperVersion* ColumnFamilyData::InstallSuperVersion(
    SuperVersion* new_superversion, port::Mutex* db_mutex) {
  new_superversion->db_mutex = db_mutex;
  new_superversion->Init(mem_, imm_.current(), current_);

  SuperVersion* const previous = super_version_;
  super_version_ = new_superversion;
  ++super_version_number_;
  super_version_->version_number = super_version_number_;

  if (previous == nullptr || !previous->Unref()) {
    return nullptr;
  }
  previous->Cleanup();
  return previous;  // caller deletes it outside of the mutex
}
||||||
|
|
||||||
|
void ColumnFamilyData::ResetThreadLocalSuperVersions() { |
||||||
|
autovector<void*> sv_ptrs; |
||||||
|
local_sv_->Scrape(&sv_ptrs, SuperVersion::kSVObsolete); |
||||||
|
for (auto ptr : sv_ptrs) { |
||||||
|
assert(ptr); |
||||||
|
if (ptr == SuperVersion::kSVInUse) { |
||||||
|
continue; |
||||||
|
} |
||||||
|
auto sv = static_cast<SuperVersion*>(ptr); |
||||||
|
if (sv->Unref()) { |
||||||
|
sv->Cleanup(); |
||||||
|
delete sv; |
||||||
|
} |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Creates an empty set of column families. A dummy ColumnFamilyData serves
// as the head of the circular linked list used for iteration.
//
// dbname          - database name, stored for column families created later.
// db_options      - DB-wide options shared by all column families.
// storage_options - environment options shared by all column families.
// table_cache     - cache handed to each column family's TableCache.
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
                                 const DBOptions* db_options,
                                 const EnvOptions& storage_options,
                                 Cache* table_cache)
    : max_column_family_(0),
      // Pass the constructor argument, not the storage_options_ member: the
      // member appears later in this initializer list and is not yet
      // initialized at this point.
      dummy_cfd_(new ColumnFamilyData(dbname, 0, "", nullptr, nullptr,
                                      ColumnFamilyOptions(), db_options,
                                      storage_options, nullptr)),
      default_cfd_cache_(nullptr),
      db_name_(dbname),
      db_options_(db_options),
      storage_options_(storage_options),
      table_cache_(table_cache),
      spin_lock_(ATOMIC_FLAG_INIT) {
  // initialize linked list: the dummy node points at itself in both
  // directions.
  dummy_cfd_->prev_ = dummy_cfd_;
  dummy_cfd_->next_ = dummy_cfd_;
}
||||||
|
|
||||||
|
// Unreferences and deletes every remaining column family, then the dummy
// list head.
ColumnFamilySet::~ColumnFamilySet() {
  while (column_family_data_.size() != 0) {
    // The cfd destructor removes its own entry from column_family_data_, so
    // the map shrinks on every iteration.
    ColumnFamilyData* const victim = column_family_data_.begin()->second;
    victim->Unref();
    delete victim;
  }
  dummy_cfd_->Unref();
  delete dummy_cfd_;
}
||||||
|
|
||||||
|
// Returns the cached pointer to the default column family (ID 0). The cache
// is filled in by CreateColumnFamily() when ID 0 is created.
ColumnFamilyData* ColumnFamilySet::GetDefault() const {
  ColumnFamilyData* const default_cfd = default_cfd_cache_;
  assert(default_cfd != nullptr);
  return default_cfd;
}
||||||
|
|
||||||
|
// Looks up a column family by ID. Returns nullptr when the ID is unknown.
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(uint32_t id) const {
  const auto found = column_family_data_.find(id);
  if (found == column_family_data_.end()) {
    return nullptr;
  }
  return found->second;
}
||||||
|
|
||||||
|
// Looks up a column family by name. Returns nullptr when the name is unknown.
// A known name is expected to always map to an existing ID.
ColumnFamilyData* ColumnFamilySet::GetColumnFamily(const std::string& name)
    const {
  const auto name_entry = column_families_.find(name);
  if (name_entry == column_families_.end()) {
    return nullptr;
  }
  ColumnFamilyData* const cfd = GetColumnFamily(name_entry->second);
  assert(cfd != nullptr);
  return cfd;
}
||||||
|
|
||||||
|
uint32_t ColumnFamilySet::GetNextColumnFamilyID() { |
||||||
|
return ++max_column_family_; |
||||||
|
} |
||||||
|
|
||||||
|
// Returns the largest column family ID recorded so far.
uint32_t ColumnFamilySet::GetMaxColumnFamily() {
  return max_column_family_;
}
||||||
|
|
||||||
|
// Raises the largest known column family ID to `new_max_column_family` when
// that value is higher; never lowers it.
void ColumnFamilySet::UpdateMaxColumnFamily(uint32_t new_max_column_family) {
  if (new_max_column_family > max_column_family_) {
    max_column_family_ = new_max_column_family;
  }
}
||||||
|
|
||||||
|
// under a DB mutex
|
||||||
|
ColumnFamilyData* ColumnFamilySet::CreateColumnFamily( |
||||||
|
const std::string& name, uint32_t id, Version* dummy_versions, |
||||||
|
const ColumnFamilyOptions& options) { |
||||||
|
assert(column_families_.find(name) == column_families_.end()); |
||||||
|
ColumnFamilyData* new_cfd = |
||||||
|
new ColumnFamilyData(db_name_, id, name, dummy_versions, table_cache_, |
||||||
|
options, db_options_, storage_options_, this); |
||||||
|
Lock(); |
||||||
|
column_families_.insert({name, id}); |
||||||
|
column_family_data_.insert({id, new_cfd}); |
||||||
|
Unlock(); |
||||||
|
max_column_family_ = std::max(max_column_family_, id); |
||||||
|
// add to linked list
|
||||||
|
new_cfd->next_ = dummy_cfd_; |
||||||
|
auto prev = dummy_cfd_->prev_; |
||||||
|
new_cfd->prev_ = prev; |
||||||
|
prev->next_ = new_cfd; |
||||||
|
dummy_cfd_->prev_ = new_cfd; |
||||||
|
if (id == 0) { |
||||||
|
default_cfd_cache_ = new_cfd; |
||||||
|
} |
||||||
|
return new_cfd; |
||||||
|
} |
||||||
|
|
||||||
|
void ColumnFamilySet::Lock() { |
||||||
|
// spin lock
|
||||||
|
while (spin_lock_.test_and_set(std::memory_order_acquire)) { |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// Releases the spin lock taken by Lock().
void ColumnFamilySet::Unlock() {
  spin_lock_.clear(std::memory_order_release);
}
||||||
|
|
||||||
|
// REQUIRES: DB mutex held
|
||||||
|
void ColumnFamilySet::FreeDeadColumnFamilies() { |
||||||
|
autovector<ColumnFamilyData*> to_delete; |
||||||
|
for (auto cfd = dummy_cfd_->next_; cfd != dummy_cfd_; cfd = cfd->next_) { |
||||||
|
if (cfd->refs_ == 0) { |
||||||
|
to_delete.push_back(cfd); |
||||||
|
} |
||||||
|
} |
||||||
|
for (auto cfd : to_delete) { |
||||||
|
// this is very rare, so it's not a problem that we do it under a mutex
|
||||||
|
delete cfd; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
// under a DB mutex
|
||||||
|
void ColumnFamilySet::RemoveColumnFamily(ColumnFamilyData* cfd) { |
||||||
|
auto cfd_iter = column_family_data_.find(cfd->GetID()); |
||||||
|
assert(cfd_iter != column_family_data_.end()); |
||||||
|
Lock(); |
||||||
|
column_family_data_.erase(cfd_iter); |
||||||
|
column_families_.erase(cfd->GetName()); |
||||||
|
Unlock(); |
||||||
|
} |
||||||
|
|
||||||
|
// Positions this object on the column family with the given ID and points
// the internal handle at it. Returns false when no such column family exists.
bool ColumnFamilyMemTablesImpl::Seek(uint32_t column_family_id) {
  if (column_family_id != 0) {
    // maybe outside of db mutex, should lock
    column_family_set_->Lock();
    current_ = column_family_set_->GetColumnFamily(column_family_id);
    column_family_set_->Unlock();
  } else {
    // optimization for common case: the default column family
    current_ = column_family_set_->GetDefault();
  }
  handle_.SetCFD(current_);
  const bool found = (current_ != nullptr);
  return found;
}
||||||
|
|
||||||
|
// Returns the log number of the column family selected by the last Seek().
// A column family must be selected.
uint64_t ColumnFamilyMemTablesImpl::GetLogNumber() const {
  assert(current_ != nullptr);
  const uint64_t log_number = current_->GetLogNumber();
  return log_number;
}
||||||
|
|
||||||
|
// Returns the active memtable of the column family selected by the last
// Seek(). A column family must be selected.
MemTable* ColumnFamilyMemTablesImpl::GetMemTable() const {
  assert(current_ != nullptr);
  MemTable* const memtable = current_->mem();
  return memtable;
}
||||||
|
|
||||||
|
// Returns the options of the column family selected by the last Seek().
// A column family must be selected.
const Options* ColumnFamilyMemTablesImpl::GetOptions() const {
  assert(current_ != nullptr);
  const Options* const selected_options = current_->options();
  return selected_options;
}
||||||
|
|
||||||
|
// Returns the internal handle, which Seek() points at the selected column
// family. A column family must be selected.
ColumnFamilyHandle* ColumnFamilyMemTablesImpl::GetColumnFamilyHandle() {
  assert(current_ != nullptr);
  ColumnFamilyHandle* const handle = &handle_;
  return handle;
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,408 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <unordered_map> |
||||||
|
#include <string> |
||||||
|
#include <vector> |
||||||
|
#include <atomic> |
||||||
|
|
||||||
|
#include "rocksdb/options.h" |
||||||
|
#include "rocksdb/db.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "db/memtable_list.h" |
||||||
|
#include "db/write_batch_internal.h" |
||||||
|
#include "db/table_cache.h" |
||||||
|
#include "util/thread_local.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
class Version; |
||||||
|
class VersionSet; |
||||||
|
class MemTable; |
||||||
|
class MemTableListVersion; |
||||||
|
class CompactionPicker; |
||||||
|
class Compaction; |
||||||
|
class InternalKey; |
||||||
|
class InternalStats; |
||||||
|
class ColumnFamilyData; |
||||||
|
class DBImpl; |
||||||
|
class LogBuffer; |
||||||
|
|
||||||
|
// ColumnFamilyHandleImpl is the class that clients use to access different
|
||||||
|
// column families. It has non-trivial destructor, which gets called when client
|
||||||
|
// is done using the column family
|
||||||
|
class ColumnFamilyHandleImpl : public ColumnFamilyHandle { |
||||||
|
public: |
||||||
|
// create while holding the mutex
|
||||||
|
ColumnFamilyHandleImpl(ColumnFamilyData* cfd, DBImpl* db, port::Mutex* mutex); |
||||||
|
// destroy without mutex
|
||||||
|
virtual ~ColumnFamilyHandleImpl(); |
||||||
|
virtual ColumnFamilyData* cfd() const { return cfd_; } |
||||||
|
|
||||||
|
virtual uint32_t GetID() const; |
||||||
|
|
||||||
|
private: |
||||||
|
ColumnFamilyData* cfd_; |
||||||
|
DBImpl* db_; |
||||||
|
port::Mutex* mutex_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Does not ref-count ColumnFamilyData
|
||||||
|
// We use this dummy ColumnFamilyHandleImpl because sometimes MemTableInserter
|
||||||
|
// calls DBImpl methods. When this happens, MemTableInserter need access to
|
||||||
|
// ColumnFamilyHandle (same as the client would need). In that case, we feed
|
||||||
|
// MemTableInserter dummy ColumnFamilyHandle and enable it to call DBImpl
|
||||||
|
// methods
|
||||||
|
class ColumnFamilyHandleInternal : public ColumnFamilyHandleImpl { |
||||||
|
public: |
||||||
|
ColumnFamilyHandleInternal() |
||||||
|
: ColumnFamilyHandleImpl(nullptr, nullptr, nullptr) {} |
||||||
|
|
||||||
|
void SetCFD(ColumnFamilyData* cfd) { internal_cfd_ = cfd; } |
||||||
|
virtual ColumnFamilyData* cfd() const override { return internal_cfd_; } |
||||||
|
|
||||||
|
private: |
||||||
|
ColumnFamilyData* internal_cfd_; |
||||||
|
}; |
||||||
|
|
||||||
|
// holds references to memtable, all immutable memtables and version
|
||||||
|
struct SuperVersion { |
||||||
|
MemTable* mem; |
||||||
|
MemTableListVersion* imm; |
||||||
|
Version* current; |
||||||
|
std::atomic<uint32_t> refs; |
||||||
|
// We need to_delete because during Cleanup(), imm->Unref() returns
|
||||||
|
// all memtables that we need to free through this vector. We then
|
||||||
|
// delete all those memtables outside of mutex, during destruction
|
||||||
|
autovector<MemTable*> to_delete; |
||||||
|
// Version number of the current SuperVersion
|
||||||
|
uint64_t version_number; |
||||||
|
port::Mutex* db_mutex; |
||||||
|
|
||||||
|
// should be called outside the mutex
|
||||||
|
SuperVersion() = default; |
||||||
|
~SuperVersion(); |
||||||
|
SuperVersion* Ref(); |
||||||
|
|
||||||
|
bool Unref(); |
||||||
|
|
||||||
|
// call these two methods with db mutex held
|
||||||
|
// Cleanup unrefs mem, imm and current. Also, it stores all memtables
|
||||||
|
// that needs to be deleted in to_delete vector. Unrefing those
|
||||||
|
// objects needs to be done in the mutex
|
||||||
|
void Cleanup(); |
||||||
|
void Init(MemTable* new_mem, MemTableListVersion* new_imm, |
||||||
|
Version* new_current); |
||||||
|
|
||||||
|
// The value of dummy is not actually used. kSVInUse takes its address as a
|
||||||
|
// mark in the thread local storage to indicate the SuperVersion is in use
|
||||||
|
// by thread. This way, the value of kSVInUse is guaranteed to have no
|
||||||
|
// conflict with SuperVersion object address and portable on different
|
||||||
|
// platform.
|
||||||
|
static int dummy; |
||||||
|
static void* const kSVInUse; |
||||||
|
static void* const kSVObsolete; |
||||||
|
}; |
||||||
|
|
||||||
|
extern ColumnFamilyOptions SanitizeOptions(const InternalKeyComparator* icmp, |
||||||
|
const InternalFilterPolicy* ipolicy, |
||||||
|
const ColumnFamilyOptions& src); |
||||||
|
|
||||||
|
class ColumnFamilySet; |
||||||
|
|
||||||
|
// This class keeps all the data that a column family needs. It's mosly dumb and
|
||||||
|
// used just to provide access to metadata.
|
||||||
|
// Most methods require DB mutex held, unless otherwise noted
|
||||||
|
class ColumnFamilyData { |
||||||
|
public: |
||||||
|
~ColumnFamilyData(); |
||||||
|
|
||||||
|
// thread-safe
|
||||||
|
uint32_t GetID() const { return id_; } |
||||||
|
// thread-safe
|
||||||
|
const std::string& GetName() const { return name_; } |
||||||
|
|
||||||
|
void Ref() { ++refs_; } |
||||||
|
// will just decrease reference count to 0, but will not delete it. returns
|
||||||
|
// true if the ref count was decreased to zero. in that case, it can be
|
||||||
|
// deleted by the caller immediatelly, or later, by calling
|
||||||
|
// FreeDeadColumnFamilies()
|
||||||
|
bool Unref() { |
||||||
|
assert(refs_ > 0); |
||||||
|
return --refs_ == 0; |
||||||
|
} |
||||||
|
|
||||||
|
// This can only be called from single-threaded VersionSet::LogAndApply()
|
||||||
|
// After dropping column family no other operation on that column family
|
||||||
|
// will be executed. All the files and memory will be, however, kept around
|
||||||
|
// until client drops the column family handle. That way, client can still
|
||||||
|
// access data from dropped column family.
|
||||||
|
// Column family can be dropped and still alive. In that state:
|
||||||
|
// *) Column family is not included in the iteration.
|
||||||
|
// *) Compaction and flush is not executed on the dropped column family.
|
||||||
|
// *) Client can continue writing and reading from column family. However, all
|
||||||
|
// writes stay in the current memtable.
|
||||||
|
// When the dropped column family is unreferenced, then we:
|
||||||
|
// *) delete all memory associated with that column family
|
||||||
|
// *) delete all the files associated with that column family
|
||||||
|
void SetDropped() { |
||||||
|
// can't drop default CF
|
||||||
|
assert(id_ != 0); |
||||||
|
dropped_ = true; |
||||||
|
} |
||||||
|
bool IsDropped() const { return dropped_; } |
||||||
|
|
||||||
|
// thread-safe
|
||||||
|
int NumberLevels() const { return options_.num_levels; } |
||||||
|
|
||||||
|
void SetLogNumber(uint64_t log_number) { log_number_ = log_number; } |
||||||
|
uint64_t GetLogNumber() const { return log_number_; } |
||||||
|
|
||||||
|
// thread-safe
|
||||||
|
const Options* options() const { return &options_; } |
||||||
|
|
||||||
|
InternalStats* internal_stats() { return internal_stats_.get(); } |
||||||
|
|
||||||
|
MemTableList* imm() { return &imm_; } |
||||||
|
MemTable* mem() { return mem_; } |
||||||
|
Version* current() { return current_; } |
||||||
|
Version* dummy_versions() { return dummy_versions_; } |
||||||
|
void SetMemtable(MemTable* new_mem) { mem_ = new_mem; } |
||||||
|
void SetCurrent(Version* current); |
||||||
|
void CreateNewMemtable(); |
||||||
|
|
||||||
|
TableCache* table_cache() { return table_cache_.get(); } |
||||||
|
|
||||||
|
// See documentation in compaction_picker.h
|
||||||
|
Compaction* PickCompaction(LogBuffer* log_buffer); |
||||||
|
Compaction* CompactRange(int input_level, int output_level, |
||||||
|
const InternalKey* begin, const InternalKey* end, |
||||||
|
InternalKey** compaction_end); |
||||||
|
|
||||||
|
CompactionPicker* compaction_picker() { return compaction_picker_.get(); } |
||||||
|
// thread-safe
|
||||||
|
const Comparator* user_comparator() const { |
||||||
|
return internal_comparator_.user_comparator(); |
||||||
|
} |
||||||
|
// thread-safe
|
||||||
|
const InternalKeyComparator& internal_comparator() const { |
||||||
|
return internal_comparator_; |
||||||
|
} |
||||||
|
|
||||||
|
SuperVersion* GetSuperVersion() { return super_version_; } |
||||||
|
// thread-safe
|
||||||
|
ThreadLocalPtr* GetThreadLocalSuperVersion() const { return local_sv_.get(); } |
||||||
|
// thread-safe
|
||||||
|
uint64_t GetSuperVersionNumber() const { |
||||||
|
return super_version_number_.load(); |
||||||
|
} |
||||||
|
// will return a pointer to SuperVersion* if previous SuperVersion
|
||||||
|
// if its reference count is zero and needs deletion or nullptr if not
|
||||||
|
// As argument takes a pointer to allocated SuperVersion to enable
|
||||||
|
// the clients to allocate SuperVersion outside of mutex.
|
||||||
|
SuperVersion* InstallSuperVersion(SuperVersion* new_superversion, |
||||||
|
port::Mutex* db_mutex); |
||||||
|
|
||||||
|
void ResetThreadLocalSuperVersions(); |
||||||
|
|
||||||
|
// A Flag indicating whether write needs to slowdown because of there are
|
||||||
|
// too many number of level0 files.
|
||||||
|
bool NeedSlowdownForNumLevel0Files() const { |
||||||
|
return need_slowdown_for_num_level0_files_; |
||||||
|
} |
||||||
|
|
||||||
|
private: |
||||||
|
friend class ColumnFamilySet; |
||||||
|
ColumnFamilyData(const std::string& dbname, uint32_t id, |
||||||
|
const std::string& name, Version* dummy_versions, |
||||||
|
Cache* table_cache, const ColumnFamilyOptions& options, |
||||||
|
const DBOptions* db_options, |
||||||
|
const EnvOptions& storage_options, |
||||||
|
ColumnFamilySet* column_family_set); |
||||||
|
|
||||||
|
uint32_t id_; |
||||||
|
const std::string name_; |
||||||
|
Version* dummy_versions_; // Head of circular doubly-linked list of versions.
|
||||||
|
Version* current_; // == dummy_versions->prev_
|
||||||
|
|
||||||
|
int refs_; // outstanding references to ColumnFamilyData
|
||||||
|
bool dropped_; // true if client dropped it
|
||||||
|
|
||||||
|
const InternalKeyComparator internal_comparator_; |
||||||
|
const InternalFilterPolicy internal_filter_policy_; |
||||||
|
|
||||||
|
Options const options_; |
||||||
|
|
||||||
|
std::unique_ptr<TableCache> table_cache_; |
||||||
|
|
||||||
|
std::unique_ptr<InternalStats> internal_stats_; |
||||||
|
|
||||||
|
MemTable* mem_; |
||||||
|
MemTableList imm_; |
||||||
|
SuperVersion* super_version_; |
||||||
|
|
||||||
|
// An ordinal representing the current SuperVersion. Updated by
|
||||||
|
// InstallSuperVersion(), i.e. incremented every time super_version_
|
||||||
|
// changes.
|
||||||
|
std::atomic<uint64_t> super_version_number_; |
||||||
|
|
||||||
|
// Thread's local copy of SuperVersion pointer
|
||||||
|
// This needs to be destructed before mutex_
|
||||||
|
std::unique_ptr<ThreadLocalPtr> local_sv_; |
||||||
|
|
||||||
|
// pointers for a circular linked list. we use it to support iterations
|
||||||
|
// that can be concurrent with writes
|
||||||
|
ColumnFamilyData* next_; |
||||||
|
ColumnFamilyData* prev_; |
||||||
|
|
||||||
|
// This is the earliest log file number that contains data from this
|
||||||
|
// Column Family. All earlier log files must be ignored and not
|
||||||
|
// recovered from
|
||||||
|
uint64_t log_number_; |
||||||
|
|
||||||
|
// A flag indicating whether we should delay writes because
|
||||||
|
// we have too many level 0 files
|
||||||
|
bool need_slowdown_for_num_level0_files_; |
||||||
|
|
||||||
|
// An object that keeps all the compaction stats
|
||||||
|
// and picks the next compaction
|
||||||
|
std::unique_ptr<CompactionPicker> compaction_picker_; |
||||||
|
|
||||||
|
ColumnFamilySet* column_family_set_; |
||||||
|
}; |
||||||
|
|
||||||
|
// ColumnFamilySet has interesting thread-safety requirements
|
||||||
|
// * CreateColumnFamily() or RemoveColumnFamily() -- need to protect by DB
|
||||||
|
// mutex. Inside, column_family_data_ and column_families_ will be protected
|
||||||
|
// by Lock() and Unlock(). CreateColumnFamily() should ONLY be called from
|
||||||
|
// VersionSet::LogAndApply() in the normal runtime. It is also called
|
||||||
|
// during Recovery and in DumpManifest(). RemoveColumnFamily() is called
|
||||||
|
// from ColumnFamilyData destructor
|
||||||
|
// * Iteration -- hold DB mutex, but you can release it in the body of
|
||||||
|
// iteration. If you release DB mutex in body, reference the column
|
||||||
|
// family before the mutex and unreference after you unlock, since the column
|
||||||
|
// family might get dropped when the DB mutex is released
|
||||||
|
// * GetDefault() -- thread safe
|
||||||
|
// * GetColumnFamily() -- either inside of DB mutex or call Lock() <-> Unlock()
|
||||||
|
// * GetNextColumnFamilyID(), GetMaxColumnFamily(), UpdateMaxColumnFamily() --
|
||||||
|
// inside of DB mutex
|
||||||
|
class ColumnFamilySet { |
||||||
|
public: |
||||||
|
// ColumnFamilySet supports iteration
|
||||||
|
class iterator { |
||||||
|
public: |
||||||
|
explicit iterator(ColumnFamilyData* cfd) |
||||||
|
: current_(cfd) {} |
||||||
|
iterator& operator++() { |
||||||
|
// dummy is never dead or dropped, so this will never be infinite
|
||||||
|
do { |
||||||
|
current_ = current_->next_; |
||||||
|
} while (current_->refs_ == 0 || current_->IsDropped()); |
||||||
|
return *this; |
||||||
|
} |
||||||
|
bool operator!=(const iterator& other) { |
||||||
|
return this->current_ != other.current_; |
||||||
|
} |
||||||
|
ColumnFamilyData* operator*() { return current_; } |
||||||
|
|
||||||
|
private: |
||||||
|
ColumnFamilyData* current_; |
||||||
|
}; |
||||||
|
|
||||||
|
ColumnFamilySet(const std::string& dbname, const DBOptions* db_options, |
||||||
|
const EnvOptions& storage_options, Cache* table_cache); |
||||||
|
~ColumnFamilySet(); |
||||||
|
|
||||||
|
ColumnFamilyData* GetDefault() const; |
||||||
|
// GetColumnFamily() calls return nullptr if column family is not found
|
||||||
|
ColumnFamilyData* GetColumnFamily(uint32_t id) const; |
||||||
|
ColumnFamilyData* GetColumnFamily(const std::string& name) const; |
||||||
|
// this call will return the next available column family ID. it guarantees
|
||||||
|
// that there is no column family with id greater than or equal to the
|
||||||
|
// returned value in the current running instance or anytime in RocksDB
|
||||||
|
// instance history.
|
||||||
|
uint32_t GetNextColumnFamilyID(); |
||||||
|
uint32_t GetMaxColumnFamily(); |
||||||
|
void UpdateMaxColumnFamily(uint32_t new_max_column_family); |
||||||
|
|
||||||
|
ColumnFamilyData* CreateColumnFamily(const std::string& name, uint32_t id, |
||||||
|
Version* dummy_version, |
||||||
|
const ColumnFamilyOptions& options); |
||||||
|
|
||||||
|
iterator begin() { return iterator(dummy_cfd_->next_); } |
||||||
|
iterator end() { return iterator(dummy_cfd_); } |
||||||
|
|
||||||
|
void Lock(); |
||||||
|
void Unlock(); |
||||||
|
|
||||||
|
// REQUIRES: DB mutex held
|
||||||
|
// Don't call while iterating over ColumnFamilySet
|
||||||
|
void FreeDeadColumnFamilies(); |
||||||
|
|
||||||
|
private: |
||||||
|
friend class ColumnFamilyData; |
||||||
|
// helper function that gets called from cfd destructor
|
||||||
|
// REQUIRES: DB mutex held
|
||||||
|
void RemoveColumnFamily(ColumnFamilyData* cfd); |
||||||
|
|
||||||
|
// column_families_ and column_family_data_ need to be protected:
|
||||||
|
// * when mutating: 1. DB mutex locked first, 2. spinlock locked second
|
||||||
|
// * when reading, either: 1. lock DB mutex, or 2. lock spinlock
|
||||||
|
// (if both, respect the ordering to avoid deadlock!)
|
||||||
|
std::unordered_map<std::string, uint32_t> column_families_; |
||||||
|
std::unordered_map<uint32_t, ColumnFamilyData*> column_family_data_; |
||||||
|
|
||||||
|
uint32_t max_column_family_; |
||||||
|
ColumnFamilyData* dummy_cfd_; |
||||||
|
// We don't hold the refcount here, since default column family always exists
|
||||||
|
// We are also not responsible for cleaning up default_cfd_cache_. This is
|
||||||
|
// just a cache that makes common case (accessing default column family)
|
||||||
|
// faster
|
||||||
|
ColumnFamilyData* default_cfd_cache_; |
||||||
|
|
||||||
|
const std::string db_name_; |
||||||
|
const DBOptions* const db_options_; |
||||||
|
const EnvOptions storage_options_; |
||||||
|
Cache* table_cache_; |
||||||
|
std::atomic_flag spin_lock_; |
||||||
|
}; |
||||||
|
|
||||||
|
// We use ColumnFamilyMemTablesImpl to provide WriteBatch a way to access
|
||||||
|
// memtables of different column families (specified by ID in the write batch)
|
||||||
|
class ColumnFamilyMemTablesImpl : public ColumnFamilyMemTables { |
||||||
|
public: |
||||||
|
explicit ColumnFamilyMemTablesImpl(ColumnFamilySet* column_family_set) |
||||||
|
: column_family_set_(column_family_set), current_(nullptr) {} |
||||||
|
|
||||||
|
// sets current_ to ColumnFamilyData with column_family_id
|
||||||
|
// returns false if column family doesn't exist
|
||||||
|
bool Seek(uint32_t column_family_id) override; |
||||||
|
|
||||||
|
// Returns log number of the selected column family
|
||||||
|
uint64_t GetLogNumber() const override; |
||||||
|
|
||||||
|
// REQUIRES: Seek() called first
|
||||||
|
virtual MemTable* GetMemTable() const override; |
||||||
|
|
||||||
|
// Returns options for selected column family
|
||||||
|
// REQUIRES: Seek() called first
|
||||||
|
virtual const Options* GetOptions() const override; |
||||||
|
|
||||||
|
// Returns column family handle for the selected column family
|
||||||
|
virtual ColumnFamilyHandle* GetColumnFamilyHandle() override; |
||||||
|
|
||||||
|
private: |
||||||
|
ColumnFamilySet* column_family_set_; |
||||||
|
ColumnFamilyData* current_; |
||||||
|
ColumnFamilyHandleInternal handle_; |
||||||
|
}; |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,857 @@ |
|||||||
|
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
//
|
||||||
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a BSD-style license that can be
|
||||||
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||||
|
|
||||||
|
#include <algorithm> |
||||||
|
#include <vector> |
||||||
|
#include <string> |
||||||
|
|
||||||
|
#include "db/db_impl.h" |
||||||
|
#include "rocksdb/env.h" |
||||||
|
#include "rocksdb/db.h" |
||||||
|
#include "util/testharness.h" |
||||||
|
#include "util/testutil.h" |
||||||
|
#include "util/coding.h" |
||||||
|
#include "utilities/merge_operators.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
namespace { |
||||||
|
// Convenience wrapper around test::RandomString() that returns the generated
// string by value instead of filling an out-parameter.
std::string RandomString(Random* rnd, int len) {
  std::string generated;
  test::RandomString(rnd, len, &generated);
  return generated;
}
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
class ColumnFamilyTest { |
||||||
|
public: |
||||||
|
ColumnFamilyTest() : rnd_(139) { |
||||||
|
env_ = Env::Default(); |
||||||
|
dbname_ = test::TmpDir() + "/column_family_test"; |
||||||
|
db_options_.create_if_missing = true; |
||||||
|
DestroyDB(dbname_, Options(db_options_, column_family_options_)); |
||||||
|
} |
||||||
|
|
||||||
|
void Close() { |
||||||
|
for (auto h : handles_) { |
||||||
|
delete h; |
||||||
|
} |
||||||
|
handles_.clear(); |
||||||
|
names_.clear(); |
||||||
|
delete db_; |
||||||
|
db_ = nullptr; |
||||||
|
} |
||||||
|
|
||||||
|
Status TryOpen(std::vector<std::string> cf, |
||||||
|
std::vector<ColumnFamilyOptions> options = {}) { |
||||||
|
std::vector<ColumnFamilyDescriptor> column_families; |
||||||
|
names_.clear(); |
||||||
|
for (size_t i = 0; i < cf.size(); ++i) { |
||||||
|
column_families.push_back(ColumnFamilyDescriptor( |
||||||
|
cf[i], options.size() == 0 ? column_family_options_ : options[i])); |
||||||
|
names_.push_back(cf[i]); |
||||||
|
} |
||||||
|
return DB::Open(db_options_, dbname_, column_families, &handles_, &db_); |
||||||
|
} |
||||||
|
|
||||||
|
void Open(std::vector<std::string> cf, |
||||||
|
std::vector<ColumnFamilyOptions> options = {}) { |
||||||
|
ASSERT_OK(TryOpen(cf, options)); |
||||||
|
} |
||||||
|
|
||||||
|
void Open() { |
||||||
|
Open({"default"}); |
||||||
|
} |
||||||
|
|
||||||
|
DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); } |
||||||
|
|
||||||
|
int GetProperty(int cf, std::string property) { |
||||||
|
std::string value; |
||||||
|
ASSERT_TRUE(dbfull()->GetProperty(handles_[cf], property, &value)); |
||||||
|
return std::stoi(value); |
||||||
|
} |
||||||
|
|
||||||
|
void Destroy() { |
||||||
|
for (auto h : handles_) { |
||||||
|
delete h; |
||||||
|
} |
||||||
|
handles_.clear(); |
||||||
|
names_.clear(); |
||||||
|
delete db_; |
||||||
|
db_ = nullptr; |
||||||
|
ASSERT_OK(DestroyDB(dbname_, Options(db_options_, column_family_options_))); |
||||||
|
} |
||||||
|
|
||||||
|
void CreateColumnFamilies( |
||||||
|
const std::vector<std::string>& cfs, |
||||||
|
const std::vector<ColumnFamilyOptions> options = {}) { |
||||||
|
int cfi = handles_.size(); |
||||||
|
handles_.resize(cfi + cfs.size()); |
||||||
|
names_.resize(cfi + cfs.size()); |
||||||
|
for (size_t i = 0; i < cfs.size(); ++i) { |
||||||
|
ASSERT_OK(db_->CreateColumnFamily( |
||||||
|
options.size() == 0 ? column_family_options_ : options[i], cfs[i], |
||||||
|
&handles_[cfi])); |
||||||
|
names_[cfi] = cfs[i]; |
||||||
|
cfi++; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void Reopen(const std::vector<ColumnFamilyOptions> options = {}) { |
||||||
|
std::vector<std::string> names; |
||||||
|
for (auto name : names_) { |
||||||
|
if (name != "") { |
||||||
|
names.push_back(name); |
||||||
|
} |
||||||
|
} |
||||||
|
Close(); |
||||||
|
assert(options.size() == 0 || names.size() == options.size()); |
||||||
|
Open(names, options); |
||||||
|
} |
||||||
|
|
||||||
|
void CreateColumnFamiliesAndReopen(const std::vector<std::string>& cfs) { |
||||||
|
CreateColumnFamilies(cfs); |
||||||
|
Reopen(); |
||||||
|
} |
||||||
|
|
||||||
|
void DropColumnFamilies(const std::vector<int>& cfs) { |
||||||
|
for (auto cf : cfs) { |
||||||
|
ASSERT_OK(db_->DropColumnFamily(handles_[cf])); |
||||||
|
delete handles_[cf]; |
||||||
|
handles_[cf] = nullptr; |
||||||
|
names_[cf] = ""; |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void PutRandomData(int cf, int num, int key_value_size) { |
||||||
|
for (int i = 0; i < num; ++i) { |
||||||
|
// 10 bytes for key, rest is value
|
||||||
|
ASSERT_OK(Put(cf, test::RandomKey(&rnd_, 10), |
||||||
|
RandomString(&rnd_, key_value_size - 10))); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void WaitForFlush(int cf) { |
||||||
|
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[cf])); |
||||||
|
} |
||||||
|
|
||||||
|
void WaitForCompaction() { ASSERT_OK(dbfull()->TEST_WaitForCompact()); } |
||||||
|
|
||||||
|
Status Put(int cf, const std::string& key, const std::string& value) { |
||||||
|
return db_->Put(WriteOptions(), handles_[cf], Slice(key), Slice(value)); |
||||||
|
} |
||||||
|
Status Merge(int cf, const std::string& key, const std::string& value) { |
||||||
|
return db_->Merge(WriteOptions(), handles_[cf], Slice(key), Slice(value)); |
||||||
|
} |
||||||
|
Status Flush(int cf) { |
||||||
|
return db_->Flush(FlushOptions(), handles_[cf]); |
||||||
|
} |
||||||
|
|
||||||
|
std::string Get(int cf, const std::string& key) { |
||||||
|
ReadOptions options; |
||||||
|
options.verify_checksums = true; |
||||||
|
std::string result; |
||||||
|
Status s = db_->Get(options, handles_[cf], Slice(key), &result); |
||||||
|
if (s.IsNotFound()) { |
||||||
|
result = "NOT_FOUND"; |
||||||
|
} else if (!s.ok()) { |
||||||
|
result = s.ToString(); |
||||||
|
} |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
void CompactAll(int cf) { |
||||||
|
ASSERT_OK(db_->CompactRange(handles_[cf], nullptr, nullptr)); |
||||||
|
} |
||||||
|
|
||||||
|
void Compact(int cf, const Slice& start, const Slice& limit) { |
||||||
|
ASSERT_OK(db_->CompactRange(handles_[cf], &start, &limit)); |
||||||
|
} |
||||||
|
|
||||||
|
int NumTableFilesAtLevel(int level, int cf) { |
||||||
|
return GetProperty(cf, |
||||||
|
"rocksdb.num-files-at-level" + std::to_string(level)); |
||||||
|
} |
||||||
|
|
||||||
|
// Return spread of files per level
|
||||||
|
std::string FilesPerLevel(int cf) { |
||||||
|
std::string result; |
||||||
|
int last_non_zero_offset = 0; |
||||||
|
for (int level = 0; level < dbfull()->NumberLevels(handles_[cf]); level++) { |
||||||
|
int f = NumTableFilesAtLevel(level, cf); |
||||||
|
char buf[100]; |
||||||
|
snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f); |
||||||
|
result += buf; |
||||||
|
if (f > 0) { |
||||||
|
last_non_zero_offset = result.size(); |
||||||
|
} |
||||||
|
} |
||||||
|
result.resize(last_non_zero_offset); |
||||||
|
return result; |
||||||
|
} |
||||||
|
|
||||||
|
int CountLiveFiles(int cf) { |
||||||
|
std::vector<LiveFileMetaData> metadata; |
||||||
|
db_->GetLiveFilesMetaData(&metadata); |
||||||
|
return static_cast<int>(metadata.size()); |
||||||
|
} |
||||||
|
|
||||||
|
// Do n memtable flushes, each of which produces an sstable
|
||||||
|
// covering the range [small,large].
|
||||||
|
void MakeTables(int cf, int n, const std::string& small, |
||||||
|
const std::string& large) { |
||||||
|
for (int i = 0; i < n; i++) { |
||||||
|
ASSERT_OK(Put(cf, small, "begin")); |
||||||
|
ASSERT_OK(Put(cf, large, "end")); |
||||||
|
ASSERT_OK(db_->Flush(FlushOptions(), handles_[cf])); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
int CountLiveLogFiles() { |
||||||
|
int micros_wait_for_log_deletion = 20000; |
||||||
|
env_->SleepForMicroseconds(micros_wait_for_log_deletion); |
||||||
|
int ret = 0; |
||||||
|
VectorLogPtr wal_files; |
||||||
|
Status s; |
||||||
|
// GetSortedWalFiles is a flakey function -- it gets all the wal_dir
|
||||||
|
// children files and then later checks for their existance. if some of the
|
||||||
|
// log files doesn't exist anymore, it reports an error. it does all of this
|
||||||
|
// without DB mutex held, so if a background process deletes the log file
|
||||||
|
// while the function is being executed, it returns an error. We retry the
|
||||||
|
// function 10 times to avoid the error failing the test
|
||||||
|
for (int retries = 0; retries < 10; ++retries) { |
||||||
|
wal_files.clear(); |
||||||
|
s = db_->GetSortedWalFiles(wal_files); |
||||||
|
if (s.ok()) { |
||||||
|
break; |
||||||
|
} |
||||||
|
} |
||||||
|
ASSERT_OK(s); |
||||||
|
for (const auto& wal : wal_files) { |
||||||
|
if (wal->Type() == kAliveLogFile) { |
||||||
|
++ret; |
||||||
|
} |
||||||
|
} |
||||||
|
return ret; |
||||||
|
} |
||||||
|
|
||||||
|
void AssertNumberOfImmutableMemtables(std::vector<int> num_per_cf) { |
||||||
|
assert(num_per_cf.size() == handles_.size()); |
||||||
|
|
||||||
|
for (size_t i = 0; i < num_per_cf.size(); ++i) { |
||||||
|
ASSERT_EQ(num_per_cf[i], |
||||||
|
GetProperty(i, "rocksdb.num-immutable-mem-table")); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
void CopyFile(const std::string& source, const std::string& destination, |
||||||
|
uint64_t size = 0) { |
||||||
|
const EnvOptions soptions; |
||||||
|
unique_ptr<SequentialFile> srcfile; |
||||||
|
ASSERT_OK(env_->NewSequentialFile(source, &srcfile, soptions)); |
||||||
|
unique_ptr<WritableFile> destfile; |
||||||
|
ASSERT_OK(env_->NewWritableFile(destination, &destfile, soptions)); |
||||||
|
|
||||||
|
if (size == 0) { |
||||||
|
// default argument means copy everything
|
||||||
|
ASSERT_OK(env_->GetFileSize(source, &size)); |
||||||
|
} |
||||||
|
|
||||||
|
char buffer[4096]; |
||||||
|
Slice slice; |
||||||
|
while (size > 0) { |
||||||
|
uint64_t one = std::min(uint64_t(sizeof(buffer)), size); |
||||||
|
ASSERT_OK(srcfile->Read(one, &slice, buffer)); |
||||||
|
ASSERT_OK(destfile->Append(slice)); |
||||||
|
size -= slice.size(); |
||||||
|
} |
||||||
|
ASSERT_OK(destfile->Close()); |
||||||
|
} |
||||||
|
|
||||||
|
std::vector<ColumnFamilyHandle*> handles_; |
||||||
|
std::vector<std::string> names_; |
||||||
|
ColumnFamilyOptions column_family_options_; |
||||||
|
DBOptions db_options_; |
||||||
|
std::string dbname_; |
||||||
|
DB* db_ = nullptr; |
||||||
|
Env* env_; |
||||||
|
Random rnd_; |
||||||
|
}; |
||||||
|
|
||||||
|
// Verifies that the ID of a dropped column family is never handed out again,
// with a different number of reopens in each of the three iterations.
TEST(ColumnFamilyTest, DontReuseColumnFamilyID) {
  for (int iter = 0; iter < 3; ++iter) {
    Open();
    CreateColumnFamilies({"one", "two", "three"});
    for (size_t i = 0; i < handles_.size(); ++i) {
      // Known downcast inside the handle hierarchy, hence static_cast.
      auto cfh = static_cast<ColumnFamilyHandleImpl*>(handles_[i]);
      ASSERT_EQ(i, cfh->GetID());
    }
    if (iter == 1) {
      Reopen();
    }
    DropColumnFamilies({3});
    Reopen();
    if (iter == 2) {
      // this tests if max_column_family is correctly persisted with
      // WriteSnapshot()
      Reopen();
    }
    CreateColumnFamilies({"three2"});
    // ID 3 that was used for dropped column family "three" should not be reused
    auto cfh3 = static_cast<ColumnFamilyHandleImpl*>(handles_[3]);
    ASSERT_EQ(4U, cfh3->GetID());
    // Destroy() releases the handles and the DB before wiping the files, so
    // no separate Close() is needed.
    Destroy();
  }
}
||||||
|
|
||||||
|
|
||||||
|
// Creates and drops column families, checks that reads and writes go to the
// right one, and that the set of families listed for the DB afterwards
// contains exactly the surviving ones.
TEST(ColumnFamilyTest, AddDrop) {
  Open();
  CreateColumnFamilies({"one", "two", "three"});
  ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
  ASSERT_EQ("NOT_FOUND", Get(2, "fodor"));
  DropColumnFamilies({2});
  ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
  CreateColumnFamilies({"four"});
  ASSERT_EQ("NOT_FOUND", Get(3, "fodor"));
  ASSERT_OK(Put(1, "fodor", "mirko"));
  ASSERT_EQ("mirko", Get(1, "fodor"));
  ASSERT_EQ("NOT_FOUND", Get(3, "fodor"));
  Close();
  // Opening with only "default" while other families exist must be rejected.
  ASSERT_TRUE(TryOpen({"default"}).IsInvalidArgument());
  Open({"default", "one", "three", "four"});
  DropColumnFamilies({1});
  Reopen();
  Close();

  std::vector<std::string> families;
  ASSERT_OK(DB::ListColumnFamilies(db_options_, dbname_, &families));
  // Qualified call: do not depend on argument-dependent lookup finding sort.
  std::sort(families.begin(), families.end());
  ASSERT_TRUE(families ==
              std::vector<std::string>({"default", "four", "three"}));
}
||||||
|
|
||||||
|
// Checks that dropping a column family leaves no live files behind.
TEST(ColumnFamilyTest, DropTest) {
  // pass 0 - don't reopen the DB before dropping
  // pass 1 - reopen the DB before dropping
  for (int pass = 0; pass < 2; ++pass) {
    const bool reopen_before_drop = (pass == 1);

    Open({"default"});
    CreateColumnFamiliesAndReopen({"pikachu"});
    for (int n = 0; n != 100; ++n) {
      const std::string suffix = std::to_string(n);
      ASSERT_OK(Put(1, suffix, "bar" + suffix));
    }
    ASSERT_OK(Flush(1));

    if (reopen_before_drop) {
      Reopen();
    }
    ASSERT_EQ("bar1", Get(1, "1"));

    ASSERT_EQ(CountLiveFiles(1), 1);
    DropColumnFamilies({1});
    // make sure that all files are deleted when we drop the column family
    ASSERT_EQ(CountLiveFiles(1), 0);
    Destroy();
  }
}
||||||
|
|
||||||
|
// A batch that targets a column family is accepted while the family exists
// and rejected with InvalidArgument once the family has been dropped.
TEST(ColumnFamilyTest, WriteBatchFailure) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});

  WriteBatch batch;
  batch.Put(handles_[1], Slice("non-existing"), Slice("column-family"));
  ASSERT_OK(db_->Write(WriteOptions(), &batch));

  DropColumnFamilies({1});
  // Same batch again, now pointing at a dropped column family.
  ASSERT_TRUE(db_->Write(WriteOptions(), &batch).IsInvalidArgument());
  Close();
}
||||||
|
|
||||||
|
// Writes to three column families and checks that every key is visible only
// in the family it was written to, both before and after reopening the DB.
TEST(ColumnFamilyTest, ReadWrite) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});
  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));
  ASSERT_OK(Put(2, "fodor", "v5"));

  // Four verification rounds; the DB is reopened after the first two.
  for (int round = 0; round < 4; ++round) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));
    if (round < 2) {
      Reopen();
    }
  }
  Close();
}
||||||
|
|
||||||
|
// Recovers the DB twice from the same set of WAL files (the second time after
// copying the saved logs back into the WAL directory) and checks that merge
// operands are not applied a second time.
TEST(ColumnFamilyTest, IgnoreRecoveredLog) {
  const std::string backup_logs = dbname_ + "/backup_logs";

  // delete old files in backup_logs directory
  ASSERT_OK(env_->CreateDirIfMissing(dbname_));
  ASSERT_OK(env_->CreateDirIfMissing(backup_logs));
  std::vector<std::string> old_files;
  ASSERT_OK(env_->GetChildren(backup_logs, &old_files));
  for (const auto& file : old_files) {
    if (file != "." && file != "..") {
      ASSERT_OK(env_->DeleteFile(backup_logs + "/" + file));
    }
  }

  column_family_options_.merge_operator =
      MergeOperators::CreateUInt64AddOperator();
  db_options_.wal_dir = dbname_ + "/logs";
  Destroy();
  Open();
  CreateColumnFamilies({"cf1", "cf2"});

  // fill up the DB
  std::string one, two, three;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);
  ASSERT_OK(Merge(0, "foo", one));
  ASSERT_OK(Merge(1, "mirko", one));
  ASSERT_OK(Merge(0, "foo", one));
  ASSERT_OK(Merge(2, "bla", one));
  ASSERT_OK(Merge(2, "fodor", one));
  ASSERT_OK(Merge(0, "bar", one));
  ASSERT_OK(Merge(2, "bla", one));
  ASSERT_OK(Merge(1, "mirko", two));
  ASSERT_OK(Merge(1, "franjo", one));

  // Snapshot of the WAL directory listing; reused for both copy directions.
  std::vector<std::string> logs;
  ASSERT_OK(env_->GetChildren(db_options_.wal_dir, &logs));

  // Copies every file named in logs from one directory to another.
  auto copy_logs = [this, &logs](const std::string& from_dir,
                                 const std::string& to_dir) {
    for (const auto& log : logs) {
      if (log != ".." && log != ".") {
        CopyFile(from_dir + "/" + log, to_dir + "/" + log);
      }
    }
  };

  // copy the logs to backup
  copy_logs(db_options_.wal_dir, backup_logs);

  // recover the DB
  Close();

  // 1. check consistency
  // 2. copy the logs from backup back to WAL dir. if the recovery happens
  // again on the same log files, this should lead to incorrect results
  // due to applying merge operator twice
  // 3. check consistency
  for (int iter = 0; iter < 2; ++iter) {
    // assert consistency
    Open({"default", "cf1", "cf2"});
    ASSERT_EQ(two, Get(0, "foo"));
    ASSERT_EQ(one, Get(0, "bar"));
    ASSERT_EQ(three, Get(1, "mirko"));
    ASSERT_EQ(one, Get(1, "franjo"));
    ASSERT_EQ(one, Get(2, "fodor"));
    ASSERT_EQ(two, Get(2, "bla"));
    Close();

    if (iter == 0) {
      // copy the logs from backup back to wal dir
      copy_logs(backup_logs, db_options_.wal_dir);
    }
  }
}
||||||
|
|
||||||
|
// Flushes every column family explicitly and checks that the data is still
// readable, and still isolated per column family, across several reopens.
TEST(ColumnFamilyTest, FlushTest) {
  Open();
  CreateColumnFamiliesAndReopen({"one", "two"});
  ASSERT_OK(Put(0, "foo", "v1"));
  ASSERT_OK(Put(0, "bar", "v2"));
  ASSERT_OK(Put(1, "mirko", "v3"));
  ASSERT_OK(Put(0, "foo", "v2"));
  ASSERT_OK(Put(2, "fodor", "v5"));
  for (int i = 0; i < 3; ++i) {
    // A failed flush must fail the test instead of being silently dropped.
    ASSERT_OK(Flush(i));
  }
  Reopen();

  for (int iter = 0; iter <= 2; ++iter) {
    ASSERT_EQ("v2", Get(0, "foo"));
    ASSERT_EQ("v2", Get(0, "bar"));
    ASSERT_EQ("v3", Get(1, "mirko"));
    ASSERT_EQ("v5", Get(2, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(0, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(1, "fodor"));
    ASSERT_EQ("NOT_FOUND", Get(2, "foo"));
    if (iter <= 1) {
      Reopen();
    }
  }
  Close();
}
||||||
|
|
||||||
|
// Makes sure that obsolete log files get deleted
|
||||||
|
TEST(ColumnFamilyTest, LogDeletionTest) {
  column_family_options_.write_buffer_size = 100000;  // 100KB
  Open();
  CreateColumnFamilies({"one", "two", "three", "four"});

  // Writes a single 100-byte entry into column family cf.
  auto touch = [this](int cf) { PutRandomData(cf, 1, 100); };
  // Writes 1000 100-byte entries into cf and waits for its flush to finish.
  auto fill_and_flush = [this](int cf) {
    PutRandomData(cf, 1000, 100);
    WaitForFlush(cf);
  };

  // Each bracket is one log file. if number is in (), it means
  // we don't need it anymore (it's been flushed)
  // []
  ASSERT_EQ(CountLiveLogFiles(), 0);
  touch(0);
  // [0]
  touch(1);
  // [0, 1]
  fill_and_flush(1);
  // [0, (1)] [1]
  ASSERT_EQ(CountLiveLogFiles(), 2);
  touch(0);
  // [0, (1)] [0, 1]
  ASSERT_EQ(CountLiveLogFiles(), 2);
  touch(2);
  // [0, (1)] [0, 1, 2]
  fill_and_flush(2);
  // [0, (1)] [0, 1, (2)] [2]
  ASSERT_EQ(CountLiveLogFiles(), 3);
  fill_and_flush(2);
  // [0, (1)] [0, 1, (2)] [(2)] [2]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  touch(3);
  // [0, (1)] [0, 1, (2)] [(2)] [2, 3]
  touch(1);
  // [0, (1)] [0, 1, (2)] [(2)] [1, 2, 3]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  fill_and_flush(1);
  // [0, (1)] [0, (1), (2)] [(2)] [(1), 2, 3] [1]
  ASSERT_EQ(CountLiveLogFiles(), 5);
  fill_and_flush(0);
  // [(0), (1)] [(0), (1), (2)] [(2)] [(1), 2, 3] [1, (0)] [0]
  // delete obsolete logs -->
  // [(1), 2, 3] [1, (0)] [0]
  ASSERT_EQ(CountLiveLogFiles(), 3);
  fill_and_flush(0);
  // [(1), 2, 3] [1, (0)], [(0)] [0]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  fill_and_flush(1);
  // [(1), 2, 3] [(1), (0)] [(0)] [0, (1)] [1]
  ASSERT_EQ(CountLiveLogFiles(), 5);
  fill_and_flush(2);
  // [(1), (2), 3] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2]
  ASSERT_EQ(CountLiveLogFiles(), 6);
  fill_and_flush(3);
  // [(1), (2), (3)] [(1), (0)] [(0)] [0, (1)] [1, (2)], [2, (3)] [3]
  // delete obsolete logs -->
  // [0, (1)] [1, (2)], [2, (3)] [3]
  ASSERT_EQ(CountLiveLogFiles(), 4);
  Close();
}
||||||
|
|
||||||
|
// Checks the number of immutable memtables and live log files when every
// column family is configured with a different write buffer size
|
||||||
|
TEST(ColumnFamilyTest, DifferentWriteBufferSizes) {
  Open();
  CreateColumnFamilies({"one", "two", "three"});
  ColumnFamilyOptions default_cf, one, two, three;

  // setup options. all column families have max_write_buffer_number setup to 10
  auto configure = [](ColumnFamilyOptions* cf_opts, size_t buffer_bytes,
                      int min_to_merge) {
    cf_opts->write_buffer_size = buffer_bytes;
    cf_opts->max_write_buffer_number = 10;
    cf_opts->min_write_buffer_number_to_merge = min_to_merge;
  };
  // "default" -> 100KB memtable, start flushing immediately
  configure(&default_cf, 100000, 1);
  // "one" -> 200KB memtable, start flushing with two immutable memtables
  configure(&one, 200000, 2);
  // "two" -> 1MB memtable, start flushing with three immutable memtables
  configure(&two, 1000000, 3);
  // "three" -> 90KB memtable, start flushing with four immutable memtables
  configure(&three, 90000, 4);

  Reopen({default_cf, one, two, three});

  int micros_wait_for_flush = 10000;
  // Writes num entries of kv_size bytes into cf, then blocks until the flush
  // of that column family has completed.
  auto write_then_wait = [this](int cf, int num, int kv_size) {
    PutRandomData(cf, num, kv_size);
    WaitForFlush(cf);
  };
  // Writes num entries of kv_size bytes into cf, then only sleeps for
  // micros_wait_for_flush instead of waiting for a flush.
  auto write_then_sleep = [this, micros_wait_for_flush](int cf, int num,
                                                        int kv_size) {
    PutRandomData(cf, num, kv_size);
    env_->SleepForMicroseconds(micros_wait_for_flush);
  };

  write_then_wait(0, 100, 1000);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 1);
  write_then_sleep(1, 200, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 2);
  write_then_sleep(2, 1000, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 1, 0});
  ASSERT_EQ(CountLiveLogFiles(), 3);
  write_then_sleep(2, 1000, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 2, 0});
  ASSERT_EQ(CountLiveLogFiles(), 4);
  write_then_sleep(3, 90, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 2, 1});
  ASSERT_EQ(CountLiveLogFiles(), 5);
  write_then_sleep(3, 90, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 2, 2});
  ASSERT_EQ(CountLiveLogFiles(), 6);
  write_then_sleep(3, 90, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 2, 3});
  ASSERT_EQ(CountLiveLogFiles(), 7);
  write_then_wait(0, 100, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 2, 3});
  ASSERT_EQ(CountLiveLogFiles(), 8);
  write_then_wait(2, 100, 10000);
  AssertNumberOfImmutableMemtables({0, 1, 0, 3});
  ASSERT_EQ(CountLiveLogFiles(), 9);
  write_then_wait(3, 90, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 10);
  write_then_sleep(3, 90, 1000);
  AssertNumberOfImmutableMemtables({0, 1, 0, 1});
  ASSERT_EQ(CountLiveLogFiles(), 11);
  write_then_wait(1, 200, 1000);
  AssertNumberOfImmutableMemtables({0, 0, 0, 1});
  ASSERT_EQ(CountLiveLogFiles(), 5);
  write_then_wait(3, 90*6, 1000);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 12);
  write_then_wait(0, 100, 1000);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 12);
  write_then_wait(2, 3*100, 10000);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 12);
  write_then_wait(1, 2*200, 1000);
  AssertNumberOfImmutableMemtables({0, 0, 0, 0});
  ASSERT_EQ(CountLiveLogFiles(), 7);
  Close();
}
||||||
|
|
||||||
|
// Every column family can carry its own merge operator: none for "default",
// uint64 addition for "first", and string append for "second".
TEST(ColumnFamilyTest, DifferentMergeOperators) {
  Open();
  CreateColumnFamilies({"first", "second"});

  ColumnFamilyOptions default_cf, first, second;
  first.merge_operator = MergeOperators::CreateUInt64AddOperator();
  second.merge_operator = MergeOperators::CreateStringAppendOperator();
  Reopen({default_cf, first, second});

  // Fixed64 encodings of 1, 2 and 3.
  std::string one, two, three;
  PutFixed64(&one, 1);
  PutFixed64(&two, 2);
  PutFixed64(&three, 3);

  // "default": Merge() is rejected and the last Put() stays visible.
  ASSERT_OK(Put(0, "foo", two));
  ASSERT_OK(Put(0, "foo", one));
  ASSERT_TRUE(Merge(0, "foo", two).IsNotSupported());
  ASSERT_EQ(Get(0, "foo"), one);

  // "first": 1 merged with 2 reads back as 3.
  ASSERT_OK(Put(1, "foo", two));
  ASSERT_OK(Put(1, "foo", one));
  ASSERT_OK(Merge(1, "foo", two));
  ASSERT_EQ(Get(1, "foo"), three);

  // "second": the operand is appended after a "," separator.
  ASSERT_OK(Put(2, "foo", two));
  ASSERT_OK(Put(2, "foo", one));
  ASSERT_OK(Merge(2, "foo", two));
  ASSERT_EQ(Get(2, "foo"), one + "," + two);
  Close();
}
||||||
|
|
||||||
|
// Runs three column families with different compaction configurations in one
// DB and checks that each one compacts according to its own options.
// The order of the steps below matters: every family is first brought to the
// edge of a compaction, then all compactions are triggered and awaited.
TEST(ColumnFamilyTest, DifferentCompactionStyles) {
  Open();
  CreateColumnFamilies({"one", "two"});
  ColumnFamilyOptions default_opts, universal_opts, level_opts;
  db_options_.max_open_files = 20;  // only 10 files in file cache
  db_options_.disableDataSync = true;

  // "default": level style, three levels, small buffers and files.
  default_opts.compaction_style = kCompactionStyleLevel;
  default_opts.num_levels = 3;
  default_opts.write_buffer_size = 64 << 10;  // 64KB
  default_opts.target_file_size_base = 30 << 10;
  default_opts.filter_policy = nullptr;
  default_opts.no_block_cache = true;
  default_opts.source_compaction_factor = 100;
  default_opts.disable_seek_compaction = false;

  // "one": universal style, compaction is triggered at >= 4 files.
  universal_opts.compaction_style = kCompactionStyleUniversal;
  universal_opts.level0_file_num_compaction_trigger = 4;
  universal_opts.write_buffer_size = 100000;

  // "two": level style with four levels.
  level_opts.compaction_style = kCompactionStyleLevel;
  level_opts.num_levels = 4;
  level_opts.max_mem_compaction_level = 0;
  level_opts.level0_file_num_compaction_trigger = 3;
  level_opts.write_buffer_size = 100000;

  Reopen({default_opts, universal_opts, level_opts});

  // SETUP column family "default" - test read compaction
  ASSERT_EQ("", FilesPerLevel(0));
  PutRandomData(0, 1, 4096);
  ASSERT_OK(Flush(0));
  ASSERT_EQ("0,0,1", FilesPerLevel(0));
  // write 8MB
  PutRandomData(0, 2000, 4096);
  ASSERT_OK(Flush(0));
  // clear levels 0 and 1
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, handles_[0]);
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, handles_[0]);
  ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
  ASSERT_EQ(NumTableFilesAtLevel(1, 0), 0);
  // write some new keys into level 0 and 1
  PutRandomData(0, 1024, 512);
  ASSERT_OK(Flush(0));
  WaitForCompaction();
  PutRandomData(0, 10, 512);
  ASSERT_OK(Flush(0));
  // remember number of files in each level (levels 0, 1 and 2)
  int files_at_level0 = NumTableFilesAtLevel(0, 0);
  int files_at_level1 = NumTableFilesAtLevel(1, 0);
  int files_at_level2 = NumTableFilesAtLevel(2, 0);
  ASSERT_NE(files_at_level0, 0);
  ASSERT_NE(files_at_level1, 0);
  ASSERT_NE(files_at_level2, 0);

  // SETUP column family "one" -- universal style
  // Stop one file short of the compaction trigger.
  for (int files = 1;
       files <= universal_opts.level0_file_num_compaction_trigger - 1;
       ++files) {
    PutRandomData(1, 11, 10000);
    WaitForFlush(1);
    ASSERT_EQ(std::to_string(files), FilesPerLevel(1));
  }

  // SETUP column family "two" -- level style with 4 levels
  // Stop one file short of the compaction trigger.
  for (int files = 1;
       files <= level_opts.level0_file_num_compaction_trigger - 1; ++files) {
    PutRandomData(2, 15, 10000);
    WaitForFlush(2);
    ASSERT_EQ(std::to_string(files), FilesPerLevel(2));
  }

  // TRIGGER compaction "default"
  // read a bunch of times, trigger read compaction
  for (int key = 0; key < 200000; ++key) {
    Get(0, std::to_string(key));
  }

  // TRIGGER compaction "one"
  PutRandomData(1, 12, 10000);

  // TRIGGER compaction "two"
  PutRandomData(2, 10, 10000);

  // WAIT for compactions
  WaitForCompaction();

  // VERIFY compaction "default"
  // verify that the number of files have decreased
  // in some level, indicating that there was a compaction
  ASSERT_TRUE(NumTableFilesAtLevel(0, 0) < files_at_level0 ||
              NumTableFilesAtLevel(1, 0) < files_at_level1 ||
              NumTableFilesAtLevel(2, 0) < files_at_level2);

  // VERIFY compaction "one"
  ASSERT_EQ("1", FilesPerLevel(1));

  // VERIFY compaction "two"
  ASSERT_EQ("0,1", FilesPerLevel(2));
  CompactAll(2);
  ASSERT_EQ("0,1", FilesPerLevel(2));

  Close();
}
||||||
|
|
||||||
|
namespace { |
||||||
|
// Renders the iterator's current position as "key->value", or "(invalid)"
// when the iterator is not positioned on an entry.
std::string IterStatus(Iterator* iter) {
  if (!iter->Valid()) {
    return "(invalid)";
  }
  return iter->key().ToString() + "->" + iter->value().ToString();
}
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// Creates one iterator per column family through DB::NewIterators and checks
// what each iterator sees before and after a write that happens once the
// iterators already exist. The scenario is run twice:
//   iter == 0 -- no tailing
//   iter == 1 -- tailing
TEST(ColumnFamilyTest, NewIteratorsTest) {
  for (int iter = 0; iter < 2; ++iter) {
    Open();
    CreateColumnFamiliesAndReopen({"one", "two"});
    ASSERT_OK(Put(0, "a", "b"));
    ASSERT_OK(Put(1, "b", "a"));
    ASSERT_OK(Put(2, "c", "m"));
    ASSERT_OK(Put(2, "v", "t"));
    std::vector<Iterator*> iterators;
    ReadOptions options;
    options.tailing = (iter == 1);
    ASSERT_OK(db_->NewIterators(options, handles_, &iterators));
    // The checks below index iterators[0..2] directly, so make sure one
    // iterator was handed back for every requested column family first.
    ASSERT_EQ(handles_.size(), iterators.size());

    for (auto it : iterators) {
      it->SeekToFirst();
    }
    ASSERT_EQ(IterStatus(iterators[0]), "a->b");
    ASSERT_EQ(IterStatus(iterators[1]), "b->a");
    ASSERT_EQ(IterStatus(iterators[2]), "c->m");

    // Written after the iterators were created.
    ASSERT_OK(Put(1, "x", "x"));

    for (auto it : iterators) {
      it->Next();
    }

    ASSERT_EQ(IterStatus(iterators[0]), "(invalid)");
    if (iter == 0) {
      // no tailing: the late write is not visible
      ASSERT_EQ(IterStatus(iterators[1]), "(invalid)");
    } else {
      // tailing: the late write is visible
      ASSERT_EQ(IterStatus(iterators[1]), "x->x");
    }
    ASSERT_EQ(IterStatus(iterators[2]), "v->t");

    for (auto it : iterators) {
      delete it;
    }
    Destroy();
  }
}
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
int main(int argc, char** argv) { |
||||||
|
return rocksdb::test::RunAllTests(); |
||||||
|
} |
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,71 @@ |
|||||||
|
# Scratch directory that holds one database per tested commit.
TMP_DIR="/tmp/rocksdb-sanity-test"

# Pick the two commits to compare. With fewer than two arguments, fall back to
# the latest and the 10th latest commit of the current history.
if [ "$#" -lt 2 ]; then
  echo "usage: ./auto_sanity_test.sh [new_commit] [old_commit]"
  echo "Missing either [new_commit] or [old_commit], perform sanity check with the latest and 10th latest commits."
  recent_commits=`git log | grep -e "^commit [a-z0-9]\+$"| head -n10 | sed -e 's/commit //g'`
  commit_new=`echo "$recent_commits" | head -n1`
  commit_old=`echo "$recent_commits" | tail -n1`
  echo "the most recent commits are:"
  echo "$recent_commits"
else
  commit_new="$1"
  commit_old="$2"
fi

# An empty commit id (e.g. git log failed) would make the per-commit directory
# collapse to "$TMP_DIR/", which is removed recursively further down.
if [ -z "$commit_new" ] || [ -z "$commit_old" ]; then
  echo "[ERROR] Could not determine the commits to test."
  exit 1
fi

if [ ! -d "$TMP_DIR" ]; then
  mkdir -p "$TMP_DIR"
fi
dir_new="${TMP_DIR}/${commit_new}"
dir_old="${TMP_DIR}/${commit_old}"
||||||
|
|
||||||
|
# Rebuilds db_sanity_test from a clean tree; aborts the script with status 1
# when the build fails. Build output on stdout is discarded.
function makestuff() {
  echo "make clean"
  make clean > /dev/null
  echo "make db_sanity_test -j32"
  if ! make db_sanity_test -j32 > /dev/null; then
    echo "[ERROR] Failed to perform 'make db_sanity_test'"
    exit 1
  fi
}
||||||
|
|
||||||
|
# Start from empty database directories for both commits.
rm -r -f "$dir_new"
rm -r -f "$dir_old"

echo "Running db sanity check with commits $commit_new and $commit_old."

# Building "commit X" only means something if X is actually checked out, so
# remember the starting point and return to it when the script exits.
start_ref=`git symbolic-ref --quiet --short HEAD || git rev-parse HEAD`
trap 'if [ -n "$start_ref" ]; then git checkout --quiet "$start_ref"; fi' EXIT

# Switches the working tree to the given commit; aborts with status 1 when
# the checkout fails (for example because of local modifications).
function checkout_commit() {
  echo "git checkout $1"
  if ! git checkout --quiet "$1"; then
    echo "[ERROR] Can't checkout $1"
    exit 1
  fi
}

echo "============================================================="
echo "Making build $commit_new"
checkout_commit "$commit_new"
makestuff
mv db_sanity_test new_db_sanity_test
echo "Creating db based on the new commit --- $commit_new"
if ! ./new_db_sanity_test "$dir_new" create; then
  echo "[ERROR] Creation of $dir_new using commit $commit_new failed."
  exit 2
fi

echo "============================================================="
echo "Making build $commit_old"
checkout_commit "$commit_old"
makestuff
mv db_sanity_test old_db_sanity_test
echo "Creating db based on the old commit --- $commit_old"
if ! ./old_db_sanity_test "$dir_old" create; then
  echo "[ERROR] Creation of $dir_old using commit $commit_old failed."
  exit 2
fi

echo "============================================================="
echo "Verifying new db $dir_new using the old commit --- $commit_old"
if ! ./old_db_sanity_test "$dir_new" verify; then
  echo "[ERROR] Verification of $dir_new using commit $commit_old failed."
  exit 2
fi

echo "============================================================="
echo "Verifying old db $dir_old using the new commit --- $commit_new"
if ! ./new_db_sanity_test "$dir_old" verify; then
  echo "[ERROR] Verification of $dir_old using commit $commit_new failed."
  exit 2
fi

rm old_db_sanity_test
rm new_db_sanity_test

echo "Auto sanity test passed!"
@ -0,0 +1,62 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
#include "util/sync_point.h" |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// Returns the single process-wide SyncPoint, created on first use as a
// function-local static.
SyncPoint* SyncPoint::GetInstance() {
  static SyncPoint instance;
  return &instance;
}
||||||
|
|
||||||
|
void SyncPoint::LoadDependency(const std::vector<Dependency>& dependencies) { |
||||||
|
successors_.clear(); |
||||||
|
predecessors_.clear(); |
||||||
|
cleared_points_.clear(); |
||||||
|
for (const auto& dependency : dependencies) { |
||||||
|
successors_[dependency.predecessor].push_back(dependency.successor); |
||||||
|
predecessors_[dependency.successor].push_back(dependency.predecessor); |
||||||
|
} |
||||||
|
} |
||||||
|
|
||||||
|
bool SyncPoint::PredecessorsAllCleared(const std::string& point) { |
||||||
|
for (const auto& pred : predecessors_[point]) { |
||||||
|
if (cleared_points_.count(pred) == 0) { |
||||||
|
return false; |
||||||
|
} |
||||||
|
} |
||||||
|
return true; |
||||||
|
} |
||||||
|
|
||||||
|
void SyncPoint::EnableProcessing() { |
||||||
|
std::unique_lock<std::mutex> lock(mutex_); |
||||||
|
enabled_ = true; |
||||||
|
} |
||||||
|
|
||||||
|
void SyncPoint::DisableProcessing() { |
||||||
|
std::unique_lock<std::mutex> lock(mutex_); |
||||||
|
enabled_ = false; |
||||||
|
} |
||||||
|
|
||||||
|
void SyncPoint::ClearTrace() { |
||||||
|
std::unique_lock<std::mutex> lock(mutex_); |
||||||
|
cleared_points_.clear(); |
||||||
|
} |
||||||
|
|
||||||
|
void SyncPoint::Process(const std::string& point) { |
||||||
|
std::unique_lock<std::mutex> lock(mutex_); |
||||||
|
|
||||||
|
if (!enabled_) return; |
||||||
|
|
||||||
|
while (!PredecessorsAllCleared(point)) { |
||||||
|
cv_.wait(lock); |
||||||
|
} |
||||||
|
|
||||||
|
cleared_points_.insert(point); |
||||||
|
cv_.notify_all(); |
||||||
|
} |
||||||
|
|
||||||
|
} // namespace rocksdb
|
@ -0,0 +1,79 @@ |
|||||||
|
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||||
|
// This source code is licensed under the BSD-style license found in the
|
||||||
|
// LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
// of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
#pragma once |
||||||
|
|
||||||
|
#include <condition_variable> |
||||||
|
#include <mutex> |
||||||
|
#include <string> |
||||||
|
#include <unordered_set> |
||||||
|
#include <unordered_map> |
||||||
|
#include <vector> |
||||||
|
|
||||||
|
namespace rocksdb { |
||||||
|
|
||||||
|
// This class provides facility to reproduce race conditions deterministically
|
||||||
|
// in unit tests.
|
||||||
|
// Developer could specify sync points in the codebase via TEST_SYNC_POINT.
|
||||||
|
// Each sync point represents a position in the execution stream of a thread.
|
||||||
|
// In the unit test, 'Happens After' relationship among sync points could be
|
||||||
|
// setup via SyncPoint::LoadDependency, to reproduce a desired interleave of
|
||||||
|
// threads execution.
|
||||||
|
// Refer to (DBTest,TransactionLogIteratorRace), for an example use case.
|
||||||
|
|
||||||
|
class SyncPoint {
 public:
  // Accessor for the shared SyncPoint instance.
  static SyncPoint* GetInstance();

  // One ordering edge: `successor` must not proceed until `predecessor` has
  // been passed.
  struct Dependency {
    std::string predecessor;
    std::string successor;
  };

  // Installs the dependencies between sync points.
  // Call once at the beginning of a test.
  void LoadDependency(const std::vector<Dependency>& dependencies);

  // Switches sync point processing on (it is off on startup).
  void EnableProcessing();

  // Switches sync point processing off.
  void DisableProcessing();

  // Removes the execution trace of all sync points.
  void ClearTrace();

  // Invoked through TEST_SYNC_POINT; blocks the caller until all
  // predecessors of `point` have been executed.
  void Process(const std::string& point);

  // TODO: it might be useful to provide a function that blocks until all
  // sync points are cleared.

 private:
  // True when every predecessor of `point` has been passed.
  bool PredecessorsAllCleared(const std::string& point);

  // Successor / predecessor lookup tables filled in by LoadDependency.
  std::unordered_map<std::string, std::vector<std::string>> successors_;
  std::unordered_map<std::string, std::vector<std::string>> predecessors_;

  std::mutex mutex_;
  std::condition_variable cv_;
  // Sync points that have been passed through.
  std::unordered_set<std::string> cleared_points_;
  bool enabled_{false};
};
||||||
|
|
||||||
|
} // namespace rocksdb
|
||||||
|
|
||||||
|
// Use TEST_SYNC_POINT to specify sync points inside code base.
|
||||||
|
// Sync points can have happens-after dependency on other sync points,
|
||||||
|
// configured at runtime via SyncPoint::LoadDependency. This could be
|
||||||
|
// utilized to re-produce race conditions between threads.
|
||||||
|
// See TransactionLogIteratorRace in db_test.cc for an example use case.
|
||||||
|
// TEST_SYNC_POINT is no op in release build.
|
||||||
|
// Debug builds forward the named point to the shared SyncPoint instance;
// builds with NDEBUG defined expand the macro to nothing.
#ifndef NDEBUG
#define TEST_SYNC_POINT(x) rocksdb::SyncPoint::GetInstance()->Process(x)
#else
#define TEST_SYNC_POINT(x)
#endif
Loading…
Reference in new issue