|
|
@ -1236,9 +1236,12 @@ Status DBImpl::Recover( |
|
|
|
SetTickerCount(stats_, SEQUENCE_NUMBER, versions_->LastSequence()); |
|
|
|
SetTickerCount(stats_, SEQUENCE_NUMBER, versions_->LastSequence()); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Initial value
|
|
|
|
|
|
|
|
max_total_in_memory_state_ = 0; |
|
|
|
for (auto cfd : *versions_->GetColumnFamilySet()) { |
|
|
|
for (auto cfd : *versions_->GetColumnFamilySet()) { |
|
|
|
max_total_in_memory_state_ += cfd->options()->write_buffer_size * |
|
|
|
auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); |
|
|
|
cfd->options()->max_write_buffer_number; |
|
|
|
max_total_in_memory_state_ += mutable_cf_options->write_buffer_size * |
|
|
|
|
|
|
|
mutable_cf_options->max_write_buffer_number; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return s; |
|
|
|
return s; |
|
|
@ -1724,9 +1727,37 @@ Status DBImpl::CompactRange(ColumnFamilyHandle* column_family, |
|
|
|
|
|
|
|
|
|
|
|
bool DBImpl::SetOptions(ColumnFamilyHandle* column_family, |
|
|
|
bool DBImpl::SetOptions(ColumnFamilyHandle* column_family, |
|
|
|
const std::unordered_map<std::string, std::string>& options_map) { |
|
|
|
const std::unordered_map<std::string, std::string>& options_map) { |
|
|
|
auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family); |
|
|
|
auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd(); |
|
|
|
|
|
|
|
if (options_map.empty()) { |
|
|
|
|
|
|
|
Log(db_options_.info_log, "SetOptions() on column family [%s], empty input", |
|
|
|
|
|
|
|
cfd->GetName().c_str()); |
|
|
|
|
|
|
|
return false; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MutableCFOptions new_options; |
|
|
|
|
|
|
|
bool succeed = false; |
|
|
|
|
|
|
|
{ |
|
|
|
MutexLock l(&mutex_); |
|
|
|
MutexLock l(&mutex_); |
|
|
|
return cfh->cfd()->SetOptions(options_map); |
|
|
|
if (cfd->SetOptions(options_map)) { |
|
|
|
|
|
|
|
new_options = *cfd->GetLatestMutableCFOptions(); |
|
|
|
|
|
|
|
succeed = true; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Log(db_options_.info_log, "SetOptions() on column family [%s], inputs:", |
|
|
|
|
|
|
|
cfd->GetName().c_str()); |
|
|
|
|
|
|
|
for (const auto& o : options_map) { |
|
|
|
|
|
|
|
Log(db_options_.info_log, "%s: %s\n", o.first.c_str(), o.second.c_str()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (succeed) { |
|
|
|
|
|
|
|
Log(db_options_.info_log, "[%s] SetOptions succeeded", |
|
|
|
|
|
|
|
cfd->GetName().c_str()); |
|
|
|
|
|
|
|
new_options.Dump(db_options_.info_log.get()); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
Log(db_options_.info_log, "[%s] SetOptions failed", |
|
|
|
|
|
|
|
cfd->GetName().c_str()); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
return succeed; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// return the same level if it cannot be moved
|
|
|
|
// return the same level if it cannot be moved
|
|
|
@ -1803,8 +1834,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) { |
|
|
|
|
|
|
|
|
|
|
|
status = versions_->LogAndApply(cfd, |
|
|
|
status = versions_->LogAndApply(cfd, |
|
|
|
mutable_cf_options, &edit, &mutex_, db_directory_.get()); |
|
|
|
mutable_cf_options, &edit, &mutex_, db_directory_.get()); |
|
|
|
superversion_to_free = cfd->InstallSuperVersion( |
|
|
|
superversion_to_free = InstallSuperVersion( |
|
|
|
new_superversion, &mutex_, mutable_cf_options); |
|
|
|
cfd, new_superversion, mutable_cf_options); |
|
|
|
new_superversion = nullptr; |
|
|
|
new_superversion = nullptr; |
|
|
|
|
|
|
|
|
|
|
|
Log(db_options_.info_log, "[%s] LogAndApply: %s\n", cfd->GetName().c_str(), |
|
|
|
Log(db_options_.info_log, "[%s] LogAndApply: %s\n", cfd->GetName().c_str(), |
|
|
@ -1840,10 +1871,10 @@ int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) { |
|
|
|
return cfh->cfd()->options()->level0_stop_writes_trigger; |
|
|
|
return cfh->cfd()->options()->level0_stop_writes_trigger; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Thin wrapper around FlushMemTable(): unwraps `column_family` to its
// ColumnFamilyData and forwards it together with `flush_options`. The Status
// produced by FlushMemTable() is returned unchanged.
Status DBImpl::Flush(const FlushOptions& flush_options,
                     ColumnFamilyHandle* column_family) {
  auto* target_cfd =
      reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
  return FlushMemTable(target_cfd, flush_options);
}
|
|
|
|
|
|
|
|
|
|
|
SequenceNumber DBImpl::GetLatestSequenceNumber() const { |
|
|
|
SequenceNumber DBImpl::GetLatestSequenceNumber() const { |
|
|
@ -1933,7 +1964,7 @@ Status DBImpl::RunManualCompaction(ColumnFamilyData* cfd, int input_level, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, |
|
|
|
Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, |
|
|
|
const FlushOptions& options) { |
|
|
|
const FlushOptions& flush_options) { |
|
|
|
Status s; |
|
|
|
Status s; |
|
|
|
{ |
|
|
|
{ |
|
|
|
WriteContext context; |
|
|
|
WriteContext context; |
|
|
@ -1957,7 +1988,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd, |
|
|
|
write_thread_.ExitWriteThread(&w, &w, s); |
|
|
|
write_thread_.ExitWriteThread(&w, &w, s); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (s.ok() && options.wait) { |
|
|
|
if (s.ok() && flush_options.wait) { |
|
|
|
// Wait until the compaction completes
|
|
|
|
// Wait until the compaction completes
|
|
|
|
s = WaitForFlushMemTable(cfd); |
|
|
|
s = WaitForFlushMemTable(cfd); |
|
|
|
} |
|
|
|
} |
|
|
@ -2320,12 +2351,14 @@ Status DBImpl::BackgroundCompaction(bool* madeProgress, |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
// no need to refcount in iteration since it's always under a mutex
|
|
|
|
// no need to refcount in iteration since it's always under a mutex
|
|
|
|
for (auto cfd : *versions_->GetColumnFamilySet()) { |
|
|
|
for (auto cfd : *versions_->GetColumnFamilySet()) { |
|
|
|
if (!cfd->options()->disable_auto_compactions) { |
|
|
|
// Pick up latest mutable CF Options and use it throughout the
|
|
|
|
|
|
|
|
// compaction job
|
|
|
|
|
|
|
|
auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); |
|
|
|
|
|
|
|
if (!mutable_cf_options->disable_auto_compactions) { |
|
|
|
// NOTE: try to avoid unnecessary copy of MutableCFOptions if
|
|
|
|
// NOTE: try to avoid unnecessary copy of MutableCFOptions if
|
|
|
|
// compaction is not necessary. Need to make sure mutex is held
|
|
|
|
// compaction is not necessary. Need to make sure mutex is held
|
|
|
|
// until we make a copy in the following code
|
|
|
|
// until we make a copy in the following code
|
|
|
|
c.reset(cfd->PickCompaction( |
|
|
|
c.reset(cfd->PickCompaction(*mutable_cf_options, log_buffer)); |
|
|
|
*cfd->GetLatestMutableCFOptions(), log_buffer)); |
|
|
|
|
|
|
|
if (c != nullptr) { |
|
|
|
if (c != nullptr) { |
|
|
|
// update statistics
|
|
|
|
// update statistics
|
|
|
|
MeasureTime(stats_, NUM_FILES_IN_SINGLE_COMPACTION, |
|
|
|
MeasureTime(stats_, NUM_FILES_IN_SINGLE_COMPACTION, |
|
|
@ -3441,7 +3474,7 @@ static void CleanupIteratorState(void* arg1, void* arg2) { |
|
|
|
} |
|
|
|
} |
|
|
|
} // namespace
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
|
|
|
|
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, |
|
|
|
Iterator* DBImpl::NewInternalIterator(const ReadOptions& read_options, |
|
|
|
ColumnFamilyData* cfd, |
|
|
|
ColumnFamilyData* cfd, |
|
|
|
SuperVersion* super_version, |
|
|
|
SuperVersion* super_version, |
|
|
|
Arena* arena) { |
|
|
|
Arena* arena) { |
|
|
@ -3451,11 +3484,11 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options, |
|
|
|
MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), arena); |
|
|
|
MergeIteratorBuilder merge_iter_builder(&cfd->internal_comparator(), arena); |
|
|
|
// Collect iterator for mutable mem
|
|
|
|
// Collect iterator for mutable mem
|
|
|
|
merge_iter_builder.AddIterator( |
|
|
|
merge_iter_builder.AddIterator( |
|
|
|
super_version->mem->NewIterator(options, arena)); |
|
|
|
super_version->mem->NewIterator(read_options, arena)); |
|
|
|
// Collect all needed child iterators for immutable memtables
|
|
|
|
// Collect all needed child iterators for immutable memtables
|
|
|
|
super_version->imm->AddIterators(options, &merge_iter_builder); |
|
|
|
super_version->imm->AddIterators(read_options, &merge_iter_builder); |
|
|
|
// Collect iterators for files in L0 - Ln
|
|
|
|
// Collect iterators for files in L0 - Ln
|
|
|
|
super_version->current->AddIterators(options, env_options_, |
|
|
|
super_version->current->AddIterators(read_options, env_options_, |
|
|
|
&merge_iter_builder); |
|
|
|
&merge_iter_builder); |
|
|
|
internal_iter = merge_iter_builder.Finish(); |
|
|
|
internal_iter = merge_iter_builder.Finish(); |
|
|
|
IterState* cleanup = new IterState(this, &mutex_, super_version); |
|
|
|
IterState* cleanup = new IterState(this, &mutex_, super_version); |
|
|
@ -3468,10 +3501,10 @@ ColumnFamilyHandle* DBImpl::DefaultColumnFamily() const { |
|
|
|
return default_cf_handle_; |
|
|
|
return default_cf_handle_; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Forwards `read_options`, `column_family`, `key` and `value` to GetImpl()
// and returns the Status it produces, unchanged.
Status DBImpl::Get(const ReadOptions& read_options,
                   ColumnFamilyHandle* column_family, const Slice& key,
                   std::string* value) {
  Status lookup_status = GetImpl(read_options, column_family, key, value);
  return lookup_status;
}
|
|
|
|
|
|
|
|
|
|
|
// DeletionState gets created and destructed outside of the lock -- we
|
|
|
|
// DeletionState gets created and destructed outside of the lock -- we
|
|
|
@ -3488,17 +3521,39 @@ void DBImpl::InstallSuperVersion( |
|
|
|
ColumnFamilyData* cfd, DeletionState& deletion_state, |
|
|
|
ColumnFamilyData* cfd, DeletionState& deletion_state, |
|
|
|
const MutableCFOptions& mutable_cf_options) { |
|
|
|
const MutableCFOptions& mutable_cf_options) { |
|
|
|
mutex_.AssertHeld(); |
|
|
|
mutex_.AssertHeld(); |
|
|
|
// if new_superversion == nullptr, it means somebody already used it
|
|
|
|
|
|
|
|
SuperVersion* new_superversion = |
|
|
|
|
|
|
|
(deletion_state.new_superversion != nullptr) ? |
|
|
|
|
|
|
|
deletion_state.new_superversion : new SuperVersion(); |
|
|
|
|
|
|
|
SuperVersion* old_superversion = |
|
|
|
SuperVersion* old_superversion = |
|
|
|
cfd->InstallSuperVersion(new_superversion, &mutex_, mutable_cf_options); |
|
|
|
InstallSuperVersion(cfd, deletion_state.new_superversion, |
|
|
|
|
|
|
|
mutable_cf_options); |
|
|
|
deletion_state.new_superversion = nullptr; |
|
|
|
deletion_state.new_superversion = nullptr; |
|
|
|
deletion_state.superversions_to_free.push_back(old_superversion); |
|
|
|
deletion_state.superversions_to_free.push_back(old_superversion); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::GetImpl(const ReadOptions& options, |
|
|
|
// Installs a SuperVersion on `cfd` and returns the one it replaced.
//
// cfd:                column family whose SuperVersion is being swapped.
// new_sv:             SuperVersion to install; when nullptr a fresh
//                     SuperVersion is allocated here instead.
// mutable_cf_options: forwarded to ColumnFamilyData::InstallSuperVersion()
//                     and used to refresh max_total_in_memory_state_.
//
// Returns whatever ColumnFamilyData::InstallSuperVersion() returned. Callers
// in this file either delete it or queue it for freeing; it may be nullptr
// (handled below).
//
// REQUIRES: mutex_ is held
SuperVersion* DBImpl::InstallSuperVersion(
    ColumnFamilyData* cfd, SuperVersion* new_sv,
    const MutableCFOptions& mutable_cf_options) {
  mutex_.AssertHeld();
  auto* old = cfd->InstallSuperVersion(
      new_sv ? new_sv : new SuperVersion(), &mutex_, mutable_cf_options);

  // We want to schedule potential flush or compactions since new options may
  // have been picked up in this new version. New options may cause flush
  // compaction trigger condition to change.
  MaybeScheduleFlushOrCompaction();

  // Update max_total_in_memory_state_: swap the memtable budget of the
  // replaced SuperVersion for the budget implied by the new options.
  //
  // The old budget must be held in a 64-bit unsigned local. Declaring it as
  // `auto x = 0` deduces `int`, which narrows the product below and corrupts
  // the running total whenever the budget does not fit in an int.
  uint64_t old_memtable_size = 0;
  if (old != nullptr) {
    old_memtable_size = old->mutable_cf_options.write_buffer_size *
                        old->mutable_cf_options.max_write_buffer_number;
  }
  max_total_in_memory_state_ =
      max_total_in_memory_state_ - old_memtable_size +
      mutable_cf_options.write_buffer_size *
      mutable_cf_options.max_write_buffer_number;
  return old;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::GetImpl(const ReadOptions& read_options, |
|
|
|
ColumnFamilyHandle* column_family, const Slice& key, |
|
|
|
ColumnFamilyHandle* column_family, const Slice& key, |
|
|
|
std::string* value, bool* value_found) { |
|
|
|
std::string* value, bool* value_found) { |
|
|
|
StopWatch sw(env_, stats_, DB_GET); |
|
|
|
StopWatch sw(env_, stats_, DB_GET); |
|
|
@ -3508,8 +3563,9 @@ Status DBImpl::GetImpl(const ReadOptions& options, |
|
|
|
auto cfd = cfh->cfd(); |
|
|
|
auto cfd = cfh->cfd(); |
|
|
|
|
|
|
|
|
|
|
|
SequenceNumber snapshot; |
|
|
|
SequenceNumber snapshot; |
|
|
|
if (options.snapshot != nullptr) { |
|
|
|
if (read_options.snapshot != nullptr) { |
|
|
|
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_; |
|
|
|
snapshot = reinterpret_cast<const SnapshotImpl*>( |
|
|
|
|
|
|
|
read_options.snapshot)->number_; |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
snapshot = versions_->LastSequence(); |
|
|
|
snapshot = versions_->LastSequence(); |
|
|
|
} |
|
|
|
} |
|
|
@ -3535,7 +3591,8 @@ Status DBImpl::GetImpl(const ReadOptions& options, |
|
|
|
RecordTick(stats_, MEMTABLE_HIT); |
|
|
|
RecordTick(stats_, MEMTABLE_HIT); |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
PERF_TIMER_GUARD(get_from_output_files_time); |
|
|
|
PERF_TIMER_GUARD(get_from_output_files_time); |
|
|
|
sv->current->Get(options, lkey, value, &s, &merge_context, value_found); |
|
|
|
sv->current->Get(read_options, lkey, value, &s, &merge_context, |
|
|
|
|
|
|
|
value_found); |
|
|
|
RecordTick(stats_, MEMTABLE_MISS); |
|
|
|
RecordTick(stats_, MEMTABLE_MISS); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -3551,7 +3608,7 @@ Status DBImpl::GetImpl(const ReadOptions& options, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
std::vector<Status> DBImpl::MultiGet( |
|
|
|
std::vector<Status> DBImpl::MultiGet( |
|
|
|
const ReadOptions& options, |
|
|
|
const ReadOptions& read_options, |
|
|
|
const std::vector<ColumnFamilyHandle*>& column_family, |
|
|
|
const std::vector<ColumnFamilyHandle*>& column_family, |
|
|
|
const std::vector<Slice>& keys, std::vector<std::string>* values) { |
|
|
|
const std::vector<Slice>& keys, std::vector<std::string>* values) { |
|
|
|
|
|
|
|
|
|
|
@ -3577,8 +3634,9 @@ std::vector<Status> DBImpl::MultiGet( |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
mutex_.Lock(); |
|
|
|
mutex_.Lock(); |
|
|
|
if (options.snapshot != nullptr) { |
|
|
|
if (read_options.snapshot != nullptr) { |
|
|
|
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_; |
|
|
|
snapshot = reinterpret_cast<const SnapshotImpl*>( |
|
|
|
|
|
|
|
read_options.snapshot)->number_; |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
snapshot = versions_->LastSequence(); |
|
|
|
snapshot = versions_->LastSequence(); |
|
|
|
} |
|
|
|
} |
|
|
@ -3621,7 +3679,8 @@ std::vector<Status> DBImpl::MultiGet( |
|
|
|
// Done
|
|
|
|
// Done
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
PERF_TIMER_GUARD(get_from_output_files_time); |
|
|
|
PERF_TIMER_GUARD(get_from_output_files_time); |
|
|
|
super_version->current->Get(options, lkey, value, &s, &merge_context); |
|
|
|
super_version->current->Get(read_options, lkey, value, &s, |
|
|
|
|
|
|
|
&merge_context); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
@ -3659,7 +3718,7 @@ std::vector<Status> DBImpl::MultiGet( |
|
|
|
return stat_list; |
|
|
|
return stat_list; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options, |
|
|
|
Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& cf_options, |
|
|
|
const std::string& column_family_name, |
|
|
|
const std::string& column_family_name, |
|
|
|
ColumnFamilyHandle** handle) { |
|
|
|
ColumnFamilyHandle** handle) { |
|
|
|
*handle = nullptr; |
|
|
|
*handle = nullptr; |
|
|
@ -3674,26 +3733,23 @@ Status DBImpl::CreateColumnFamily(const ColumnFamilyOptions& options, |
|
|
|
uint32_t new_id = versions_->GetColumnFamilySet()->GetNextColumnFamilyID(); |
|
|
|
uint32_t new_id = versions_->GetColumnFamilySet()->GetNextColumnFamilyID(); |
|
|
|
edit.SetColumnFamily(new_id); |
|
|
|
edit.SetColumnFamily(new_id); |
|
|
|
edit.SetLogNumber(logfile_number_); |
|
|
|
edit.SetLogNumber(logfile_number_); |
|
|
|
edit.SetComparatorName(options.comparator->Name()); |
|
|
|
edit.SetComparatorName(cf_options.comparator->Name()); |
|
|
|
|
|
|
|
|
|
|
|
// LogAndApply will both write the creation in MANIFEST and create
|
|
|
|
// LogAndApply will both write the creation in MANIFEST and create
|
|
|
|
// ColumnFamilyData object
|
|
|
|
// ColumnFamilyData object
|
|
|
|
Options opt(db_options_, options); |
|
|
|
Options opt(db_options_, cf_options); |
|
|
|
Status s = versions_->LogAndApply(nullptr, |
|
|
|
Status s = versions_->LogAndApply(nullptr, |
|
|
|
MutableCFOptions(opt, ImmutableCFOptions(opt)), |
|
|
|
MutableCFOptions(opt, ImmutableCFOptions(opt)), |
|
|
|
&edit, &mutex_, db_directory_.get(), false, &options); |
|
|
|
&edit, &mutex_, db_directory_.get(), false, &cf_options); |
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
|
single_column_family_mode_ = false; |
|
|
|
single_column_family_mode_ = false; |
|
|
|
auto cfd = |
|
|
|
auto cfd = |
|
|
|
versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); |
|
|
|
versions_->GetColumnFamilySet()->GetColumnFamily(column_family_name); |
|
|
|
assert(cfd != nullptr); |
|
|
|
assert(cfd != nullptr); |
|
|
|
delete cfd->InstallSuperVersion(new SuperVersion(), &mutex_, |
|
|
|
delete InstallSuperVersion(cfd, nullptr, *cfd->GetLatestMutableCFOptions()); |
|
|
|
*cfd->GetLatestMutableCFOptions()); |
|
|
|
|
|
|
|
*handle = new ColumnFamilyHandleImpl(cfd, this, &mutex_); |
|
|
|
*handle = new ColumnFamilyHandleImpl(cfd, this, &mutex_); |
|
|
|
Log(db_options_.info_log, "Created column family [%s] (ID %u)", |
|
|
|
Log(db_options_.info_log, "Created column family [%s] (ID %u)", |
|
|
|
column_family_name.c_str(), (unsigned)cfd->GetID()); |
|
|
|
column_family_name.c_str(), (unsigned)cfd->GetID()); |
|
|
|
max_total_in_memory_state_ += cfd->options()->write_buffer_size * |
|
|
|
|
|
|
|
cfd->options()->max_write_buffer_number; |
|
|
|
|
|
|
|
} else { |
|
|
|
} else { |
|
|
|
Log(db_options_.info_log, "Creating column family [%s] FAILED -- %s", |
|
|
|
Log(db_options_.info_log, "Creating column family [%s] FAILED -- %s", |
|
|
|
column_family_name.c_str(), s.ToString().c_str()); |
|
|
|
column_family_name.c_str(), s.ToString().c_str()); |
|
|
@ -3712,7 +3768,6 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { |
|
|
|
edit.DropColumnFamily(); |
|
|
|
edit.DropColumnFamily(); |
|
|
|
edit.SetColumnFamily(cfd->GetID()); |
|
|
|
edit.SetColumnFamily(cfd->GetID()); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Status s; |
|
|
|
Status s; |
|
|
|
{ |
|
|
|
{ |
|
|
|
MutexLock l(&mutex_); |
|
|
|
MutexLock l(&mutex_); |
|
|
@ -3732,8 +3787,9 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { |
|
|
|
|
|
|
|
|
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
|
assert(cfd->IsDropped()); |
|
|
|
assert(cfd->IsDropped()); |
|
|
|
max_total_in_memory_state_ -= cfd->options()->write_buffer_size * |
|
|
|
auto* mutable_cf_options = cfd->GetLatestMutableCFOptions(); |
|
|
|
cfd->options()->max_write_buffer_number; |
|
|
|
max_total_in_memory_state_ -= mutable_cf_options->write_buffer_size * |
|
|
|
|
|
|
|
mutable_cf_options->max_write_buffer_number; |
|
|
|
Log(db_options_.info_log, "Dropped column family with id %u\n", |
|
|
|
Log(db_options_.info_log, "Dropped column family with id %u\n", |
|
|
|
cfd->GetID()); |
|
|
|
cfd->GetID()); |
|
|
|
} else { |
|
|
|
} else { |
|
|
@ -3745,14 +3801,14 @@ Status DBImpl::DropColumnFamily(ColumnFamilyHandle* column_family) { |
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
bool DBImpl::KeyMayExist(const ReadOptions& options, |
|
|
|
bool DBImpl::KeyMayExist(const ReadOptions& read_options, |
|
|
|
ColumnFamilyHandle* column_family, const Slice& key, |
|
|
|
ColumnFamilyHandle* column_family, const Slice& key, |
|
|
|
std::string* value, bool* value_found) { |
|
|
|
std::string* value, bool* value_found) { |
|
|
|
if (value_found != nullptr) { |
|
|
|
if (value_found != nullptr) { |
|
|
|
// falsify later if key-may-exist but can't fetch value
|
|
|
|
// falsify later if key-may-exist but can't fetch value
|
|
|
|
*value_found = true; |
|
|
|
*value_found = true; |
|
|
|
} |
|
|
|
} |
|
|
|
ReadOptions roptions = options; |
|
|
|
ReadOptions roptions = read_options; |
|
|
|
roptions.read_tier = kBlockCacheTier; // read from block cache only
|
|
|
|
roptions.read_tier = kBlockCacheTier; // read from block cache only
|
|
|
|
auto s = GetImpl(roptions, column_family, key, value, value_found); |
|
|
|
auto s = GetImpl(roptions, column_family, key, value, value_found); |
|
|
|
|
|
|
|
|
|
|
@ -3941,23 +3997,23 @@ Status DBImpl::Merge(const WriteOptions& o, ColumnFamilyHandle* column_family, |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Forwards the delete request to DB::Delete() and returns its Status
// unchanged.
Status DBImpl::Delete(const WriteOptions& write_options,
                      ColumnFamilyHandle* column_family, const Slice& key) {
  // Qualified call: this names DB::Delete explicitly rather than re-entering
  // DBImpl::Delete.
  Status delete_status = DB::Delete(write_options, column_family, key);
  return delete_status;
}
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
Status DBImpl::Write(const WriteOptions& write_options, WriteBatch* my_batch) { |
|
|
|
if (my_batch == nullptr) { |
|
|
|
if (my_batch == nullptr) { |
|
|
|
return Status::Corruption("Batch is nullptr!"); |
|
|
|
return Status::Corruption("Batch is nullptr!"); |
|
|
|
} |
|
|
|
} |
|
|
|
PERF_TIMER_GUARD(write_pre_and_post_process_time); |
|
|
|
PERF_TIMER_GUARD(write_pre_and_post_process_time); |
|
|
|
WriteThread::Writer w(&mutex_); |
|
|
|
WriteThread::Writer w(&mutex_); |
|
|
|
w.batch = my_batch; |
|
|
|
w.batch = my_batch; |
|
|
|
w.sync = options.sync; |
|
|
|
w.sync = write_options.sync; |
|
|
|
w.disableWAL = options.disableWAL; |
|
|
|
w.disableWAL = write_options.disableWAL; |
|
|
|
w.in_batch_group = false; |
|
|
|
w.in_batch_group = false; |
|
|
|
w.done = false; |
|
|
|
w.done = false; |
|
|
|
w.timeout_hint_us = options.timeout_hint_us; |
|
|
|
w.timeout_hint_us = write_options.timeout_hint_us; |
|
|
|
|
|
|
|
|
|
|
|
uint64_t expiration_time = 0; |
|
|
|
uint64_t expiration_time = 0; |
|
|
|
bool has_timeout = false; |
|
|
|
bool has_timeout = false; |
|
|
@ -3968,7 +4024,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
has_timeout = true; |
|
|
|
has_timeout = true; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (!options.disableWAL) { |
|
|
|
if (!write_options.disableWAL) { |
|
|
|
RecordTick(stats_, WRITE_WITH_WAL); |
|
|
|
RecordTick(stats_, WRITE_WITH_WAL); |
|
|
|
default_cf_internal_stats_->AddDBStats(InternalStats::WRITE_WITH_WAL, 1); |
|
|
|
default_cf_internal_stats_->AddDBStats(InternalStats::WRITE_WITH_WAL, 1); |
|
|
|
} |
|
|
|
} |
|
|
@ -4036,7 +4092,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
|
|
|
|
|
|
|
|
if (UNLIKELY(status.ok()) && |
|
|
|
if (UNLIKELY(status.ok()) && |
|
|
|
(write_controller_.IsStopped() || write_controller_.GetDelay() > 0)) { |
|
|
|
(write_controller_.IsStopped() || write_controller_.GetDelay() > 0)) { |
|
|
|
DelayWrite(expiration_time); |
|
|
|
status = DelayWrite(expiration_time); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (UNLIKELY(status.ok() && has_timeout && |
|
|
|
if (UNLIKELY(status.ok() && has_timeout && |
|
|
@ -4074,13 +4130,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
// Record statistics
|
|
|
|
// Record statistics
|
|
|
|
RecordTick(stats_, NUMBER_KEYS_WRITTEN, my_batch_count); |
|
|
|
RecordTick(stats_, NUMBER_KEYS_WRITTEN, my_batch_count); |
|
|
|
RecordTick(stats_, BYTES_WRITTEN, WriteBatchInternal::ByteSize(updates)); |
|
|
|
RecordTick(stats_, BYTES_WRITTEN, WriteBatchInternal::ByteSize(updates)); |
|
|
|
if (options.disableWAL) { |
|
|
|
if (write_options.disableWAL) { |
|
|
|
flush_on_destroy_ = true; |
|
|
|
flush_on_destroy_ = true; |
|
|
|
} |
|
|
|
} |
|
|
|
PERF_TIMER_STOP(write_pre_and_post_process_time); |
|
|
|
PERF_TIMER_STOP(write_pre_and_post_process_time); |
|
|
|
|
|
|
|
|
|
|
|
uint64_t log_size = 0; |
|
|
|
uint64_t log_size = 0; |
|
|
|
if (!options.disableWAL) { |
|
|
|
if (!write_options.disableWAL) { |
|
|
|
PERF_TIMER_GUARD(write_wal_time); |
|
|
|
PERF_TIMER_GUARD(write_wal_time); |
|
|
|
Slice log_entry = WriteBatchInternal::Contents(updates); |
|
|
|
Slice log_entry = WriteBatchInternal::Contents(updates); |
|
|
|
status = log_->AddRecord(log_entry); |
|
|
|
status = log_->AddRecord(log_entry); |
|
|
@ -4089,7 +4145,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
log_empty_ = false; |
|
|
|
log_empty_ = false; |
|
|
|
log_size = log_entry.size(); |
|
|
|
log_size = log_entry.size(); |
|
|
|
RecordTick(stats_, WAL_FILE_BYTES, log_size); |
|
|
|
RecordTick(stats_, WAL_FILE_BYTES, log_size); |
|
|
|
if (status.ok() && options.sync) { |
|
|
|
if (status.ok() && write_options.sync) { |
|
|
|
RecordTick(stats_, WAL_FILE_SYNCED); |
|
|
|
RecordTick(stats_, WAL_FILE_SYNCED); |
|
|
|
StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS); |
|
|
|
StopWatch sw(env_, stats_, WAL_FILE_SYNC_MICROS); |
|
|
|
if (db_options_.use_fsync) { |
|
|
|
if (db_options_.use_fsync) { |
|
|
@ -4104,7 +4160,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
|
|
|
|
|
|
|
|
status = WriteBatchInternal::InsertInto( |
|
|
|
status = WriteBatchInternal::InsertInto( |
|
|
|
updates, column_family_memtables_.get(), |
|
|
|
updates, column_family_memtables_.get(), |
|
|
|
options.ignore_missing_column_families, 0, this, false); |
|
|
|
write_options.ignore_missing_column_families, 0, this, false); |
|
|
|
// A non-OK status here indicates iteration failure (either in-memory
|
|
|
|
// A non-OK status here indicates iteration failure (either in-memory
|
|
|
|
// writebatch corruption (very bad), or the client specified invalid
|
|
|
|
// writebatch corruption (very bad), or the client specified invalid
|
|
|
|
// column family). This will later on trigger bg_error_.
|
|
|
|
// column family). This will later on trigger bg_error_.
|
|
|
@ -4123,7 +4179,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
// internal stats
|
|
|
|
// internal stats
|
|
|
|
default_cf_internal_stats_->AddDBStats( |
|
|
|
default_cf_internal_stats_->AddDBStats( |
|
|
|
InternalStats::BYTES_WRITTEN, batch_size); |
|
|
|
InternalStats::BYTES_WRITTEN, batch_size); |
|
|
|
if (!options.disableWAL) { |
|
|
|
if (!write_options.disableWAL) { |
|
|
|
default_cf_internal_stats_->AddDBStats( |
|
|
|
default_cf_internal_stats_->AddDBStats( |
|
|
|
InternalStats::WAL_FILE_SYNCED, 1); |
|
|
|
InternalStats::WAL_FILE_SYNCED, 1); |
|
|
|
default_cf_internal_stats_->AddDBStats( |
|
|
|
default_cf_internal_stats_->AddDBStats( |
|
|
@ -4151,7 +4207,7 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) { |
|
|
|
|
|
|
|
|
|
|
|
// REQUIRES: mutex_ is held
|
|
|
|
// REQUIRES: mutex_ is held
|
|
|
|
// REQUIRES: this thread is currently at the front of the writer queue
|
|
|
|
// REQUIRES: this thread is currently at the front of the writer queue
|
|
|
|
void DBImpl::DelayWrite(uint64_t expiration_time) { |
|
|
|
Status DBImpl::DelayWrite(uint64_t expiration_time) { |
|
|
|
StopWatch sw(env_, stats_, WRITE_STALL); |
|
|
|
StopWatch sw(env_, stats_, WRITE_STALL); |
|
|
|
bool has_timeout = (expiration_time > 0); |
|
|
|
bool has_timeout = (expiration_time > 0); |
|
|
|
auto delay = write_controller_.GetDelay(); |
|
|
|
auto delay = write_controller_.GetDelay(); |
|
|
@ -4161,16 +4217,18 @@ void DBImpl::DelayWrite(uint64_t expiration_time) { |
|
|
|
mutex_.Lock(); |
|
|
|
mutex_.Lock(); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
while (write_controller_.IsStopped()) { |
|
|
|
while (bg_error_.ok() && write_controller_.IsStopped()) { |
|
|
|
if (has_timeout) { |
|
|
|
if (has_timeout) { |
|
|
|
bg_cv_.TimedWait(expiration_time); |
|
|
|
bg_cv_.TimedWait(expiration_time); |
|
|
|
if (env_->NowMicros() > expiration_time) { |
|
|
|
if (env_->NowMicros() > expiration_time) { |
|
|
|
break; |
|
|
|
return Status::TimedOut(); |
|
|
|
} |
|
|
|
} |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
bg_cv_.Wait(); |
|
|
|
bg_cv_.Wait(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return bg_error_; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
Status DBImpl::ScheduleFlushes(WriteContext* context) { |
|
|
|
Status DBImpl::ScheduleFlushes(WriteContext* context) { |
|
|
@ -4219,8 +4277,8 @@ Status DBImpl::SetNewMemtableAndNewLogFile(ColumnFamilyData* cfd, |
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
|
// Our final size should be less than write_buffer_size
|
|
|
|
// Our final size should be less than write_buffer_size
|
|
|
|
// (compression, etc) but err on the side of caution.
|
|
|
|
// (compression, etc) but err on the side of caution.
|
|
|
|
lfile->SetPreallocationBlockSize(1.1 * |
|
|
|
lfile->SetPreallocationBlockSize( |
|
|
|
cfd->options()->write_buffer_size); |
|
|
|
1.1 * mutable_cf_options.write_buffer_size); |
|
|
|
new_log = new log::Writer(std::move(lfile)); |
|
|
|
new_log = new log::Writer(std::move(lfile)); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -4232,6 +4290,9 @@ Status DBImpl::SetNewMemtableAndNewLogFile(ColumnFamilyData* cfd, |
|
|
|
new_superversion = new SuperVersion(); |
|
|
|
new_superversion = new SuperVersion(); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Log(db_options_.info_log, |
|
|
|
|
|
|
|
"[%s] New memtable created with log file: #%" PRIu64 "\n", |
|
|
|
|
|
|
|
cfd->GetName().c_str(), new_log_number); |
|
|
|
mutex_.Lock(); |
|
|
|
mutex_.Lock(); |
|
|
|
if (!s.ok()) { |
|
|
|
if (!s.ok()) { |
|
|
|
// how do we fail if we're not creating new log?
|
|
|
|
// how do we fail if we're not creating new log?
|
|
|
@ -4264,11 +4325,8 @@ Status DBImpl::SetNewMemtableAndNewLogFile(ColumnFamilyData* cfd, |
|
|
|
cfd->imm()->Add(cfd->mem()); |
|
|
|
cfd->imm()->Add(cfd->mem()); |
|
|
|
new_mem->Ref(); |
|
|
|
new_mem->Ref(); |
|
|
|
cfd->SetMemtable(new_mem); |
|
|
|
cfd->SetMemtable(new_mem); |
|
|
|
Log(db_options_.info_log, |
|
|
|
|
|
|
|
"[%s] New memtable created with log file: #%" PRIu64 "\n", |
|
|
|
|
|
|
|
cfd->GetName().c_str(), logfile_number_); |
|
|
|
|
|
|
|
context->superversions_to_free_.push_back( |
|
|
|
context->superversions_to_free_.push_back( |
|
|
|
cfd->InstallSuperVersion(new_superversion, &mutex_, mutable_cf_options)); |
|
|
|
InstallSuperVersion(cfd, new_superversion, mutable_cf_options)); |
|
|
|
return s; |
|
|
|
return s; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
@ -4614,7 +4672,7 @@ Status DB::Merge(const WriteOptions& opt, ColumnFamilyHandle* column_family, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Default implementation -- returns not supported status
|
|
|
|
// Default implementation -- returns not supported status
|
|
|
|
Status DB::CreateColumnFamily(const ColumnFamilyOptions& options, |
|
|
|
Status DB::CreateColumnFamily(const ColumnFamilyOptions& cf_options, |
|
|
|
const std::string& column_family_name, |
|
|
|
const std::string& column_family_name, |
|
|
|
ColumnFamilyHandle** handle) { |
|
|
|
ColumnFamilyHandle** handle) { |
|
|
|
return Status::NotSupported(""); |
|
|
|
return Status::NotSupported(""); |
|
|
@ -4737,8 +4795,8 @@ Status DB::Open(const DBOptions& db_options, const std::string& dbname, |
|
|
|
} |
|
|
|
} |
|
|
|
if (s.ok()) { |
|
|
|
if (s.ok()) { |
|
|
|
for (auto cfd : *impl->versions_->GetColumnFamilySet()) { |
|
|
|
for (auto cfd : *impl->versions_->GetColumnFamilySet()) { |
|
|
|
delete cfd->InstallSuperVersion(new SuperVersion(), &impl->mutex_, |
|
|
|
delete impl->InstallSuperVersion( |
|
|
|
*cfd->GetLatestMutableCFOptions()); |
|
|
|
cfd, nullptr, *cfd->GetLatestMutableCFOptions()); |
|
|
|
} |
|
|
|
} |
|
|
|
impl->alive_log_files_.push_back( |
|
|
|
impl->alive_log_files_.push_back( |
|
|
|
DBImpl::LogFileNumberSize(impl->logfile_number_)); |
|
|
|
DBImpl::LogFileNumberSize(impl->logfile_number_)); |
|
|
|