MemTableList::TrimHistory now use allocated bytes (#9020)

Summary:
Fix a bug when both max_write_buffer_size_to_maintain and max_write_buffer_number_to_maintain are 0.
The bug was introduced in 6.5.0 and  https://github.com/facebook/rocksdb/issues/5022.
Fix https://github.com/facebook/rocksdb/issues/8371

Pull Request resolved: https://github.com/facebook/rocksdb/pull/9020

Reviewed By: pdillinger

Differential Revision: D32767084

Pulled By: ajkr

fbshipit-source-id: c401ee6e2557230e892d0fe8abb4966cbd18e85f
main
lgqss 3 years ago committed by Facebook GitHub Bot
parent 9daf07305c
commit 77c7085594
  1. 1
      HISTORY.md
  2. 2
      db/db_impl/db_impl_write.cc
  3. 7
      db/memtable.h
  4. 22
      db/memtable_list.cc
  5. 14
      db/memtable_list.h
  6. 2
      db/memtable_list_test.cc
  7. 4
      db/write_batch.cc
  8. 3
      utilities/transactions/optimistic_transaction_test.cc

@ -5,6 +5,7 @@
* Fixed a bug in rocksdb automatic implicit prefetching which got broken because of new feature adaptive_readahead and internal prefetching got disabled when iterator moves from one file to next. * Fixed a bug in rocksdb automatic implicit prefetching which got broken because of new feature adaptive_readahead and internal prefetching got disabled when iterator moves from one file to next.
### Behavior Changes ### Behavior Changes
* MemTableList::TrimHistory now use allocated bytes when max_write_buffer_size_to_maintain > 0(default in TrasactionDB, introduced in PR#5022) Fix #8371.
### Public API change ### Public API change
* Extend WriteBatch::AssignTimestamp and AssignTimestamps API so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes. * Extend WriteBatch::AssignTimestamp and AssignTimestamps API so that both functions can accept an optional `checker` argument that performs additional checking on timestamp sizes.

@ -1661,7 +1661,7 @@ Status DBImpl::TrimMemtableHistory(WriteContext* context) {
for (auto& cfd : cfds) { for (auto& cfd : cfds) {
autovector<MemTable*> to_delete; autovector<MemTable*> to_delete;
bool trimmed = cfd->imm()->TrimHistory( bool trimmed = cfd->imm()->TrimHistory(
&context->memtables_to_free_, cfd->mem()->ApproximateMemoryUsage()); &context->memtables_to_free_, cfd->mem()->MemoryAllocatedBytes());
if (trimmed) { if (trimmed) {
context->superversion_context.NewSuperVersion(); context->superversion_context.NewSuperVersion();
assert(context->superversion_context.new_superversion.get() != nullptr); assert(context->superversion_context.new_superversion.get() != nullptr);

@ -146,6 +146,13 @@ class MemTable {
return approximate_memory_usage_.load(std::memory_order_relaxed); return approximate_memory_usage_.load(std::memory_order_relaxed);
} }
// used by MemTableListVersion::MemoryAllocatedBytesExcludingLast
size_t MemoryAllocatedBytes() const {
return table_->ApproximateMemoryUsage() +
range_del_table_->ApproximateMemoryUsage() +
arena_.MemoryAllocatedBytes();
}
// Returns a vector of unique random memtable entries of size 'sample_size'. // Returns a vector of unique random memtable entries of size 'sample_size'.
// //
// Note: the entries are stored in the unordered_set as length-prefixed keys, // Note: the entries are stored in the unordered_set as length-prefixed keys,

@ -259,8 +259,8 @@ SequenceNumber MemTableListVersion::GetEarliestSequenceNumber(
void MemTableListVersion::Add(MemTable* m, autovector<MemTable*>* to_delete) { void MemTableListVersion::Add(MemTable* m, autovector<MemTable*>* to_delete) {
assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable assert(refs_ == 1); // only when refs_ == 1 is MemTableListVersion mutable
AddMemTable(m); AddMemTable(m);
// m->MemoryAllocatedBytes() is added in MemoryAllocatedBytesExcludingLast
TrimHistory(to_delete, m->ApproximateMemoryUsage()); TrimHistory(to_delete, 0);
} }
// Removes m from list of memtables not flushed. Caller should NOT Unref m. // Removes m from list of memtables not flushed. Caller should NOT Unref m.
@ -282,16 +282,16 @@ void MemTableListVersion::Remove(MemTable* m,
} }
// return the total memory usage assuming the oldest flushed memtable is dropped // return the total memory usage assuming the oldest flushed memtable is dropped
size_t MemTableListVersion::ApproximateMemoryUsageExcludingLast() const { size_t MemTableListVersion::MemoryAllocatedBytesExcludingLast() const {
size_t total_memtable_size = 0; size_t total_memtable_size = 0;
for (auto& memtable : memlist_) { for (auto& memtable : memlist_) {
total_memtable_size += memtable->ApproximateMemoryUsage(); total_memtable_size += memtable->MemoryAllocatedBytes();
} }
for (auto& memtable : memlist_history_) { for (auto& memtable : memlist_history_) {
total_memtable_size += memtable->ApproximateMemoryUsage(); total_memtable_size += memtable->MemoryAllocatedBytes();
} }
if (!memlist_history_.empty()) { if (!memlist_history_.empty()) {
total_memtable_size -= memlist_history_.back()->ApproximateMemoryUsage(); total_memtable_size -= memlist_history_.back()->MemoryAllocatedBytes();
} }
return total_memtable_size; return total_memtable_size;
} }
@ -301,7 +301,7 @@ bool MemTableListVersion::MemtableLimitExceeded(size_t usage) {
// calculate the total memory usage after dropping the oldest flushed // calculate the total memory usage after dropping the oldest flushed
// memtable, compare with max_write_buffer_size_to_maintain_ to decide // memtable, compare with max_write_buffer_size_to_maintain_ to decide
// whether to trim history // whether to trim history
return ApproximateMemoryUsageExcludingLast() + usage >= return MemoryAllocatedBytesExcludingLast() + usage >=
static_cast<size_t>(max_write_buffer_size_to_maintain_); static_cast<size_t>(max_write_buffer_size_to_maintain_);
} else if (max_write_buffer_number_to_maintain_ > 0) { } else if (max_write_buffer_number_to_maintain_ > 0) {
return memlist_.size() + memlist_history_.size() > return memlist_.size() + memlist_history_.size() >
@ -596,9 +596,9 @@ size_t MemTableList::ApproximateUnflushedMemTablesMemoryUsage() {
size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; } size_t MemTableList::ApproximateMemoryUsage() { return current_memory_usage_; }
size_t MemTableList::ApproximateMemoryUsageExcludingLast() const { size_t MemTableList::MemoryAllocatedBytesExcludingLast() const {
const size_t usage = const size_t usage =
current_memory_usage_excluding_last_.load(std::memory_order_relaxed); current_memory_allocted_bytes_excluding_last_.load(std::memory_order_relaxed);
return usage; return usage;
} }
@ -609,8 +609,8 @@ bool MemTableList::HasHistory() const {
void MemTableList::UpdateCachedValuesFromMemTableListVersion() { void MemTableList::UpdateCachedValuesFromMemTableListVersion() {
const size_t total_memtable_size = const size_t total_memtable_size =
current_->ApproximateMemoryUsageExcludingLast(); current_->MemoryAllocatedBytesExcludingLast();
current_memory_usage_excluding_last_.store(total_memtable_size, current_memory_allocted_bytes_excluding_last_.store(total_memtable_size,
std::memory_order_relaxed); std::memory_order_relaxed);
const bool has_history = current_->HasHistory(); const bool has_history = current_->HasHistory();

@ -168,7 +168,7 @@ class MemTableListVersion {
// excluding the last MemTable in memlist_history_. The reason for excluding // excluding the last MemTable in memlist_history_. The reason for excluding
// the last MemTable is to see if dropping the last MemTable will keep total // the last MemTable is to see if dropping the last MemTable will keep total
// memory usage above or equal to max_write_buffer_size_to_maintain_ // memory usage above or equal to max_write_buffer_size_to_maintain_
size_t ApproximateMemoryUsageExcludingLast() const; size_t MemoryAllocatedBytesExcludingLast() const;
// Whether this version contains flushed memtables that are only kept around // Whether this version contains flushed memtables that are only kept around
// for transaction conflict checking. // for transaction conflict checking.
@ -221,7 +221,7 @@ class MemTableList {
commit_in_progress_(false), commit_in_progress_(false),
flush_requested_(false), flush_requested_(false),
current_memory_usage_(0), current_memory_usage_(0),
current_memory_usage_excluding_last_(0), current_memory_allocted_bytes_excluding_last_(0),
current_has_history_(false) { current_has_history_(false) {
current_->Ref(); current_->Ref();
} }
@ -281,13 +281,13 @@ class MemTableList {
// Returns an estimate of the number of bytes of data in use. // Returns an estimate of the number of bytes of data in use.
size_t ApproximateMemoryUsage(); size_t ApproximateMemoryUsage();
// Returns the cached current_memory_usage_excluding_last_ value. // Returns the cached current_memory_allocted_bytes_excluding_last_ value.
size_t ApproximateMemoryUsageExcludingLast() const; size_t MemoryAllocatedBytesExcludingLast() const;
// Returns the cached current_has_history_ value. // Returns the cached current_has_history_ value.
bool HasHistory() const; bool HasHistory() const;
// Updates current_memory_usage_excluding_last_ and current_has_history_ // Updates current_memory_allocted_bytes_excluding_last_ and current_has_history_
// from MemTableListVersion. Must be called whenever InstallNewVersion is // from MemTableListVersion. Must be called whenever InstallNewVersion is
// called. // called.
void UpdateCachedValuesFromMemTableListVersion(); void UpdateCachedValuesFromMemTableListVersion();
@ -430,8 +430,8 @@ class MemTableList {
// The current memory usage. // The current memory usage.
size_t current_memory_usage_; size_t current_memory_usage_;
// Cached value of current_->ApproximateMemoryUsageExcludingLast(). // Cached value of current_->MemoryAllocatedBytesExcludingLast().
std::atomic<size_t> current_memory_usage_excluding_last_; std::atomic<size_t> current_memory_allocted_bytes_excluding_last_;
// Cached value of current_->HasHistory(). // Cached value of current_->HasHistory().
std::atomic<bool> current_has_history_; std::atomic<bool> current_has_history_;

@ -344,7 +344,7 @@ TEST_F(MemTableListTest, GetFromHistoryTest) {
// Create MemTableList // Create MemTableList
int min_write_buffer_number_to_merge = 2; int min_write_buffer_number_to_merge = 2;
int max_write_buffer_number_to_maintain = 2; int max_write_buffer_number_to_maintain = 2;
int64_t max_write_buffer_size_to_maintain = 2000; int64_t max_write_buffer_size_to_maintain = 2 * Arena::kInlineSize;
MemTableList list(min_write_buffer_number_to_merge, MemTableList list(min_write_buffer_number_to_merge,
max_write_buffer_number_to_maintain, max_write_buffer_number_to_maintain,
max_write_buffer_size_to_maintain); max_write_buffer_size_to_maintain);

@ -1986,8 +1986,8 @@ class MemTableInserter : public WriteBatch::Handler {
const MemTable* const mem = cfd->mem(); const MemTable* const mem = cfd->mem();
assert(mem); assert(mem);
if (mem->ApproximateMemoryUsageFast() + if (mem->MemoryAllocatedBytes() +
imm->ApproximateMemoryUsageExcludingLast() >= imm->MemoryAllocatedBytesExcludingLast() >=
size_to_maintain && size_to_maintain &&
imm->MarkTrimHistoryNeeded()) { imm->MarkTrimHistoryNeeded()) {
trim_history_scheduler_->ScheduleWork(cfd); trim_history_scheduler_->ScheduleWork(cfd);

@ -37,7 +37,7 @@ class OptimisticTransactionTest
OptimisticTransactionTest() { OptimisticTransactionTest() {
options.create_if_missing = true; options.create_if_missing = true;
options.max_write_buffer_number = 2; options.max_write_buffer_number = 2;
options.max_write_buffer_size_to_maintain = 1600; options.max_write_buffer_size_to_maintain = 2 * Arena::kInlineSize;
options.merge_operator.reset(new TestPutOperator()); options.merge_operator.reset(new TestPutOperator());
dbname = test::PerThreadDBPath("optimistic_transaction_testdb"); dbname = test::PerThreadDBPath("optimistic_transaction_testdb");
@ -318,7 +318,6 @@ TEST_P(OptimisticTransactionTest, CheckKeySkipOldMemtable) {
const int kAttemptImmMemTable = 1; const int kAttemptImmMemTable = 1;
for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable; for (int attempt = kAttemptHistoryMemtable; attempt <= kAttemptImmMemTable;
attempt++) { attempt++) {
options.max_write_buffer_number_to_maintain = 3;
Reopen(); Reopen();
WriteOptions write_options; WriteOptions write_options;

Loading…
Cancel
Save