From de76909464a99d5ed60e61844e9cd0371ca350fe Mon Sep 17 00:00:00 2001 From: jsteemann Date: Fri, 19 Apr 2019 20:30:03 -0700 Subject: [PATCH] refactor SavePoints (#5192) Summary: Savepoints are assumed to be used in a stack-wise fashion (only the top element should be used), so they were stored by `WriteBatch` in a member variable `save_points` using an std::stack. Conceptually this is fine, but the implementation had a few issues: - the `save_points_` instance variable was a plain pointer to a heap- allocated `SavePoints` struct. The destructor of `WriteBatch` simply deletes this pointer. However, the copy constructor of WriteBatch just copied that pointer, meaning that copying a WriteBatch with active savepoints will very likely have crashed before. Now a proper copy of the savepoints is made in the copy constructor, and not just a copy of the pointer - `save_points_` was an std::stack, which defaults to `std::deque` for the underlying container. A deque is a bit over the top here, as we only need access to the most recent savepoint (i.e. stack.top()) but never any elements at the front. std::deque is rather expensive to initialize in common environments. For example, the STL implementation shipped with GNU g++ will perform a heap allocation of more than 500 bytes to create an empty deque object. Although the `save_points_` container is created lazily by RocksDB, moving from a deque to a plain `std::vector` is much more memory-efficient. So `save_points_` is now a vector. - `save_points_` was changed from a plain pointer to an `std::unique_ptr`, making ownership more explicit. Pull Request resolved: https://github.com/facebook/rocksdb/pull/5192 Differential Revision: D15024074 Pulled By: maysamyabandeh fbshipit-source-id: 5b128786d3789cde94e46465c9e91badd07a25d7 --- db/write_batch.cc | 25 ++++++++++++---------- include/rocksdb/write_batch.h | 4 ++-- util/threadpool_imp.cc | 1 + utilities/transactions/transaction_base.cc | 2 +- utilities/transactions/transaction_base.h | 3 ++- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/db/write_batch.cc b/db/write_batch.cc index 30480f64e..939b59530 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -52,6 +52,7 @@ #include "monitoring/perf_context_imp.h" #include "monitoring/statistics.h" #include "rocksdb/merge_operator.h" +#include "util/autovector.h" #include "util/coding.h" #include "util/duplicate_detector.h" #include "util/string_util.h" @@ -137,34 +138,36 @@ struct BatchContentClassifier : public WriteBatch::Handler { } // anon namespace struct SavePoints { - std::stack stack; + std::stack> stack; }; WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes) - : save_points_(nullptr), content_flags_(0), max_bytes_(max_bytes), rep_() { + : content_flags_(0), max_bytes_(max_bytes), rep_() { rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader) ? reserved_bytes : WriteBatchInternal::kHeader); rep_.resize(WriteBatchInternal::kHeader); } WriteBatch::WriteBatch(const std::string& rep) - : save_points_(nullptr), - content_flags_(ContentFlags::DEFERRED), + : content_flags_(ContentFlags::DEFERRED), max_bytes_(0), rep_(rep) {} WriteBatch::WriteBatch(std::string&& rep) - : save_points_(nullptr), - content_flags_(ContentFlags::DEFERRED), + : content_flags_(ContentFlags::DEFERRED), max_bytes_(0), rep_(std::move(rep)) {} WriteBatch::WriteBatch(const WriteBatch& src) - : save_points_(src.save_points_), - wal_term_point_(src.wal_term_point_), + : wal_term_point_(src.wal_term_point_), content_flags_(src.content_flags_.load(std::memory_order_relaxed)), max_bytes_(src.max_bytes_), - rep_(src.rep_) {} + rep_(src.rep_) { + if (src.save_points_ != nullptr) { + save_points_.reset(new SavePoints()); + save_points_->stack = src.save_points_->stack; + } +} WriteBatch::WriteBatch(WriteBatch&& src) noexcept : save_points_(std::move(src.save_points_)), @@ -189,7 +192,7 @@ WriteBatch& WriteBatch::operator=(WriteBatch&& src) { return *this; } -WriteBatch::~WriteBatch() { delete save_points_; } +WriteBatch::~WriteBatch() { } WriteBatch::Handler::~Handler() { } @@ -991,7 +994,7 @@ Status WriteBatch::PutLogData(const Slice& blob) { void WriteBatch::SetSavePoint() { if (save_points_ == nullptr) { - save_points_ = new SavePoints(); + save_points_.reset(new SavePoints()); } // Record length and count of current batch of writes. save_points_->stack.push(SavePoint( diff --git a/include/rocksdb/write_batch.h b/include/rocksdb/write_batch.h index 8782d08f1..29b660d19 100644 --- a/include/rocksdb/write_batch.h +++ b/include/rocksdb/write_batch.h @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include "rocksdb/status.h" #include "rocksdb/write_batch_base.h" @@ -337,7 +337,7 @@ class WriteBatch : public WriteBatchBase { // remove duplicate keys. Remove it when the hack is replaced with a proper // solution. friend class WriteBatchWithIndex; - SavePoints* save_points_; + std::unique_ptr save_points_; // When sending a WriteBatch through WriteImpl we might want to // specify that only the first x records of the batch be written to diff --git a/util/threadpool_imp.cc b/util/threadpool_imp.cc index acac0063b..ea5e875df 100644 --- a/util/threadpool_imp.cc +++ b/util/threadpool_imp.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/utilities/transactions/transaction_base.cc b/utilities/transactions/transaction_base.cc index ac186a528..fc2e6b520 100644 --- a/utilities/transactions/transaction_base.cc +++ b/utilities/transactions/transaction_base.cc @@ -123,7 +123,7 @@ Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family, void TransactionBaseImpl::SetSavePoint() { if (save_points_ == nullptr) { - save_points_.reset(new std::stack()); + save_points_.reset(new std::stack>()); } save_points_->emplace(snapshot_, snapshot_needed_, snapshot_notifier_, num_puts_, num_deletes_, num_merges_); diff --git a/utilities/transactions/transaction_base.h b/utilities/transactions/transaction_base.h index 9154b3274..cc3d0cdc3 100644 --- a/utilities/transactions/transaction_base.h +++ b/utilities/transactions/transaction_base.h @@ -19,6 +19,7 @@ #include "rocksdb/utilities/transaction.h" #include "rocksdb/utilities/transaction_db.h" #include "rocksdb/utilities/write_batch_with_index.h" +#include "util/autovector.h" #include "utilities/transactions/transaction_util.h" namespace rocksdb { @@ -318,7 +319,7 @@ class TransactionBaseImpl : public Transaction { // Stack of the Snapshot saved at each save point. Saved snapshots may be // nullptr if there was no snapshot at the time SetSavePoint() was called. - std::unique_ptr> save_points_; + std::unique_ptr>> save_points_; // Map from column_family_id to map of keys that are involved in this // transaction.