refactor SavePoints (#5192)

Summary:
Savepoints are assumed to be used in a stack-wise fashion (only the
top element should be used), so they were stored by `WriteBatch` in a
member variable `save_points_` using an `std::stack`.
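
As a quick illustration of this stack-wise usage, here is a minimal sketch
against the public `WriteBatch` savepoint API (`SetSavePoint`,
`RollbackToSavePoint`); the keys and values are made-up example data and not
part of this change:

    #include <cassert>

    #include "rocksdb/write_batch.h"

    int main() {
      rocksdb::WriteBatch batch;
      batch.Put("k1", "v1");

      batch.SetSavePoint();         // push savepoint #1
      batch.Put("k2", "v2");
      batch.SetSavePoint();         // push savepoint #2 (now the top)
      batch.Put("k3", "v3");

      batch.RollbackToSavePoint();  // drops "k3", pops savepoint #2
      batch.RollbackToSavePoint();  // drops "k2", pops savepoint #1

      assert(batch.Count() == 1);   // only "k1" remains
      return 0;
    }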

Conceptually this is fine, but the implementation had a few issues:
- the `save_points_` instance variable was a plain pointer to a heap-
  allocated `SavePoints` struct. The destructor of `WriteBatch` simply
  deleted this pointer, but the copy constructor of `WriteBatch` only
  copied the pointer, so both copies ended up owning the same heap
  object; copying a `WriteBatch` with active savepoints therefore very
  likely crashed (double free) once both copies were destroyed. The copy
  constructor now makes a proper copy of the savepoints instead of just
  copying the pointer (see the first sketch after this list).
- `save_points_` was an `std::stack`, which defaults to `std::deque` for
  the underlying container. A deque is over the top here, as we only
  need access to the most recent savepoint (i.e. `stack.top()`) but
  never to any elements at the front. `std::deque` is also rather
  expensive to initialize in common environments; for example, the STL
  implementation shipped with GNU g++ performs a heap allocation of more
  than 500 bytes just to create an empty deque object. Although the
  `save_points_` container is created lazily by RocksDB, switching the
  stack's underlying container from a deque to a plain vector is much
  more memory-efficient, so the stack now sits on top of RocksDB's
  `autovector<SavePoint>` (see the second sketch after this list).
- `save_points_` was changed from a plain pointer to an
  `std::unique_ptr`, making ownership explicit and removing the manual
  `delete` from the `WriteBatch` destructor (also covered by the first
  sketch below).
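
The first sketch illustrates the ownership points (first and third bullets)
with hypothetical stand-in types (`Batch` and simplified
`SavePoint`/`SavePoints`); the real change is in the diff further down:

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <stack>

    // Hypothetical stand-ins for illustration only.
    struct SavePoint { size_t size; int count; uint32_t content_flags; };
    struct SavePoints { std::stack<SavePoint> stack; };

    class Batch {
     public:
      Batch() = default;

      // Deep-copy the savepoints when the source has any. Copying only the
      // pointer (as the old copy constructor did) leaves two objects owning
      // the same heap allocation, which is then freed twice on destruction.
      Batch(const Batch& src) {
        if (src.save_points_ != nullptr) {
          save_points_.reset(new SavePoints(*src.save_points_));
        }
      }

      // Moving just transfers ownership of the unique_ptr.
      Batch(Batch&& src) noexcept = default;

      void SetSavePoint() {
        if (save_points_ == nullptr) {
          save_points_.reset(new SavePoints());  // created lazily
        }
        save_points_->stack.push(SavePoint{0, 0, 0});
      }

     private:
      // unique_ptr makes ownership explicit; no manual delete in a destructor.
      std::unique_ptr<SavePoints> save_points_;
    };

    int main() {
      Batch a;
      a.SetSavePoint();
      Batch b = a;  // safe: b owns its own copy of the savepoint stack
      (void)b;
      return 0;
    }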
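
The second sketch shows the container change (second bullet): `std::stack`
takes its underlying container as a second template parameter. The sketch
uses `std::vector` to stay self-contained; the actual diff below uses
RocksDB's `autovector<SavePoint>` in the same position:

    #include <cstddef>
    #include <cstdint>
    #include <stack>
    #include <vector>

    // Simplified SavePoint; the real one records the batch's length, count
    // and content flags at the time SetSavePoint() was called.
    struct SavePoint { size_t size; int count; uint32_t content_flags; };

    // std::stack defaults to std::deque. A vector-like second template
    // parameter keeps push()/top()/pop() but avoids the deque's relatively
    // expensive empty-object construction.
    using SavePointStack = std::stack<SavePoint, std::vector<SavePoint>>;

    int main() {
      SavePointStack save_points;
      save_points.push(SavePoint{0, 0, 0});  // SetSavePoint()
      save_points.pop();                     // RollbackToSavePoint()
      return 0;
    }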
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5192

Differential Revision: D15024074

Pulled By: maysamyabandeh

fbshipit-source-id: 5b128786d3789cde94e46465c9e91badd07a25d7
Author: jsteemann (committed by Facebook Github Bot)
Branch: main
Commit: de76909464, parent: dc64c2f5cc
Changed files:
- db/write_batch.cc (25)
- include/rocksdb/write_batch.h (4)
- util/threadpool_imp.cc (1)
- utilities/transactions/transaction_base.cc (2)
- utilities/transactions/transaction_base.h (3)

db/write_batch.cc
@@ -52,6 +52,7 @@
 #include "monitoring/perf_context_imp.h"
 #include "monitoring/statistics.h"
 #include "rocksdb/merge_operator.h"
+#include "util/autovector.h"
 #include "util/coding.h"
 #include "util/duplicate_detector.h"
 #include "util/string_util.h"
@@ -137,34 +138,36 @@ struct BatchContentClassifier : public WriteBatch::Handler {
 } // anon namespace

 struct SavePoints {
-  std::stack<SavePoint> stack;
+  std::stack<SavePoint, autovector<SavePoint>> stack;
 };

 WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes)
-    : save_points_(nullptr), content_flags_(0), max_bytes_(max_bytes), rep_() {
+    : content_flags_(0), max_bytes_(max_bytes), rep_() {
   rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader) ?
                reserved_bytes : WriteBatchInternal::kHeader);
   rep_.resize(WriteBatchInternal::kHeader);
 }

 WriteBatch::WriteBatch(const std::string& rep)
-    : save_points_(nullptr),
-      content_flags_(ContentFlags::DEFERRED),
+    : content_flags_(ContentFlags::DEFERRED),
       max_bytes_(0),
       rep_(rep) {}

 WriteBatch::WriteBatch(std::string&& rep)
-    : save_points_(nullptr),
-      content_flags_(ContentFlags::DEFERRED),
+    : content_flags_(ContentFlags::DEFERRED),
       max_bytes_(0),
       rep_(std::move(rep)) {}

 WriteBatch::WriteBatch(const WriteBatch& src)
-    : save_points_(src.save_points_),
-      wal_term_point_(src.wal_term_point_),
+    : wal_term_point_(src.wal_term_point_),
       content_flags_(src.content_flags_.load(std::memory_order_relaxed)),
       max_bytes_(src.max_bytes_),
-      rep_(src.rep_) {}
+      rep_(src.rep_) {
+  if (src.save_points_ != nullptr) {
+    save_points_.reset(new SavePoints());
+    save_points_->stack = src.save_points_->stack;
+  }
+}

 WriteBatch::WriteBatch(WriteBatch&& src) noexcept
     : save_points_(std::move(src.save_points_)),
@@ -189,7 +192,7 @@ WriteBatch& WriteBatch::operator=(WriteBatch&& src) {
   return *this;
 }

-WriteBatch::~WriteBatch() { delete save_points_; }
+WriteBatch::~WriteBatch() { }

 WriteBatch::Handler::~Handler() { }
@@ -991,7 +994,7 @@ Status WriteBatch::PutLogData(const Slice& blob) {

 void WriteBatch::SetSavePoint() {
   if (save_points_ == nullptr) {
-    save_points_ = new SavePoints();
+    save_points_.reset(new SavePoints());
   }
   // Record length and count of current batch of writes.
   save_points_->stack.push(SavePoint(

include/rocksdb/write_batch.h
@@ -26,7 +26,7 @@
 #include <stdint.h>
 #include <atomic>
-#include <stack>
+#include <memory>
 #include <string>
 #include "rocksdb/status.h"
 #include "rocksdb/write_batch_base.h"
@@ -337,7 +337,7 @@ class WriteBatch : public WriteBatchBase {
   // remove duplicate keys. Remove it when the hack is replaced with a proper
   // solution.
   friend class WriteBatchWithIndex;
-  SavePoints* save_points_;
+  std::unique_ptr<SavePoints> save_points_;

   // When sending a WriteBatch through WriteImpl we might want to
   // specify that only the first x records of the batch be written to

util/threadpool_imp.cc
@@ -25,6 +25,7 @@
 #include <algorithm>
 #include <atomic>
 #include <condition_variable>
+#include <deque>
 #include <mutex>
 #include <sstream>
 #include <thread>

utilities/transactions/transaction_base.cc
@@ -123,7 +123,7 @@ Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family,

 void TransactionBaseImpl::SetSavePoint() {
   if (save_points_ == nullptr) {
-    save_points_.reset(new std::stack<TransactionBaseImpl::SavePoint>());
+    save_points_.reset(new std::stack<TransactionBaseImpl::SavePoint, autovector<TransactionBaseImpl::SavePoint>>());
   }
   save_points_->emplace(snapshot_, snapshot_needed_, snapshot_notifier_,
                         num_puts_, num_deletes_, num_merges_);

utilities/transactions/transaction_base.h
@@ -19,6 +19,7 @@
 #include "rocksdb/utilities/transaction.h"
 #include "rocksdb/utilities/transaction_db.h"
 #include "rocksdb/utilities/write_batch_with_index.h"
+#include "util/autovector.h"
 #include "utilities/transactions/transaction_util.h"

 namespace rocksdb {

@@ -318,7 +319,7 @@ class TransactionBaseImpl : public Transaction {
   // Stack of the Snapshot saved at each save point. Saved snapshots may be
   // nullptr if there was no snapshot at the time SetSavePoint() was called.
-  std::unique_ptr<std::stack<TransactionBaseImpl::SavePoint>> save_points_;
+  std::unique_ptr<std::stack<TransactionBaseImpl::SavePoint, autovector<TransactionBaseImpl::SavePoint>>> save_points_;

   // Map from column_family_id to map of keys that are involved in this
   // transaction.
