@ -27,6 +27,7 @@
// kTypeCommitXID varstring
// kTypeRollbackXID varstring
// kTypeBeginPersistedPrepareXID varstring
// kTypeBeginUnprepareXID varstring
// kTypeNoop
// varstring :=
// len: varint32
@ -366,6 +367,8 @@ Status ReadRecordFromWriteBatch(Slice* input, char* tag,
// This indicates that the prepared batch is also persisted in the db.
// This is used in WritePreparedTxn
case kTypeBeginPersistedPrepareXID :
// This is used in WriteUnpreparedTxn
case kTypeBeginUnprepareXID :
break ;
case kTypeEndPrepareXID :
if ( ! GetLengthPrefixedSlice ( input , xid ) ) {
@ -503,8 +506,15 @@ Status WriteBatch::Iterate(Handler* handler) const {
if ( ! handler - > WriteAfterCommit ( ) ) {
s = Status : : NotSupported (
" WriteCommitted txn tag when write_after_commit_ is disabled (in "
" WritePrepared mode). If it is not due to corruption, the WAL "
" must be emptied before changing the WritePolicy. " ) ;
" WritePrepared/WriteUnprepared mode). If it is not due to "
" corruption, the WAL must be emptied before changing the "
" WritePolicy. " ) ;
}
if ( handler - > WriteBeforePrepare ( ) ) {
s = Status : : NotSupported (
" WriteCommitted txn tag when write_before_prepare_ is enabled "
" (in WriteUnprepared mode). If it is not due to corruption, the "
" WAL must be emptied before changing the WritePolicy. " ) ;
}
break ;
case kTypeBeginPersistedPrepareXID :
@ -514,10 +524,30 @@ Status WriteBatch::Iterate(Handler* handler) const {
empty_batch = false ;
if ( handler - > WriteAfterCommit ( ) ) {
s = Status : : NotSupported (
" WritePrepared txn tag when write_after_commit_ is enabled (in "
" WritePrepared/WriteUnprepared txn tag when write_after_commit_ "
" is enabled (in default WriteCommitted mode). If it is not due "
" to corruption, the WAL must be emptied before changing the "
" WritePolicy. " ) ;
}
break ;
case kTypeBeginUnprepareXID :
assert ( content_flags_ . load ( std : : memory_order_relaxed ) &
( ContentFlags : : DEFERRED | ContentFlags : : HAS_BEGIN_PREPARE ) ) ;
handler - > MarkBeginPrepare ( ) ;
empty_batch = false ;
if ( handler - > WriteAfterCommit ( ) ) {
s = Status : : NotSupported (
" WriteUnprepared txn tag when write_after_commit_ is enabled (in "
" default WriteCommitted mode). If it is not due to corruption, "
" the WAL must be emptied before changing the WritePolicy. " ) ;
}
if ( ! handler - > WriteBeforePrepare ( ) ) {
s = Status : : NotSupported (
" WriteUnprepared txn tag when write_before_prepare_ is disabled "
" (in WriteCommitted/WritePrepared mode). If it is not due to "
" corruption, the WAL must be emptied before changing the "
" WritePolicy. " ) ;
}
break ;
case kTypeEndPrepareXID :
assert ( content_flags_ . load ( std : : memory_order_relaxed ) &
@ -669,7 +699,8 @@ Status WriteBatchInternal::InsertNoop(WriteBatch* b) {
}
Status WriteBatchInternal : : MarkEndPrepare ( WriteBatch * b , const Slice & xid ,
bool write_after_commit ) {
bool write_after_commit ,
bool unprepared_batch ) {
// a manually constructed batch can only contain one prepare section
assert ( b - > rep_ [ 12 ] = = static_cast < char > ( kTypeNoop ) ) ;
@ -681,9 +712,10 @@ Status WriteBatchInternal::MarkEndPrepare(WriteBatch* b, const Slice& xid,
}
// rewrite noop as begin marker
b - > rep_ [ 12 ] =
static_cast < char > ( write_after_commit ? kTypeBeginPrepareXID
: kTypeBeginPersistedPrepareXID ) ;
b - > rep_ [ 12 ] = static_cast < char > (
write_after_commit ? kTypeBeginPrepareXID
: ( unprepared_batch ? kTypeBeginUnprepareXID
: kTypeBeginPersistedPrepareXID ) ) ;
b - > rep_ . push_back ( static_cast < char > ( kTypeEndPrepareXID ) ) ;
PutLengthPrefixedSlice ( & b - > rep_ , xid ) ;
b - > content_flags_ . store ( b - > content_flags_ . load ( std : : memory_order_relaxed ) |
@ -1018,6 +1050,8 @@ class MemTableInserter : public WriteBatch::Handler {
bool seq_per_batch_ ;
// Whether the memtable write will be done only after the commit
bool write_after_commit_ ;
// Whether memtable write can be done before prepare
bool write_before_prepare_ ;
using DupDetector = std : : aligned_storage < sizeof ( DuplicateDetector ) > : : type ;
DupDetector duplicate_detector_ ;
bool dup_dectector_on_ ;
@ -1043,6 +1077,9 @@ class MemTableInserter : public WriteBatch::Handler {
}
protected :
virtual bool WriteBeforePrepare ( ) const override {
return write_before_prepare_ ;
}
virtual bool WriteAfterCommit ( ) const override { return write_after_commit_ ; }
public :
@ -1052,7 +1089,8 @@ class MemTableInserter : public WriteBatch::Handler {
bool ignore_missing_column_families ,
uint64_t recovering_log_number , DB * db ,
bool concurrent_memtable_writes ,
bool * has_valid_writes = nullptr , bool seq_per_batch = false )
bool * has_valid_writes = nullptr , bool seq_per_batch = false ,
bool batch_per_txn = true )
: sequence_ ( _sequence ) ,
cf_mems_ ( cf_mems ) ,
flush_scheduler_ ( flush_scheduler ) ,
@ -1070,6 +1108,9 @@ class MemTableInserter : public WriteBatch::Handler {
// batch). So seq_per_batch being false indicates write_after_commit
// approach.
write_after_commit_ ( ! seq_per_batch ) ,
// WriteUnprepared can write WriteBatches per transaction, so
// batch_per_txn being false indicates write_before_prepare.
write_before_prepare_ ( ! batch_per_txn ) ,
duplicate_detector_ ( ) ,
dup_dectector_on_ ( false ) {
assert ( cf_mems_ ) ;
@ -1090,8 +1131,6 @@ class MemTableInserter : public WriteBatch::Handler {
MemTableInserter ( const MemTableInserter & ) = delete ;
MemTableInserter & operator = ( const MemTableInserter & ) = delete ;
virtual bool WriterAfterCommit ( ) const { return write_after_commit_ ; }
// The batch seq is regularly restarted; In normal mode it is set when
// MemTableInserter is constructed in the write thread and in recovery mode it
// is set when a batch, which is tagged with seq, is read from the WAL.
@ -1693,11 +1732,11 @@ Status WriteBatchInternal::InsertInto(
WriteThread : : WriteGroup & write_group , SequenceNumber sequence ,
ColumnFamilyMemTables * memtables , FlushScheduler * flush_scheduler ,
bool ignore_missing_column_families , uint64_t recovery_log_number , DB * db ,
bool concurrent_memtable_writes , bool seq_per_batch ) {
MemTableInserter inserter ( sequence , memtables , flush_scheduler ,
ignore_missing_column_families , recovery_log_number ,
db , concurrent_memtable_writes ,
nullptr /*has_valid_writes*/ , seq_per_batch ) ;
bool concurrent_memtable_writes , bool seq_per_batch , bool batch_per_txn ) {
MemTableInserter inserter (
sequence , memtables , flush_scheduler , ignore_missing_column_families ,
recovery_log_number , db , concurrent_memtable_writes ,
nullptr /*has_valid_writes*/ , seq_per_batch , batch_per_txn ) ;
for ( auto w : write_group ) {
if ( w - > CallbackFailed ( ) ) {
continue ;
@ -1724,15 +1763,16 @@ Status WriteBatchInternal::InsertInto(
WriteThread : : Writer * writer , SequenceNumber sequence ,
ColumnFamilyMemTables * memtables , FlushScheduler * flush_scheduler ,
bool ignore_missing_column_families , uint64_t log_number , DB * db ,
bool concurrent_memtable_writes , bool seq_per_batch , size_t batch_cnt ) {
bool concurrent_memtable_writes , bool seq_per_batch , size_t batch_cnt ,
bool batch_per_txn ) {
# ifdef NDEBUG
( void ) batch_cnt ;
# endif
assert ( writer - > ShouldWriteToMemtable ( ) ) ;
MemTableInserter inserter ( sequence , memtables , flush_scheduler ,
ignore_missing_column_families , log_number , db ,
concurrent_memtable_writes ,
nullptr /*has_valid_writes*/ , seq_per_batch ) ;
MemTableInserter inserter (
sequence , memtables , flush_scheduler , ignore_missing_column_families ,
log_number , db , concurrent_memtable_writes , nullptr /*has_valid_writes*/ ,
seq_per_batch , batch_per_txn ) ;
SetSequence ( writer - > batch , sequence ) ;
inserter . set_log_number_ref ( writer - > log_ref ) ;
Status s = writer - > batch - > Iterate ( & inserter ) ;
@ -1748,11 +1788,12 @@ Status WriteBatchInternal::InsertInto(
const WriteBatch * batch , ColumnFamilyMemTables * memtables ,
FlushScheduler * flush_scheduler , bool ignore_missing_column_families ,
uint64_t log_number , DB * db , bool concurrent_memtable_writes ,
SequenceNumber * next_seq , bool * has_valid_writes , bool seq_per_batch ) {
SequenceNumber * next_seq , bool * has_valid_writes , bool seq_per_batch ,
bool batch_per_txn ) {
MemTableInserter inserter ( Sequence ( batch ) , memtables , flush_scheduler ,
ignore_missing_column_families , log_number , db ,
concurrent_memtable_writes , has_valid_writes ,
seq_per_batch ) ;
seq_per_batch , batch_per_txn ) ;
Status s = batch - > Iterate ( & inserter ) ;
if ( next_seq ! = nullptr ) {
* next_seq = inserter . sequence ( ) ;