@ -843,11 +843,16 @@ class DBImpl : public DB {
// Lock over the persistent DB state. Non-nullptr iff successfully acquired.
// Lock over the persistent DB state. Non-nullptr iff successfully acquired.
FileLock * db_lock_ ;
FileLock * db_lock_ ;
// It protects the back() of logs_ and alive_log_files_. Any push_back to
// In addition to mutex_, log_write_mutex_ protects writes to logs_ and
// these must be under log_write_mutex_ and any access that requires the
// logfile_number_. With concurrent_prepare it also protects alive_log_files_,
// back() to remain the same must also lock log_write_mutex_.
// and log_empty_. Refer to the definition of each variable below for more
// details.
InstrumentedMutex log_write_mutex_ ;
InstrumentedMutex log_write_mutex_ ;
// State below is protected by mutex_
// State below is protected by mutex_
// With concurrent_prepare enabled, some of the variables that are accessed during
// WriteToWAL need different synchronization: log_empty_, alive_log_files_,
// logs_, logfile_number_. Refer to the definition of each variable below for
// more description.
mutable InstrumentedMutex mutex_ ;
mutable InstrumentedMutex mutex_ ;
std : : atomic < bool > shutting_down_ ;
std : : atomic < bool > shutting_down_ ;
@ -861,10 +866,20 @@ class DBImpl : public DB {
// * whenever there is an error in background purge, flush or compaction
// * whenever there is an error in background purge, flush or compaction
// * whenever num_running_ingest_file_ goes to 0.
// * whenever num_running_ingest_file_ goes to 0.
InstrumentedCondVar bg_cv_ ;
InstrumentedCondVar bg_cv_ ;
// Writes are protected by locking both mutex_ and log_write_mutex_, and reads
// must be under either mutex_ or log_write_mutex_. Since after ::Open,
// logfile_number_ is currently updated only in write_thread_, it can be read
// from the same write_thread_ without any locks.
uint64_t logfile_number_ ;
uint64_t logfile_number_ ;
std : : deque < uint64_t >
std : : deque < uint64_t >
log_recycle_files ; // a list of log files that we can recycle
log_recycle_files ; // a list of log files that we can recycle
bool log_dir_synced_ ;
bool log_dir_synced_ ;
// Without concurrent_prepare, reads and writes to log_empty_ are protected by
// mutex_. Since it is currently updated/read only in write_thread_, it can be
// accessed from the same write_thread_ without any locks. With
// concurrent_prepare writes, where it can be updated in different threads,
// reads and writes are protected by log_write_mutex_ instead. This is to avoid
// the expensive mutex_ lock during WAL write, which updates log_empty_.
bool log_empty_ ;
bool log_empty_ ;
ColumnFamilyHandleImpl * default_cf_handle_ ;
ColumnFamilyHandleImpl * default_cf_handle_ ;
InternalStats * default_cf_internal_stats_ ;
InternalStats * default_cf_internal_stats_ ;
@ -899,18 +914,26 @@ class DBImpl : public DB {
// true for some prefix of logs_
// true for some prefix of logs_
bool getting_synced = false ;
bool getting_synced = false ;
} ;
} ;
// Without concurrent_prepare, reads and writes to alive_log_files_ are
// protected by mutex_. However since back() is never popped, and push_back()
// is done only from write_thread_, the same thread can access the item
// referred to by back() without mutex_. With concurrent_prepare_, writes
// are protected by locking both mutex_ and log_write_mutex_, and reads must
// be under either mutex_ or log_write_mutex_.
std : : deque < LogFileNumberSize > alive_log_files_ ;
std : : deque < LogFileNumberSize > alive_log_files_ ;
// Log files that aren't fully synced, and the current log file.
// Log files that aren't fully synced, and the current log file.
// Synchronization:
// Synchronization:
// - push_back() is done from write thread with locked mutex_,
// - push_back() is done from write_thread_ with locked mutex_ and
// - pop_front() is done from any thread with locked mutex_,
// log_write_mutex_
// - pop_front() is done from any thread with locked mutex_ and
// log_write_mutex_
// - reads are done with either locked mutex_ or log_write_mutex_
// - back() and items with getting_synced=true are not popped,
// - back() and items with getting_synced=true are not popped,
// - it follows that write thread with unlocked mutex_ can safely access
// - The same thread that sets getting_synced=true will reset it.
// back() and items with getting_synced=true.
// - it follows that the object referred to by back() can be safely read from
// -- Update: apparently this was a mistake. back() should be called under
// the write_thread_ without using mutex
// mutex_: https://github.com/facebook/rocksdb/pull/1774
// - it follows that the items with getting_synced=true can be safely read
// - When concurrent write threads are enabled, back(), push_back(), and
// from the same thread that has set getting_synced=true
// pop_front() must be called within log_write_mutex_
std : : deque < LogWriterNumber > logs_ ;
std : : deque < LogWriterNumber > logs_ ;
// Signaled when getting_synced becomes false for some of the logs_.
// Signaled when getting_synced becomes false for some of the logs_.
InstrumentedCondVar log_sync_cv_ ;
InstrumentedCondVar log_sync_cv_ ;