Use robust mutexes by default

Making robust mutexes optional on pthreads just complicates things;
locks are always robust on the other implementations.
vl32b
Howard Chu 10 years ago
parent 58ddb5527b
commit e3b6c359a9
  1. 20
      libraries/liblmdb/lmdb.h
  2. 30
      libraries/liblmdb/mdb.c

@ -50,12 +50,14 @@
* *
* Fix: Check for stale readers periodically, using the * Fix: Check for stale readers periodically, using the
* #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. * #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool.
* Catch stale * Stale writers will be cleared automatically on most systems:
* locks with option MDB_ROBUST if supported (non-BSD). Or just * - Windows - automatic
* make all programs using the database close it; the lockfile * - BSD, systems using SysV semaphores - automatic
* is always reset on first open of the environment. * - Linux, systems using POSIX mutexes with Robust option - automatic
* Otherwise just make all programs using the database close it;
* the lockfile is always reset on first open of the environment.
* *
* - On BSD systems or others configured with MDB_USE_POSIX_SEM, * - On BSD systems or others configured with MDB_USE_SYSV_SEM,
* startup can fail due to semaphores owned by another userid. * startup can fail due to semaphores owned by another userid.
* *
* Fix: Open and close the database as the user which owns the * Fix: Open and close the database as the user which owns the
@ -290,8 +292,6 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
#define MDB_NORDAHEAD 0x800000 #define MDB_NORDAHEAD 0x800000
/** don't initialize malloc'd memory before writing to datafile */ /** don't initialize malloc'd memory before writing to datafile */
#define MDB_NOMEMINIT 0x1000000 #define MDB_NOMEMINIT 0x1000000
/** catch stale locks if supported (not on BSD, needs robust mutexes) */
#define MDB_ROBUST 0x2000000
/** @} */ /** @} */
/** @defgroup mdb_dbi_open Database Flags /** @defgroup mdb_dbi_open Database Flags
@ -516,12 +516,6 @@ int mdb_env_create(MDB_env **env);
* Open the environment in read-only mode. No write operations will be * Open the environment in read-only mode. No write operations will be
* allowed. LMDB will still modify the lock file - except on read-only * allowed. LMDB will still modify the lock file - except on read-only
* filesystems, where LMDB does not use locks. * filesystems, where LMDB does not use locks.
* <li>#MDB_ROBUST
* Initialize the lockfile to catch stale locks if robust mutexes
* are supported, so aborted processes will not block others.
* Ignored when another process has the environment open. Unsupported
* by liblmdb built with MDB_USE_POSIX_SEM (such as BSD systems).
* Enabled by default on Windows. Some locking slowdown on Unix.
* <li>#MDB_WRITEMAP * <li>#MDB_WRITEMAP
* Use a writeable memory map unless MDB_RDONLY is set. This is faster * Use a writeable memory map unless MDB_RDONLY is set. This is faster
* and uses fewer mallocs, but loses protection from application bugs * and uses fewer mallocs, but loses protection from application bugs

@ -200,7 +200,7 @@ union semun {
#define MDB_DEVEL 0 #define MDB_DEVEL 0
#endif #endif
#if MDB_DEVEL && (defined(_WIN32) || (defined(EOWNERDEAD) && !defined(MDB_USE_SYSV_SEM))) #if defined(WIN32) || (defined(EOWNERDEAD) && !defined(MDB_USE_SYSV_SEM))
#define MDB_ROBUST_SUPPORTED 1 #define MDB_ROBUST_SUPPORTED 1
#endif #endif
@ -670,8 +670,6 @@ typedef struct MDB_txbody {
* when readers release their slots. * when readers release their slots.
*/ */
unsigned mtb_numreaders; unsigned mtb_numreaders;
/** Flags which the lock file was initialized with. */
unsigned mtb_flags;
} MDB_txbody; } MDB_txbody;
/** The actual reader table definition. */ /** The actual reader table definition. */
@ -684,7 +682,6 @@ typedef struct MDB_txninfo {
#define mti_rmname mt1.mtb.mtb_rmname #define mti_rmname mt1.mtb.mtb_rmname
#define mti_txnid mt1.mtb.mtb_txnid #define mti_txnid mt1.mtb.mtb_txnid
#define mti_numreaders mt1.mtb.mtb_numreaders #define mti_numreaders mt1.mtb.mtb_numreaders
#define mti_flags mt1.mtb.mtb_flags
char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)]; char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)];
} mt1; } mt1;
union { union {
@ -4338,7 +4335,6 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if (!env->me_rmutex) goto fail_errno; if (!env->me_rmutex) goto fail_errno;
env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname); env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname);
if (!env->me_wmutex) goto fail_errno; if (!env->me_wmutex) goto fail_errno;
env->me_flags |= MDB_ROBUST;
#elif defined(MDB_USE_SYSV_SEM) #elif defined(MDB_USE_SYSV_SEM)
union semun semu; union semun semu;
unsigned short vals[2] = {1, 1}; unsigned short vals[2] = {1, 1};
@ -4361,23 +4357,18 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if ((rc = pthread_mutexattr_init(&mattr)) if ((rc = pthread_mutexattr_init(&mattr))
|| (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED)) || (rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED))
#ifdef MDB_ROBUST_SUPPORTED #ifdef MDB_ROBUST_SUPPORTED
|| ((env->me_flags & MDB_ROBUST) && || (rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST))
(rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST)))
#endif #endif
|| (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr)) || (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr))
|| (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr))) || (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr)))
goto fail; goto fail;
pthread_mutexattr_destroy(&mattr); pthread_mutexattr_destroy(&mattr);
#endif /* _WIN32 || MDB_USE_SYSV_SEM */ #endif /* _WIN32 || MDB_USE_SYSV_SEM */
#ifndef MDB_ROBUST_SUPPORTED
env->me_flags &= ~MDB_ROBUST;
#endif
env->me_txns->mti_magic = MDB_MAGIC; env->me_txns->mti_magic = MDB_MAGIC;
env->me_txns->mti_format = MDB_LOCK_FORMAT; env->me_txns->mti_format = MDB_LOCK_FORMAT;
env->me_txns->mti_txnid = 0; env->me_txns->mti_txnid = 0;
env->me_txns->mti_numreaders = 0; env->me_txns->mti_numreaders = 0;
env->me_txns->mti_flags = env->me_flags;
} else { } else {
if (env->me_txns->mti_magic != MDB_MAGIC) { if (env->me_txns->mti_magic != MDB_MAGIC) {
@ -4395,8 +4386,6 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if (rc && rc != EACCES && rc != EAGAIN) { if (rc && rc != EACCES && rc != EAGAIN) {
goto fail; goto fail;
} }
env->me_flags = (env->me_flags & ~MDB_ROBUST) |
(env->me_txns->mti_flags & MDB_ROBUST);
#ifdef _WIN32 #ifdef _WIN32
env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname); env->me_rmutex = OpenMutex(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname);
if (!env->me_rmutex) goto fail_errno; if (!env->me_rmutex) goto fail_errno;
@ -4440,13 +4429,8 @@ fail:
* environment and re-opening it with the new flags. * environment and re-opening it with the new flags.
*/ */
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT) #define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|ROBUST_FLAG| \ #define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \
MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
#ifdef MDB_ROBUST_SUPPORTED
#define ROBUST_FLAG MDB_ROBUST
#else
#define ROBUST_FLAG 0
#endif
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS) #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
# error "Persistent DB flags & env flags overlap, but both go in mm_flags" # error "Persistent DB flags & env flags overlap, but both go in mm_flags"
@ -9481,7 +9465,7 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
*/ */
static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc) static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc)
{ {
int toggle, rlocked, rc2; int rlocked, rc2;
#ifndef _WIN32 #ifndef _WIN32
enum { WAIT_ABANDONED = EOWNERDEAD }; enum { WAIT_ABANDONED = EOWNERDEAD };
#endif #endif
@ -9491,12 +9475,6 @@ static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc)
rc = MDB_SUCCESS; rc = MDB_SUCCESS;
rlocked = (mutex == MDB_MUTEX(env, r)); rlocked = (mutex == MDB_MUTEX(env, r));
if (!rlocked) { if (!rlocked) {
/* Keep mti_txnid updated, otherwise next writer can
* overwrite data which latest meta page refers to.
* TODO: Instead revert any aborted commit and sync?
*/
toggle = mdb_env_pick_meta(env);
env->me_txns->mti_txnid = env->me_metas[toggle]->mm_txnid;
/* env is hosed if the dead thread was ours */ /* env is hosed if the dead thread was ours */
if (env->me_txn) { if (env->me_txn) {
env->me_flags |= MDB_FATAL_ERROR; env->me_flags |= MDB_FATAL_ERROR;

Loading…
Cancel
Save