From 91155b9d676f8abe3fe5e8a96b22b4dd51f963dd Mon Sep 17 00:00:00 2001
From: Howard Chu
Date: Thu, 18 Dec 2014 04:38:53 +0000
Subject: [PATCH] Hack for potential ext3/ext4 corruption issue

Use regular fsync() if we think this commit grew the DB file.

mdb_txn_commit() calls the internal mdb_env_sync0() so that the FGREW
flag actually reaches the sync code. The public mdb_env_sync() reduces
its argument to 0/1 (force != 0); passing FGREW through it would turn
the flag into FORCE, i.e. a forced fdatasync() even under MDB_NOSYNC,
and never the intended fsync().
---
 libraries/liblmdb/mdb.c | 43 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c
index 0867af7..cf398d9 100644
--- a/libraries/liblmdb/mdb.c
+++ b/libraries/liblmdb/mdb.c
@@ -333,6 +333,7 @@ mdb_sem_wait(sem_t *sem)
  */
 #ifndef MDB_FDATASYNC
 # define MDB_FDATASYNC	fdatasync
+# define HAVE_FDATASYNC	1
 #endif
 
 #ifndef MDB_MSYNC
@@ -1112,7 +1113,7 @@ struct MDB_env {
 	MDB_txn		*me_txn;		/**< current write transaction */
 	MDB_txn		*me_txn0;		/**< prealloc'd write transaction */
 	size_t		me_mapsize;		/**< size of the data memory map */
-	off_t		me_size;		/**< current file size */
+	size_t		me_size;		/**< current file size */
 	pgno_t		me_maxpg;		/**< me_mapsize / me_psize */
 	MDB_dbx		*me_dbxs;		/**< array of static DB info */
 	uint16_t	*me_dbflags;	/**< array of flags from MDB_db.md_flags */
@@ -2298,10 +2299,19 @@ fail:
 	return rc;
 }
 
-int
-mdb_env_sync(MDB_env *env, int force)
+/* internal env_sync flags: */
+#define FORCE	1		/* as before, force a flush */
+#define FGREW	0x8000	/* file has grown, do a full fsync instead of just
+	   fdatasync. We shouldn't have to do this, according to the POSIX spec.
+	   But common Linux FSs violate the spec and won't sync required metadata
+	   correctly when the file grows. This only makes a difference if the
+	   platform actually distinguishes fdatasync from fsync.
+	   http://www.openldap.org/lists/openldap-devel/201411/msg00000.html */
+
+static int
+mdb_env_sync0(MDB_env *env, int flag)
 {
-	int rc = 0;
+	int rc = 0, force = flag & FORCE;
 	if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
 		if (env->me_flags & MDB_WRITEMAP) {
 			int flags = ((env->me_flags & MDB_MAPASYNC) && !force)
@@ -2313,6 +2323,12 @@ mdb_env_sync(MDB_env *env, int force)
 				rc = ErrCode();
 #endif
 		} else {
+#ifdef HAVE_FDATASYNC
+			if (flag & FGREW) {
+				if (fsync(env->me_fd))	/* Avoid ext-fs bugs, do full sync */
+					rc = ErrCode();
+			} else
+#endif
 			if (MDB_FDATASYNC(env->me_fd))
 				rc = ErrCode();
 		}
@@ -2320,6 +2336,12 @@ mdb_env_sync(MDB_env *env, int force)
 	return rc;
 }
 
+int
+mdb_env_sync(MDB_env *env, int force)
+{
+	return mdb_env_sync0(env, force != 0);
+}
+
 /** Back up parent txn's cursors, then grab the originals for tracking */
 static int
 mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst)
@@ -3372,8 +3394,15 @@ mdb_txn_commit(MDB_txn *txn)
 	mdb_audit(txn);
 #endif
 
+	i = 0;
+#ifdef HAVE_FDATASYNC
+	if (txn->mt_next_pgno * env->me_psize > env->me_size) {
+		i |= FGREW;
+		env->me_size = txn->mt_next_pgno * env->me_psize;
+	}
+#endif
 	if ((rc = mdb_page_flush(txn, 0)) ||
-		(rc = mdb_env_sync(env, 0)) ||
+		(rc = mdb_env_sync0(env, i)) ||
 		(rc = mdb_env_write_meta(txn)))
 		goto fail;
 
@@ -3897,6 +3926,10 @@ mdb_env_open2(MDB_env *env)
 		env->me_mapsize = minsize;
 	}
 
+	rc = mdb_fsize(env->me_fd, &env->me_size);
+	if (rc)
+		return rc;
+
 	rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
 	if (rc)
 		return rc;