RPAGE_CACHE is now dynamically selectable

Behavior is controlled by the MDB_REMAP_CHUNKS environment flag.
Remapping is always enabled in MDB_VL32 builds.
mdb.master3
Howard Chu 8 years ago
parent ac047d1eff
commit 53799e51da
  1. 3
      libraries/liblmdb/lmdb.h
  2. 102
      libraries/liblmdb/mdb.c

@ -188,6 +188,7 @@ typedef mode_t mdb_mode_t;
#endif #endif
#if !defined(MDB_RPAGE_CACHE) || (defined(MDB_VL32) && !(MDB_RPAGE_CACHE)) #if !defined(MDB_RPAGE_CACHE) || (defined(MDB_VL32) && !(MDB_RPAGE_CACHE))
/** Support #MDB_REMAP_CHUNKS. Implied by MDB_VL32. Define as 0 to disable. */
#define MDB_RPAGE_CACHE 1 #define MDB_RPAGE_CACHE 1
#endif #endif
@ -336,6 +337,8 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
#define MDB_NORDAHEAD 0x800000 #define MDB_NORDAHEAD 0x800000
/** don't initialize malloc'd memory before writing to datafile */ /** don't initialize malloc'd memory before writing to datafile */
#define MDB_NOMEMINIT 0x1000000 #define MDB_NOMEMINIT 0x1000000
/** don't use a single mmap for the whole file; remap individual chunks
 * instead (needs MDB_RPAGE_CACHE) */
#define MDB_REMAP_CHUNKS 0x2000000
/** @} */ /** @} */
/** @defgroup mdb_dbi_open Database Flags /** @defgroup mdb_dbi_open Database Flags

@ -2051,6 +2051,12 @@ mdb_dlist_free(MDB_txn *txn)
} }
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
/* MDB_REMAPPING(flags): nonzero when chunk remapping is in effect, i.e. the
 * environment does not rely on a single mmap.
 * - If MDB_VL32 is defined, or (MDB_RPAGE_CACHE & 2) is nonzero, it is the
 *   constant 1 and the flags argument is not examined.
 * - Otherwise it tests the MDB_REMAP_CHUNKS bit of flags at runtime.
 */
#if defined(MDB_VL32) || ((MDB_RPAGE_CACHE) & 2) /* Always remap */
#define MDB_REMAPPING(flags) 1
#else
#define MDB_REMAPPING(flags) ((flags) & MDB_REMAP_CHUNKS)
#endif
static void static void
mdb_page_unref(MDB_txn *txn, MDB_page *mp) mdb_page_unref(MDB_txn *txn, MDB_page *mp)
{ {
@ -2067,7 +2073,8 @@ mdb_page_unref(MDB_txn *txn, MDB_page *mp)
if (tl[x].mref) if (tl[x].mref)
tl[x].mref--; tl[x].mref--;
} }
#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp) #define MDB_PAGE_UNREF(txn, mp) \
(MDB_REMAPPING(txn->mt_env->me_flags) ? mdb_page_unref(txn, mp) : (void)0)
static void static void
mdb_cursor_unref(MDB_cursor *mc) mdb_cursor_unref(MDB_cursor *mc)
@ -2088,11 +2095,13 @@ mdb_cursor_unref(MDB_cursor *mc)
mc->mc_flags &= ~C_INITIALIZED; mc->mc_flags &= ~C_INITIALIZED;
} }
#define MDB_CURSOR_UNREF(mc, force) \ #define MDB_CURSOR_UNREF(mc, force) \
(((force) || ((mc)->mc_flags & C_INITIALIZED)) \ ((MDB_REMAPPING((mc)->mc_txn->mt_env->me_flags) && \
((force) || ((mc)->mc_flags & C_INITIALIZED))) \
? mdb_cursor_unref(mc) \ ? mdb_cursor_unref(mc) \
: (void)0) : (void)0)
#else #else
#define MDB_REMAPPING(flags) 0
#define MDB_PAGE_UNREF(txn, mp) #define MDB_PAGE_UNREF(txn, mp)
#define MDB_CURSOR_UNREF(mc, force) ((void)0) #define MDB_CURSOR_UNREF(mc, force) ((void)0)
#endif /* MDB_RPAGE_CACHE */ #endif /* MDB_RPAGE_CACHE */
@ -2554,8 +2563,8 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
rc = MDB_MAP_FULL; rc = MDB_MAP_FULL;
goto fail; goto fail;
} }
#if defined(_WIN32) && !MDB_RPAGE_CACHE #if defined(_WIN32)
if (!(env->me_flags & MDB_RDONLY)) { if (!MDB_REMAPPING(env->me_flags) && !(env->me_flags & MDB_RDONLY)) {
void *p; void *p;
p = (MDB_page *)(env->me_map + env->me_psize * pgno); p = (MDB_page *)(env->me_map + env->me_psize * pgno);
p = VirtualAlloc(p, env->me_psize * num, MEM_COMMIT, p = VirtualAlloc(p, env->me_psize * num, MEM_COMMIT,
@ -3068,6 +3077,7 @@ mdb_txn_renew0(MDB_txn *txn)
/* Moved to here to avoid a data race in read TXNs */ /* Moved to here to avoid a data race in read TXNs */
txn->mt_next_pgno = meta->mm_last_pg+1; txn->mt_next_pgno = meta->mm_last_pg+1;
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags))
txn->mt_last_pgno = txn->mt_next_pgno - 1; txn->mt_last_pgno = txn->mt_next_pgno - 1;
#endif #endif
@ -3149,7 +3159,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
return ENOMEM; return ENOMEM;
} }
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (!parent) { if (MDB_REMAPPING(env->me_flags) && !parent) {
txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
if (!txn->mt_rpages) { if (!txn->mt_rpages) {
free(txn); free(txn);
@ -3187,6 +3197,7 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
txn->mt_parent = parent; txn->mt_parent = parent;
txn->mt_numdbs = parent->mt_numdbs; txn->mt_numdbs = parent->mt_numdbs;
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags))
txn->mt_rpages = parent->mt_rpages; txn->mt_rpages = parent->mt_rpages;
#endif #endif
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
@ -3216,6 +3227,7 @@ renew:
if (rc) { if (rc) {
if (txn != env->me_txn0) { if (txn != env->me_txn0) {
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags))
free(txn->mt_rpages); free(txn->mt_rpages);
#endif #endif
free(txn); free(txn);
@ -3345,7 +3357,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
mdb_midl_free(pghead); mdb_midl_free(pghead);
} }
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (!txn->mt_parent) { if (MDB_REMAPPING(env->me_flags) && !txn->mt_parent) {
MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages; MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages;
unsigned i, x, n = tl[0].mid; unsigned i, x, n = tl[0].mid;
pthread_mutex_lock(&env->me_rpmutex); pthread_mutex_lock(&env->me_rpmutex);
@ -3722,7 +3734,7 @@ retry_seek:
#endif /* _WIN32 */ #endif /* _WIN32 */
} }
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (pgno > txn->mt_last_pgno) if (MDB_REMAPPING(env->me_flags) && pgno > txn->mt_last_pgno)
txn->mt_last_pgno = pgno; txn->mt_last_pgno = pgno;
#endif #endif
@ -4327,20 +4339,20 @@ mdb_env_map(MDB_env *env, void *addr)
if (rc) if (rc)
return mdb_nt2win32(rc); return mdb_nt2win32(rc);
map = addr; map = addr;
#if MDB_RPAGE_CACHE if (MDB_REMAPPING(env->me_flags))
msize = NUM_METAS * env->me_psize; msize = NUM_METAS * env->me_psize;
#endif
rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot); rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot);
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags))
env->me_fmh = mh; env->me_fmh = mh;
#else else
NtClose(mh);
#endif #endif
NtClose(mh);
if (rc) if (rc)
return mdb_nt2win32(rc); return mdb_nt2win32(rc);
env->me_map = map; env->me_map = map;
#else #else /* !_WIN32 */
#if MDB_RPAGE_CACHE if (MDB_REMAPPING(env->me_flags)) {
(void) flags; (void) flags;
env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED, env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED,
env->me_fd, 0); env->me_fd, 0);
@ -4348,7 +4360,8 @@ mdb_env_map(MDB_env *env, void *addr)
env->me_map = NULL; env->me_map = NULL;
return ErrCode(); return ErrCode();
} }
#else } else
{
int prot = PROT_READ; int prot = PROT_READ;
if (flags & MDB_WRITEMAP) { if (flags & MDB_WRITEMAP) {
prot |= PROT_WRITE; prot |= PROT_WRITE;
@ -4372,6 +4385,7 @@ mdb_env_map(MDB_env *env, void *addr)
#endif /* POSIX_MADV_RANDOM */ #endif /* POSIX_MADV_RANDOM */
#endif /* MADV_RANDOM */ #endif /* MADV_RANDOM */
} }
}
#endif /* _WIN32 */ #endif /* _WIN32 */
/* Can happen because the address argument to mmap() is just a /* Can happen because the address argument to mmap() is just a
@ -4381,7 +4395,6 @@ mdb_env_map(MDB_env *env, void *addr)
*/ */
if (addr && env->me_map != addr) if (addr && env->me_map != addr)
return EBUSY; /* TODO: Make a new MDB_* error code? */ return EBUSY; /* TODO: Make a new MDB_* error code? */
#endif
p = (MDB_page *)env->me_map; p = (MDB_page *)env->me_map;
env->me_metas[0] = METADATA(p); env->me_metas[0] = METADATA(p);
@ -4398,10 +4411,9 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size)
*/ */
if (env->me_map) { if (env->me_map) {
MDB_meta *meta; MDB_meta *meta;
#if !MDB_RPAGE_CACHE
void *old; void *old;
int rc; int rc;
#endif
if (env->me_txn) if (env->me_txn)
return EINVAL; return EINVAL;
meta = mdb_env_pick_meta(env); meta = mdb_env_pick_meta(env);
@ -4413,8 +4425,9 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size)
if (size < minsize) if (size < minsize)
size = minsize; size = minsize;
} }
#if !MDB_RPAGE_CACHE if (!(MDB_REMAPPING(env->me_flags)))
/* For MDB_RPAGE_CACHE this bit is a noop since we dynamically remap {
/* For MDB_REMAP_CHUNKS this bit is a noop since we dynamically remap
* chunks of the DB anyway. * chunks of the DB anyway.
*/ */
munmap(env->me_map, env->me_mapsize); munmap(env->me_map, env->me_mapsize);
@ -4423,7 +4436,7 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size)
rc = mdb_env_map(env, old); rc = mdb_env_map(env, old);
if (rc) if (rc)
return rc; return rc;
#endif /* !MDB_RPAGE_CACHE */ }
} }
env->me_mapsize = size; env->me_mapsize = size;
if (env->me_psize) if (env->me_psize)
@ -5332,7 +5345,7 @@ fail:
*/ */
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT) #define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \ #define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \
MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD) MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD|MDB_REMAP_CHUNKS)
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS) #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
# error "Persistent DB flags & env flags overlap, but both go in mm_flags" # error "Persistent DB flags & env flags overlap, but both go in mm_flags"
@ -5347,23 +5360,23 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
return EINVAL; return EINVAL;
#if MDB_RPAGE_CACHE flags |= env->me_flags;
if (flags & MDB_WRITEMAP) { if (MDB_REMAPPING(0)) /* if we always remap chunks */
/* silently ignore WRITEMAP with RPAGE_CACHE */ flags |= MDB_REMAP_CHUNKS;
flags ^= MDB_WRITEMAP; if (MDB_REMAPPING(flags)) {
} /* silently ignore WRITEMAP with REMAP_CHUNKS */
if (flags & MDB_FIXEDMAP) { flags &= ~MDB_WRITEMAP;
/* cannot support FIXEDMAP */ /* cannot support FIXEDMAP */
if (flags & MDB_FIXEDMAP)
return EINVAL; return EINVAL;
} }
#endif
flags |= env->me_flags;
rc = mdb_fname_init(path, flags, &fname); rc = mdb_fname_init(path, flags, &fname);
if (rc) if (rc)
return rc; return rc;
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(flags)) {
#ifdef _WIN32 #ifdef _WIN32
env->me_rpmutex = CreateMutex(NULL, FALSE, NULL); env->me_rpmutex = CreateMutex(NULL, FALSE, NULL);
if (!env->me_rpmutex) { if (!env->me_rpmutex) {
@ -5375,6 +5388,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
if (rc) if (rc)
goto leave; goto leave;
#endif #endif
}
#endif #endif
flags |= MDB_ENV_ACTIVE; /* tell mdb_env_close0() to clean up */ flags |= MDB_ENV_ACTIVE; /* tell mdb_env_close0() to clean up */
@ -5392,6 +5406,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
goto leave; goto leave;
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(flags))
{ {
env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3)); env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3));
if (!env->me_rpages) { if (!env->me_rpages) {
@ -5460,6 +5475,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
txn->mt_env = env; txn->mt_env = env;
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags)) {
txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
if (!txn->mt_rpages) { if (!txn->mt_rpages) {
free(txn); free(txn);
@ -5468,6 +5484,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
} }
txn->mt_rpages[0].mid = 0; txn->mt_rpages[0].mid = 0;
txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
}
#endif #endif
txn->mt_dbxs = env->me_dbxs; txn->mt_dbxs = env->me_dbxs;
txn->mt_flags = MDB_TXN_FINISHED; txn->mt_flags = MDB_TXN_FINISHED;
@ -5508,6 +5525,7 @@ mdb_env_close0(MDB_env *env, int excl)
free(env->me_path); free(env->me_path);
free(env->me_dirty_list); free(env->me_dirty_list);
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags)) {
if (env->me_txn0 && env->me_txn0->mt_rpages) if (env->me_txn0 && env->me_txn0->mt_rpages)
free(env->me_txn0->mt_rpages); free(env->me_txn0->mt_rpages);
if (env->me_rpages) { if (env->me_rpages) {
@ -5517,6 +5535,7 @@ mdb_env_close0(MDB_env *env, int excl)
munmap(el[x].mptr, el[x].mcnt * env->me_psize); munmap(el[x].mptr, el[x].mcnt * env->me_psize);
free(el); free(el);
} }
}
#endif #endif
free(env->me_txn0); free(env->me_txn0);
mdb_midl_free(env->me_free_pgs); mdb_midl_free(env->me_free_pgs);
@ -5535,11 +5554,10 @@ mdb_env_close0(MDB_env *env, int excl)
} }
if (env->me_map) { if (env->me_map) {
#if MDB_RPAGE_CACHE if (MDB_REMAPPING(env->me_flags))
munmap(env->me_map, NUM_METAS*env->me_psize); munmap(env->me_map, NUM_METAS*env->me_psize);
#else else
munmap(env->me_map, env->me_mapsize); munmap(env->me_map, env->me_mapsize);
#endif
} }
if (env->me_mfd != INVALID_HANDLE_VALUE) if (env->me_mfd != INVALID_HANDLE_VALUE)
(void) close(env->me_mfd); (void) close(env->me_mfd);
@ -5605,12 +5623,15 @@ mdb_env_close0(MDB_env *env, int excl)
(void) close(env->me_lfd); (void) close(env->me_lfd);
} }
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(env->me_flags))
{
#ifdef _WIN32 #ifdef _WIN32
if (env->me_fmh) CloseHandle(env->me_fmh); if (env->me_fmh) CloseHandle(env->me_fmh);
if (env->me_rpmutex) CloseHandle(env->me_rpmutex); if (env->me_rpmutex) CloseHandle(env->me_rpmutex);
#else #else
pthread_mutex_destroy(&env->me_rpmutex); pthread_mutex_destroy(&env->me_rpmutex);
#endif #endif
}
#endif #endif
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
@ -6221,17 +6242,18 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
level = 0; level = 0;
mapped: mapped:
{
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(txn->mt_env->me_flags)) {
int rc = mdb_rpage_get(txn, pgno, &p); int rc = mdb_rpage_get(txn, pgno, &p);
if (rc) { if (rc) {
txn->mt_flags |= MDB_TXN_ERROR; txn->mt_flags |= MDB_TXN_ERROR;
return rc; return rc;
} }
#else } else
#endif
{
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
p = (MDB_page *)(env->me_map + env->me_psize * pgno); p = (MDB_page *)(env->me_map + env->me_psize * pgno);
#endif
} }
done: done:
@ -6422,10 +6444,11 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
} }
#if MDB_RPAGE_CACHE #if MDB_RPAGE_CACHE
if (MDB_REMAPPING(mc->mc_txn->mt_env->me_flags))
{ {
int i; int i;
for (i=1; i<mc->mc_snum; i++) for (i=1; i<mc->mc_snum; i++)
MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]); mdb_page_unref(mc->mc_txn, mc->mc_pg[i]);
} }
#endif #endif
mc->mc_snum = 1; mc->mc_snum = 1;
@ -6577,7 +6600,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
mdb_cursor_init(&mc, txn, dbi, &mx); mdb_cursor_init(&mc, txn, dbi, &mx);
rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact); rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
/* unref all the pages when MDB_RPAGE_CACHE - caller must copy the data /* unref all the pages when MDB_REMAP_CHUNKS - caller must copy the data
* before doing anything else * before doing anything else
*/ */
MDB_CURSOR_UNREF(&mc, 1); MDB_CURSOR_UNREF(&mc, 1);
@ -10723,11 +10746,12 @@ mdb_drop0(MDB_cursor *mc, int subs)
mdb_cursor_pop(mc); mdb_cursor_pop(mc);
mdb_cursor_copy(mc, &mx); mdb_cursor_copy(mc, &mx);
#if MDB_RPAGE_CACHE if (MDB_REMAPPING(mc->mc_txn->mt_env->me_flags)) {
/* bump refcount for mx's pages */ /* bump refcount for mx's pages */
for (i=0; i<mc->mc_snum; i++) for (i=0; i<mc->mc_snum; i++)
mdb_page_get(&mx, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL); mdb_page_get(&mx, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL);
#endif }
while (mc->mc_snum > 0) { while (mc->mc_snum > 0) {
MDB_page *mp = mc->mc_pg[mc->mc_top]; MDB_page *mp = mc->mc_pg[mc->mc_top];
unsigned n = NUMKEYS(mp); unsigned n = NUMKEYS(mp);

Loading…
Cancel
Save