|
|
@ -1157,6 +1157,9 @@ struct MDB_txn { |
|
|
|
/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */ |
|
|
|
/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */ |
|
|
|
MDB_txn *mt_child; |
|
|
|
MDB_txn *mt_child; |
|
|
|
pgno_t mt_next_pgno; /**< next unallocated page */ |
|
|
|
pgno_t mt_next_pgno; /**< next unallocated page */ |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
pgno_t mt_last_pgno; /**< last written page */ |
|
|
|
|
|
|
|
#endif |
|
|
|
/** The ID of this transaction. IDs are integers incrementing from 1.
|
|
|
|
/** The ID of this transaction. IDs are integers incrementing from 1.
|
|
|
|
* Only committed write transactions increment the ID. If a transaction |
|
|
|
* Only committed write transactions increment the ID. If a transaction |
|
|
|
* aborts, the ID may be re-used by the next writer. |
|
|
|
* aborts, the ID may be re-used by the next writer. |
|
|
@ -1203,6 +1206,19 @@ struct MDB_txn { |
|
|
|
MDB_cursor **mt_cursors; |
|
|
|
MDB_cursor **mt_cursors; |
|
|
|
/** Array of flags for each DB */ |
|
|
|
/** Array of flags for each DB */ |
|
|
|
unsigned char *mt_dbflags; |
|
|
|
unsigned char *mt_dbflags; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
/** List of read-only pages (actually chunks) */ |
|
|
|
|
|
|
|
MDB_ID3L mt_rpages; |
|
|
|
|
|
|
|
/** We map chunks of 16 pages. Even though Windows uses 4KB pages, all
|
|
|
|
|
|
|
|
* mappings must begin on 64KB boundaries. So we round off all pgnos to |
|
|
|
|
|
|
|
* a chunk boundary. We do the same on Linux for symmetry, and also to |
|
|
|
|
|
|
|
* reduce the frequency of mmap/munmap calls. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
#define MDB_RPAGE_CHUNK 16 |
|
|
|
|
|
|
|
#define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */ |
|
|
|
|
|
|
|
#define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */ |
|
|
|
|
|
|
|
unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */ |
|
|
|
|
|
|
|
#endif |
|
|
|
/** Number of DB records in use, or 0 when the txn is finished.
|
|
|
|
/** Number of DB records in use, or 0 when the txn is finished.
|
|
|
|
* This number only ever increments until the txn finishes; we |
|
|
|
* This number only ever increments until the txn finishes; we |
|
|
|
* don't decrement it when individual DB handles are closed. |
|
|
|
* don't decrement it when individual DB handles are closed. |
|
|
@ -1315,6 +1331,9 @@ struct MDB_env { |
|
|
|
HANDLE me_fd; /**< The main data file */ |
|
|
|
HANDLE me_fd; /**< The main data file */ |
|
|
|
HANDLE me_lfd; /**< The lock file */ |
|
|
|
HANDLE me_lfd; /**< The lock file */ |
|
|
|
HANDLE me_mfd; /**< just for writing the meta pages */ |
|
|
|
HANDLE me_mfd; /**< just for writing the meta pages */ |
|
|
|
|
|
|
|
#if defined(MDB_VL32) && defined(_WIN32) |
|
|
|
|
|
|
|
HANDLE me_fmh; /**< File Mapping handle */ |
|
|
|
|
|
|
|
#endif |
|
|
|
/** Failed to update the meta page. Probably an I/O error. */ |
|
|
|
/** Failed to update the meta page. Probably an I/O error. */ |
|
|
|
#define MDB_FATAL_ERROR 0x80000000U |
|
|
|
#define MDB_FATAL_ERROR 0x80000000U |
|
|
|
/** Some fields are initialized. */ |
|
|
|
/** Some fields are initialized. */ |
|
|
@ -1372,6 +1391,13 @@ struct MDB_env { |
|
|
|
#else |
|
|
|
#else |
|
|
|
mdb_mutex_t me_rmutex; |
|
|
|
mdb_mutex_t me_rmutex; |
|
|
|
mdb_mutex_t me_wmutex; |
|
|
|
mdb_mutex_t me_wmutex; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */ |
|
|
|
|
|
|
|
mdb_mutex_t me_rpmutex; /**< control access to #me_rpages */ |
|
|
|
|
|
|
|
#define MDB_ERPAGE_SIZE 16384 |
|
|
|
|
|
|
|
#define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1) |
|
|
|
|
|
|
|
unsigned int me_rpcheck; |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
void *me_userctx; /**< User-settable context */ |
|
|
|
void *me_userctx; /**< User-settable context */ |
|
|
|
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ |
|
|
|
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ |
|
|
@ -1894,6 +1920,41 @@ mdb_dlist_free(MDB_txn *txn) |
|
|
|
dl[0].mid = 0; |
|
|
|
dl[0].mid = 0; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
static void |
|
|
|
|
|
|
|
mdb_page_unref(MDB_txn *txn, MDB_page *mp) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
pgno_t pgno; |
|
|
|
|
|
|
|
MDB_ID3L tl = txn->mt_rpages; |
|
|
|
|
|
|
|
unsigned x, rem; |
|
|
|
|
|
|
|
if (mp->mp_flags & (P_SUBP|P_DIRTY)) |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1); |
|
|
|
|
|
|
|
pgno = mp->mp_pgno ^ rem; |
|
|
|
|
|
|
|
x = mdb_mid3l_search(tl, pgno); |
|
|
|
|
|
|
|
if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno) |
|
|
|
|
|
|
|
x++; |
|
|
|
|
|
|
|
if (tl[x].mref) |
|
|
|
|
|
|
|
tl[x].mref--; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void |
|
|
|
|
|
|
|
mdb_cursor_unref(MDB_cursor *mc) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int i; |
|
|
|
|
|
|
|
if (!mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0])) |
|
|
|
|
|
|
|
return; |
|
|
|
|
|
|
|
for (i=0; i<mc->mc_snum; i++) |
|
|
|
|
|
|
|
mdb_page_unref(mc->mc_txn, mc->mc_pg[i]); |
|
|
|
|
|
|
|
mc->mc_snum = mc->mc_top = 0; |
|
|
|
|
|
|
|
mc->mc_pg[0] = NULL; |
|
|
|
|
|
|
|
mc->mc_flags &= ~C_INITIALIZED; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
#define MDB_PAGE_UNREF(txn, mp) |
|
|
|
|
|
|
|
#endif /* MDB_VL32 */ |
|
|
|
|
|
|
|
|
|
|
|
/** Loosen or free a single page.
|
|
|
|
/** Loosen or free a single page.
|
|
|
|
* Saves single pages to a list for future reuse |
|
|
|
* Saves single pages to a list for future reuse |
|
|
|
* in this same txn. It has been pulled from the freeDB |
|
|
|
* in this same txn. It has been pulled from the freeDB |
|
|
@ -2573,6 +2634,7 @@ done: |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
MDB_PAGE_UNREF(mc->mc_txn, mp); |
|
|
|
return 0; |
|
|
|
return 0; |
|
|
|
|
|
|
|
|
|
|
|
fail: |
|
|
|
fail: |
|
|
@ -2854,6 +2916,9 @@ mdb_txn_renew0(MDB_txn *txn) |
|
|
|
|
|
|
|
|
|
|
|
/* Moved to here to avoid a data race in read TXNs */ |
|
|
|
/* Moved to here to avoid a data race in read TXNs */ |
|
|
|
txn->mt_next_pgno = meta->mm_last_pg+1; |
|
|
|
txn->mt_next_pgno = meta->mm_last_pg+1; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
txn->mt_last_pgno = txn->mt_next_pgno - 1; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
txn->mt_flags = flags; |
|
|
|
txn->mt_flags = flags; |
|
|
|
|
|
|
|
|
|
|
@ -2932,6 +2997,17 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) |
|
|
|
DPRINTF(("calloc: %s", strerror(errno))); |
|
|
|
DPRINTF(("calloc: %s", strerror(errno))); |
|
|
|
return ENOMEM; |
|
|
|
return ENOMEM; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (!parent) { |
|
|
|
|
|
|
|
txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); |
|
|
|
|
|
|
|
if (!txn->mt_rpages) { |
|
|
|
|
|
|
|
free(txn); |
|
|
|
|
|
|
|
return ENOMEM; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
txn->mt_rpages[0].mid = 0; |
|
|
|
|
|
|
|
txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
txn->mt_dbxs = env->me_dbxs; /* static */ |
|
|
|
txn->mt_dbxs = env->me_dbxs; /* static */ |
|
|
|
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); |
|
|
|
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); |
|
|
|
txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; |
|
|
|
txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; |
|
|
@ -2959,6 +3035,9 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) |
|
|
|
parent->mt_child = txn; |
|
|
|
parent->mt_child = txn; |
|
|
|
txn->mt_parent = parent; |
|
|
|
txn->mt_parent = parent; |
|
|
|
txn->mt_numdbs = parent->mt_numdbs; |
|
|
|
txn->mt_numdbs = parent->mt_numdbs; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
txn->mt_rpages = parent->mt_rpages; |
|
|
|
|
|
|
|
#endif |
|
|
|
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); |
|
|
|
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); |
|
|
|
/* Copy parent's mt_dbflags, but clear DB_NEW */ |
|
|
|
/* Copy parent's mt_dbflags, but clear DB_NEW */ |
|
|
|
for (i=0; i<txn->mt_numdbs; i++) |
|
|
|
for (i=0; i<txn->mt_numdbs; i++) |
|
|
@ -2984,8 +3063,12 @@ renew: |
|
|
|
rc = mdb_txn_renew0(txn); |
|
|
|
rc = mdb_txn_renew0(txn); |
|
|
|
} |
|
|
|
} |
|
|
|
if (rc) { |
|
|
|
if (rc) { |
|
|
|
if (txn != env->me_txn0) |
|
|
|
if (txn != env->me_txn0) { |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
free(txn->mt_rpages); |
|
|
|
|
|
|
|
#endif |
|
|
|
free(txn); |
|
|
|
free(txn); |
|
|
|
|
|
|
|
} |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */ |
|
|
|
txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */ |
|
|
|
*ret = txn; |
|
|
|
*ret = txn; |
|
|
@ -3110,7 +3193,31 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) |
|
|
|
|
|
|
|
|
|
|
|
mdb_midl_free(pghead); |
|
|
|
mdb_midl_free(pghead); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (!txn->mt_parent) { |
|
|
|
|
|
|
|
MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages; |
|
|
|
|
|
|
|
unsigned i, x, n = tl[0].mid; |
|
|
|
|
|
|
|
LOCK_MUTEX0(env->me_rpmutex); |
|
|
|
|
|
|
|
for (i = 1; i <= n; i++) { |
|
|
|
|
|
|
|
if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) { |
|
|
|
|
|
|
|
/* tmp overflow pages that we didn't share in env */ |
|
|
|
|
|
|
|
munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
x = mdb_mid3l_search(el, tl[i].mid); |
|
|
|
|
|
|
|
if (tl[i].mptr == el[x].mptr) { |
|
|
|
|
|
|
|
el[x].mref--; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
/* another tmp overflow page */ |
|
|
|
|
|
|
|
munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
tl[0].mid = 0; |
|
|
|
|
|
|
|
if (mode & MDB_END_FREE) |
|
|
|
|
|
|
|
free(tl); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
if (mode & MDB_END_FREE) |
|
|
|
if (mode & MDB_END_FREE) |
|
|
|
free(txn); |
|
|
|
free(txn); |
|
|
|
} |
|
|
|
} |
|
|
@ -3460,6 +3567,10 @@ retry_seek: |
|
|
|
n++; |
|
|
|
n++; |
|
|
|
#endif /* _WIN32 */ |
|
|
|
#endif /* _WIN32 */ |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (pgno > txn->mt_last_pgno) |
|
|
|
|
|
|
|
txn->mt_last_pgno = pgno; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
/* MIPS has cache coherency issues, this is a no-op everywhere else
|
|
|
|
/* MIPS has cache coherency issues, this is a no-op everywhere else
|
|
|
|
* Note: for any size >= on-chip cache size, entire on-chip cache is |
|
|
|
* Note: for any size >= on-chip cache size, entire on-chip cache is |
|
|
@ -4046,11 +4157,27 @@ mdb_env_map(MDB_env *env, void *addr) |
|
|
|
if (rc) |
|
|
|
if (rc) |
|
|
|
return rc; |
|
|
|
return rc; |
|
|
|
map = addr; |
|
|
|
map = addr; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
msize = NUM_METAS * env->me_psize; |
|
|
|
|
|
|
|
#endif |
|
|
|
rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot); |
|
|
|
rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot); |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
env->me_fmh = mh; |
|
|
|
|
|
|
|
#else |
|
|
|
NtClose(mh); |
|
|
|
NtClose(mh); |
|
|
|
|
|
|
|
#endif |
|
|
|
if (rc) |
|
|
|
if (rc) |
|
|
|
return rc; |
|
|
|
return rc; |
|
|
|
env->me_map = map; |
|
|
|
env->me_map = map; |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
(void) flags; |
|
|
|
|
|
|
|
env->me_map = mmap(addr, NUM_METAS * env->me_psize, PROT_READ, MAP_SHARED, |
|
|
|
|
|
|
|
env->me_fd, 0); |
|
|
|
|
|
|
|
if (env->me_map == MAP_FAILED) { |
|
|
|
|
|
|
|
env->me_map = NULL; |
|
|
|
|
|
|
|
return ErrCode(); |
|
|
|
|
|
|
|
} |
|
|
|
#else |
|
|
|
#else |
|
|
|
int prot = PROT_READ; |
|
|
|
int prot = PROT_READ; |
|
|
|
if (flags & MDB_WRITEMAP) { |
|
|
|
if (flags & MDB_WRITEMAP) { |
|
|
@ -4084,6 +4211,7 @@ mdb_env_map(MDB_env *env, void *addr) |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (addr && env->me_map != addr) |
|
|
|
if (addr && env->me_map != addr) |
|
|
|
return EBUSY; /* TODO: Make a new MDB_* error code? */ |
|
|
|
return EBUSY; /* TODO: Make a new MDB_* error code? */ |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
p = (MDB_page *)env->me_map; |
|
|
|
p = (MDB_page *)env->me_map; |
|
|
|
env->me_metas[0] = METADATA(p); |
|
|
|
env->me_metas[0] = METADATA(p); |
|
|
@ -4099,9 +4227,11 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size) |
|
|
|
* sure there are no active txns. |
|
|
|
* sure there are no active txns. |
|
|
|
*/ |
|
|
|
*/ |
|
|
|
if (env->me_map) { |
|
|
|
if (env->me_map) { |
|
|
|
int rc; |
|
|
|
|
|
|
|
MDB_meta *meta; |
|
|
|
MDB_meta *meta; |
|
|
|
|
|
|
|
#ifndef MDB_VL32 |
|
|
|
void *old; |
|
|
|
void *old; |
|
|
|
|
|
|
|
int rc; |
|
|
|
|
|
|
|
#endif |
|
|
|
if (env->me_txn) |
|
|
|
if (env->me_txn) |
|
|
|
return EINVAL; |
|
|
|
return EINVAL; |
|
|
|
meta = mdb_env_pick_meta(env); |
|
|
|
meta = mdb_env_pick_meta(env); |
|
|
@ -4113,12 +4243,17 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size) |
|
|
|
if (size < minsize) |
|
|
|
if (size < minsize) |
|
|
|
size = minsize; |
|
|
|
size = minsize; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef MDB_VL32 |
|
|
|
|
|
|
|
/* For MDB_VL32 this bit is a noop since we dynamically remap
|
|
|
|
|
|
|
|
* chunks of the DB anyway. |
|
|
|
|
|
|
|
*/ |
|
|
|
munmap(env->me_map, env->me_mapsize); |
|
|
|
munmap(env->me_map, env->me_mapsize); |
|
|
|
env->me_mapsize = size; |
|
|
|
env->me_mapsize = size; |
|
|
|
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; |
|
|
|
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL; |
|
|
|
rc = mdb_env_map(env, old); |
|
|
|
rc = mdb_env_map(env, old); |
|
|
|
if (rc) |
|
|
|
if (rc) |
|
|
|
return rc; |
|
|
|
return rc; |
|
|
|
|
|
|
|
#endif /* !MDB_VL32 */ |
|
|
|
} |
|
|
|
} |
|
|
|
env->me_mapsize = size; |
|
|
|
env->me_mapsize = size; |
|
|
|
if (env->me_psize) |
|
|
|
if (env->me_psize) |
|
|
@ -4843,6 +4978,13 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl) |
|
|
|
env->me_wmutex->semnum = 1; |
|
|
|
env->me_wmutex->semnum = 1; |
|
|
|
env->me_rmutex->locked = &env->me_txns->mti_rlocked; |
|
|
|
env->me_rmutex->locked = &env->me_txns->mti_rlocked; |
|
|
|
env->me_wmutex->locked = &env->me_txns->mti_wlocked; |
|
|
|
env->me_wmutex->locked = &env->me_txns->mti_wlocked; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
|
|
|
|
|
|
env->me_rpmutex = CreateMutex(NULL, FALSE, NULL); |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
pthread_mutex_init(env->me_rpmutex, NULL); |
|
|
|
|
|
|
|
#endif |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
return MDB_SUCCESS; |
|
|
|
return MDB_SUCCESS; |
|
|
@ -4883,6 +5025,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode |
|
|
|
if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) |
|
|
|
if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) |
|
|
|
return EINVAL; |
|
|
|
return EINVAL; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (flags & MDB_WRITEMAP) { |
|
|
|
|
|
|
|
/* silently ignore WRITEMAP in 32 bit mode */ |
|
|
|
|
|
|
|
flags ^= MDB_WRITEMAP; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (flags & MDB_FIXEDMAP) { |
|
|
|
|
|
|
|
/* cannot support FIXEDMAP */ |
|
|
|
|
|
|
|
return EINVAL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
len = strlen(path); |
|
|
|
len = strlen(path); |
|
|
|
if (flags & MDB_NOSUBDIR) { |
|
|
|
if (flags & MDB_NOSUBDIR) { |
|
|
|
rc = len + sizeof(LOCKSUFF) + len + 1; |
|
|
|
rc = len + sizeof(LOCKSUFF) + len + 1; |
|
|
@ -4912,6 +5065,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode |
|
|
|
(env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) |
|
|
|
(env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2))))) |
|
|
|
rc = ENOMEM; |
|
|
|
rc = ENOMEM; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (!rc) { |
|
|
|
|
|
|
|
env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3)); |
|
|
|
|
|
|
|
if (!env->me_rpages) { |
|
|
|
|
|
|
|
rc = ENOMEM; |
|
|
|
|
|
|
|
goto leave; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
env->me_rpages[0].mid = 0; |
|
|
|
|
|
|
|
env->me_rpcheck = MDB_ERPAGE_SIZE/2; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
env->me_flags = flags |= MDB_ENV_ACTIVE; |
|
|
|
env->me_flags = flags |= MDB_ENV_ACTIVE; |
|
|
|
if (rc) |
|
|
|
if (rc) |
|
|
|
goto leave; |
|
|
|
goto leave; |
|
|
@ -5010,6 +5174,16 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode |
|
|
|
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); |
|
|
|
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); |
|
|
|
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); |
|
|
|
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); |
|
|
|
txn->mt_env = env; |
|
|
|
txn->mt_env = env; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3)); |
|
|
|
|
|
|
|
if (!txn->mt_rpages) { |
|
|
|
|
|
|
|
free(txn); |
|
|
|
|
|
|
|
rc = ENOMEM; |
|
|
|
|
|
|
|
goto leave; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
txn->mt_rpages[0].mid = 0; |
|
|
|
|
|
|
|
txn->mt_rpcheck = MDB_TRPAGE_SIZE/2; |
|
|
|
|
|
|
|
#endif |
|
|
|
txn->mt_dbxs = env->me_dbxs; |
|
|
|
txn->mt_dbxs = env->me_dbxs; |
|
|
|
txn->mt_flags = MDB_TXN_FINISHED; |
|
|
|
txn->mt_flags = MDB_TXN_FINISHED; |
|
|
|
env->me_txn0 = txn; |
|
|
|
env->me_txn0 = txn; |
|
|
@ -5049,6 +5223,13 @@ mdb_env_close0(MDB_env *env, int excl) |
|
|
|
free(env->me_path); |
|
|
|
free(env->me_path); |
|
|
|
free(env->me_dirty_list); |
|
|
|
free(env->me_dirty_list); |
|
|
|
free(env->me_txn0); |
|
|
|
free(env->me_txn0); |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
{ unsigned int x; |
|
|
|
|
|
|
|
for (x=1; x<=env->me_rpages[0].mid; x++) |
|
|
|
|
|
|
|
munmap(env->me_rpages[x].mptr, env->me_rpages[x].mcnt * env->me_psize); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
free(env->me_rpages); |
|
|
|
|
|
|
|
#endif |
|
|
|
mdb_midl_free(env->me_free_pgs); |
|
|
|
mdb_midl_free(env->me_free_pgs); |
|
|
|
|
|
|
|
|
|
|
|
if (env->me_flags & MDB_ENV_TXKEY) { |
|
|
|
if (env->me_flags & MDB_ENV_TXKEY) { |
|
|
@ -5065,7 +5246,11 @@ mdb_env_close0(MDB_env *env, int excl) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (env->me_map) { |
|
|
|
if (env->me_map) { |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
munmap(env->me_map, NUM_METAS*env->me_psize); |
|
|
|
|
|
|
|
#else |
|
|
|
munmap(env->me_map, env->me_mapsize); |
|
|
|
munmap(env->me_map, env->me_mapsize); |
|
|
|
|
|
|
|
#endif |
|
|
|
} |
|
|
|
} |
|
|
|
if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) |
|
|
|
if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) |
|
|
|
(void) close(env->me_mfd); |
|
|
|
(void) close(env->me_mfd); |
|
|
@ -5130,6 +5315,13 @@ mdb_env_close0(MDB_env *env, int excl) |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
(void) close(env->me_lfd); |
|
|
|
(void) close(env->me_lfd); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
|
|
|
|
|
|
if (env->me_rpmutex) CloseHandle(env->me_rpmutex); |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
pthread_mutex_destroy(env->me_rpmutex); |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); |
|
|
|
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY); |
|
|
|
} |
|
|
|
} |
|
|
@ -5396,6 +5588,309 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) |
|
|
|
return MDB_SUCCESS; |
|
|
|
return MDB_SUCCESS; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
/** Map a read-only page.
|
|
|
|
|
|
|
|
* There are two levels of tracking in use, a per-txn list and a per-env list. |
|
|
|
|
|
|
|
* ref'ing and unref'ing the per-txn list is faster since it requires no |
|
|
|
|
|
|
|
* locking. Pages are cached in the per-env list for global reuse, and a lock |
|
|
|
|
|
|
|
* is required. Pages are not immediately unmapped when their refcnt goes to |
|
|
|
|
|
|
|
* zero; they hang around in case they will be reused again soon. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* When the per-txn list gets full, all pages with refcnt=0 are purged from the |
|
|
|
|
|
|
|
* list and their refcnts in the per-env list are decremented. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* When the per-env list gets full, all pages with refcnt=0 are purged from the |
|
|
|
|
|
|
|
* list and their pages are unmapped. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* @note "full" means the list has reached its respective rpcheck threshold. |
|
|
|
|
|
|
|
* This threshold slowly raises if no pages could be purged on a given check, |
|
|
|
|
|
|
|
* and returns to its original value when enough pages were purged. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* If purging doesn't free any slots, filling the per-txn list will return |
|
|
|
|
|
|
|
* MDB_TXN_FULL, and filling the per-env list returns MDB_MAP_FULL. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* Reference tracking in a txn is imperfect, pages can linger with non-zero |
|
|
|
|
|
|
|
* refcnt even without active references. It was deemed to be too invasive |
|
|
|
|
|
|
|
* to add unrefs in every required location. However, all pages are unref'd |
|
|
|
|
|
|
|
* at the end of the transaction. This guarantees that no stale references |
|
|
|
|
|
|
|
* linger in the per-env list. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* Usually we map chunks of 16 pages at a time, but if an overflow page begins |
|
|
|
|
|
|
|
* at the tail of the chunk we extend the chunk to include the entire overflow |
|
|
|
|
|
|
|
* page. Unfortunately, pages can be turned into overflow pages after their |
|
|
|
|
|
|
|
* chunk was already mapped. In that case we must remap the chunk if the |
|
|
|
|
|
|
|
* overflow page is referenced. If the chunk's refcnt is 0 we can just remap |
|
|
|
|
|
|
|
* it, otherwise we temporarily map a new chunk just for the overflow page. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* @note this chunk handling means we cannot guarantee that a data item |
|
|
|
|
|
|
|
* returned from the DB will stay alive for the duration of the transaction: |
|
|
|
|
|
|
|
* We unref pages as soon as a cursor moves away from the page |
|
|
|
|
|
|
|
* A subsequent op may cause a purge, which may unmap any unref'd chunks |
|
|
|
|
|
|
|
* The caller must copy the data if it must be used later in the same txn. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* Also - our reference counting revolves around cursors, but overflow pages |
|
|
|
|
|
|
|
* aren't ever pointed to by a cursor. As such, they always remain referenced |
|
|
|
|
|
|
|
* in a txn until it ends. |
|
|
|
|
|
|
|
* |
|
|
|
|
|
|
|
* @param[in] txn the transaction for this access. |
|
|
|
|
|
|
|
* @param[in] pgno the page number for the page to retrieve. |
|
|
|
|
|
|
|
* @param[out] ret address of a pointer where the page's address will be stored. |
|
|
|
|
|
|
|
* @return 0 on success, non-zero on failure. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
static int |
|
|
|
|
|
|
|
mdb_rpage_get(MDB_txn *txn, pgno_t pg0, MDB_page **ret) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
MDB_env *env = txn->mt_env; |
|
|
|
|
|
|
|
MDB_page *p; |
|
|
|
|
|
|
|
MDB_ID3L tl = txn->mt_rpages; |
|
|
|
|
|
|
|
MDB_ID3L el = env->me_rpages; |
|
|
|
|
|
|
|
MDB_ID3 id3; |
|
|
|
|
|
|
|
unsigned x, rem; |
|
|
|
|
|
|
|
pgno_t pgno; |
|
|
|
|
|
|
|
int rc, retries = 1; |
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
|
|
|
|
|
|
LARGE_INTEGER off; |
|
|
|
|
|
|
|
SIZE_T len; |
|
|
|
|
|
|
|
#define SET_OFF(off,val) off.QuadPart = val |
|
|
|
|
|
|
|
#define MAP(rc,env,addr,len,off) \ |
|
|
|
|
|
|
|
addr = NULL; \
|
|
|
|
|
|
|
|
rc = NtMapViewOfSection(env->me_fmh, GetCurrentProcess(), &addr, 0, \
|
|
|
|
|
|
|
|
len, &off, &len, ViewUnmap, MEM_RESERVE, PAGE_READONLY) |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
off_t off; |
|
|
|
|
|
|
|
size_t len; |
|
|
|
|
|
|
|
#define SET_OFF(off,val) off = val |
|
|
|
|
|
|
|
#define MAP(rc,env,addr,len,off) \ |
|
|
|
|
|
|
|
addr = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); \
|
|
|
|
|
|
|
|
rc = (addr == MAP_FAILED) ? errno : 0 |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* remember the offset of the actual page number, so we can
|
|
|
|
|
|
|
|
* return the correct pointer at the end. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
rem = pg0 & (MDB_RPAGE_CHUNK-1); |
|
|
|
|
|
|
|
pgno = pg0 ^ rem; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
id3.mid = 0; |
|
|
|
|
|
|
|
x = mdb_mid3l_search(tl, pgno); |
|
|
|
|
|
|
|
if (x <= tl[0].mid && tl[x].mid == pgno) { |
|
|
|
|
|
|
|
if (x != tl[0].mid && tl[x+1].mid == pg0) |
|
|
|
|
|
|
|
x++; |
|
|
|
|
|
|
|
/* check for overflow size */ |
|
|
|
|
|
|
|
p = (MDB_page *)((char *)tl[x].mptr + rem * env->me_psize); |
|
|
|
|
|
|
|
if (IS_OVERFLOW(p) && p->mp_pages + rem > tl[x].mcnt) { |
|
|
|
|
|
|
|
id3.mcnt = p->mp_pages + rem; |
|
|
|
|
|
|
|
len = id3.mcnt * env->me_psize; |
|
|
|
|
|
|
|
SET_OFF(off, pgno * env->me_psize); |
|
|
|
|
|
|
|
MAP(rc, env, id3.mptr, len, off); |
|
|
|
|
|
|
|
if (rc) |
|
|
|
|
|
|
|
return rc; |
|
|
|
|
|
|
|
/* check for local-only page */ |
|
|
|
|
|
|
|
if (rem) { |
|
|
|
|
|
|
|
mdb_tassert(txn, tl[x].mid != pg0); |
|
|
|
|
|
|
|
/* hope there's room to insert this locally.
|
|
|
|
|
|
|
|
* setting mid here tells later code to just insert |
|
|
|
|
|
|
|
* this id3 instead of searching for a match. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
id3.mid = pg0; |
|
|
|
|
|
|
|
goto notlocal; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
/* ignore the mapping we got from env, use new one */ |
|
|
|
|
|
|
|
tl[x].mptr = id3.mptr; |
|
|
|
|
|
|
|
tl[x].mcnt = id3.mcnt; |
|
|
|
|
|
|
|
/* if no active ref, see if we can replace in env */ |
|
|
|
|
|
|
|
if (!tl[x].mref) { |
|
|
|
|
|
|
|
unsigned i; |
|
|
|
|
|
|
|
LOCK_MUTEX0(env->me_rpmutex); |
|
|
|
|
|
|
|
i = mdb_mid3l_search(el, tl[x].mid); |
|
|
|
|
|
|
|
if (el[i].mref == 1) { |
|
|
|
|
|
|
|
/* just us, replace it */ |
|
|
|
|
|
|
|
munmap(el[i].mptr, el[i].mcnt * env->me_psize); |
|
|
|
|
|
|
|
el[i].mptr = tl[x].mptr; |
|
|
|
|
|
|
|
el[i].mcnt = tl[x].mcnt; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
/* there are others, remove ourself */ |
|
|
|
|
|
|
|
el[i].mref--; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
id3.mptr = tl[x].mptr; |
|
|
|
|
|
|
|
id3.mcnt = tl[x].mcnt; |
|
|
|
|
|
|
|
tl[x].mref++; |
|
|
|
|
|
|
|
goto ok; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
notlocal: |
|
|
|
|
|
|
|
if (tl[0].mid >= MDB_TRPAGE_MAX - txn->mt_rpcheck) { |
|
|
|
|
|
|
|
unsigned i, y; |
|
|
|
|
|
|
|
/* purge unref'd pages from our list and unref in env */ |
|
|
|
|
|
|
|
LOCK_MUTEX0(env->me_rpmutex); |
|
|
|
|
|
|
|
retry: |
|
|
|
|
|
|
|
y = 0; |
|
|
|
|
|
|
|
for (i=1; i<tl[0].mid; i++) { |
|
|
|
|
|
|
|
if (!tl[i].mref) { |
|
|
|
|
|
|
|
if (!y) y = i; |
|
|
|
|
|
|
|
/* tmp overflow pages don't go to env */ |
|
|
|
|
|
|
|
if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) { |
|
|
|
|
|
|
|
munmap(tl[i].mptr, tl[i].mcnt * env->me_psize); |
|
|
|
|
|
|
|
continue; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
x = mdb_mid3l_search(el, tl[i].mid); |
|
|
|
|
|
|
|
el[x].mref--; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
if (!y) { |
|
|
|
|
|
|
|
/* we didn't find any unref'd chunks.
|
|
|
|
|
|
|
|
* if we're out of room, fail. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
if (tl[0].mid >= MDB_TRPAGE_MAX) |
|
|
|
|
|
|
|
return MDB_TXN_FULL; |
|
|
|
|
|
|
|
/* otherwise, raise threshold for next time around
|
|
|
|
|
|
|
|
* and let this go. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
txn->mt_rpcheck /= 2; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
/* we found some unused; consolidate the list */ |
|
|
|
|
|
|
|
for (i=y+1; i<= tl[0].mid; i++) |
|
|
|
|
|
|
|
if (tl[i].mref) |
|
|
|
|
|
|
|
tl[y++] = tl[i]; |
|
|
|
|
|
|
|
tl[0].mid = y-1; |
|
|
|
|
|
|
|
/* decrease the check threshold toward its original value */ |
|
|
|
|
|
|
|
if (!txn->mt_rpcheck) |
|
|
|
|
|
|
|
txn->mt_rpcheck = 1; |
|
|
|
|
|
|
|
while (txn->mt_rpcheck < tl[0].mid && txn->mt_rpcheck < MDB_TRPAGE_SIZE/2) |
|
|
|
|
|
|
|
txn->mt_rpcheck *= 2; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (tl[0].mid < MDB_TRPAGE_SIZE) { |
|
|
|
|
|
|
|
id3.mref = 1; |
|
|
|
|
|
|
|
if (id3.mid) |
|
|
|
|
|
|
|
goto found; |
|
|
|
|
|
|
|
len = env->me_psize * MDB_RPAGE_CHUNK; |
|
|
|
|
|
|
|
id3.mid = pgno; |
|
|
|
|
|
|
|
id3.mcnt = MDB_RPAGE_CHUNK; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* search for page in env */ |
|
|
|
|
|
|
|
LOCK_MUTEX0(env->me_rpmutex); |
|
|
|
|
|
|
|
x = mdb_mid3l_search(el, pgno); |
|
|
|
|
|
|
|
if (x <= el[0].mid && el[x].mid == pgno) { |
|
|
|
|
|
|
|
id3.mptr = el[x].mptr; |
|
|
|
|
|
|
|
id3.mcnt = el[x].mcnt; |
|
|
|
|
|
|
|
/* check for overflow size */ |
|
|
|
|
|
|
|
p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); |
|
|
|
|
|
|
|
if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) { |
|
|
|
|
|
|
|
id3.mcnt = p->mp_pages + rem; |
|
|
|
|
|
|
|
len = id3.mcnt * env->me_psize; |
|
|
|
|
|
|
|
SET_OFF(off, pgno * env->me_psize); |
|
|
|
|
|
|
|
MAP(rc, env, id3.mptr, len, off); |
|
|
|
|
|
|
|
if (rc) |
|
|
|
|
|
|
|
goto fail; |
|
|
|
|
|
|
|
if (!el[x].mref) { |
|
|
|
|
|
|
|
munmap(el[x].mptr, el[x].mcnt); |
|
|
|
|
|
|
|
el[x].mptr = id3.mptr; |
|
|
|
|
|
|
|
el[x].mcnt = id3.mcnt; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
id3.mid = pg0; |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
goto found; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
el[x].mref++; |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
goto found; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (el[0].mid >= MDB_ERPAGE_MAX - env->me_rpcheck) { |
|
|
|
|
|
|
|
/* purge unref'd pages */ |
|
|
|
|
|
|
|
unsigned i, y = 0; |
|
|
|
|
|
|
|
for (i=1; i<el[0].mid; i++) { |
|
|
|
|
|
|
|
if (!el[i].mref) { |
|
|
|
|
|
|
|
if (!y) y = i; |
|
|
|
|
|
|
|
munmap(el[i].mptr, env->me_psize * el[i].mcnt); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (!y) { |
|
|
|
|
|
|
|
if (retries) { |
|
|
|
|
|
|
|
/* see if we can unref some local pages */ |
|
|
|
|
|
|
|
retries--; |
|
|
|
|
|
|
|
id3.mid = 0; |
|
|
|
|
|
|
|
goto retry; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
if (el[0].mid >= MDB_ERPAGE_MAX) { |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
return MDB_MAP_FULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
env->me_rpcheck /= 2; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
for (i=y+1; i<= el[0].mid; i++) |
|
|
|
|
|
|
|
if (el[i].mref) |
|
|
|
|
|
|
|
el[y++] = el[i]; |
|
|
|
|
|
|
|
el[0].mid = y-1; |
|
|
|
|
|
|
|
if (!env->me_rpcheck) |
|
|
|
|
|
|
|
env->me_rpcheck = 1; |
|
|
|
|
|
|
|
while (env->me_rpcheck < el[0].mid && env->me_rpcheck < MDB_ERPAGE_SIZE/2) |
|
|
|
|
|
|
|
env->me_rpcheck *= 2; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
SET_OFF(off, pgno * env->me_psize); |
|
|
|
|
|
|
|
MAP(rc, env, id3.mptr, len, off); |
|
|
|
|
|
|
|
if (rc) { |
|
|
|
|
|
|
|
fail: |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
return rc; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
/* If this page is far enough from the end of the env, scan for
|
|
|
|
|
|
|
|
* any overflow pages that would spill onto another block. |
|
|
|
|
|
|
|
* Note we must compare against mt_last_pgno, the last written |
|
|
|
|
|
|
|
* page in the environment. Not mt_next_pgno, which increases |
|
|
|
|
|
|
|
* for every newly allocated (but not yet written) page. If |
|
|
|
|
|
|
|
* we scanned beyond the last written page we'd get a bus error. |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
if (pgno + MDB_RPAGE_CHUNK <= txn->mt_last_pgno) { |
|
|
|
|
|
|
|
int i; |
|
|
|
|
|
|
|
char *cp = (char *)id3.mptr + rem * env->me_psize; |
|
|
|
|
|
|
|
for (i=rem; i<MDB_RPAGE_CHUNK;) { |
|
|
|
|
|
|
|
p = (MDB_page *)cp; |
|
|
|
|
|
|
|
if (IS_OVERFLOW(p)) { |
|
|
|
|
|
|
|
int nop = p->mp_pages; |
|
|
|
|
|
|
|
if (nop + i > MDB_RPAGE_CHUNK) { |
|
|
|
|
|
|
|
munmap(id3.mptr, len); |
|
|
|
|
|
|
|
id3.mcnt = nop + i; |
|
|
|
|
|
|
|
len = id3.mcnt * env->me_psize; |
|
|
|
|
|
|
|
MAP(rc, env, id3.mptr, len, off); |
|
|
|
|
|
|
|
if (rc) |
|
|
|
|
|
|
|
goto fail; |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
i += nop; |
|
|
|
|
|
|
|
cp += nop * env->me_psize; |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
i++; |
|
|
|
|
|
|
|
cp += env->me_psize; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
mdb_mid3l_insert(el, &id3); |
|
|
|
|
|
|
|
UNLOCK_MUTEX(env->me_rpmutex); |
|
|
|
|
|
|
|
found: |
|
|
|
|
|
|
|
mdb_mid3l_insert(tl, &id3); |
|
|
|
|
|
|
|
} else { |
|
|
|
|
|
|
|
return MDB_TXN_FULL; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
ok: |
|
|
|
|
|
|
|
p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize); |
|
|
|
|
|
|
|
#if MDB_DEBUG /* we don't need this check any more */ |
|
|
|
|
|
|
|
if (IS_OVERFLOW(p)) { |
|
|
|
|
|
|
|
mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
*ret = p; |
|
|
|
|
|
|
|
return MDB_SUCCESS; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
/** Find the address of the page corresponding to a given page number.
|
|
|
|
/** Find the address of the page corresponding to a given page number.
|
|
|
|
* @param[in] txn the transaction for this access. |
|
|
|
* @param[in] txn the transaction for this access. |
|
|
|
* @param[in] pgno the page number for the page to retrieve. |
|
|
|
* @param[in] pgno the page number for the page to retrieve. |
|
|
@ -5406,7 +5901,9 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp) |
|
|
|
static int |
|
|
|
static int |
|
|
|
mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) |
|
|
|
mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) |
|
|
|
{ |
|
|
|
{ |
|
|
|
|
|
|
|
#ifndef MDB_VL32 |
|
|
|
MDB_env *env = txn->mt_env; |
|
|
|
MDB_env *env = txn->mt_env; |
|
|
|
|
|
|
|
#endif |
|
|
|
MDB_page *p = NULL; |
|
|
|
MDB_page *p = NULL; |
|
|
|
int level; |
|
|
|
int level; |
|
|
|
|
|
|
|
|
|
|
@ -5425,7 +5922,13 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) |
|
|
|
MDB_ID pn = pgno << 1; |
|
|
|
MDB_ID pn = pgno << 1; |
|
|
|
x = mdb_midl_search(tx2->mt_spill_pgs, pn); |
|
|
|
x = mdb_midl_search(tx2->mt_spill_pgs, pn); |
|
|
|
if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { |
|
|
|
if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) { |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
int rc = mdb_rpage_get(txn, pgno, &p); |
|
|
|
|
|
|
|
if (rc) |
|
|
|
|
|
|
|
return rc; |
|
|
|
|
|
|
|
#else |
|
|
|
p = (MDB_page *)(env->me_map + env->me_psize * pgno); |
|
|
|
p = (MDB_page *)(env->me_map + env->me_psize * pgno); |
|
|
|
|
|
|
|
#endif |
|
|
|
goto done; |
|
|
|
goto done; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
@ -5442,7 +5945,15 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) |
|
|
|
|
|
|
|
|
|
|
|
if (pgno < txn->mt_next_pgno) { |
|
|
|
if (pgno < txn->mt_next_pgno) { |
|
|
|
level = 0; |
|
|
|
level = 0; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int rc = mdb_rpage_get(txn, pgno, &p); |
|
|
|
|
|
|
|
if (rc) |
|
|
|
|
|
|
|
return rc; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#else |
|
|
|
p = (MDB_page *)(env->me_map + env->me_psize * pgno); |
|
|
|
p = (MDB_page *)(env->me_map + env->me_psize * pgno); |
|
|
|
|
|
|
|
#endif |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
DPRINTF(("page %"Y"u not found", pgno)); |
|
|
|
DPRINTF(("page %"Y"u not found", pgno)); |
|
|
|
txn->mt_flags |= MDB_TXN_ERROR; |
|
|
|
txn->mt_flags |= MDB_TXN_ERROR; |
|
|
@ -5617,10 +6128,22 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
mdb_cassert(mc, root > 1); |
|
|
|
mdb_cassert(mc, root > 1); |
|
|
|
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) |
|
|
|
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) { |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (mc->mc_pg[0]) |
|
|
|
|
|
|
|
MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[0]); |
|
|
|
|
|
|
|
#endif |
|
|
|
if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) |
|
|
|
if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0) |
|
|
|
return rc; |
|
|
|
return rc; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
int i; |
|
|
|
|
|
|
|
for (i=1; i<mc->mc_snum; i++) |
|
|
|
|
|
|
|
MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
mc->mc_snum = 1; |
|
|
|
mc->mc_snum = 1; |
|
|
|
mc->mc_top = 0; |
|
|
|
mc->mc_top = 0; |
|
|
|
|
|
|
|
|
|
|
@ -5752,7 +6275,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, |
|
|
|
{ |
|
|
|
{ |
|
|
|
MDB_cursor mc; |
|
|
|
MDB_cursor mc; |
|
|
|
MDB_xcursor mx; |
|
|
|
MDB_xcursor mx; |
|
|
|
int exact = 0; |
|
|
|
int exact = 0, rc; |
|
|
|
DKBUF; |
|
|
|
DKBUF; |
|
|
|
|
|
|
|
|
|
|
|
DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); |
|
|
|
DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key))); |
|
|
@ -5764,7 +6287,16 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi, |
|
|
|
return MDB_BAD_TXN; |
|
|
|
return MDB_BAD_TXN; |
|
|
|
|
|
|
|
|
|
|
|
mdb_cursor_init(&mc, txn, dbi, &mx); |
|
|
|
mdb_cursor_init(&mc, txn, dbi, &mx); |
|
|
|
return mdb_cursor_set(&mc, key, data, MDB_SET, &exact); |
|
|
|
rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact); |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
/* unref all the pages - caller must copy the data
|
|
|
|
|
|
|
|
* before doing anything else |
|
|
|
|
|
|
|
*/ |
|
|
|
|
|
|
|
mdb_cursor_unref(&mc); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
return rc; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** Find a sibling for a page.
|
|
|
|
/** Find a sibling for a page.
|
|
|
@ -5781,11 +6313,17 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) |
|
|
|
int rc; |
|
|
|
int rc; |
|
|
|
MDB_node *indx; |
|
|
|
MDB_node *indx; |
|
|
|
MDB_page *mp; |
|
|
|
MDB_page *mp; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
MDB_page *op; |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
if (mc->mc_snum < 2) { |
|
|
|
if (mc->mc_snum < 2) { |
|
|
|
return MDB_NOTFOUND; /* root has no siblings */ |
|
|
|
return MDB_NOTFOUND; /* root has no siblings */ |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
op = mc->mc_pg[mc->mc_top]; |
|
|
|
|
|
|
|
#endif |
|
|
|
mdb_cursor_pop(mc); |
|
|
|
mdb_cursor_pop(mc); |
|
|
|
DPRINTF(("parent page is page %"Y"u, index %u", |
|
|
|
DPRINTF(("parent page is page %"Y"u, index %u", |
|
|
|
mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); |
|
|
|
mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); |
|
|
@ -5810,6 +6348,8 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) |
|
|
|
} |
|
|
|
} |
|
|
|
mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); |
|
|
|
mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MDB_PAGE_UNREF(mc->mc_txn, op); |
|
|
|
|
|
|
|
|
|
|
|
indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); |
|
|
|
indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); |
|
|
|
if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { |
|
|
|
if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { |
|
|
|
/* mc will be inconsistent if caller does mc_snum++ as above */ |
|
|
|
/* mc will be inconsistent if caller does mc_snum++ as above */ |
|
|
@ -5851,6 +6391,13 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) |
|
|
|
return rc; |
|
|
|
return rc; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
else { |
|
|
|
|
|
|
|
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { |
|
|
|
|
|
|
|
mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
if (op == MDB_NEXT_DUP) |
|
|
|
if (op == MDB_NEXT_DUP) |
|
|
@ -5930,6 +6477,13 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op) |
|
|
|
return rc; |
|
|
|
return rc; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
else { |
|
|
|
|
|
|
|
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { |
|
|
|
|
|
|
|
mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
if (op == MDB_PREV_DUP) |
|
|
|
if (op == MDB_PREV_DUP) |
|
|
@ -6129,6 +6683,11 @@ set1: |
|
|
|
return MDB_SUCCESS; |
|
|
|
return MDB_SUCCESS; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (mc->mc_xcursor && mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { |
|
|
|
|
|
|
|
mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { |
|
|
|
if (F_ISSET(leaf->mn_flags, F_DUPDATA)) { |
|
|
|
mdb_xcursor_init1(mc, leaf); |
|
|
|
mdb_xcursor_init1(mc, leaf); |
|
|
|
} |
|
|
|
} |
|
|
@ -6189,8 +6748,14 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data) |
|
|
|
int rc; |
|
|
|
int rc; |
|
|
|
MDB_node *leaf; |
|
|
|
MDB_node *leaf; |
|
|
|
|
|
|
|
|
|
|
|
if (mc->mc_xcursor) |
|
|
|
if (mc->mc_xcursor) { |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { |
|
|
|
|
|
|
|
mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { |
|
|
|
if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { |
|
|
|
rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); |
|
|
|
rc = mdb_page_search(mc, NULL, MDB_PS_FIRST); |
|
|
@ -6233,8 +6798,14 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data) |
|
|
|
int rc; |
|
|
|
int rc; |
|
|
|
MDB_node *leaf; |
|
|
|
MDB_node *leaf; |
|
|
|
|
|
|
|
|
|
|
|
if (mc->mc_xcursor) |
|
|
|
if (mc->mc_xcursor) { |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { |
|
|
|
|
|
|
|
mdb_cursor_unref(&mc->mc_xcursor->mx_cursor); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
if (!(mc->mc_flags & C_EOF)) { |
|
|
|
if (!(mc->mc_flags & C_EOF)) { |
|
|
|
|
|
|
|
|
|
|
@ -9854,6 +10425,11 @@ mdb_drop0(MDB_cursor *mc, int subs) |
|
|
|
mdb_cursor_pop(mc); |
|
|
|
mdb_cursor_pop(mc); |
|
|
|
|
|
|
|
|
|
|
|
mdb_cursor_copy(mc, &mx); |
|
|
|
mdb_cursor_copy(mc, &mx); |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
/* bump refcount for mx's pages */ |
|
|
|
|
|
|
|
for (i=0; i<mc->mc_snum; i++) |
|
|
|
|
|
|
|
mdb_page_get(txn, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL); |
|
|
|
|
|
|
|
#endif |
|
|
|
while (mc->mc_snum > 0) { |
|
|
|
while (mc->mc_snum > 0) { |
|
|
|
MDB_page *mp = mc->mc_pg[mc->mc_top]; |
|
|
|
MDB_page *mp = mc->mc_pg[mc->mc_top]; |
|
|
|
unsigned n = NUMKEYS(mp); |
|
|
|
unsigned n = NUMKEYS(mp); |
|
|
@ -9913,6 +10489,10 @@ mdb_drop0(MDB_cursor *mc, int subs) |
|
|
|
done: |
|
|
|
done: |
|
|
|
if (rc) |
|
|
|
if (rc) |
|
|
|
txn->mt_flags |= MDB_TXN_ERROR; |
|
|
|
txn->mt_flags |= MDB_TXN_ERROR; |
|
|
|
|
|
|
|
#ifdef MDB_VL32 |
|
|
|
|
|
|
|
/* drop refcount for mx's pages */ |
|
|
|
|
|
|
|
mdb_cursor_unref(&mx); |
|
|
|
|
|
|
|
#endif |
|
|
|
} else if (rc == MDB_NOTFOUND) { |
|
|
|
} else if (rc == MDB_NOTFOUND) { |
|
|
|
rc = MDB_SUCCESS; |
|
|
|
rc = MDB_SUCCESS; |
|
|
|
} |
|
|
|
} |
|
|
|