|
|
|
@@ -1340,14 +1340,27 @@ mdb_find_oldest(MDB_txn *txn)
static int
mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
{
#ifdef MDB_PARANOID	/* Seems like we can ignore this now */
	/* Get at most <Max_retries> more freeDB records once me_pghead
	 * has enough pages.  If not enough, use new pages from the map.
	 * If <Paranoid> and mc is updating the freeDB, only get new
	 * records if me_pghead is empty. Then the freelist cannot play
	 * catch-up with itself by growing while trying to save it.
	 */
	enum { Paranoid = 1, Max_retries = 500 };
#else
	enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
#endif
	int rc, n2 = num-1, retry = Max_retries;
	MDB_txn *txn = mc->mc_txn;
	MDB_env *env = txn->mt_env;
	pgno_t pgno = P_INVALID, *mop = env->me_pghead;
	pgno_t pgno, *mop = env->me_pghead;
	unsigned mop_len = mop ? mop[0] : 0;
	MDB_page *np;
	MDB_ID2 mid;
	txnid_t oldest = 0, last;
	int rc;
	MDB_cursor_op op;
	MDB_cursor m2;

	*mp = NULL;
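
For context, the freelist that mop points at (env->me_pghead) is an MDB_IDL: an array of page numbers whose element 0 holds the current count, whose element -1 holds the allocated capacity, and whose entries are kept sorted in descending order. That layout is what the mop[-1] capacity checks and the mop[i-n2] == pgno+n2 contiguity tests in this patch rely on. A minimal standalone sketch, not part of the patch (idl_alloc and all other names below are local to the sketch, mirroring the mdb_midl_alloc() convention):

#include <stdio.h>
#include <stdlib.h>

typedef size_t pgno_t;

/* Capacity is stored just before the returned pointer, length in element 0,
 * following the mdb_midl_alloc() convention from midl.c. */
static pgno_t *idl_alloc(unsigned cap)
{
	pgno_t *ids = malloc((cap + 2) * sizeof(pgno_t));
	if (!ids)
		return NULL;
	*ids++ = cap;	/* ids[-1] = capacity */
	*ids = 0;	/* ids[0]  = length   */
	return ids;
}

int main(void)
{
	static const pgno_t pages[] = { 9, 8, 7, 5, 3 };	/* descending order */
	pgno_t *mop = idl_alloc(8);
	unsigned i, num = 2, n2 = num - 1;

	for (i = 0; i < 5; i++) {
		mop[0]++;			/* bump the length slot */
		mop[mop[0]] = pages[i];
	}

	/* Same test the patch uses: entry i starts a run of num contiguous
	 * pages iff mop[i-n2] == mop[i] + n2. */
	for (i = mop[0]; i > n2; i--)
		if (mop[i - n2] == mop[i] + n2)
			printf("run of %u pages starting at pgno %zu\n", num, mop[i]);

	free(mop - 1);
	return 0;
}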
|
|
|
|
|
|
|
|
@@ -1355,117 +1368,52 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
	if (txn->mt_dirty_room == 0)
		return MDB_TXN_FULL;

	/* Pages freed by txn#1 (after allocating but discarding them)
	 * are used when txn#1 is unreferenced, i.e. txn#3.
	 */
	if (txn->mt_txnid >= 3) {
		if (!mop_len && txn->mt_dbs[FREE_DBI].md_root != P_INVALID) {
			/* See if there's anything in the free DB */
			MDB_cursor m2;
	for (op = MDB_FIRST;; op = MDB_NEXT) {
		unsigned int i, j, k;
		MDB_val key, data;
		MDB_node *leaf;
			MDB_val data;
			txnid_t *kptr;

			mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
			if (!txn->mt_env->me_pglast) {
				mdb_page_search(&m2, NULL, 0);
				leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0);
				kptr = (txnid_t *)NODEKEY(leaf);
				last = *kptr;
			} else {
				MDB_val key;
again:
				last = txn->mt_env->me_pglast + 1;
				leaf = NULL;
				key.mv_data = &last;
				key.mv_size = sizeof(last);
				rc = mdb_cursor_set(&m2, &key, &data, MDB_SET_RANGE, NULL);
				if (rc)
					goto none;
				last = *(txnid_t *)key.mv_data;
			}

			if (!oldest)
				oldest = mdb_find_oldest(txn);

			if (oldest > last) {
				/* It's usable, grab it.
				 */
				pgno_t *idl;

				if (!txn->mt_env->me_pglast) {
					mdb_node_read(txn, leaf, &data);
				}
				idl = (MDB_ID *) data.mv_data;
				mop_len = idl[0];
				if (!mop) {
					if (!(env->me_pghead = mop = mdb_midl_alloc(mop_len)))
						return ENOMEM;
				} else if (mop_len > mop[-1]) {
					if ((rc = mdb_midl_grow(&env->me_pghead, mop_len)) != 0)
						return rc;
					mop = env->me_pghead;
				}
				txn->mt_env->me_pglast = last;
				memcpy(mop, idl, MDB_IDL_SIZEOF(idl));
		pgno_t *idl, old_id, new_id;

#if MDB_DEBUG > 1
				{
				unsigned int i;
				DPRINTF("IDL read txn %zu root %zu num %zu",
					last, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
				for (i = idl[0]; i; i--)
					DPRINTF("IDL %zu", idl[i]);
				}
#endif
				/* We might have a zero-length IDL due to freelist growth
				 * during a prior commit
		/* Seek a big enough contiguous page range. Prefer
		 * pages at the tail, just truncating the list.
		 */
				if (!mop_len)
					goto again;
			}
		}
none:
	if (mop_len) {
		if (num > 1) {
			MDB_cursor m2;
			int retry = 1, readit = 0, n2 = num-1;
			unsigned int i, j, k;

			/* If current list is too short, must fetch more and coalesce */
			if (mop[0] < (unsigned)num)
				readit = 1;

			mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
		if (mop_len >= (unsigned)num) {
			i = mop_len;
			do {
#ifdef MDB_PARANOID	/* Seems like we can ignore this now */
				/* If on freelist, don't try to read more. If what we have
				 * right now isn't enough just use new pages.
				 * TODO: get all of this working. Many circular dependencies...
				 */
				if (mc->mc_dbi == FREE_DBI) {
					retry = 0;
					readit = 0;
				pgno = mop[i];
				if (mop[i-n2] == pgno+n2) {
					mop[0] = mop_len -= num;
					/* Move any stragglers down */
					for (j = i-n2; j <= mop_len; )
						mop[j++] = mop[++i];
					goto search_done;
				}
			} while (--i >= (unsigned)num);
			if (Max_retries < INT_MAX && --retry < 0)
				break;
		}
#endif
			if (readit) {
				MDB_val key, data;
				pgno_t *idl, old_id, new_id;

				last = txn->mt_env->me_pglast + 1;

				/* We haven't hit the readers list yet? */
				if (!oldest) {
		if (op == MDB_FIRST) {	/* 1st iteration */
			/* Prepare to fetch more and coalesce */
			oldest = mdb_find_oldest(txn);
			last = env->me_pglast;
			mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
			if (last) {
				op = MDB_SET_RANGE;
				key.mv_data = &last; /* will look up last+1 */
				key.mv_size = sizeof(last);
			}

					/* There's nothing we can use on the freelist */
					if (oldest - last < 1)
			if (Paranoid && mc->mc_dbi == FREE_DBI)
				retry = -1;
		}
		if (Paranoid && retry < 0 && mop_len)
			break;

					key.mv_data = &last;
					key.mv_size = sizeof(last);
					rc = mdb_cursor_set(&m2,&key,&data,MDB_SET_RANGE,NULL);
		last++;
		/* Do not fetch more if the record will be too recent */
		if (oldest <= last)
			break;
		rc = mdb_cursor_get(&m2, &key, NULL, op);
		if (rc) {
			if (rc == MDB_NOTFOUND)
				break;
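
The hunks below read each freelist record and merge its IDL into me_pghead in place. A minimal standalone sketch of that descending merge, not part of the patch, assuming both lists carry their length in element 0 and are sorted in descending order (the inner copy loop sits in unchanged context that the diff omits between the next two hunks):

#include <stdio.h>

typedef unsigned long pgno_t;

/* Merge idl[] into mop[], both descending, lengths in element 0.
 * mop[] must have room for mop[0] + idl[0] entries. */
static void idl_merge(pgno_t mop[], const pgno_t idl[])
{
	unsigned i = (unsigned)idl[0], j = (unsigned)mop[0], k = j + i;
	pgno_t old_id, new_id, mop_len = k;

	mop[0] = (pgno_t)-1;	/* sentinel: stops the inner loop once j reaches 0 */
	old_id = mop[j];
	while (i) {
		new_id = idl[i--];
		for (; old_id < new_id; old_id = mop[--j])
			mop[k--] = old_id;	/* copy smaller old entries toward the tail */
		mop[k--] = new_id;
	}
	mop[0] = mop_len;		/* restore the length slot */
}

int main(void)
{
	pgno_t mop[9] = { 5, 90, 70, 40, 30, 10 };	/* room for 5+3 entries */
	const pgno_t idl[] = { 3, 80, 60, 20 };
	unsigned i;

	idl_merge(mop, idl);
	for (i = 1; i <= mop[0]; i++)
		printf("%lu ", mop[i]);		/* 90 80 70 60 40 30 20 10 */
	printf("\n");
	return 0;
}

Both lists are walked from their smallest (last) entries while mop[] is filled from its new tail backwards, so no temporary buffer is needed; the larger old entries that are never touched are already in their final positions.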
|
|
|
@@ -1474,23 +1422,32 @@ none:
		last = *(txnid_t*)key.mv_data;
		if (oldest <= last)
			break;
		np = m2.mc_pg[m2.mc_top];
		leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]);
		if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS)
			return rc;

		idl = (MDB_ID *) data.mv_data;
		i = idl[0];
					if (mop_len+i > mop[-1]) {
		if (!mop) {
			if (!(env->me_pghead = mop = mdb_midl_alloc(i)))
				return ENOMEM;
		} else if (mop_len+i > mop[-1]) {
			if ((rc = mdb_midl_grow(&env->me_pghead, i)) != 0)
				return rc;
			mop = env->me_pghead;
		}
		env->me_pglast = last;
#if MDB_DEBUG > 1
		DPRINTF("IDL read txn %zu root %zu num %u",
			last, txn->mt_dbs[FREE_DBI].md_root, i);
		for (k = i; k; k--)
			DPRINTF("IDL %zu", idl[k]);
#endif
					/* merge in sorted order */
		/* Merge in descending sorted order */
		j = mop_len;
		k = mop_len += i;
					mop[0] = P_INVALID;
		mop[0] = (pgno_t)-1;
		old_id = mop[j];
		while (i) {
			new_id = idl[i--];
|
|
|
@@ -1499,67 +1456,33 @@ none:
			mop[k--] = new_id;
		}
		mop[0] = mop_len;
					txn->mt_env->me_pglast = last;
					/* Keep trying to read until we have enough */
					if (mop[0] < (unsigned)num) {
						continue;
					}
				}

				/* current list has enough pages, but are they contiguous? */
				for (i=mop[0]; i>=(unsigned)num; i--) {
					if (mop[i-n2] == mop[i] + n2) {
						pgno = mop[i];
						i -= n2;
						/* move any stragglers down */
						for (j=i+num; j<=mop[0]; j++)
							mop[i++] = mop[j];
						mop[0] -= num;
						break;
					}
				}

				/* Stop if we succeeded, or no retries */
				if (!retry || pgno != P_INVALID)
					break;
				readit = 1;

			} while (1);
		} else {
			/* peel pages off tail, so we only have to truncate the list */
			pgno = MDB_IDL_LAST(mop);
			mop[0]--;
		}
	}

	if (pgno == P_INVALID) {
		/* DB size is maxed out */
		if (txn->mt_next_pgno + num >= txn->mt_env->me_maxpg) {
	/* Use new pages from the map when nothing suitable in the freeDB */
	pgno = P_INVALID;
	if (txn->mt_next_pgno + num >= env->me_maxpg) {
			DPUTS("DB size maxed out");
			return MDB_MAP_FULL;
		}
	}
	if (txn->mt_env->me_flags & MDB_WRITEMAP) {

search_done:
	if (env->me_flags & MDB_WRITEMAP) {
		if (pgno == P_INVALID) {
			pgno = txn->mt_next_pgno;
			txn->mt_next_pgno += num;
		}
		np = (MDB_page *)(txn->mt_env->me_map + txn->mt_env->me_psize * pgno);
		np->mp_pgno = pgno;
		np = (MDB_page *)(env->me_map + env->me_psize * pgno);
	} else {
		if (!(np = mdb_page_malloc(mc, num)))
			return ENOMEM;
		if (pgno == P_INVALID) {
			np->mp_pgno = txn->mt_next_pgno;
			pgno = txn->mt_next_pgno;
			txn->mt_next_pgno += num;
		} else {
			np->mp_pgno = pgno;
		}
	}
	mid.mid = np->mp_pgno;
	mid.mid = np->mp_pgno = pgno;
	mid.mptr = np;
	if (txn->mt_env->me_flags & MDB_WRITEMAP) {
	if (env->me_flags & MDB_WRITEMAP) {
		mdb_mid2l_append(txn->mt_u.dirty_list, &mid);
	} else {
		mdb_mid2l_insert(txn->mt_u.dirty_list, &mid);
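
The dirty list updated here is an MDB_ID2L: an array of { page number, page pointer } pairs with the count in slot 0, kept ordered by page number so it can be searched later. The patch appends under MDB_WRITEMAP and uses the sorted insert otherwise. An illustrative standalone sketch of the two operations, not midl.c's implementation (which uses a binary search), with all names local to the sketch:

#include <stdio.h>
#include <stddef.h>

typedef struct { unsigned long mid; void *mptr; } id2;	/* shaped like MDB_ID2 */

/* Caller guarantees id->mid is greater than every entry already present. */
static void id2l_append(id2 *ids, const id2 *id)
{
	ids[0].mid++;
	ids[ids[0].mid] = *id;
}

/* Keeps the list sorted by mid (linear version for illustration only). */
static void id2l_insert(id2 *ids, const id2 *id)
{
	unsigned long n = ids[0].mid, i = n;

	while (i >= 1 && ids[i].mid > id->mid) {
		ids[i + 1] = ids[i];	/* shift larger entries up */
		i--;
	}
	ids[i + 1] = *id;
	ids[0].mid = n + 1;
}

int main(void)
{
	id2 list[8] = { { 0, NULL } };
	id2 a = { 7, NULL }, b = { 3, NULL }, c = { 9, NULL };
	unsigned long i;

	id2l_insert(list, &a);
	id2l_insert(list, &b);
	id2l_append(list, &c);	/* 9 is larger than anything present */
	for (i = 1; i <= list[0].mid; i++)
		printf("%lu ", list[i].mid);	/* 3 7 9 */
	printf("\n");
	return 0;
}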
|
|
|
|