Merge branch 'mdb.master' of ssh://git-master.openldap.org/~git/git/openldap into mdb.master

vmware
Howard Chu 13 years ago
commit 92a63ff790
  1. 2
      libraries/libmdb/.gitignore
  2. 313
      libraries/libmdb/mdb.c
  3. 18
      libraries/libmdb/mdb.h
  4. 8
      libraries/libmdb/mdb_stat.c
  5. 6
      libraries/libmdb/midl.c
  6. 11
      libraries/libmdb/midl.h

@ -11,3 +11,5 @@ mdb_stat
core core
core.* core.*
valgrind.* valgrind.*
man/
html/

@ -48,6 +48,7 @@
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <limits.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
@ -63,6 +64,12 @@
#include "mdb.h" #include "mdb.h"
#include "midl.h" #include "midl.h"
#if (__BYTE_ORDER == __LITTLE_ENDIAN) == (__BYTE_ORDER == __BIG_ENDIAN)
# error "Unknown or unsupported endianness (__BYTE_ORDER)"
#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
# error "Two's complement, reasonably sized integer types, please"
#endif
/** @defgroup internal MDB Internals /** @defgroup internal MDB Internals
* @{ * @{
*/ */
@ -78,38 +85,38 @@
#define pthread_mutex_t HANDLE #define pthread_mutex_t HANDLE
#define pthread_key_t DWORD #define pthread_key_t DWORD
#define pthread_self() GetCurrentThreadId() #define pthread_self() GetCurrentThreadId()
#define pthread_key_create(x,y) *(x) = TlsAlloc() #define pthread_key_create(x,y) (*(x) = TlsAlloc())
#define pthread_key_delete(x) TlsFree(x) #define pthread_key_delete(x) TlsFree(x)
#define pthread_getspecific(x) TlsGetValue(x) #define pthread_getspecific(x) TlsGetValue(x)
#define pthread_setspecific(x,y) TlsSetValue(x,y) #define pthread_setspecific(x,y) TlsSetValue(x,y)
#define pthread_mutex_unlock(x) ReleaseMutex(x) #define pthread_mutex_unlock(x) ReleaseMutex(x)
#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE) #define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE)
#define LOCK_MUTEX_R(env) pthread_mutex_lock(env->me_rmutex) #define LOCK_MUTEX_R(env) pthread_mutex_lock((env)->me_rmutex)
#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(env->me_rmutex) #define UNLOCK_MUTEX_R(env) pthread_mutex_unlock((env)->me_rmutex)
#define LOCK_MUTEX_W(env) pthread_mutex_lock(env->me_wmutex) #define LOCK_MUTEX_W(env) pthread_mutex_lock((env)->me_wmutex)
#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(env->me_wmutex) #define UNLOCK_MUTEX_W(env) pthread_mutex_unlock((env)->me_wmutex)
#define getpid() GetCurrentProcessId() #define getpid() GetCurrentProcessId()
#define fdatasync(fd) !FlushFileBuffers(fd) #define fdatasync(fd) (!FlushFileBuffers(fd))
#define ErrCode() GetLastError() #define ErrCode() GetLastError()
#define GetPageSize(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;} #define GET_PAGESIZE(x) {SYSTEM_INFO si; GetSystemInfo(&si); (x) = si.dwPageSize;}
#define close(fd) CloseHandle(fd) #define close(fd) CloseHandle(fd)
#define munmap(ptr,len) UnmapViewOfFile(ptr) #define munmap(ptr,len) UnmapViewOfFile(ptr)
#else #else
/** Lock the reader mutex. /** Lock the reader mutex.
*/ */
#define LOCK_MUTEX_R(env) pthread_mutex_lock(&env->me_txns->mti_mutex) #define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_txns->mti_mutex)
/** Unlock the reader mutex. /** Unlock the reader mutex.
*/ */
#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&env->me_txns->mti_mutex) #define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_txns->mti_mutex)
/** Lock the writer mutex. /** Lock the writer mutex.
* Only a single write transaction is allowed at a time. Other writers * Only a single write transaction is allowed at a time. Other writers
* will block waiting for this mutex. * will block waiting for this mutex.
*/ */
#define LOCK_MUTEX_W(env) pthread_mutex_lock(&env->me_txns->mti_wmutex) #define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_txns->mti_wmutex)
/** Unlock the writer mutex. /** Unlock the writer mutex.
*/ */
#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&env->me_txns->mti_wmutex) #define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_txns->mti_wmutex)
/** Get the error code for the last failed system function. /** Get the error code for the last failed system function.
*/ */
@ -125,13 +132,13 @@
* Mainly used to initialize file variables and signify that they are * Mainly used to initialize file variables and signify that they are
* unused. * unused.
*/ */
#define INVALID_HANDLE_VALUE -1 #define INVALID_HANDLE_VALUE (-1)
/** Get the size of a memory page for the system. /** Get the size of a memory page for the system.
* This is the basic size that the platform's memory manager uses, and is * This is the basic size that the platform's memory manager uses, and is
* fundamental to the use of memory-mapped files. * fundamental to the use of memory-mapped files.
*/ */
#define GetPageSize(x) (x) = sysconf(_SC_PAGE_SIZE) #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE))
#endif #endif
/** @} */ /** @} */
@ -159,7 +166,12 @@
* @note In the #MDB_node structure, we only store 48 bits of this value, * @note In the #MDB_node structure, we only store 48 bits of this value,
* which thus limits us to only 60 bits of addressable data. * which thus limits us to only 60 bits of addressable data.
*/ */
typedef ULONG pgno_t; typedef ID pgno_t;
/** A transaction ID.
* See struct MDB_txn.mt_txnid for details.
*/
typedef ID txnid_t;
/** @defgroup debug Debug Macros /** @defgroup debug Debug Macros
* @{ * @{
@ -190,7 +202,7 @@ typedef ULONG pgno_t;
/** A default memory page size. /** A default memory page size.
* The actual size is platform-dependent, but we use this for * The actual size is platform-dependent, but we use this for
* boot-strapping. We probably should not be using this any more. * boot-strapping. We probably should not be using this any more.
* The #GetPageSize() macro is used to get the actual size. * The #GET_PAGESIZE() macro is used to get the actual size.
* *
* Note that we don't currently support Huge pages. On Linux, * Note that we don't currently support Huge pages. On Linux,
* regular data files cannot use Huge pages, and in general * regular data files cannot use Huge pages, and in general
@ -243,7 +255,7 @@ typedef ULONG pgno_t;
*/ */
#define DKEY(x) mdb_dkey(x, kbuf) #define DKEY(x) mdb_dkey(x, kbuf)
#else #else
#define DKBUF #define DKBUF typedef int dummy_kbuf /* so we can put ';' after */
#define DKEY(x) #define DKEY(x)
#endif #endif
@ -268,7 +280,7 @@ typedef ULONG pgno_t;
#define LAZY_RWLOCK_WRLOCK(x) #define LAZY_RWLOCK_WRLOCK(x)
/** Grab the DB table read lock */ /** Grab the DB table read lock */
#define LAZY_RWLOCK_RDLOCK(x) #define LAZY_RWLOCK_RDLOCK(x)
/** Declare the DB table rwlock */ /** Declare the DB table rwlock. Should not be followed by ';'. */
#define LAZY_RWLOCK_DEF(x) #define LAZY_RWLOCK_DEF(x)
/** Initialize the DB table rwlock */ /** Initialize the DB table rwlock */
#define LAZY_RWLOCK_INIT(x,y) #define LAZY_RWLOCK_INIT(x,y)
@ -280,7 +292,7 @@ typedef ULONG pgno_t;
#define LAZY_RWLOCK_UNLOCK(x) pthread_rwlock_unlock(x) #define LAZY_RWLOCK_UNLOCK(x) pthread_rwlock_unlock(x)
#define LAZY_RWLOCK_WRLOCK(x) pthread_rwlock_wrlock(x) #define LAZY_RWLOCK_WRLOCK(x) pthread_rwlock_wrlock(x)
#define LAZY_RWLOCK_RDLOCK(x) pthread_rwlock_rdlock(x) #define LAZY_RWLOCK_RDLOCK(x) pthread_rwlock_rdlock(x)
#define LAZY_RWLOCK_DEF(x) pthread_rwlock_t x #define LAZY_RWLOCK_DEF(x) pthread_rwlock_t x;
#define LAZY_RWLOCK_INIT(x,y) pthread_rwlock_init(x,y) #define LAZY_RWLOCK_INIT(x,y) pthread_rwlock_init(x,y)
#define LAZY_RWLOCK_DESTROY(x) pthread_rwlock_destroy(x) #define LAZY_RWLOCK_DESTROY(x) pthread_rwlock_destroy(x)
#endif #endif
@ -375,7 +387,7 @@ typedef struct MDB_rxbody {
* started from so we can avoid overwriting any data used in that * started from so we can avoid overwriting any data used in that
* particular version. * particular version.
*/ */
ULONG mrb_txnid; txnid_t mrb_txnid;
/** The process ID of the process owning this reader txn. */ /** The process ID of the process owning this reader txn. */
pid_t mrb_pid; pid_t mrb_pid;
/** The thread ID of the thread owning this txn. */ /** The thread ID of the thread owning this txn. */
@ -424,12 +436,12 @@ typedef struct MDB_txbody {
* This is recorded here only for convenience; the value can always * This is recorded here only for convenience; the value can always
* be determined by reading the main database meta pages. * be determined by reading the main database meta pages.
*/ */
ULONG mtb_txnid; txnid_t mtb_txnid;
/** The number of slots that have been used in the reader table. /** The number of slots that have been used in the reader table.
* This always records the maximum count, it is not decremented * This always records the maximum count, it is not decremented
* when readers release their slots. * when readers release their slots.
*/ */
uint32_t mtb_numreaders; unsigned mtb_numreaders;
/** The ID of the most recent meta page in the database. /** The ID of the most recent meta page in the database.
* This is recorded here only for convenience; the value can always * This is recorded here only for convenience; the value can always
* be determined by reading the main database meta pages. * be determined by reading the main database meta pages.
@ -533,10 +545,13 @@ typedef struct MDB_page {
typedef struct MDB_node { typedef struct MDB_node {
/** lo and hi are used for data size on leaf nodes and for /** lo and hi are used for data size on leaf nodes and for
* child pgno on branch nodes. On 64 bit platforms, flags * child pgno on branch nodes. On 64 bit platforms, flags
* is also used for pgno. (branch nodes ignore flags) * is also used for pgno. (Branch nodes have no flags).
* They are in host byte order in case that lets some
* accesses be optimized into a 32-bit word access.
*/ */
unsigned short mn_lo; #define mn_lo mn_offset[__BYTE_ORDER!=__LITTLE_ENDIAN]
unsigned short mn_hi; /**< part of dsize or pgno */ #define mn_hi mn_offset[__BYTE_ORDER==__LITTLE_ENDIAN] /**< part of dsize or pgno */
unsigned short mn_offset[2];
unsigned short mn_flags; /**< flags for special node types */ unsigned short mn_flags; /**< flags for special node types */
#define F_BIGDATA 0x01 /**< data put on overflow page */ #define F_BIGDATA 0x01 /**< data put on overflow page */
#define F_SUBDATA 0x02 /**< data is a sub-database */ #define F_SUBDATA 0x02 /**< data is a sub-database */
@ -548,6 +563,9 @@ typedef struct MDB_node {
/** Size of the node header, excluding dynamic data at the end */ /** Size of the node header, excluding dynamic data at the end */
#define NODESIZE offsetof(MDB_node, mn_data) #define NODESIZE offsetof(MDB_node, mn_data)
/** Bit position of top word in page number, for shifting mn_flags */
#define PGNO_TOPWORD ((pgno_t)-1 > 0xffffffffu ? 32 : 0)
/** Size of a node in a branch page with a given key. /** Size of a node in a branch page with a given key.
* This is just the node header plus the key, there is no data. * This is just the node header plus the key, there is no data.
*/ */
@ -568,18 +586,13 @@ typedef struct MDB_node {
#define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize) #define NODEDATA(node) (void *)((char *)(node)->mn_data + (node)->mn_ksize)
/** Get the page number pointed to by a branch node */ /** Get the page number pointed to by a branch node */
#if LONG_MAX == 0x7fffffff #define NODEPGNO(node) \
#define NODEPGNO(node) ((node)->mn_lo | ((node)->mn_hi << 16)) ((node)->mn_lo | ((pgno_t) (node)->mn_hi << 16) | \
/** Set the page number in a branch node */ (PGNO_TOPWORD ? ((pgno_t) (node)->mn_flags << PGNO_TOPWORD) : 0))
#define SETPGNO(node,pgno) do { \
(node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16;} while(0)
#else
#define NODEPGNO(node) ((node)->mn_lo | ((node)->mn_hi << 16) | ((unsigned long)(node)->mn_flags << 32))
/** Set the page number in a branch node */ /** Set the page number in a branch node */
#define SETPGNO(node,pgno) do { \ #define SETPGNO(node,pgno) do { \
(node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \ (node)->mn_lo = (pgno) & 0xffff; (node)->mn_hi = (pgno) >> 16; \
(node)->mn_flags = (pgno) >> 32; } while(0) if (PGNO_TOPWORD) (node)->mn_flags = (pgno) >> PGNO_TOPWORD; } while(0)
#endif
/** Get the size of the data in a leaf node */ /** Get the size of the data in a leaf node */
#define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16)) #define NODEDSZ(node) ((node)->mn_lo | ((unsigned)(node)->mn_hi << 16))
@ -596,17 +609,18 @@ typedef struct MDB_node {
#define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks))) #define LEAF2KEY(p, i, ks) ((char *)(p) + PAGEHDRSZ + ((i)*(ks)))
/** Set the \b node's key into \b key, if requested. */ /** Set the \b node's key into \b key, if requested. */
#define MDB_SET_KEY(node, key) if (key!=NULL) {(key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node);} #define MDB_SET_KEY(node, key) { if ((key) != NULL) { \
(key)->mv_size = NODEKSZ(node); (key)->mv_data = NODEKEY(node); } }
/** Information about a single database in the environment. */ /** Information about a single database in the environment. */
typedef struct MDB_db { typedef struct MDB_db {
uint32_t md_pad; /**< also ksize for LEAF2 pages */ uint32_t md_pad; /**< also ksize for LEAF2 pages */
uint16_t md_flags; /**< @ref mdb_open */ uint16_t md_flags; /**< @ref mdb_open */
uint16_t md_depth; /**< depth of this tree */ uint16_t md_depth; /**< depth of this tree */
ULONG md_branch_pages; /**< number of internal pages */ pgno_t md_branch_pages; /**< number of internal pages */
ULONG md_leaf_pages; /**< number of leaf pages */ pgno_t md_leaf_pages; /**< number of leaf pages */
ULONG md_overflow_pages; /**< number of overflow pages */ pgno_t md_overflow_pages; /**< number of overflow pages */
ULONG md_entries; /**< number of data items */ size_t md_entries; /**< number of data items */
pgno_t md_root; /**< the root page of this tree */ pgno_t md_root; /**< the root page of this tree */
} MDB_db; } MDB_db;
@ -633,7 +647,7 @@ typedef struct MDB_meta {
/** Any persistent environment flags. @ref mdb_env */ /** Any persistent environment flags. @ref mdb_env */
#define mm_flags mm_dbs[0].md_flags #define mm_flags mm_dbs[0].md_flags
pgno_t mm_last_pg; /**< last used page in file */ pgno_t mm_last_pg; /**< last used page in file */
ULONG mm_txnid; /**< txnid that committed this page */ txnid_t mm_txnid; /**< txnid that committed this page */
} MDB_meta; } MDB_meta;
/** Auxiliary DB info. /** Auxiliary DB info.
@ -661,7 +675,7 @@ struct MDB_txn {
* Only committed write transactions increment the ID. If a transaction * Only committed write transactions increment the ID. If a transaction
* aborts, the ID may be re-used by the next writer. * aborts, the ID may be re-used by the next writer.
*/ */
ULONG mt_txnid; txnid_t mt_txnid;
MDB_env *mt_env; /**< the DB environment */ MDB_env *mt_env; /**< the DB environment */
/** The list of pages that became unused during this transaction. /** The list of pages that became unused during this transaction.
* This is an #IDL. * This is an #IDL.
@ -678,7 +692,7 @@ struct MDB_txn {
/** Number of DB records in use. This number only ever increments; /** Number of DB records in use. This number only ever increments;
* we don't decrement it when individual DB handles are closed. * we don't decrement it when individual DB handles are closed.
*/ */
unsigned int mt_numdbs; MDB_dbi mt_numdbs;
#define MDB_TXN_RDONLY 0x01 /**< read-only transaction */ #define MDB_TXN_RDONLY 0x01 /**< read-only transaction */
#define MDB_TXN_ERROR 0x02 /**< an error has occurred */ #define MDB_TXN_ERROR 0x02 /**< an error has occurred */
@ -740,7 +754,7 @@ typedef struct MDB_oldpages {
*/ */
struct MDB_oldpages *mo_next; struct MDB_oldpages *mo_next;
/** The ID of the transaction in which these pages were freed. */ /** The ID of the transaction in which these pages were freed. */
ULONG mo_txnid; txnid_t mo_txnid;
/** An #IDL of the pages */ /** An #IDL of the pages */
pgno_t mo_pages[1]; /* dynamic */ pgno_t mo_pages[1]; /* dynamic */
} MDB_oldpages; } MDB_oldpages;
@ -755,8 +769,8 @@ struct MDB_env {
uint32_t me_flags; uint32_t me_flags;
uint32_t me_extrapad; /**< unused for now */ uint32_t me_extrapad; /**< unused for now */
unsigned int me_maxreaders; /**< size of the reader table */ unsigned int me_maxreaders; /**< size of the reader table */
unsigned int me_numdbs; /**< number of DBs opened */ MDB_dbi me_numdbs; /**< number of DBs opened */
unsigned int me_maxdbs; /**< size of the DB table */ MDB_dbi me_maxdbs; /**< size of the DB table */
char *me_path; /**< path to the DB files */ char *me_path; /**< path to the DB files */
char *me_map; /**< the memory map of the data file */ char *me_map; /**< the memory map of the data file */
MDB_txninfo *me_txns; /**< the memory map of the lock file */ MDB_txninfo *me_txns; /**< the memory map of the lock file */
@ -765,7 +779,7 @@ struct MDB_env {
size_t me_mapsize; /**< size of the data memory map */ size_t me_mapsize; /**< size of the data memory map */
off_t me_size; /**< current file size */ off_t me_size; /**< current file size */
pgno_t me_maxpg; /**< me_mapsize / me_psize */ pgno_t me_maxpg; /**< me_mapsize / me_psize */
unsigned int me_psize; /**< size of a page, from #GetPageSize */ unsigned int me_psize; /**< size of a page, from #GET_PAGESIZE */
unsigned int me_db_toggle; /**< which DB table is current */ unsigned int me_db_toggle; /**< which DB table is current */
MDB_dbx *me_dbxs; /**< array of static DB info */ MDB_dbx *me_dbxs; /**< array of static DB info */
MDB_db *me_dbs[2]; /**< two arrays of MDB_db info */ MDB_db *me_dbs[2]; /**< two arrays of MDB_db info */
@ -777,7 +791,7 @@ struct MDB_env {
/** ID2L of pages that were written during a write txn */ /** ID2L of pages that were written during a write txn */
ID2 me_dirty_list[MDB_IDL_UM_SIZE]; ID2 me_dirty_list[MDB_IDL_UM_SIZE];
/** rwlock for the DB tables, if #LAZY_LOCKS is false */ /** rwlock for the DB tables, if #LAZY_LOCKS is false */
LAZY_RWLOCK_DEF(me_dblock); LAZY_RWLOCK_DEF(me_dblock)
#ifdef _WIN32 #ifdef _WIN32
HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */ HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
HANDLE me_wmutex; HANDLE me_wmutex;
@ -951,18 +965,18 @@ mdb_alloc_page(MDB_cursor *mc, int num)
/* See if there's anything in the free DB */ /* See if there's anything in the free DB */
MDB_cursor m2; MDB_cursor m2;
MDB_node *leaf; MDB_node *leaf;
ULONG *kptr, oldest; txnid_t *kptr, oldest;
mdb_cursor_init(&m2, txn, FREE_DBI); mdb_cursor_init(&m2, txn, FREE_DBI);
mdb_search_page(&m2, NULL, 0); mdb_search_page(&m2, NULL, 0);
leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0); leaf = NODEPTR(m2.mc_pg[m2.mc_top], 0);
kptr = (ULONG *)NODEKEY(leaf); kptr = (txnid_t *)NODEKEY(leaf);
{ {
unsigned int i; unsigned int i;
oldest = txn->mt_txnid - 1; oldest = txn->mt_txnid - 1;
for (i=0; i<txn->mt_env->me_txns->mti_numreaders; i++) { for (i=0; i<txn->mt_env->me_txns->mti_numreaders; i++) {
ULONG mr = txn->mt_env->me_txns->mti_readers[i].mr_txnid; txnid_t mr = txn->mt_env->me_txns->mti_readers[i].mr_txnid;
if (mr && mr < oldest) if (mr && mr < oldest)
oldest = mr; oldest = mr;
} }
@ -976,7 +990,7 @@ mdb_alloc_page(MDB_cursor *mc, int num)
pgno_t *idl; pgno_t *idl;
mdb_read_data(txn, leaf, &data); mdb_read_data(txn, leaf, &data);
idl = (ULONG *)data.mv_data; idl = (ID *) data.mv_data;
mop = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - sizeof(pgno_t)); mop = malloc(sizeof(MDB_oldpages) + MDB_IDL_SIZEOF(idl) - sizeof(pgno_t));
mop->mo_next = txn->mt_env->me_pghead; mop->mo_next = txn->mt_env->me_pghead;
mop->mo_txnid = *kptr; mop->mo_txnid = *kptr;
@ -986,10 +1000,10 @@ mdb_alloc_page(MDB_cursor *mc, int num)
#if DEBUG > 1 #if DEBUG > 1
{ {
unsigned int i; unsigned int i;
DPRINTF("IDL read txn %lu root %lu num %lu", DPRINTF("IDL read txn %zu root %zu num %zu",
mop->mo_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); mop->mo_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
for (i=0; i<idl[0]; i++) { for (i=0; i<idl[0]; i++) {
DPRINTF("IDL %lu", idl[i+1]); DPRINTF("IDL %zu", idl[i+1]);
} }
} }
#endif #endif
@ -1066,7 +1080,7 @@ mdb_touch(MDB_cursor *mc)
MDB_page *np; MDB_page *np;
if ((np = mdb_alloc_page(mc, 1)) == NULL) if ((np = mdb_alloc_page(mc, 1)) == NULL)
return ENOMEM; return ENOMEM;
DPRINTF("touched db %u page %lu -> %lu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno); DPRINTF("touched db %u page %zu -> %zu", mc->mc_dbi, mp->mp_pgno, np->mp_pgno);
assert(mp->mp_pgno != np->mp_pgno); assert(mp->mp_pgno != np->mp_pgno);
mdb_midl_append(mc->mc_txn->mt_free_pgs, mp->mp_pgno); mdb_midl_append(mc->mc_txn->mt_free_pgs, mp->mp_pgno);
pgno = np->mp_pgno; pgno = np->mp_pgno;
@ -1178,9 +1192,9 @@ mdb_txn_renew(MDB_txn *txn)
rc = mdb_txn_renew0(txn); rc = mdb_txn_renew0(txn);
if (rc == MDB_SUCCESS) { if (rc == MDB_SUCCESS) {
DPRINTF("renew txn %lu%c %p on mdbenv %p, root page %lu", DPRINTF("renew txn %zu%c %p on mdbenv %p, root page %zu",
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
(void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
} }
return rc; return rc;
} }
@ -1210,9 +1224,9 @@ mdb_txn_begin(MDB_env *env, unsigned int flags, MDB_txn **ret)
free(txn); free(txn);
else { else {
*ret = txn; *ret = txn;
DPRINTF("begin txn %lu%c %p on mdbenv %p, root page %lu", DPRINTF("begin txn %zu%c %p on mdbenv %p, root page %zu",
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
(void *) env, txn->mt_dbs[MAIN_DBI].md_root); (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root);
} }
return rc; return rc;
@ -1231,6 +1245,7 @@ mdb_txn_reset0(MDB_txn *txn)
} else { } else {
MDB_oldpages *mop; MDB_oldpages *mop;
MDB_page *dp; MDB_page *dp;
MDB_dbi dbi;
unsigned int i; unsigned int i;
/* return all dirty pages to dpage list */ /* return all dirty pages to dpage list */
@ -1251,8 +1266,8 @@ mdb_txn_reset0(MDB_txn *txn)
} }
env->me_txn = NULL; env->me_txn = NULL;
for (i=2; i<env->me_numdbs; i++) for (dbi=2; dbi<env->me_numdbs; dbi++)
env->me_dbxs[i].md_dirty = 0; env->me_dbxs[dbi].md_dirty = 0;
/* The writer mutex was locked in mdb_txn_begin. */ /* The writer mutex was locked in mdb_txn_begin. */
UNLOCK_MUTEX_W(env); UNLOCK_MUTEX_W(env);
} }
@ -1264,9 +1279,9 @@ mdb_txn_reset(MDB_txn *txn)
if (txn == NULL) if (txn == NULL)
return; return;
DPRINTF("reset txn %lu%c %p on mdbenv %p, root page %lu", DPRINTF("reset txn %zu%c %p on mdbenv %p, root page %zu",
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
(void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); (void *) txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
mdb_txn_reset0(txn); mdb_txn_reset0(txn);
} }
@ -1277,9 +1292,9 @@ mdb_txn_abort(MDB_txn *txn)
if (txn == NULL) if (txn == NULL)
return; return;
DPRINTF("abort txn %lu%c %p on mdbenv %p, root page %lu", DPRINTF("abort txn %zu%c %p on mdbenv %p, root page %zu",
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w', txn, txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
(void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root); (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root);
mdb_txn_reset0(txn); mdb_txn_reset0(txn);
free(txn); free(txn);
@ -1322,8 +1337,8 @@ mdb_txn_commit(MDB_txn *txn)
if (!txn->mt_u.dirty_list[0].mid) if (!txn->mt_u.dirty_list[0].mid)
goto done; goto done;
DPRINTF("committing txn %lu %p on mdbenv %p, root page %lu", DPRINTF("committing txn %zu %p on mdbenv %p, root page %zu",
txn->mt_txnid, txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root); txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root);
mdb_cursor_init(&mc, txn, FREE_DBI); mdb_cursor_init(&mc, txn, FREE_DBI);
@ -1335,7 +1350,7 @@ mdb_txn_commit(MDB_txn *txn)
/* save to free list */ /* save to free list */
if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) { if (!MDB_IDL_IS_ZERO(txn->mt_free_pgs)) {
MDB_val key, data; MDB_val key, data;
ULONG i; pgno_t i;
/* make sure last page of freeDB is touched and on freelist */ /* make sure last page of freeDB is touched and on freelist */
key.mv_size = MAXKEYSIZE+1; key.mv_size = MAXKEYSIZE+1;
@ -1346,17 +1361,17 @@ mdb_txn_commit(MDB_txn *txn)
#if DEBUG > 1 #if DEBUG > 1
{ {
unsigned int i; unsigned int i;
ULONG *idl = txn->mt_free_pgs; ID *idl = txn->mt_free_pgs;
DPRINTF("IDL write txn %lu root %lu num %lu", DPRINTF("IDL write txn %zu root %zu num %zu",
txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]); txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, idl[0]);
for (i=0; i<idl[0]; i++) { for (i=0; i<idl[0]; i++) {
DPRINTF("IDL %lu", idl[i+1]); DPRINTF("IDL %zu", idl[i+1]);
} }
} }
#endif #endif
/* write to last page of freeDB */ /* write to last page of freeDB */
key.mv_size = sizeof(pgno_t); key.mv_size = sizeof(pgno_t);
key.mv_data = (char *)&txn->mt_txnid; key.mv_data = &txn->mt_txnid;
data.mv_data = txn->mt_free_pgs; data.mv_data = txn->mt_free_pgs;
/* The free list can still grow during this call, /* The free list can still grow during this call,
* despite the pre-emptive touches above. So check * despite the pre-emptive touches above. So check
@ -1379,7 +1394,7 @@ mdb_txn_commit(MDB_txn *txn)
mop = env->me_pghead; mop = env->me_pghead;
key.mv_size = sizeof(pgno_t); key.mv_size = sizeof(pgno_t);
key.mv_data = (char *)&mop->mo_txnid; key.mv_data = &mop->mo_txnid;
data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages); data.mv_size = MDB_IDL_SIZEOF(mop->mo_pages);
data.mv_data = mop->mo_pages; data.mv_data = mop->mo_pages;
mdb_cursor_put(&mc, &key, &data, 0); mdb_cursor_put(&mc, &key, &data, 0);
@ -1391,6 +1406,7 @@ mdb_txn_commit(MDB_txn *txn)
* touched so this is all in-place and cannot fail. * touched so this is all in-place and cannot fail.
*/ */
{ {
MDB_dbi i;
MDB_val data; MDB_val data;
data.mv_size = sizeof(MDB_db); data.mv_size = sizeof(MDB_db);
@ -1421,7 +1437,7 @@ mdb_txn_commit(MDB_txn *txn)
for (; i<=txn->mt_u.dirty_list[0].mid; i++) { for (; i<=txn->mt_u.dirty_list[0].mid; i++) {
size_t wsize; size_t wsize;
dp = txn->mt_u.dirty_list[i].mptr; dp = txn->mt_u.dirty_list[i].mptr;
DPRINTF("committing page %lu", dp->mp_pgno); DPRINTF("committing page %zu", dp->mp_pgno);
size = dp->mp_pgno * env->me_psize; size = dp->mp_pgno * env->me_psize;
ov.Offset = size & 0xffffffff; ov.Offset = size & 0xffffffff;
ov.OffsetHigh = size >> 16; ov.OffsetHigh = size >> 16;
@ -1465,7 +1481,7 @@ mdb_txn_commit(MDB_txn *txn)
lseek(env->me_fd, dp->mp_pgno * env->me_psize, SEEK_SET); lseek(env->me_fd, dp->mp_pgno * env->me_psize, SEEK_SET);
next = dp->mp_pgno; next = dp->mp_pgno;
} }
DPRINTF("committing page %lu", dp->mp_pgno); DPRINTF("committing page %zu", dp->mp_pgno);
iov[n].iov_len = env->me_psize; iov[n].iov_len = env->me_psize;
if (IS_OVERFLOW(dp)) iov[n].iov_len *= dp->mp_pages; if (IS_OVERFLOW(dp)) iov[n].iov_len *= dp->mp_pages;
iov[n].iov_base = dp; iov[n].iov_base = dp;
@ -1523,6 +1539,7 @@ done:
{ {
int toggle = !env->me_db_toggle; int toggle = !env->me_db_toggle;
MDB_db *ip, *jp; MDB_db *ip, *jp;
MDB_dbi i;
ip = &env->me_dbs[toggle][2]; ip = &env->me_dbs[toggle][2];
jp = &txn->mt_dbs[2]; jp = &txn->mt_dbs[2];
@ -1584,7 +1601,7 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta)
p = (MDB_page *)page; p = (MDB_page *)page;
if (!F_ISSET(p->mp_flags, P_META)) { if (!F_ISSET(p->mp_flags, P_META)) {
DPRINTF("page %lu not a meta page", p->mp_pgno); DPRINTF("page %zu not a meta page", p->mp_pgno);
return EINVAL; return EINVAL;
} }
@ -1619,7 +1636,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
DPUTS("writing new meta page"); DPUTS("writing new meta page");
GetPageSize(psize); GET_PAGESIZE(psize);
meta->mm_magic = MDB_MAGIC; meta->mm_magic = MDB_MAGIC;
meta->mm_version = MDB_VERSION; meta->mm_version = MDB_VERSION;
@ -1679,7 +1696,7 @@ mdb_env_write_meta(MDB_txn *txn)
assert(txn->mt_env != NULL); assert(txn->mt_env != NULL);
toggle = !txn->mt_toggle; toggle = !txn->mt_toggle;
DPRINTF("writing meta page %d for root page %lu", DPRINTF("writing meta page %d for root page %zu",
toggle, txn->mt_dbs[MAIN_DBI].md_root); toggle, txn->mt_dbs[MAIN_DBI].md_root);
env = txn->mt_env; env = txn->mt_env;
@ -1792,7 +1809,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
} }
int int
mdb_env_set_maxdbs(MDB_env *env, int dbs) mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
{ {
if (env->me_map) if (env->me_map)
return EINVAL; return EINVAL;
@ -1801,16 +1818,16 @@ mdb_env_set_maxdbs(MDB_env *env, int dbs)
} }
int int
mdb_env_set_maxreaders(MDB_env *env, int readers) mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
{ {
if (env->me_map) if (env->me_map || readers < 1)
return EINVAL; return EINVAL;
env->me_maxreaders = readers; env->me_maxreaders = readers;
return MDB_SUCCESS; return MDB_SUCCESS;
} }
int int
mdb_env_get_maxreaders(MDB_env *env, int *readers) mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
{ {
if (!env || !readers) if (!env || !readers)
return EINVAL; return EINVAL;
@ -1902,11 +1919,11 @@ mdb_env_open2(MDB_env *env, unsigned int flags)
DPRINTF("opened database version %u, pagesize %u", DPRINTF("opened database version %u, pagesize %u",
env->me_metas[toggle]->mm_version, env->me_psize); env->me_metas[toggle]->mm_version, env->me_psize);
DPRINTF("depth: %u", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_depth); DPRINTF("depth: %u", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_depth);
DPRINTF("entries: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_entries); DPRINTF("entries: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_entries);
DPRINTF("branch pages: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_branch_pages); DPRINTF("branch pages: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_branch_pages);
DPRINTF("leaf pages: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_leaf_pages); DPRINTF("leaf pages: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_leaf_pages);
DPRINTF("overflow pages: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_overflow_pages); DPRINTF("overflow pages: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_overflow_pages);
DPRINTF("root: %lu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_root); DPRINTF("root: %zu", env->me_metas[toggle]->mm_dbs[MAIN_DBI].md_root);
return MDB_SUCCESS; return MDB_SUCCESS;
} }
@ -2308,8 +2325,8 @@ cintcmp(const MDB_val *a, const MDB_val *b)
unsigned short *u, *c; unsigned short *u, *c;
int x; int x;
u = a->mv_data + a->mv_size; u = (unsigned short *) ((char *) a->mv_data + a->mv_size);
c = b->mv_data + a->mv_size; c = (unsigned short *) ((char *) b->mv_data + a->mv_size);
do { do {
x = *--u - *--c; x = *--u - *--c;
} while(!x && u > (unsigned short *)a->mv_data); } while(!x && u > (unsigned short *)a->mv_data);
@ -2322,45 +2339,44 @@ cintcmp(const MDB_val *a, const MDB_val *b)
static int static int
memncmp(const MDB_val *a, const MDB_val *b) memncmp(const MDB_val *a, const MDB_val *b)
{ {
int diff, len_diff; int diff;
ssize_t len_diff;
unsigned int len; unsigned int len;
len = a->mv_size; len = a->mv_size;
len_diff = a->mv_size - b->mv_size; len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
if (len_diff > 0) if (len_diff > 0) {
len = b->mv_size; len = b->mv_size;
len_diff = 1;
}
diff = memcmp(a->mv_data, b->mv_data, len); diff = memcmp(a->mv_data, b->mv_data, len);
return diff ? diff : len_diff; return diff ? diff : len_diff<0 ? -1 : len_diff;
} }
static int static int
memnrcmp(const MDB_val *a, const MDB_val *b) memnrcmp(const MDB_val *a, const MDB_val *b)
{ {
const unsigned char *p1, *p2, *p1_lim; const unsigned char *p1, *p2, *p1_lim;
int diff, len_diff; ssize_t len_diff;
int diff;
if (b->mv_size == 0) p1_lim = (const unsigned char *)a->mv_data;
return a->mv_size != 0; p1 = (const unsigned char *)a->mv_data + a->mv_size;
if (a->mv_size == 0) p2 = (const unsigned char *)b->mv_data + b->mv_size;
return -1;
p1 = (const unsigned char *)a->mv_data + a->mv_size - 1; len_diff = (ssize_t) a->mv_size - (ssize_t) b->mv_size;
p2 = (const unsigned char *)b->mv_data + b->mv_size - 1; if (len_diff > 0) {
p1_lim += len_diff;
len_diff = a->mv_size - b->mv_size; len_diff = 1;
if (len_diff < 0) }
p1_lim = p1 - a->mv_size;
else
p1_lim = p1 - b->mv_size;
while (p1 > p1_lim) { while (p1 > p1_lim) {
diff = *p1 - *p2; diff = *--p1 - *--p2;
if (diff) if (diff)
return diff; return diff;
p1--;
p2--;
} }
return len_diff; return len_diff<0 ? -1 : len_diff;
} }
/* Search for key within a leaf page, using binary search. /* Search for key within a leaf page, using binary search.
@ -2384,7 +2400,7 @@ mdb_search_node(MDB_cursor *mc, MDB_val *key, int *exactp)
nkeys = NUMKEYS(mp); nkeys = NUMKEYS(mp);
DPRINTF("searching %u keys in %s page %lu", DPRINTF("searching %u keys in %s page %zu",
nkeys, IS_LEAF(mp) ? "leaf" : "branch", nkeys, IS_LEAF(mp) ? "leaf" : "branch",
mp->mp_pgno); mp->mp_pgno);
@ -2416,7 +2432,7 @@ mdb_search_node(MDB_cursor *mc, MDB_val *key, int *exactp)
DPRINTF("found leaf index %u [%s], rc = %i", DPRINTF("found leaf index %u [%s], rc = %i",
i, DKEY(&nodekey), rc); i, DKEY(&nodekey), rc);
else else
DPRINTF("found branch index %u [%s -> %lu], rc = %i", DPRINTF("found branch index %u [%s -> %zu], rc = %i",
i, DKEY(&nodekey), NODEPGNO(node), rc); i, DKEY(&nodekey), NODEPGNO(node), rc);
#endif #endif
@ -2456,7 +2472,7 @@ cursor_pop_page(MDB_cursor *mc)
if (mc->mc_snum) if (mc->mc_snum)
mc->mc_top--; mc->mc_top--;
DPRINTF("popped page %lu off db %u cursor %p", top->mp_pgno, DPRINTF("popped page %zu off db %u cursor %p", top->mp_pgno,
mc->mc_dbi, (void *) mc); mc->mc_dbi, (void *) mc);
} }
} }
@ -2464,7 +2480,7 @@ cursor_pop_page(MDB_cursor *mc)
static int static int
cursor_push_page(MDB_cursor *mc, MDB_page *mp) cursor_push_page(MDB_cursor *mc, MDB_page *mp)
{ {
DPRINTF("pushing page %lu on db %u cursor %p", mp->mp_pgno, DPRINTF("pushing page %zu on db %u cursor %p", mp->mp_pgno,
mc->mc_dbi, (void *) mc); mc->mc_dbi, (void *) mc);
if (mc->mc_snum >= CURSOR_STACK) { if (mc->mc_snum >= CURSOR_STACK) {
@ -2497,7 +2513,7 @@ mdb_get_page(MDB_txn *txn, pgno_t pgno, MDB_page **ret)
} }
*ret = p; *ret = p;
if (!p) { if (!p) {
DPRINTF("page %lu not found", pgno); DPRINTF("page %zu not found", pgno);
assert(p != NULL); assert(p != NULL);
} }
return (p != NULL) ? MDB_SUCCESS : MDB_PAGE_NOTFOUND; return (p != NULL) ? MDB_SUCCESS : MDB_PAGE_NOTFOUND;
@ -2514,9 +2530,9 @@ mdb_search_page_root(MDB_cursor *mc, MDB_val *key, int modify)
while (IS_BRANCH(mp)) { while (IS_BRANCH(mp)) {
MDB_node *node; MDB_node *node;
DPRINTF("branch page %lu has %u keys", mp->mp_pgno, NUMKEYS(mp)); DPRINTF("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp));
assert(NUMKEYS(mp) > 1); assert(NUMKEYS(mp) > 1);
DPRINTF("found index 0 to page %lu", NODEPGNO(NODEPTR(mp, 0))); DPRINTF("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0)));
if (key == NULL) /* Initialize cursor to first page. */ if (key == NULL) /* Initialize cursor to first page. */
mc->mc_ki[mc->mc_top] = 0; mc->mc_ki[mc->mc_top] = 0;
@ -2559,7 +2575,7 @@ mdb_search_page_root(MDB_cursor *mc, MDB_val *key, int modify)
return MDB_CORRUPTED; return MDB_CORRUPTED;
} }
DPRINTF("found leaf page %lu for key [%s]", mp->mp_pgno, DPRINTF("found leaf page %zu for key [%s]", mp->mp_pgno,
key ? DKEY(key) : NULL); key ? DKEY(key) : NULL);
return MDB_SUCCESS; return MDB_SUCCESS;
@ -2596,7 +2612,7 @@ mdb_search_page(MDB_cursor *mc, MDB_val *key, int modify)
mc->mc_snum = 1; mc->mc_snum = 1;
mc->mc_top = 0; mc->mc_top = 0;
DPRINTF("db %u root page %lu has flags 0x%X", DPRINTF("db %u root page %zu has flags 0x%X",
mc->mc_dbi, root, mc->mc_pg[0]->mp_flags); mc->mc_dbi, root, mc->mc_pg[0]->mp_flags);
if (modify) { if (modify) {
@ -2637,7 +2653,7 @@ mdb_read_data(MDB_txn *txn, MDB_node *leaf, MDB_val *data)
data->mv_size = NODEDSZ(leaf); data->mv_size = NODEDSZ(leaf);
memcpy(&pgno, NODEDATA(leaf), sizeof(pgno)); memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
if ((rc = mdb_get_page(txn, pgno, &omp))) { if ((rc = mdb_get_page(txn, pgno, &omp))) {
DPRINTF("read overflow page %lu failed", pgno); DPRINTF("read overflow page %zu failed", pgno);
return rc; return rc;
} }
data->mv_data = METADATA(omp); data->mv_data = METADATA(omp);
@ -2687,7 +2703,7 @@ mdb_sibling(MDB_cursor *mc, int move_right)
} }
cursor_pop_page(mc); cursor_pop_page(mc);
DPRINTF("parent page is page %lu, index %u", DPRINTF("parent page is page %zu, index %u",
mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]); mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]);
if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top])) if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top]))
@ -2745,7 +2761,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
} }
} }
DPRINTF("cursor_next: top page is %lu in cursor %p", mp->mp_pgno, (void *) mc); DPRINTF("cursor_next: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc);
if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) { if (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mp)) {
DPUTS("=====> move to next sibling page"); DPUTS("=====> move to next sibling page");
@ -2754,11 +2770,11 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
return MDB_NOTFOUND; return MDB_NOTFOUND;
} }
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
DPRINTF("next page is %lu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); DPRINTF("next page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]);
} else } else
mc->mc_ki[mc->mc_top]++; mc->mc_ki[mc->mc_top]++;
DPRINTF("==> cursor points to page %lu with %u keys, key index %u", DPRINTF("==> cursor points to page %zu with %u keys, key index %u",
mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]);
if (IS_LEAF2(mp)) { if (IS_LEAF2(mp)) {
@ -2814,7 +2830,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
} }
} }
DPRINTF("cursor_prev: top page is %lu in cursor %p", mp->mp_pgno, (void *) mc); DPRINTF("cursor_prev: top page is %zu in cursor %p", mp->mp_pgno, (void *) mc);
if (mc->mc_ki[mc->mc_top] == 0) { if (mc->mc_ki[mc->mc_top] == 0) {
DPUTS("=====> move to prev sibling page"); DPUTS("=====> move to prev sibling page");
@ -2824,13 +2840,13 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
} }
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1; mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1;
DPRINTF("prev page is %lu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]); DPRINTF("prev page is %zu, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]);
} else } else
mc->mc_ki[mc->mc_top]--; mc->mc_ki[mc->mc_top]--;
mc->mc_flags &= ~C_EOF; mc->mc_flags &= ~C_EOF;
DPRINTF("==> cursor points to page %lu with %u keys, key index %u", DPRINTF("==> cursor points to page %zu with %u keys, key index %u",
mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]); mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]);
if (IS_LEAF2(mp)) { if (IS_LEAF2(mp)) {
@ -3485,7 +3501,7 @@ mdb_new_page(MDB_cursor *mc, uint32_t flags, int num)
if ((np = mdb_alloc_page(mc, num)) == NULL) if ((np = mdb_alloc_page(mc, num)) == NULL)
return NULL; return NULL;
DPRINTF("allocated new mpage %lu, page size %u", DPRINTF("allocated new mpage %zu, page size %u",
np->mp_pgno, mc->mc_txn->mt_env->me_psize); np->mp_pgno, mc->mc_txn->mt_env->me_psize);
np->mp_flags = flags | P_DIRTY; np->mp_flags = flags | P_DIRTY;
np->mp_lower = PAGEHDRSZ; np->mp_lower = PAGEHDRSZ;
@ -3547,7 +3563,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx,
assert(mp->mp_upper >= mp->mp_lower); assert(mp->mp_upper >= mp->mp_lower);
DPRINTF("add to %s page %lu index %i, data size %zu key size %zu [%s]", DPRINTF("add to %s page %zu index %i, data size %zu key size %zu [%s]",
IS_LEAF(mp) ? "leaf" : "branch", IS_LEAF(mp) ? "leaf" : "branch",
mp->mp_pgno, indx, data ? data->mv_size : 0, mp->mp_pgno, indx, data ? data->mv_size : 0,
key ? key->mv_size : 0, key ? DKEY(key) : NULL); key ? key->mv_size : 0, key ? DKEY(key) : NULL);
@ -3584,7 +3600,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx,
node_size += sizeof(pgno_t); node_size += sizeof(pgno_t);
if ((ofp = mdb_new_page(mc, P_OVERFLOW, ovpages)) == NULL) if ((ofp = mdb_new_page(mc, P_OVERFLOW, ovpages)) == NULL)
return ENOMEM; return ENOMEM;
DPRINTF("allocated overflow page %lu", ofp->mp_pgno); DPRINTF("allocated overflow page %zu", ofp->mp_pgno);
flags |= F_BIGDATA; flags |= F_BIGDATA;
} else { } else {
node_size += data->mv_size; node_size += data->mv_size;
@ -3593,7 +3609,7 @@ mdb_add_node(MDB_cursor *mc, indx_t indx,
node_size += node_size & 1; node_size += node_size & 1;
if (node_size + sizeof(indx_t) > SIZELEFT(mp)) { if (node_size + sizeof(indx_t) > SIZELEFT(mp)) {
DPRINTF("not enough room in page %lu, got %u ptrs", DPRINTF("not enough room in page %zu, got %u ptrs",
mp->mp_pgno, NUMKEYS(mp)); mp->mp_pgno, NUMKEYS(mp));
DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower, DPRINTF("upper - lower = %u - %u = %u", mp->mp_upper, mp->mp_lower,
mp->mp_upper - mp->mp_lower); mp->mp_upper - mp->mp_lower);
@ -3651,7 +3667,7 @@ mdb_del_node(MDB_page *mp, indx_t indx, int ksize)
MDB_node *node; MDB_node *node;
char *base; char *base;
DPRINTF("delete node %u on %s page %lu", indx, DPRINTF("delete node %u on %s page %zu", indx,
IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno); IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno);
assert(indx < NUMKEYS(mp)); assert(indx < NUMKEYS(mp));
@ -3768,7 +3784,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
/* Return the count of duplicate data items for the current key */ /* Return the count of duplicate data items for the current key */
int int
mdb_cursor_count(MDB_cursor *mc, unsigned long *countp) mdb_cursor_count(MDB_cursor *mc, size_t *countp)
{ {
MDB_node *leaf; MDB_node *leaf;
@ -3810,7 +3826,7 @@ mdb_update_key(MDB_page *mp, indx_t indx, MDB_val *key)
node = NODEPTR(mp, indx); node = NODEPTR(mp, indx);
ptr = mp->mp_ptrs[indx]; ptr = mp->mp_ptrs[indx];
DPRINTF("update key %u (ofs %u) [%.*s] to [%s] on page %lu", DPRINTF("update key %u (ofs %u) [%.*s] to [%s] on page %zu",
indx, ptr, indx, ptr,
(int)node->mn_ksize, (char *)NODEKEY(node), (int)node->mn_ksize, (char *)NODEKEY(node),
DKEY(key), DKEY(key),
@ -3883,7 +3899,7 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst)
data.mv_size = NODEDSZ(srcnode); data.mv_size = NODEDSZ(srcnode);
data.mv_data = NODEDATA(srcnode); data.mv_data = NODEDATA(srcnode);
} }
DPRINTF("moving %s node %u [%s] on page %lu to node %u on page %lu", DPRINTF("moving %s node %u [%s] on page %zu to node %u on page %zu",
IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch", IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch",
csrc->mc_ki[csrc->mc_top], csrc->mc_ki[csrc->mc_top],
DKEY(&key), DKEY(&key),
@ -3912,7 +3928,7 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst)
key.mv_size = NODEKSZ(srcnode); key.mv_size = NODEKSZ(srcnode);
key.mv_data = NODEKEY(srcnode); key.mv_data = NODEKEY(srcnode);
} }
DPRINTF("update separator for source page %lu to [%s]", DPRINTF("update separator for source page %zu to [%s]",
csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key)); csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key));
if ((rc = mdb_update_key(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], if ((rc = mdb_update_key(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1],
&key)) != MDB_SUCCESS) &key)) != MDB_SUCCESS)
@ -3934,7 +3950,7 @@ mdb_move_node(MDB_cursor *csrc, MDB_cursor *cdst)
key.mv_size = NODEKSZ(srcnode); key.mv_size = NODEKSZ(srcnode);
key.mv_data = NODEKEY(srcnode); key.mv_data = NODEKEY(srcnode);
} }
DPRINTF("update separator for destination page %lu to [%s]", DPRINTF("update separator for destination page %zu to [%s]",
cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key)); cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key));
if ((rc = mdb_update_key(cdst->mc_pg[cdst->mc_top-1], cdst->mc_ki[cdst->mc_top-1], if ((rc = mdb_update_key(cdst->mc_pg[cdst->mc_top-1], cdst->mc_ki[cdst->mc_top-1],
&key)) != MDB_SUCCESS) &key)) != MDB_SUCCESS)
@ -3958,7 +3974,8 @@ mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst)
MDB_node *srcnode; MDB_node *srcnode;
MDB_val key, data; MDB_val key, data;
DPRINTF("merging page %lu into %lu", csrc->mc_pg[csrc->mc_top]->mp_pgno, cdst->mc_pg[cdst->mc_top]->mp_pgno); DPRINTF("merging page %zu into %zu", csrc->mc_pg[csrc->mc_top]->mp_pgno,
cdst->mc_pg[cdst->mc_top]->mp_pgno);
assert(csrc->mc_snum > 1); /* can't merge root page */ assert(csrc->mc_snum > 1); /* can't merge root page */
assert(cdst->mc_snum > 1); assert(cdst->mc_snum > 1);
@ -3993,7 +4010,7 @@ mdb_merge(MDB_cursor *csrc, MDB_cursor *cdst)
} }
} }
DPRINTF("dst page %lu now has %u keys (%.1f%% filled)", DPRINTF("dst page %zu now has %u keys (%.1f%% filled)",
cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]), (float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10); cdst->mc_pg[cdst->mc_top]->mp_pgno, NUMKEYS(cdst->mc_pg[cdst->mc_top]), (float)PAGEFILL(cdst->mc_txn->mt_env, cdst->mc_pg[cdst->mc_top]) / 10);
/* Unlink the src page from parent and add to free list. /* Unlink the src page from parent and add to free list.
@ -4042,12 +4059,12 @@ mdb_rebalance(MDB_cursor *mc)
unsigned int ptop; unsigned int ptop;
MDB_cursor mn; MDB_cursor mn;
DPRINTF("rebalancing %s page %lu (has %u keys, %.1f%% full)", DPRINTF("rebalancing %s page %zu (has %u keys, %.1f%% full)",
IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch", IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
mc->mc_pg[mc->mc_top]->mp_pgno, NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10); mc->mc_pg[mc->mc_top]->mp_pgno, NUMKEYS(mc->mc_pg[mc->mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10);
if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD) { if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD) {
DPRINTF("no need to rebalance page %lu, above fill threshold", DPRINTF("no need to rebalance page %zu, above fill threshold",
mc->mc_pg[mc->mc_top]->mp_pgno); mc->mc_pg[mc->mc_top]->mp_pgno);
return MDB_SUCCESS; return MDB_SUCCESS;
} }
@ -4112,7 +4129,7 @@ mdb_rebalance(MDB_cursor *mc)
mc->mc_ki[mc->mc_top] = 0; mc->mc_ki[mc->mc_top] = 0;
} }
DPRINTF("found neighbor page %lu (%u keys, %.1f%% full)", DPRINTF("found neighbor page %zu (%u keys, %.1f%% full)",
mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10); mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]), (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10);
/* If the neighbor page is above threshold and has at least two /* If the neighbor page is above threshold and has at least two
@ -4143,7 +4160,7 @@ mdb_del0(MDB_cursor *mc, MDB_node *leaf)
memcpy(&pg, NODEDATA(leaf), sizeof(pg)); memcpy(&pg, NODEDATA(leaf), sizeof(pg));
ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize); ovpages = OVPAGES(NODEDSZ(leaf), mc->mc_txn->mt_env->me_psize);
for (i=0; i<ovpages; i++) { for (i=0; i<ovpages; i++) {
DPRINTF("freed ov page %lu", pg); DPRINTF("freed ov page %zu", pg);
mdb_midl_append(mc->mc_txn->mt_free_pgs, pg); mdb_midl_append(mc->mc_txn->mt_free_pgs, pg);
pg++; pg++;
} }
@ -4230,7 +4247,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno)
mp = mc->mc_pg[mc->mc_top]; mp = mc->mc_pg[mc->mc_top];
newindx = mc->mc_ki[mc->mc_top]; newindx = mc->mc_ki[mc->mc_top];
DPRINTF("-----> splitting %s page %lu and adding [%s] at index %i", DPRINTF("-----> splitting %s page %zu and adding [%s] at index %i",
IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno, IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno,
DKEY(newkey), mc->mc_ki[mc->mc_top]); DKEY(newkey), mc->mc_ki[mc->mc_top]);
@ -4260,7 +4277,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno)
ptop = 0; ptop = 0;
} else { } else {
ptop = mc->mc_top-1; ptop = mc->mc_top-1;
DPRINTF("parent branch page is %lu", mc->mc_pg[ptop]->mp_pgno); DPRINTF("parent branch page is %zu", mc->mc_pg[ptop]->mp_pgno);
} }
/* Create a right sibling. */ /* Create a right sibling. */
@ -4269,7 +4286,7 @@ mdb_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno)
mdb_cursor_copy(mc, &mn); mdb_cursor_copy(mc, &mn);
mn.mc_pg[mn.mc_top] = rp; mn.mc_pg[mn.mc_top] = rp;
mn.mc_ki[ptop] = mc->mc_ki[ptop]+1; mn.mc_ki[ptop] = mc->mc_ki[ptop]+1;
DPRINTF("new right sibling: page %lu", rp->mp_pgno); DPRINTF("new right sibling: page %zu", rp->mp_pgno);
nkeys = NUMKEYS(mp); nkeys = NUMKEYS(mp);
split_indx = nkeys / 2 + 1; split_indx = nkeys / 2 + 1;

@ -232,10 +232,10 @@ typedef struct MDB_stat {
unsigned int ms_psize; /**< Size of a database page. unsigned int ms_psize; /**< Size of a database page.
This is currently the same for all databases. */ This is currently the same for all databases. */
unsigned int ms_depth; /**< Depth (height) of the B-tree */ unsigned int ms_depth; /**< Depth (height) of the B-tree */
unsigned long ms_branch_pages; /**< Number of internal (non-leaf) pages */ size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */
unsigned long ms_leaf_pages; /**< Number of leaf pages */ size_t ms_leaf_pages; /**< Number of leaf pages */
unsigned long ms_overflow_pages; /**< Number of overflow pages */ size_t ms_overflow_pages; /**< Number of overflow pages */
unsigned long ms_entries; /**< Number of data items */ size_t ms_entries; /**< Number of data items */
} MDB_stat; } MDB_stat;
/** Return the mdb library version information. /** Return the mdb library version information.
@ -414,7 +414,7 @@ int mdb_env_set_mapsize(MDB_env *env, size_t size);
* <li>EINVAL - an invalid parameter was specified, or the environment is already open. * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
* </ul> * </ul>
*/ */
int mdb_env_set_maxreaders(MDB_env *env, int readers); int mdb_env_set_maxreaders(MDB_env *env, unsigned int readers);
/** Get the maximum number of threads for the environment. /** Get the maximum number of threads for the environment.
* @param[in] env An environment handle returned by #mdb_env_create() * @param[in] env An environment handle returned by #mdb_env_create()
@ -425,7 +425,7 @@ int mdb_env_set_maxreaders(MDB_env *env, int readers);
* <li>EINVAL - an invalid parameter was specified. * <li>EINVAL - an invalid parameter was specified.
* </ul> * </ul>
*/ */
int mdb_env_get_maxreaders(MDB_env *env, int *readers); int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
/** Set the maximum number of databases for the environment. /** Set the maximum number of databases for the environment.
* This function is only needed if multiple databases will be used in the * This function is only needed if multiple databases will be used in the
@ -440,7 +440,7 @@ int mdb_env_get_maxreaders(MDB_env *env, int *readers);
* <li>EINVAL - an invalid parameter was specified, or the environment is already open. * <li>EINVAL - an invalid parameter was specified, or the environment is already open.
* </ul> * </ul>
*/ */
int mdb_env_set_maxdbs(MDB_env *env, int dbs); int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
/** Create a transaction for use with the environment. /** Create a transaction for use with the environment.
* The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit(). * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
@ -542,7 +542,7 @@ int mdb_txn_renew(MDB_txn *txn);
* <li>#MDB_INTEGERKEY * <li>#MDB_INTEGERKEY
* Keys are binary integers in native byte order. Setting this option * Keys are binary integers in native byte order. Setting this option
* requires all keys to be the same size, typically sizeof(int) * requires all keys to be the same size, typically sizeof(int)
* or sizeof(long). * or sizeof(size_t).
* <li>#MDB_DUPFIXED * <li>#MDB_DUPFIXED
* This flag may only be used in combination with #MDB_DUPSORT. This option * This flag may only be used in combination with #MDB_DUPSORT. This option
* tells the library that the data items for this database are all the same * tells the library that the data items for this database are all the same
@ -839,7 +839,7 @@ int mdb_cursor_del(MDB_cursor *cursor, unsigned int flags);
* <li>EINVAL - cursor is not initialized, or an invalid parameter was specified. * <li>EINVAL - cursor is not initialized, or an invalid parameter was specified.
* </ul> * </ul>
*/ */
int mdb_cursor_count(MDB_cursor *cursor, unsigned long *countp); int mdb_cursor_count(MDB_cursor *cursor, size_t *countp);
/** Compare two data items according to a particular database. /** Compare two data items according to a particular database.
* This returns a comparison as if the two data items were keys in the * This returns a comparison as if the two data items were keys in the

@ -52,10 +52,10 @@ int main(int argc,char * argv[])
rc = mdb_stat(txn, dbi, &mst); rc = mdb_stat(txn, dbi, &mst);
printf("Page size: %u\n", mst.ms_psize); printf("Page size: %u\n", mst.ms_psize);
printf("Tree depth: %u\n", mst.ms_depth); printf("Tree depth: %u\n", mst.ms_depth);
printf("Branch pages: %lu\n", mst.ms_branch_pages); printf("Branch pages: %zu\n", mst.ms_branch_pages);
printf("Leaf pages: %lu\n", mst.ms_leaf_pages); printf("Leaf pages: %zu\n", mst.ms_leaf_pages);
printf("Overflow pages: %lu\n", mst.ms_overflow_pages); printf("Overflow pages: %zu\n", mst.ms_overflow_pages);
printf("Entries: %lu\n", mst.ms_entries); printf("Entries: %zu\n", mst.ms_entries);
mdb_close(txn, dbi); mdb_close(txn, dbi);
mdb_txn_abort(txn); mdb_txn_abort(txn);
mdb_env_close(env); mdb_env_close(env);

@ -15,6 +15,7 @@
* <http://www.OpenLDAP.org/license.html>. * <http://www.OpenLDAP.org/license.html>.
*/ */
#include <limits.h>
#include <string.h> #include <string.h>
#include <sys/types.h> #include <sys/types.h>
#include <assert.h> #include <assert.h>
@ -129,12 +130,13 @@ int mdb_midl_append( IDL ids, ID id )
/* Quicksort + Insertion sort for small arrays */ /* Quicksort + Insertion sort for small arrays */
#define SMALL 8 #define SMALL 8
/* Exchange two lvalues through the caller-declared temporary `itmp`.
 * Wrapped in do { } while (0) so that `SWAP(x,y);` is exactly one
 * statement and stays valid as the body of an unbraced if/else.
 * Each argument is evaluated twice; pass side-effect-free lvalues.
 */
#define SWAP(a,b)	do { itmp=(a); (a)=(b); (b)=itmp; } while (0)
void void
mdb_midl_sort( ID *ids ) mdb_midl_sort( ID *ids )
{ {
int istack[16*sizeof(int)]; /* Max possible depth of int-indexed tree * 2 items/level */
int istack[sizeof(int)*CHAR_BIT * 2];
int i,j,k,l,ir,jstack; int i,j,k,l,ir,jstack;
ID a, itmp; ID a, itmp;

@ -26,19 +26,20 @@
#ifndef _MDB_MIDL_H_ #ifndef _MDB_MIDL_H_
#define _MDB_MIDL_H_ #define _MDB_MIDL_H_
#include <stddef.h>
/** @defgroup internal MDB Internals /** @defgroup internal MDB Internals
* @{ * @{
*/ */
/** ULONG should be the largest integer type supported on a machine.
* It should be equal to the size of a pointer.
*/
#define ULONG unsigned long
/** @defgroup idls ID List Management /** @defgroup idls ID List Management
* @{ * @{
*/ */
/** A generic ID number. These were entryIDs in back-bdb. /** A generic ID number. These were entryIDs in back-bdb.
* It should be the largest integer type supported on a machine.
* It should be equal to the size of a pointer.
*/ */
typedef ULONG ID; typedef size_t ID;
/** An IDL is an ID List, a sorted array of IDs. The first /** An IDL is an ID List, a sorted array of IDs. The first
* element of the array is a counter for how many actual * element of the array is a counter for how many actual

Loading…
Cancel
Save