diff --git a/libraries/liblmdb/Makefile b/libraries/liblmdb/Makefile index f3c93a2..fa469a2 100644 --- a/libraries/liblmdb/Makefile +++ b/libraries/liblmdb/Makefile @@ -18,11 +18,12 @@ # There may be other macros in mdb.c of interest. You should # read mdb.c before changing any of them. # -CC = gcc +CC = gcc -m32 AR = ar W = -W -Wall -Wno-unused-parameter -Wbad-function-cast -Wuninitialized THREADS = -pthread OPT = -O2 -g +XCFLAGS = -DMDB_VL32 CFLAGS = $(THREADS) $(OPT) $(W) $(XCFLAGS) LDLIBS = # -lntdll # Windows needs ntdll SOLIBS = # -lntdll diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index e080e76..d704677 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -170,6 +170,14 @@ typedef int mdb_mode_t; typedef mode_t mdb_mode_t; #endif +#define MDB_VL32 1 + +#ifdef MDB_VL32 +typedef u_int64_t mdb_size_t; +#else +typedef size_t mdb_size_t; +#endif + /** An abstraction for a file handle. * On POSIX systems file handles are small integers. On Windows * they're opaque pointers. @@ -447,18 +455,18 @@ typedef struct MDB_stat { unsigned int ms_psize; /**< Size of a database page. This is currently the same for all databases. */ unsigned int ms_depth; /**< Depth (height) of the B-tree */ - size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ - size_t ms_leaf_pages; /**< Number of leaf pages */ - size_t ms_overflow_pages; /**< Number of overflow pages */ - size_t ms_entries; /**< Number of data items */ + mdb_size_t ms_branch_pages; /**< Number of internal (non-leaf) pages */ + mdb_size_t ms_leaf_pages; /**< Number of leaf pages */ + mdb_size_t ms_overflow_pages; /**< Number of overflow pages */ + mdb_size_t ms_entries; /**< Number of data items */ } MDB_stat; /** @brief Information about the environment */ typedef struct MDB_envinfo { void *me_mapaddr; /**< Address of map, if fixed */ - size_t me_mapsize; /**< Size of the data memory map */ - size_t me_last_pgno; /**< ID of the last used page */ - size_t me_last_txnid; /**< ID of the last committed transaction */ + mdb_size_t me_mapsize; /**< Size of the data memory map */ + mdb_size_t me_last_pgno; /**< ID of the last used page */ + mdb_size_t me_last_txnid; /**< ID of the last committed transaction */ unsigned int me_maxreaders; /**< max reader slots in the environment */ unsigned int me_numreaders; /**< max reader slots used in the environment */ } MDB_envinfo; @@ -825,7 +833,7 @@ int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd); * an active write transaction. * */ -int mdb_env_set_mapsize(MDB_env *env, size_t size); +int mdb_env_set_mapsize(MDB_env *env, mdb_size_t size); /** @brief Set the maximum number of threads/reader slots for the environment. * @@ -974,7 +982,7 @@ MDB_env *mdb_txn_env(MDB_txn *txn); * @param[in] txn A transaction handle returned by #mdb_txn_begin() * @return A transaction ID, valid if input is an active transaction. */ -size_t mdb_txn_id(MDB_txn *txn); +mdb_size_t mdb_txn_id(MDB_txn *txn); /** @brief Commit all the operations of a transaction into the database. * diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6c88c96..c6b4fe2 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -35,6 +35,9 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE 1 #endif +#ifdef MDB_VL32 +#define _FILE_OFFSET_BITS 64 +#endif #ifdef _WIN32 #include #include @@ -452,6 +455,12 @@ typedef pthread_mutex_t mdb_mutex_t[1], *mdb_mutexref_t; #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE)) #endif +#ifdef MDB_VL32 +#define Y "ll" +#else +#define Y Z +#endif + #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM) #define MNAME_LEN 32 #elif defined(MDB_USE_SYSV_SEM) @@ -1062,7 +1071,7 @@ typedef struct MDB_db { pgno_t md_branch_pages; /**< number of internal pages */ pgno_t md_leaf_pages; /**< number of leaf pages */ pgno_t md_overflow_pages; /**< number of overflow pages */ - size_t md_entries; /**< number of data items */ + pgno_t md_entries; /**< number of data items */ pgno_t md_root; /**< the root page of this tree */ } MDB_db; @@ -1092,8 +1101,16 @@ typedef struct MDB_meta { uint32_t mm_magic; /** Version number of this file. Must be set to #MDB_DATA_VERSION. */ uint32_t mm_version; +#ifdef MDB_VL32 + union { /* always zero since we don't support fixed mapping in MDB_VL32 */ + MDB_ID mmun_ull; + void *mmun_address; + } mm_un; +#define mm_address mm_un.mmun_address +#else void *mm_address; /**< address for fixed mapping */ - size_t mm_mapsize; /**< size of mmap region */ +#endif + pgno_t mm_mapsize; /**< size of mmap region */ MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */ /** The size of pages used in this DB */ #define mm_psize mm_dbs[FREE_DBI].md_pad @@ -1182,6 +1199,10 @@ struct MDB_txn { MDB_cursor **mt_cursors; /** Array of flags for each DB */ unsigned char *mt_dbflags; +#ifdef MDB_VL32 + /** List of read-only pages */ + MDB_ID2L mt_rpages; +#endif /** Number of DB records in use, or 0 when the txn is finished. * This number only ever increments until the txn finishes; we * don't decrement it when individual DB handles are closed. @@ -1294,6 +1315,9 @@ struct MDB_env { HANDLE me_fd; /**< The main data file */ HANDLE me_lfd; /**< The lock file */ HANDLE me_mfd; /**< just for writing the meta pages */ +#if defined(MDB_VL32) && defined(_WIN32) + HANDLE me_fmh; /**< File Mapping handle */ +#endif /** Failed to update the meta page. Probably an I/O error. */ #define MDB_FATAL_ERROR 0x80000000U /** Some fields are initialized. */ @@ -1318,7 +1342,7 @@ struct MDB_env { void *me_pbuf; /**< scratch area for DUPSORT put() */ MDB_txn *me_txn; /**< current write transaction */ MDB_txn *me_txn0; /**< prealloc'd write transaction */ - size_t me_mapsize; /**< size of the data memory map */ + mdb_size_t me_mapsize; /**< size of the data memory map */ off_t me_size; /**< current file size */ pgno_t me_maxpg; /**< me_mapsize / me_psize */ MDB_dbx *me_dbxs; /**< array of static DB info */ @@ -2552,6 +2576,20 @@ done: } } } +#ifdef MDB_VL32 + { + MDB_ID2L rl = mc->mc_txn->mt_rpages; + unsigned x = mdb_mid2l_search(rl, mp->mp_pgno); + if (x <= rl[0].mid && rl[x].mid == mp->mp_pgno) { + munmap(mp, mc->mc_txn->mt_env->me_psize); + while (x < rl[0].mid) { + rl[x] = rl[x+1]; + x++; + } + rl[0].mid--; + } + } +#endif return 0; fail: @@ -2666,6 +2704,22 @@ mdb_cursors_close(MDB_txn *txn, unsigned merge) } cursors[i] = NULL; } +#ifdef MDB_VL32 + { + unsigned i, n = txn->mt_rpages[0].mid; + for (i = 1; i <= n; i++) { +#ifdef _WIN32 + UnmapViewOfFile(txn->mt_rpages[i].mptr); +#else + MDB_page *mp = txn->mt_rpages[i].mptr; + int size = txn->mt_env->me_psize; + if (IS_OVERFLOW(mp)) size *= mp->mp_pages; + munmap(mp, size); +#endif + } + } + txn->mt_rpages[0].mid = 0; +#endif } #if !(MDB_PIDLOCK) /* Currently the same as defined(_WIN32) */ @@ -2911,6 +2965,15 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) DPRINTF(("calloc: %s", strerror(errno))); return ENOMEM; } +#ifdef MDB_VL32 + if (!parent) { + txn->mt_rpages = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)); + if (!txn->mt_rpages) { + free(txn); + return ENOMEM; + } + } +#endif txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_dbs = (MDB_db *) ((char *)txn + tsize); txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs; @@ -2938,6 +3001,9 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret) parent->mt_child = txn; txn->mt_parent = parent; txn->mt_numdbs = parent->mt_numdbs; +#ifdef MDB_VL32 + txn->mt_rpages = parent->mt_rpages; +#endif memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db)); /* Copy parent's mt_dbflags, but clear DB_NEW */ for (i=0; imt_numdbs; i++) @@ -2963,8 +3029,12 @@ renew: rc = mdb_txn_renew0(txn); } if (rc) { - if (txn != env->me_txn0) + if (txn != env->me_txn0) { +#ifdef MDB_VL32 + free(txn->mt_rpages); +#endif free(txn); + } } else { txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */ *ret = txn; @@ -2983,7 +3053,7 @@ mdb_txn_env(MDB_txn *txn) return txn->mt_env; } -size_t +mdb_size_t mdb_txn_id(MDB_txn *txn) { if(!txn) return 0; @@ -3090,8 +3160,12 @@ mdb_txn_end(MDB_txn *txn, unsigned mode) mdb_midl_free(pghead); } - if (mode & MDB_END_FREE) + if (mode & MDB_END_FREE) { +#ifdef MDB_VL32 + free(txn->mt_rpages); +#endif free(txn); + } } void @@ -3837,7 +3911,7 @@ mdb_env_write_meta(MDB_txn *txn) MDB_env *env; MDB_meta meta, metab, *mp; unsigned flags; - size_t mapsize; + mdb_size_t mapsize; off_t off; int rc, len, toggle; char *ptr; @@ -4014,12 +4088,29 @@ mdb_env_map(MDB_env *env, void *addr) if (rc) return rc; map = addr; +#ifdef MDB_VL32 + msize = 2 * env->me_psize; +#else msize = env->me_mapsize; +#endif rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, MEM_RESERVE, pageprot); +#ifdef MDB_VL32 + env->me_fmh = mh; +#else NtClose(mh); +#endif if (rc) return rc; env->me_map = map; +#else +#ifdef MDB_VL32 + (void) flags; + env->me_map = mmap(addr, 2 * env->me_psize, PROT_READ, MAP_SHARED, + env->me_fd, 0); + if (env->me_map == MAP_FAILED) { + env->me_map = NULL; + return ErrCode(); + } #else int prot = PROT_READ; if (flags & MDB_WRITEMAP) { @@ -4053,6 +4144,7 @@ mdb_env_map(MDB_env *env, void *addr) */ if (addr && env->me_map != addr) return EBUSY; /* TODO: Make a new MDB_* error code? */ +#endif p = (MDB_page *)env->me_map; env->me_metas[0] = METADATA(p); @@ -4062,7 +4154,7 @@ mdb_env_map(MDB_env *env, void *addr) } int ESECT -mdb_env_set_mapsize(MDB_env *env, size_t size) +mdb_env_set_mapsize(MDB_env *env, mdb_size_t size) { /* If env is already open, caller is responsible for making * sure there are no active txns. @@ -4849,6 +4941,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS))) return EINVAL; +#ifdef MDB_VL32 + if (flags & MDB_WRITEMAP) { + /* silently ignore WRITEMAP in 32 bit mode */ + flags ^= MDB_WRITEMAP; + } + if (flags & MDB_FIXEDMAP) { + /* cannot support FIXEDMAP */ + return EINVAL; + } +#endif + len = strlen(path); if (flags & MDB_NOSUBDIR) { rc = len + sizeof(LOCKSUFF) + len + 1; @@ -4972,6 +5075,13 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs); txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs); txn->mt_env = env; +#ifdef MDB_VL32 + txn->mt_rpages = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)); + if (!txn->mt_rpages) { + free(txn); + rc = ENOMEM; + } +#endif txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDB_TXN_FINISHED; env->me_txn0 = txn; @@ -5027,7 +5137,11 @@ mdb_env_close0(MDB_env *env, int excl) } if (env->me_map) { +#ifdef MDB_VL32 + munmap(env->me_map, 2*env->me_psize); +#else munmap(env->me_map, env->me_mapsize); +#endif } if (env->me_mfd != env->me_fd && env->me_mfd != INVALID_HANDLE_VALUE) (void) close(env->me_mfd); @@ -5404,7 +5518,60 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl) if (pgno < txn->mt_next_pgno) { level = 0; +#ifdef MDB_VL32 + { + unsigned x = mdb_mid2l_search(txn->mt_rpages, pgno); + if (x <= txn->mt_rpages[0].mid && txn->mt_rpages[x].mid == pgno) { + p = txn->mt_rpages[x].mptr; + goto done; + } + if (txn->mt_rpages[0].mid >= MDB_IDL_UM_MAX) { + /* unmap some other page */ + mdb_tassert(txn, 0); + } + if (txn->mt_rpages[0].mid < MDB_IDL_UM_SIZE) { + MDB_ID2 id2; + size_t len = env->me_psize; + int np; +#ifdef _WIN32 + off_t off = pgno * env->me_psize; + DWORD lo, hi; + lo = off & 0xffffffff; + hi = off >> 16 >> 16; + p = MapViewOfFile(env->me_fmh, FILE_MAP_READ, hi, lo, len); + if (p == NULL) + return ErrCode(); + if (IS_OVERFLOW(p)) { + np = p->mp_pages; + UnmapViewOfFile(p); + len *= np; + p = MapViewOfFile(env->me_fmh, FILE_MAP_READ, hi, lo, len); + if (p == NULL) + return ErrCode(); + } +#else + off_t off = pgno * env->me_psize; + p = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); + if (p == MAP_FAILED) + return errno; + if (IS_OVERFLOW(p)) { + np = p->mp_pages; + munmap(p, len); + len *= np; + p = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); + if (p == MAP_FAILED) + return errno; + } +#endif + id2.mid = pgno; + id2.mptr = p; + mdb_mid2l_insert(txn->mt_rpages, &id2); + goto done; + } + } +#else p = (MDB_page *)(env->me_map + env->me_psize * pgno); +#endif } else { DPRINTF(("page %"Z"u not found", pgno)); txn->mt_flags |= MDB_TXN_ERROR; @@ -5739,11 +5906,17 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) int rc; MDB_node *indx; MDB_page *mp; +#ifdef MDB_VL32 + MDB_page *op; +#endif if (mc->mc_snum < 2) { return MDB_NOTFOUND; /* root has no siblings */ } +#ifdef MDB_VL32 + op = mc->mc_pg[mc->mc_top]; +#endif mdb_cursor_pop(mc); DPRINTF(("parent page is page %"Z"u, index %u", mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top])); @@ -5768,6 +5941,21 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right) } mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top])); +#ifdef MDB_VL32 + { + MDB_ID2L rl = mc->mc_txn->mt_rpages; + unsigned x = mdb_mid2l_search(rl, op->mp_pgno); + if (x <= rl[0].mid && rl[x].mid == op->mp_pgno) { + munmap(op, mc->mc_txn->mt_env->me_psize); + while (x < rl[0].mid) { + rl[x] = rl[x+1]; + x++; + } + rl[0].mid--; + } + } +#endif + indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) { /* mc will be inconsistent if caller does mc_snum++ as above */ @@ -9977,7 +10165,7 @@ mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx) if (mr[i].mr_pid) { txnid_t txnid = mr[i].mr_txnid; sprintf(buf, txnid == (txnid_t)-1 ? - "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n", + "%10d %"Z"x -\n" : "%10d %"Z"x %"Y"u\n", (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid); if (first) { first = 0; diff --git a/libraries/liblmdb/mdb_dump.c b/libraries/liblmdb/mdb_dump.c index 16c0aae..f0e4831 100644 --- a/libraries/liblmdb/mdb_dump.c +++ b/libraries/liblmdb/mdb_dump.c @@ -25,6 +25,11 @@ #else #define Z "z" #endif +#ifdef MDB_VL32 +#define Y "ll" +#else +#define Y Z +#endif #define PRINT 1 static int mode; @@ -115,7 +120,7 @@ static int dumpit(MDB_txn *txn, MDB_dbi dbi, char *name) if (name) printf("database=%s\n", name); printf("type=btree\n"); - printf("mapsize=%" Z "u\n", info.me_mapsize); + printf("mapsize=%" Y "u\n", info.me_mapsize); if (info.me_mapaddr) printf("mapaddr=%p\n", info.me_mapaddr); printf("maxreaders=%u\n", info.me_maxreaders); diff --git a/libraries/liblmdb/mdb_load.c b/libraries/liblmdb/mdb_load.c index d2f0968..dc27f5f 100644 --- a/libraries/liblmdb/mdb_load.c +++ b/libraries/liblmdb/mdb_load.c @@ -43,6 +43,11 @@ static MDB_val kbuf, dbuf; #else #define Z "z" #endif +#ifdef MDB_VL32 +#define Y "ll" +#else +#define Y Z +#endif #define STRLENOF(s) (sizeof(s)-1) @@ -112,7 +117,7 @@ static void readhdr(void) int i; ptr = memchr(dbuf.mv_data, '\n', dbuf.mv_size); if (ptr) *ptr = '\0'; - i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Z "u", &info.me_mapsize); + i = sscanf((char *)dbuf.mv_data+STRLENOF("mapsize="), "%" Y "u", &info.me_mapsize); if (i != 1) { fprintf(stderr, "%s: line %" Z "d: invalid mapsize %s\n", prog, lineno, (char *)dbuf.mv_data+STRLENOF("mapsize=")); diff --git a/libraries/liblmdb/mdb_stat.c b/libraries/liblmdb/mdb_stat.c index 6f4b3ee..7880131 100644 --- a/libraries/liblmdb/mdb_stat.c +++ b/libraries/liblmdb/mdb_stat.c @@ -22,6 +22,11 @@ #else #define Z "z" #endif +#ifdef MDB_VL32 +#define Y "ll" +#else +#define Y Z +#endif static void prstat(MDB_stat *ms) { @@ -29,10 +34,10 @@ static void prstat(MDB_stat *ms) printf(" Page size: %u\n", ms->ms_psize); #endif printf(" Tree depth: %u\n", ms->ms_depth); - printf(" Branch pages: %"Z"u\n", ms->ms_branch_pages); - printf(" Leaf pages: %"Z"u\n", ms->ms_leaf_pages); - printf(" Overflow pages: %"Z"u\n", ms->ms_overflow_pages); - printf(" Entries: %"Z"u\n", ms->ms_entries); + printf(" Branch pages: %"Y"u\n", ms->ms_branch_pages); + printf(" Leaf pages: %"Y"u\n", ms->ms_leaf_pages); + printf(" Overflow pages: %"Y"u\n", ms->ms_overflow_pages); + printf(" Entries: %"Y"u\n", ms->ms_entries); } static void usage(char *prog) @@ -125,11 +130,11 @@ int main(int argc, char *argv[]) (void)mdb_env_info(env, &mei); printf("Environment Info\n"); printf(" Map address: %p\n", mei.me_mapaddr); - printf(" Map size: %"Z"u\n", mei.me_mapsize); + printf(" Map size: %"Y"u\n", mei.me_mapsize); printf(" Page size: %u\n", mst.ms_psize); - printf(" Max pages: %"Z"u\n", mei.me_mapsize / mst.ms_psize); - printf(" Number of pages used: %"Z"u\n", mei.me_last_pgno+1); - printf(" Last transaction ID: %"Z"u\n", mei.me_last_txnid); + printf(" Max pages: %"Y"u\n", mei.me_mapsize / mst.ms_psize); + printf(" Number of pages used: %"Y"u\n", mei.me_last_pgno+1); + printf(" Last transaction ID: %"Y"u\n", mei.me_last_txnid); printf(" Max readers: %u\n", mei.me_maxreaders); printf(" Number of readers used: %u\n", mei.me_numreaders); } diff --git a/libraries/liblmdb/midl.h b/libraries/liblmdb/midl.h index 2331e78..3df5f70 100644 --- a/libraries/liblmdb/midl.h +++ b/libraries/liblmdb/midl.h @@ -42,7 +42,11 @@ extern "C" { /** A generic unsigned ID number. These were entryIDs in back-bdb. * Preferably it should have the same size as a pointer. */ +#ifdef MDB_VL32 +typedef u_int64_t MDB_ID; +#else typedef size_t MDB_ID; +#endif /** An IDL is an ID List, a sorted array of IDs. The first * element of the array is a counter for how many actual @@ -55,7 +59,11 @@ typedef MDB_ID *MDB_IDL; /* IDL sizes - likely should be even bigger * limiting factors: sizeof(ID), thread stack size */ +#ifdef MDB_VL32 +#define MDB_IDL_LOGN 10 /* DB_SIZE is 2^10, UM_SIZE is 2^11 */ +#else #define MDB_IDL_LOGN 16 /* DB_SIZE is 2^16, UM_SIZE is 2^17 */ +#endif #define MDB_IDL_DB_SIZE (1<