Summary: Make RocksDb build and run on Windows to be functionally complete and performant. All existing test cases run with no regressions. Performance numbers are in the pull-request. Test plan: make all of the existing unit tests pass, obtain perf numbers. Co-authored-by: Praveen Rao praveensinghrao@outlook.com Co-authored-by: Sherlock Huang baihan.huang@gmail.com Co-authored-by: Alex Zinoviev alexander.zinoviev@me.com Co-authored-by: Dmitri Smirnov dmitrism@microsoft.commain
							parent
							
								
									0b1ffe2e1d
								
							
						
					
					
						commit
						18285c1e2f
					
				| @ -0,0 +1,347 @@ | ||||
| # This cmake build is for Windows only. | ||||
| # | ||||
| # Prerequisites: | ||||
| #     You must have Visual Studio 2013 installed. Start the Developer Command Prompt window that is a part of Visual Studio installation. | ||||
| #     Run the build commands from within the Developer Command Prompt window to have paths to the compiler and runtime libraries set. | ||||
| # | ||||
| # To build Rocksdb for Windows is as easy as 1-2-3-4-5: | ||||
| #  | ||||
| # 1. Update paths to third-party libraries in the thirdparty.inc file | ||||
| # 2. Create a new directory for build artifacts | ||||
| #        mkdir build | ||||
| #        cd build | ||||
| # 3. Run cmake to generate project files for Windows | ||||
| #        cmake -G "Visual Studio 12 Win64" .. | ||||
| # 4. Then build the project in debug mode (you may want to add /m:<N> flag to run msbuild in <N> parallel threads) | ||||
| #        msbuild ALL_BUILD.vcxproj | ||||
| # 5. And release mode (/m:<N> is also supported) | ||||
| #        msbuild ALL_BUILD.vcxproj /p:Configuration=Release | ||||
| # | ||||
| 
 | ||||
# Project bootstrap: declare the minimum CMake version and the project name.
cmake_minimum_required(VERSION 2.6)
project(rocksdb)

# Pull in the third-party configuration. The GFLAGS_*, SNAPPY_* and JEMALLOC_*
# variables used by this file are not set here, so they are presumably
# defined by thirdparty.inc.
include("${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc")
| 
 | ||||
# Capture the configure-time date, time and git revision that are later
# written into util/build_version.cc.
#
# All three values are obtained by running the Windows command interpreter
# ($ENV{COMSPEC}); they are evaluated once, when CMake configures the project.
execute_process(COMMAND $ENV{COMSPEC} " /C date /T" OUTPUT_VARIABLE DATE)
execute_process(COMMAND $ENV{COMSPEC} " /C time /T" OUTPUT_VARIABLE TIME)

# The inputs are quoted on purpose: if a capture came back empty, an unquoted
# expansion would drop the <input> argument and string() would abort the
# configure step with a usage error.
# NOTE(review): the date pattern assumes an "MM/DD/YYYY"-like layout with '/'
# separators; other locales pass through unchanged.
string(REGEX REPLACE "(..)/(..)/..(..).*" "\\1/\\2/\\3" DATE "${DATE}")
string(REGEX REPLACE "(..):(.....).*" " \\1:\\2" TIME "${TIME}")

# Plain variable expansion instead of string(CONCAT): CONCAT requires
# CMake >= 3.0 while this file declares 2.6 as its minimum.
set(GIT_DATE_TIME "${DATE}${TIME}")

# Ask git for the current revision.
#  * stderr goes to the NUL device ("2>nul"); the previous "2>nil" created a
#    stray file named "nil" instead of discarding the output.
#  * run from the source tree so the lookup also works when the build
#    directory lives outside the repository.
execute_process(COMMAND $ENV{COMSPEC} " /C git rev-parse HEAD 2>nul"
    WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
    OUTPUT_VARIABLE GIT_SHA)

# Keep only lowercase hex digits (drops the trailing newline); the result is
# empty when git is unavailable.
string(REGEX REPLACE "[^0-9a-f]+" "" GIT_SHA "${GIT_SHA}")
| 
 | ||||
# Location of the generated translation unit that carries the build stamp.
set(BUILD_VERSION_CC ${CMAKE_CURRENT_SOURCE_DIR}/util/build_version.cc)

# Build rule that (re)creates build_version.cc with four echo commands:
# the first truncates the file, the remaining three append to it.
# GIT_SHA and GIT_DATE_TIME are substituted when CMake generates the project,
# while __DATE__ is left for the compiler to expand.
add_custom_command(
    OUTPUT ${BUILD_VERSION_CC}
    COMMAND echo "#include \"build_version.h\"" > ${BUILD_VERSION_CC}
    COMMAND echo "const char* rocksdb_build_git_sha = \"rocksdb_build_git_sha:${GIT_SHA}\";" >> ${BUILD_VERSION_CC}
    COMMAND echo "const char* rocksdb_build_git_datetime = \"rocksdb_build_git_datetime:${GIT_DATE_TIME}\";" >> ${BUILD_VERSION_CC}
    COMMAND echo const char* rocksdb_build_compile_date = __DATE__\; >> ${BUILD_VERSION_CC}
)

# Named target other targets can depend on to make sure the file exists
# before they are compiled.
add_custom_target(GenerateBuildVersion DEPENDS ${BUILD_VERSION_CC})
| 
 | ||||
# ---------------------------------------------------------------------------
# Third-party compile flags, include paths and libraries.
# ---------------------------------------------------------------------------
add_definitions(${GFLAGS_CXX_FLAGS} ${SNAPPY_CXX_FLAGS})
include_directories(${GFLAGS_INCLUDE} ${SNAPPY_INCLUDE} ${JEMALLOC_INCLUDE})
set(THIRDPARTY_LIBS ${GFLAGS_LIBS} ${SNAPPY_LIBS} ${JEMALLOC_LIBS})
# gtest is built from the bundled sources added at the end of this section.
list(APPEND THIRDPARTY_LIBS gtest)

# ---------------------------------------------------------------------------
# MSVC compiler and linker flags.
# ---------------------------------------------------------------------------
# Common flags: debug info, warning level 3 with warnings treated as errors,
# C++ exception handling, and "compile everything as C++" (/TP).
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /W3 /WX /EHsc /GS /fp:precise /Zc:wchar_t /Zc:forScope /Gd /TP /errorReport:queue")
# Full paths in diagnostics plus the list of individually silenced warnings.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /wd4018 /wd4100 /wd4101 /wd4127 /wd4189 /wd4200 /wd4244 /wd4267 /wd4296 /wd4305 /wd4307 /wd4309 /wd4512 /wd4701 /wd4702 /wd4800 /wd4804 /wd4996")

# Per-configuration flags.
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /Od /RTC1 /Gm /MDd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2 /Oi /Gm- /Gy /MD")

# Executables are always linked with debug information.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /DEBUG")

# Platform definitions applied to every target in this directory.
add_definitions(-DWIN32 -DOS_WIN -D_MBCS -DWIN64)

# ---------------------------------------------------------------------------
# Project include paths.
# ---------------------------------------------------------------------------
include_directories(
    ${PROJECT_SOURCE_DIR}
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/port
    ${PROJECT_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src
)

# ---------------------------------------------------------------------------
# Link lists: LIBS for the default build, LIBS_JE for the jemalloc build.
# ---------------------------------------------------------------------------
set(SYSTEM_LIBS Shlwapi.lib Rpcrt4.lib)
set(ROCKSDB_LIBS rocksdblib)
set(ROCKSDB_LIBS_JE rocksdblib_je)

set(LIBS ${ROCKSDB_LIBS} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
set(LIBS_JE ${ROCKSDB_LIBS_JE} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})

# Bundled gtest; added last so it inherits all settings above.
add_subdirectory(third-party/gtest-1.7.0/fused-src/gtest)
| 
 | ||||
# Every translation unit that goes into the RocksDB libraries (static and
# shared, with and without jemalloc). Paths are relative to the source root.
# util/build_version.cc is produced by the GenerateBuildVersion target.
set(SOURCES
  db/builder.cc
  db/c.cc
  db/column_family.cc
  db/compaction.cc
  db/compaction_job.cc
  db/compaction_picker.cc
  db/dbformat.cc
  db/db_filesnapshot.cc
  db/db_impl.cc
  db/db_impl_debug.cc
  db/db_impl_experimental.cc
  db/db_impl_readonly.cc
  db/db_iter.cc
  db/event_helpers.cc
  db/experimental.cc
  db/filename.cc
  db/file_indexer.cc
  db/flush_job.cc
  db/flush_scheduler.cc
  db/forward_iterator.cc
  db/internal_stats.cc
  db/log_reader.cc
  db/log_writer.cc
  db/managed_iterator.cc
  db/memtable.cc
  db/memtable_allocator.cc
  db/memtable_list.cc
  db/merge_helper.cc
  db/merge_operator.cc
  db/repair.cc
  db/slice.cc
  db/table_cache.cc
  db/table_properties_collector.cc
  db/transaction_log_impl.cc
  db/version_builder.cc
  db/version_edit.cc
  db/version_set.cc
  db/wal_manager.cc
  db/write_batch.cc
  db/write_batch_base.cc
  db/write_controller.cc
  db/write_thread.cc
  port/stack_trace.cc
  port/win/env_win.cc
  port/win/port_win.cc
  port/win/win_logger.cc
  table/adaptive_table_factory.cc
  table/block.cc
  table/block_based_filter_block.cc
  table/block_based_table_builder.cc
  table/block_based_table_factory.cc
  table/block_based_table_reader.cc
  table/block_builder.cc
  table/block_hash_index.cc
  table/block_prefix_index.cc
  table/bloom_block.cc
  table/cuckoo_table_builder.cc
  table/cuckoo_table_factory.cc
  table/cuckoo_table_reader.cc
  table/flush_block_policy.cc
  table/format.cc
  table/full_filter_block.cc
  table/get_context.cc
  table/iterator.cc
  table/merger.cc
  table/meta_blocks.cc
  table/mock_table.cc
  table/plain_table_builder.cc
  table/plain_table_factory.cc
  table/plain_table_index.cc
  table/plain_table_key_coding.cc
  table/plain_table_reader.cc
  table/table_properties.cc
  table/two_level_iterator.cc
  util/arena.cc
  util/auto_roll_logger.cc
  util/bloom.cc
  util/build_version.cc
  util/cache.cc
  util/coding.cc
  util/compaction_job_stats_impl.cc
  util/comparator.cc
  util/crc32c.cc
  util/db_info_dumper.cc
  util/dynamic_bloom.cc
  util/env.cc
  util/env_hdfs.cc
  util/event_logger.cc
  util/file_util.cc
  util/filter_policy.cc
  util/hash.cc
  util/hash_cuckoo_rep.cc
  util/hash_linklist_rep.cc
  util/hash_skiplist_rep.cc
  util/histogram.cc
  util/instrumented_mutex.cc
  util/iostats_context.cc
  util/ldb_cmd.cc
  util/ldb_tool.cc
  util/logging.cc
  util/log_buffer.cc
  util/memenv.cc
  util/mock_env.cc
  util/murmurhash.cc
  util/mutable_cf_options.cc
  util/options.cc
  util/options_builder.cc
  util/options_helper.cc
  util/perf_context.cc
  util/perf_level.cc
  util/rate_limiter.cc
  util/skiplistrep.cc
  util/slice.cc
  util/sst_dump_tool.cc
  util/statistics.cc
  util/status.cc
  util/string_util.cc
  util/sync_point.cc
  util/testharness.cc
  util/testutil.cc
  util/thread_local.cc
  util/thread_status_impl.cc
  util/thread_status_updater.cc
  util/thread_status_updater_debug.cc
  util/thread_status_util.cc
  util/thread_status_util_debug.cc
  util/vectorrep.cc
  util/xfunc.cc
  util/xxhash.cc
  utilities/backupable/backupable_db.cc
  utilities/checkpoint/checkpoint.cc
  utilities/compacted_db/compacted_db_impl.cc
  utilities/convenience/convenience.cc
  utilities/document/document_db.cc
  utilities/document/json_document.cc
  utilities/document/json_document_builder.cc
  utilities/flashcache/flashcache.cc
  utilities/geodb/geodb_impl.cc
  utilities/leveldb_options/leveldb_options.cc
  utilities/merge_operators/string_append/stringappend.cc
  utilities/merge_operators/string_append/stringappend2.cc
  utilities/merge_operators/put.cc
  utilities/merge_operators/uint64add.cc
  utilities/redis/redis_lists.cc
  utilities/spatialdb/spatial_db.cc
  utilities/transactions/optimistic_transaction_db_impl.cc
  utilities/transactions/optimistic_transaction_impl.cc
  utilities/ttl/db_ttl_impl.cc
  utilities/write_batch_with_index/write_batch_with_index.cc
  utilities/write_batch_with_index/write_batch_with_index_internal.cc
)
| 
 | ||||
# Four flavours of the library are built from the same SOURCES list. Each one
# writes its debug information to its own .pdb file (/Fd) and depends on
# GenerateBuildVersion so that util/build_version.cc exists before compiling.

# 1. Static library.
add_library(rocksdblib ${SOURCES})
add_dependencies(rocksdblib GenerateBuildVersion)
set_target_properties(rocksdblib PROPERTIES
    COMPILE_FLAGS "/Fd${CMAKE_CFG_INTDIR}/rocksdblib.pdb")

# 2. Static library compiled with the jemalloc flags.
add_library(rocksdblib_je ${SOURCES})
add_dependencies(rocksdblib_je GenerateBuildVersion)
set_target_properties(rocksdblib_je PROPERTIES
    COMPILE_FLAGS "${JEMALLOC_CXX_FLAGS} /Fd${CMAKE_CFG_INTDIR}/rocksdblib_je.pdb")

# 3. Shared library (DLL); ROCKSDB_DLL / ROCKSDB_LIBRARY_EXPORTS are defined
#    for its sources.
add_library(rocksdb SHARED ${SOURCES})
add_dependencies(rocksdb GenerateBuildVersion)
set_target_properties(rocksdb PROPERTIES
    COMPILE_FLAGS "-DROCKSDB_DLL -DROCKSDB_LIBRARY_EXPORTS /Fd${CMAKE_CFG_INTDIR}/rocksdb.pdb")
target_link_libraries(rocksdb ${LIBS})

# 4. Shared library (DLL) compiled with the jemalloc flags.
add_library(rocksdb_je SHARED ${SOURCES})
add_dependencies(rocksdb_je GenerateBuildVersion)
set_target_properties(rocksdb_je PROPERTIES
    COMPILE_FLAGS "${JEMALLOC_CXX_FLAGS} -DROCKSDB_DLL -DROCKSDB_LIBRARY_EXPORTS /Fd${CMAKE_CFG_INTDIR}/rocksdb_je.pdb")
target_link_libraries(rocksdb_je ${LIBS_JE})
| 
 | ||||
# Stand-alone tools and benchmarks; each file becomes its own executable.
set(APPS
  db/db_bench.cc
  db/memtablerep_bench.cc
  table/table_reader_bench.cc
  tools/db_stress.cc
  tools/db_repl_stress.cc
  tools/sst_dump.cc
  tools/dump/rocksdb_dump.cc
  tools/dump/rocksdb_undump.cc
  util/cache_bench.cc
)
| 
 | ||||
# Unit tests; each file becomes its own executable.
set(TESTS
  db/c_test.c
  db/column_family_test.cc
  db/compact_files_test.cc
  db/compaction_job_test.cc
  db/compaction_job_stats_test.cc
  db/compaction_picker_test.cc
  db/comparator_db_test.cc
  db/corruption_test.cc
  db/cuckoo_table_db_test.cc
  db/db_iter_test.cc
  db/db_test.cc
  db/dbformat_test.cc
  db/deletefile_test.cc
  db/fault_injection_test.cc
  db/file_indexer_test.cc
  db/filename_test.cc
  db/flush_job_test.cc
  db/listener_test.cc
  db/log_test.cc
  db/memtable_list_test.cc
  db/merge_test.cc
  db/perf_context_test.cc
  db/plain_table_db_test.cc
  db/prefix_test.cc
  db/skiplist_test.cc
  db/table_properties_collector_test.cc
  db/version_builder_test.cc
  db/version_edit_test.cc
  db/version_set_test.cc
  db/wal_manager_test.cc
  db/write_batch_test.cc
  db/write_callback_test.cc
  db/write_controller_test.cc
  table/block_based_filter_block_test.cc
  table/block_hash_index_test.cc
  table/block_test.cc
  table/cuckoo_table_builder_test.cc
  table/cuckoo_table_reader_test.cc
  table/full_filter_block_test.cc
  table/merger_test.cc
  table/table_test.cc
  tools/db_sanity_test.cc
  tools/reduce_levels_test.cc
  util/arena_test.cc
  util/autovector_test.cc
  util/auto_roll_logger_test.cc
  util/bloom_test.cc
  util/cache_test.cc
  util/coding_test.cc
  util/crc32c_test.cc
  util/dynamic_bloom_test.cc
  util/env_test.cc
  util/event_logger_test.cc
  util/filelock_test.cc
  util/histogram_test.cc
  util/manual_compaction_test.cc
  util/memenv_test.cc
  util/mock_env_test.cc
  util/options_test.cc
  util/rate_limiter_test.cc
  util/slice_transform_test.cc
  util/sst_dump_test.cc
  util/thread_list_test.cc
  util/thread_local_test.cc
  utilities/backupable/backupable_db_test.cc
  utilities/checkpoint/checkpoint_test.cc
  utilities/document/document_db_test.cc
  utilities/document/json_document_test.cc
  utilities/geodb/geodb_test.cc
  utilities/merge_operators/string_append/stringappend_test.cc
  utilities/redis/redis_lists_test.cc
  utilities/spatialdb/spatial_db_test.cc
  utilities/transactions/optimistic_transaction_test.cc
  utilities/ttl/ttl_test.cc
  utilities/write_batch_with_index/write_batch_with_index_test.cc
)
| 
 | ||||
# Build two executables for every tool and test source file:
#   <name>     linked against ${LIBS}
#   <name>_je  compiled with the jemalloc flags and linked against ${LIBS_JE}
# where <name> is the source file name without directory and extension
# (e.g. db/db_bench.cc -> db_bench and db_bench_je).
set(EXES ${APPS} ${TESTS})

foreach(sourcefile ${EXES})
    # NAME_WE strips the directory and the extension in one step. The former
    # string(REPLACE ".cc"/".c" ...) calls removed every occurrence of those
    # substrings, so a path that merely contained ".c" would have been mangled.
    get_filename_component(exename "${sourcefile}" NAME_WE)

    add_executable(${exename} ${sourcefile})
    target_link_libraries(${exename} ${LIBS})

    add_executable(${exename}_je ${sourcefile})
    # Quoted so the call stays well-formed when JEMALLOC_CXX_FLAGS is empty or
    # holds several flags; unquoted, set_target_properties would receive the
    # wrong number of arguments and abort the configure step.
    set_target_properties(${exename}_je PROPERTIES COMPILE_FLAGS "${JEMALLOC_CXX_FLAGS}")
    target_link_libraries(${exename}_je ${LIBS_JE})
endforeach()
| @ -0,0 +1,226 @@ | ||||
| # Microsoft Contribution Notes | ||||
| 
 | ||||
| ## Contributors | ||||
| * Alexander Zinoviev https://github.com/zinoale | ||||
| * Dmitri Smirnov https://github.com/yuslepukhin | ||||
| * Praveen Rao  https://github.com/PraveenSinghRao | ||||
| * Sherlock Huang  https://github.com/SherlockNoMad | ||||
| 
 | ||||
| ## Introduction | ||||
| RocksDB is a well proven open source key-value persistent store, optimized for fast storage. It provides scalability with number of CPUs and storage IOPS, to support IO-bound, in-memory and write-once workloads, most importantly, to be flexible to allow for innovation. | ||||
| 
 | ||||
| As Microsoft Bing team we have been continuously pushing hard to improve the scalability, efficiency of platform and eventually benefit Bing end-user satisfaction.  We would like to explore the opportunity to embrace open source, RocksDB here, to use, enhance and customize for our usage, and also contribute back to the RocksDB community. Herein, we are pleased to offer this RocksDB port for Windows platform. | ||||
| 
 | ||||
| These notes describe some decisions and changes we had to make with regards to porting RocksDB on Windows. We hope this will help both reviewers and users of the Windows port. | ||||
| We are open for comments and improvements. | ||||
| 
 | ||||
| ## OS specifics | ||||
| All of the porting, testing and benchmarking was done on Windows Server 2012 R2 Datacenter but to the best of our knowledge there is not a specific API we used during porting that is unsupported on other Windows OS after Vista. | ||||
| 
 | ||||
| ## Porting goals | ||||
| We strive to achieve the following goals: | ||||
| * make use of the existing porting interface of RocksDB | ||||
| * make minimal modifications within platform-independent code. | ||||
| * make all unit test pass both in debug and release builds.  | ||||
|   * Note: latest introduction of SyncPoint seems to disable running db_test in Release. | ||||
| * make performance on par with published benchmarks accounting for HW differences | ||||
| * we would like to keep the port code inline with the master branch with no forking | ||||
| 
 | ||||
| ## Build system | ||||
| We have chosen CMake as a widely accepted build system to build the Windows port. It is very fast and convenient.  | ||||
| 
 | ||||
| At the same time it generates Visual Studio projects that are both usable from a command line and IDE. | ||||
| 
 | ||||
| The top-level CMakeLists.txt file contains description of all targets and build rules. It also provides brief instructions on how to build the software for Windows. One more build related file is thirdparty.inc that also resides on the top level. This file must be edited to point to actual third party libraries location. | ||||
| We think that it would be beneficial to merge the existing make-based build system and the new cmake-based build system into a single one to use on all platforms. | ||||
| 
 | ||||
| ## C++ and STL notes | ||||
| We had to make some minimum changes within the portable files that either account for OS differences or the shortcomings of C++11 support in the current version of the MS compiler. Most or all of them are expected to be fixed in the upcoming compiler releases. | ||||
| 
 | ||||
| We plan to use this port for our business purposes here at Bing and this provided business justification for this port. This also means that, at present, we are not free to choose the compiler version at will. | ||||
| 
 | ||||
| * Certain headers that are not present and not necessary on Windows were simply `#ifndef OS_WIN` in a few places (`unistd.h`) | ||||
| * All posix specific headers were replaced to port/port.h which worked well | ||||
| * Replaced `dirent.h` for `port/dirent.h` (very few places) with the implementation of the relevant interfaces within `rocksdb::port` namespace | ||||
| * Replaced `sys/time.h` to `port/sys_time.h` (few places) implemented equivalents within `rocksdb::port` | ||||
| * `printf %z` specification is not supported on Windows. To imitate existing standards we came up with a string macro `ROCKSDB_PRIszt` which expands to `%z` on posix systems and to `Iu` on Windows. | ||||
| * In-class member initializations were moved to constructors in some cases | ||||
| * `constexpr` is not supported. We had to replace `std::numeric_limits<>::max/min()` to its C macros for constants. Sometimes we had to make class members `static const` and place a definition within a .cc file. | ||||
| * `constexpr` for functions was replaced to a template specialization (1 place) | ||||
| * Union members that have non-trivial constructors were replaced to `char[]` in one place along with bug fixes (spatial experimental feature) | ||||
| * Zero-sized arrays are deemed a non-standard extension which we converted to 1 size array and that should work well for the purposes of these classes. | ||||
| * `std::chrono` lacks nanoseconds support (fixed in the upcoming release of the STL) and we had to use `QueryPerformanceCounter()` within env_win.cc | ||||
| * Function local statics initialization is still not safe. Used `std::call_once` to mitigate within WinEnv. | ||||
| 
 | ||||
| ## Windows Environments notes | ||||
| We endeavored to make it functionally on par with posix_env. This means we replicated the functionality of the thread pool and other things as precise as possible, including: | ||||
| * Replicate posix logic using `std::thread` primitives. | ||||
| * Implement all posix_env disk access functionality. | ||||
| * Set `use_os_buffer=false` to disable OS disk buffering for WinWritableFile and WinRandomAccessFile. | ||||
| * Replace `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure. | ||||
| * Use `SetFileInformationByHandle` to compensate absence of `fallocate`. | ||||
| 
 | ||||
| ### In detail | ||||
| Even though Windows provides its own efficient thread-pool implementation we chose to replicate posix logic using `std::thread` primitives. This allows anyone to quickly detect any changes within the posix source code and replicate them within windows env. This has proven to work very well. At the same time for anyone who wishes to replace the built-in thread-pool can do so using RocksDB stackable environments. | ||||
| 
 | ||||
| For disk access we implemented all of the functionality present within the posix_env which includes memory mapped files, random access, rate-limiter support etc. | ||||
| The `use_os_buffer` flag on Posix platforms currently denotes disabling read-ahead log via `fadvise` mechanism. Windows does not have `fadvise` system call. What is more, it implements disk cache in a way that differs from Linux greatly. It’s not an uncommon practice on Windows to perform un-buffered disk access to gain control of the memory consumption. We think that in our use case this may also be a good configuration option at the expense of disk throughput. To compensate one may increase the configured in-memory cache size instead. Thus we have chosen  `use_os_buffer=false` to disable OS disk buffering for `WinWritableFile` and `WinRandomAccessFile`. The OS imposes restrictions on the alignment of the disk offsets, buffers used and the amount of data that is read/written when accessing files in un-buffered mode. When the option is true, the classes behave in a standard way. This allows to perform writes and reads in cases when un-buffered access does not make sense such as WAL and MANIFEST. | ||||
| 
 | ||||
| We have replaced `pread/pwrite` with `WriteFile/ReadFile` with `OVERLAPPED` structure so we can atomically seek to the position of the disk operation but still perform the operation synchronously. Thus we are able to emulate the functionality of `pread/pwrite` reasonably well. The only difference is that the file pointer is not returned to its original position but that hardly matters given the random nature of access. | ||||
| 
 | ||||
| We used `SetFileInformationByHandle` both to truncate files after writing a full final page to disk and to pre-allocate disk space for faster I/O thus compensating for the absence of `fallocate` although some differences remain. For example, the pre-allocated space is not filled with zeros like on Linux, however, on a positive note, the end of file position is also not modified after pre-allocation. | ||||
| 
 | ||||
| RocksDB renames, copies and deletes files at will even though they may be opened with another handle at the same time. We had to relax and allow nearly all the concurrent access permissions possible. | ||||
| 
 | ||||
| ## Thread-Local Storage | ||||
| Thread-Local storage plays a significant role for RocksDB performance. Rather than creating a separate implementation we chose to create inline wrappers that forward `pthread_specific` calls to Windows `Tls` interfaces within `rocksdb::port` namespace. This leaves the existing meat of the logic intact and unchanged and just as maintainable. | ||||
| 
 | ||||
| To mitigate the lack of thread local storage cleanup on thread-exit we added a limited amount of Windows-specific code within the same thread_local.cc file that injects a cleanup callback into a `"__tls"` structure within `".CRT$XLB"` data segment. This approach guarantees that the callback is invoked regardless of whether RocksDB is used within an executable, standalone DLL or within another DLL. | ||||
| 
 | ||||
| ## Jemalloc usage | ||||
| 
 | ||||
| When RocksDB is used with Jemalloc the latter needs to be initialized before any of the C++ globals or statics. To accomplish that we injected an initialization routine into `".CRT$XCT"` that is automatically invoked by the runtime before initializing static objects. je-uninit is queued to `atexit()`.  | ||||
| 
 | ||||
| The jemalloc redirecting `new/delete` global operators are used by the linker providing certain conditions are met. See build section in these notes. | ||||
| 
 | ||||
| ## Stack Trace and Unhandled Exception Handler | ||||
| 
 | ||||
| We decided not to implement these two features because the hosting program as a rule has these two things in it. | ||||
| We experienced no inconveniences debugging issues in the debugger or analyzing process dumps if need be and thus we did not | ||||
| see this as a priority. | ||||
| 
 | ||||
| ## Performance results | ||||
| ### Setup | ||||
| All of the benchmarks are run on the same set of machines. Here are the details of the test setup: | ||||
| * 2 Intel(R) Xeon(R) E5 2450 0 @ 2.10 GHz (total 16 cores) | ||||
| * 2 XK0480GDQPH SSD Device, total 894GB free disk | ||||
| * Machine has 128 GB of RAM | ||||
| * Operating System: Windows Server 2012 R2 Datacenter | ||||
| * 100 Million keys; each key is of size 10 bytes, each value is of size 800 bytes | ||||
| * total database size is ~76GB | ||||
| * The performance result is based on RocksDB 3.11. | ||||
| * The parameters used, unless specified, were exactly the same as published in the GitHub Wiki page.  | ||||
| 
 | ||||
| ### RocksDB on flash storage | ||||
| 
 | ||||
| #### Test 1. Bulk Load of keys in Random Order | ||||
| 
 | ||||
| Version 3.11  | ||||
| 
 | ||||
| * Total Run Time: 17.6 min | ||||
| * Fillrandom: 5.480 micros/op 182465 ops/sec;  142.0 MB/s | ||||
| * Compact: 486056544.000 micros/op 0 ops/sec | ||||
| 
 | ||||
| Version 3.10  | ||||
| 
 | ||||
| * Total Run Time: 16.2 min  | ||||
| * Fillrandom: 5.018 micros/op 199269 ops/sec;  155.1 MB/s  | ||||
| * Compact: 441313173.000 micros/op 0 ops/sec;  | ||||
| 
 | ||||
| 
 | ||||
| #### Test 2. Bulk Load of keys in Sequential Order | ||||
| 
 | ||||
| Version 3.11  | ||||
| 
 | ||||
| * Fillseq: 4.944 micros/op 202k ops/sec;  157.4 MB/s | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Fillseq: 4.105 micros/op 243.6k ops/sec;  189.6 MB/s  | ||||
| 
 | ||||
| 
 | ||||
| #### Test 3. Random Write | ||||
| 
 | ||||
| Version 3.11  | ||||
| 
 | ||||
| * Unbuffered I/O enabled | ||||
| * Overwrite: 52.661 micros/op 18.9k ops/sec;   14.8 MB/s | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Unbuffered I/O enabled  | ||||
| * Overwrite: 52.661 micros/op 18.9k ops/sec;  | ||||
| 
 | ||||
| 
 | ||||
| #### Test 4. Random Read | ||||
| 
 | ||||
| Version 3.11  | ||||
| 
 | ||||
| * Unbuffered I/O enabled | ||||
| * Readrandom: 15.716 micros/op 63.6k ops/sec; 49.5 MB/s  | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Unbuffered I/O enabled  | ||||
| * Readrandom: 15.548 micros/op 64.3k ops/sec;  | ||||
| 
 | ||||
| 
 | ||||
| #### Test 5. Multi-threaded read and single-threaded write | ||||
| 
 | ||||
| Version 3.11 | ||||
| 
 | ||||
| * Unbuffered I/O enabled | ||||
| * Readwhilewriting: 25.128 micros/op 39.7k ops/sec;  | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Unbuffered I/O enabled  | ||||
| * Readwhilewriting: 24.854 micros/op 40.2k ops/sec;  | ||||
| 
 | ||||
| 
 | ||||
| ### RocksDB In Memory  | ||||
| 
 | ||||
| #### Test 1. Point Lookup | ||||
| 
 | ||||
| Version 3.11 | ||||
| 
 | ||||
| 80K writes/sec | ||||
| * Write Rate Achieved: 40.5k write/sec; | ||||
| * Readwhilewriting: 0.314 micros/op 3187455 ops/sec;  364.8 MB/s (715454999 of 715454999 found) | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Write Rate Achieved:  50.6k write/sec  | ||||
| * Readwhilewriting: 0.316 micros/op 3162028 ops/sec; (719576999 of 719576999 found)  | ||||
| 
 | ||||
| 
 | ||||
| *10K writes/sec* | ||||
| 
 | ||||
| Version 3.11 | ||||
| 
 | ||||
| * Write Rate Achieved: 5.8k/s write/sec | ||||
| * Readwhilewriting: 0.246 micros/op 4062669 ops/sec;  464.9 MB/s (915481999 of 915481999 found) | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Write Rate Achieved: 5.8k/s write/sec  | ||||
| * Readwhilewriting: 0.244 micros/op 4106253 ops/sec; (927986999 of 927986999 found)  | ||||
| 
 | ||||
| 
 | ||||
| #### Test 2. Prefix Range Query | ||||
| 
 | ||||
| Version 3.11 | ||||
| 
 | ||||
| 80K writes/sec | ||||
| * Write Rate Achieved:  46.3k/s write/sec | ||||
| * Readwhilewriting: 0.362 micros/op 2765052 ops/sec;  316.4 MB/s (611549999 of 611549999 found) | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Write Rate Achieved: 45.8k/s write/sec  | ||||
| * Readwhilewriting: 0.317 micros/op 3154941 ops/sec; (708158999 of 708158999 found)  | ||||
| 
 | ||||
| Version 3.11 | ||||
| 
 | ||||
| 10K writes/sec | ||||
| * Write Rate Achieved: 5.78k write/sec | ||||
| * Readwhilewriting: 0.269 micros/op 3716692 ops/sec;  425.3 MB/s (837401999 of 837401999 found) | ||||
| 
 | ||||
| Version 3.10 | ||||
| 
 | ||||
| * Write Rate Achieved: 5.7k write/sec  | ||||
| * Readwhilewriting: 0.261 micros/op 3830152 ops/sec; (863482999 of 863482999 found)  | ||||
| 
 | ||||
| 
 | ||||
| We think that there is still big room to improve the performance, which will be an ongoing effort for us. | ||||
| 
 | ||||
| @ -0,0 +1,24 @@ | ||||
@echo off

REM Record the version of the source that we are compiling.
REM The git revision is written to util\build_version_<CONFIGURATION>.cc
REM (relative to the parent of this script's directory). That source file is
REM then built as a regular source file as part of the compilation process.
REM One can run "strings executable_filename | grep _build_" to find the version of
REM the source that we used to build the executable file.
REM
REM Usage: <this script> <CONFIGURATION>

set "CONFIGURATION=%~1"

REM Work relative to the directory that contains this script.
pushd "%~dp0"

REM The whole assignment is quoted, so the value itself carries no quotes;
REM every use of the variable below is quoted instead.
set "OUTFILE=..\util\build_version_%CONFIGURATION%.cc"

REM Ask git for the current revision. GIT_SHA stays empty when git is not
REM available or this is not a git checkout.
set "GIT_SHA="
for /f "delims=" %%i in ('git rev-parse HEAD 2^>nul') do set "GIT_SHA=%%i"

REM The first line truncates the file, the following lines append to it.
REM %GIT_SHA%, %DATE% and %TIME% are expanded by cmd; the previous bash-style
REM ${GIT_SHA} and $(date) placeholders were written to the file verbatim.
> "%OUTFILE%" echo #include "build_version.h"
>> "%OUTFILE%" echo const char* rocksdb_build_git_sha = "rocksdb_build_git_sha:%GIT_SHA%";
>> "%OUTFILE%" echo const char* rocksdb_build_git_datetime = "rocksdb_build_git_datetime:%DATE% %TIME%";
>> "%OUTFILE%" echo const char* rocksdb_build_compile_date = __DATE__;

popd
| @ -0,0 +1,99 @@ | ||||
| @echo off | ||||
| rem Runs every unit test executable listed below through :runtest, in order, | ||||
| rem and then prints the totals via :stat. | ||||
| rem Currently disabled: benchharness_test.exe, signal_test.exe, | ||||
| rem                     write_batch_with_index_test.exe | ||||
| call :init | ||||
| for %%T in ( | ||||
|     arena_test.exe | ||||
|     autovector_test.exe | ||||
|     auto_roll_logger_test.exe | ||||
|     backupable_db_test.exe | ||||
|     block_based_filter_block_test.exe | ||||
|     block_hash_index_test.exe | ||||
|     block_test.exe | ||||
|     bloom_test.exe | ||||
|     cache_test.exe | ||||
|     coding_test.exe | ||||
|     column_family_test.exe | ||||
|     compaction_job_test.exe | ||||
|     compaction_picker_test.exe | ||||
|     comparator_db_test.exe | ||||
|     corruption_test.exe | ||||
|     crc32c_test.exe | ||||
|     cuckoo_table_builder_test.exe | ||||
|     cuckoo_table_db_test.exe | ||||
|     cuckoo_table_reader_test.exe | ||||
|     dbformat_test.exe | ||||
|     db_iter_test.exe | ||||
|     db_test.exe | ||||
|     deletefile_test.exe | ||||
|     dynamic_bloom_test.exe | ||||
|     env_test.exe | ||||
|     fault_injection_test.exe | ||||
|     filelock_test.exe | ||||
|     filename_test.exe | ||||
|     file_indexer_test.exe | ||||
|     full_filter_block_test.exe | ||||
|     histogram_test.exe | ||||
|     listener_test.exe | ||||
|     log_test.exe | ||||
|     manual_compaction_test.exe | ||||
|     memenv_test.exe | ||||
|     merger_test.exe | ||||
|     merge_test.exe | ||||
|     mock_env_test.exe | ||||
|     options_test.exe | ||||
|     perf_context_test.exe | ||||
|     plain_table_db_test.exe | ||||
|     prefix_test.exe | ||||
|     rate_limiter_test.exe | ||||
|     redis_lists_test.exe | ||||
|     skiplist_test.exe | ||||
|     slice_transform_test.exe | ||||
|     sst_dump_test.exe | ||||
|     stringappend_test.exe | ||||
|     table_properties_collector_test.exe | ||||
|     table_test.exe | ||||
|     thread_list_test.exe | ||||
|     thread_local_test.exe | ||||
|     ttl_test.exe | ||||
|     version_builder_test.exe | ||||
|     version_edit_test.exe | ||||
|     version_set_test.exe | ||||
|     wal_manager_test.exe | ||||
|     write_batch_test.exe | ||||
|     write_controller_test.exe | ||||
| ) do call :runtest %%T | ||||
| call :stat | ||||
| goto :eof | ||||
| 
 | ||||
| :init | ||||
| rem Reset the counters that :runtest updates and :stat reports. | ||||
| set "tests=0" | ||||
| set "passed=0" | ||||
| set "failed=0" | ||||
| goto :eof | ||||
| 
 | ||||
| :runtest | ||||
| rem Run the executable given as %1, capture its output in %1.log and count it | ||||
| rem as passed when the log contains "PASSED" or "Passed all tests". | ||||
| set /A tests+=1 | ||||
| echo|set /p=Running %1...  | ||||
| %1 > %1.log 2>&1 | ||||
| findstr /C:"PASSED" %1.log > nul 2>&1 | ||||
| IF NOT ERRORLEVEL 1 goto :runtest_ok | ||||
| findstr /C:"Passed all tests" %1.log > nul 2>&1 | ||||
| IF NOT ERRORLEVEL 1 goto :runtest_ok | ||||
| echo ***FAILED*** | ||||
| set /A failed+=1 | ||||
| goto :eof | ||||
| :runtest_ok | ||||
| echo OK | ||||
| set /A passed+=1 | ||||
| goto :eof | ||||
| 
 | ||||
| :stat | ||||
| rem Print the summary of the run and return the number of failed tests as the | ||||
| rem exit code, so that callers (e.g. CI) can detect failures without parsing output. | ||||
| echo ================= | ||||
| echo Total tests : %tests% | ||||
| echo Passed      : %passed% | ||||
| echo Failed      : %failed% | ||||
| exit /b %failed% | ||||
									
										
											File diff suppressed because it is too large
											Load Diff
										
									
								
							
						| @ -0,0 +1,37 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| #ifndef STORAGE_LEVELDB_UTILITIES_PRAGMA_ERROR_H_ | ||||
| #define STORAGE_LEVELDB_UTILITIES_PRAGMA_ERROR_H_ | ||||
| 
 | ||||
| // Two-step stringification: RDB_STR expands its argument (e.g. __LINE__)
 | ||||
| // before RDB_STR__ turns it into a string literal.
 | ||||
| #define RDB_STR__(x) #x | ||||
| #define RDB_STR(x) RDB_STR__(x) | ||||
| 
 | ||||
| // ROCKSDB_WARNING(x) wraps the unportable warning macro.
 | ||||
| #if defined(ROCKSDB_PLATFORM_POSIX) || (defined(OS_WIN) && !defined(_MSC_VER)) | ||||
| 
 | ||||
| // make #warning into #pragma GCC warning gcc 4.7+ and clang 3.2+ supported
 | ||||
| #   define ROCKSDB_WARNING(x)  _Pragma(RDB_STR(GCC warning(x))) | ||||
| 
 | ||||
| #elif defined(OS_WIN) | ||||
| 
 | ||||
| // Visual Studio: format it according to visual studio output
 | ||||
| // (to get source lines and warnings in the IDE)
 | ||||
| #   define ROCKSDB_WARNING(x) __pragma( message(__FILE__ "(" RDB_STR(__LINE__) ") : warning: " x) ) | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| #endif  // STORAGE_LEVELDB_UTILITIES_PRAGMA_ERROR_H_
 | ||||
| @ -0,0 +1,51 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| //
 | ||||
| // See port_example.h for documentation for the following types/functions.
 | ||||
| 
 | ||||
| #ifndef STORAGE_LEVELDB_PORT_DIRENT_H_ | ||||
| #define STORAGE_LEVELDB_PORT_DIRENT_H_ | ||||
| 
 | ||||
| #ifdef ROCKSDB_PLATFORM_POSIX | ||||
| #  include <sys/types.h> | ||||
| #  include <dirent.h> | ||||
| #elif defined(OS_WIN) | ||||
| 
 | ||||
| #  include <stdlib.h>  /* _MAX_PATH */ | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| namespace port { | ||||
| 
 | ||||
| // Minimal Windows stand-ins for the POSIX directory-enumeration API.
 | ||||
| 
 | ||||
| // A directory entry; only the entry name is provided.
 | ||||
| struct dirent { | ||||
|     char           d_name[_MAX_PATH]; /* filename */ | ||||
| }; | ||||
| 
 | ||||
| // Opaque enumeration state; only ever handled by pointer.
 | ||||
| struct DIR; | ||||
| 
 | ||||
| // Starts enumerating the directory `name`.
 | ||||
| DIR* opendir(const char* name); | ||||
| 
 | ||||
| // Returns the next entry of `dirp`.
 | ||||
| dirent* readdir(DIR* dirp); | ||||
| 
 | ||||
| // Ends the enumeration started by opendir().
 | ||||
| int closedir(DIR* dirp); | ||||
| 
 | ||||
| } // namespace port
 | ||||
| 
 | ||||
| // Make the replacements visible under the usual unqualified names inside rocksdb.
 | ||||
| using port::dirent; | ||||
| using port::DIR; | ||||
| using port::opendir; | ||||
| using port::readdir; | ||||
| using port::closedir; | ||||
| 
 | ||||
| } // namespace rocksdb
 | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| #endif // STORAGE_LEVELDB_PORT_DIRENT_H_
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| @ -0,0 +1,49 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| // This file is a portable substitute for sys/time.h which does not exist on Windows
 | ||||
| 
 | ||||
| #ifndef STORAGE_LEVELDB_PORT_SYS_TIME_H_ | ||||
| #define STORAGE_LEVELDB_PORT_SYS_TIME_H_ | ||||
| 
 | ||||
| #if !(defined(_WIN32) && defined(_MSC_VER)) | ||||
| #  include <time.h> | ||||
| #  include <sys/time.h> | ||||
| #else | ||||
| 
 | ||||
| #include <time.h> | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| namespace port { | ||||
| 
 | ||||
| // Avoid including winsock2.h for this definition
 | ||||
| struct timeval { | ||||
|     long tv_sec; | ||||
|     long tv_usec; | ||||
| }; | ||||
| 
 | ||||
| // Declared here only; the time zone argument is part of the signature.
 | ||||
| void gettimeofday(struct timeval* tv, struct timezone* tz); | ||||
| 
 | ||||
| // localtime_r in terms of localtime_s (note the swapped argument order):
 | ||||
| // yields `result` when localtime_s reports 0, a null pointer otherwise.
 | ||||
| inline struct tm* localtime_r(const time_t *timep, struct tm *result) { | ||||
|     return (localtime_s(result, timep) == 0) ? result : nullptr; | ||||
| } | ||||
| 
 | ||||
| } // namespace port
 | ||||
| 
 | ||||
| using port::timeval; | ||||
| using port::gettimeofday; | ||||
| using port::localtime_r; | ||||
| 
 | ||||
| } // namespace rocksdb
 | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| #endif // STORAGE_LEVELDB_PORT_SYS_TIME_H_
 | ||||
| @ -0,0 +1,24 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| #ifndef STORAGE_LEVELDB_PORT_UTIL_LOGGER_H_ | ||||
| #define STORAGE_LEVELDB_PORT_UTIL_LOGGER_H_ | ||||
| 
 | ||||
| // Include the appropriate platform specific file below.  If you are
 | ||||
| // porting to a new platform, see "port_example.h" for documentation
 | ||||
| // of what the new port_<platform>.h file must provide.
 | ||||
| 
 | ||||
| 
 | ||||
| #if defined(ROCKSDB_PLATFORM_POSIX) | ||||
| # include "util/posix_logger.h" | ||||
| #elif defined(OS_WIN) | ||||
| # include "port/win/win_logger.h" | ||||
| #endif | ||||
| 
 | ||||
| #endif // STORAGE_LEVELDB_PORT_UTIL_LOGGER_H_
 | ||||
									
										
											File diff suppressed because it is too large
											Load Diff
										
									
								
							
						| @ -0,0 +1,330 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| #if !defined(OS_WIN) && !defined(WIN32) && !defined(_WIN32) | ||||
| #error Windows Specific Code | ||||
| #endif | ||||
| 
 | ||||
| #include "port/win/port_win.h" | ||||
| 
 | ||||
| #include <io.h> | ||||
| #include "port/dirent.h" | ||||
| #include "port/sys_time.h" | ||||
| 
 | ||||
| #include <cstdlib> | ||||
| #include <stdio.h> | ||||
| #include <assert.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <exception> | ||||
| #include <chrono> | ||||
| 
 | ||||
| #include "util/logging.h" | ||||
| 
 | ||||
| namespace rocksdb 
 | ||||
| { | ||||
| namespace port 
 | ||||
| { | ||||
| 
 | ||||
| // Fills *tv with the system_clock time since its epoch, split into whole
 | ||||
| // seconds and the remaining microseconds. The time zone argument is ignored.
 | ||||
| void gettimeofday(struct timeval* tv, struct timezone* /* tz */) { | ||||
|     using namespace std::chrono; | ||||
| 
 | ||||
|     const microseconds totalUs = | ||||
|         duration_cast<microseconds>(system_clock::now().time_since_epoch()); | ||||
|     const seconds wholeSec = duration_cast<seconds>(totalUs); | ||||
| 
 | ||||
|     tv->tv_sec = wholeSec.count(); | ||||
|     tv->tv_usec = (totalUs - wholeSec).count(); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| Mutex::Mutex(bool adaptive) : lock(m_mutex, std::defer_lock) { | ||||
| } | ||||
| 
 | ||||
| // Nothing to release explicitly; the members are destroyed by their own destructors.
 | ||||
| Mutex::~Mutex() { 
 | ||||
| } | ||||
| 
 | ||||
| // Acquires the mutex through the unique_lock member.
 | ||||
| void Mutex::Lock() { | ||||
| 
 | ||||
|     lock.lock(); | ||||
| #ifndef NDEBUG | ||||
|     // Debug-only bookkeeping checked by AssertHeld()
 | ||||
|     locked_ = true; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Releases the mutex through the unique_lock member.
 | ||||
| void Mutex::Unlock() { | ||||
| 
 | ||||
| #ifndef NDEBUG | ||||
|     // Clear the debug flag while the mutex is still held, i.e. before unlocking
 | ||||
|     locked_ = false; | ||||
| #endif | ||||
|     lock.unlock(); | ||||
| } | ||||
| 
 | ||||
| // Debug builds: asserts that the mutex is marked as locked (by any thread).
 | ||||
| // Release builds (NDEBUG): does nothing.
 | ||||
| void Mutex::AssertHeld() { | ||||
| #ifndef NDEBUG | ||||
|     assert(locked_); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Remembers the mutex whose lock object the wait functions use; `mu` is not owned.
 | ||||
| CondVar::CondVar(Mutex* mu) : mu_(mu) { | ||||
| } | ||||
| 
 | ||||
| // Nothing to release explicitly; the associated Mutex is left untouched.
 | ||||
| CondVar::~CondVar() { 
 | ||||
| } | ||||
| 
 | ||||
| // Waits on the condition variable using the associated Mutex's lock object.
 | ||||
| void CondVar::Wait() { | ||||
| #ifndef NDEBUG | ||||
|     // The wait releases the mutex, so the debug "locked" flag is cleared for its duration
 | ||||
|     mu_->locked_ = false; | ||||
| #endif | ||||
|     cv_.wait(mu_->getLock()); | ||||
| #ifndef NDEBUG | ||||
|     // The mutex is held again once wait() returns
 | ||||
|     mu_->locked_ = true; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| // Waits on the condition variable until signalled or until the absolute time
 | ||||
| // `abs_time_us` (microseconds since the system_clock epoch) has been reached.
 | ||||
| // Returns true if the wait timed out, false otherwise.
 | ||||
| bool CondVar::TimedWait(uint64_t abs_time_us) { | ||||
| #ifndef NDEBUG | ||||
|     // The wait releases the mutex, so the debug "locked" flag is cleared for its duration
 | ||||
|     mu_->locked_ = false; | ||||
| #endif | ||||
| 
 | ||||
|     using namespace std::chrono; | ||||
| 
 | ||||
|     // Convert the absolute deadline into a relative timeout; a deadline that
 | ||||
|     // has already passed becomes a zero-length wait.
 | ||||
|     microseconds usAbsTime(abs_time_us); | ||||
|     microseconds usNow(duration_cast<microseconds>(system_clock::now().time_since_epoch())); | ||||
|     microseconds relTimeUs = (usAbsTime > usNow) ? (usAbsTime - usNow) : microseconds::zero(); | ||||
| 
 | ||||
|     // Use the standard std::cv_status rather than a library-internal type name
 | ||||
|     std::cv_status cvStatus = cv_.wait_for(mu_->getLock(), relTimeUs); | ||||
| 
 | ||||
| #ifndef NDEBUG | ||||
|     mu_->locked_ = true; | ||||
| #endif | ||||
| 
 | ||||
|     return cvStatus == std::cv_status::timeout; | ||||
| } | ||||
| 
 | ||||
| // Wakes one thread waiting on this condition variable.
 | ||||
| void CondVar::Signal() { | ||||
| 
 | ||||
|     cv_.notify_one(); | ||||
| } | ||||
| 
 | ||||
| // Wakes all threads waiting on this condition variable.
 | ||||
| void CondVar::SignalAll() { | ||||
|     cv_.notify_all (); | ||||
| } | ||||
| 
 | ||||
| // Runs `initializer` via std::call_once, keyed on the flag `*once`.
 | ||||
| void InitOnce(OnceType* once, void (*initializer)()) { | ||||
| 
 | ||||
|     std::call_once(*once, initializer); | ||||
| } | ||||
| 
 | ||||
| // Private structure, exposed only by pointer
 | ||||
| // Holds the state of one directory enumeration started by opendir().
 | ||||
| struct DIR { | ||||
|     intptr_t               handle_; | ||||
|     bool                   firstread_; | ||||
|     struct __finddata64_t  data_; | ||||
|     dirent                 entry_; | ||||
| 
 | ||||
|     // handle_ == -1 means "no open search"; firstread_ starts out true
 | ||||
|     DIR() : handle_(-1), firstread_(true) {} | ||||
| 
 | ||||
|     // Not copyable: the destructor closes handle_
 | ||||
|     DIR(const DIR&) = delete; | ||||
|     DIR& operator=(const DIR&) = delete; | ||||
| 
 | ||||
|     // Closes the search handle if one was opened
 | ||||
|     ~DIR() { | ||||
| 
 | ||||
|         if (-1 != handle_) { | ||||
|             _findclose(handle_); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| 
 | ||||
| 
 | ||||
| // Starts enumerating directory `name`. Returns a heap-allocated DIR (to be
 | ||||
| // released with closedir) or nullptr on failure; a null or empty name sets
 | ||||
| // errno to ENOENT.
 | ||||
| DIR* opendir(const char* name) { | ||||
|     const bool emptyName = (name == nullptr) || (name[0] == '\0'); | ||||
|     if (emptyName) { | ||||
|         errno = ENOENT; | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     // Search pattern "<name>\*"
 | ||||
|     const std::string searchPattern = std::string(name) + "\\" + "*"; | ||||
| 
 | ||||
|     std::unique_ptr<DIR> state(new DIR); | ||||
|     state->handle_ = _findfirst64(searchPattern.c_str(), &state->data_); | ||||
|     if (state->handle_ == -1) { | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     // Keep the first match; readdir() hands it out on its first call
 | ||||
|     const char* firstName = state->data_.name; | ||||
|     strncpy_s(state->entry_.d_name, firstName, strlen(firstName)); | ||||
| 
 | ||||
|     return state.release(); | ||||
| } | ||||
| 
 | ||||
| // Returns the next entry of the enumeration, or nullptr when _findnext64
 | ||||
| // reports no further entry. A null or unopened `dirp` sets errno to EBADF.
 | ||||
| struct dirent* readdir(DIR* dirp) { | ||||
|     if (dirp == nullptr || dirp->handle_ == -1) { | ||||
|         errno = EBADF; | ||||
|         return nullptr; | ||||
|     } | ||||
| 
 | ||||
|     if (!dirp->firstread_) { | ||||
|         // Advance to the next match and publish its name
 | ||||
|         if (_findnext64(dirp->handle_, &dirp->data_) != 0) { | ||||
|             return nullptr; | ||||
|         } | ||||
|         const char* nextName = dirp->data_.name; | ||||
|         strncpy_s(dirp->entry_.d_name, nextName, strlen(nextName)); | ||||
|     } else { | ||||
|         // The first entry was already fetched by opendir()
 | ||||
|         dirp->firstread_ = false; | ||||
|     } | ||||
| 
 | ||||
|     return &dirp->entry_; | ||||
| } | ||||
| 
 | ||||
| // Destroys the enumeration state (the DIR destructor closes the search handle).
 | ||||
| // Always returns 0.
 | ||||
| int closedir(DIR* dirp) { | ||||
|     delete dirp; | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| // Sets the size of the existing file `path` to `len` bytes.
 | ||||
| // Returns 0 on success. On failure returns -1 and sets errno:
 | ||||
| //   EFAULT - path is null
 | ||||
| //   EINVAL - len is negative
 | ||||
| //   ENOENT / EACCES / EIO - the file could not be opened
 | ||||
| //   EIO - the end-of-file position could not be set
 | ||||
| int truncate(const char* path, int64_t len) { | ||||
| 
 | ||||
|   if (path == nullptr) { | ||||
|     errno = EFAULT; | ||||
|     return -1; | ||||
|   } | ||||
| 
 | ||||
|   if (len < 0) { | ||||
|     errno = EINVAL; | ||||
|     return -1; | ||||
|   } | ||||
| 
 | ||||
|   // Call the ANSI entry point explicitly: `path` is a narrow string, and the
 | ||||
|   // CreateFile macro resolves to the wide-character version when UNICODE is defined.
 | ||||
|   HANDLE hFile = CreateFileA(path, | ||||
|     GENERIC_READ | GENERIC_WRITE, | ||||
|     0, // No sharing while truncating
 | ||||
|     NULL, // Security attrs
 | ||||
|     OPEN_EXISTING, // Truncate existing file only
 | ||||
|     FILE_ATTRIBUTE_NORMAL, | ||||
|     NULL); | ||||
| 
 | ||||
|   if (INVALID_HANDLE_VALUE == hFile) { | ||||
|     // Map the most common Win32 errors onto errno values
 | ||||
|     auto lastError = GetLastError(); | ||||
|     if (lastError == ERROR_FILE_NOT_FOUND) { | ||||
|       errno = ENOENT; | ||||
|     } else if (lastError == ERROR_ACCESS_DENIED) { | ||||
|       errno = EACCES; | ||||
|     } else { | ||||
|       errno = EIO; | ||||
|     } | ||||
|     return -1; | ||||
|   } | ||||
| 
 | ||||
|   int result = 0; | ||||
|   FILE_END_OF_FILE_INFO end_of_file; | ||||
|   end_of_file.EndOfFile.QuadPart = len; | ||||
| 
 | ||||
|   if (!SetFileInformationByHandle(hFile, | ||||
|         FileEndOfFileInfo, | ||||
|         &end_of_file, | ||||
|         sizeof(FILE_END_OF_FILE_INFO))) { | ||||
|     errno = EIO; | ||||
|     result = -1; | ||||
|   } | ||||
| 
 | ||||
|   // The handle is closed on both the success and the failure path
 | ||||
|   CloseHandle(hFile); | ||||
|   return result; | ||||
| } | ||||
| 
 | ||||
| }  // namespace port
 | ||||
| }  // namespace rocksdb
 | ||||
| 
 | ||||
| #ifdef JEMALLOC | ||||
| 
 | ||||
| #include "jemalloc/jemalloc.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| namespace port { | ||||
| 
 | ||||
| // Initializes jemalloc and registers je_uninit to run at process exit.
 | ||||
| __declspec(noinline) | ||||
| void WINAPI InitializeJemalloc() { | ||||
|     je_init(); | ||||
|     atexit(je_uninit); | ||||
| } | ||||
| 
 | ||||
| } // port
 | ||||
| } // rocksdb
 | ||||
| 
 | ||||
| // Places a pointer to InitializeJemalloc into the .CRT$XCT section and forces
 | ||||
| // the linker to keep that symbol via /INCLUDE.
 | ||||
| extern "C" { | ||||
| 
 | ||||
| #ifdef _WIN64 | ||||
| 
 | ||||
| #pragma comment(linker, "/INCLUDE:p_rocksdb_init_jemalloc") | ||||
| 
 | ||||
| typedef void (WINAPI *CRT_Startup_Routine)(void); | ||||
| 
 | ||||
| // .CRT section is merged with .rdata on x64 so it must be constant data.
 | ||||
| // must be of external linkage
 | ||||
| // We put this into XCT since we want to run this earlier than C++ static constructors
 | ||||
| // which are placed into XCU
 | ||||
| #pragma const_seg(".CRT$XCT") | ||||
| extern const CRT_Startup_Routine p_rocksdb_init_jemalloc; | ||||
| const CRT_Startup_Routine p_rocksdb_init_jemalloc = rocksdb::port::InitializeJemalloc; | ||||
| #pragma const_seg() | ||||
| 
 | ||||
| #else // _WIN64
 | ||||
| 
 | ||||
| // x86 untested
 | ||||
| // NOTE(review): this branch declares the pointer `static` (internal linkage) while
 | ||||
| // also naming it in /INCLUDE, and spells the return type `const void`, unlike the
 | ||||
| // x64 branch above - verify that it compiles and links before relying on it.
 | ||||
| 
 | ||||
| #pragma comment(linker, "/INCLUDE:_p_rocksdb_init_jemalloc") | ||||
| 
 | ||||
| #pragma section(".CRT$XCT", read) | ||||
| JEMALLOC_SECTION(".CRT$XCT") JEMALLOC_ATTR(used) | ||||
| static const void (WINAPI *p_rocksdb_init_jemalloc)(void) = rocksdb::port::InitializeJemalloc; | ||||
| 
 | ||||
| #endif // _WIN64
 | ||||
| 
 | ||||
| } // extern "C"
 | ||||
| 
 | ||||
| // Global operators to be replaced by a linker
 | ||||
| 
 | ||||
| // Allocates through jemalloc; throws std::bad_alloc when je_malloc returns null.
 | ||||
| void* operator new(size_t size) { | ||||
|   if (void* block = je_malloc(size)) { | ||||
|     return block; | ||||
|   } | ||||
|   throw std::bad_alloc(); | ||||
| } | ||||
| 
 | ||||
| // Array form: allocates through jemalloc; throws std::bad_alloc when je_malloc returns null.
 | ||||
| void* operator new[](size_t size) { | ||||
|   if (void* block = je_malloc(size)) { | ||||
|     return block; | ||||
|   } | ||||
|   throw std::bad_alloc(); | ||||
| } | ||||
| 
 | ||||
| // Returns memory obtained from the jemalloc-backed operator new.
 | ||||
| void operator delete(void* p) { je_free(p); } | ||||
| 
 | ||||
| // Returns memory obtained from the jemalloc-backed operator new[].
 | ||||
| void operator delete[](void* p) { je_free(p); } | ||||
| 
 | ||||
| #endif // JEMALLOC
 | ||||
| 
 | ||||
| @ -0,0 +1,576 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| //
 | ||||
| // See port_example.h for documentation for the following types/functions.
 | ||||
| 
 | ||||
| #ifndef STORAGE_LEVELDB_PORT_PORT_WIN_H_ | ||||
| #define STORAGE_LEVELDB_PORT_PORT_WIN_H_ | ||||
| 
 | ||||
| // Always want minimum headers
 | ||||
| #ifndef WIN32_LEAN_AND_MEAN | ||||
| #  define WIN32_LEAN_AND_MEAN | ||||
| #endif | ||||
| 
 | ||||
| // Assume that for everywhere
 | ||||
| #undef PLATFORM_IS_LITTLE_ENDIAN | ||||
| #define PLATFORM_IS_LITTLE_ENDIAN true | ||||
| 
 | ||||
| #include <windows.h> | ||||
| #include <string> | ||||
| #include <string.h> | ||||
| #include <mutex> | ||||
| #include <condition_variable> | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| 
 | ||||
| #include "rocksdb/options.h" | ||||
| 
 | ||||
| #ifndef strcasecmp | ||||
| #define strcasecmp _stricmp | ||||
| #endif | ||||
| 
 | ||||
| // defined in stdio.h
 | ||||
| #ifndef snprintf | ||||
| #define snprintf _snprintf | ||||
| #endif | ||||
| 
 | ||||
| typedef SSIZE_T ssize_t; | ||||
| 
 | ||||
| // size_t printf formatting named in the manner of C99 standard formatting strings such as PRIu64
 | ||||
| // in fact, we could use that one
 | ||||
| #define ROCKSDB_PRIszt "Iu" | ||||
| 
 | ||||
| #define __attribute__(A) | ||||
| 
 | ||||
| #ifdef ZLIB | ||||
| #include <zlib.h> | ||||
| #endif | ||||
| 
 | ||||
| #ifdef BZIP2 | ||||
| #include <bzlib.h> | ||||
| #endif | ||||
| 
 | ||||
| #if defined(LZ4) | ||||
| #include <lz4.h> | ||||
| #include <lz4hc.h> | ||||
| #endif | ||||
| 
 | ||||
| #ifdef SNAPPY | ||||
| #include "snappy.h" | ||||
| #endif | ||||
| 
 | ||||
| // Thread local storage on Linux
 | ||||
| // There is thread_local in C++11
 | ||||
| #define __thread __declspec(thread) | ||||
| 
 | ||||
| #ifndef PLATFORM_IS_LITTLE_ENDIAN | ||||
| #define PLATFORM_IS_LITTLE_ENDIAN (__BYTE_ORDER == __LITTLE_ENDIAN) | ||||
| #endif | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| #define PREFETCH(addr, rw, locality) | ||||
| 
 | ||||
| namespace port 
 | ||||
| { | ||||
| 
 | ||||
| const bool kLittleEndian = true; | ||||
| 
 | ||||
| class CondVar; | ||||
| 
 | ||||
| // Mutex built on std::mutex. Lock()/Unlock() operate on a single
 | ||||
| // std::unique_lock member, which is also what CondVar waits on.
 | ||||
| class Mutex { | ||||
| public: | ||||
|     /* implicit */ Mutex(bool adaptive = false); | ||||
|     ~Mutex(); | ||||
| 
 | ||||
|     void Lock(); | ||||
|     void Unlock(); | ||||
| 
 | ||||
|     // this will assert if the mutex is not locked
 | ||||
|     // it does NOT verify that mutex is held by a calling thread
 | ||||
|     void AssertHeld(); | ||||
| 
 | ||||
|     // Access to the underlying lock object
 | ||||
|     std::unique_lock<std::mutex>& getLock() { return lock; } | ||||
| 
 | ||||
| private: | ||||
|     friend class CondVar; | ||||
| 
 | ||||
|     // No copying
 | ||||
|     Mutex(const Mutex&); | ||||
|     void operator=(const Mutex&); | ||||
| 
 | ||||
|     // Keep this declaration order: m_mutex is constructed before lock
 | ||||
|     std::mutex m_mutex; | ||||
|     std::unique_lock<std::mutex> lock; | ||||
| #ifndef NDEBUG | ||||
|     // Debug-only flag read by AssertHeld()
 | ||||
|     bool locked_; | ||||
| #endif | ||||
| }; | ||||
| 
 | ||||
| // Reader/writer lock implemented as a thin wrapper around a Windows SRWLOCK.
 | ||||
| class RWMutex { | ||||
| public: | ||||
|     RWMutex() { InitializeSRWLock(&srwLock_); } | ||||
| 
 | ||||
|     // Shared (reader) side
 | ||||
|     void ReadLock() { AcquireSRWLockShared(&srwLock_); } | ||||
|     void ReadUnlock() { ReleaseSRWLockShared(&srwLock_); } | ||||
| 
 | ||||
|     // Exclusive (writer) side
 | ||||
|     void WriteLock() { AcquireSRWLockExclusive(&srwLock_); } | ||||
|     void WriteUnlock() { ReleaseSRWLockExclusive(&srwLock_); } | ||||
| 
 | ||||
|     //TODO: psrao - should be implemented
 | ||||
|     void AssertHeld() {} | ||||
| 
 | ||||
| private: | ||||
|     SRWLOCK srwLock_; | ||||
| 
 | ||||
|     // No copying allowed
 | ||||
|     RWMutex(const RWMutex&); | ||||
|     void operator=(const RWMutex&); | ||||
| }; | ||||
| 
 | ||||
| // Condition variable bound to a Mutex supplied at construction (not owned).
 | ||||
| class CondVar 
 | ||||
| { | ||||
| public: | ||||
|     explicit CondVar(Mutex* mu); | ||||
|     ~CondVar(); | ||||
|     void Wait(); | ||||
|     // Returns true if the wait timed out
 | ||||
|     bool TimedWait(uint64_t expiration_time); | ||||
|     void Signal(); | ||||
|     void SignalAll(); | ||||
| private: | ||||
|     std::condition_variable cv_; | ||||
|     Mutex * mu_; | ||||
| }; | ||||
| 
 | ||||
| // One-time initialization support built on std::once_flag / std::call_once.
 | ||||
| // NOTE(review): LEVELDB_ONCE_INIT expands with a trailing semicolon and an explicit
 | ||||
| // constructor-name call - confirm this form is accepted by every supported compiler.
 | ||||
| typedef std::once_flag OnceType; | ||||
| #define LEVELDB_ONCE_INIT std::once_flag::once_flag(); | ||||
| extern void InitOnce(OnceType* once, void (*initializer)()); | ||||
| 
 | ||||
| // Compresses `input` into `*output` with Snappy and returns true.
 | ||||
| // Returns false when built without SNAPPY. `opts` is not used.
 | ||||
| inline bool Snappy_Compress(const CompressionOptions& opts, const char* input, | ||||
|                             size_t length, ::std::string* output) { | ||||
| #ifdef SNAPPY | ||||
|     // Reserve the worst-case size, then shrink to what was actually written
 | ||||
|     size_t compressed_len = 0; | ||||
|     output->resize(snappy::MaxCompressedLength(length)); | ||||
|     snappy::RawCompress(input, length, &(*output)[0], &compressed_len); | ||||
|     output->resize(compressed_len); | ||||
|     return true; | ||||
| #else | ||||
|     return false; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Forwards to snappy::GetUncompressedLength, which stores the length in `*result`.
 | ||||
| // Returns false when built without SNAPPY.
 | ||||
| inline bool Snappy_GetUncompressedLength(const char* input, size_t length, | ||||
|                                          size_t* result) { | ||||
| #ifdef SNAPPY | ||||
|   return snappy::GetUncompressedLength(input, length, result); | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Forwards to snappy::RawUncompress, writing the decompressed bytes to `output`.
 | ||||
| // Returns false when built without SNAPPY.
 | ||||
| // NOTE(review): presumably `output` must be sized via Snappy_GetUncompressedLength - confirm at call sites.
 | ||||
| inline bool Snappy_Uncompress(const char* input, size_t length, | ||||
|                               char* output) { | ||||
| #ifdef SNAPPY | ||||
|   return snappy::RawUncompress(input, length, output); | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Deflates `input` into `*output` using the level, window_bits and strategy
 | ||||
| // from `opts`. Returns true on success; returns false if zlib reports an
 | ||||
| // error or when built without ZLIB.
 | ||||
| inline bool Zlib_Compress(const CompressionOptions& opts, const char* input, | ||||
|                           size_t length, ::std::string* output) { | ||||
| #ifdef ZLIB | ||||
|   // The memLevel parameter specifies how much memory should be allocated for
 | ||||
|   // the internal compression state.
 | ||||
|   // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
 | ||||
|   // memLevel=9 uses maximum memory for optimal speed.
 | ||||
|   // The default value is 8. See zconf.h for more details.
 | ||||
|   static const int memLevel = 8; | ||||
|   z_stream _stream; | ||||
|   memset(&_stream, 0, sizeof(z_stream)); | ||||
|   if (deflateInit2(&_stream, opts.level, Z_DEFLATED, opts.window_bits, | ||||
|                    memLevel, opts.strategy) != Z_OK) { | ||||
|     return false; | ||||
|   } | ||||
| 
 | ||||
|   // Start with an output buffer as large as the plain data.
 | ||||
|   // This may not be big enough if the compression actually expands data.
 | ||||
|   output->resize(length); | ||||
| 
 | ||||
|   _stream.next_in = (Bytef *)input; | ||||
|   _stream.avail_in = length; | ||||
|   _stream.avail_out = length; | ||||
|   _stream.next_out = (Bytef *)&(*output)[0]; | ||||
| 
 | ||||
|   for (;;) { | ||||
|     const int rc = deflate(&_stream, Z_FINISH); | ||||
|     if (rc == Z_STREAM_END) { | ||||
|       break; | ||||
|     } | ||||
|     if (rc != Z_OK) { | ||||
|       // Z_BUF_ERROR and every other status are treated as failure
 | ||||
|       deflateEnd(&_stream); | ||||
|       return false; | ||||
|     } | ||||
|     // No output space. Increase the output space by 20% (at least 10 bytes).
 | ||||
|     // (Should we fail the compression since it expands the size?)
 | ||||
|     const int old_sz = output->size(); | ||||
|     const int grow_by = (int)(output->size() * 0.2); | ||||
|     const int new_sz = output->size() + (grow_by < 10 ? 10 : grow_by); | ||||
|     output->resize(new_sz); | ||||
|     // Continue writing right after the bytes produced so far.
 | ||||
|     _stream.next_out = (Bytef *)&(*output)[old_sz]; | ||||
|     _stream.avail_out = new_sz - old_sz; | ||||
|   } | ||||
| 
 | ||||
|   // Trim the unused tail of the buffer
 | ||||
|   output->resize(output->size() - _stream.avail_out); | ||||
|   deflateEnd(&_stream); | ||||
|   return true; | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Inflates `input_data` into a buffer allocated with new[] and stores the
 | ||||
| // number of decompressed bytes in `*decompress_size`. Returns the buffer, or
 | ||||
| // nullptr if zlib reports an error or when built without ZLIB.
 | ||||
| inline char* Zlib_Uncompress(const char* input_data, size_t input_length, | ||||
|     int* decompress_size, int windowBits = -14) { | ||||
| #ifdef ZLIB | ||||
|   z_stream _stream; | ||||
|   memset(&_stream, 0, sizeof(z_stream)); | ||||
| 
 | ||||
|   // For raw inflate, the windowBits should be -8..-15.
 | ||||
|   // If windowBits is bigger than zero, it will use either zlib
 | ||||
|   // header or gzip header. Adding 32 to it will do automatic detection.
 | ||||
|   const int inflate_bits = windowBits > 0 ? windowBits + 32 : windowBits; | ||||
|   if (inflateInit2(&_stream, inflate_bits) != Z_OK) { | ||||
|     return nullptr; | ||||
|   } | ||||
| 
 | ||||
|   _stream.next_in = (Bytef *)input_data; | ||||
|   _stream.avail_in = input_length; | ||||
| 
 | ||||
|   // Assume the decompressed data size will 5x of compressed size.
 | ||||
|   int output_len = input_length * 5; | ||||
|   char* output = new char[output_len]; | ||||
| 
 | ||||
|   _stream.next_out = (Bytef *)output; | ||||
|   _stream.avail_out = output_len; | ||||
| 
 | ||||
|   for (;;) { | ||||
|     const int rc = inflate(&_stream, Z_SYNC_FLUSH); | ||||
|     if (rc == Z_STREAM_END) { | ||||
|       break; | ||||
|     } | ||||
|     if (rc != Z_OK) { | ||||
|       // Z_BUF_ERROR and every other status are treated as failure
 | ||||
|       delete[] output; | ||||
|       inflateEnd(&_stream); | ||||
|       return nullptr; | ||||
|     } | ||||
|     // No output space. Increase the output space by 20% (at least 10 bytes).
 | ||||
|     const int old_sz = output_len; | ||||
|     const int grow_by = (int)(output_len * 0.2); | ||||
|     output_len += grow_by < 10 ? 10 : grow_by; | ||||
|     char* bigger = new char[output_len]; | ||||
|     memcpy(bigger, output, old_sz); | ||||
|     delete[] output; | ||||
|     output = bigger; | ||||
| 
 | ||||
|     // Continue writing right after the bytes copied over.
 | ||||
|     _stream.next_out = (Bytef *)(output + old_sz); | ||||
|     _stream.avail_out = output_len - old_sz; | ||||
|   } | ||||
| 
 | ||||
|   *decompress_size = output_len - _stream.avail_out; | ||||
|   inflateEnd(&_stream); | ||||
|   return output; | ||||
| #else | ||||
|   return nullptr; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| // Compresses `input` into `*output` with bzip2. Returns true on success;
 | ||||
| // returns false if bzip2 reports an error or when built without BZIP2.
 | ||||
| // `opts` is not used.
 | ||||
| inline bool BZip2_Compress(const CompressionOptions& opts, const char* input, | ||||
|                            size_t length, ::std::string* output) { | ||||
| #ifdef BZIP2 | ||||
|   bz_stream _stream; | ||||
|   memset(&_stream, 0, sizeof(bz_stream)); | ||||
| 
 | ||||
|   // Block size 1 is 100K.
 | ||||
|   // 0 is for silent.
 | ||||
|   // 30 is the default workFactor
 | ||||
|   int st = BZ2_bzCompressInit(&_stream, 1, 0, 30); | ||||
|   if (st != BZ_OK) { | ||||
|     return false; | ||||
|   } | ||||
| 
 | ||||
|   // Resize output to be the plain data length.
 | ||||
|   // This may not be big enough if the compression actually expands data.
 | ||||
|   output->resize(length); | ||||
| 
 | ||||
|   // Compress the input, and put compressed data in output.
 | ||||
|   _stream.next_in = (char *)input; | ||||
|   _stream.avail_in = length; | ||||
| 
 | ||||
|   // Initialize the output size.
 | ||||
|   _stream.next_out = (char *)&(*output)[0]; | ||||
|   _stream.avail_out = length; | ||||
| 
 | ||||
|   int old_sz = 0, new_sz = 0, new_sz_delta = 0; | ||||
|   // Keep calling until bzip2 reports BZ_STREAM_END. Looping on avail_in alone
 | ||||
|   // would stop as soon as the input is consumed, even though compressed data
 | ||||
|   // may still be pending (BZ_FINISH_OK), and would truncate the output.
 | ||||
|   bool done = false; | ||||
|   while (!done) { | ||||
|     st = BZ2_bzCompress(&_stream, BZ_FINISH); | ||||
|     switch (st) { | ||||
|       case BZ_STREAM_END: | ||||
|         done = true; | ||||
|         break; | ||||
|       case BZ_FINISH_OK: | ||||
|         // No output space. Increase the output space by 20%, but by at least
 | ||||
|         // 10 bytes so that small buffers always make progress.
 | ||||
|         // (Should we fail the compression since it expands the size?)
 | ||||
|         old_sz = output->size(); | ||||
|         new_sz_delta = (int)(output->size() * 0.2); | ||||
|         new_sz = output->size() + (new_sz_delta < 10 ? 10 : new_sz_delta); | ||||
|         output->resize(new_sz); | ||||
|         // Set more output.
 | ||||
|         _stream.next_out = (char *)&(*output)[old_sz]; | ||||
|         _stream.avail_out = new_sz - old_sz; | ||||
|         break; | ||||
|       case BZ_SEQUENCE_ERROR: | ||||
|       default: | ||||
|         BZ2_bzCompressEnd(&_stream); | ||||
|         return false; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   // Trim the unused tail of the buffer
 | ||||
|   output->resize(output->size() - _stream.avail_out); | ||||
|   BZ2_bzCompressEnd(&_stream); | ||||
|   return true; | ||||
| #else | ||||
|   return false; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
// Decompresses the bzip2 stream in `input_data` (`input_length` bytes).
//
// On success returns a buffer allocated with new[] (the caller releases it
// with delete[]) and stores the number of decompressed bytes in
// `*decompress_size`. Returns nullptr if bzip2 reports an error or if the
// input ends before the stream is complete (this includes empty input).
// Always returns nullptr when the build does not define BZIP2.
inline char* BZip2_Uncompress(const char* input_data, size_t input_length,
                              int* decompress_size) {
#ifdef BZIP2
  bz_stream _stream;
  memset(&_stream, 0, sizeof(bz_stream));

  int st = BZ2_bzDecompressInit(&_stream, 0, 0);
  if (st != BZ_OK) {
    return nullptr;
  }

  _stream.next_in = (char *)input_data;
  _stream.avail_in = input_length;

  // Assume the decompressed data size will be 5x of compressed size.
  int output_len = input_length * 5;
  char* output = new char[output_len];
  int old_sz = output_len;

  _stream.next_out = (char *)output;
  _stream.avail_out = output_len;

  char* tmp = nullptr;
  int output_len_delta;
  bool done = false;

  // Run until bzip2 reports the end of the stream. Stopping when avail_in
  // reaches zero would cut the result short whenever the input is consumed
  // before all of the output has been written.
  while (!done) {
    st = BZ2_bzDecompress(&_stream);
    switch (st) {
      case BZ_STREAM_END:
        done = true;
        break;
      case BZ_OK:
        if (_stream.avail_out != 0) {
          if (_stream.avail_in == 0) {
            // There is output space left but no input left and the stream
            // has not ended: the input is truncated.
            delete[] output;
            BZ2_bzDecompressEnd(&_stream);
            return nullptr;
          }
          // Both input and output space remain; simply call again.
          break;
        }

        // No output space. Increase the output space by 20%, but by at
        // least 10 bytes so that small buffers really grow.
        old_sz = output_len;
        output_len_delta = (int)(output_len * 0.2);
        output_len += output_len_delta < 10 ? 10 : output_len_delta;
        tmp = new char[output_len];
        memcpy(tmp, output, old_sz);
        delete[] output;
        output = tmp;

        // Set more output.
        _stream.next_out = (char *)(output + old_sz);
        _stream.avail_out = output_len - old_sz;
        break;
      default:
        delete[] output;
        BZ2_bzDecompressEnd(&_stream);
        return nullptr;
    }
  }

  *decompress_size = output_len - _stream.avail_out;
  BZ2_bzDecompressEnd(&_stream);
  return output;
#endif
  return nullptr;
}
| 
 | ||||
| inline bool LZ4_Compress(const CompressionOptions &opts, const char *input, | ||||
|                          size_t length, ::std::string* output) { | ||||
| #ifdef LZ4 | ||||
|   int compressBound = LZ4_compressBound(length); | ||||
|   output->resize(8 + compressBound); | ||||
|   char *p = const_cast<char *>(output->c_str()); | ||||
|   memcpy(p, &length, sizeof(length)); | ||||
|   size_t outlen; | ||||
|   outlen = LZ4_compress_limitedOutput(input, p + 8, length, compressBound); | ||||
|   if (outlen == 0) { | ||||
|     return false; | ||||
|   } | ||||
|   output->resize(8 + outlen); | ||||
|   return true; | ||||
| #endif | ||||
|   return false; | ||||
| } | ||||
| 
 | ||||
// Decompresses a buffer laid out as an 8-byte header followed by an LZ4
// payload.
//
// The decompressed length is read from the start of the header as an int.
// On success returns a buffer allocated with new[] (the caller releases it
// with delete[]) and stores the number of decompressed bytes in
// `*decompress_size`. Returns nullptr when the input is shorter than the
// header, when the stored length is negative, or when LZ4 reports an error.
// Always returns nullptr when the build does not define LZ4.
inline char* LZ4_Uncompress(const char* input_data, size_t input_length,
                            int* decompress_size) {
#ifdef LZ4
  if (input_length < 8) {
    return nullptr;
  }
  int output_len;
  memcpy(&output_len, input_data, sizeof(output_len));
  // The length comes straight from the input, so it may be corrupt. A
  // negative value must not reach new[], where it would turn into an
  // enormous allocation request.
  if (output_len < 0) {
    return nullptr;
  }
  char *output = new char[output_len];
  *decompress_size = LZ4_decompress_safe_partial(
      input_data + 8, output, input_length - 8, output_len, output_len);
  if (*decompress_size < 0) {
    delete[] output;
    return nullptr;
  }
  return output;
#endif
  return nullptr;
}
| 
 | ||||
| inline bool LZ4HC_Compress(const CompressionOptions &opts, const char* input, | ||||
|                            size_t length, ::std::string* output) { | ||||
| #ifdef LZ4 | ||||
|   int compressBound = LZ4_compressBound(length); | ||||
|   output->resize(8 + compressBound); | ||||
|   char *p = const_cast<char *>(output->c_str()); | ||||
|   memcpy(p, &length, sizeof(length)); | ||||
|   size_t outlen; | ||||
| #ifdef LZ4_VERSION_MAJOR  // they only started defining this since r113
 | ||||
|   outlen = LZ4_compressHC2_limitedOutput(input, p + 8, length, compressBound, | ||||
|                                          opts.level); | ||||
| #else | ||||
|   outlen = LZ4_compressHC_limitedOutput(input, p + 8, length, compressBound); | ||||
| #endif | ||||
|   if (outlen == 0) { | ||||
|     return false; | ||||
|   } | ||||
|   output->resize(8 + outlen); | ||||
|   return true; | ||||
| #endif | ||||
|   return false; | ||||
| } | ||||
| 
 | ||||
| #define CACHE_LINE_SIZE 64U | ||||
| 
 | ||||
| #ifdef min | ||||
| #undef min | ||||
| #endif | ||||
| #ifdef max | ||||
| #undef max | ||||
| #endif | ||||
| 
 | ||||
| // For Thread Local Storage abstraction
 | ||||
| typedef DWORD pthread_key_t; | ||||
| 
 | ||||
| inline | ||||
| int pthread_key_create(pthread_key_t *key, void(*destructor)(void*)) { | ||||
|     // Not used
 | ||||
|     (void)destructor; | ||||
| 
 | ||||
|     pthread_key_t k = TlsAlloc(); | ||||
|     if (k == TLS_OUT_OF_INDEXES) { | ||||
|         return ENOMEM; | ||||
|     } | ||||
| 
 | ||||
|     *key = k; | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| inline | ||||
| int pthread_key_delete(pthread_key_t key) { | ||||
|     if(!TlsFree(key)) { | ||||
|         return EINVAL; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| inline | ||||
| int pthread_setspecific(pthread_key_t key, const void *value) { | ||||
|     if(!TlsSetValue(key, const_cast<void*>(value))) { | ||||
|         return ENOMEM; | ||||
|     } | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| inline | ||||
| void* pthread_getspecific(pthread_key_t key) { | ||||
|     void* result = TlsGetValue(key); | ||||
|     if(!result) { | ||||
|         if(GetLastError() != ERROR_SUCCESS) { | ||||
|             errno = EINVAL; | ||||
|         } else { | ||||
|             errno = NOERROR; | ||||
|         } | ||||
|     } | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| // UNIX equivalent, although the errno values will differ.
 | ||||
| // Implemented using the C runtime. Note that this does not
 | ||||
| // fill the space with zeros in case the file is extended.
 | ||||
| int truncate(const char* path, int64_t length); | ||||
| 
 | ||||
| } // namespace port
 | ||||
| 
 | ||||
| using port::pthread_key_t; | ||||
| using port::pthread_key_create; | ||||
| using port::pthread_key_delete; | ||||
| using port::pthread_setspecific; | ||||
| using port::pthread_getspecific; | ||||
| using port::truncate; | ||||
| 
 | ||||
| } // namespace rocksdb
 | ||||
| 
 | ||||
| #endif  // STORAGE_LEVELDB_PORT_PORT_POSIX_H_
 | ||||
| @ -1,24 +0,0 @@ | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| 
 | ||||
| // MSVC didn't ship with this file until the 2010 version.
 | ||||
| 
 | ||||
| #ifndef STORAGE_LEVELDB_PORT_WIN_STDINT_H_ | ||||
| #define STORAGE_LEVELDB_PORT_WIN_STDINT_H_ | ||||
| 
 | ||||
| #if !defined(_MSC_VER) | ||||
| #error This file should only be included when compiling with MSVC. | ||||
| #endif | ||||
| 
 | ||||
| // Define C99 equivalent types.
 | ||||
| typedef signed char           int8_t; | ||||
| typedef signed short          int16_t; | ||||
| typedef signed int            int32_t; | ||||
| typedef signed long long      int64_t; | ||||
| typedef unsigned char         uint8_t; | ||||
| typedef unsigned short        uint16_t; | ||||
| typedef unsigned int          uint32_t; | ||||
| typedef unsigned long long    uint64_t; | ||||
| 
 | ||||
| #endif  // STORAGE_LEVELDB_PORT_WIN_STDINT_H_
 | ||||
| @ -0,0 +1,154 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| //
 | ||||
| // Logger implementation that can be shared by all environments
 | ||||
| // where enough posix functionality is available.
 | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <algorithm> | ||||
| #include <stdio.h> | ||||
| #include <time.h> | ||||
| #include <fcntl.h> | ||||
| #include <atomic> | ||||
| 
 | ||||
| #include "rocksdb/env.h" | ||||
| #include "port/win/win_logger.h" | ||||
| #include "port/sys_time.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| //const int kDebugLogChunkSize = 128 * 1024;
 | ||||
| 
 | ||||
| WinLogger::WinLogger(uint64_t (*gettid)(), Env* env, FILE * file, const InfoLogLevel log_level) | ||||
|             : Logger(log_level), | ||||
|             gettid_(gettid), | ||||
|             log_size_(0), | ||||
|             last_flush_micros_(0), | ||||
|             env_(env), | ||||
|             flush_pending_(false), | ||||
|             file_(file) { | ||||
| } | ||||
| 
 | ||||
| void WinLogger::DebugWriter(const char* str, int len) { | ||||
|     size_t sz = fwrite(str, 1, len, file_); | ||||
|     if (sz == 0) { | ||||
|         perror("fwrite .. [BAD]"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| WinLogger::~WinLogger() { | ||||
|     close(); | ||||
| } | ||||
| 
 | ||||
| void WinLogger::close() { | ||||
|     fclose(file_); | ||||
| } | ||||
| 
 | ||||
| void WinLogger::Flush() { | ||||
|     if (flush_pending_) { | ||||
|         flush_pending_ = false; | ||||
|         fflush(file_); | ||||
|     } | ||||
| 
 | ||||
|     last_flush_micros_ = env_->NowMicros(); | ||||
| } | ||||
| 
 | ||||
| void WinLogger::Logv(const char* format, va_list ap) { | ||||
|     const uint64_t thread_id = (*gettid_)(); | ||||
| 
 | ||||
|     // We try twice: the first time with a fixed-size stack allocated buffer,
 | ||||
|     // and the second time with a much larger dynamically allocated buffer.
 | ||||
|     char buffer[500]; | ||||
|     std::unique_ptr<char[]> largeBuffer; | ||||
|     for (int iter = 0; iter < 2; ++iter) { | ||||
|         char* base; | ||||
|         int bufsize; | ||||
|         if (iter == 0) { | ||||
|             bufsize = sizeof(buffer); | ||||
|             base = buffer; | ||||
|         } else { | ||||
|             bufsize = 30000; | ||||
|             largeBuffer.reset(new char[bufsize]); | ||||
|             base = largeBuffer.get(); | ||||
|         } | ||||
| 
 | ||||
|         char* p = base; | ||||
|         char* limit = base + bufsize; | ||||
| 
 | ||||
|         struct timeval now_tv; | ||||
|         gettimeofday(&now_tv, nullptr); | ||||
|         const time_t seconds = now_tv.tv_sec; | ||||
|         struct tm t; | ||||
|         localtime_s(&t, &seconds); | ||||
|         p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ", t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, | ||||
|                         t.tm_hour, | ||||
|                         t.tm_min, | ||||
|                         t.tm_sec, | ||||
|                         static_cast<int>(now_tv.tv_usec), | ||||
|                         static_cast<long long unsigned int>(thread_id)); | ||||
| 
 | ||||
|         // Print the message
 | ||||
|         if (p < limit) { | ||||
|             va_list backup_ap; | ||||
|             va_copy(backup_ap, ap); | ||||
|             int done = vsnprintf(p, limit - p, format, backup_ap); | ||||
|             if (done > 0){ | ||||
|               p += done; | ||||
|             } else { | ||||
|               continue; | ||||
|             } | ||||
|             va_end(backup_ap); | ||||
|         } | ||||
| 
 | ||||
|         // Truncate to available space if necessary
 | ||||
|         if (p >= limit) { | ||||
|             if (iter == 0) 
 | ||||
|             { | ||||
|                 continue;       // Try again with larger buffer
 | ||||
|             } else { | ||||
|                 p = limit - 1; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         // Add newline if necessary
 | ||||
|         if (p == base || p[-1] != '\n') { | ||||
|             *p++ = '\n'; | ||||
|         } | ||||
| 
 | ||||
|         assert(p <= limit); | ||||
|         const size_t write_size = p - base; | ||||
| 
 | ||||
|         size_t sz = fwrite(base, 1, write_size, file_); | ||||
|         if (sz == 0) { | ||||
|           perror("fwrite .. [BAD]"); | ||||
|         } | ||||
| 
 | ||||
|         flush_pending_ = true; | ||||
|         assert(sz == write_size); | ||||
|         if (sz > 0) { | ||||
|             log_size_ += write_size; | ||||
|         } | ||||
| 
 | ||||
|         uint64_t now_micros = static_cast<uint64_t>(now_tv.tv_sec) * 1000000 + | ||||
|         now_tv.tv_usec; | ||||
|         if (now_micros - last_flush_micros_ >= flush_every_seconds_ * 1000000) { | ||||
|             flush_pending_ = false; | ||||
|             fflush(file_); | ||||
|             last_flush_micros_ = now_micros; | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| size_t WinLogger::GetLogFileSize() const { | ||||
|     return log_size_; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1,52 @@ | ||||
| //  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
 | ||||
| //  This source code is licensed under the BSD-style license found in the
 | ||||
| //  LICENSE file in the root directory of this source tree. An additional grant
 | ||||
| //  of patent rights can be found in the PATENTS file in the same directory.
 | ||||
| //
 | ||||
| // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 | ||||
| // Use of this source code is governed by a BSD-style license that can be
 | ||||
| // found in the LICENSE file. See the AUTHORS file for names of contributors.
 | ||||
| //
 | ||||
| // Logger implementation that can be shared by all environments
 | ||||
| // where enough posix functionality is available.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <atomic> | ||||
| 
 | ||||
| #include "rocksdb/env.h" | ||||
| 
 | ||||
| namespace rocksdb { | ||||
| 
 | ||||
| class Env; | ||||
| 
 | ||||
| const int kDebugLogChunkSize = 128 * 1024; | ||||
| 
 | ||||
| class WinLogger : public rocksdb::Logger { | ||||
| private: | ||||
|     FILE*                     file_; | ||||
|     uint64_t                  (*gettid_)();  // Return the thread id for the current thread
 | ||||
|     std::atomic_size_t        log_size_; | ||||
|     std::atomic_uint_fast64_t last_flush_micros_; | ||||
|     Env*                      env_; | ||||
|     bool                      flush_pending_; | ||||
| 
 | ||||
|     const static uint64_t flush_every_seconds_ = 5; | ||||
| 
 | ||||
| public: | ||||
|     WinLogger(uint64_t(*gettid)(), Env* env, FILE * file, const InfoLogLevel log_level = InfoLogLevel::ERROR_LEVEL); | ||||
| 
 | ||||
|     virtual ~WinLogger(); | ||||
| 
 | ||||
|     void close(); | ||||
| 
 | ||||
|     void Flush() override; | ||||
| 
 | ||||
|     void Logv(const char* format, va_list ap) override; | ||||
| 
 | ||||
|     size_t GetLogFileSize() const override; | ||||
| 
 | ||||
|     void DebugWriter(const char* str, int len); | ||||
| }; | ||||
| 
 | ||||
| }  // namespace rocksdb
 | ||||
| @ -0,0 +1 @@ | ||||
# Build the bundled Google Test source file as the `gtest` library target.
add_library(gtest
  gtest-all.cc
)
| @ -0,0 +1,40 @@ | ||||
| # Edit definitions below to specify paths to include files and libraries of all 3rd party libraries | ||||
| 
 | ||||
| # This example assumes all the libraries are located in the same directory tree under the THIRDPARTY_HOME environment variable | ||||
| # Set environment variable THIRDPARTY_HOME to point to your third party libraries home (Unix style dir separators) | ||||
| 
 | ||||
#
# GFLAGS: edit the four paths below to match your installation.
#
set(GFLAGS_HOME "$ENV{THIRDPARTY_HOME}/Gflags.Library")
set(GFLAGS_INCLUDE "${GFLAGS_HOME}/inc/include")
set(GFLAGS_LIB_DEBUG "${GFLAGS_HOME}/bin/debug/amd64/gflags.lib")
set(GFLAGS_LIB_RELEASE "${GFLAGS_HOME}/bin/retail/amd64/gflags.lib")

# Derived from the paths above -- do not edit.
set(GFLAGS_CXX_FLAGS "-DGFLAGS=gflags")
set(GFLAGS_LIBS
  debug "${GFLAGS_LIB_DEBUG}"
  optimized "${GFLAGS_LIB_RELEASE}"
)
| 
 | ||||
#
# Snappy: edit the four paths below to match your installation.
#
set(SNAPPY_HOME "$ENV{THIRDPARTY_HOME}/Snappy.Library")
set(SNAPPY_INCLUDE "${SNAPPY_HOME}/inc/inc")
set(SNAPPY_LIB_DEBUG "${SNAPPY_HOME}/bin/debug/amd64/snappy.lib")
set(SNAPPY_LIB_RELEASE "${SNAPPY_HOME}/bin/retail/amd64/snappy.lib")

# Derived from the paths above -- do not edit.
set(SNAPPY_CXX_FLAGS "-DSNAPPY")
set(SNAPPY_LIBS
  debug "${SNAPPY_LIB_DEBUG}"
  optimized "${SNAPPY_LIB_RELEASE}"
)
| 
 | ||||
#
# Jemalloc: edit the four paths below to match your installation.
#
set(JEMALLOC_HOME "$ENV{THIRDPARTY_HOME}/Jemalloc.Library")
set(JEMALLOC_INCLUDE "${JEMALLOC_HOME}/inc/include")
set(JEMALLOC_LIB_DEBUG "${JEMALLOC_HOME}/bin/debug/amd64/jemalloc.lib")
set(JEMALLOC_LIB_RELEASE "${JEMALLOC_HOME}/bin/retail/amd64/jemalloc.lib")

# Derived from the paths above -- do not edit.
set(JEMALLOC_CXX_FLAGS "-DJEMALLOC")
set(JEMALLOC_LIBS
  debug "${JEMALLOC_LIB_DEBUG}"
  optimized "${JEMALLOC_LIB_RELEASE}"
)
Some files were not shown because too many files have changed in this diff Show More
					Loading…
					
					
				
		Reference in new issue
	
	 Dmitri Smirnov
						Dmitri Smirnov