diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cac7cd15..43a170745 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,31 +106,58 @@ if(NOT WIN32) string(STRIP "${ROCKSDB_VERSION_MAJOR}" ROCKSDB_VERSION_MAJOR) endif() -if(WIN32) - option(WITH_AVX2 "build with AVX2" ON) - if(WITH_AVX2) - if(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") - endif() +option(WITH_MD_LIBRARY "build with MD" ON) +if(WIN32 AND MSVC) + if(WITH_MD_LIBRARY) + set(RUNTIME_LIBRARY "MD") + else() + set(RUNTIME_LIBRARY "MT") endif() +endif() - option(WITH_MD_LIBRARY "build with MD" ON) - if(MSVC) - if(WITH_MD_LIBRARY) - set(RUNTIME_LIBRARY "MD") - else() - set(RUNTIME_LIBRARY "MT") - endif() +option(PORTABLE "build a portable binary" OFF) +option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF) +if(PORTABLE) + # MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h + # is available, it is available by default. + if(FORCE_SSE42 AND NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") endif() else() - option(WITH_SSE42 "build with SSE4.2" ON) - if(WITH_SSE42) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") + if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() endif() +set(CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS}) +include(CheckCXXSourceCompiles) +CHECK_CXX_SOURCE_COMPILES(" +#include +#include +int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); +} +" HAVE_SSE42) +if(HAVE_SSE42) + add_definitions(-DHAVE_SSE42) +elseif(FORCE_SSE42) + message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled") +endif() + +CHECK_CXX_SOURCE_COMPILES(" +#if defined(_MSC_VER) && !defined(__thread) +#define __thread __declspec(thread) +#endif +int main() { + static __thread int tls; +} +" HAVE_THREAD_LOCAL) +if(HAVE_THREAD_LOCAL) + add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) +endif() + set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc) configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY) add_library(build_version OBJECT ${BUILD_VERSION_CC}) @@ -520,7 +547,7 @@ if(WIN32) set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib) set(LIBS ${ROCKSDB_STATIC_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) else() - set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} rt) + set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT}) set(LIBS ${ROCKSDB_SHARED_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES}) diff --git a/INSTALL.md b/INSTALL.md index ce371d471..cb7916381 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -21,9 +21,11 @@ depend on gflags. You will need to have gflags installed to run `make all`. This use binaries compiled by `make all` in production. * By default the binary we produce is optimized for the platform you're compiling on -(-march=native or the equivalent). If you want to build a portable binary, add 'PORTABLE=1' before -your make commands, like this: `PORTABLE=1 make static_lib`. If you want to build a binary that -makes use of SSE4, add 'USE_SSE=1' before your make commands, like this: `USE_SSE=1 make static_lib`. +(`-march=native` or the equivalent). SSE4.2 will thus be enabled automatically if your +CPU supports it. To print a warning if your CPU does not support SSE4.2, build with +`USE_SSE=1 make static_lib` or, if using CMake, `cmake -DFORCE_SSE42=ON`. If you want +to build a portable binary, add `PORTABLE=1` before your make commands, like this: +`PORTABLE=1 make static_lib`. ## Dependencies diff --git a/TARGETS b/TARGETS index 714ab3f80..2dc861081 100644 --- a/TARGETS +++ b/TARGETS @@ -11,6 +11,7 @@ rocksdb_compiler_flags = [ "-DROCKSDB_MALLOC_USABLE_SIZE", "-DROCKSDB_RANGESYNC_PRESENT", "-DROCKSDB_SCHED_GETCPU_PRESENT", + "-DROCKSDB_SUPPORT_THREAD_LOCAL", "-DOS_LINUX", # Flags to enable libs we include "-DSNAPPY", diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 139841a3d..440c6a5e3 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -442,14 +442,8 @@ if test "$USE_HDFS"; then JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS" fi -if [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then - # Intel SSE instructions breaks compilation on FreeBSD i386 - unset USE_SSE -fi - if test "$USE_SSE"; then - # if Intel SSE instruction set is supported, set USE_SSE=1 - COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 " + COMMON_FLAGS="$COMMON_FLAGS -msse4.2" elif test -z "$PORTABLE"; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then # Tune for this POWER processor, treating '+' models as base models @@ -462,6 +456,36 @@ elif test -z "$PORTABLE"; then fi fi +$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < + #include + int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); + } +EOF +if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DHAVE_SSE42" +elif test "$USE_SSE"; then + echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" +fi + +# iOS doesn't support thread-local storage, but this check would erroneously +# succeed because the cross-compiler flags are added by the Makefile, not this +# script. +if [ "$PLATFORM" != IOS ]; then + $CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < #include +#include "port/port.h" #include "rocksdb/perf_level.h" // A thread local context for gathering io-stats efficiently and transparently. @@ -46,12 +47,8 @@ struct IOStatsContext { uint64_t logger_nanos; }; -#ifndef IOS_CROSS_COMPILE -# ifdef _MSC_VER -extern __declspec(thread) IOStatsContext iostats_context; -# else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL extern __thread IOStatsContext iostats_context; -# endif -#endif // IOS_CROSS_COMPILE +#endif } // namespace rocksdb diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 76a5cac68..e3c86c39c 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -9,6 +9,7 @@ #include #include +#include "port/port.h" #include "rocksdb/perf_level.h" namespace rocksdb { @@ -150,10 +151,8 @@ struct PerfContext { uint64_t env_new_logger_nanos; }; -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) extern PerfContext perf_context; -#elif defined(_MSC_VER) -extern __declspec(thread) PerfContext perf_context; #else #if defined(OS_SOLARIS) PerfContext *getPerfContext(); diff --git a/include/rocksdb/thread_status.h b/include/rocksdb/thread_status.h index 3950b81e5..7681a98e0 100644 --- a/include/rocksdb/thread_status.h +++ b/include/rocksdb/thread_status.h @@ -22,8 +22,7 @@ #if !defined(ROCKSDB_LITE) && \ !defined(NROCKSDB_THREAD_STATUS) && \ - !defined(OS_MACOSX) && \ - !defined(IOS_CROSS_COMPILE) + defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #define ROCKSDB_USING_THREAD_STATUS #endif diff --git a/monitoring/iostats_context.cc b/monitoring/iostats_context.cc index e9970e1f7..0bcc99afd 100644 --- a/monitoring/iostats_context.cc +++ b/monitoring/iostats_context.cc @@ -9,13 +9,9 @@ namespace rocksdb { -#ifndef IOS_CROSS_COMPILE -# ifdef _MSC_VER -__declspec(thread) IOStatsContext iostats_context; -# else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL __thread IOStatsContext iostats_context; -# endif -#endif // IOS_CROSS_COMPILE +#endif void IOStatsContext::Reset() { thread_pool_id = Env::Priority::TOTAL; diff --git a/monitoring/iostats_context_imp.h b/monitoring/iostats_context_imp.h index 4758f39fc..b787c70df 100644 --- a/monitoring/iostats_context_imp.h +++ b/monitoring/iostats_context_imp.h @@ -9,7 +9,7 @@ #include "monitoring/perf_step_timer.h" #include "rocksdb/iostats_context.h" -#ifndef IOS_CROSS_COMPILE +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL // increment a specific counter by the specified value #define IOSTATS_ADD(metric, value) \ @@ -41,7 +41,7 @@ PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \ iostats_step_timer_ ## metric.Start(); -#else // IOS_CROSS_COMPILE +#else // ROCKSDB_SUPPORT_THREAD_LOCAL #define IOSTATS_ADD(metric, value) #define IOSTATS_ADD_IF_POSITIVE(metric, value) @@ -53,4 +53,4 @@ #define IOSTATS_TIMER_GUARD(metric) -#endif // IOS_CROSS_COMPILE +#endif // ROCKSDB_SUPPORT_THREAD_LOCAL diff --git a/monitoring/perf_context.cc b/monitoring/perf_context.cc index 2fdcde506..65158c769 100644 --- a/monitoring/perf_context.cc +++ b/monitoring/perf_context.cc @@ -11,10 +11,8 @@ namespace rocksdb { -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) PerfContext perf_context; -#elif defined(_MSC_VER) - __declspec(thread) PerfContext perf_context; #else #if defined(OS_SOLARIS) __thread PerfContext perf_context_; @@ -24,7 +22,7 @@ namespace rocksdb { #endif void PerfContext::Reset() { -#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE) +#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) user_key_comparison_count = 0; block_cache_hit_count = 0; block_read_count = 0; @@ -98,7 +96,7 @@ void PerfContext::Reset() { } std::string PerfContext::ToString(bool exclude_zero_counters) const { -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) return ""; #else std::ostringstream ss; diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h index 104f0a8e5..1b9ac2f40 100644 --- a/monitoring/perf_context_imp.h +++ b/monitoring/perf_context_imp.h @@ -12,7 +12,7 @@ namespace rocksdb { -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #define PERF_TIMER_GUARD(metric) #define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition) diff --git a/monitoring/perf_level.cc b/monitoring/perf_level.cc index d04725f04..32c8ed392 100644 --- a/monitoring/perf_level.cc +++ b/monitoring/perf_level.cc @@ -7,16 +7,14 @@ // #include -#include #include "monitoring/perf_level_imp.h" -#include "port/port.h" namespace rocksdb { -#if defined(IOS_CROSS_COMPILE) -PerfLevel perf_level = kEnableCount; -#else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL __thread PerfLevel perf_level = kEnableCount; +#else +PerfLevel perf_level = kEnableCount; #endif void SetPerfLevel(PerfLevel level) { diff --git a/monitoring/perf_level_imp.h b/monitoring/perf_level_imp.h index ab02f9d72..46f701e5d 100644 --- a/monitoring/perf_level_imp.h +++ b/monitoring/perf_level_imp.h @@ -11,10 +11,10 @@ namespace rocksdb { -#if defined(IOS_CROSS_COMPILE) -extern PerfLevel perf_level; -#else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL extern __thread PerfLevel perf_level; +#else +extern PerfLevel perf_level; #endif } // namespace rocksdb diff --git a/util/crc32c.cc b/util/crc32c.cc index c2334d673..a171b409e 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -15,14 +15,9 @@ #include "util/crc32c.h" #include -#ifdef __SSE4_2__ +#ifdef HAVE_SSE42 #include #endif -#if defined(_WIN64) -#ifdef __AVX2__ -#include -#endif -#endif #include "util/coding.h" namespace rocksdb { @@ -298,21 +293,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) { return DecodeFixed32(reinterpret_cast(p)); } -#ifdef __SSE4_2__ -#ifdef __LP64__ +#if defined(HAVE_SSE42) && (defined(__LP64__) || defined(_WIN64)) static inline uint64_t LE_LOAD64(const uint8_t *p) { return DecodeFixed64(reinterpret_cast(p)); } #endif -#endif -#if defined(_WIN64) -#ifdef __AVX2__ -static inline uint64_t LE_LOAD64(const uint8_t *p) { - return DecodeFixed64(reinterpret_cast(p)); -} -#endif -#endif static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { uint32_t c = static_cast(*l ^ LE_LOAD32(*p)); *p += 4; @@ -330,8 +316,9 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { } static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { -#ifdef __SSE4_2__ -#ifdef __LP64__ +#ifndef HAVE_SSE42 + Slow_CRC32(l, p); +#elif defined(__LP64__) || defined(_WIN64) *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); *p += 8; #else @@ -340,16 +327,6 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); *p += 4; #endif -#elif defined(_WIN64) -#ifdef __AVX2__ - *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); - *p += 8; -#else - Slow_CRC32(l, p); -#endif -#else - Slow_CRC32(l, p); -#endif } template @@ -418,9 +395,7 @@ static inline Function Choose_Extend() { } bool IsFastCrc32Supported() { -#ifdef __SSE4_2__ - return isSSE42(); -#elif defined(_WIN64) +#if defined(__SSE4_2__) || defined(_WIN64) return isSSE42(); #else return false; diff --git a/util/thread_local.h b/util/thread_local.h index 3befddc8a..1f2fa4689 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -20,14 +20,6 @@ #include "util/autovector.h" #include "port/port.h" -#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL -# if defined(OS_WIN) || defined(OS_MACOSX) || defined(IOS_CROSS_COMPILE) -# define ROCKSDB_SUPPORT_THREAD_LOCAL 0 -# else -# define ROCKSDB_SUPPORT_THREAD_LOCAL 1 -# endif -#endif - namespace rocksdb { // Cleanup function that will be called for a stored thread local