From 11c5d4741a1e11a1315d5ca644ce555e07e91f61 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Mon, 15 May 2017 14:42:32 -0700 Subject: [PATCH] cross-platform compatibility improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: We've had a couple CockroachDB users fail to build RocksDB on exotic platforms, so I figured I'd try my hand at solving these issues upstream. The problems stem from a) `USE_SSE=1` being too aggressive about turning on SSE4.2, even on toolchains that don't support SSE4.2 and b) RocksDB attempting to detect support for thread-local storage based on OS, even though it can vary by compiler on the same OS. See the individual commit messages for details. Regarding SSE support, this PR should change virtually nothing for non-CMake based builds. `make`, `PORTABLE=1 make`, `USE_SSE=1 make`, and `PORTABLE=1 USE_SSE=1 make` function exactly as before, except that SSE support will be automatically disabled when a simple SSE4.2-using test program fails to compile, as it does on OpenBSD. (OpenBSD's ports GCC supports SSE4.2, but its binutils do not, so `__SSE_4_2__` is defined but an SSE4.2-using program will fail to assemble.) A warning is emitted in this case. The CMake build is modified to support the same set of options, except that `USE_SSE` is spelled `FORCE_SSE42` because `USE_SSE` is rather useless now that we can automatically detect SSE support, and I figure changing options in the CMake build is less disruptive than changing the non-CMake build. I've tested these changes on all the platforms I can get my hands on (macOS, Windows MSVC, Windows MinGW, and OpenBSD) and it all works splendidly. Let me know if there's anything you object to—I obviously don't mean to break any of your build pipelines in the process of fixing ours downstream. Closes https://github.com/facebook/rocksdb/pull/2199 Differential Revision: D5054042 Pulled By: yiwu-arbug fbshipit-source-id: 938e1fc665c049c02ae15698e1409155b8e72171 --- CMakeLists.txt | 65 ++++++++++++++++++++++--------- INSTALL.md | 8 ++-- TARGETS | 1 + build_tools/build_detect_platform | 38 ++++++++++++++---- include/rocksdb/iostats_context.h | 9 ++--- include/rocksdb/perf_context.h | 5 +-- include/rocksdb/thread_status.h | 3 +- monitoring/iostats_context.cc | 8 +--- monitoring/iostats_context_imp.h | 6 +-- monitoring/perf_context.cc | 8 ++-- monitoring/perf_context_imp.h | 2 +- monitoring/perf_level.cc | 8 ++-- monitoring/perf_level_imp.h | 6 +-- util/crc32c.cc | 37 +++--------------- util/thread_local.h | 8 ---- 15 files changed, 110 insertions(+), 102 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0cac7cd15..43a170745 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,31 +106,58 @@ if(NOT WIN32) string(STRIP "${ROCKSDB_VERSION_MAJOR}" ROCKSDB_VERSION_MAJOR) endif() -if(WIN32) - option(WITH_AVX2 "build with AVX2" ON) - if(WITH_AVX2) - if(MSVC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") - endif() +option(WITH_MD_LIBRARY "build with MD" ON) +if(WIN32 AND MSVC) + if(WITH_MD_LIBRARY) + set(RUNTIME_LIBRARY "MD") + else() + set(RUNTIME_LIBRARY "MT") endif() +endif() - option(WITH_MD_LIBRARY "build with MD" ON) - if(MSVC) - if(WITH_MD_LIBRARY) - set(RUNTIME_LIBRARY "MD") - else() - set(RUNTIME_LIBRARY "MT") - endif() +option(PORTABLE "build a portable binary" OFF) +option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF) +if(PORTABLE) + # MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h + # is available, it is available by default. + if(FORCE_SSE42 AND NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") endif() else() - option(WITH_SSE42 "build with SSE4.2" ON) - if(WITH_SSE42) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") + if(MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native") endif() endif() +set(CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS}) +include(CheckCXXSourceCompiles) +CHECK_CXX_SOURCE_COMPILES(" +#include +#include +int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); +} +" HAVE_SSE42) +if(HAVE_SSE42) + add_definitions(-DHAVE_SSE42) +elseif(FORCE_SSE42) + message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled") +endif() + +CHECK_CXX_SOURCE_COMPILES(" +#if defined(_MSC_VER) && !defined(__thread) +#define __thread __declspec(thread) +#endif +int main() { + static __thread int tls; +} +" HAVE_THREAD_LOCAL) +if(HAVE_THREAD_LOCAL) + add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL) +endif() + set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc) configure_file(util/build_version.cc.in ${BUILD_VERSION_CC} @ONLY) add_library(build_version OBJECT ${BUILD_VERSION_CC}) @@ -520,7 +547,7 @@ if(WIN32) set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib) set(LIBS ${ROCKSDB_STATIC_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) else() - set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} rt) + set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT}) set(LIBS ${ROCKSDB_SHARED_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES}) diff --git a/INSTALL.md b/INSTALL.md index ce371d471..cb7916381 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -21,9 +21,11 @@ depend on gflags. You will need to have gflags installed to run `make all`. This use binaries compiled by `make all` in production. * By default the binary we produce is optimized for the platform you're compiling on -(-march=native or the equivalent). If you want to build a portable binary, add 'PORTABLE=1' before -your make commands, like this: `PORTABLE=1 make static_lib`. If you want to build a binary that -makes use of SSE4, add 'USE_SSE=1' before your make commands, like this: `USE_SSE=1 make static_lib`. +(`-march=native` or the equivalent). SSE4.2 will thus be enabled automatically if your +CPU supports it. To print a warning if your CPU does not support SSE4.2, build with +`USE_SSE=1 make static_lib` or, if using CMake, `cmake -DFORCE_SSE42=ON`. If you want +to build a portable binary, add `PORTABLE=1` before your make commands, like this: +`PORTABLE=1 make static_lib`. ## Dependencies diff --git a/TARGETS b/TARGETS index 714ab3f80..2dc861081 100644 --- a/TARGETS +++ b/TARGETS @@ -11,6 +11,7 @@ rocksdb_compiler_flags = [ "-DROCKSDB_MALLOC_USABLE_SIZE", "-DROCKSDB_RANGESYNC_PRESENT", "-DROCKSDB_SCHED_GETCPU_PRESENT", + "-DROCKSDB_SUPPORT_THREAD_LOCAL", "-DOS_LINUX", # Flags to enable libs we include "-DSNAPPY", diff --git a/build_tools/build_detect_platform b/build_tools/build_detect_platform index 139841a3d..440c6a5e3 100755 --- a/build_tools/build_detect_platform +++ b/build_tools/build_detect_platform @@ -442,14 +442,8 @@ if test "$USE_HDFS"; then JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS" fi -if [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then - # Intel SSE instructions breaks compilation on FreeBSD i386 - unset USE_SSE -fi - if test "$USE_SSE"; then - # if Intel SSE instruction set is supported, set USE_SSE=1 - COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 " + COMMON_FLAGS="$COMMON_FLAGS -msse4.2" elif test -z "$PORTABLE"; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then # Tune for this POWER processor, treating '+' models as base models @@ -462,6 +456,36 @@ elif test -z "$PORTABLE"; then fi fi +$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < + #include + int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); + } +EOF +if [ "$?" = 0 ]; then + COMMON_FLAGS="$COMMON_FLAGS -DHAVE_SSE42" +elif test "$USE_SSE"; then + echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling" +fi + +# iOS doesn't support thread-local storage, but this check would erroneously +# succeed because the cross-compiler flags are added by the Makefile, not this +# script. +if [ "$PLATFORM" != IOS ]; then + $CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null < #include +#include "port/port.h" #include "rocksdb/perf_level.h" // A thread local context for gathering io-stats efficiently and transparently. @@ -46,12 +47,8 @@ struct IOStatsContext { uint64_t logger_nanos; }; -#ifndef IOS_CROSS_COMPILE -# ifdef _MSC_VER -extern __declspec(thread) IOStatsContext iostats_context; -# else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL extern __thread IOStatsContext iostats_context; -# endif -#endif // IOS_CROSS_COMPILE +#endif } // namespace rocksdb diff --git a/include/rocksdb/perf_context.h b/include/rocksdb/perf_context.h index 76a5cac68..e3c86c39c 100644 --- a/include/rocksdb/perf_context.h +++ b/include/rocksdb/perf_context.h @@ -9,6 +9,7 @@ #include #include +#include "port/port.h" #include "rocksdb/perf_level.h" namespace rocksdb { @@ -150,10 +151,8 @@ struct PerfContext { uint64_t env_new_logger_nanos; }; -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) extern PerfContext perf_context; -#elif defined(_MSC_VER) -extern __declspec(thread) PerfContext perf_context; #else #if defined(OS_SOLARIS) PerfContext *getPerfContext(); diff --git a/include/rocksdb/thread_status.h b/include/rocksdb/thread_status.h index 3950b81e5..7681a98e0 100644 --- a/include/rocksdb/thread_status.h +++ b/include/rocksdb/thread_status.h @@ -22,8 +22,7 @@ #if !defined(ROCKSDB_LITE) && \ !defined(NROCKSDB_THREAD_STATUS) && \ - !defined(OS_MACOSX) && \ - !defined(IOS_CROSS_COMPILE) + defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #define ROCKSDB_USING_THREAD_STATUS #endif diff --git a/monitoring/iostats_context.cc b/monitoring/iostats_context.cc index e9970e1f7..0bcc99afd 100644 --- a/monitoring/iostats_context.cc +++ b/monitoring/iostats_context.cc @@ -9,13 +9,9 @@ namespace rocksdb { -#ifndef IOS_CROSS_COMPILE -# ifdef _MSC_VER -__declspec(thread) IOStatsContext iostats_context; -# else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL __thread IOStatsContext iostats_context; -# endif -#endif // IOS_CROSS_COMPILE +#endif void IOStatsContext::Reset() { thread_pool_id = Env::Priority::TOTAL; diff --git a/monitoring/iostats_context_imp.h b/monitoring/iostats_context_imp.h index 4758f39fc..b787c70df 100644 --- a/monitoring/iostats_context_imp.h +++ b/monitoring/iostats_context_imp.h @@ -9,7 +9,7 @@ #include "monitoring/perf_step_timer.h" #include "rocksdb/iostats_context.h" -#ifndef IOS_CROSS_COMPILE +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL // increment a specific counter by the specified value #define IOSTATS_ADD(metric, value) \ @@ -41,7 +41,7 @@ PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \ iostats_step_timer_ ## metric.Start(); -#else // IOS_CROSS_COMPILE +#else // ROCKSDB_SUPPORT_THREAD_LOCAL #define IOSTATS_ADD(metric, value) #define IOSTATS_ADD_IF_POSITIVE(metric, value) @@ -53,4 +53,4 @@ #define IOSTATS_TIMER_GUARD(metric) -#endif // IOS_CROSS_COMPILE +#endif // ROCKSDB_SUPPORT_THREAD_LOCAL diff --git a/monitoring/perf_context.cc b/monitoring/perf_context.cc index 2fdcde506..65158c769 100644 --- a/monitoring/perf_context.cc +++ b/monitoring/perf_context.cc @@ -11,10 +11,8 @@ namespace rocksdb { -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) PerfContext perf_context; -#elif defined(_MSC_VER) - __declspec(thread) PerfContext perf_context; #else #if defined(OS_SOLARIS) __thread PerfContext perf_context_; @@ -24,7 +22,7 @@ namespace rocksdb { #endif void PerfContext::Reset() { -#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE) +#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) user_key_comparison_count = 0; block_cache_hit_count = 0; block_read_count = 0; @@ -98,7 +96,7 @@ void PerfContext::Reset() { } std::string PerfContext::ToString(bool exclude_zero_counters) const { -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) return ""; #else std::ostringstream ss; diff --git a/monitoring/perf_context_imp.h b/monitoring/perf_context_imp.h index 104f0a8e5..1b9ac2f40 100644 --- a/monitoring/perf_context_imp.h +++ b/monitoring/perf_context_imp.h @@ -12,7 +12,7 @@ namespace rocksdb { -#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) +#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL) #define PERF_TIMER_GUARD(metric) #define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition) diff --git a/monitoring/perf_level.cc b/monitoring/perf_level.cc index d04725f04..32c8ed392 100644 --- a/monitoring/perf_level.cc +++ b/monitoring/perf_level.cc @@ -7,16 +7,14 @@ // #include -#include #include "monitoring/perf_level_imp.h" -#include "port/port.h" namespace rocksdb { -#if defined(IOS_CROSS_COMPILE) -PerfLevel perf_level = kEnableCount; -#else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL __thread PerfLevel perf_level = kEnableCount; +#else +PerfLevel perf_level = kEnableCount; #endif void SetPerfLevel(PerfLevel level) { diff --git a/monitoring/perf_level_imp.h b/monitoring/perf_level_imp.h index ab02f9d72..46f701e5d 100644 --- a/monitoring/perf_level_imp.h +++ b/monitoring/perf_level_imp.h @@ -11,10 +11,10 @@ namespace rocksdb { -#if defined(IOS_CROSS_COMPILE) -extern PerfLevel perf_level; -#else +#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL extern __thread PerfLevel perf_level; +#else +extern PerfLevel perf_level; #endif } // namespace rocksdb diff --git a/util/crc32c.cc b/util/crc32c.cc index c2334d673..a171b409e 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -15,14 +15,9 @@ #include "util/crc32c.h" #include -#ifdef __SSE4_2__ +#ifdef HAVE_SSE42 #include #endif -#if defined(_WIN64) -#ifdef __AVX2__ -#include -#endif -#endif #include "util/coding.h" namespace rocksdb { @@ -298,21 +293,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) { return DecodeFixed32(reinterpret_cast(p)); } -#ifdef __SSE4_2__ -#ifdef __LP64__ +#if defined(HAVE_SSE42) && (defined(__LP64__) || defined(_WIN64)) static inline uint64_t LE_LOAD64(const uint8_t *p) { return DecodeFixed64(reinterpret_cast(p)); } #endif -#endif -#if defined(_WIN64) -#ifdef __AVX2__ -static inline uint64_t LE_LOAD64(const uint8_t *p) { - return DecodeFixed64(reinterpret_cast(p)); -} -#endif -#endif static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { uint32_t c = static_cast(*l ^ LE_LOAD32(*p)); *p += 4; @@ -330,8 +316,9 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { } static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { -#ifdef __SSE4_2__ -#ifdef __LP64__ +#ifndef HAVE_SSE42 + Slow_CRC32(l, p); +#elif defined(__LP64__) || defined(_WIN64) *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); *p += 8; #else @@ -340,16 +327,6 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { *l = _mm_crc32_u32(static_cast(*l), LE_LOAD32(*p)); *p += 4; #endif -#elif defined(_WIN64) -#ifdef __AVX2__ - *l = _mm_crc32_u64(*l, LE_LOAD64(*p)); - *p += 8; -#else - Slow_CRC32(l, p); -#endif -#else - Slow_CRC32(l, p); -#endif } template @@ -418,9 +395,7 @@ static inline Function Choose_Extend() { } bool IsFastCrc32Supported() { -#ifdef __SSE4_2__ - return isSSE42(); -#elif defined(_WIN64) +#if defined(__SSE4_2__) || defined(_WIN64) return isSSE42(); #else return false; diff --git a/util/thread_local.h b/util/thread_local.h index 3befddc8a..1f2fa4689 100644 --- a/util/thread_local.h +++ b/util/thread_local.h @@ -20,14 +20,6 @@ #include "util/autovector.h" #include "port/port.h" -#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL -# if defined(OS_WIN) || defined(OS_MACOSX) || defined(IOS_CROSS_COMPILE) -# define ROCKSDB_SUPPORT_THREAD_LOCAL 0 -# else -# define ROCKSDB_SUPPORT_THREAD_LOCAL 1 -# endif -#endif - namespace rocksdb { // Cleanup function that will be called for a stored thread local