cross-platform compatibility improvements

Summary:
We've had a couple of CockroachDB users fail to build RocksDB on exotic platforms, so I figured I'd try my hand at solving these issues upstream. The problems stem from a) `USE_SSE=1` being too aggressive about turning on SSE4.2, even on toolchains that don't support SSE4.2, and b) RocksDB attempting to detect support for thread-local storage based on OS, even though it can vary by compiler on the same OS.

See the individual commit messages for details. Regarding SSE support, this PR should change virtually nothing for non-CMake-based builds. `make`, `PORTABLE=1 make`, `USE_SSE=1 make`, and `PORTABLE=1 USE_SSE=1 make` function exactly as before, except that SSE support will be automatically disabled when a simple SSE4.2-using test program fails to compile, as it does on OpenBSD. (OpenBSD's ports GCC supports SSE4.2, but its binutils do not, so `__SSE4_2__` is defined but an SSE4.2-using program will fail to assemble.) A warning is emitted in this case. The CMake build is modified to support the same set of options, except that `USE_SSE` is spelled `FORCE_SSE42` because `USE_SSE` is rather useless now that we can automatically detect SSE support, and I figure changing options in the CMake build is less disruptive than changing the non-CMake build.

I've tested these changes on all the platforms I can get my hands on (macOS, Windows MSVC, Windows MinGW, and OpenBSD) and it all works splendidly. Let me know if there's anything you object to—I obviously don't mean to break any of your build pipelines in the process of fixing ours downstream.
Closes https://github.com/facebook/rocksdb/pull/2199

Differential Revision: D5054042

Pulled By: yiwu-arbug

fbshipit-source-id: 938e1fc665c049c02ae15698e1409155b8e72171
main
Nikhil Benesch 8 years ago committed by Facebook Github Bot
parent d004333021
commit 11c5d4741a
  1. 57
      CMakeLists.txt
  2. 8
      INSTALL.md
  3. 1
      TARGETS
  4. 38
      build_tools/build_detect_platform
  5. 7
      include/rocksdb/iostats_context.h
  6. 5
      include/rocksdb/perf_context.h
  7. 3
      include/rocksdb/thread_status.h
  8. 6
      monitoring/iostats_context.cc
  9. 6
      monitoring/iostats_context_imp.h
  10. 8
      monitoring/perf_context.cc
  11. 2
      monitoring/perf_context_imp.h
  12. 8
      monitoring/perf_level.cc
  13. 6
      monitoring/perf_level_imp.h
  14. 37
      util/crc32c.cc
  15. 8
      util/thread_local.h

@ -106,29 +106,56 @@ if(NOT WIN32)
string(STRIP "${ROCKSDB_VERSION_MAJOR}" ROCKSDB_VERSION_MAJOR) string(STRIP "${ROCKSDB_VERSION_MAJOR}" ROCKSDB_VERSION_MAJOR)
endif() endif()
if(WIN32)
option(WITH_AVX2 "build with AVX2" ON)
if(WITH_AVX2)
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
endif()
endif()
option(WITH_MD_LIBRARY "build with MD" ON) option(WITH_MD_LIBRARY "build with MD" ON)
if(MSVC) if(WIN32 AND MSVC)
if(WITH_MD_LIBRARY) if(WITH_MD_LIBRARY)
set(RUNTIME_LIBRARY "MD") set(RUNTIME_LIBRARY "MD")
else() else()
set(RUNTIME_LIBRARY "MT") set(RUNTIME_LIBRARY "MT")
endif() endif()
endif() endif()
else()
option(WITH_SSE42 "build with SSE4.2" ON) option(PORTABLE "build a portable binary" OFF)
if(WITH_SSE42) option(FORCE_SSE42 "force building with SSE4.2, even when PORTABLE=ON" OFF)
if(PORTABLE)
# MSVC does not need a separate compiler flag to enable SSE4.2; if nmmintrin.h
# is available, it is available by default.
if(FORCE_SSE42 AND NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
endif() endif()
else()
if(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")
endif()
endif()
set(CMAKE_REQUIRED_FLAGS ${CMAKE_CXX_FLAGS})
include(CheckCXXSourceCompiles)
CHECK_CXX_SOURCE_COMPILES("
#include <cstdint>
#include <nmmintrin.h>
int main() {
volatile uint32_t x = _mm_crc32_u32(0, 0);
}
" HAVE_SSE42)
if(HAVE_SSE42)
add_definitions(-DHAVE_SSE42)
elseif(FORCE_SSE42)
message(FATAL_ERROR "FORCE_SSE42=ON but unable to compile with SSE4.2 enabled")
endif()
CHECK_CXX_SOURCE_COMPILES("
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
#endif
int main() {
static __thread int tls;
}
" HAVE_THREAD_LOCAL)
if(HAVE_THREAD_LOCAL)
add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
endif() endif()
set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc) set(BUILD_VERSION_CC ${CMAKE_BINARY_DIR}/build_version.cc)
@ -520,7 +547,7 @@ if(WIN32)
set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib) set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib)
set(LIBS ${ROCKSDB_STATIC_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) set(LIBS ${ROCKSDB_STATIC_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
else() else()
set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT} rt) set(SYSTEM_LIBS ${CMAKE_THREAD_LIBS_INIT})
set(LIBS ${ROCKSDB_SHARED_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS}) set(LIBS ${ROCKSDB_SHARED_LIB} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES}) add_library(${ROCKSDB_SHARED_LIB} SHARED ${SOURCES})

@ -21,9 +21,11 @@ depend on gflags. You will need to have gflags installed to run `make all`. This
use binaries compiled by `make all` in production. use binaries compiled by `make all` in production.
* By default the binary we produce is optimized for the platform you're compiling on * By default the binary we produce is optimized for the platform you're compiling on
(-march=native or the equivalent). If you want to build a portable binary, add 'PORTABLE=1' before (`-march=native` or the equivalent). SSE4.2 will thus be enabled automatically if your
your make commands, like this: `PORTABLE=1 make static_lib`. If you want to build a binary that CPU supports it. To print a warning if your CPU does not support SSE4.2, build with
makes use of SSE4, add 'USE_SSE=1' before your make commands, like this: `USE_SSE=1 make static_lib`. `USE_SSE=1 make static_lib` or, if using CMake, `cmake -DFORCE_SSE42=ON`. If you want
to build a portable binary, add `PORTABLE=1` before your make commands, like this:
`PORTABLE=1 make static_lib`.
## Dependencies ## Dependencies

@ -11,6 +11,7 @@ rocksdb_compiler_flags = [
"-DROCKSDB_MALLOC_USABLE_SIZE", "-DROCKSDB_MALLOC_USABLE_SIZE",
"-DROCKSDB_RANGESYNC_PRESENT", "-DROCKSDB_RANGESYNC_PRESENT",
"-DROCKSDB_SCHED_GETCPU_PRESENT", "-DROCKSDB_SCHED_GETCPU_PRESENT",
"-DROCKSDB_SUPPORT_THREAD_LOCAL",
"-DOS_LINUX", "-DOS_LINUX",
# Flags to enable libs we include # Flags to enable libs we include
"-DSNAPPY", "-DSNAPPY",

@ -442,14 +442,8 @@ if test "$USE_HDFS"; then
JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS" JAVA_LDFLAGS="$JAVA_LDFLAGS $HDFS_LDFLAGS"
fi fi
if [ "$TARGET_OS" = FreeBSD -a "$TARGET_ARCHITECTURE" = i386 ]; then
# Intel SSE instructions breaks compilation on FreeBSD i386
unset USE_SSE
fi
if test "$USE_SSE"; then if test "$USE_SSE"; then
# if Intel SSE instruction set is supported, set USE_SSE=1 COMMON_FLAGS="$COMMON_FLAGS -msse4.2"
COMMON_FLAGS="$COMMON_FLAGS -msse -msse4.2 "
elif test -z "$PORTABLE"; then elif test -z "$PORTABLE"; then
if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then if test -n "`echo $TARGET_ARCHITECTURE | grep ^ppc64`"; then
# Tune for this POWER processor, treating '+' models as base models # Tune for this POWER processor, treating '+' models as base models
@ -462,6 +456,36 @@ elif test -z "$PORTABLE"; then
fi fi
fi fi
$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <cstdint>
#include <nmmintrin.h>
int main() {
volatile uint32_t x = _mm_crc32_u32(0, 0);
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DHAVE_SSE42"
elif test "$USE_SSE"; then
echo "warning: USE_SSE specified but compiler could not use SSE intrinsics, disabling"
fi
# iOS doesn't support thread-local storage, but this check would erroneously
# succeed because the cross-compiler flags are added by the Makefile, not this
# script.
if [ "$PLATFORM" != IOS ]; then
$CXX $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#if defined(_MSC_VER) && !defined(__thread)
#define __thread __declspec(thread)
#endif
int main() {
static __thread int tls;
}
EOF
if [ "$?" = 0 ]; then
COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL"
fi
fi
PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS" PLATFORM_CCFLAGS="$PLATFORM_CCFLAGS $COMMON_FLAGS"
PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS" PLATFORM_CXXFLAGS="$PLATFORM_CXXFLAGS $COMMON_FLAGS"

@ -7,6 +7,7 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include "port/port.h"
#include "rocksdb/perf_level.h" #include "rocksdb/perf_level.h"
// A thread local context for gathering io-stats efficiently and transparently. // A thread local context for gathering io-stats efficiently and transparently.
@ -46,12 +47,8 @@ struct IOStatsContext {
uint64_t logger_nanos; uint64_t logger_nanos;
}; };
#ifndef IOS_CROSS_COMPILE #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
# ifdef _MSC_VER
extern __declspec(thread) IOStatsContext iostats_context;
# else
extern __thread IOStatsContext iostats_context; extern __thread IOStatsContext iostats_context;
#endif #endif
#endif // IOS_CROSS_COMPILE
} // namespace rocksdb } // namespace rocksdb

@ -9,6 +9,7 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
#include "port/port.h"
#include "rocksdb/perf_level.h" #include "rocksdb/perf_level.h"
namespace rocksdb { namespace rocksdb {
@ -150,10 +151,8 @@ struct PerfContext {
uint64_t env_new_logger_nanos; uint64_t env_new_logger_nanos;
}; };
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
extern PerfContext perf_context; extern PerfContext perf_context;
#elif defined(_MSC_VER)
extern __declspec(thread) PerfContext perf_context;
#else #else
#if defined(OS_SOLARIS) #if defined(OS_SOLARIS)
PerfContext *getPerfContext(); PerfContext *getPerfContext();

@ -22,8 +22,7 @@
#if !defined(ROCKSDB_LITE) && \ #if !defined(ROCKSDB_LITE) && \
!defined(NROCKSDB_THREAD_STATUS) && \ !defined(NROCKSDB_THREAD_STATUS) && \
!defined(OS_MACOSX) && \ defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
!defined(IOS_CROSS_COMPILE)
#define ROCKSDB_USING_THREAD_STATUS #define ROCKSDB_USING_THREAD_STATUS
#endif #endif

@ -9,13 +9,9 @@
namespace rocksdb { namespace rocksdb {
#ifndef IOS_CROSS_COMPILE #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
# ifdef _MSC_VER
__declspec(thread) IOStatsContext iostats_context;
# else
__thread IOStatsContext iostats_context; __thread IOStatsContext iostats_context;
#endif #endif
#endif // IOS_CROSS_COMPILE
void IOStatsContext::Reset() { void IOStatsContext::Reset() {
thread_pool_id = Env::Priority::TOTAL; thread_pool_id = Env::Priority::TOTAL;

@ -9,7 +9,7 @@
#include "monitoring/perf_step_timer.h" #include "monitoring/perf_step_timer.h"
#include "rocksdb/iostats_context.h" #include "rocksdb/iostats_context.h"
#ifndef IOS_CROSS_COMPILE #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
// increment a specific counter by the specified value // increment a specific counter by the specified value
#define IOSTATS_ADD(metric, value) \ #define IOSTATS_ADD(metric, value) \
@ -41,7 +41,7 @@
PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \ PerfStepTimer iostats_step_timer_ ## metric(&(iostats_context.metric)); \
iostats_step_timer_ ## metric.Start(); iostats_step_timer_ ## metric.Start();
#else // IOS_CROSS_COMPILE #else // ROCKSDB_SUPPORT_THREAD_LOCAL
#define IOSTATS_ADD(metric, value) #define IOSTATS_ADD(metric, value)
#define IOSTATS_ADD_IF_POSITIVE(metric, value) #define IOSTATS_ADD_IF_POSITIVE(metric, value)
@ -53,4 +53,4 @@
#define IOSTATS_TIMER_GUARD(metric) #define IOSTATS_TIMER_GUARD(metric)
#endif // IOS_CROSS_COMPILE #endif // ROCKSDB_SUPPORT_THREAD_LOCAL

@ -11,10 +11,8 @@
namespace rocksdb { namespace rocksdb {
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
PerfContext perf_context; PerfContext perf_context;
#elif defined(_MSC_VER)
__declspec(thread) PerfContext perf_context;
#else #else
#if defined(OS_SOLARIS) #if defined(OS_SOLARIS)
__thread PerfContext perf_context_; __thread PerfContext perf_context_;
@ -24,7 +22,7 @@ namespace rocksdb {
#endif #endif
void PerfContext::Reset() { void PerfContext::Reset() {
#if !defined(NPERF_CONTEXT) && !defined(IOS_CROSS_COMPILE) #if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
user_key_comparison_count = 0; user_key_comparison_count = 0;
block_cache_hit_count = 0; block_cache_hit_count = 0;
block_read_count = 0; block_read_count = 0;
@ -98,7 +96,7 @@ void PerfContext::Reset() {
} }
std::string PerfContext::ToString(bool exclude_zero_counters) const { std::string PerfContext::ToString(bool exclude_zero_counters) const {
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
return ""; return "";
#else #else
std::ostringstream ss; std::ostringstream ss;

@ -12,7 +12,7 @@
namespace rocksdb { namespace rocksdb {
#if defined(NPERF_CONTEXT) || defined(IOS_CROSS_COMPILE) #if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
#define PERF_TIMER_GUARD(metric) #define PERF_TIMER_GUARD(metric)
#define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition) #define PERF_CONDITIONAL_TIMER_FOR_MUTEX_GUARD(metric, condition)

@ -7,16 +7,14 @@
// //
#include <assert.h> #include <assert.h>
#include <sstream>
#include "monitoring/perf_level_imp.h" #include "monitoring/perf_level_imp.h"
#include "port/port.h"
namespace rocksdb { namespace rocksdb {
#if defined(IOS_CROSS_COMPILE) #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
PerfLevel perf_level = kEnableCount;
#else
__thread PerfLevel perf_level = kEnableCount; __thread PerfLevel perf_level = kEnableCount;
#else
PerfLevel perf_level = kEnableCount;
#endif #endif
void SetPerfLevel(PerfLevel level) { void SetPerfLevel(PerfLevel level) {

@ -11,10 +11,10 @@
namespace rocksdb { namespace rocksdb {
#if defined(IOS_CROSS_COMPILE) #ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
extern PerfLevel perf_level;
#else
extern __thread PerfLevel perf_level; extern __thread PerfLevel perf_level;
#else
extern PerfLevel perf_level;
#endif #endif
} // namespace rocksdb } // namespace rocksdb

@ -15,14 +15,9 @@
#include "util/crc32c.h" #include "util/crc32c.h"
#include <stdint.h> #include <stdint.h>
#ifdef __SSE4_2__ #ifdef HAVE_SSE42
#include <nmmintrin.h> #include <nmmintrin.h>
#endif #endif
#if defined(_WIN64)
#ifdef __AVX2__
#include <nmmintrin.h>
#endif
#endif
#include "util/coding.h" #include "util/coding.h"
namespace rocksdb { namespace rocksdb {
@ -298,21 +293,12 @@ static inline uint32_t LE_LOAD32(const uint8_t *p) {
return DecodeFixed32(reinterpret_cast<const char*>(p)); return DecodeFixed32(reinterpret_cast<const char*>(p));
} }
#ifdef __SSE4_2__ #if defined(HAVE_SSE42) && (defined(__LP64__) || defined(_WIN64))
#ifdef __LP64__
static inline uint64_t LE_LOAD64(const uint8_t *p) { static inline uint64_t LE_LOAD64(const uint8_t *p) {
return DecodeFixed64(reinterpret_cast<const char*>(p)); return DecodeFixed64(reinterpret_cast<const char*>(p));
} }
#endif #endif
#endif
#if defined(_WIN64)
#ifdef __AVX2__
static inline uint64_t LE_LOAD64(const uint8_t *p) {
return DecodeFixed64(reinterpret_cast<const char*>(p));
}
#endif
#endif
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p)); uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
*p += 4; *p += 4;
@ -330,8 +316,9 @@ static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
} }
static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) { static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
#ifdef __SSE4_2__ #ifndef HAVE_SSE42
#ifdef __LP64__ Slow_CRC32(l, p);
#elif defined(__LP64__) || defined(_WIN64)
*l = _mm_crc32_u64(*l, LE_LOAD64(*p)); *l = _mm_crc32_u64(*l, LE_LOAD64(*p));
*p += 8; *p += 8;
#else #else
@ -340,16 +327,6 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
*l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p)); *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
*p += 4; *p += 4;
#endif #endif
#elif defined(_WIN64)
#ifdef __AVX2__
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
*p += 8;
#else
Slow_CRC32(l, p);
#endif
#else
Slow_CRC32(l, p);
#endif
} }
template<void (*CRC32)(uint64_t*, uint8_t const**)> template<void (*CRC32)(uint64_t*, uint8_t const**)>
@ -418,9 +395,7 @@ static inline Function Choose_Extend() {
} }
bool IsFastCrc32Supported() { bool IsFastCrc32Supported() {
#ifdef __SSE4_2__ #if defined(__SSE4_2__) || defined(_WIN64)
return isSSE42();
#elif defined(_WIN64)
return isSSE42(); return isSSE42();
#else #else
return false; return false;

@ -20,14 +20,6 @@
#include "util/autovector.h" #include "util/autovector.h"
#include "port/port.h" #include "port/port.h"
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
# if defined(OS_WIN) || defined(OS_MACOSX) || defined(IOS_CROSS_COMPILE)
# define ROCKSDB_SUPPORT_THREAD_LOCAL 0
# else
# define ROCKSDB_SUPPORT_THREAD_LOCAL 1
# endif
#endif
namespace rocksdb { namespace rocksdb {
// Cleanup function that will be called for a stored thread local // Cleanup function that will be called for a stored thread local

Loading…
Cancel
Save