Enable Fast CRC32 for Win64

Summary:
Currently the fast crc32 path is not enabled on Windows. I am trying to enable it here, hopefully, with the minimum impact to the existing code structure.
Closes https://github.com/facebook/rocksdb/pull/2033

Differential Revision: D4770635

Pulled By: siying

fbshipit-source-id: 676f8b8
main
Min Wei 8 years ago committed by Facebook Github Bot
parent f9813b853d
commit 8a8c967460
  1. 9
      CMakeLists.txt
  2. 25
      util/crc32c.cc

@ -42,6 +42,11 @@ endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules/") list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/modules/")
if(WIN32) if(WIN32)
if (DEFINED AVX2)
set(USE_AVX2 ${AVX2})
else ()
set(USE_AVX2 1)
endif ()
include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc) include(${CMAKE_CURRENT_SOURCE_DIR}/thirdparty.inc)
else() else()
option(WITH_JEMALLOC "build with JeMalloc" OFF) option(WITH_JEMALLOC "build with JeMalloc" OFF)
@ -111,7 +116,11 @@ add_library(build_version OBJECT ${BUILD_VERSION_CC})
target_include_directories(build_version PRIVATE target_include_directories(build_version PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/util) ${CMAKE_CURRENT_SOURCE_DIR}/util)
if(WIN32) if(WIN32)
if (${USE_AVX2} EQUAL 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /arch:AVX2 /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
else ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi /nologo /EHsc /GS /Gd /GR /GF /fp:precise /Zc:wchar_t /Zc:forScope /errorReport:queue")
endif ()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W3 /wd4127 /wd4800 /wd4996 /wd4351") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /FC /d2Zi+ /W3 /wd4127 /wd4800 /wd4996 /wd4351")
else() else()

@ -16,6 +16,11 @@
#ifdef __SSE4_2__ #ifdef __SSE4_2__
#include <nmmintrin.h> #include <nmmintrin.h>
#endif #endif
#if defined(_WIN64)
#ifdef __AVX2__
#include <nmmintrin.h>
#endif
#endif
#include "util/coding.h" #include "util/coding.h"
namespace rocksdb { namespace rocksdb {
@ -299,6 +304,13 @@ static inline uint64_t LE_LOAD64(const uint8_t *p) {
#endif #endif
#endif #endif
#if defined(_WIN64)
#ifdef __AVX2__
static inline uint64_t LE_LOAD64(const uint8_t *p) {
return DecodeFixed64(reinterpret_cast<const char*>(p));
}
#endif
#endif
static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) { static inline void Slow_CRC32(uint64_t* l, uint8_t const **p) {
uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p)); uint32_t c = static_cast<uint32_t>(*l ^ LE_LOAD32(*p));
*p += 4; *p += 4;
@ -326,6 +338,13 @@ static inline void Fast_CRC32(uint64_t* l, uint8_t const **p) {
*l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p)); *l = _mm_crc32_u32(static_cast<unsigned int>(*l), LE_LOAD32(*p));
*p += 4; *p += 4;
#endif #endif
#elif defined(_WIN64)
#ifdef __AVX2__
*l = _mm_crc32_u64(*l, LE_LOAD64(*p));
*p += 8;
#else
Slow_CRC32(l, p);
#endif
#else #else
Slow_CRC32(l, p); Slow_CRC32(l, p);
#endif #endif
@ -381,6 +400,10 @@ static bool isSSE42() {
uint32_t d_; uint32_t d_;
__asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx"); __asm__("cpuid" : "=c"(c_), "=d"(d_) : "a"(1) : "ebx");
return c_ & (1U << 20); // copied from CpuId.h in Folly. return c_ & (1U << 20); // copied from CpuId.h in Folly.
#elif defined(_WIN64)
int info[4];
__cpuidex(info, 0x00000001, 0);
return (info[2] & ((int)1 << 20)) != 0;
#else #else
return false; return false;
#endif #endif
@ -395,6 +418,8 @@ static inline Function Choose_Extend() {
bool IsFastCrc32Supported() { bool IsFastCrc32Supported() {
#ifdef __SSE4_2__ #ifdef __SSE4_2__
return isSSE42(); return isSSE42();
#elif defined(_WIN64)
return isSSE42();
#else #else
return false; return false;
#endif #endif

Loading…
Cancel
Save