RocksDB CRC32c optimization with ARMv8 Intrinsic (#5221)

Summary:
1. Add Arm linear crc32c implementation for RocksDB.
2. Arm runtime check for crc32
Pull Request resolved: https://github.com/facebook/rocksdb/pull/5221

Differential Revision: D15013685

Pulled By: siying

fbshipit-source-id: 2c2983743d26656d93f212dc7c1a3cf66a1acf12
main
Yuqi Gu 6 years ago committed by Facebook Github Bot
parent a5debd7ed8
commit 03c7ae24c2
  1. 6
      Makefile
  2. 5
      src.mk
  3. 27
      util/crc32c.cc
  4. 56
      util/crc32c_arm64.cc
  5. 21
      util/crc32c_arm64.h

@ -137,6 +137,12 @@ CFLAGS += -DHAVE_POWER8
HAVE_POWER8=1 HAVE_POWER8=1
endif endif
ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc -xc /dev/null 2>&1))
CXXFLAGS += -march=armv8-a+crc
CFLAGS += -march=armv8-a+crc
ARMCRC_SOURCE=1
endif
# if we're compiling for release, compile without debug code (-DNDEBUG) # if we're compiling for release, compile without debug code (-DNDEBUG)
ifeq ($(DEBUG_LEVEL),0) ifeq ($(DEBUG_LEVEL),0)
OPT += -DNDEBUG OPT += -DNDEBUG

@ -216,6 +216,11 @@ LIB_SOURCES = \
utilities/write_batch_with_index/write_batch_with_index.cc \ utilities/write_batch_with_index/write_batch_with_index.cc \
utilities/write_batch_with_index/write_batch_with_index_internal.cc \ utilities/write_batch_with_index/write_batch_with_index_internal.cc \
ifeq ($(ARMCRC_SOURCE),1)
LIB_SOURCES +=\
util/crc32c_arm64.cc
endif
ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1)) ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1))
LIB_SOURCES_ASM =\ LIB_SOURCES_ASM =\
util/crc32c_ppc_asm.S util/crc32c_ppc_asm.S

@ -18,6 +18,8 @@
#include "util/coding.h" #include "util/coding.h"
#include "util/util.h" #include "util/util.h"
#include "util/crc32c_arm64.h"
#ifdef __powerpc64__ #ifdef __powerpc64__
#include "util/crc32c_ppc.h" #include "util/crc32c_ppc.h"
#include "util/crc32c_ppc_constants.h" #include "util/crc32c_ppc_constants.h"
@ -463,6 +465,11 @@ static bool isAltiVec() {
} }
#endif #endif
#if defined(__linux__) && defined(HAVE_ARM64_CRC)
// Extends `crc` over the `size` bytes at `buf` using the ARMv8 CRC32C routine
// and returns the updated checksum.
//
// crc32c_arm64() takes its length as `unsigned`, so passing a larger size_t
// straight through would silently truncate it and checksum only part of the
// buffer. Oversized buffers are therefore fed in pieces. Chaining calls is
// valid because crc32c_arm64() inverts the CRC both on entry and on exit, so
// the two inversions cancel between consecutive calls.
uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) {
  const unsigned char *data = reinterpret_cast<const unsigned char *>(buf);
  // Largest multiple of 8 representable in `unsigned`: every piece except the
  // last is then consumed entirely by the 8-byte step of crc32c_arm64().
  const size_t kMaxChunk = static_cast<size_t>(~7u);
  while (size > kMaxChunk) {
    crc = crc32c_arm64(crc, data, static_cast<unsigned>(kMaxChunk));
    data += kMaxChunk;
    size -= kMaxChunk;
  }
  // What is left (possibly zero bytes) now fits in `unsigned`.
  return crc32c_arm64(crc, data, static_cast<unsigned>(size));
}
#endif
std::string IsFastCrc32Supported() { std::string IsFastCrc32Supported() {
bool has_fast_crc = false; bool has_fast_crc = false;
@ -478,6 +485,14 @@ std::string IsFastCrc32Supported() {
has_fast_crc = false; has_fast_crc = false;
arch = "PPC"; arch = "PPC";
#endif #endif
#elif defined(__linux__) && defined(HAVE_ARM64_CRC)
if (crc32c_runtime_check()) {
has_fast_crc = true;
arch = "Arm64";
} else {
has_fast_crc = false;
arch = "Arm64";
}
#else #else
has_fast_crc = isSSE42(); has_fast_crc = isSSE42();
arch = "x86"; arch = "x86";
@ -1200,7 +1215,15 @@ uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) {
#endif //HAVE_SSE42 && HAVE_PCLMUL #endif //HAVE_SSE42 && HAVE_PCLMUL
static inline Function Choose_Extend() { static inline Function Choose_Extend() {
#ifndef HAVE_POWER8 #ifdef HAVE_POWER8
return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
#elif defined(__linux__) && defined(HAVE_ARM64_CRC)
if(crc32c_runtime_check()) {
return ExtendARMImpl;
} else {
return ExtendImpl<Slow_CRC32>;
}
#else
if (isSSE42()) { if (isSSE42()) {
if (isPCLMULQDQ()) { if (isPCLMULQDQ()) {
#if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C #if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C
@ -1216,8 +1239,6 @@ static inline Function Choose_Extend() {
else { else {
return ExtendImpl<Slow_CRC32>; return ExtendImpl<Slow_CRC32>;
} }
#else //HAVE_POWER8
return isAltiVec() ? ExtendPPCImpl : ExtendImpl<Slow_CRC32>;
#endif #endif
} }

@ -0,0 +1,56 @@
// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "util/crc32c_arm64.h"
#if defined(__linux__) && defined(HAVE_ARM64_CRC)
#include <asm/hwcap.h>
#include <string.h>
#include <sys/auxv.h>
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif
// Runtime probe for the CRC32 capability: reads the AT_HWCAP entry of the
// auxiliary vector and returns 1 when the HWCAP_CRC32 bit is set, 0 otherwise.
uint32_t crc32c_runtime_check(void) {
  const uint64_t hwcaps = getauxval(AT_HWCAP);
  if ((hwcaps & HWCAP_CRC32) == 0) {
    return 0;
  }
  return 1;
}
// Extends the CRC32C value `crc` over the `len` bytes starting at `data`
// using the ARMv8 CRC32C intrinsics, and returns the updated value.
//
// The running value is bit-inverted on entry and again before returning, so
// the result of one call can be passed as `crc` to the next call to continue
// over a following buffer.
//
// Multi-byte words are loaded with memcpy() rather than by dereferencing a
// casted pointer: `data` carries no alignment guarantee, and reading bytes
// through a wider integer lvalue is undefined behaviour. For these fixed
// sizes compilers are expected to emit a single load instruction.
uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data,
                      unsigned len) {
  const unsigned char *cursor = data;
  size_t remaining = len;

  crc ^= 0xffffffff;

  // Bulk of the input: eight bytes per step.
  while (remaining >= sizeof(uint64_t)) {
    uint64_t word;
    memcpy(&word, cursor, sizeof(word));
    crc = __crc32cd(crc, word);
    cursor += sizeof(word);
    remaining -= sizeof(word);
  }

  // At most 7 bytes are left; bits 2, 1 and 0 of `remaining` say whether a
  // 4-, 2- and 1-byte piece is still pending. Handling them this way is more
  // efficient than a straight byte loop.
  if (remaining & sizeof(uint32_t)) {
    uint32_t word;
    memcpy(&word, cursor, sizeof(word));
    crc = __crc32cw(crc, word);
    cursor += sizeof(word);
  }
  if (remaining & sizeof(uint16_t)) {
    uint16_t word;
    memcpy(&word, cursor, sizeof(word));
    crc = __crc32ch(crc, word);
    cursor += sizeof(word);
  }
  if (remaining & sizeof(uint8_t)) {
    crc = __crc32cb(crc, *cursor);
  }

  crc ^= 0xffffffff;
  return crc;
}
#endif

@ -0,0 +1,21 @@
// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef UTIL_CRC32C_ARM64_H
#define UTIL_CRC32C_ARM64_H
#include <inttypes.h>
#if defined(__aarch64__) || defined(__AARCH64__)
#ifdef __ARM_FEATURE_CRC32
#define HAVE_ARM64_CRC
#include <arm_acle.h>
// Extends the CRC32C value `crc` over the `len` bytes starting at `data`
// using the ARMv8 CRC32C intrinsics and returns the updated value. Note that
// `len` is `unsigned`, not size_t: callers holding a larger size_t must not
// pass it through unchecked.
extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, unsigned len);
// Returns non-zero when the AT_HWCAP auxiliary-vector entry reports the CRC32
// capability bit, and zero otherwise. Callers use it to decide at runtime
// whether crc32c_arm64() can be selected.
extern uint32_t crc32c_runtime_check(void);
#endif
#endif
#endif
Loading…
Cancel
Save