From 03c7ae24c20d0123ef3e45077fd683946ff3384d Mon Sep 17 00:00:00 2001 From: Yuqi Gu Date: Tue, 30 Apr 2019 10:56:06 -0700 Subject: [PATCH] RocksDB CRC32c optimization with ARMv8 Intrinsic (#5221) Summary: 1. Add Arm linear crc32c implementation for RocksDB. 2. Arm runtime check for crc32 Pull Request resolved: https://github.com/facebook/rocksdb/pull/5221 Differential Revision: D15013685 Pulled By: siying fbshipit-source-id: 2c2983743d26656d93f212dc7c1a3cf66a1acf12 --- Makefile | 6 +++++ src.mk | 5 ++++ util/crc32c.cc | 27 ++++++++++++++++++--- util/crc32c_arm64.cc | 56 ++++++++++++++++++++++++++++++++++++++++++++ util/crc32c_arm64.h | 21 +++++++++++++++++ 5 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 util/crc32c_arm64.cc create mode 100644 util/crc32c_arm64.h diff --git a/Makefile b/Makefile index eee0f9fba..928046f00 100644 --- a/Makefile +++ b/Makefile @@ -137,6 +137,12 @@ CFLAGS += -DHAVE_POWER8 HAVE_POWER8=1 endif +ifeq (,$(shell $(CXX) -fsyntax-only -march=armv8-a+crc -xc /dev/null 2>&1)) +CXXFLAGS += -march=armv8-a+crc +CFLAGS += -march=armv8-a+crc +ARMCRC_SOURCE=1 +endif + # if we're compiling for release, compile without debug code (-DNDEBUG) ifeq ($(DEBUG_LEVEL),0) OPT += -DNDEBUG diff --git a/src.mk b/src.mk index 55b4e3427..e3fe5632f 100644 --- a/src.mk +++ b/src.mk @@ -216,6 +216,11 @@ LIB_SOURCES = \ utilities/write_batch_with_index/write_batch_with_index.cc \ utilities/write_batch_with_index/write_batch_with_index_internal.cc \ +ifeq ($(ARMCRC_SOURCE),1) +LIB_SOURCES +=\ + util/crc32c_arm64.cc +endif + ifeq (,$(shell $(CXX) -fsyntax-only -maltivec -xc /dev/null 2>&1)) LIB_SOURCES_ASM =\ util/crc32c_ppc_asm.S diff --git a/util/crc32c.cc b/util/crc32c.cc index 9e4b65e66..e8d4116ff 100644 --- a/util/crc32c.cc +++ b/util/crc32c.cc @@ -18,6 +18,8 @@ #include "util/coding.h" #include "util/util.h" +#include "util/crc32c_arm64.h" + #ifdef __powerpc64__ #include "util/crc32c_ppc.h" #include "util/crc32c_ppc_constants.h" @@ -463,6 
+465,11 @@ static bool isAltiVec() { } #endif +#if defined(__linux__) && defined(HAVE_ARM64_CRC) +uint32_t ExtendARMImpl(uint32_t crc, const char *buf, size_t size) { + return crc32c_arm64(crc, (const unsigned char *)buf, size); +} +#endif std::string IsFastCrc32Supported() { bool has_fast_crc = false; @@ -478,6 +485,14 @@ std::string IsFastCrc32Supported() { has_fast_crc = false; arch = "PPC"; #endif +#elif defined(__linux__) && defined(HAVE_ARM64_CRC) + if (crc32c_runtime_check()) { + has_fast_crc = true; + arch = "Arm64"; + } else { + has_fast_crc = false; + arch = "Arm64"; + } #else has_fast_crc = isSSE42(); arch = "x86"; @@ -1200,7 +1215,15 @@ uint32_t crc32c_3way(uint32_t crc, const char* buf, size_t len) { #endif //HAVE_SSE42 && HAVE_PCLMUL static inline Function Choose_Extend() { -#ifndef HAVE_POWER8 +#ifdef HAVE_POWER8 + return isAltiVec() ? ExtendPPCImpl : ExtendImpl; +#elif defined(__linux__) && defined(HAVE_ARM64_CRC) + if(crc32c_runtime_check()) { + return ExtendARMImpl; + } else { + return ExtendImpl; + } +#else if (isSSE42()) { if (isPCLMULQDQ()) { #if defined HAVE_SSE42 && defined HAVE_PCLMUL && !defined NO_THREEWAY_CRC32C @@ -1216,8 +1239,6 @@ static inline Function Choose_Extend() { else { return ExtendImpl; } -#else //HAVE_POWER8 - return isAltiVec() ? ExtendPPCImpl : ExtendImpl; #endif } diff --git a/util/crc32c_arm64.cc b/util/crc32c_arm64.cc new file mode 100644 index 000000000..62fabe99e --- /dev/null +++ b/util/crc32c_arm64.cc @@ -0,0 +1,56 @@ +// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "util/crc32c_arm64.h" + +#if defined(__linux__) && defined(HAVE_ARM64_CRC) + +#include +#include +#ifndef HWCAP_CRC32 +#define HWCAP_CRC32 (1 << 7) +#endif +uint32_t crc32c_runtime_check(void) { + uint64_t auxv = getauxval(AT_HWCAP); + return (auxv & HWCAP_CRC32) != 0; +} + +uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, + unsigned len) { + const uint8_t *buf1; + const uint16_t *buf2; + const uint32_t *buf4; + const uint64_t *buf8; + + int64_t length = (int64_t)len; + + crc ^= 0xffffffff; + buf8 = (const uint64_t *)data; + while ((length -= sizeof(uint64_t)) >= 0) { + crc = __crc32cd(crc, *buf8++); + } + + /* The following is more efficient than the straight loop */ + buf4 = (const uint32_t *)buf8; + if (length & sizeof(uint32_t)) { + crc = __crc32cw(crc, *buf4++); + length -= 4; + } + + buf2 = (const uint16_t *)buf4; + if (length & sizeof(uint16_t)) { + crc = __crc32ch(crc, *buf2++); + length -= 2; + } + + buf1 = (const uint8_t *)buf2; + if (length & sizeof(uint8_t)) + crc = __crc32cb(crc, *buf1); + + crc ^= 0xffffffff; + return crc; +} + +#endif diff --git a/util/crc32c_arm64.h b/util/crc32c_arm64.h new file mode 100644 index 000000000..0e77ecd0e --- /dev/null +++ b/util/crc32c_arm64.h @@ -0,0 +1,21 @@ +// Copyright (c) 2018, Arm Limited and affiliates. All rights reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#ifndef UTIL_CRC32C_ARM64_H +#define UTIL_CRC32C_ARM64_H + +#include + +#if defined(__aarch64__) || defined(__AARCH64__) +#ifdef __ARM_FEATURE_CRC32 +#define HAVE_ARM64_CRC +#include +extern uint32_t crc32c_arm64(uint32_t crc, unsigned char const *data, unsigned len); +extern uint32_t crc32c_runtime_check(void); +#endif +#endif + + +#endif