From 3e745053b7c3a5453f823b74d8479beba07d50a8 Mon Sep 17 00:00:00 2001 From: Koby Kahane Date: Thu, 1 Oct 2020 09:21:30 -0700 Subject: [PATCH] Fix MSVC-related build issues (#7439) Summary: This PR addresses some build and functional issues on MSVC targets, as a step towards an eventual goal of having RocksDB build successfully for Windows on ARM64. Addressed issues include: - BitsSetToOne and CountTrailingZeroBits do not compile on non-x64 MSVC targets. A fallback implementation of BitsSetToOne when Intel intrinsics are not available is added, based on the C++20 `` popcount implementation in Microsoft's STL. - The implementation of FloorLog2 for MSVC targets (including x64) gives incorrect results. The unit test easily detects this, but CircleCI is currently configured to only run a specific set of tests for Windows CMake builds, so this seems to have been unnoticed. - AsmVolatilePause does not use YieldProcessor on Windows ARM64 targets, even though it is available. - When CondVar::TimedWait calls Microsoft STL's condition_variable::wait_for, it can potentially trigger a bug (just recently fixed in the upcoming VS 16.8's STL) that deadlocks various tests that wait for a timer to execute, since `Timer::Run` doesn't get a chance to execute before being blocked by the test function acquiring the mutex. - In c_test, `GetTempDir` assumes a POSIX-style temp path. - `NormalizePath` did not eliminate consecutive POSIX-style path separators on Windows, resulting in test failures in e.g., wal_manager_test. - Various other test failures. In a followup PR I hope to modify CircleCI's config.yml to invoke all RocksDB unit tests in Windows CMake builds with CTest, instead of the current use of `run_ci_db_test.ps1` which requires individual tests to be specified and is missing many of the existing tests. Notes from peterd: FloorLog2 is not yet used in production code (it's for something in progress). I also added a few more inexpensive platform-dependent tests to Windows CircleCI runs. And included facebook/folly#1461 as requested Pull Request resolved: https://github.com/facebook/rocksdb/pull/7439 Reviewed By: jay-zhuang Differential Revision: D24021563 Pulled By: pdillinger fbshipit-source-id: 0ec2027c0d6a494d8a0fe38d9667fc2f7e29f7e7 --- .circleci/config.yml | 2 +- db/c_test.c | 6 ++- db/perf_context_test.cc | 7 +++ file/filename.cc | 4 +- port/win/port_win.cc | 9 ++++ port/win/port_win.h | 2 +- third-party/folly/folly/chrono/Hardware.h | 4 +- util/filelock_test.cc | 2 + util/math.h | 66 +++++++++++++++++++++-- util/xxh3p.h | 4 +- 10 files changed, 94 insertions(+), 12 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 236c26522..eafd8ff9a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -276,7 +276,7 @@ jobs: name: "Test RocksDB" shell: powershell.exe command: | - build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,env_basic_test,env_test,db_merge_operand_test -Concurrency 16 + build_tools\run_ci_db_test.ps1 -SuiteRun db_basic_test,db_test,db_test2,db_merge_operand_test,bloom_test,c_test,coding_test,crc32c_test,dynamic_bloom_test,env_basic_test,env_test,hash_test,random_test -Concurrency 16 build-linux-java: machine: diff --git a/db/c_test.c b/db/c_test.c index 4fb7d016d..a01336738 100644 --- a/db/c_test.c +++ b/db/c_test.c @@ -58,7 +58,11 @@ static void StartPhase(const char* name) { static const char* GetTempDir(void) { const char* ret = getenv("TEST_TMPDIR"); if (ret == NULL || ret[0] == '\0') - ret = "/tmp"; +#ifdef OS_WIN + ret = getenv("TEMP"); +#else + ret = "/tmp"; +#endif return ret; } #ifdef _MSC_VER diff --git a/db/perf_context_test.cc b/db/perf_context_test.cc index 0410ada7a..5a714b9b8 100644 --- a/db/perf_context_test.cc +++ b/db/perf_context_test.cc @@ -817,6 +817,13 @@ TEST_F(PerfContextTest, PerfContextByLevelGetSet) { } TEST_F(PerfContextTest, CPUTimer) { + if (Env::Default()->NowCPUNanos() == 0) { + // TODO: This should be a GTEST_SKIP when the embedded gtest is updated + // to 1.10 or higher. + GTEST_SUCCESS_("Skipped on target without NowCPUNanos support"); + return; + } + DestroyDB(kDbName, Options()); auto db = OpenDb(); WriteOptions write_options; diff --git a/file/filename.cc b/file/filename.cc index 968adbaa7..0ac4e6ef8 100644 --- a/file/filename.cc +++ b/file/filename.cc @@ -461,8 +461,8 @@ Status GetInfoLogFiles(Env* env, const std::string& db_log_dir, std::string NormalizePath(const std::string& path) { std::string dst; for (auto c : path) { - if (!dst.empty() && c == kFilePathSeparator && - dst.back() == kFilePathSeparator) { + if (!dst.empty() && (c == kFilePathSeparator || c == '/') && + (dst.back() == kFilePathSeparator || dst.back() == '/')) { continue; } dst.push_back(c); diff --git a/port/win/port_win.cc b/port/win/port_win.cc index 16599911d..bd7b55f42 100644 --- a/port/win/port_win.cc +++ b/port/win/port_win.cc @@ -98,6 +98,15 @@ bool CondVar::TimedWait(uint64_t abs_time_us) { // Caller must ensure that mutex is held prior to calling this method std::unique_lock lk(mu_->getLock(), std::adopt_lock); + + // Work around https://github.com/microsoft/STL/issues/369 +#if defined(_MSC_VER) && \ + (!defined(_MSVC_STL_UPDATE) || _MSVC_STL_UPDATE < 202008L) + if (relTimeUs == microseconds::zero()) { + lk.unlock(); + lk.lock(); + } +#endif #ifndef NDEBUG mu_->locked_ = false; #endif diff --git a/port/win/port_win.h b/port/win/port_win.h index a3ffd559c..2c5b8ff05 100644 --- a/port/win/port_win.h +++ b/port/win/port_win.h @@ -283,7 +283,7 @@ extern const size_t kPageSize; #endif static inline void AsmVolatilePause() { -#if defined(_M_IX86) || defined(_M_X64) +#if defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) YieldProcessor(); #endif // it would be nice to get "wfe" on ARM here diff --git a/third-party/folly/folly/chrono/Hardware.h b/third-party/folly/folly/chrono/Hardware.h index ec7be82e8..6635b8717 100644 --- a/third-party/folly/folly/chrono/Hardware.h +++ b/third-party/folly/folly/chrono/Hardware.h @@ -10,7 +10,7 @@ #include #include -#if _MSC_VER +#if _MSC_VER && (defined(_M_IX86) || defined(_M_X64)) extern "C" std::uint64_t __rdtsc(); #pragma intrinsic(__rdtsc) #endif @@ -18,7 +18,7 @@ extern "C" std::uint64_t __rdtsc(); namespace folly { inline std::uint64_t hardware_timestamp() { -#if _MSC_VER +#if _MSC_VER && (defined(_M_IX86) || defined(_M_X64)) return __rdtsc(); #elif __GNUC__ && (__i386__ || FOLLY_X64) return __builtin_ia32_rdtsc(); diff --git a/util/filelock_test.cc b/util/filelock_test.cc index 611cd46c6..fe2f7e0d8 100644 --- a/util/filelock_test.cc +++ b/util/filelock_test.cc @@ -127,9 +127,11 @@ TEST_F(LockTest, LockBySameThread) { // re-acquire the lock on the same file. This should fail. Status s = LockFile(&lock2); ASSERT_TRUE(s.IsIOError()); +#ifndef OS_WIN // Validate that error message contains current thread ID. ASSERT_TRUE(s.ToString().find(ToString(Env::Default()->GetThreadID())) != std::string::npos); +#endif // check the file is locked ASSERT_TRUE( AssertFileIsLocked() ); diff --git a/util/math.h b/util/math.h index f0abc577a..795efb018 100644 --- a/util/math.h +++ b/util/math.h @@ -23,13 +23,23 @@ inline int FloorLog2(T v) { assert(v > 0); #ifdef _MSC_VER static_assert(sizeof(T) <= sizeof(uint64_t), "type too big"); - unsigned long lz = 0; + unsigned long idx = 0; if (sizeof(T) <= sizeof(uint32_t)) { - _BitScanReverse(&lz, static_cast(v)); + _BitScanReverse(&idx, static_cast(v)); } else { - _BitScanReverse64(&lz, static_cast(v)); +#if defined(_M_X64) || defined(_M_ARM64) + _BitScanReverse64(&idx, static_cast(v)); +#else + const auto vh = static_cast(static_cast(v) >> 32); + if (vh != 0) { + _BitScanReverse(&idx, static_cast(vh)); + idx += 32; + } else { + _BitScanReverse(&idx, static_cast(v)); + } +#endif } - return 63 - static_cast(lz); + return idx; #else static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big"); if (sizeof(T) <= sizeof(unsigned int)) { @@ -56,7 +66,16 @@ inline int CountTrailingZeroBits(T v) { if (sizeof(T) <= sizeof(uint32_t)) { _BitScanForward(&tz, static_cast(v)); } else { +#if defined(_M_X64) || defined(_M_ARM64) _BitScanForward64(&tz, static_cast(v)); +#else + _BitScanForward(&tz, static_cast(v)); + if (tz == 0) { + _BitScanForward(&tz, + static_cast(static_cast(v) >> 32)); + tz += 32; + } +#endif } return static_cast(tz); #else @@ -71,6 +90,29 @@ inline int CountTrailingZeroBits(T v) { #endif } +#if defined(_MSC_VER) && !defined(_M_X64) +namespace detail { +template +int BitsSetToOneFallback(T v) { + const int kBits = static_cast(sizeof(T)) * 8; + static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits"); + // we static_cast these bit patterns in order to truncate them to the correct + // size + v = static_cast(v - ((v >> 1) & static_cast(0x5555555555555555ull))); + v = static_cast((v & static_cast(0x3333333333333333ull)) + + ((v >> 2) & static_cast(0x3333333333333333ull))); + v = static_cast((v + (v >> 4)) & static_cast(0x0F0F0F0F0F0F0F0Full)); + for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) { + v += static_cast(v >> shift_bits); + } + // we want the bottom "slot" that's big enough to represent a value up to + // (and including) kBits. + return static_cast(v & static_cast(kBits | (kBits - 1))); +} + +} // namespace detail +#endif + // Number of bits set to 1. Also known as "population count". template inline int BitsSetToOne(T v) { @@ -82,11 +124,27 @@ inline int BitsSetToOne(T v) { constexpr auto mm = 8 * sizeof(uint32_t) - 1; // The bit mask is to neutralize sign extension on small signed types constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1; +#if defined(_M_X64) || defined(_M_IX86) return static_cast(__popcnt(static_cast(v) & m)); +#else + return static_cast(detail::BitsSetToOneFallback(v) & m); +#endif } else if (sizeof(T) == sizeof(uint32_t)) { +#if defined(_M_X64) || defined(_M_IX86) return static_cast(__popcnt(static_cast(v))); +#else + return detail::BitsSetToOneFallback(static_cast(v)); +#endif } else { +#ifdef _M_X64 return static_cast(__popcnt64(static_cast(v))); +#elif defined(_M_IX86) + return static_cast( + __popcnt(static_cast(static_cast(v) >> 32) + + __popcnt(static_cast(v)))); +#else + return detail::BitsSetToOneFallback(static_cast(v)); +#endif } #else static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big"); diff --git a/util/xxh3p.h b/util/xxh3p.h index d1fc2bba2..05696cecd 100644 --- a/util/xxh3p.h +++ b/util/xxh3p.h @@ -253,7 +253,9 @@ XXH_FORCE_INLINE U64x2 XXH_vec_mule(U32x4 a, U32x4 b) { #if defined(XXH_NO_PREFETCH) # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */ #else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +#if defined(_MSC_VER) && \ + (defined(_M_X64) || \ + defined(_M_IX86)) /* _mm_prefetch() is not defined outside of x86/x64 */ # include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )