diff --git a/CMakeLists.txt b/CMakeLists.txt index ff86426b8..52c8b8e8f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -766,6 +766,7 @@ set(SOURCES util/murmurhash.cc util/random.cc util/rate_limiter.cc + util/ribbon_config.cc util/slice.cc util/file_checksum_helper.cc util/status.cc diff --git a/TARGETS b/TARGETS index e891c3775..c2f658eae 100644 --- a/TARGETS +++ b/TARGETS @@ -342,6 +342,7 @@ cpp_library( "util/murmurhash.cc", "util/random.cc", "util/rate_limiter.cc", + "util/ribbon_config.cc", "util/slice.cc", "util/status.cc", "util/string_util.cc", @@ -647,6 +648,7 @@ cpp_library( "util/murmurhash.cc", "util/random.cc", "util/rate_limiter.cc", + "util/ribbon_config.cc", "util/slice.cc", "util/status.cc", "util/string_util.cc", diff --git a/src.mk b/src.mk index 15c109470..9925260f1 100644 --- a/src.mk +++ b/src.mk @@ -208,6 +208,7 @@ LIB_SOURCES = \ util/murmurhash.cc \ util/random.cc \ util/rate_limiter.cc \ + util/ribbon_config.cc \ util/slice.cc \ util/file_checksum_helper.cc \ util/status.cc \ diff --git a/table/block_based/filter_policy.cc b/table/block_based/filter_policy.cc index 534bd220d..0f79143d1 100644 --- a/table/block_based/filter_policy.cc +++ b/table/block_based/filter_policy.cc @@ -21,6 +21,7 @@ #include "util/bloom_impl.h" #include "util/coding.h" #include "util/hash.h" +#include "util/ribbon_config.h" #include "util/ribbon_impl.h" namespace ROCKSDB_NAMESPACE { @@ -399,6 +400,7 @@ struct Standard128RibbonRehasherTypesAndSettings { // These are schema-critical. Any change almost certainly changes // underlying data. static constexpr bool kIsFilter = true; + static constexpr bool kHomogeneous = false; static constexpr bool kFirstCoeffAlwaysOne = true; static constexpr bool kUseSmash = false; using CoeffRow = ROCKSDB_NAMESPACE::Unsigned128; @@ -598,8 +600,7 @@ class Standard128RibbonBitsBuilder : public XXH3pFilterBitsBuilder { // Let's not bother accounting for overflow to Bloom filter // (Includes NaN case) - if (!(max_slots < - BandingType::GetNumSlotsFor95PctSuccess(kMaxRibbonEntries))) { + if (!(max_slots < ConfigHelper::GetNumSlots(kMaxRibbonEntries))) { return kMaxRibbonEntries; } @@ -628,12 +629,7 @@ class Standard128RibbonBitsBuilder : public XXH3pFilterBitsBuilder { slots = SolnType::RoundDownNumSlots(slots - 1); } - // Using slots instead of entries to get overhead factor estimate - double f = BandingType::GetFactorFor95PctSuccess(slots); - uint32_t num_entries = static_cast(slots / f); - // Improve precision with another round - f = BandingType::GetFactorFor95PctSuccess(num_entries); - num_entries = static_cast(slots / f + 0.999999999); + uint32_t num_entries = ConfigHelper::GetNumToAdd(slots); // Consider possible Bloom fallback for small filters if (slots < 1024) { @@ -675,9 +671,10 @@ class Standard128RibbonBitsBuilder : public XXH3pFilterBitsBuilder { using TS = Standard128RibbonTypesAndSettings; using SolnType = ribbon::SerializableInterleavedSolution; using BandingType = ribbon::StandardBanding; + using ConfigHelper = ribbon::BandingConfigHelper1TS; static uint32_t NumEntriesToNumSlots(uint32_t num_entries) { - uint32_t num_slots1 = BandingType::GetNumSlotsFor95PctSuccess(num_entries); + uint32_t num_slots1 = ConfigHelper::GetNumSlots(num_entries); return SolnType::RoundUpNumSlots(num_slots1); } diff --git a/util/bloom_test.cc b/util/bloom_test.cc index bc2514bc3..44ebfc636 100644 --- a/util/bloom_test.cc +++ b/util/bloom_test.cc @@ -431,10 +431,10 @@ TEST_P(FullBloomTest, FilterSize) { size_t n2 = bits_builder->ApproximateNumEntries(space); 
EXPECT_GE(n2, n); size_t space2 = bits_builder->CalculateSpace(n2); - if (n > 6000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) { + if (n > 12000 && GetParam() == BloomFilterPolicy::kStandard128Ribbon) { // TODO(peterd): better approximation? EXPECT_GE(space2, space); - EXPECT_LE(space2 * 0.98 - 16.0, space * 1.0); + EXPECT_LE(space2 * 0.998, space * 1.0); } else { EXPECT_EQ(space2, space); } diff --git a/util/coding.h b/util/coding.h index 5215b3e9c..876f181f1 100644 --- a/util/coding.h +++ b/util/coding.h @@ -320,38 +320,6 @@ inline bool GetVarsignedint64(Slice* input, int64_t* value) { } } -// Swaps between big and little endian. Can be used to in combination -// with the little-endian encoding/decoding functions to encode/decode -// big endian. -template -inline T EndianSwapValue(T v) { - static_assert(std::is_integral::value, "non-integral type"); - -#ifdef _MSC_VER - if (sizeof(T) == 2) { - return static_cast(_byteswap_ushort(static_cast(v))); - } else if (sizeof(T) == 4) { - return static_cast(_byteswap_ulong(static_cast(v))); - } else if (sizeof(T) == 8) { - return static_cast(_byteswap_uint64(static_cast(v))); - } -#else - if (sizeof(T) == 2) { - return static_cast(__builtin_bswap16(static_cast(v))); - } else if (sizeof(T) == 4) { - return static_cast(__builtin_bswap32(static_cast(v))); - } else if (sizeof(T) == 8) { - return static_cast(__builtin_bswap64(static_cast(v))); - } -#endif - // Recognized by clang as bswap, but not by gcc :( - T ret_val = 0; - for (size_t i = 0; i < sizeof(T); ++i) { - ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i)); - } - return ret_val; -} - inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) { uint32_t len = 0; if (GetVarint32(input, &len) && input->size() >= len) { diff --git a/util/coding_lean.h b/util/coding_lean.h index 6966f7a66..0b205c269 100644 --- a/util/coding_lean.h +++ b/util/coding_lean.h @@ -98,4 +98,36 @@ inline uint64_t DecodeFixed64(const char* ptr) { } } +// Swaps between big and little endian. Can be used to in combination +// with the little-endian encoding/decoding functions to encode/decode +// big endian. 
+template +inline T EndianSwapValue(T v) { + static_assert(std::is_integral::value, "non-integral type"); + +#ifdef _MSC_VER + if (sizeof(T) == 2) { + return static_cast(_byteswap_ushort(static_cast(v))); + } else if (sizeof(T) == 4) { + return static_cast(_byteswap_ulong(static_cast(v))); + } else if (sizeof(T) == 8) { + return static_cast(_byteswap_uint64(static_cast(v))); + } +#else + if (sizeof(T) == 2) { + return static_cast(__builtin_bswap16(static_cast(v))); + } else if (sizeof(T) == 4) { + return static_cast(__builtin_bswap32(static_cast(v))); + } else if (sizeof(T) == 8) { + return static_cast(__builtin_bswap64(static_cast(v))); + } +#endif + // Recognized by clang as bswap, but not by gcc :( + T ret_val = 0; + for (size_t i = 0; i < sizeof(T); ++i) { + ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i)); + } + return ret_val; +} + } // namespace ROCKSDB_NAMESPACE diff --git a/util/math128.h b/util/math128.h index a1f6d4151..a531ea7f0 100644 --- a/util/math128.h +++ b/util/math128.h @@ -215,7 +215,7 @@ inline int BitsSetToOne(Unsigned128 v) { template <> inline int BitParity(Unsigned128 v) { - return BitParity(Lower64of128(v)) ^ BitParity(Upper64of128(v)); + return BitParity(Lower64of128(v) ^ Upper64of128(v)); } template diff --git a/util/ribbon_alg.h b/util/ribbon_alg.h index a737712a3..da017f639 100644 --- a/util/ribbon_alg.h +++ b/util/ribbon_alg.h @@ -8,6 +8,7 @@ #include #include +#include "rocksdb/rocksdb_namespace.h" #include "util/math128.h" namespace ROCKSDB_NAMESPACE { @@ -501,12 +502,13 @@ namespace ribbon { // // slot index i. // void Prefetch(Index i) const; // -// // Returns a pointer to CoeffRow for slot index i. -// CoeffRow* CoeffRowPtr(Index i); -// -// // Returns a pointer to ResultRow for slot index i. (Gaussian row -// // operations involve both side of the equation.) -// ResultRow* ResultRowPtr(Index i); +// // Load or store CoeffRow and ResultRow for slot index i. +// // (Gaussian row operations involve both sides of the equation.) +// // Bool `for_back_subst` indicates that customizing values for +// // unconstrained solution rows (cr == 0) is allowed. 
+// void LoadRow(Index i, CoeffRow *cr, ResultRow *rr, bool for_back_subst) +// const; +// void StoreRow(Index i, CoeffRow cr, ResultRow rr); // // // Returns the number of columns that can start an r-sequence of // // coefficients, which is the number of slots minus r (kCoeffBits) @@ -548,6 +550,7 @@ bool BandingAdd(BandingStorage *bs, typename BandingStorage::Index start, typename BandingStorage::CoeffRow cr, BacktrackStorage *bts, typename BandingStorage::Index *backtrack_pos) { using CoeffRow = typename BandingStorage::CoeffRow; + using ResultRow = typename BandingStorage::ResultRow; using Index = typename BandingStorage::Index; Index i = start; @@ -561,18 +564,19 @@ bool BandingAdd(BandingStorage *bs, typename BandingStorage::Index start, for (;;) { assert((cr & 1) == 1); - CoeffRow other = *(bs->CoeffRowPtr(i)); - if (other == 0) { - *(bs->CoeffRowPtr(i)) = cr; - *(bs->ResultRowPtr(i)) = rr; + CoeffRow cr_at_i; + ResultRow rr_at_i; + bs->LoadRow(i, &cr_at_i, &rr_at_i, /* for_back_subst */ false); + if (cr_at_i == 0) { + bs->StoreRow(i, cr, rr); bts->BacktrackPut(*backtrack_pos, i); ++*backtrack_pos; return true; } - assert((other & 1) == 1); + assert((cr_at_i & 1) == 1); // Gaussian row reduction - cr ^= other; - rr ^= *(bs->ResultRowPtr(i)); + cr ^= cr_at_i; + rr ^= rr_at_i; if (cr == 0) { // Inconsistency or (less likely) redundancy break; @@ -678,12 +682,11 @@ bool BandingAddRange(BandingStorage *bs, BacktrackStorage *bts, while (backtrack_pos > 0) { --backtrack_pos; Index i = bts->BacktrackGet(backtrack_pos); - *(bs->CoeffRowPtr(i)) = 0; - // Not strictly required, but is required for good FP rate on - // inputs that might have been backtracked out. (We don't want - // anything we've backtracked on to leak into final result, as - // that might not be "harmless".) - *(bs->ResultRowPtr(i)) = 0; + // Clearing the ResultRow is not strictly required, but is required + // for good FP rate on inputs that might have been backtracked out. + // (We don't want anything we've backtracked on to leak into final + // result, as that might not be "harmless".) + bs->StoreRow(i, 0, 0); } } return false; @@ -780,8 +783,9 @@ void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &bs) { for (Index i = num_slots; i > 0;) { --i; - CoeffRow cr = *const_cast(bs).CoeffRowPtr(i); - ResultRow rr = *const_cast(bs).ResultRowPtr(i); + CoeffRow cr; + ResultRow rr; + bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true); // solution row ResultRow sr = 0; for (Index j = 0; j < kResultBits; ++j) { @@ -976,8 +980,9 @@ inline void BackSubstBlock(typename BandingStorage::CoeffRow *state, for (Index i = start_slot + kCoeffBits; i > start_slot;) { --i; - CoeffRow cr = *const_cast(bs).CoeffRowPtr(i); - ResultRow rr = *const_cast(bs).ResultRowPtr(i); + CoeffRow cr; + ResultRow rr; + bs.LoadRow(i, &cr, &rr, /* for_back_subst */ true); for (Index j = 0; j < num_columns; ++j) { // Compute next solution bit at row i, column j (see derivation below) CoeffRow tmp = state[j] << 1; diff --git a/util/ribbon_config.cc b/util/ribbon_config.cc new file mode 100644 index 000000000..c1046f4aa --- /dev/null +++ b/util/ribbon_config.cc @@ -0,0 +1,506 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). 
+ +#include "util/ribbon_config.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +namespace detail { + +// Each instantiation of this struct is sufficiently unique for configuration +// purposes, and is only instantiated for settings where we support the +// configuration API. An application might only reference one instantiation, +// meaning the rest could be pruned at link time. +template +struct BandingConfigHelperData { + static constexpr size_t kKnownSize = 18U; + + // Because of complexity in the data, for smaller numbers of slots + // (powers of two up to 2^17), we record known numbers that can be added + // with kCfc chance of construction failure and settings in template + // parameters. Zero means "unsupported (too small) number of slots". + // (GetNumToAdd below will use interpolation for numbers of slots + // between powers of two; double rather than integer values here make + // that more accurate.) + static const std::array kKnownToAddByPow2; + + // For sufficiently large number of slots, doubling the number of + // slots will increase the expected overhead (slots over number added) + // by approximately this constant. + // (This is roughly constant regardless of ConstructionFailureChance and + // smash setting.) + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetFactorPerPow2() { + if (kCoeffBits == 128U) { + return 0.0038; + } else { + assert(kCoeffBits == 64U); + return 0.0083; + } + } + + // Overhead factor for 2^(kKnownSize-1) slots + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetFinalKnownFactor() { + return 1.0 * (uint32_t{1} << (kKnownSize - 1)) / + kKnownToAddByPow2[kKnownSize - 1]; + } + + // GetFinalKnownFactor() - (kKnownSize-1) * GetFactorPerPow2() + // (Would be a constant if we had partial template specialization for + // static const members.) + static inline double GetBaseFactor() { + return GetFinalKnownFactor() - (kKnownSize - 1) * GetFactorPerPow2(); + } + + // Get overhead factor (slots over number to add) for sufficiently large + // number of slots (by log base 2) + static inline double GetFactorForLarge(double log2_num_slots) { + return GetBaseFactor() + log2_num_slots * GetFactorPerPow2(); + } + + // For a given power of two number of slots (specified by whole number + // log base 2), implements GetNumToAdd for such limited case, returning + // double for better interpolation in GetNumToAdd and GetNumSlots. 
+ static inline double GetNumToAddForPow2(uint32_t log2_num_slots) { + assert(log2_num_slots <= 32); // help clang-analyze + if (log2_num_slots < kKnownSize) { + return kKnownToAddByPow2[log2_num_slots]; + } else { + return 1.0 * (uint64_t{1} << log2_num_slots) / + GetFactorForLarge(1.0 * log2_num_slots); + } + } +}; + +// Based on data from FindOccupancy in ribbon_test +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 252.984, + 506.109, + 1013.71, + 2029.47, + 4060.43, + 8115.63, + 16202.2, + 32305.1, + 64383.5, + 128274, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 126.274, + 254.279, + 510.27, + 1022.24, + 2046.02, + 4091.99, + 8154.98, + 16244.3, + 32349.7, + 64426.6, + 128307, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 124.94, + 249.968, + 501.234, + 1004.06, + 2006.15, + 3997.89, + 7946.99, + 15778.4, + 31306.9, + 62115.3, + 123284, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 62.2683, + 126.259, + 254.268, + 509.975, + 1019.98, + 2026.16, + 4019.75, + 7969.8, + 15798.2, + 31330.3, + 62134.2, + 123255, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 248.851, + 499.532, + 1001.26, + 2003.97, + 4005.59, + 8000.39, + 15966.6, + 31828.1, + 63447.3, + 126506, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 122.637, + 250.651, + 506.625, + 1018.54, + 2036.43, + 4041.6, + 8039.25, + 16005, + 31869.6, + 63492.8, + 126537, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 120.659, + 243.346, + 488.168, + 976.373, + 1948.86, + 3875.85, + 7704.97, + 15312.4, + 30395.1, + 60321.8, + 119813, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 58.6016, + 122.619, + 250.641, + 503.595, + 994.165, + 1967.36, + 3898.17, + 7727.21, + 15331.5, + 30405.8, + 60376.2, + 119836, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 242.61, + 491.887, + 983.603, + 1968.21, + 3926.98, + 7833.99, + 15629, + 31199.9, + 62307.8, + 123870, + }}; + +template <> +const std::array BandingConfigHelperData< + kOneIn1000, 128U, /*smash*/ true>::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 117.19, + 245.105, + 500.748, + 1010.67, + 1993.4, + 3950.01, + 7863.31, + 15652, + 31262.1, + 62462.8, + 124095, +}}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{{ + 0, + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 114, + 234.8, + 471.498, + 940.165, + 1874, + 3721.5, + 7387.5, + 14592, + 29160, + 57745, + 115082, + }}; + +template <> +const std::array + BandingConfigHelperData::kKnownToAddByPow2{ + { + 0, + 0, + 0, + 0, + 0, + 0, // unsupported + 53.0434, + 117, + 245.312, + 483.571, + 950.251, + 1878, + 3736.34, + 7387.97, + 14618, + 29142.9, + 57838.8, + 114932, + }}; + +// We hide these implementation details from the .h file with explicit +// instantiations below these partial 
specializations. + +template +uint32_t BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + true /* kIsSupported */>::GetNumToAdd(uint32_t num_slots) { + using Data = detail::BandingConfigHelperData; + if (num_slots == 0) { + return 0; + } + uint32_t num_to_add; + double log2_num_slots = std::log(num_slots) * 1.4426950409; + uint32_t floor_log2 = static_cast(log2_num_slots); + if (floor_log2 + 1 < Data::kKnownSize) { + double ceil_portion = 1.0 * num_slots / (uint32_t{1} << floor_log2) - 1.0; + // Must be a supported number of slots + assert(Data::kKnownToAddByPow2[floor_log2] > 0.0); + // Weighted average of two nearest known data points + num_to_add = static_cast( + ceil_portion * Data::kKnownToAddByPow2[floor_log2 + 1] + + (1.0 - ceil_portion) * Data::kKnownToAddByPow2[floor_log2]); + } else { + // Use formula for large values + double factor = Data::GetFactorForLarge(log2_num_slots); + assert(factor >= 1.0); + num_to_add = static_cast(num_slots / factor); + } + if (kHomogeneous) { + // Even when standard filter construction would succeed, we might + // have loaded things up too much for Homogeneous filter. (Complete + // explanation not known but observed empirically.) This seems to + // correct for that, mostly affecting small filter configurations. + if (num_to_add >= 8) { + num_to_add -= 8; + } else { + assert(false); + } + } + return num_to_add; +} + +template +uint32_t BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + true /* kIsSupported */>::GetNumSlots(uint32_t num_to_add) { + using Data = detail::BandingConfigHelperData; + + if (num_to_add == 0) { + return 0; + } + if (kHomogeneous) { + // Reverse of above in GetNumToAdd + num_to_add += 8; + } + double log2_num_to_add = std::log(num_to_add) * 1.4426950409; + uint32_t approx_log2_slots = static_cast(log2_num_to_add + 0.5); + assert(approx_log2_slots <= 32); // help clang-analyze + + double lower_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots); + double upper_num_to_add; + if (approx_log2_slots == 0 || lower_num_to_add == /* unsupported */ 0) { + // Return minimum non-zero slots in standard implementation + return kUseSmash ? kCoeffBits : 2 * kCoeffBits; + } else if (num_to_add < lower_num_to_add) { + upper_num_to_add = lower_num_to_add; + --approx_log2_slots; + lower_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots); + } else { + upper_num_to_add = Data::GetNumToAddForPow2(approx_log2_slots + 1); + } + + assert(num_to_add >= lower_num_to_add); + assert(num_to_add < upper_num_to_add); + + double upper_portion = + (num_to_add - lower_num_to_add) / (upper_num_to_add - lower_num_to_add); + + double lower_num_slots = 1.0 * (uint64_t{1} << approx_log2_slots); + + // Interpolation, round up + return static_cast(upper_portion * lower_num_slots + + lower_num_slots + 0.999999999); +} + +// These explicit instantiations enable us to hide most of the +// implementation details from the .h file. (The .h file currently +// needs to determine whether settings are "supported" or not.) 
+ +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; + +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported; + +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ false, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ true, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ false, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 128U, /*sm*/ true, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 64U, /*sm*/ false, /*hm*/ false, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported; +template struct BandingConfigHelper1MaybeSupported< + kOneIn1000, 64U, /*sm*/ false, /*hm*/ true, /*sup*/ true>; +template struct BandingConfigHelper1MaybeSupported; + +} // namespace detail + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git a/util/ribbon_config.h b/util/ribbon_config.h new file mode 100644 index 000000000..0e3edf073 --- /dev/null +++ b/util/ribbon_config.h @@ -0,0 +1,182 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#include +#include +#include +#include + +#include "port/lang.h" // for FALLTHROUGH_INTENDED +#include "rocksdb/rocksdb_namespace.h" + +namespace ROCKSDB_NAMESPACE { + +namespace ribbon { + +// RIBBON PHSF & RIBBON Filter (Rapid Incremental Boolean Banding ON-the-fly) +// +// ribbon_config.h: APIs for relating numbers of slots with numbers of +// additions for tolerable construction failure probabilities. This is +// separate from ribbon_impl.h because it might not be needed for +// some applications. +// +// This API assumes uint32_t for number of slots, as a single Ribbon +// linear system should not normally overflow that without big penalties. +// +// Template parameter kCoeffBits uses uint64_t for convenience in case it +// comes from size_t. +// +// Most of the complexity here is trying to optimize speed and +// compiled code size, using templates to minimize table look-ups and +// the compiled size of all linked look-up tables. Look-up tables are +// required because we don't have good formulas, and the data comes +// from running FindOccupancy in ribbon_test. + +// Represents a chosen chance of successful Ribbon construction for a single +// seed. Allowing higher chance of failed construction can reduce space +// overhead but takes extra time in construction. 
+enum ConstructionFailureChance { + kOneIn2, + kOneIn20, + // When using kHomogeneous==true, construction failure chance should + // not generally exceed target FP rate, so it unlikely useful to + // allow a higher "failure" chance. In some cases, even more overhead + // is appropriate. (TODO) + kOneIn1000, +}; + +namespace detail { + +// It is useful to compile ribbon_test linking to BandingConfigHelper with +// settings for which we do not have configuration data, as long as we don't +// run the code. This template hack supports that. +template +struct BandingConfigHelper1MaybeSupported { + public: + static uint32_t GetNumToAdd(uint32_t num_slots) { + // Unsupported + assert(num_slots == 0); + (void)num_slots; + return 0; + } + + static uint32_t GetNumSlots(uint32_t num_to_add) { + // Unsupported + assert(num_to_add == 0); + (void)num_to_add; + return 0; + } +}; + +// Base class for BandingConfigHelper1 and helper for BandingConfigHelper +// with core implementations built on above data +template +struct BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, true /* kIsSupported */> { + public: + // See BandingConfigHelper1. Implementation in ribbon_config.cc + static uint32_t GetNumToAdd(uint32_t num_slots); + + // See BandingConfigHelper1. Implementation in ribbon_config.cc + static uint32_t GetNumSlots(uint32_t num_to_add); +}; + +} // namespace detail + +template +struct BandingConfigHelper1 + : public detail::BandingConfigHelper1MaybeSupported< + kCfc, kCoeffBits, kUseSmash, kHomogeneous, + /* kIsSupported */ kCoeffBits == 64 || kCoeffBits == 128> { + public: + // Returns a number of entries that can be added to a given number of + // slots, with roughly kCfc chance of construction failure per seed, + // or better. Does NOT do rounding for InterleavedSoln; call + // RoundUpNumSlots for that. + // + // inherited: + // static uint32_t GetNumToAdd(uint32_t num_slots); + + // Returns a number of slots for a given number of entries to add + // that should have roughly kCfc chance of construction failure per + // seed, or better. Does NOT do rounding for InterleavedSoln; call + // RoundUpNumSlots for that. + // + // num_to_add should not exceed roughly 2/3rds of the maximum value + // of the uint32_t type to avoid overflow. + // + // inherited: + // static uint32_t GetNumSlots(uint32_t num_to_add); +}; + +// Configured using TypesAndSettings as in ribbon_impl.h +template +struct BandingConfigHelper1TS + : public BandingConfigHelper1< + kCfc, + /* kCoeffBits */ sizeof(typename TypesAndSettings::CoeffRow) * 8U, + TypesAndSettings::kUseSmash, TypesAndSettings::kHomogeneous> {}; + +// Like BandingConfigHelper1TS except failure chance can be a runtime rather +// than compile time value. +template +struct BandingConfigHelper { + public: + static constexpr ConstructionFailureChance kDefaultFailureChance = + TypesAndSettings::kHomogeneous ? 
kOneIn1000 : kOneIn20; + + static uint32_t GetNumToAdd( + uint32_t num_slots, + ConstructionFailureChance max_failure = kDefaultFailureChance) { + switch (max_failure) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case kOneIn20: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + case kOneIn2: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + case kOneIn1000: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumToAdd(num_slots); + } + } + } + + static uint32_t GetNumSlots( + uint32_t num_to_add, + ConstructionFailureChance max_failure = kDefaultFailureChance) { + switch (max_failure) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case kOneIn20: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + case kOneIn2: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + case kOneIn1000: { + using H1 = BandingConfigHelper1TS; + return H1::GetNumSlots(num_to_add); + } + } + } +}; + +} // namespace ribbon + +} // namespace ROCKSDB_NAMESPACE diff --git a/util/ribbon_impl.h b/util/ribbon_impl.h index 96fedadc1..3b84a0d90 100644 --- a/util/ribbon_impl.h +++ b/util/ribbon_impl.h @@ -8,6 +8,7 @@ #include #include "port/port.h" // for PREFETCH +#include "util/fastrange.h" #include "util/ribbon_alg.h" namespace ROCKSDB_NAMESPACE { @@ -23,6 +24,8 @@ namespace ribbon { // and core design details. // // TODO: more details on trade-offs and practical issues. +// +// APIs for configuring Ribbon are in ribbon_config.h // Ribbon implementations in this file take these parameters, which must be // provided in a class/struct type with members expressed in this concept: @@ -49,10 +52,22 @@ namespace ribbon { // // construction. // static constexpr bool kIsFilter; // +// // When true, enables a special "homogeneous" filter implementation that +// // is slightly faster to construct, and never fails to construct though +// // FP rate can quickly explode in cases where corresponding +// // non-homogeneous filter would fail (or nearly fail?) to construct. +// // For smaller filters, you can configure with ConstructionFailureChance +// // smaller than desired FP rate to largely counteract this effect. +// // TODO: configuring Homogeneous Ribbon for arbitrarily large filters +// // based on data from OptimizeHomogAtScale +// static constexpr bool kHomogeneous; +// // // When true, adds a tiny bit more hashing logic on queries and // // construction to improve utilization at the beginning and end of // // the structure. Recommended when CoeffRow is only 64 bits (or -// // less), so typical num_starts < 10k. +// // less), so typical num_starts < 10k. Although this is compatible +// // with kHomogeneous, the competing space vs. time priorities might +// // not be useful. // static constexpr bool kUseSmash; // // // When true, allows number of "starts" to be zero, for best support @@ -201,7 +216,27 @@ class StandardHasher { // This is not so much "critical path" code because it can be done in // parallel (instruction level) with memory lookup. // - // We do not need exhaustive remixing for CoeffRow, but just enough that + // When we might have many entries squeezed into a single start, + // we need reasonably good remixing for CoeffRow. + if (TypesAndSettings::kUseSmash) { + // Reasonably good, reasonably fast, reasonably general. + // Probably not 1:1 but probably close enough. 
+ Unsigned128 a = Multiply64to128(h, kAltCoeffFactor1); + Unsigned128 b = Multiply64to128(h, kAltCoeffFactor2); + auto cr = static_cast(b ^ (a << 64) ^ (a >> 64)); + + // Now ensure the value is non-zero + if (kFirstCoeffAlwaysOne) { + cr |= 1; + } else { + // Still have to ensure some bit is non-zero + cr |= (cr == 0) ? 1 : 0; + } + return cr; + } + // If not kUseSmash, we ensure we're not squeezing many entries into a + // single start, in part by ensuring num_starts > num_slots / 2. Thus, + // here we do not need good remixing for CoeffRow, but just enough that // (a) every bit is reasonably independent from Start. // (b) every Hash-length bit subsequence of the CoeffRow has full or // nearly full entropy from h. @@ -220,25 +255,27 @@ class StandardHasher { // even with a (likely) different multiplier here. Hash a = h * kCoeffAndResultFactor; - // If that's big enough, we're done. If not, we have to expand it, - // maybe up to 4x size. - uint64_t b = a; static_assert( sizeof(Hash) == sizeof(uint64_t) || sizeof(Hash) == sizeof(uint32_t), "Supported sizes"); + // If that's big enough, we're done. If not, we have to expand it, + // maybe up to 4x size. + uint64_t b; if (sizeof(Hash) < sizeof(uint64_t)) { // Almost-trivial hash expansion (OK - see above), favoring roughly // equal number of 1's and 0's in result - b = (b << 32) ^ b ^ kCoeffXor32; + b = (uint64_t{a} << 32) ^ (a ^ kCoeffXor32); + } else { + b = a; } - Unsigned128 c = b; - static_assert(sizeof(CoeffRow) == sizeof(uint64_t) || - sizeof(CoeffRow) == sizeof(Unsigned128), - "Supported sizes"); + static_assert(sizeof(CoeffRow) <= sizeof(Unsigned128), "Supported sizes"); + Unsigned128 c; if (sizeof(uint64_t) < sizeof(CoeffRow)) { // Almost-trivial hash expansion (OK - see above), favoring roughly // equal number of 1's and 0's in result - c = (c << 64) ^ c ^ kCoeffXor64; + c = (Unsigned128{b} << 64) ^ (b ^ kCoeffXor64); + } else { + c = b; } auto cr = static_cast(c); @@ -261,7 +298,7 @@ class StandardHasher { return static_cast(~ResultRow{0}); } inline ResultRow GetResultRowFromHash(Hash h) const { - if (TypesAndSettings::kIsFilter) { + if (TypesAndSettings::kIsFilter && !TypesAndSettings::kHomogeneous) { // This is not so much "critical path" code because it can be done in // parallel (instruction level) with memory lookup. // @@ -272,10 +309,9 @@ class StandardHasher { // the same bits computed for CoeffRow, which are reasonably // independent from Start. (Inlining and common subexpression // elimination with GetCoeffRow should make this - // a single shared multiplication in generated code.) - // - // TODO: fix & test the kUseSmash case with very small num_starts + // a single shared multiplication in generated code when !kUseSmash.) Hash a = h * kCoeffAndResultFactor; + // The bits here that are *most* independent of Start are the highest // order bits (as in Knuth multiplicative hash). To make those the // most preferred for use in the result row, we do a bswap here. 
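Aside (not part of the patch): the kUseSmash branch above expands a 64-bit hash into a full-width coefficient row with two 64x64->128 multiplies so that many entries sharing a start position still get near-independent rows. A minimal standalone sketch of that remixing, using `unsigned __int128` in place of RocksDB's `Unsigned128`/`Multiply64to128` helpers and the factor constants shown in the hunk above:

```cpp
#include <cstdint>

// Sketch only, assuming a GCC/Clang-style unsigned __int128.
using u128 = unsigned __int128;

inline u128 SmashCoeffRowSketch(uint64_t h) {
  // Constants from the hunk above (kAltCoeffFactor1/2).
  constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U;
  constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU;
  // Two full-width products of the same hash, combined so every output bit
  // depends on many input bits.
  u128 a = static_cast<u128>(h) * kAltCoeffFactor1;
  u128 b = static_cast<u128>(h) * kAltCoeffFactor2;
  u128 cr = b ^ (a << 64) ^ (a >> 64);
  // With kFirstCoeffAlwaysOne, the row must have its low coefficient set.
  cr |= 1;
  return cr;
}
```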
@@ -337,6 +373,8 @@ class StandardHasher { // large random prime static constexpr Hash kCoeffAndResultFactor = static_cast(0xc28f82822b650bedULL); + static constexpr uint64_t kAltCoeffFactor1 = 0x876f170be4f1fcb9U; + static constexpr uint64_t kAltCoeffFactor2 = 0xf0433a4aecda4c5fU; // random-ish data static constexpr uint32_t kCoeffXor32 = 0xa6293635U; static constexpr uint64_t kCoeffXor64 = 0xc367844a6e52731dU; @@ -447,15 +485,20 @@ class StandardBanding : public StandardHasher { assert(num_slots >= kCoeffBits); if (num_slots > num_slots_allocated_) { coeff_rows_.reset(new CoeffRow[num_slots]()); - // Note: don't strictly have to zero-init result_rows, - // except possible information leakage ;) - result_rows_.reset(new ResultRow[num_slots]()); + if (!TypesAndSettings::kHomogeneous) { + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage, etc ;) + result_rows_.reset(new ResultRow[num_slots]()); + } num_slots_allocated_ = num_slots; } else { for (Index i = 0; i < num_slots; ++i) { coeff_rows_[i] = 0; - // Note: don't strictly have to zero-init result_rows - result_rows_[i] = 0; + if (!TypesAndSettings::kHomogeneous) { + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage, etc ;) + result_rows_[i] = 0; + } } } num_starts_ = num_slots - kCoeffBits + 1; @@ -480,10 +523,32 @@ class StandardBanding : public StandardHasher { } inline void Prefetch(Index i) const { PREFETCH(&coeff_rows_[i], 1 /* rw */, 1 /* locality */); - PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */); + if (!TypesAndSettings::kHomogeneous) { + PREFETCH(&result_rows_[i], 1 /* rw */, 1 /* locality */); + } + } + inline void LoadRow(Index i, CoeffRow* cr, ResultRow* rr, + bool for_back_subst) const { + *cr = coeff_rows_[i]; + if (TypesAndSettings::kHomogeneous) { + if (for_back_subst && *cr == 0) { + // Cheap pseudorandom data to fill unconstrained solution rows + *rr = static_cast(i * 0x9E3779B185EBCA87ULL); + } else { + *rr = 0; + } + } else { + *rr = result_rows_[i]; + } + } + inline void StoreRow(Index i, CoeffRow cr, ResultRow rr) { + coeff_rows_[i] = cr; + if (TypesAndSettings::kHomogeneous) { + assert(rr == 0); + } else { + result_rows_[i] = rr; + } } - inline CoeffRow* CoeffRowPtr(Index i) { return &coeff_rows_[i]; } - inline ResultRow* ResultRowPtr(Index i) { return &result_rows_[i]; } inline Index GetNumStarts() const { return num_starts_; } // from concept BacktrackStorage, for when backtracking is used @@ -554,6 +619,10 @@ class StandardBanding : public StandardHasher { return count; } + // Returns whether a row is "occupied" in the banding (non-zero + // coefficients stored). (Only recommended for debug/test) + bool IsOccupied(Index i) { return coeff_rows_[i] != 0; } + // ******************************************************************** // High-level API @@ -608,54 +677,6 @@ class StandardBanding : public StandardHasher { return false; } - // ******************************************************************** - // Static high-level API - - // Based on data from FindOccupancyForSuccessRate in ribbon_test, - // returns a number of slots for a given number of entries to add - // that should have roughly 95% or better chance of successful - // construction per seed. Does NOT do rounding for InterleavedSoln; - // call RoundUpNumSlots for that. - // - // num_to_add should not exceed roughly 2/3rds of the maximum value - // of the Index type to avoid overflow. 
- static Index GetNumSlotsFor95PctSuccess(Index num_to_add) { - if (num_to_add == 0) { - return 0; - } - double factor = GetFactorFor95PctSuccess(num_to_add); - Index num_slots = static_cast(num_to_add * factor); - assert(num_slots >= num_to_add); - return num_slots; - } - - // Based on data from FindOccupancyForSuccessRate in ribbon_test, - // given a number of entries to add, returns a space overhead factor - // (slots divided by num_to_add) that should have roughly 95% or better - // chance of successful construction per seed. Does NOT do rounding for - // InterleavedSoln; call RoundUpNumSlots for that. - // - // The reason that num_to_add is needed is that Ribbon filters of a - // particular CoeffRow size do not scale infinitely. - static double GetFactorFor95PctSuccess(Index num_to_add) { - double log2_num_to_add = std::log(num_to_add) * 1.442695; - if (kCoeffBits == 64) { - if (TypesAndSettings::kUseSmash) { - return 1.02 + std::max(log2_num_to_add - 8.5, 0.0) * 0.009; - } else { - return 1.05 + std::max(log2_num_to_add - 11.0, 0.0) * 0.009; - } - } else { - // Currently only support 64 and 128 - assert(kCoeffBits == 128); - if (TypesAndSettings::kUseSmash) { - return 1.01 + std::max(log2_num_to_add - 10.0, 0.0) * 0.0042; - } else { - return 1.02 + std::max(log2_num_to_add - 12.0, 0.0) * 0.0042; - } - } - } - protected: // TODO: explore combining in a struct std::unique_ptr coeff_rows_; @@ -716,8 +737,8 @@ class InMemSimpleSolution { } template - ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) { - assert(!TypesAndSettings::kIsFilter); + ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { + // assert(!TypesAndSettings::kIsFilter); Can be useful in testing if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { // Unusual return 0; @@ -728,7 +749,7 @@ class InMemSimpleSolution { } template - bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) { + bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { assert(TypesAndSettings::kIsFilter); if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { // Unusual. Zero starts presumes no keys added -> always false @@ -740,7 +761,7 @@ class InMemSimpleSolution { } } - double ExpectedFpRate() { + double ExpectedFpRate() const { assert(TypesAndSettings::kIsFilter); if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { // Unusual, but we don't have FPs if we always return false. @@ -752,6 +773,20 @@ class InMemSimpleSolution { return std::pow(0.5, 8U * sizeof(ResultRow)); } + // ******************************************************************** + // Static high-level API + + // Round up to a number of slots supported by this structure. Note that + // this needs to be must be taken into account for the banding if this + // solution layout/storage is to be used. + static Index RoundUpNumSlots(Index num_slots) { + // Must be at least kCoeffBits for at least one start + // Or if not smash, even more because hashing not equipped + // for stacking up so many entries on a single start location + auto min_slots = kCoeffBits * (TypesAndSettings::kUseSmash ? 1 : 2); + return std::max(num_slots, static_cast(min_slots)); + } + protected: // We generally store "starts" instead of slots for speed of GetStart(), // as in StandardHasher. 
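Aside (not part of the patch): the removed `GetNumSlotsFor95PctSuccess` / `GetFactorFor95PctSuccess` statics are replaced by the table-driven helpers in ribbon_config.h, with `kOneIn20` playing the role of the old ~95% construction-success target. A hedged migration sketch, mirroring the filter_policy.cc hunk earlier in this patch; `MyTypesAndSettings` is a placeholder for any concrete TypesAndSettings struct, not a name from the patch:

```cpp
// Sketch of caller-side sizing with the new config API.
using ConfigHelper =
    ribbon::BandingConfigHelper1TS<ribbon::kOneIn20, MyTypesAndSettings>;
using SolnType =
    ribbon::SerializableInterleavedSolution<MyTypesAndSettings>;

uint32_t NumEntriesToNumSlots(uint32_t num_entries) {
  // Old: BandingType::GetNumSlotsFor95PctSuccess(num_entries)
  // New: interpolated lookup keyed by failure chance, coeff width, smash.
  uint32_t num_slots = ConfigHelper::GetNumSlots(num_entries);
  // Still round up to what the interleaved solution layout supports.
  return SolnType::RoundUpNumSlots(num_slots);
}
```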
@@ -853,8 +888,8 @@ class SerializableInterleavedSolution { } template - ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) { - assert(!TypesAndSettings::kIsFilter); + ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) const { + // assert(!TypesAndSettings::kIsFilter); Can be useful in testing if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { // Unusual return 0; @@ -873,7 +908,7 @@ class SerializableInterleavedSolution { } template - bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) { + bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) const { assert(TypesAndSettings::kIsFilter); if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { // Unusual. Zero starts presumes no keys added -> always false @@ -893,7 +928,7 @@ class SerializableInterleavedSolution { } } - double ExpectedFpRate() { + double ExpectedFpRate() const { assert(TypesAndSettings::kIsFilter); if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { // Unusual. Zero starts presumes no keys added -> always false diff --git a/util/ribbon_test.cc b/util/ribbon_test.cc index f9bcef418..320dd26f2 100644 --- a/util/ribbon_test.cc +++ b/util/ribbon_test.cc @@ -3,48 +3,71 @@ // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). -#include - #include "rocksdb/system_clock.h" #include "test_util/testharness.h" #include "util/bloom_impl.h" #include "util/coding.h" #include "util/hash.h" +#include "util/ribbon_config.h" #include "util/ribbon_impl.h" #include "util/stop_watch.h" +#include "util/string_util.h" #ifndef GFLAGS uint32_t FLAGS_thoroughness = 5; +uint32_t FLAGS_max_add = 0; +uint32_t FLAGS_min_check = 4000; +uint32_t FLAGS_max_check = 100000; +bool FLAGS_verbose = false; + bool FLAGS_find_occ = false; -double FLAGS_find_next_factor = 1.414; -double FLAGS_find_success = 0.95; -double FLAGS_find_delta_start = 0.01; -double FLAGS_find_delta_end = 0.0001; -double FLAGS_find_delta_shrink = 0.99; +bool FLAGS_find_slot_occ = false; +double FLAGS_find_next_factor = 1.618; +uint32_t FLAGS_find_iters = 10000; uint32_t FLAGS_find_min_slots = 128; -uint32_t FLAGS_find_max_slots = 12800000; +uint32_t FLAGS_find_max_slots = 1000000; + +bool FLAGS_optimize_homog = false; +uint32_t FLAGS_optimize_homog_slots = 30000000; +uint32_t FLAGS_optimize_homog_check = 200000; +double FLAGS_optimize_homog_granularity = 0.002; #else #include "util/gflags_compat.h" using GFLAGS_NAMESPACE::ParseCommandLineFlags; // Using 500 is a good test when you have time to be thorough. // Default is for general RocksDB regression test runs. DEFINE_uint32(thoroughness, 5, "iterations per configuration"); +DEFINE_uint32(max_add, 0, + "Add up to this number of entries to a single filter in " + "CompactnessAndBacktrackAndFpRate; 0 == reasonable default"); +DEFINE_uint32(min_check, 4000, + "Minimum number of novel entries for testing FP rate"); +DEFINE_uint32(max_check, 10000, + "Maximum number of novel entries for testing FP rate"); +DEFINE_bool(verbose, false, "Print extra details"); + +// Options for FindOccupancy, which is more of a tool than a test. 
+DEFINE_bool(find_occ, false, "whether to run the FindOccupancy tool"); +DEFINE_bool(find_slot_occ, false, + "whether to show individual slot occupancies with " + "FindOccupancy tool"); +DEFINE_double(find_next_factor, 1.618, + "factor to next num_slots for FindOccupancy"); +DEFINE_uint32(find_iters, 10000, "number of samples for FindOccupancy"); +DEFINE_uint32(find_min_slots, 128, "number of slots for FindOccupancy"); +DEFINE_uint32(find_max_slots, 1000000, "number of slots for FindOccupancy"); + +// Options for OptimizeHomogAtScale, which is more of a tool than a test. +DEFINE_bool(optimize_homog, false, + "whether to run the OptimizeHomogAtScale tool"); +DEFINE_uint32(optimize_homog_slots, 30000000, + "number of slots for OptimizeHomogAtScale"); +DEFINE_uint32(optimize_homog_check, 200000, + "number of queries for checking FP rate in OptimizeHomogAtScale"); +DEFINE_double( + optimize_homog_granularity, 0.002, + "overhead change between FP rate checking in OptimizeHomogAtScale"); -// Options for FindOccupancyForSuccessRate, which is more of a tool -// than a test. -DEFINE_bool(find_occ, false, - "whether to run the FindOccupancyForSuccessRate tool"); -DEFINE_double(find_next_factor, 1.414, - "target success rate for FindOccupancyForSuccessRate"); -DEFINE_double(find_success, 0.95, - "target success rate for FindOccupancyForSuccessRate"); -DEFINE_double(find_delta_start, 0.01, " for FindOccupancyForSuccessRate"); -DEFINE_double(find_delta_end, 0.0001, " for FindOccupancyForSuccessRate"); -DEFINE_double(find_delta_shrink, 0.99, " for FindOccupancyForSuccessRate"); -DEFINE_uint32(find_min_slots, 128, - "number of slots for FindOccupancyForSuccessRate"); -DEFINE_uint32(find_max_slots, 12800000, - "number of slots for FindOccupancyForSuccessRate"); #endif // GFLAGS template @@ -150,6 +173,18 @@ struct Hash64KeyGenWrapper : public KeyGen { } }; +using ROCKSDB_NAMESPACE::ribbon::ConstructionFailureChance; + +const std::vector kFailureOnly50Pct = { + ROCKSDB_NAMESPACE::ribbon::kOneIn2}; + +const std::vector kFailureOnlyRare = { + ROCKSDB_NAMESPACE::ribbon::kOneIn1000}; + +const std::vector kFailureAll = { + ROCKSDB_NAMESPACE::ribbon::kOneIn2, ROCKSDB_NAMESPACE::ribbon::kOneIn20, + ROCKSDB_NAMESPACE::ribbon::kOneIn1000}; + } // namespace using ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate; @@ -164,6 +199,7 @@ struct DefaultTypesAndSettings { using Seed = uint32_t; using Key = ROCKSDB_NAMESPACE::Slice; static constexpr bool kIsFilter = true; + static constexpr bool kHomogeneous = false; static constexpr bool kFirstCoeffAlwaysOne = true; static constexpr bool kUseSmash = false; static constexpr bool kAllowZeroStarts = false; @@ -175,6 +211,9 @@ struct DefaultTypesAndSettings { } // For testing using KeyGen = StandardKeyGen; + static const std::vector& FailureChanceToTest() { + return kFailureAll; + } }; using TypesAndSettings_Coeff128 = DefaultTypesAndSettings; @@ -184,23 +223,58 @@ struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings { struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings { using CoeffRow = uint64_t; }; -struct TypesAndSettings_Coeff64Smash1 : public DefaultTypesAndSettings { - using CoeffRow = uint64_t; +struct TypesAndSettings_Coeff64Smash : public TypesAndSettings_Coeff64 { static constexpr bool kUseSmash = true; }; -struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash1 { +struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash { static constexpr bool kFirstCoeffAlwaysOne = false; }; -struct 
TypesAndSettings_Result16 : public DefaultTypesAndSettings { + +// Homogeneous Ribbon configurations +struct TypesAndSettings_Coeff128_Homog : public DefaultTypesAndSettings { + static constexpr bool kHomogeneous = true; + // Since our best construction success setting still has 1/1000 failure + // rate, the best FP rate we test is 1/256 + using ResultRow = uint8_t; + // Homogeneous only makes sense with sufficient slots for equivalent of + // almost sure construction success + static const std::vector& FailureChanceToTest() { + return kFailureOnlyRare; + } +}; +struct TypesAndSettings_Coeff128Smash_Homog + : public TypesAndSettings_Coeff128_Homog { + // Smash (extra time to save space) + Homog (extra space to save time) + // doesn't make much sense in practice, but we minimally test it + static constexpr bool kUseSmash = true; +}; +struct TypesAndSettings_Coeff64_Homog : public TypesAndSettings_Coeff128_Homog { + using CoeffRow = uint64_t; +}; +struct TypesAndSettings_Coeff64Smash_Homog + : public TypesAndSettings_Coeff64_Homog { + // Smash (extra time to save space) + Homog (extra space to save time) + // doesn't make much sense in practice, but we minimally test it + static constexpr bool kUseSmash = true; +}; + +// Less exhaustive mix of coverage, but still covering the most stressful case +// (only 50% construction success) +struct AbridgedTypesAndSettings : public DefaultTypesAndSettings { + static const std::vector& FailureChanceToTest() { + return kFailureOnly50Pct; + } +}; +struct TypesAndSettings_Result16 : public AbridgedTypesAndSettings { using ResultRow = uint16_t; }; -struct TypesAndSettings_Result32 : public DefaultTypesAndSettings { +struct TypesAndSettings_Result32 : public AbridgedTypesAndSettings { using ResultRow = uint32_t; }; -struct TypesAndSettings_IndexSizeT : public DefaultTypesAndSettings { +struct TypesAndSettings_IndexSizeT : public AbridgedTypesAndSettings { using Index = size_t; }; -struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings { +struct TypesAndSettings_Hash32 : public AbridgedTypesAndSettings { using Hash = uint32_t; static Hash HashFn(const Key& key, Hash raw_seed) { // This MurmurHash1 function does not pass tests below without the @@ -209,29 +283,29 @@ struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings { return ROCKSDB_NAMESPACE::Hash(key.data(), key.size(), raw_seed); } }; -struct TypesAndSettings_Hash32_Result16 : public TypesAndSettings_Hash32 { +struct TypesAndSettings_Hash32_Result16 : public AbridgedTypesAndSettings { using ResultRow = uint16_t; }; -struct TypesAndSettings_KeyString : public DefaultTypesAndSettings { +struct TypesAndSettings_KeyString : public AbridgedTypesAndSettings { using Key = std::string; }; -struct TypesAndSettings_Seed8 : public DefaultTypesAndSettings { +struct TypesAndSettings_Seed8 : public AbridgedTypesAndSettings { // This is not a generally recommended configuration. With the configured // hash function, it would fail with SmallKeyGen due to insufficient // independence among the seeds. 
using Seed = uint8_t; }; -struct TypesAndSettings_NoAlwaysOne : public DefaultTypesAndSettings { +struct TypesAndSettings_NoAlwaysOne : public AbridgedTypesAndSettings { static constexpr bool kFirstCoeffAlwaysOne = false; }; -struct TypesAndSettings_AllowZeroStarts : public DefaultTypesAndSettings { +struct TypesAndSettings_AllowZeroStarts : public AbridgedTypesAndSettings { static constexpr bool kAllowZeroStarts = true; }; -struct TypesAndSettings_Seed64 : public DefaultTypesAndSettings { +struct TypesAndSettings_Seed64 : public AbridgedTypesAndSettings { using Seed = uint64_t; }; struct TypesAndSettings_Rehasher - : public StandardRehasherAdapter { + : public StandardRehasherAdapter { using KeyGen = Hash64KeyGenWrapper; }; struct TypesAndSettings_Rehasher_Result16 : public TypesAndSettings_Rehasher { @@ -253,7 +327,7 @@ struct TypesAndSettings_Rehasher32_Coeff64 : public TypesAndSettings_Rehasher32 { using CoeffRow = uint64_t; }; -struct TypesAndSettings_SmallKeyGen : public DefaultTypesAndSettings { +struct TypesAndSettings_SmallKeyGen : public AbridgedTypesAndSettings { // SmallKeyGen stresses the independence of different hash seeds using KeyGen = SmallKeyGen; }; @@ -261,11 +335,25 @@ struct TypesAndSettings_Hash32_SmallKeyGen : public TypesAndSettings_Hash32 { // SmallKeyGen stresses the independence of different hash seeds using KeyGen = SmallKeyGen; }; +struct TypesAndSettings_Coeff32 : public DefaultTypesAndSettings { + using CoeffRow = uint32_t; +}; +struct TypesAndSettings_Coeff32Smash : public TypesAndSettings_Coeff32 { + static constexpr bool kUseSmash = true; +}; +struct TypesAndSettings_Coeff16 : public DefaultTypesAndSettings { + using CoeffRow = uint16_t; +}; +struct TypesAndSettings_Coeff16Smash : public TypesAndSettings_Coeff16 { + static constexpr bool kUseSmash = true; +}; using TestTypesAndSettings = ::testing::Types< TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash, - TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash0, - TypesAndSettings_Coeff64Smash1, TypesAndSettings_Result16, + TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash, + TypesAndSettings_Coeff64Smash0, TypesAndSettings_Coeff128_Homog, + TypesAndSettings_Coeff128Smash_Homog, TypesAndSettings_Coeff64_Homog, + TypesAndSettings_Coeff64Smash_Homog, TypesAndSettings_Result16, TypesAndSettings_Result32, TypesAndSettings_IndexSizeT, TypesAndSettings_Hash32, TypesAndSettings_Hash32_Result16, TypesAndSettings_KeyString, TypesAndSettings_Seed8, @@ -274,7 +362,9 @@ using TestTypesAndSettings = ::testing::Types< TypesAndSettings_Rehasher_Result16, TypesAndSettings_Rehasher_Result32, TypesAndSettings_Rehasher_Seed64, TypesAndSettings_Rehasher32, TypesAndSettings_Rehasher32_Coeff64, TypesAndSettings_SmallKeyGen, - TypesAndSettings_Hash32_SmallKeyGen>; + TypesAndSettings_Hash32_SmallKeyGen, TypesAndSettings_Coeff32, + TypesAndSettings_Coeff32Smash, TypesAndSettings_Coeff16, + TypesAndSettings_Coeff16Smash>; TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings); namespace { @@ -318,328 +408,476 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); IMPORT_RIBBON_IMPL_TYPES(TypeParam); using KeyGen = typename TypeParam::KeyGen; + using ConfigHelper = + ROCKSDB_NAMESPACE::ribbon::BandingConfigHelper; - // For testing FP rate etc. 
- constexpr Index kNumToCheck = 100000; + if (sizeof(CoeffRow) < 8) { + ROCKSDB_GTEST_SKIP("Not fully supported"); + return; + } const auto log2_thoroughness = - static_cast(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness)); - - // With overhead of just 2%, expect ~50% encoding success per - // seed with ~5k keys on 64-bit ribbon, or ~150k keys on 128-bit ribbon. - const double kFactor = 1.02; - - uint64_t total_reseeds = 0; - uint64_t total_single_failures = 0; - uint64_t total_batch_successes = 0; - uint64_t total_fp_count = 0; - uint64_t total_added = 0; - - uint64_t soln_query_nanos = 0; - uint64_t soln_query_count = 0; - uint64_t bloom_query_nanos = 0; - uint64_t isoln_query_nanos = 0; - uint64_t isoln_query_count = 0; - - // Take different samples if you change thoroughness - ROCKSDB_NAMESPACE::Random32 rnd(FLAGS_thoroughness); - - for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) { - uint32_t num_to_add = - sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5500 : 2500; - - // Use different values between that number and 50% of that number - num_to_add -= rnd.Uniformish(num_to_add / 2); - - total_added += num_to_add; - - // Most of the time, test the Interleaved solution storage, but when - // we do we have to make num_slots a multiple of kCoeffBits. So - // sometimes we want to test without that limitation. - bool test_interleaved = (i % 7) != 6; - - Index num_slots = static_cast(num_to_add * kFactor); - if (test_interleaved) { - // Round to supported number of slots - num_slots = InterleavedSoln::RoundUpNumSlots(num_slots); - // Re-adjust num_to_add to get as close as possible to kFactor - num_to_add = static_cast(num_slots / kFactor); + static_cast(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness)); + + // We are going to choose num_to_add using an exponential distribution, + // so that we have good representation of small-to-medium filters. + // Here we just pick some reasonable, practical upper bound based on + // kCoeffBits or option. + const double log_max_add = std::log( + FLAGS_max_add > 0 ? FLAGS_max_add + : static_cast(kCoeffBits * kCoeffBits) * + std::max(FLAGS_thoroughness, uint32_t{32})); + + // This needs to be enough below the minimum number of slots to get a + // reasonable number of samples with the minimum number of slots. + const double log_min_add = std::log(0.66 * SimpleSoln::RoundUpNumSlots(1)); + + ASSERT_GT(log_max_add, log_min_add); + + const double diff_log_add = log_max_add - log_min_add; + + for (ConstructionFailureChance cs : TypeParam::FailureChanceToTest()) { + double expected_reseeds; + switch (cs) { + default: + assert(false); + FALLTHROUGH_INTENDED; + case ROCKSDB_NAMESPACE::ribbon::kOneIn2: + fprintf(stderr, "== Failure: 50 percent\n"); + expected_reseeds = 1.0; + break; + case ROCKSDB_NAMESPACE::ribbon::kOneIn20: + fprintf(stderr, "== Failure: 95 percent\n"); + expected_reseeds = 0.053; + break; + case ROCKSDB_NAMESPACE::ribbon::kOneIn1000: + fprintf(stderr, "== Failure: 1/1000\n"); + expected_reseeds = 0.001; + break; } - std::string prefix; - ROCKSDB_NAMESPACE::PutFixed32(&prefix, rnd.Next()); - - // Batch that must be added - std::string added_str = prefix + "added"; - KeyGen keys_begin(added_str, 0); - KeyGen keys_end(added_str, num_to_add); - - // A couple more that will probably be added - KeyGen one_more(prefix + "more", 1); - KeyGen two_more(prefix + "more", 2); - - // Batch that may or may not be added - const Index kBatchSize = - sizeof(CoeffRow) == 16 ? 300 : TypeParam::kUseSmash ? 
20 : 10; - std::string batch_str = prefix + "batch"; - KeyGen batch_begin(batch_str, 0); - KeyGen batch_end(batch_str, kBatchSize); - - // Batch never (successfully) added, but used for querying FP rate - std::string not_str = prefix + "not"; - KeyGen other_keys_begin(not_str, 0); - KeyGen other_keys_end(not_str, kNumToCheck); - - // Vary bytes for InterleavedSoln to use number of solution columns - // from 0 to max allowed by ResultRow type (and used by SimpleSoln). - // Specifically include 0 and max, and otherwise skew toward max. - uint32_t max_ibytes = static_cast(sizeof(ResultRow) * num_slots); - size_t ibytes; - if (i == 0) { - ibytes = 0; - } else if (i == 1) { - ibytes = max_ibytes; - } else { - // Skewed - ibytes = std::max(rnd.Uniformish(max_ibytes), rnd.Uniformish(max_ibytes)); - } - std::unique_ptr idata(new char[ibytes]); - InterleavedSoln isoln(idata.get(), ibytes); - - SimpleSoln soln; - Hasher hasher; - bool first_single; - bool second_single; - bool batch_success; - { - Banding banding; - // Traditional solve for a fixed set. - ASSERT_TRUE( - banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end)); - - // Now to test backtracking, starting with guaranteed fail. By using - // the keys that will be used to test FP rate, we are then doing an - // extra check that after backtracking there are no remnants (e.g. in - // result side of banding) of these entries. - Index occupied_count = banding.GetOccupiedCount(); - banding.EnsureBacktrackSize(kNumToCheck); - EXPECT_FALSE( - banding.AddRangeOrRollBack(other_keys_begin, other_keys_end)); - EXPECT_EQ(occupied_count, banding.GetOccupiedCount()); - - // Check that we still have a good chance of adding a couple more - // individually - first_single = banding.Add(*one_more); - second_single = banding.Add(*two_more); - Index more_added = (first_single ? 1 : 0) + (second_single ? 1 : 0); - total_single_failures += 2U - more_added; - - // Or as a batch - batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end); - if (batch_success) { - more_added += kBatchSize; - ++total_batch_successes; + uint64_t total_reseeds = 0; + uint64_t total_singles = 0; + uint64_t total_single_failures = 0; + uint64_t total_batch = 0; + uint64_t total_batch_successes = 0; + uint64_t total_fp_count = 0; + uint64_t total_added = 0; + uint64_t total_expand_trials = 0; + uint64_t total_expand_failures = 0; + double total_expand_overhead = 0.0; + + uint64_t soln_query_nanos = 0; + uint64_t soln_query_count = 0; + uint64_t bloom_query_nanos = 0; + uint64_t isoln_query_nanos = 0; + uint64_t isoln_query_count = 0; + + // Take different samples if you change thoroughness + ROCKSDB_NAMESPACE::Random32 rnd(FLAGS_thoroughness); + + for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) { + // We are going to choose num_to_add using an exponential distribution + // as noted above, but instead of randomly choosing them, we generate + // samples linearly using the golden ratio, which ensures a nice spread + // even for a small number of samples, and starting with the minimum + // number of slots to ensure it is tested. + double log_add = + std::fmod(0.6180339887498948482 * diff_log_add * i, diff_log_add) + + log_min_add; + uint32_t num_to_add = static_cast(std::exp(log_add)); + + // Most of the time, test the Interleaved solution storage, but when + // we do we have to make num_slots a multiple of kCoeffBits. So + // sometimes we want to test without that limitation. 
+ bool test_interleaved = (i % 7) != 6; + + // Compute num_slots, and re-adjust num_to_add to get as close as possible + // to next num_slots, to stress that num_slots in terms of construction + // success. Ensure at least one iteration: + Index num_slots = Index{0} - 1; + --num_to_add; + for (;;) { + Index next_num_slots = SimpleSoln::RoundUpNumSlots( + ConfigHelper::GetNumSlots(num_to_add + 1, cs)); + if (test_interleaved) { + next_num_slots = InterleavedSoln::RoundUpNumSlots(next_num_slots); + // assert idempotent + EXPECT_EQ(next_num_slots, + InterleavedSoln::RoundUpNumSlots(next_num_slots)); + } + // assert idempotent with InterleavedSoln::RoundUpNumSlots + EXPECT_EQ(next_num_slots, SimpleSoln::RoundUpNumSlots(next_num_slots)); + + if (next_num_slots > num_slots) { + break; + } + num_slots = next_num_slots; + ++num_to_add; } - EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + assert(num_slots < Index{0} - 1); - // Also verify that redundant adds are OK (no effect) - ASSERT_TRUE( - banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8))); - EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + total_added += num_to_add; - // Now back-substitution - soln.BackSubstFrom(banding); - if (test_interleaved) { - isoln.BackSubstFrom(banding); - } + std::string prefix; + ROCKSDB_NAMESPACE::PutFixed32(&prefix, rnd.Next()); + + // Batch that must be added + std::string added_str = prefix + "added"; + KeyGen keys_begin(added_str, 0); + KeyGen keys_end(added_str, num_to_add); - Seed reseeds = banding.GetOrdinalSeed(); - total_reseeds += reseeds; + // A couple more that will probably be added + KeyGen one_more(prefix + "more", 1); + KeyGen two_more(prefix + "more", 2); - EXPECT_LE(reseeds, 8 + log2_thoroughness); - if (reseeds > log2_thoroughness + 1) { - fprintf( - stderr, "%s high reseeds at %u, %u/%u: %u\n", - reseeds > log2_thoroughness + 8 ? "ERROR Extremely" : "Somewhat", - static_cast(i), static_cast(num_to_add), - static_cast(num_slots), static_cast(reseeds)); + // Batch that may or may not be added + uint32_t batch_size = + static_cast(2.0 * std::sqrt(num_slots - num_to_add)); + if (batch_size < 10U) { + batch_size = 0; + } + std::string batch_str = prefix + "batch"; + KeyGen batch_begin(batch_str, 0); + KeyGen batch_end(batch_str, batch_size); + + // Batch never (successfully) added, but used for querying FP rate + std::string not_str = prefix + "not"; + KeyGen other_keys_begin(not_str, 0); + KeyGen other_keys_end(not_str, FLAGS_max_check); + + double overhead_ratio = 1.0 * num_slots / num_to_add; + if (FLAGS_verbose) { + fprintf(stderr, "Adding(%s) %u / %u Overhead: %g Batch size: %u\n", + test_interleaved ? "i" : "s", (unsigned)num_to_add, + (unsigned)num_slots, overhead_ratio, (unsigned)batch_size); } - hasher.SetOrdinalSeed(reseeds); - } - // soln and hasher now independent of Banding object - // Verify keys added - KeyGen cur = keys_begin; - while (cur != keys_end) { - ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); - ++cur; - } - // We (maybe) snuck these in! 
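One small technique worth calling out is the "skewed" choice of ibytes (in the removed block above and the re-added block below): taking the max of two independent uniform draws yields a distribution whose density increases linearly toward the top of the range, so large interleaved-solution budgets are exercised more often while small ones still occur. A standalone sketch (separate from the patch), with std::mt19937 standing in for the test's Random32:

// Standalone illustration (separate from the patch): the max of two
// independent uniform draws is skewed toward the top of the range
// (P[max(U1, U2) <= x] = x^2 for U uniform on [0, 1]).
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <random>

int main() {
  std::mt19937 rng(12345);            // stand-in for the test's Random32
  const uint32_t max_ibytes = 10000;  // example range, like max_ibytes above
  std::uniform_int_distribution<uint32_t> dist(0, max_ibytes);
  const int kTrials = 100000;
  uint64_t sum_uniform = 0;
  uint64_t sum_skewed = 0;
  for (int i = 0; i < kTrials; ++i) {
    uint32_t a = dist(rng);
    uint32_t b = dist(rng);
    sum_uniform += a;
    sum_skewed += std::max(a, b);  // skewed toward max_ibytes
  }
  // Expect roughly max/2 for the plain mean and roughly 2*max/3 for the
  // skewed one.
  std::printf("uniform mean: %llu  skewed mean: %llu\n",
              static_cast<unsigned long long>(sum_uniform / kTrials),
              static_cast<unsigned long long>(sum_skewed / kTrials));
  return 0;
}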
- if (first_single) { - ASSERT_TRUE(soln.FilterQuery(*one_more, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*one_more, hasher)); - } - if (second_single) { - ASSERT_TRUE(soln.FilterQuery(*two_more, hasher)); - ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*two_more, hasher)); - } - if (batch_success) { - cur = batch_begin; - while (cur != batch_end) { + // Vary bytes for InterleavedSoln to use number of solution columns + // from 0 to max allowed by ResultRow type (and used by SimpleSoln). + // Specifically include 0 and max, and otherwise skew toward max. + uint32_t max_ibytes = + static_cast(sizeof(ResultRow) * num_slots); + size_t ibytes; + if (i == 0) { + ibytes = 0; + } else if (i == 1) { + ibytes = max_ibytes; + } else { + // Skewed + ibytes = + std::max(rnd.Uniformish(max_ibytes), rnd.Uniformish(max_ibytes)); + } + std::unique_ptr idata(new char[ibytes]); + InterleavedSoln isoln(idata.get(), ibytes); + + SimpleSoln soln; + Hasher hasher; + bool first_single; + bool second_single; + bool batch_success; + { + Banding banding; + // Traditional solve for a fixed set. + ASSERT_TRUE( + banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end)); + + Index occupied_count = banding.GetOccupiedCount(); + Index more_added = 0; + + if (TypeParam::kHomogeneous || overhead_ratio < 1.01 || + batch_size == 0) { + // Homogeneous not compatible with backtracking because add + // doesn't fail. Small overhead ratio too packed to expect more + first_single = false; + second_single = false; + batch_success = false; + } else { + // Now to test backtracking, starting with guaranteed fail. By using + // the keys that will be used to test FP rate, we are then doing an + // extra check that after backtracking there are no remnants (e.g. in + // result side of banding) of these entries. + KeyGen other_keys_too_big_end = other_keys_begin; + other_keys_too_big_end += num_to_add; + banding.EnsureBacktrackSize(std::max(num_to_add, batch_size)); + EXPECT_FALSE(banding.AddRangeOrRollBack(other_keys_begin, + other_keys_too_big_end)); + EXPECT_EQ(occupied_count, banding.GetOccupiedCount()); + + // Check that we still have a good chance of adding a couple more + // individually + first_single = banding.Add(*one_more); + second_single = banding.Add(*two_more); + more_added += (first_single ? 1 : 0) + (second_single ? 1 : 0); + total_singles += 2U; + total_single_failures += 2U - more_added; + + // Or as a batch + batch_success = banding.AddRangeOrRollBack(batch_begin, batch_end); + ++total_batch; + if (batch_success) { + more_added += batch_size; + ++total_batch_successes; + } + EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + } + + // Also verify that redundant adds are OK (no effect) + ASSERT_TRUE( + banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8))); + EXPECT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + + // Now back-substitution + soln.BackSubstFrom(banding); + if (test_interleaved) { + isoln.BackSubstFrom(banding); + } + + Seed reseeds = banding.GetOrdinalSeed(); + total_reseeds += reseeds; + + EXPECT_LE(reseeds, 8 + log2_thoroughness); + if (reseeds > log2_thoroughness + 1) { + fprintf( + stderr, "%s high reseeds at %u, %u/%u: %u\n", + reseeds > log2_thoroughness + 8 ? "ERROR Extremely" : "Somewhat", + static_cast(i), static_cast(num_to_add), + static_cast(num_slots), static_cast(reseeds)); + } + + if (reseeds > 0) { + // "Expand" test: given a failed construction, how likely is it to + // pass with same seed and more slots. 
At each step, we increase + // enough to ensure there is at least one shift within each coeff + // block. + ++total_expand_trials; + Index expand_count = 0; + Index ex_slots = num_slots; + banding.SetOrdinalSeed(0); + for (;; ++expand_count) { + ASSERT_LE(expand_count, log2_thoroughness); + ex_slots += ex_slots / kCoeffBits; + if (test_interleaved) { + ex_slots = InterleavedSoln::RoundUpNumSlots(ex_slots); + } + banding.Reset(ex_slots); + bool success = banding.AddRange(keys_begin, keys_end); + if (success) { + break; + } + } + total_expand_failures += expand_count; + total_expand_overhead += 1.0 * (ex_slots - num_slots) / num_slots; + } + + hasher.SetOrdinalSeed(reseeds); + } + // soln and hasher now independent of Banding object + + // Verify keys added + KeyGen cur = keys_begin; + while (cur != keys_end) { ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); ++cur; } - } - - // Check FP rate (depends only on number of result bits == solution columns) - Index fp_count = 0; - cur = other_keys_begin; - { - ROCKSDB_NAMESPACE::StopWatchNano timer( - ROCKSDB_NAMESPACE::SystemClock::Default(), true); - while (cur != other_keys_end) { - bool fp = soln.FilterQuery(*cur, hasher); - fp_count += fp ? 1 : 0; - ++cur; + // We (maybe) snuck these in! + if (first_single) { + ASSERT_TRUE(soln.FilterQuery(*one_more, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*one_more, hasher)); + } + if (second_single) { + ASSERT_TRUE(soln.FilterQuery(*two_more, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*two_more, hasher)); + } + if (batch_success) { + cur = batch_begin; + while (cur != batch_end) { + ASSERT_TRUE(soln.FilterQuery(*cur, hasher)); + ASSERT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); + ++cur; + } } - soln_query_nanos += timer.ElapsedNanos(); - soln_query_count += kNumToCheck; - } - { - double expected_fp_count = soln.ExpectedFpRate() * kNumToCheck; - // For expected FP rate, also include false positives due to collisions - // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) - double correction = - kNumToCheck * ExpectedCollisionFpRate(hasher, num_to_add); - EXPECT_LE(fp_count, - FrequentPoissonUpperBound(expected_fp_count + correction)); - EXPECT_GE(fp_count, - FrequentPoissonLowerBound(expected_fp_count + correction)); - } - total_fp_count += fp_count; - // And also check FP rate for isoln - if (test_interleaved) { - Index ifp_count = 0; + // Check FP rate (depends only on number of result bits == solution + // columns) + Index fp_count = 0; cur = other_keys_begin; - ROCKSDB_NAMESPACE::StopWatchNano timer( - ROCKSDB_NAMESPACE::SystemClock::Default(), true); - while (cur != other_keys_end) { - ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0; - ++cur; + { + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default(), true); + while (cur != other_keys_end) { + bool fp = soln.FilterQuery(*cur, hasher); + fp_count += fp ? 1 : 0; + ++cur; + } + soln_query_nanos += timer.ElapsedNanos(); + soln_query_count += FLAGS_max_check; } - isoln_query_nanos += timer.ElapsedNanos(); - isoln_query_count += kNumToCheck; { - double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck; + double expected_fp_count = soln.ExpectedFpRate() * FLAGS_max_check; // For expected FP rate, also include false positives due to collisions // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) 
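The "Expand" test added above retries a failed construction with geometrically growing slot counts: each attempt adds ex_slots / kCoeffBits slots, i.e. at least one extra slot per coefficient block, so the table grows by a constant factor of roughly 1 + 1/kCoeffBits per retry. A standalone sketch (separate from the patch) of that growth schedule, with a hypothetical TryConstruct() standing in for banding.Reset() plus AddRange():

// Standalone sketch (separate from the patch) of the expansion schedule used
// by the "Expand" test: grow the slot count by ~1/kCoeffBits per attempt
// until a hypothetical TryConstruct() succeeds. kCoeffBits and TryConstruct
// are stand-ins, not the ribbon types used in the test.
#include <cstdint>
#include <cstdio>

constexpr uint32_t kCoeffBits = 64;

// Hypothetical construction attempt; here it simply succeeds once the slot
// count reaches an arbitrary threshold, just to keep the sketch runnable.
bool TryConstruct(uint32_t num_slots) { return num_slots >= 1200; }

int main() {
  uint32_t num_slots = 1024;  // starting point after a failed construction
  uint32_t expand_count = 0;
  uint32_t ex_slots = num_slots;
  while (!TryConstruct(ex_slots)) {
    // At least one extra slot per coefficient block per step.
    ex_slots += ex_slots / kCoeffBits;
    ++expand_count;
  }
  std::printf("expanded %u times, overhead %.3f\n",
              static_cast<unsigned>(expand_count),
              1.0 * (ex_slots - num_slots) / num_slots);
  return 0;
}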
double correction = - kNumToCheck * ExpectedCollisionFpRate(hasher, num_to_add); - EXPECT_LE(ifp_count, + FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add); + + // NOTE: rare violations expected with kHomogeneous + EXPECT_LE(fp_count, FrequentPoissonUpperBound(expected_fp_count + correction)); - EXPECT_GE(ifp_count, + EXPECT_GE(fp_count, FrequentPoissonLowerBound(expected_fp_count + correction)); } - // Since the bits used in isoln are a subset of the bits used in soln, - // it cannot have fewer FPs - EXPECT_GE(ifp_count, fp_count); - } + total_fp_count += fp_count; - // And compare to Bloom time, for fun - if (ibytes >= /* minimum Bloom impl bytes*/ 64) { - Index bfp_count = 0; - cur = other_keys_begin; - ROCKSDB_NAMESPACE::StopWatchNano timer( - ROCKSDB_NAMESPACE::SystemClock::Default(), true); - while (cur != other_keys_end) { - uint64_t h = hasher.GetHash(*cur); - uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h); - uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h) - : h1 * 0x9e3779b9; - bfp_count += ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch( - h1, h2, static_cast(ibytes), 6, idata.get()) - ? 1 - : 0; - ++cur; + // And also check FP rate for isoln + if (test_interleaved) { + Index ifp_count = 0; + cur = other_keys_begin; + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default(), true); + while (cur != other_keys_end) { + ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0; + ++cur; + } + isoln_query_nanos += timer.ElapsedNanos(); + isoln_query_count += FLAGS_max_check; + { + double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check; + // For expected FP rate, also include false positives due to + // collisions in Hash value. (Negligible for 64-bit, can matter for + // 32-bit.) + double correction = + FLAGS_max_check * ExpectedCollisionFpRate(hasher, num_to_add); + + // NOTE: rare violations expected with kHomogeneous + EXPECT_LE(ifp_count, + FrequentPoissonUpperBound(expected_fp_count + correction)); + + // FIXME: why sometimes can we slightly "beat the odds"? + // (0.95 factor should not be needed) + EXPECT_GE(ifp_count, FrequentPoissonLowerBound( + 0.95 * expected_fp_count + correction)); + } + // Since the bits used in isoln are a subset of the bits used in soln, + // it cannot have fewer FPs + EXPECT_GE(ifp_count, fp_count); + } + + // And compare to Bloom time, for fun + if (ibytes >= /* minimum Bloom impl bytes*/ 64) { + Index bfp_count = 0; + cur = other_keys_begin; + ROCKSDB_NAMESPACE::StopWatchNano timer( + ROCKSDB_NAMESPACE::SystemClock::Default(), true); + while (cur != other_keys_end) { + uint64_t h = hasher.GetHash(*cur); + uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h); + uint32_t h2 = sizeof(Hash) >= 8 ? ROCKSDB_NAMESPACE::Upper32of64(h) + : h1 * 0x9e3779b9; + bfp_count += + ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch( + h1, h2, static_cast(ibytes), 6, idata.get()) + ? 
1 + : 0; + ++cur; + } + bloom_query_nanos += timer.ElapsedNanos(); + // ensure bfp_count is used + ASSERT_LT(bfp_count, FLAGS_max_check); } - bloom_query_nanos += timer.ElapsedNanos(); - // ensure bfp_count is used - ASSERT_LT(bfp_count, kNumToCheck); } - } - // "outside" == key not in original set so either negative or false positive - fprintf(stderr, "Simple outside query, hot, incl hashing, ns/key: %g\n", - 1.0 * soln_query_nanos / soln_query_count); - fprintf(stderr, "Interleaved outside query, hot, incl hashing, ns/key: %g\n", - 1.0 * isoln_query_nanos / isoln_query_count); - fprintf(stderr, "Bloom outside query, hot, incl hashing, ns/key: %g\n", - 1.0 * bloom_query_nanos / soln_query_count); + // "outside" == key not in original set so either negative or false positive + fprintf(stderr, + "Simple outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * soln_query_nanos / soln_query_count); + fprintf(stderr, + "Interleaved outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * isoln_query_nanos / isoln_query_count); + fprintf(stderr, + "Bloom outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * bloom_query_nanos / soln_query_count); + + if (TypeParam::kHomogeneous) { + EXPECT_EQ(total_reseeds, 0U); + } else { + double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness; + fprintf(stderr, "Average re-seeds: %g\n", average_reseeds); + // Values above were chosen to target around 50% chance of encoding + // success rate (average of 1.0 re-seeds) or slightly better. But 1.15 is + // also close enough. + EXPECT_LE(total_reseeds, + InfrequentPoissonUpperBound(1.15 * expected_reseeds * + FLAGS_thoroughness)); + // Would use 0.85 here instead of 0.75, but + // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of + // sequential keys with a small, cheap hash function. We accept that + // there are surely inputs that are somewhat bad for this setup, but + // these somewhat good inputs are probably more likely. + EXPECT_GE(total_reseeds, + InfrequentPoissonLowerBound(0.75 * expected_reseeds * + FLAGS_thoroughness)); + } - { - double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness; - fprintf(stderr, "Average re-seeds: %g\n", average_reseeds); - // Values above were chosen to target around 50% chance of encoding success - // rate (average of 1.0 re-seeds) or slightly better. But 1.15 is also close - // enough. - EXPECT_LE(total_reseeds, - InfrequentPoissonUpperBound(1.15 * FLAGS_thoroughness)); - // Would use 0.85 here instead of 0.75, but - // TypesAndSettings_Hash32_SmallKeyGen can "beat the odds" because of - // sequential keys with a small, cheap hash function. We accept that - // there are surely inputs that are somewhat bad for this setup, but - // these somewhat good inputs are probably more likely. 
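The expected_reseeds constants configured earlier (1.0 for a one-in-2 failure chance, 0.053 for one-in-20, 0.001 for one-in-1000) follow from the geometric distribution: if each construction attempt independently fails with probability q, the expected number of re-seeds before the first success is q / (1 - q). A quick standalone check (separate from the patch):

// Standalone check (separate from the patch): expected number of re-seeds
// for a per-construction failure probability q is q / (1 - q), matching the
// expected_reseeds values used above (1.0, ~0.053, ~0.001).
#include <cstdio>

int main() {
  const double failure_chances[] = {0.5, 1.0 / 20, 1.0 / 1000};
  for (double q : failure_chances) {
    std::printf("failure chance %g -> expected re-seeds %g\n", q,
                q / (1.0 - q));
  }
  return 0;
}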
- EXPECT_GE(total_reseeds, - InfrequentPoissonLowerBound(0.75 * FLAGS_thoroughness)); - } + if (total_expand_trials > 0) { + double average_expand_failures = + 1.0 * total_expand_failures / total_expand_trials; + fprintf(stderr, "Average expand failures, and overhead: %g, %g\n", + average_expand_failures, + total_expand_overhead / total_expand_trials); + // Seems to be a generous allowance + EXPECT_LE(total_expand_failures, + InfrequentPoissonUpperBound(1.0 * total_expand_trials)); + } else { + fprintf(stderr, "Average expand failures: N/A\n"); + } - { - uint64_t total_singles = 2 * FLAGS_thoroughness; - double single_failure_rate = 1.0 * total_single_failures / total_singles; - fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate); - // A rough bound (one sided) based on nothing in particular - double expected_single_failures = - 1.0 * total_singles / - (sizeof(CoeffRow) == 16 ? 128 : TypeParam::kUseSmash ? 64 : 32); - EXPECT_LE(total_single_failures, - InfrequentPoissonUpperBound(expected_single_failures)); - } + if (total_singles > 0) { + double single_failure_rate = 1.0 * total_single_failures / total_singles; + fprintf(stderr, "Add'l single, failure rate: %g\n", single_failure_rate); + // A rough bound (one sided) based on nothing in particular + double expected_single_failures = + 1.0 * total_singles / + (sizeof(CoeffRow) == 16 ? 128 : TypeParam::kUseSmash ? 64 : 32); + EXPECT_LE(total_single_failures, + InfrequentPoissonUpperBound(expected_single_failures)); + } - { - // Counting successes here for Poisson to approximate the Binomial - // distribution. - // A rough bound (one sided) based on nothing in particular. - double expected_batch_successes = 1.0 * FLAGS_thoroughness / 2; - uint64_t lower_bound = - InfrequentPoissonLowerBound(expected_batch_successes); - fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n", - 1.0 * total_batch_successes / FLAGS_thoroughness, - 1.0 * lower_bound / FLAGS_thoroughness); - EXPECT_GE(total_batch_successes, lower_bound); - } + if (total_batch > 0) { + // Counting successes here for Poisson to approximate the Binomial + // distribution. + // A rough bound (one sided) based on nothing in particular. + double expected_batch_successes = 1.0 * total_batch / 2; + uint64_t lower_bound = + InfrequentPoissonLowerBound(expected_batch_successes); + fprintf(stderr, "Add'l batch, success rate: %g (>= %g)\n", + 1.0 * total_batch_successes / total_batch, + 1.0 * lower_bound / total_batch); + EXPECT_GE(total_batch_successes, lower_bound); + } - { - uint64_t total_checked = uint64_t{kNumToCheck} * FLAGS_thoroughness; - double expected_total_fp_count = - total_checked * std::pow(0.5, 8U * sizeof(ResultRow)); - // For expected FP rate, also include false positives due to collisions - // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) 
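The FrequentPoisson*/InfrequentPoisson* bounds used throughout are helpers already defined elsewhere in ribbon_test.cc; the sketch below (separate from the patch, and only an assumption about their general shape, with made-up names and constants) shows how such acceptance thresholds can be derived from the normal approximation to a Poisson count, mean plus or minus k * sqrt(mean), which is how an expected count turns into a one-sided pass/fail limit.

// Illustrative only (separate from the patch): one plausible shape for
// Poisson-based pass/fail bounds using the normal approximation
// (mean +/- stddevs * sqrt(mean)). The real helpers in ribbon_test.cc may
// use different names and constants.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

uint64_t PoissonUpperBound(double mean, double stddevs) {
  return static_cast<uint64_t>(mean + stddevs * std::sqrt(mean) + 1.0);
}

uint64_t PoissonLowerBound(double mean, double stddevs) {
  return static_cast<uint64_t>(std::max(0.0, mean - stddevs * std::sqrt(mean)));
}

int main() {
  // e.g. an expected false-positive count of 400 over all queries
  const double expected = 400.0;
  std::printf("accept observed counts in [%llu, %llu]\n",
              static_cast<unsigned long long>(PoissonLowerBound(expected, 5.0)),
              static_cast<unsigned long long>(PoissonUpperBound(expected, 5.0)));
  return 0;
}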
- double average_added = 1.0 * total_added / FLAGS_thoroughness; - expected_total_fp_count += - total_checked * ExpectedCollisionFpRate(Hasher(), average_added); - - uint64_t upper_bound = InfrequentPoissonUpperBound(expected_total_fp_count); - uint64_t lower_bound = InfrequentPoissonLowerBound(expected_total_fp_count); - fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n", - 1.0 * total_fp_count / total_checked, - expected_total_fp_count / total_checked, - 1.0 * upper_bound / total_checked, - 1.0 * lower_bound / total_checked); - EXPECT_LE(total_fp_count, upper_bound); - EXPECT_GE(total_fp_count, lower_bound); + { + uint64_t total_checked = uint64_t{FLAGS_max_check} * FLAGS_thoroughness; + double expected_total_fp_count = + total_checked * std::pow(0.5, 8U * sizeof(ResultRow)); + // For expected FP rate, also include false positives due to collisions + // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) + double average_added = 1.0 * total_added / FLAGS_thoroughness; + expected_total_fp_count += + total_checked * ExpectedCollisionFpRate(Hasher(), average_added); + + uint64_t upper_bound = + InfrequentPoissonUpperBound(expected_total_fp_count); + uint64_t lower_bound = + InfrequentPoissonLowerBound(expected_total_fp_count); + fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n", + 1.0 * total_fp_count / total_checked, + expected_total_fp_count / total_checked, + 1.0 * upper_bound / total_checked, + 1.0 * lower_bound / total_checked); + EXPECT_LE(total_fp_count, upper_bound); + EXPECT_GE(total_fp_count, lower_bound); + } } } @@ -672,9 +910,8 @@ TYPED_TEST(RibbonTypeParamTest, Extremes) { // Somewhat oddly, we expect same FP rate as if we had essentially filled // up the slots. - constexpr Index kNumToCheck = 100000; KeyGen other_keys_begin("not", 0); - KeyGen other_keys_end("not", kNumToCheck); + KeyGen other_keys_end("not", FLAGS_max_check); Index fp_count = 0; KeyGen cur = other_keys_begin; @@ -683,19 +920,25 @@ TYPED_TEST(RibbonTypeParamTest, Extremes) { bool soln_query_result = soln.FilterQuery(*cur, hasher); // Solutions are equivalent ASSERT_EQ(isoln_query_result, soln_query_result); - // And in fact we only expect an FP when ResultRow is 0 - // CHANGE: no longer true because of filling some unused slots - // with pseudorandom values. - // ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash( - // hasher.GetHash(*cur)) == ResultRow{0}); + if (!TypeParam::kHomogeneous) { + // And in fact we only expect an FP when ResultRow is 0 + // (except Homogeneous) + ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash( + hasher.GetHash(*cur)) == ResultRow{0}); + } fp_count += soln_query_result ? 
1 : 0; ++cur; } { ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate()); - double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck; + double expected_fp_count = isoln.ExpectedFpRate() * FLAGS_max_check; EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count)); - EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count)); + if (TypeParam::kHomogeneous) { + // Pseudorandom garbage in Homogeneous filter can "beat the odds" if + // nothing added + } else { + EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count)); + } } // ###################################################### @@ -874,8 +1117,8 @@ TEST(RibbonTest, PhsfBasic) { } } -// Not a real test, but a tool used to build GetNumSlotsFor95PctSuccess -TYPED_TEST(RibbonTypeParamTest, FindOccupancyForSuccessRate) { +// Not a real test, but a tool used to build APIs in ribbon_config.h +TYPED_TEST(RibbonTypeParamTest, FindOccupancy) { IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); IMPORT_RIBBON_IMPL_TYPES(TypeParam); using KeyGen = typename TypeParam::KeyGen; @@ -885,34 +1128,101 @@ TYPED_TEST(RibbonTypeParamTest, FindOccupancyForSuccessRate) { return; } - KeyGen cur("blah", 0); + KeyGen cur(ROCKSDB_NAMESPACE::ToString( + testing::UnitTest::GetInstance()->random_seed()), + 0); Banding banding; Index num_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_min_slots); - while (num_slots < FLAGS_find_max_slots) { - double factor = 0.95; - double delta = FLAGS_find_delta_start; - while (delta > FLAGS_find_delta_end) { - Index num_to_add = static_cast(factor * num_slots); + Index max_slots = InterleavedSoln::RoundUpNumSlots(FLAGS_find_max_slots); + while (num_slots <= max_slots) { + std::map rem_histogram; + std::map slot_histogram; + if (FLAGS_find_slot_occ) { + for (Index i = 0; i < kCoeffBits; ++i) { + slot_histogram[i] = 0; + slot_histogram[num_slots - 1 - i] = 0; + slot_histogram[num_slots / 2 - kCoeffBits / 2 + i] = 0; + } + } + uint64_t total_added = 0; + for (uint32_t i = 0; i < FLAGS_find_iters; ++i) { + banding.Reset(num_slots); + uint32_t j = 0; KeyGen end = cur; - end += num_to_add; - bool success = banding.ResetAndFindSeedToSolve(num_slots, cur, end, 0, 0); - cur = end; // fresh keys - if (success) { - factor += delta * (1.0 - FLAGS_find_success); - factor = std::min(factor, 1.0); - } else { - factor -= delta * FLAGS_find_success; - factor = std::max(factor, 0.0); + end += num_slots + num_slots / 10; + for (; cur != end; ++cur) { + if (banding.Add(*cur)) { + ++j; + } else { + break; + } + } + total_added += j; + for (auto& slot : slot_histogram) { + slot.second += banding.IsOccupied(slot.first); + } + + int32_t bucket = + static_cast(num_slots) - static_cast(j); + rem_histogram[bucket]++; + if (FLAGS_verbose) { + fprintf(stderr, "num_slots: %u i: %u / %u avg_overhead: %g\r", + static_cast(num_slots), static_cast(i), + static_cast(FLAGS_find_iters), + 1.0 * (i + 1) * num_slots / total_added); } - delta *= FLAGS_find_delta_shrink; - fprintf(stderr, - "slots: %u log2_slots: %g target_success: %g ->overhead: %g\r", - static_cast(num_slots), - std::log(num_slots * 1.0) / std::log(2.0), FLAGS_find_success, - 1.0 / factor); } - fprintf(stderr, "\n"); + if (FLAGS_verbose) { + fprintf(stderr, "\n"); + } + + uint32_t cumulative = 0; + + double p50_rem = 0; + double p95_rem = 0; + double p99_9_rem = 0; + + for (auto& h : rem_histogram) { + double before = 1.0 * cumulative / FLAGS_find_iters; + double not_after = 1.0 * (cumulative + h.second) / FLAGS_find_iters; + if (FLAGS_verbose) { + fprintf(stderr, 
"overhead: %g before: %g not_after: %g\n", + 1.0 * num_slots / (num_slots - h.first), before, not_after); + } + cumulative += h.second; + if (before < 0.5 && 0.5 <= not_after) { + // fake it with linear interpolation + double portion = (0.5 - before) / (not_after - before); + p50_rem = h.first + portion; + } else if (before < 0.95 && 0.95 <= not_after) { + // fake it with linear interpolation + double portion = (0.95 - before) / (not_after - before); + p95_rem = h.first + portion; + } else if (before < 0.999 && 0.999 <= not_after) { + // fake it with linear interpolation + double portion = (0.999 - before) / (not_after - before); + p99_9_rem = h.first + portion; + } + } + for (auto& slot : slot_histogram) { + fprintf(stderr, "slot[%u] occupied: %g\n", (unsigned)slot.first, + 1.0 * slot.second / FLAGS_find_iters); + } + + double mean_rem = + (1.0 * FLAGS_find_iters * num_slots - total_added) / FLAGS_find_iters; + fprintf( + stderr, + "num_slots: %u iters: %u mean_ovr: %g p50_ovr: %g p95_ovr: %g " + "p99.9_ovr: %g mean_rem: %g p50_rem: %g p95_rem: %g p99.9_rem: %g\n", + static_cast(num_slots), + static_cast(FLAGS_find_iters), + 1.0 * num_slots / (num_slots - mean_rem), + 1.0 * num_slots / (num_slots - p50_rem), + 1.0 * num_slots / (num_slots - p95_rem), + 1.0 * num_slots / (num_slots - p99_9_rem), mean_rem, p50_rem, p95_rem, + p99_9_rem); num_slots = std::max( num_slots + 1, static_cast(num_slots * FLAGS_find_next_factor)); @@ -920,8 +1230,74 @@ TYPED_TEST(RibbonTypeParamTest, FindOccupancyForSuccessRate) { } } -// TODO: unit tests for configuration APIs -// TODO: unit tests for small filter FP rates +// Not a real test, but a tool to understand Homogeneous Ribbon +// behavior (TODO: configuration APIs & tests) +TYPED_TEST(RibbonTypeParamTest, OptimizeHomogAtScale) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); + IMPORT_RIBBON_IMPL_TYPES(TypeParam); + using KeyGen = typename TypeParam::KeyGen; + + if (!FLAGS_optimize_homog) { + fprintf(stderr, "Tool disabled during unit test runs\n"); + return; + } + + if (!TypeParam::kHomogeneous) { + fprintf(stderr, "Only for Homogeneous Ribbon\n"); + return; + } + + KeyGen cur(ROCKSDB_NAMESPACE::ToString( + testing::UnitTest::GetInstance()->random_seed()), + 0); + + Banding banding; + Index num_slots = SimpleSoln::RoundUpNumSlots(FLAGS_optimize_homog_slots); + banding.Reset(num_slots); + + // This and "band_ovr" is the "allocated overhead", or slots over added. + // It does not take into account FP rates. + double target_overhead = 1.20; + uint32_t num_added = 0; + + do { + do { + (void)banding.Add(*cur); + ++cur; + ++num_added; + } while (1.0 * num_slots / num_added > target_overhead); + + SimpleSoln soln; + soln.BackSubstFrom(banding); + + std::array fp_counts_by_cols; + fp_counts_by_cols.fill(0U); + for (uint32_t i = 0; i < FLAGS_optimize_homog_check; ++i) { + ResultRow r = soln.PhsfQuery(*cur, banding); + ++cur; + for (size_t j = 0; j < fp_counts_by_cols.size(); ++j) { + if ((r & 1) == 1) { + break; + } + fp_counts_by_cols[j]++; + r /= 2; + } + } + fprintf(stderr, "band_ovr: %g ", 1.0 * num_slots / num_added); + for (unsigned j = 0; j < fp_counts_by_cols.size(); ++j) { + double inv_fp_rate = + 1.0 * FLAGS_optimize_homog_check / fp_counts_by_cols[j]; + double equiv_cols = std::log(inv_fp_rate) * 1.4426950409; + // Overhead vs. 
information-theoretic minimum based on observed + // FP rate (subject to sampling error, especially for low FP rates) + double actual_overhead = + 1.0 * (j + 1) * num_slots / (equiv_cols * num_added); + fprintf(stderr, "ovr_%u: %g ", j + 1, actual_overhead); + } + fprintf(stderr, "\n"); + target_overhead -= FLAGS_optimize_homog_granularity; + } while (target_overhead > 1.0); +} int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv);
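The per-column accounting at the end of OptimizeHomogAtScale above converts an observed FP rate into "equivalent columns", log2 of the inverse FP rate (hence the 1.4426950409 ~= 1/ln 2 factor), and then reports allocated bits per added key relative to that information-theoretic minimum. A standalone worked example (separate from the patch, with made-up numbers):

// Standalone worked example (separate from the patch) of the overhead
// computation at the end of OptimizeHomogAtScale. All numbers are
// illustrative, not measured.
#include <cmath>
#include <cstdio>

int main() {
  const double num_slots = 1000000.0;
  const double num_added = 800000.0;      // 1.25x allocated overhead
  const double observed_fp_rate = 0.012;  // hypothetical, for 7 columns
  const unsigned columns = 7;             // plays the role of j + 1 above

  // log2(1 / fp_rate): the number of "perfect" columns that would give the
  // same FP rate, i.e. the information-theoretic bits per key required.
  const double equiv_cols = std::log(1.0 / observed_fp_rate) * 1.4426950409;

  // Bits actually spent per added key, relative to that minimum.
  const double actual_overhead = columns * num_slots / (equiv_cols * num_added);

  std::printf("equiv_cols: %.3f  overhead vs. info-theoretic min: %.3f\n",
              equiv_cols, actual_overhead);
  return 0;
}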