From 746909ceda5990eeaef08c9b54c46e55a57fc840 Mon Sep 17 00:00:00 2001 From: Peter Dillinger Date: Tue, 3 Nov 2020 12:45:18 -0800 Subject: [PATCH] Ribbon: InterleavedSolutionStorage (#7598) Summary: The core algorithms for InterleavedSolutionStorage and the implementation SerializableInterleavedSolution make Ribbon fast for filter queries. Example output from new unit test: Simple outside query, hot, incl hashing, ns/key: 117.796 Interleaved outside query, hot, incl hashing, ns/key: 42.2655 Bloom outside query, hot, incl hashing, ns/key: 24.0071 Also includes misc cleanup of previous Ribbon code and comments. Some TODOs and FIXMEs remain for futher work / investigation. Pull Request resolved: https://github.com/facebook/rocksdb/pull/7598 Test Plan: unit tests included (integration work and tests coming later) Reviewed By: jay-zhuang Differential Revision: D24559209 Pulled By: pdillinger fbshipit-source-id: fea483cd354ba782aea3e806f2bc96e183d59441 --- .gitignore | 1 + util/bloom_impl.h | 2 + util/math128.h | 4 + util/ribbon_alg.h | 414 +++++++++++++++++++++++++++++++++++++++++--- util/ribbon_impl.h | 321 +++++++++++++++++++++++++++++----- util/ribbon_test.cc | 323 ++++++++++++++++++++++++++++------ 6 files changed, 948 insertions(+), 117 deletions(-) diff --git a/.gitignore b/.gitignore index f070cc64a..dc59bb7e6 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ rocksdb.pc *.gcda *.gcno *.o +*.o.tmp *.so *.so.* *_test diff --git a/util/bloom_impl.h b/util/bloom_impl.h index 4e83f6bb1..5f2a69e07 100644 --- a/util/bloom_impl.h +++ b/util/bloom_impl.h @@ -10,8 +10,10 @@ #pragma once #include #include + #include +#include "port/port.h" // for PREFETCH #include "rocksdb/slice.h" #include "util/hash.h" diff --git a/util/math128.h b/util/math128.h index 5b4434536..a1f6d4151 100644 --- a/util/math128.h +++ b/util/math128.h @@ -44,6 +44,10 @@ struct Unsigned128 { explicit operator uint64_t() { return lo; } explicit operator uint32_t() { return static_cast(lo); } + + explicit operator uint16_t() { return static_cast(lo); } + + explicit operator uint8_t() { return static_cast(lo); } }; inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) { diff --git a/util/ribbon_alg.h b/util/ribbon_alg.h index 9f500aa7f..bf23c9998 100644 --- a/util/ribbon_alg.h +++ b/util/ribbon_alg.h @@ -6,6 +6,7 @@ #pragma once #include +#include #include "util/math128.h" @@ -31,7 +32,7 @@ namespace ribbon { // (b) developed by Peter C. Dillinger, though not the first on-the-fly // GE algorithm. See "On the fly Gaussian Elimination for LT codes" by // Bioglio, Grangetto, Gaeta, and Sereno. -// (c) TODO: not yet implemented here +// (c) see "interleaved" solution storage below. // // See ribbon_impl.h for high-level behavioral summary. This file focuses // on the core design details. @@ -242,7 +243,7 @@ namespace ribbon { // #################### Ribbon on-the-fly banding ####################### // // "Banding" is what we call the process of reducing the inputs to an -// upper-triangluar r-band matrix ready for finishing a solution with +// upper-triangular r-band matrix ready for finishing a solution with // back-substitution. Although the DW paper presents an algorithm for // this ("SGauss"), the awesome properties of their construction enable // an even simpler, faster, and more backtrackable algorithm. 
In simplest @@ -253,7 +254,7 @@ namespace ribbon { // The enhanced algorithm is based on these observations: // - When processing a coefficient row with first 1 in column j, // - If it's the first at column j to be processed, it can be part of -// the banding at row j. (And that descision never overwritten, with +// the banding at row j. (And that decision never overwritten, with // no loss of generality!) // - Else, it can be combined with existing row j and re-processed, // which will look for a later "empty" row or reach "no solution". @@ -299,7 +300,7 @@ namespace ribbon { // Row-major layout is typical for boolean (bit) matrices, including for // MWHC (Xor) filters where a query combines k b-bit values, and k is // typically smaller than b. Even for k=4 and b=2, at least k=4 random -// lookups are required regardless of layout. +// look-ups are required regardless of layout. // // Ribbon PHSFs are quite different, however, because // (a) all of the solution rows relevant to a query are within a single @@ -343,9 +344,29 @@ namespace ribbon { // At first glance, PHSFs only offer a whole number of bits per "slot" // (m rather than number of keys n), but coefficient locality in the // Ribbon construction makes fractional bits/key quite possible and -// attractive for filter applications. -// -// TODO: more detail +// attractive for filter applications. This works by a prefix of the +// structure using b-1 solution columns and the rest using b solution +// columns. See InterleavedSolutionStorage below for more detail. +// +// Because false positive rates are non-linear in bits/key, this approach +// is not quite optimal in terms of information theory. In common cases, +// we see additional space overhead up to about 1.5% vs. theoretical +// optimal to achieve the same FP rate. We consider this a quite acceptable +// overhead for very efficiently utilizing space that might otherwise be +// wasted. +// +// This property of Ribbon even makes it "elastic." A Ribbon filter and +// its small metadata for answering queries can be adapted into another +// Ribbon filter filling any smaller multiple of r bits (plus small +// metadata), with a correspondingly higher FP rate. None of the data +// thrown away during construction needs to be recalled for this reduction. +// Similarly a single Ribbon construction can be separated (by solution +// column) into two or more structures (or "layers" or "levels") with +// independent filtering ability (no FP correlation, just as solution or +// result columns in a single structure) despite being constructed as part +// of a single linear system. (TODO: implement) +// See also "ElasticBF: Fine-grained and Elastic Bloom Filter Towards +// Efficient Read for LSM-tree-based KV Stores." // // ###################################################################### @@ -354,7 +375,8 @@ namespace ribbon { // // These algorithms are templatized for genericity but near-maximum // performance in a given application. The template parameters -// adhere to class/struct type concepts outlined below. +// adhere to informal class/struct type concepts outlined below. (This +// code is written for C++11 so does not use formal C++ concepts.) // Rough architecture for these algorithms: // @@ -413,7 +435,7 @@ namespace ribbon { // // Given a hash value, return the r-bit sequence of coefficients to // // associate with it. 
It's generally OK if // // sizeof(CoeffRow) > sizeof(Hash) -// // as long as the hash itself is not too prone to collsions for the +// // as long as the hash itself is not too prone to collisions for the // // applications and the CoeffRow is generated uniformly from // // available hash data, but relatively independent of the start. // // @@ -699,20 +721,41 @@ bool BandingAddRange(BandingStorage *bs, const BandingHasher &bh, // for filter queries. // concept SimpleSolutionStorage extends RibbonTypes { +// // This is called at the beginning of back-substitution for the +// // solution storage to do any remaining configuration before data +// // is stored to it. If configuration is previously finalized, this +// // could be a simple assertion or even no-op. Ribbon algorithms +// // only call this from back-substitution, and only once per call, +// // before other functions here. // void PrepareForNumStarts(Index num_starts) const; +// // Must return num_starts passed to PrepareForNumStarts, or the most +// // recent call to PrepareForNumStarts if this storage object can be +// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because +// // there must be a run of kCoeffBits slots starting from each start. // Index GetNumStarts() const; +// // Load the solution row (type ResultRow) for a slot // ResultRow Load(Index slot_num) const; +// // Store the solution row (type ResultRow) for a slot // void Store(Index slot_num, ResultRow data); // }; // Back-substitution for generating a solution from BandingStorage to // SimpleSolutionStorage. template -void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &ss) { +void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &bs) { using CoeffRow = typename BandingStorage::CoeffRow; using Index = typename BandingStorage::Index; using ResultRow = typename BandingStorage::ResultRow; + static_assert(sizeof(Index) == sizeof(typename SimpleSolutionStorage::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename SimpleSolutionStorage::CoeffRow), + "must be same"); + static_assert( + sizeof(ResultRow) == sizeof(typename SimpleSolutionStorage::ResultRow), + "must be same"); + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); constexpr auto kResultBits = static_cast(sizeof(ResultRow) * 8U); @@ -722,14 +765,14 @@ void SimpleBackSubst(SimpleSolutionStorage *sss, const BandingStorage &ss) { std::array state; state.fill(0); - const Index num_starts = ss.GetNumStarts(); + const Index num_starts = bs.GetNumStarts(); sss->PrepareForNumStarts(num_starts); const Index num_slots = num_starts + kCoeffBits - 1; for (Index i = num_slots; i > 0;) { --i; - CoeffRow cr = *const_cast(ss).CoeffRowPtr(i); - ResultRow rr = *const_cast(ss).ResultRowPtr(i); + CoeffRow cr = *const_cast(bs).CoeffRowPtr(i); + ResultRow rr = *const_cast(bs).ResultRowPtr(i); // solution row ResultRow sr = 0; for (Index j = 0; j < kResultBits; ++j) { @@ -767,9 +810,9 @@ typename SimpleSolutionStorage::ResultRow SimpleQueryHelper( ResultRow result = 0; for (unsigned i = 0; i < kCoeffBits; ++i) { - if (static_cast(cr >> i) & 1U) { - result ^= sss.Load(start_slot + i); - } + // Bit masking whole value is generally faster here than 'if' + result ^= sss.Load(start_slot + i) & + (ResultRow{0} - (static_cast(cr >> i) & ResultRow{1})); } return result; } @@ -781,6 +824,13 @@ typename SimpleSolutionStorage::ResultRow SimplePhsfQuery( const SimpleSolutionStorage &sss) { const typename PhsfQueryHasher::Hash hash = hasher.GetHash(key); + 
static_assert(sizeof(typename SimpleSolutionStorage::Index) == + sizeof(typename PhsfQueryHasher::Index), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) == + sizeof(typename PhsfQueryHasher::CoeffRow), + "must be same"); + return SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()), hasher.GetCoeffRow(hash), sss); } @@ -794,6 +844,16 @@ bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key, const typename SimpleSolutionStorage::ResultRow expected = hasher.GetResultRowFromHash(hash); + static_assert(sizeof(typename SimpleSolutionStorage::Index) == + sizeof(typename FilterQueryHasher::Index), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::CoeffRow) == + sizeof(typename FilterQueryHasher::CoeffRow), + "must be same"); + static_assert(sizeof(typename SimpleSolutionStorage::ResultRow) == + sizeof(typename FilterQueryHasher::ResultRow), + "must be same"); + return expected == SimpleQueryHelper(hasher.GetStart(hash, sss.GetNumStarts()), hasher.GetCoeffRow(hash), sss); @@ -803,18 +863,326 @@ bool SimpleFilterQuery(const typename FilterQueryHasher::Key &key, // InterleavedSolutionStorage is row-major at a high level, for good // locality, and column-major at a low level, for CPU efficiency -// especially in filter querys or relatively small number of result bits +// especially in filter queries or relatively small number of result bits // (== solution columns). The storage is a sequence of "blocks" where a -// block has one CoeffRow for each solution column. - +// block has one CoeffRow-sized segment for each solution column. Each +// query spans at most two blocks; the starting solution row is typically +// in the row-logical middle of a block and spans to the middle of the +// next block. (See diagram below.) +// +// InterleavedSolutionStorage supports choosing b (number of result or +// solution columns) at run time, and even supports mixing b and b-1 solution +// columns in a single linear system solution, for filters that can +// effectively utilize any size space (multiple of CoeffRow) for minimizing +// FP rate for any number of added keys. To simplify query implementation +// (with lower-index columns first), the b-bit portion comes after the b-1 +// portion of the structure. +// +// Diagram (=== marks logical block boundary; b=4; ### is data used by a +// query crossing the b-1 to b boundary, each Segment has type CoeffRow): +// ... +// +======================+ +// | S e g m e n t col=0 | +// +----------------------+ +// | S e g m e n t col=1 | +// +----------------------+ +// | S e g m e n t col=2 | +// +======================+ +// | S e g m e n #########| +// +----------------------+ +// | S e g m e n #########| +// +----------------------+ +// | S e g m e n #########| +// +======================+ Result/solution columns: above = 3, below = 4 +// |#############t col=0 | +// +----------------------+ +// |#############t col=1 | +// +----------------------+ +// |#############t col=2 | +// +----------------------+ +// | S e g m e n t col=3 | +// +======================+ +// | S e g m e n t col=0 | +// +----------------------+ +// | S e g m e n t col=1 | +// +----------------------+ +// | S e g m e n t col=2 | +// +----------------------+ +// | S e g m e n t col=3 | +// +======================+ +// ... +// +// InterleavedSolutionStorage will be adapted by the algorithms from +// simple array-like segment storage. 
That array-like storage is templatized +// in part so that an implementation may choose to handle byte ordering +// at access time. +// // concept InterleavedSolutionStorage extends RibbonTypes { -// Index GetNumColumns() const; +// // This is called at the beginning of back-substitution for the +// // solution storage to do any remaining configuration before data +// // is stored to it. If configuration is previously finalized, this +// // could be a simple assertion or even no-op. Ribbon algorithms +// // only call this from back-substitution, and only once per call, +// // before other functions here. +// void PrepareForNumStarts(Index num_starts) const; +// // Must return num_starts passed to PrepareForNumStarts, or the most +// // recent call to PrepareForNumStarts if this storage object can be +// // reused. Note that num_starts == num_slots - kCoeffBits + 1 because +// // there must be a run of kCoeffBits slots starting from each start. // Index GetNumStarts() const; -// CoeffRow Load(Index block_num, Index column) const; -// void Store(Index block_num, Index column, CoeffRow data); +// // The larger number of solution columns used (called "b" above). +// Index GetUpperNumColumns() const; +// // If returns > 0, then block numbers below that use +// // GetUpperNumColumns() - 1 columns per solution row, and the rest +// // use GetUpperNumColumns(). A block represents kCoeffBits "slots", +// // where all but the last kCoeffBits - 1 slots are also starts. And +// // a block contains a segment for each solution column. +// // An implementation may only support uniform columns per solution +// // row and return constant 0 here. +// Index GetUpperStartBlock() const; +// +// // ### "Array of segments" portion of API ### +// // The number of values of type CoeffRow used in this solution +// // representation. (This value can be inferred from the previous +// // three functions, but is expected at least for sanity / assertion +// // checking.) +// Index GetNumSegments() const; +// // Load an entry from the logical array of segments +// CoeffRow LoadSegment(Index segment_num) const; +// // Store an entry to the logical array of segments +// void StoreSegment(Index segment_num, CoeffRow data); // }; -// TODO: not yet implemented here (only in prototype code elsewhere) +// A helper for InterleavedBackSubst. +template +inline void BackSubstBlock(typename BandingStorage::CoeffRow *state, + typename BandingStorage::Index num_columns, + const BandingStorage &bs, + typename BandingStorage::Index start_slot) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + using ResultRow = typename BandingStorage::ResultRow; + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + for (Index i = start_slot + kCoeffBits; i > start_slot;) { + --i; + CoeffRow cr = *const_cast(bs).CoeffRowPtr(i); + ResultRow rr = *const_cast(bs).ResultRowPtr(i); + for (Index j = 0; j < num_columns; ++j) { + // Compute next solution bit at row i, column j (see derivation below) + CoeffRow tmp = state[j] << 1; + int bit = BitParity(tmp & cr) ^ ((rr >> j) & 1); + tmp |= static_cast(bit); + + // Now tmp is solution at column j from row i for next kCoeffBits + // more rows. Thus, for valid solution, the dot product of the + // solution column with the coefficient row has to equal the result + // at that column, + // BitParity(tmp & cr) == ((rr >> j) & 1) + + // Update state. 
+ state[j] = tmp; + } + } +} + +// Back-substitution for generating a solution from BandingStorage to +// InterleavedSolutionStorage. +template +void InterleavedBackSubst(InterleavedSolutionStorage *iss, + const BandingStorage &bs) { + using CoeffRow = typename BandingStorage::CoeffRow; + using Index = typename BandingStorage::Index; + + static_assert( + sizeof(Index) == sizeof(typename InterleavedSolutionStorage::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename InterleavedSolutionStorage::CoeffRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Index num_starts = bs.GetNumStarts(); + // Although it might be nice to have a filter that returns "always false" + // when no key is added, we aren't specifically supporting that here + // because it would require another condition branch in the query. + assert(num_starts > 0); + iss->PrepareForNumStarts(num_starts); + + const Index num_slots = num_starts + kCoeffBits - 1; + assert(num_slots % kCoeffBits == 0); + const Index num_blocks = num_slots / kCoeffBits; + const Index num_segments = iss->GetNumSegments(); + + // For now upper, then lower + Index num_columns = iss->GetUpperNumColumns(); + const Index upper_start_block = iss->GetUpperStartBlock(); + + if (num_columns == 0) { + // Nothing to do, presumably because there's not enough space for even + // a single segment. + assert(num_segments == 0); + // When num_columns == 0, a Ribbon filter query will always return true, + // or a PHSF query always 0. + return; + } + + // We should be utilizing all available segments + assert(num_segments == (upper_start_block * (num_columns - 1)) + + ((num_blocks - upper_start_block) * num_columns)); + + // TODO: consider fixed-column specializations with stack-allocated state + + // A column-major buffer of the solution matrix, containing enough + // recently-computed solution data to compute the next solution row + // (based also on banding data). + std::unique_ptr state{new CoeffRow[num_columns]()}; + + Index block = num_blocks; + Index segment = num_segments; + while (block > upper_start_block) { + --block; + BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits); + segment -= num_columns; + for (Index i = 0; i < num_columns; ++i) { + iss->StoreSegment(segment + i, state[i]); + } + } + // Now (if applicable), region using lower number of columns + // (This should be optimized away if GetUpperStartBlock() returns + // constant 0.) + --num_columns; + while (block > 0) { + --block; + BackSubstBlock(state.get(), num_columns, bs, block * kCoeffBits); + segment -= num_columns; + for (Index i = 0; i < num_columns; ++i) { + iss->StoreSegment(segment + i, state[i]); + } + } + // Verify everything processed + assert(block == 0); + assert(segment == 0); +} + +// General PHSF query a key from InterleavedSolutionStorage. 
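+// As a rough worked example of the segment indexing below (the numbers are
+// illustrative only, not from any test): with GetUpperNumColumns() == 7 and
+// GetUpperStartBlock() == 10, lower blocks hold 6 segments each, so a start
+// in block 4 maps to segment 4*7 - min(4, 10) == 24 (== 4*6, and num_columns
+// drops to 6), while a start in block 12 maps to segment
+// 12*7 - min(12, 10) == 74 (== 10*6 + 2*7, keeping all 7 columns).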
+template +typename InterleavedSolutionStorage::ResultRow InterleavedPhsfQuery( + const typename PhsfQueryHasher::Key &key, const PhsfQueryHasher &hasher, + const InterleavedSolutionStorage &iss) { + using Hash = typename PhsfQueryHasher::Hash; + + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + using ResultRow = typename InterleavedSolutionStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename PhsfQueryHasher::Index), + "must be same"); + static_assert(sizeof(CoeffRow) == sizeof(typename PhsfQueryHasher::CoeffRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Hash hash = hasher.GetHash(key); + const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts()); + + const Index upper_start_block = iss->GetUpperStartBlock(); + Index num_columns = iss->GetUpperNumColumns(); + Index start_block_num = start_slot / kCoeffBits; + Index segment = start_block_num * num_columns - + std::min(start_block_num, upper_start_block); + // Change to lower num columns if applicable. + // (This should not compile to a conditional branch.) + num_columns -= (start_block_num < upper_start_block) ? 1 : 0; + + const CoeffRow cr = hasher.GetCoeffRow(hash); + Index start_bit = start_slot % kCoeffBits; + + ResultRow sr = 0; + const CoeffRow cr_left = cr << start_bit; + for (Index i = 0; i < num_columns; ++i) { + sr ^= BitParity(iss->LoadSegment(segment + i) & cr_left) << i; + } + + if (start_bit > 0) { + segment += num_columns; + const CoeffRow cr_right = cr >> (kCoeffBits - start_bit); + for (Index i = 0; i < num_columns; ++i) { + sr ^= BitParity(iss->LoadSegment(segment + i) & cr_right) << i; + } + } + + return sr; +} + +// Filter query a key from InterleavedFilterQuery. +template +bool InterleavedFilterQuery(const typename FilterQueryHasher::Key &key, + const FilterQueryHasher &hasher, + const InterleavedSolutionStorage &iss) { + // BEGIN mostly copied from InterleavedPhsfQuery + using Hash = typename FilterQueryHasher::Hash; + + using CoeffRow = typename InterleavedSolutionStorage::CoeffRow; + using Index = typename InterleavedSolutionStorage::Index; + using ResultRow = typename InterleavedSolutionStorage::ResultRow; + + static_assert(sizeof(Index) == sizeof(typename FilterQueryHasher::Index), + "must be same"); + static_assert( + sizeof(CoeffRow) == sizeof(typename FilterQueryHasher::CoeffRow), + "must be same"); + static_assert( + sizeof(ResultRow) == sizeof(typename FilterQueryHasher::ResultRow), + "must be same"); + + constexpr auto kCoeffBits = static_cast(sizeof(CoeffRow) * 8U); + + const Hash hash = hasher.GetHash(key); + const Index start_slot = hasher.GetStart(hash, iss.GetNumStarts()); + + const Index upper_start_block = iss.GetUpperStartBlock(); + Index num_columns = iss.GetUpperNumColumns(); + Index start_block_num = start_slot / kCoeffBits; + Index segment = start_block_num * num_columns - + std::min(start_block_num, upper_start_block); + // Change to lower num columns if applicable. + // (This should not compile to a conditional branch.) + num_columns -= (start_block_num < upper_start_block) ? 1 : 0; + + const CoeffRow cr = hasher.GetCoeffRow(hash); + Index start_bit = start_slot % kCoeffBits; + // END mostly copied from InterleavedPhsfQuery. 
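+  // Unlike the PHSF query above, each solution column can be checked
+  // independently against the corresponding bit of the expected result row,
+  // so the filter query below returns false as soon as any column
+  // mismatches.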
+ + const ResultRow expected = hasher.GetResultRowFromHash(hash); + + if (start_bit == 0) { + for (Index i = 0; i < num_columns; ++i) { + if (BitParity(iss.LoadSegment(segment + i) & cr) != + (static_cast(expected >> i) & 1)) { + return false; + } + } + } else { + for (Index i = 0; i < num_columns; ++i) { + CoeffRow soln_col = + (iss.LoadSegment(segment + i) >> static_cast(start_bit)) | + (iss.LoadSegment(segment + num_columns + i) + << static_cast(kCoeffBits - start_bit)); + if (BitParity(soln_col & cr) != (static_cast(expected >> i) & 1)) { + return false; + } + } + } + // otherwise, all match + return true; +} + +// TODO: refactor Interleaved*Query so that queries can be "prepared" by +// prefetching memory, to hide memory latency for multiple queries in a +// single thread. } // namespace ribbon diff --git a/util/ribbon_impl.h b/util/ribbon_impl.h index cde1c5898..fd5b51302 100644 --- a/util/ribbon_impl.h +++ b/util/ribbon_impl.h @@ -5,6 +5,8 @@ #pragma once +#include + #include "port/port.h" // for PREFETCH #include "util/ribbon_alg.h" @@ -52,6 +54,14 @@ namespace ribbon { // // less), so typical num_starts < 10k. // static constexpr bool kUseSmash; // +// // When true, allows number of "starts" to be zero, for best support +// // of the "no keys to add" case by always returning false for filter +// // queries. (This is distinct from the "keys added but no space for +// // any data" case, in which a filter always returns true.) The cost +// // supporting this is a conditional branch (probably predictable) in +// // queries. +// static constexpr bool kAllowZeroStarts; +// // // A seedable stock hash function on Keys. All bits of Hash must // // be reasonably high quality. XXH functions recommended, but // // Murmur, City, Farm, etc. also work. @@ -77,7 +87,7 @@ struct AddInputSelector { using T = Key; }; -// To avoid writing 'typename' everwhere that we use types like 'Index' +// To avoid writing 'typename' everywhere that we use types like 'Index' #define IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings) \ using CoeffRow = typename TypesAndSettings::CoeffRow; \ using ResultRow = typename TypesAndSettings::ResultRow; \ @@ -135,7 +145,7 @@ class StandardHasher { // lookup. // // FastRange gives us a fast and effective mapping from h to the - // approriate range. This depends most, sometimes exclusively, on + // appropriate range. This depends most, sometimes exclusively, on // upper bits of h. // if (TypesAndSettings::kUseSmash) { @@ -150,10 +160,12 @@ class StandardHasher { // it's usually small enough to be ignorable (less computation in // this function) when number of slots is roughly 10k or larger. // - // TODO: re-check these degress of smash, esp with kFirstCoeffAlwaysOne + // The best values for these smash weights might depend on how + // densely you're packing entries, but this seems to work well for + // 2% overhead and roughly 50% success probability. // - constexpr auto kFrontSmash = kCoeffBits / 2 - 1; - constexpr auto kBackSmash = kCoeffBits / 2; + constexpr auto kFrontSmash = kCoeffBits / 3; + constexpr auto kBackSmash = kCoeffBits / 3; Index start = FastRangeGeneric(h, num_starts + kFrontSmash + kBackSmash); start = std::max(start, kFrontSmash); start -= kFrontSmash; @@ -184,7 +196,7 @@ class StandardHasher { return cr; } inline ResultRow GetResultRowMask() const { - // TODO: will be used with InterleavedSolutionStorage + // TODO: will be used with InterleavedSolutionStorage? 
// For now, all bits set (note: might be a small type so might need to // narrow after promotion) return static_cast(~ResultRow{0}); @@ -236,7 +248,7 @@ class StandardHasher { // to apply a different seed. This hasher seeds a 1-to-1 mixing // transformation to apply a seed to an existing hash (or hash-sized key). // -// Testing suggests essentially no degredation of solution success rate +// Testing suggests essentially no degradation of solution success rate // vs. going back to original inputs when changing hash seeds. For example: // Average re-seeds for solution with r=128, 1.02x overhead, and ~100k keys // is about 1.10 for both StandardHasher and StandardRehasher. @@ -279,6 +291,26 @@ template using StandardRehasher = StandardHasher>; +// Especially with smaller hashes (e.g. 32 bit), there can be noticeable +// false positives due to collisions in the Hash returned by GetHash. +// This function returns the expected FP rate due to those collisions, +// which can be added to the expected FP rate from the underlying data +// structure. (Note: technically, a + b is only a good approximation of +// 1-(1-a)(1-b) == a + b - a*b, if a and b are much closer to 0 than to 1.) +// The number of entries added can be a double here in case it's an +// average. +template +double ExpectedCollisionFpRate(const Hasher& hasher, Numerical added) { + // Standardize on the 'double' specialization + return ExpectedCollisionFpRate(hasher, 1.0 * added); +} +template +double ExpectedCollisionFpRate(const Hasher& /*hasher*/, double added) { + // Technically, there could be overlap among the added, but ignoring that + // is typically close enough. + return added / std::pow(256.0, sizeof(typename Hasher::Hash)); +} + // StandardBanding: a canonical implementation of BandingStorage and // BacktrackStorage, with convenience API for banding (solving with on-the-fly // Gaussian elimination) with and without backtracking. 
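A rough sketch of the intended flow, with illustrative placeholder names
(num_slots, keys_begin/keys_end, hasher, soln), mirroring how the unit tests
later in this diff drive these pieces:

    StandardBanding<TypesAndSettings> banding;
    // Retry with increasing seeds until on-the-fly GE succeeds
    if (banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end,
                                        /*max_seed*/ 5)) {
      hasher.ResetSeed(banding.GetSeed());
      soln.BackSubstFrom(banding);  // SimpleSoln or InterleavedSoln
      // ... then answer queries via soln.FilterQuery(key, hasher) ...
    }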
@@ -288,28 +320,30 @@ class StandardBanding : public StandardHasher { IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); StandardBanding(Index num_slots = 0, Index backtrack_size = 0) { - if (num_slots > 0) { - Reset(num_slots, backtrack_size); - } else { - EnsureBacktrackSize(backtrack_size); - } + Reset(num_slots, backtrack_size); } void Reset(Index num_slots, Index backtrack_size = 0) { - assert(num_slots >= kCoeffBits); - if (num_slots > num_slots_allocated_) { - coeff_rows_.reset(new CoeffRow[num_slots]()); - // Note: don't strictly have to zero-init result_rows, - // except possible information leakage ;) - result_rows_.reset(new ResultRow[num_slots]()); - num_slots_allocated_ = num_slots; + if (num_slots == 0) { + // Unusual (TypesAndSettings::kAllowZeroStarts) or "uninitialized" + num_starts_ = 0; } else { - for (Index i = 0; i < num_slots; ++i) { - coeff_rows_[i] = 0; - // Note: don't strictly have to zero-init result_rows - result_rows_[i] = 0; + // Normal + assert(num_slots >= kCoeffBits); + if (num_slots > num_slots_allocated_) { + coeff_rows_.reset(new CoeffRow[num_slots]()); + // Note: don't strictly have to zero-init result_rows, + // except possible information leakage ;) + result_rows_.reset(new ResultRow[num_slots]()); + num_slots_allocated_ = num_slots; + } else { + for (Index i = 0; i < num_slots; ++i) { + coeff_rows_[i] = 0; + // Note: don't strictly have to zero-init result_rows + result_rows_[i] = 0; + } } + num_starts_ = num_slots - kCoeffBits + 1; } - num_starts_ = num_slots - kCoeffBits + 1; EnsureBacktrackSize(backtrack_size); } void EnsureBacktrackSize(Index backtrack_size) { @@ -323,7 +357,7 @@ class StandardBanding : public StandardHasher { // From concept BandingStorage inline bool UsePrefetch() const { - // A rough guestimate of when prefetching during construction pays off. + // A rough guesstimate of when prefetching during construction pays off. // TODO: verify/validate return num_starts_ > 1500; } @@ -352,6 +386,12 @@ class StandardBanding : public StandardHasher { // template bool AddRange(InputIterator begin, InputIterator end) { + assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Can't add any in this case. + return begin == end; + } + // Normal return BandingAddRange(this, *this, begin, end); } @@ -364,6 +404,12 @@ class StandardBanding : public StandardHasher { // template bool AddRangeOrRollBack(InputIterator begin, InputIterator end) { + assert(num_starts_ > 0 || TypesAndSettings::kAllowZeroStarts); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Can't add any in this case. + return begin == end; + } + // else Normal return BandingAddRange(this, this, *this, begin, end); } @@ -372,15 +418,20 @@ class StandardBanding : public StandardHasher { // // Adding can fail even before all the "slots" are completely "full". // - bool Add(const AddInput& input) { return AddRange(&input, &input + 1); } + bool Add(const AddInput& input) { + // Pointer can act as iterator + return AddRange(&input, &input + 1); + } // Return the number of "occupied" rows (with non-zero coefficients stored). 
Index GetOccupiedCount() const { Index count = 0; - const Index num_slots = num_starts_ + kCoeffBits - 1; - for (Index i = 0; i < num_slots; ++i) { - if (coeff_rows_[i] != 0) { - ++count; + if (num_starts_ > 0) { + const Index num_slots = num_starts_ + kCoeffBits - 1; + for (Index i = 0; i < num_slots; ++i) { + if (coeff_rows_[i] != 0) { + ++count; + } } } return count; @@ -442,14 +493,20 @@ class InMemSimpleSolution { IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); void PrepareForNumStarts(Index num_starts) { - const Index num_slots = num_starts + kCoeffBits - 1; - assert(num_slots >= kCoeffBits); - if (num_slots > num_slots_allocated_) { - // Do not need to init the memory - solution_rows_.reset(new ResultRow[num_slots]); - num_slots_allocated_ = num_slots; + if (TypesAndSettings::kAllowZeroStarts && num_starts == 0) { + // Unusual + num_starts_ = 0; + } else { + // Normal + const Index num_slots = num_starts + kCoeffBits - 1; + assert(num_slots >= kCoeffBits); + if (num_slots > num_slots_allocated_) { + // Do not need to init the memory + solution_rows_.reset(new ResultRow[num_slots]); + num_slots_allocated_ = num_slots; + } + num_starts_ = num_starts; } - num_starts_ = num_starts; } Index GetNumStarts() const { return num_starts_; } @@ -464,20 +521,51 @@ class InMemSimpleSolution { // High-level API template - void BackSubstFrom(const BandingStorage& ss) { - SimpleBackSubst(this, ss); + void BackSubstFrom(const BandingStorage& bs) { + if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { + // Unusual + PrepareForNumStarts(0); + } else { + // Normal + SimpleBackSubst(this, bs); + } } template ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) { assert(!TypesAndSettings::kIsFilter); - return SimplePhsfQuery(input, hasher, *this); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual + return 0; + } else { + // Normal + return SimplePhsfQuery(input, hasher, *this); + } } template bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) { assert(TypesAndSettings::kIsFilter); - return SimpleFilterQuery(input, hasher, *this); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Zero starts presumes no keys added -> always false + return false; + } else { + // Normal, or upper_num_columns_ == 0 means "no space for data" and + // thus will always return true. + return SimpleFilterQuery(input, hasher, *this); + } + } + + double ExpectedFpRate() { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual, but we don't have FPs if we always return false. + return 0.0; + } + // else Normal + + // Each result (solution) bit (column) cuts FP rate in half + return std::pow(0.5, 8U * sizeof(ResultRow)); } protected: @@ -488,6 +576,150 @@ class InMemSimpleSolution { std::unique_ptr solution_rows_; }; +// Implements concept InterleavedSolutionStorage always using little-endian +// byte order, so easy for serialization/deserialization. This implementation +// fully supports fractional bits per key, where any number of segments +// (number of bytes multiple of sizeof(CoeffRow)) can be used with any number +// of slots that is a multiple of kCoeffBits. +// +// The structure is passed an externally allocated/de-allocated byte buffer +// that is optionally pre-populated (from storage) for answering queries, +// or can be populated by BackSubstFrom. 
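+// A rough usage sketch (buffer size and variable names are illustrative
+// only; see the unit tests for real usage):
+//
+//   size_t bytes = 1024;  // ideally a multiple of sizeof(CoeffRow)
+//   std::unique_ptr<char[]> buf(new char[bytes]);
+//   SerializableInterleavedSolution<TypesAndSettings> isoln(buf.get(), bytes);
+//   isoln.BackSubstFrom(banding);  // query hasher must use banding.GetSeed()
+//   bool may_match = isoln.FilterQuery(key, hasher);
+//
+// For a buffer loaded back from storage, presumably the reader calls
+// PrepareForNumStarts with metadata recorded alongside the bytes before
+// querying.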
+// +template +class SerializableInterleavedSolution { + public: + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings); + + // Does not take ownership of `data` but uses it (up to `data_len` bytes) + // throughout lifetime + SerializableInterleavedSolution(char* data, size_t data_len) + : data_(data), data_len_(data_len) {} + + void PrepareForNumStarts(Index num_starts) { + assert(num_starts == 0 || (num_starts % kCoeffBits == 1)); + num_starts_ = num_starts; + + InternalConfigure(); + } + + Index GetNumStarts() const { return num_starts_; } + + Index GetNumBlocks() const { + const Index num_slots = num_starts_ + kCoeffBits - 1; + return num_slots / kCoeffBits; + } + + Index GetUpperNumColumns() const { return upper_num_columns_; } + + Index GetUpperStartBlock() const { return upper_start_block_; } + + Index GetNumSegments() const { + return static_cast(data_len_ / sizeof(CoeffRow)); + } + + CoeffRow LoadSegment(Index segment_num) const { + assert(data_ != nullptr); // suppress clang analyzer report + return DecodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow)); + } + void StoreSegment(Index segment_num, CoeffRow val) { + assert(data_ != nullptr); // suppress clang analyzer report + EncodeFixedGeneric(data_ + segment_num * sizeof(CoeffRow), val); + } + + // ******************************************************************** + // High-level API + + template + void BackSubstFrom(const BandingStorage& bs) { + if (TypesAndSettings::kAllowZeroStarts && bs.GetNumStarts() == 0) { + // Unusual + PrepareForNumStarts(0); + } else { + // Normal + InterleavedBackSubst(this, bs); + } + } + + template + ResultRow PhsfQuery(const Key& input, const PhsfQueryHasher& hasher) { + assert(!TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual + return 0; + } else { + // Normal + return InterleavedPhsfQuery(input, hasher, *this); + } + } + + template + bool FilterQuery(const Key& input, const FilterQueryHasher& hasher) { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Zero starts presumes no keys added -> always false + return false; + } else { + // Normal, or upper_num_columns_ == 0 means "no space for data" and + // thus will always return true. + return InterleavedFilterQuery(input, hasher, *this); + } + } + + double ExpectedFpRate() { + assert(TypesAndSettings::kIsFilter); + if (TypesAndSettings::kAllowZeroStarts && num_starts_ == 0) { + // Unusual. Zero starts presumes no keys added -> always false + return 0.0; + } + // else Normal + + // Note: Ignoring smash setting; still close enough in that case + double lower_portion = + (upper_start_block_ * kCoeffBits * 1.0) / num_starts_; + + // Each result (solution) bit (column) cuts FP rate in half. Weight that + // for upper and lower number of bits (columns). 
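+    // For example (hypothetical values, not taken from any test here): with
+    // upper_num_columns_ == 7 and a quarter of the starts in the lower
+    // (6-column) region, the estimate is
+    //   0.25 * 2^-6 + 0.75 * 2^-7 ~= 0.0098.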
+ return lower_portion * std::pow(0.5, upper_num_columns_ - 1) + + (1.0 - lower_portion) * std::pow(0.5, upper_num_columns_); + } + + protected: + void InternalConfigure() { + const Index num_blocks = GetNumBlocks(); + Index num_segments = GetNumSegments(); + + if (num_blocks == 0) { + // Exceptional + upper_num_columns_ = 0; + upper_start_block_ = 0; + } else { + // Normal + upper_num_columns_ = + (num_segments + /*round up*/ num_blocks - 1) / num_blocks; + upper_start_block_ = upper_num_columns_ * num_blocks - num_segments; + // Unless that's more columns than supported by ResultRow data type + if (upper_num_columns_ > 8U * sizeof(ResultRow)) { + // Use maximum columns (there will be space unused) + upper_num_columns_ = static_cast(8U * sizeof(ResultRow)); + upper_start_block_ = 0; + num_segments = num_blocks * upper_num_columns_; + } + } + // Update data_len_ for correct rounding and/or unused space + // NOTE: unused space stays gone if we PrepareForNumStarts again. + // We are prioritizing minimizing the number of fields over making + // the "unusued space" feature work well. + data_len_ = num_segments * sizeof(CoeffRow); + } + + Index num_starts_ = 0; + Index upper_num_columns_ = 0; + Index upper_start_block_ = 0; + char* const data_; + size_t data_len_; +}; + } // namespace ribbon } // namespace ROCKSDB_NAMESPACE @@ -499,5 +731,10 @@ class InMemSimpleSolution { ROCKSDB_NAMESPACE::ribbon::StandardBanding; \ using SimpleSoln = \ ROCKSDB_NAMESPACE::ribbon::InMemSimpleSolution; \ - static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) > 0, \ + using InterleavedSoln = \ + ROCKSDB_NAMESPACE::ribbon::SerializableInterleavedSolution< \ + TypesAndSettings>; \ + static_assert(sizeof(Hasher) + sizeof(Banding) + sizeof(SimpleSoln) + \ + sizeof(InterleavedSoln) > \ + 0, \ "avoid unused warnings, semicolon expected after macro call") diff --git a/util/ribbon_test.cc b/util/ribbon_test.cc index 9c47f7aa0..babd329c1 100644 --- a/util/ribbon_test.cc +++ b/util/ribbon_test.cc @@ -6,9 +6,11 @@ #include #include "test_util/testharness.h" +#include "util/bloom_impl.h" #include "util/coding.h" #include "util/hash.h" #include "util/ribbon_impl.h" +#include "util/stop_watch.h" #ifndef GFLAGS uint32_t FLAGS_thoroughness = 5; @@ -35,7 +37,10 @@ struct DefaultTypesAndSettings { static constexpr bool kIsFilter = true; static constexpr bool kFirstCoeffAlwaysOne = true; static constexpr bool kUseSmash = false; + static constexpr bool kAllowZeroStarts = false; static Hash HashFn(const Key& key, Seed seed) { + // TODO/FIXME: is there sufficient independence with sequential keys and + // sequential seeds? 
return ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed); } }; @@ -47,10 +52,13 @@ struct TypesAndSettings_Coeff128Smash : public DefaultTypesAndSettings { struct TypesAndSettings_Coeff64 : public DefaultTypesAndSettings { using CoeffRow = uint64_t; }; -struct TypesAndSettings_Coeff64Smash : public DefaultTypesAndSettings { +struct TypesAndSettings_Coeff64Smash1 : public DefaultTypesAndSettings { using CoeffRow = uint64_t; static constexpr bool kUseSmash = true; }; +struct TypesAndSettings_Coeff64Smash0 : public TypesAndSettings_Coeff64Smash1 { + static constexpr bool kFirstCoeffAlwaysOne = false; +}; struct TypesAndSettings_Result16 : public DefaultTypesAndSettings { using ResultRow = uint16_t; }; @@ -60,7 +68,7 @@ struct TypesAndSettings_IndexSizeT : public DefaultTypesAndSettings { struct TypesAndSettings_Hash32 : public DefaultTypesAndSettings { using Hash = uint32_t; static Hash HashFn(const Key& key, Seed seed) { - // NOTE: Using RockDB 32-bit Hash() here fails test below because of + // NOTE: Using RocksDB 32-bit Hash() here fails test below because of // insufficient mixing of seed (or generally insufficient mixing) return ROCKSDB_NAMESPACE::Upper32of64( ROCKSDB_NAMESPACE::Hash64(key.data(), key.size(), seed)); @@ -78,10 +86,13 @@ struct TypesAndSettings_Seed8 : public DefaultTypesAndSettings { struct TypesAndSettings_NoAlwaysOne : public DefaultTypesAndSettings { static constexpr bool kFirstCoeffAlwaysOne = false; }; +struct TypesAndSettings_AllowZeroStarts : public DefaultTypesAndSettings { + static constexpr bool kAllowZeroStarts = true; +}; struct TypesAndSettings_RehasherWrapped : public DefaultTypesAndSettings { // This doesn't directly use StandardRehasher as a whole, but simulates // its behavior with unseeded hash of key, then seeded hash-to-hash - // tranform. + // transform. static Hash HashFn(const Key& key, Seed seed) { Hash unseeded = DefaultTypesAndSettings::HashFn(key, /*seed*/ 0); using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter< @@ -89,10 +100,14 @@ struct TypesAndSettings_RehasherWrapped : public DefaultTypesAndSettings { return Rehasher::HashFn(unseeded, seed); } }; +struct TypesAndSettings_RehasherWrapped_Result16 + : public TypesAndSettings_RehasherWrapped { + using ResultRow = uint16_t; +}; struct TypesAndSettings_Rehasher32Wrapped : public TypesAndSettings_Hash32 { // This doesn't directly use StandardRehasher as a whole, but simulates // its behavior with unseeded hash of key, then seeded hash-to-hash - // tranform. + // transform. 
static Hash HashFn(const Key& key, Seed seed) { Hash unseeded = TypesAndSettings_Hash32::HashFn(key, /*seed*/ 0); using Rehasher = ROCKSDB_NAMESPACE::ribbon::StandardRehasherAdapter< @@ -101,15 +116,16 @@ struct TypesAndSettings_Rehasher32Wrapped : public TypesAndSettings_Hash32 { } }; -using TestTypesAndSettings = - ::testing::Types; +using TestTypesAndSettings = ::testing::Types< + TypesAndSettings_Coeff128, TypesAndSettings_Coeff128Smash, + TypesAndSettings_Coeff64, TypesAndSettings_Coeff64Smash0, + TypesAndSettings_Coeff64Smash1, TypesAndSettings_Result16, + TypesAndSettings_IndexSizeT, TypesAndSettings_Hash32, + TypesAndSettings_Hash32_Result16, TypesAndSettings_KeyString, + TypesAndSettings_Seed8, TypesAndSettings_NoAlwaysOne, + TypesAndSettings_AllowZeroStarts, TypesAndSettings_RehasherWrapped, + TypesAndSettings_RehasherWrapped_Result16, + TypesAndSettings_Rehasher32Wrapped>; TYPED_TEST_CASE(RibbonTypeParamTest, TestTypesAndSettings); namespace { @@ -125,11 +141,6 @@ struct KeyGen { return *this; } - KeyGen& operator+=(uint64_t incr) { - id_ += incr; - return *this; - } - const std::string& operator*() { // Use multiplication to mix things up a little in the key ROCKSDB_NAMESPACE::EncodeFixed64(&str_[str_.size() - 8], @@ -191,9 +202,6 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { // For testing FP rate etc. constexpr Index kNumToCheck = 100000; - constexpr size_t kNumSolutionColumns = 8U * sizeof(ResultRow); - const double expected_fp_count = - kNumToCheck * std::pow(0.5, kNumSolutionColumns); const auto log2_thoroughness = static_cast(ROCKSDB_NAMESPACE::FloorLog2(FLAGS_thoroughness)); @@ -210,16 +218,33 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { uint64_t total_fp_count = 0; uint64_t total_added = 0; + uint64_t soln_query_nanos = 0; + uint64_t soln_query_count = 0; + uint64_t bloom_query_nanos = 0; + uint64_t isoln_query_nanos = 0; + uint64_t isoln_query_count = 0; + for (uint32_t i = 0; i < FLAGS_thoroughness; ++i) { - Index numToAdd = - sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5000 : 2500; + Index num_to_add = + sizeof(CoeffRow) == 16 ? 130000 : TypeParam::kUseSmash ? 5500 : 2500; // Use different values between that number and 50% of that number - numToAdd -= (i * 15485863) % (numToAdd / 2); + num_to_add -= (i * /* misc prime */ 15485863) % (num_to_add / 2); + + total_added += num_to_add; - total_added += numToAdd; + // Most of the time, test the Interleaved solution storage, but when + // we do we have to make num_slots a multiple of kCoeffBits. So + // sometimes we want to test without that limitation. 
+ bool test_interleaved = (i % 7) != 6; - const Index kNumSlots = static_cast(numToAdd * kFactor); + Index num_slots = static_cast(num_to_add * kFactor); + if (test_interleaved) { + // Round to nearest multiple of kCoeffBits + num_slots = ((num_slots + kCoeffBits / 2) / kCoeffBits) * kCoeffBits; + // Re-adjust num_to_add to get as close as possible to kFactor + num_to_add = static_cast(num_slots / kFactor); + } std::string prefix; // Take different samples if you change thoroughness @@ -229,7 +254,7 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { // Batch that must be added std::string added_str = prefix + "added"; KeyGen keys_begin(added_str, 0); - KeyGen keys_end(added_str, numToAdd); + KeyGen keys_end(added_str, num_to_add); // Batch that may or may not be added const Index kBatchSize = @@ -243,6 +268,14 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { KeyGen other_keys_begin(not_str, 0); KeyGen other_keys_end(not_str, kNumToCheck); + // Vary bytes uniformly for InterleavedSoln to use number of solution + // columns varying from 0 to max allowed by ResultRow type (and used by + // SimpleSoln). + size_t ibytes = + (i * /* misc odd */ 67896789) % (sizeof(ResultRow) * num_to_add + 1); + std::unique_ptr idata(new char[ibytes]); + InterleavedSoln isoln(idata.get(), ibytes); + SimpleSoln soln; Hasher hasher; bool first_single; @@ -251,7 +284,7 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { { Banding banding; // Traditional solve for a fixed set. - ASSERT_TRUE(banding.ResetAndFindSeedToSolve(kNumSlots, keys_begin, + ASSERT_TRUE(banding.ResetAndFindSeedToSolve(num_slots, keys_begin, keys_end, max_seed)); // Now to test backtracking, starting with guaranteed fail @@ -276,15 +309,24 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { } ASSERT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + // Also verify that redundant adds are OK (no effect) + ASSERT_TRUE( + banding.AddRange(keys_begin, KeyGen(added_str, num_to_add / 8))); + ASSERT_LE(banding.GetOccupiedCount(), occupied_count + more_added); + // Now back-substitution soln.BackSubstFrom(banding); + if (test_interleaved) { + isoln.BackSubstFrom(banding); + } + Seed seed = banding.GetSeed(); total_reseeds += seed; if (seed > log2_thoroughness + 1) { - fprintf(stderr, "%s high reseeds at %u, %u: %u\n", + fprintf(stderr, "%s high reseeds at %u, %u/%u: %u\n", seed > log2_thoroughness + 8 ? "FIXME Extremely" : "Somewhat", - static_cast(i), static_cast(numToAdd), - static_cast(seed)); + static_cast(i), static_cast(num_to_add), + static_cast(num_slots), static_cast(seed)); } hasher.ResetSeed(seed); } @@ -294,19 +336,23 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { KeyGen cur = keys_begin; while (cur != keys_end) { EXPECT_TRUE(soln.FilterQuery(*cur, hasher)); + EXPECT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); ++cur; } // We (maybe) snuck these in! 
if (first_single) { EXPECT_TRUE(soln.FilterQuery("one_more", hasher)); + EXPECT_TRUE(!test_interleaved || isoln.FilterQuery("one_more", hasher)); } if (second_single) { EXPECT_TRUE(soln.FilterQuery("two_more", hasher)); + EXPECT_TRUE(!test_interleaved || isoln.FilterQuery("two_more", hasher)); } if (batch_success) { cur = batch_begin; while (cur != batch_end) { EXPECT_TRUE(soln.FilterQuery(*cur, hasher)); + EXPECT_TRUE(!test_interleaved || isoln.FilterQuery(*cur, hasher)); ++cur; } } @@ -314,22 +360,90 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { // Check FP rate (depends only on number of result bits == solution columns) Index fp_count = 0; cur = other_keys_begin; - while (cur != other_keys_end) { - fp_count += soln.FilterQuery(*cur, hasher) ? 1 : 0; - ++cur; + { + ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(), + true); + while (cur != other_keys_end) { + fp_count += soln.FilterQuery(*cur, hasher) ? 1 : 0; + ++cur; + } + soln_query_nanos += timer.ElapsedNanos(); + soln_query_count += kNumToCheck; + } + { + double expected_fp_count = soln.ExpectedFpRate() * kNumToCheck; + // For expected FP rate, also include false positives due to collisions + // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) + double correction = + kNumToCheck * ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate( + hasher, num_to_add); + EXPECT_LE(fp_count, + FrequentPoissonUpperBound(expected_fp_count + correction)); + EXPECT_GE(fp_count, + FrequentPoissonLowerBound(expected_fp_count + correction)); } - // For expected FP rate, also include false positives due to collisions - // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) - double correction = - 1.0 * kNumToCheck * numToAdd / std::pow(256.0, sizeof(Hash)); - EXPECT_LE(fp_count, - FrequentPoissonUpperBound(expected_fp_count + correction)); - EXPECT_GE(fp_count, - FrequentPoissonLowerBound(expected_fp_count + correction)); - total_fp_count += fp_count; + + // And also check FP rate for isoln + if (test_interleaved) { + Index ifp_count = 0; + cur = other_keys_begin; + ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(), + true); + while (cur != other_keys_end) { + ifp_count += isoln.FilterQuery(*cur, hasher) ? 1 : 0; + ++cur; + } + isoln_query_nanos += timer.ElapsedNanos(); + isoln_query_count += kNumToCheck; + { + double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck; + // For expected FP rate, also include false positives due to collisions + // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) + double correction = + kNumToCheck * ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate( + hasher, num_to_add); + EXPECT_LE(ifp_count, + FrequentPoissonUpperBound(expected_fp_count + correction)); + EXPECT_GE(ifp_count, + FrequentPoissonLowerBound(expected_fp_count + correction)); + } + // Since the bits used in isoln are a subset of the bits used in soln, + // it cannot have fewer FPs + EXPECT_GE(ifp_count, fp_count); + } + + // And compare to Bloom time, for fun + if (ibytes >= /* minimum Bloom impl bytes*/ 64) { + Index bfp_count = 0; + cur = other_keys_begin; + ROCKSDB_NAMESPACE::StopWatchNano timer(ROCKSDB_NAMESPACE::Env::Default(), + true); + while (cur != other_keys_end) { + uint64_t h = hasher.GetHash(*cur); + uint32_t h1 = ROCKSDB_NAMESPACE::Lower32of64(h); + uint32_t h2 = sizeof(Hash) >= 8 ? 
ROCKSDB_NAMESPACE::Upper32of64(h) + : h1 * 0x9e3779b9; + bfp_count += ROCKSDB_NAMESPACE::FastLocalBloomImpl::HashMayMatch( + h1, h2, static_cast(ibytes), 6, idata.get()) + ? 1 + : 0; + ++cur; + } + bloom_query_nanos += timer.ElapsedNanos(); + // ensure bfp_count is used + ASSERT_LT(bfp_count, kNumToCheck); + } } + // "outside" == key not in original set so either negative or false positive + fprintf(stderr, "Simple outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * soln_query_nanos / soln_query_count); + fprintf(stderr, "Interleaved outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * isoln_query_nanos / isoln_query_count); + fprintf(stderr, "Bloom outside query, hot, incl hashing, ns/key: %g\n", + 1.0 * bloom_query_nanos / soln_query_count); + { double average_reseeds = 1.0 * total_reseeds / FLAGS_thoroughness; fprintf(stderr, "Average re-seeds: %g\n", average_reseeds); @@ -370,12 +484,14 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { { uint64_t total_checked = uint64_t{kNumToCheck} * FLAGS_thoroughness; double expected_total_fp_count = - total_checked * std::pow(0.5, kNumSolutionColumns); + total_checked * std::pow(0.5, 8U * sizeof(ResultRow)); // For expected FP rate, also include false positives due to collisions // in Hash value. (Negligible for 64-bit, can matter for 32-bit.) - expected_total_fp_count += 1.0 * total_checked * total_added / - FLAGS_thoroughness / - std::pow(256.0, sizeof(Hash)); + double average_added = 1.0 * total_added / FLAGS_thoroughness; + expected_total_fp_count += + total_checked * ROCKSDB_NAMESPACE::ribbon::ExpectedCollisionFpRate( + Hasher(), average_added); + uint64_t upper_bound = InfrequentPoissonUpperBound(expected_total_fp_count); uint64_t lower_bound = InfrequentPoissonLowerBound(expected_total_fp_count); fprintf(stderr, "Average FP rate: %g (~= %g, <= %g, >= %g)\n", @@ -383,7 +499,7 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { expected_total_fp_count / total_checked, 1.0 * upper_bound / total_checked, 1.0 * lower_bound / total_checked); - // FIXME: this can fail for Result16, e.g. --thoroughness=100 + // FIXME: this can fail for Result16, e.g. --thoroughness=300 // Seems due to inexpensive hashing in StandardHasher::GetCoeffRow and // GetResultRowFromHash as replacing those with different Hash64 instances // fixes it, at least mostly. 
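For a rough sense of scale in this check, taking the non-GFLAGS defaults
(FLAGS_thoroughness = 5, kNumToCheck = 100000) and an 8-bit ResultRow:
total_checked = 500,000, so expected_total_fp_count ~= 500000 * 2^-8 ~= 1953
(standard deviation sqrt(1953) ~= 44), while with a 64-bit Hash the collision
correction of average_added / 2^64 per query stays far below one count, so
the Poisson bounds are effectively determined by the 8-column term alone.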
@@ -392,11 +508,114 @@ TYPED_TEST(RibbonTypeParamTest, CompactnessAndBacktrackAndFpRate) { } } -TEST(RibbonTest, Another) { - IMPORT_RIBBON_TYPES_AND_SETTINGS(DefaultTypesAndSettings); - IMPORT_RIBBON_IMPL_TYPES(DefaultTypesAndSettings); +TYPED_TEST(RibbonTypeParamTest, Extremes) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypeParam); + IMPORT_RIBBON_IMPL_TYPES(TypeParam); + + size_t bytes = 128 * 1024; + std::unique_ptr buf(new char[bytes]); + InterleavedSoln isoln(buf.get(), bytes); + SimpleSoln soln; + Hasher hasher; + Banding banding; + + // ######################################## + // Add zero keys to minimal number of slots + KeyGen begin_and_end("foo", 123); + ASSERT_TRUE(banding.ResetAndFindSeedToSolve( + /*slots*/ kCoeffBits, begin_and_end, begin_and_end, /*max_seed*/ 0)); + + soln.BackSubstFrom(banding); + isoln.BackSubstFrom(banding); + + // Because there's plenty of memory, we expect the interleaved solution to + // use maximum supported columns (same as simple solution) + ASSERT_EQ(isoln.GetUpperNumColumns(), 8U * sizeof(ResultRow)); + ASSERT_EQ(isoln.GetUpperStartBlock(), 0U); + + // Somewhat oddly, we expect same FP rate as if we had essentially filled + // up the slots. + constexpr Index kNumToCheck = 100000; + KeyGen other_keys_begin("not", 0); + KeyGen other_keys_end("not", kNumToCheck); + + Index fp_count = 0; + KeyGen cur = other_keys_begin; + while (cur != other_keys_end) { + bool isoln_query_result = isoln.FilterQuery(*cur, hasher); + bool soln_query_result = soln.FilterQuery(*cur, hasher); + // Solutions are equivalent + ASSERT_EQ(isoln_query_result, soln_query_result); + // And in fact we only expect an FP when ResultRow is 0 + ASSERT_EQ(soln_query_result, hasher.GetResultRowFromHash( + hasher.GetHash(*cur)) == ResultRow{0}); + + fp_count += soln_query_result ? 
1 : 0; + ++cur; + } + { + ASSERT_EQ(isoln.ExpectedFpRate(), soln.ExpectedFpRate()); + double expected_fp_count = isoln.ExpectedFpRate() * kNumToCheck; + EXPECT_LE(fp_count, InfrequentPoissonUpperBound(expected_fp_count)); + EXPECT_GE(fp_count, InfrequentPoissonLowerBound(expected_fp_count)); + } + + // ###################################################### + // Use zero bytes for interleaved solution (key(s) added) + + // Add one key + KeyGen key_begin("added", 0); + KeyGen key_end("added", 1); + ASSERT_TRUE(banding.ResetAndFindSeedToSolve( + /*slots*/ kCoeffBits, key_begin, key_end, /*max_seed*/ 0)); + + InterleavedSoln isoln2(nullptr, /*bytes*/ 0); + + isoln2.BackSubstFrom(banding); + + ASSERT_EQ(isoln2.GetUpperNumColumns(), 0U); + ASSERT_EQ(isoln2.GetUpperStartBlock(), 0U); + + // All queries return true + ASSERT_TRUE(isoln2.FilterQuery(*other_keys_begin, hasher)); + ASSERT_EQ(isoln2.ExpectedFpRate(), 1.0); +} - // TODO +TEST(RibbonTest, AllowZeroStarts) { + IMPORT_RIBBON_TYPES_AND_SETTINGS(TypesAndSettings_AllowZeroStarts); + IMPORT_RIBBON_IMPL_TYPES(TypesAndSettings_AllowZeroStarts); + + InterleavedSoln isoln(nullptr, /*bytes*/ 0); + SimpleSoln soln; + Hasher hasher; + Banding banding; + + KeyGen begin("foo", 0); + KeyGen end("foo", 1); + // Can't add 1 entry + ASSERT_FALSE( + banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin, end, /*max_seed*/ 5)); + + KeyGen begin_and_end("foo", 123); + // Can add 0 entries + ASSERT_TRUE(banding.ResetAndFindSeedToSolve(/*slots*/ 0, begin_and_end, + begin_and_end, /*max_seed*/ 5)); + + Seed seed = banding.GetSeed(); + ASSERT_EQ(seed, 0U); + hasher.ResetSeed(seed); + + // Can construct 0-slot solutions + isoln.BackSubstFrom(banding); + soln.BackSubstFrom(banding); + + // Should always return false + ASSERT_FALSE(isoln.FilterQuery(*begin, hasher)); + ASSERT_FALSE(soln.FilterQuery(*begin, hasher)); + + // And report that in FP rate + ASSERT_EQ(isoln.ExpectedFpRate(), 0.0); + ASSERT_EQ(soln.ExpectedFpRate(), 0.0); } int main(int argc, char** argv) {