//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <cassert>
#include <deque>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/customizable.h"
#include "rocksdb/slice.h"

namespace ROCKSDB_NAMESPACE {

class Slice;
class Logger;

// The Merge Operator
//
// Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only
// client knows. It could be numeric addition, list append, string
// concatenation, edit data structure, ... , anything.
// The library, on the other hand, is concerned with the exercise of this
// interface, at the right time (during get, iteration, compaction...)
//
// To use merge, the client needs to provide an object implementing one of
// the following interfaces:
//  a) AssociativeMergeOperator - for most simple semantics (always take
//    two values, and merge them into one value, which is then put back
//    into rocksdb); numeric addition and string concatenation are examples;
//
//  b) MergeOperator - the generic class for all the more abstract / complex
//    operations; one method (FullMergeV2) to merge a Put/Delete value with a
//    merge operand; and another method (PartialMerge) that merges multiple
//    operands together. this is especially useful if your key values have
//    complex structures but you would still like to support client-specific
//    incremental updates.
//
// AssociativeMergeOperator is simpler to implement. MergeOperator is simply
// more powerful.
//
// Refer to rocksdb-merge wiki for more details and example implementations.
//
// Exceptions MUST NOT propagate out of overridden functions into RocksDB,
// because RocksDB is not exception-safe. This could cause undefined behavior
// including data loss, unreported corruption, deadlocks, and more.
class MergeOperator : public Customizable { public: virtual ~MergeOperator() {} static const char* Type() { return "MergeOperator"; } static Status CreateFromString(const ConfigOptions& opts, const std::string& id, std::shared_ptr* result); // Gives the client a way to express the read -> modify -> write semantics // key: (IN) The key that's associated with this merge operation. // Client could multiplex the merge operator based on it // if the key space is partitioned and different subspaces // refer to different types of data which have different // merge operation semantics // existing: (IN) null indicates that the key does not exist before this op // operand_list:(IN) the sequence of merge operations to apply, front() first. // new_value:(OUT) Client is responsible for filling the merge result here. // The string that new_value is pointing to will be empty. // logger: (IN) Client could use this to log errors during merge. // // Return true on success. // All values passed in will be client-specific values. So if this method // returns false, it is because client specified bad data or there was // internal corruption. This will be treated as an error by the library. // // Also make use of the *logger for error messages. virtual bool FullMerge(const Slice& /*key*/, const Slice* /*existing_value*/, const std::deque& /*operand_list*/, std::string* /*new_value*/, Logger* /*logger*/) const { // deprecated, please use FullMergeV2() assert(false); return false; } struct MergeOperationInput { // If user-defined timestamp is enabled, `_key` includes timestamp. explicit MergeOperationInput(const Slice& _key, const Slice* _existing_value, const std::vector& _operand_list, Logger* _logger) : key(_key), existing_value(_existing_value), operand_list(_operand_list), logger(_logger) {} // The key associated with the merge operation. const Slice& key; // The existing value of the current key, nullptr means that the // value doesn't exist. 
const Slice* existing_value; // A list of operands to apply. const std::vector& operand_list; // Logger could be used by client to log any errors that happen during // the merge operation. Logger* logger; }; enum class OpFailureScope { kDefault, kTryMerge, kMustMerge, kOpFailureScopeMax, }; struct MergeOperationOutput { explicit MergeOperationOutput(std::string& _new_value, Slice& _existing_operand) : new_value(_new_value), existing_operand(_existing_operand) {} // Client is responsible for filling the merge result here. std::string& new_value; // If the merge result is one of the existing operands (or existing_value), // client can set this field to the operand (or existing_value) instead of // using new_value. Slice& existing_operand; // Indicates the blast radius of the failure. It is only meaningful to // provide a failure scope when returning `false` from the API populating // the `MergeOperationOutput`. Currently RocksDB operations handle these // values as follows: // // - `OpFailureScope::kDefault`: fallback to default // (`OpFailureScope::kTryMerge`) // - `OpFailureScope::kTryMerge`: operations that try to merge that key will // fail. This includes flush and compaction, which puts the DB in // read-only mode. // - `OpFailureScope::kMustMerge`: operations that must merge that key will // fail (e.g., `Get()`, `MultiGet()`, iteration). Flushes/compactions can // still proceed by copying the original input operands to the output. OpFailureScope op_failure_scope = OpFailureScope::kDefault; }; // This function applies a stack of merge operands in chronological order // on top of an existing value. There are two ways in which this method is // being used: // a) During Get() operation, it used to calculate the final value of a key // b) During compaction, in order to collapse some operands with the based // value. 
// // Note: The name of the method is somewhat misleading, as both in the cases // of Get() or compaction it may be called on a subset of operands: // K: 0 +1 +2 +7 +4 +5 2 +1 +2 // ^ // | // snapshot // In the example above, Get(K) operation will call FullMerge with a base // value of 2 and operands [+1, +2]. Compaction process might decide to // collapse the beginning of the history up to the snapshot by performing // full Merge with base value of 0 and operands [+1, +2, +7, +4]. virtual bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const; // This function performs merge(left_op, right_op) // when both the operands are themselves merge operation types // that you would have passed to a DB::Merge() call in the same order // (i.e.: DB::Merge(key,left_op), followed by DB::Merge(key,right_op)). // // PartialMerge should combine them into a single merge operation that is // saved into *new_value, and then it should return true. // *new_value should be constructed such that a call to // DB::Merge(key, *new_value) would yield the same result as a call // to DB::Merge(key, left_op) followed by DB::Merge(key, right_op). // // The string that new_value is pointing to will be empty. // // The default implementation of PartialMergeMulti will use this function // as a helper, for backward compatibility. Any successor class of // MergeOperator should either implement PartialMerge or PartialMergeMulti, // although implementing PartialMergeMulti is suggested as it is in general // more effective to merge multiple operands at a time instead of two // operands at a time. // // If it is impossible or infeasible to combine the two operations, // leave new_value unchanged and return false. The library will // internally keep track of the operations, and apply them in the // correct order once a base-value (a Put/Delete/End-of-Database) is seen. 
// // TODO: Presently there is no way to differentiate between error/corruption // and simply "return false". For now, the client should simply return // false in any case it cannot perform partial-merge, regardless of reason. // If there is corruption in the data, handle it in the FullMergeV2() function // and return false there. The default implementation of PartialMerge will // always return false. virtual bool PartialMerge(const Slice& /*key*/, const Slice& /*left_operand*/, const Slice& /*right_operand*/, std::string* /*new_value*/, Logger* /*logger*/) const { return false; } // This function performs merge when all the operands are themselves merge // operation types that you would have passed to a DB::Merge() call in the // same order (front() first) // (i.e. DB::Merge(key, operand_list[0]), followed by // DB::Merge(key, operand_list[1]), ...) // // PartialMergeMulti should combine them into a single merge operation that is // saved into *new_value, and then it should return true. *new_value should // be constructed such that a call to DB::Merge(key, *new_value) would yield // the same result as sequential individual calls to DB::Merge(key, operand) // for each operand in operand_list from front() to back(). // // The string that new_value is pointing to will be empty. // // The PartialMergeMulti function will be called when there are at least two // operands. // // In the default implementation, PartialMergeMulti will invoke PartialMerge // multiple times, where each time it only merges two operands. Developers // should either implement PartialMergeMulti, or implement PartialMerge which // is served as the helper function of the default PartialMergeMulti. virtual bool PartialMergeMulti(const Slice& key, const std::deque& operand_list, std::string* new_value, Logger* logger) const; // The name of the MergeOperator. 
Used to check for MergeOperator // mismatches (i.e., a DB created with one MergeOperator is // accessed using a different MergeOperator) // TODO: the name is currently not stored persistently and thus // no checking is enforced. Client is responsible for providing // consistent MergeOperator between DB opens. virtual const char* Name() const override = 0; // Determines whether the PartialMerge can be called with just a single // merge operand. // Override and return true for allowing a single operand. PartialMerge // and PartialMergeMulti should be overridden and implemented // correctly to properly handle a single operand. virtual bool AllowSingleOperand() const { return false; } // Allows to control when to invoke a full merge during Get. // This could be used to limit the number of merge operands that are looked at // during a point lookup, thereby helping in limiting the number of levels to // read from. // Doesn't help with iterators. // // Note: the merge operands are passed to this function in the reversed order // relative to how they were merged (passed to FullMerge or FullMergeV2) // for performance reasons, see also: // https://github.com/facebook/rocksdb/issues/3865 virtual bool ShouldMerge(const std::vector& /*operands*/) const { return false; } }; // The simpler, associative merge operator. class AssociativeMergeOperator : public MergeOperator { public: ~AssociativeMergeOperator() override {} // Gives the client a way to express the read -> modify -> write semantics // key: (IN) The key that's associated with this merge operation. // existing_value:(IN) null indicates the key does not exist before this op // value: (IN) the value to update/merge the existing_value with // new_value: (OUT) Client is responsible for filling the merge result // here. The string that new_value is pointing to will be empty. // logger: (IN) Client could use this to log errors during merge. // // Return true on success. // All values passed in will be client-specific values. 
So if this method // returns false, it is because client specified bad data or there was // internal corruption. The client should assume that this will be treated // as an error by the library. virtual bool Merge(const Slice& key, const Slice* existing_value, const Slice& value, std::string* new_value, Logger* logger) const = 0; private: // Default implementations of the MergeOperator functions bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override; bool PartialMerge(const Slice& key, const Slice& left_operand, const Slice& right_operand, std::string* new_value, Logger* logger) const override; }; } // namespace ROCKSDB_NAMESPACE