From 0a17d95357478abb64213bba426d25996bd2307a Mon Sep 17 00:00:00 2001 From: mrambacher Date: Wed, 3 Jun 2020 12:19:54 -0700 Subject: [PATCH] Add OptionTypeInfo::Vector to parse/serialize vectors (#6424) Summary: The OptionTypeInfo::Vector method allows a vector to be converted to/from strings via the options. The kVectorInt and kVectorCompressionType vectors were replaced with this methodology. As part of this change, the NextToken method was added to the OptionTypeInfo. This method was refactored from code within the StringToMap function. Future types that could use this functionality include the EventListener vectors. Pull Request resolved: https://github.com/facebook/rocksdb/pull/6424 Reviewed By: cheng-chang Differential Revision: D21832368 Pulled By: zhichao-cao fbshipit-source-id: e1ca766faff139d54e6e8407a9ec09ece6517439 --- options/cf_options.cc | 20 ++-- options/options_helper.cc | 180 ++++++++++++---------------------- options/options_test.cc | 42 ++++++++ options/options_type.h | 200 +++++++++++++++++++++++++++++++++++++- 4 files changed, 311 insertions(+), 131 deletions(-) diff --git a/options/cf_options.cc b/options/cf_options.cc index c27c3f960..ef782f6a8 100644 --- a/options/cf_options.cc +++ b/options/cf_options.cc @@ -367,12 +367,13 @@ std::unordered_map OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier)}}, {"max_bytes_for_level_multiplier_additional", - {offset_of( - &ColumnFamilyOptions::max_bytes_for_level_multiplier_additional), - OptionType::kVectorInt, OptionVerificationType::kNormal, - OptionTypeFlags::kMutable, - offsetof(struct MutableCFOptions, - max_bytes_for_level_multiplier_additional)}}, + OptionTypeInfo::Vector( + offset_of(&ColumnFamilyOptions:: + max_bytes_for_level_multiplier_additional), + OptionVerificationType::kNormal, OptionTypeFlags::kMutable, + offsetof(struct MutableCFOptions, + max_bytes_for_level_multiplier_additional), + {0, OptionType::kInt, 0})}, 
{"max_sequential_skip_in_iterations", {offset_of(&ColumnFamilyOptions::max_sequential_skip_in_iterations), OptionType::kUInt64T, OptionVerificationType::kNormal, @@ -393,9 +394,10 @@ std::unordered_map OptionTypeFlags::kMutable, offsetof(struct MutableCFOptions, compression)}}, {"compression_per_level", - {offset_of(&ColumnFamilyOptions::compression_per_level), - OptionType::kVectorCompressionType, OptionVerificationType::kNormal, - OptionTypeFlags::kNone, 0}}, + OptionTypeInfo::Vector( + offset_of(&ColumnFamilyOptions::compression_per_level), + OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, + {0, OptionType::kCompressionType})}, {"bottommost_compression", {offset_of(&ColumnFamilyOptions::bottommost_compression), OptionType::kCompressionType, OptionVerificationType::kNormal, diff --git a/options/options_helper.cc b/options/options_helper.cc index 4f5d5c28f..b01d389a4 100644 --- a/options/options_helper.cc +++ b/options/options_helper.cc @@ -271,59 +271,6 @@ std::vector GetSupportedCompressions() { } #ifndef ROCKSDB_LITE - -namespace { -bool SerializeVectorCompressionType(const std::vector& types, - std::string* value) { - std::stringstream ss; - bool result; - for (size_t i = 0; i < types.size(); ++i) { - if (i > 0) { - ss << ':'; - } - std::string string_type; - result = SerializeEnum(compression_type_string_map, - types[i], &string_type); - if (result == false) { - return result; - } - ss << string_type; - } - *value = ss.str(); - return true; -} - -bool ParseVectorCompressionType( - const std::string& value, - std::vector* compression_per_level) { - compression_per_level->clear(); - size_t start = 0; - while (start < value.size()) { - size_t end = value.find(':', start); - bool is_ok; - CompressionType type; - if (end == std::string::npos) { - is_ok = ParseEnum(compression_type_string_map, - value.substr(start), &type); - if (!is_ok) { - return false; - } - compression_per_level->emplace_back(type); - break; - } else { - is_ok = ParseEnum( - 
compression_type_string_map, value.substr(start, end - start), &type); - if (!is_ok) { - return false; - } - compression_per_level->emplace_back(type); - start = end + 1; - } - } - return true; -} -} // anonymouse namespace - bool ParseSliceTransformHelper( const std::string& kFixedPrefixName, const std::string& kCappedPrefixName, const std::string& value, @@ -394,9 +341,6 @@ bool ParseOptionHelper(char* opt_address, const OptionType& opt_type, case OptionType::kInt64T: PutUnaligned(reinterpret_cast(opt_address), ParseInt64(value)); break; - case OptionType::kVectorInt: - *reinterpret_cast*>(opt_address) = ParseVectorInt(value); - break; case OptionType::kUInt: *reinterpret_cast(opt_address) = ParseUint32(value); break; @@ -427,9 +371,6 @@ bool ParseOptionHelper(char* opt_address, const OptionType& opt_type, return ParseEnum( compression_type_string_map, value, reinterpret_cast(opt_address)); - case OptionType::kVectorCompressionType: - return ParseVectorCompressionType( - value, reinterpret_cast*>(opt_address)); case OptionType::kSliceTransform: return ParseSliceTransform( value, reinterpret_cast*>( @@ -474,9 +415,6 @@ bool SerializeSingleOptionHelper(const char* opt_address, *value = ToString(v); } break; - case OptionType::kVectorInt: - return SerializeIntVector( - *reinterpret_cast*>(opt_address), value); case OptionType::kUInt: *value = ToString(*(reinterpret_cast(opt_address))); break; @@ -516,11 +454,6 @@ bool SerializeSingleOptionHelper(const char* opt_address, return SerializeEnum( compression_type_string_map, *(reinterpret_cast(opt_address)), value); - case OptionType::kVectorCompressionType: - return SerializeVectorCompressionType( - *(reinterpret_cast*>(opt_address)), - value); - break; case OptionType::kSliceTransform: { const auto* slice_transform_ptr = reinterpret_cast*>( @@ -700,58 +633,17 @@ Status StringToMap(const std::string& opts_str, return Status::InvalidArgument("Empty key found"); } - // skip space after '=' and look for '{' for possible 
nested options - pos = eq_pos + 1; - while (pos < opts.size() && isspace(opts[pos])) { - ++pos; - } - // Empty value at the end - if (pos >= opts.size()) { - (*opts_map)[key] = ""; - break; - } - if (opts[pos] == '{') { - int count = 1; - size_t brace_pos = pos + 1; - while (brace_pos < opts.size()) { - if (opts[brace_pos] == '{') { - ++count; - } else if (opts[brace_pos] == '}') { - --count; - if (count == 0) { - break; - } - } - ++brace_pos; - } - // found the matching closing brace - if (count == 0) { - (*opts_map)[key] = trim(opts.substr(pos + 1, brace_pos - pos - 1)); - // skip all whitespace and move to the next ';' - // brace_pos points to the next position after the matching '}' - pos = brace_pos + 1; - while (pos < opts.size() && isspace(opts[pos])) { - ++pos; - } - if (pos < opts.size() && opts[pos] != ';') { - return Status::InvalidArgument( - "Unexpected chars after nested options"); - } - ++pos; - } else { - return Status::InvalidArgument( - "Mismatched curly braces for nested options"); - } + std::string value; + Status s = OptionTypeInfo::NextToken(opts, ';', eq_pos + 1, &pos, &value); + if (!s.ok()) { + return s; } else { - size_t sc_pos = opts.find(';', pos); - if (sc_pos == std::string::npos) { - (*opts_map)[key] = trim(opts.substr(pos)); - // It either ends with a trailing semi-colon or the last key-value pair + (*opts_map)[key] = value; + if (pos == std::string::npos) { break; } else { - (*opts_map)[key] = trim(opts.substr(pos, sc_pos - pos)); + pos++; } - pos = sc_pos + 1; } } @@ -1086,6 +978,59 @@ std::unordered_map {"kCompactionStopStyleSimilarSize", kCompactionStopStyleSimilarSize}, {"kCompactionStopStyleTotalSize", kCompactionStopStyleTotalSize}}; +Status OptionTypeInfo::NextToken(const std::string& opts, char delimiter, + size_t pos, size_t* end, std::string* token) { + while (pos < opts.size() && isspace(opts[pos])) { + ++pos; + } + // Empty value at the end + if (pos >= opts.size()) { + *token = ""; + *end = std::string::npos; + return 
Status::OK(); + } else if (opts[pos] == '{') { + int count = 1; + size_t brace_pos = pos + 1; + while (brace_pos < opts.size()) { + if (opts[brace_pos] == '{') { + ++count; + } else if (opts[brace_pos] == '}') { + --count; + if (count == 0) { + break; + } + } + ++brace_pos; + } + // found the matching closing brace + if (count == 0) { + *token = trim(opts.substr(pos + 1, brace_pos - pos - 1)); + // skip all whitespace and move to the next delimiter + // brace_pos points to the next position after the matching '}' + pos = brace_pos + 1; + while (pos < opts.size() && isspace(opts[pos])) { + ++pos; + } + if (pos < opts.size() && opts[pos] != delimiter) { + return Status::InvalidArgument("Unexpected chars after nested options"); + } + *end = pos; + } else { + return Status::InvalidArgument( + "Mismatched curly braces for nested options"); + } + } else { + *end = opts.find(delimiter, pos); + if (*end == std::string::npos) { + // It either ends with a trailing semi-colon or the last key-value pair + *token = trim(opts.substr(pos)); + } else { + *token = trim(opts.substr(pos, *end - pos)); + } + } + return Status::OK(); +} + Status OptionTypeInfo::Parse(const ConfigOptions& config_options, const std::string& opt_name, const std::string& opt_value, @@ -1258,8 +1203,6 @@ static bool AreOptionsEqual(OptionType type, const char* this_offset, GetUnaligned(reinterpret_cast(that_offset), &v2); return (v1 == v2); } - case OptionType::kVectorInt: - return IsOptionEqual >(this_offset, that_offset); case OptionType::kUInt32T: return IsOptionEqual(this_offset, that_offset); case OptionType::kUInt64T: { @@ -1279,9 +1222,6 @@ static bool AreOptionsEqual(OptionType type, const char* this_offset, case OptionType::kDouble: return AreEqualDoubles(*reinterpret_cast(this_offset), *reinterpret_cast(that_offset)); - case OptionType::kVectorCompressionType: - return IsOptionEqual >(this_offset, - that_offset); case OptionType::kCompactionStyle: return IsOptionEqual(this_offset, that_offset); 
case OptionType::kCompactionStopStyle: diff --git a/options/options_test.cc b/options/options_test.cc index 26f2153b6..b5b2e1416 100644 --- a/options/options_test.cc +++ b/options/options_test.cc @@ -3441,6 +3441,48 @@ TEST_F(OptionTypeInfoTest, TestStruct) { ASSERT_EQ(e1.j, 22); ASSERT_EQ(e1.b.s, "66"); } + +TEST_F(OptionTypeInfoTest, TestVectorType) { + OptionTypeInfo vec_info = OptionTypeInfo::Vector( + 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, + {0, OptionType::kString}); + std::vector vec1, vec2; + std::string mismatch; + + ConfigOptions config_options; + TestAndCompareOption(config_options, vec_info, "v", "a:b:c:d", &vec1, &vec2); + ASSERT_EQ(vec1.size(), 4); + ASSERT_EQ(vec1[0], "a"); + ASSERT_EQ(vec1[1], "b"); + ASSERT_EQ(vec1[2], "c"); + ASSERT_EQ(vec1[3], "d"); + vec1[3] = "e"; + ASSERT_FALSE(vec_info.AreEqual(config_options, "v", + reinterpret_cast(&vec1), + reinterpret_cast(&vec2), &mismatch)); + ASSERT_EQ(mismatch, "v"); + + // Test vectors with inner brackets + TestAndCompareOption(config_options, vec_info, "v", "a:{b}:c:d", &vec1, + &vec2); + ASSERT_EQ(vec1.size(), 4); + ASSERT_EQ(vec1[0], "a"); + ASSERT_EQ(vec1[1], "b"); + ASSERT_EQ(vec1[2], "c"); + ASSERT_EQ(vec1[3], "d"); + + OptionTypeInfo bar_info = OptionTypeInfo::Vector( + 0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0, + {0, OptionType::kString}, '|'); + TestAndCompareOption(config_options, vec_info, "v", "x|y|z", &vec1, &vec2); + // Test vectors with inner vector + TestAndCompareOption(config_options, bar_info, "v", + "a|{b1|b2}|{c1|c2|{d1|d2}}", &vec1, &vec2); + ASSERT_EQ(vec1.size(), 3); + ASSERT_EQ(vec1[0], "a"); + ASSERT_EQ(vec1[1], "b1|b2"); + ASSERT_EQ(vec1[2], "c1|c2|{d1|d2}"); +} #endif // !ROCKSDB_LITE } // namespace ROCKSDB_NAMESPACE diff --git a/options/options_type.h b/options/options_type.h index e39966e44..c2df3ea9a 100644 --- a/options/options_type.h +++ b/options/options_type.h @@ -14,13 +14,13 @@ #include "rocksdb/status.h" namespace 
ROCKSDB_NAMESPACE { +class OptionTypeInfo; enum class OptionType { kBoolean, kInt, kInt32T, kInt64T, - kVectorInt, kUInt, kUInt32T, kUInt64T, @@ -31,7 +31,6 @@ enum class OptionType { kCompactionPri, kSliceTransform, kCompressionType, - kVectorCompressionType, kTableFactory, kComparator, kCompactionFilter, @@ -46,6 +45,7 @@ enum class OptionType { kEnv, kEnum, kStruct, + kVector, kUnknown, }; @@ -123,6 +123,23 @@ bool SerializeEnum(const std::unordered_map& type_map, return false; } +template +Status ParseVector(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::string& value, + std::vector* result); + +template +Status SerializeVector(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::vector& vec, + std::string* value); +template +bool VectorsAreEqual(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, const std::string& name, + const std::vector& vec1, const std::vector& vec2, + std::string* mismatch); + // Function for converting a option string value into its underlying // representation in "addr" // On success, Status::OK is returned and addr is set to the parsed form @@ -354,6 +371,38 @@ class OptionTypeInfo { }); } + template + static OptionTypeInfo Vector(int _offset, + OptionVerificationType _verification, + OptionTypeFlags _flags, int _mutable_offset, + const OptionTypeInfo& elem_info, + char separator = ':') { + return OptionTypeInfo( + _offset, OptionType::kVector, _verification, _flags, _mutable_offset, + [elem_info, separator](const ConfigOptions& opts, + const std::string& name, + const std::string& value, char* addr) { + auto result = reinterpret_cast*>(addr); + return ParseVector(opts, elem_info, separator, name, value, + result); + }, + [elem_info, separator](const ConfigOptions& opts, + const std::string& name, const char* addr, + std::string* value) { + const auto& vec = 
*(reinterpret_cast*>(addr)); + return SerializeVector(opts, elem_info, separator, name, vec, + value); + }, + [elem_info](const ConfigOptions& opts, const std::string& name, + const char* addr1, const char* addr2, + std::string* mismatch) { + const auto& vec1 = *(reinterpret_cast*>(addr1)); + const auto& vec2 = *(reinterpret_cast*>(addr2)); + return VectorsAreEqual(opts, elem_info, name, vec1, vec2, + mismatch); + }); + } + bool IsEnabled(OptionTypeFlags otf) const { return (flags_ & otf) == otf; } bool IsMutable() const { return IsEnabled(OptionTypeFlags::kMutable); } @@ -483,6 +532,26 @@ class OptionTypeInfo { const std::unordered_map& opt_map, std::string* elem_name); + // Returns the next token marked by the delimiter from "opts" after start in + // token and updates end to point to where that token stops. Delimiters inside + // of braces are ignored. Returns OK if a token is found and an error if the + // input opts string is mis-formatted. + // Given "a=AA;b=BB;" start=2 and delimiter=";", token is "AA" and end points + // to the ";" before "b". Given "{a=A;b=B}", the token would be "a=A;b=B" + // + // @param opts The string in which to find the next token + // @param delimiter The delimiter between tokens + // @param start The position in opts to start looking for the token + // @param end Returns the end position in opts of the token + // @param token Returns the token + // @returns OK if a token was found + // @return InvalidArgument if the braces mismatch + // (e.g. "{a={b=c;}" ) -- missing closing brace + // @return InvalidArgument if an expected delimiter is not found + // e.g. 
"{a=b}c=d;" -- missing delimiter before "c" + static Status NextToken(const std::string& opts, char delimiter, size_t start, + size_t* end, std::string* token); + private: // The optional function to convert a string to its representation ParseFunc parse_func_; @@ -497,4 +566,131 @@ class OptionTypeInfo { OptionVerificationType verification_; OptionTypeFlags flags_; }; + +// Parses the input value into elements of the result vector. This method +// will break the input value into the individual tokens (based on the +// separator), where each of those tokens will be parsed based on the rules of +// elem_info. The result vector will be populated with elements based on the +// input tokens. For example, if the value=1:2:3:4:5 and elem_info parses +// integers, the result vector will contain the integers 1,2,3,4,5 +// @param config_options Controls how the option value is parsed. +// @param elem_info Controls how individual tokens in value are parsed +// @param separator Character separating tokens in values (':' in the above +// example) +// @param name The name associated with this vector option +// @param value The input string to parse into tokens +// @param result Returns the results of parsing value into its elements. 
+// @return OK if the value was successfully parsed +// @return InvalidArgument if the value is improperly formed or if the token +// could not be parsed +// @return NotFound If the tokenized value contains unknown options for +// its type +template +Status ParseVector(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::string& value, + std::vector* result) { + result->clear(); + Status status; + + for (size_t start = 0, end = 0; + status.ok() && start < value.size() && end != std::string::npos; + start = end + 1) { + std::string token; + status = OptionTypeInfo::NextToken(value, separator, start, &end, &token); + if (status.ok()) { + T elem; + status = elem_info.Parse(config_options, name, token, + reinterpret_cast(&elem)); + if (status.ok()) { + result->emplace_back(elem); + } + } + } + return status; +} + +// Serializes the input vector into its output value. Elements are +// separated by the separator character. This method will convert all of the +// elements in vec into their serialized form, using elem_info to perform the +// serialization. +// For example, if the vec contains the integers 1,2,3,4,5 and elem_info +// serializes integers, the output would be 1:2:3:4:5 for separator ":". +// @param config_options Controls how the option value is serialized. 
+// @param elem_info Controls how individual tokens in value are serialized +// @param separator Character separating tokens in value (':' in the above +// example) +// @param name The name associated with this vector option +// @param vec The input vector to serialize +// @param value The output string of serialized options +// @return OK if every element of vec was successfully serialized +// @return The non-OK status returned by elem_info.Serialize for the first +// element that could not be serialized; *value is not modified in that case +// @note Elements whose serialized form contains the separator are wrapped in +// braces; the whole value is wrapped in braces if it contains "=" +template +Status SerializeVector(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, char separator, + const std::string& name, const std::vector& vec, + std::string* value) { + std::string result; + ConfigOptions embedded = config_options; + embedded.delimiter = ";"; + for (size_t i = 0; i < vec.size(); ++i) { + std::string elem_str; + Status s = elem_info.Serialize( + embedded, name, reinterpret_cast(&vec[i]), &elem_str); + if (!s.ok()) { + return s; + } else { + if (i > 0) { + result += separator; + } + // If the element contains embedded separators, put it inside of brackets + if (elem_str.find(separator) != std::string::npos) { + result += "{" + elem_str + "}"; + } else { + result += elem_str; + } + } + } + if (result.find("=") != std::string::npos) { + *value = "{" + result + "}"; + } else { + *value = result; + } + return Status::OK(); +} + +// Compares the input vectors vec1 and vec2 for equality +// If the vectors are the same size, elements of the vectors are compared one by +// one using elem_info to perform the comparison. +// +// @param config_options Controls how the vectors are compared. +// @param elem_info Controls how individual elements in the vectors are compared +// @param name The name associated with this vector option +// @param vec1,vec2 The vectors to compare. 
+// @param mismatch If the vectors are not equivalent, mismatch will point to +// the first +// element of the comparison that did not match. +// @return true If vec1 and vec2 are "equal", false otherwise +template +bool VectorsAreEqual(const ConfigOptions& config_options, + const OptionTypeInfo& elem_info, const std::string& name, + const std::vector& vec1, const std::vector& vec2, + std::string* mismatch) { + if (vec1.size() != vec2.size()) { + *mismatch = name; + return false; + } else { + for (size_t i = 0; i < vec1.size(); ++i) { + if (!elem_info.AreEqual( + config_options, name, reinterpret_cast(&vec1[i]), + reinterpret_cast(&vec2[i]), mismatch)) { + return false; + } + } + return true; + } +} } // namespace ROCKSDB_NAMESPACE