Add OptionTypeInfo::Vector to parse/serialize vectors (#6424)

Summary:
The OptionTypeInfo::Vector method allows a vector<T> to be converted to/from strings via the options.

The kVectorInt and kVectorCompressionType vectors were replaced with this methodology.

As part of this change, the NextToken method was added to the OptionTypeInfo.  This method was refactored from code within the StringToMap function.

Future types that could use this functionality include the EventListener vectors.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/6424

Reviewed By: cheng-chang

Differential Revision: D21832368

Pulled By: zhichao-cao

fbshipit-source-id: e1ca766faff139d54e6e8407a9ec09ece6517439
main
mrambacher 5 years ago committed by Facebook GitHub Bot
parent 172adce767
commit 0a17d95357
  1. 20
      options/cf_options.cc
  2. 180
      options/options_helper.cc
  3. 42
      options/options_test.cc
  4. 200
      options/options_type.h

@ -367,12 +367,13 @@ std::unordered_map<std::string, OptionTypeInfo>
OptionTypeFlags::kMutable,
offsetof(struct MutableCFOptions, max_bytes_for_level_multiplier)}},
{"max_bytes_for_level_multiplier_additional",
{offset_of(
&ColumnFamilyOptions::max_bytes_for_level_multiplier_additional),
OptionType::kVectorInt, OptionVerificationType::kNormal,
OptionTypeFlags::kMutable,
offsetof(struct MutableCFOptions,
max_bytes_for_level_multiplier_additional)}},
OptionTypeInfo::Vector<int>(
offset_of(&ColumnFamilyOptions::
max_bytes_for_level_multiplier_additional),
OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
offsetof(struct MutableCFOptions,
max_bytes_for_level_multiplier_additional),
{0, OptionType::kInt, 0})},
{"max_sequential_skip_in_iterations",
{offset_of(&ColumnFamilyOptions::max_sequential_skip_in_iterations),
OptionType::kUInt64T, OptionVerificationType::kNormal,
@ -393,9 +394,10 @@ std::unordered_map<std::string, OptionTypeInfo>
OptionTypeFlags::kMutable,
offsetof(struct MutableCFOptions, compression)}},
{"compression_per_level",
{offset_of(&ColumnFamilyOptions::compression_per_level),
OptionType::kVectorCompressionType, OptionVerificationType::kNormal,
OptionTypeFlags::kNone, 0}},
OptionTypeInfo::Vector<CompressionType>(
offset_of(&ColumnFamilyOptions::compression_per_level),
OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0,
{0, OptionType::kCompressionType})},
{"bottommost_compression",
{offset_of(&ColumnFamilyOptions::bottommost_compression),
OptionType::kCompressionType, OptionVerificationType::kNormal,

@ -271,59 +271,6 @@ std::vector<CompressionType> GetSupportedCompressions() {
}
#ifndef ROCKSDB_LITE
namespace {
bool SerializeVectorCompressionType(const std::vector<CompressionType>& types,
std::string* value) {
std::stringstream ss;
bool result;
for (size_t i = 0; i < types.size(); ++i) {
if (i > 0) {
ss << ':';
}
std::string string_type;
result = SerializeEnum<CompressionType>(compression_type_string_map,
types[i], &string_type);
if (result == false) {
return result;
}
ss << string_type;
}
*value = ss.str();
return true;
}
bool ParseVectorCompressionType(
const std::string& value,
std::vector<CompressionType>* compression_per_level) {
compression_per_level->clear();
size_t start = 0;
while (start < value.size()) {
size_t end = value.find(':', start);
bool is_ok;
CompressionType type;
if (end == std::string::npos) {
is_ok = ParseEnum<CompressionType>(compression_type_string_map,
value.substr(start), &type);
if (!is_ok) {
return false;
}
compression_per_level->emplace_back(type);
break;
} else {
is_ok = ParseEnum<CompressionType>(
compression_type_string_map, value.substr(start, end - start), &type);
if (!is_ok) {
return false;
}
compression_per_level->emplace_back(type);
start = end + 1;
}
}
return true;
}
} // anonymous namespace
bool ParseSliceTransformHelper(
const std::string& kFixedPrefixName, const std::string& kCappedPrefixName,
const std::string& value,
@ -394,9 +341,6 @@ bool ParseOptionHelper(char* opt_address, const OptionType& opt_type,
case OptionType::kInt64T:
PutUnaligned(reinterpret_cast<int64_t*>(opt_address), ParseInt64(value));
break;
case OptionType::kVectorInt:
*reinterpret_cast<std::vector<int>*>(opt_address) = ParseVectorInt(value);
break;
case OptionType::kUInt:
*reinterpret_cast<unsigned int*>(opt_address) = ParseUint32(value);
break;
@ -427,9 +371,6 @@ bool ParseOptionHelper(char* opt_address, const OptionType& opt_type,
return ParseEnum<CompressionType>(
compression_type_string_map, value,
reinterpret_cast<CompressionType*>(opt_address));
case OptionType::kVectorCompressionType:
return ParseVectorCompressionType(
value, reinterpret_cast<std::vector<CompressionType>*>(opt_address));
case OptionType::kSliceTransform:
return ParseSliceTransform(
value, reinterpret_cast<std::shared_ptr<const SliceTransform>*>(
@ -474,9 +415,6 @@ bool SerializeSingleOptionHelper(const char* opt_address,
*value = ToString(v);
}
break;
case OptionType::kVectorInt:
return SerializeIntVector(
*reinterpret_cast<const std::vector<int>*>(opt_address), value);
case OptionType::kUInt:
*value = ToString(*(reinterpret_cast<const unsigned int*>(opt_address)));
break;
@ -516,11 +454,6 @@ bool SerializeSingleOptionHelper(const char* opt_address,
return SerializeEnum<CompressionType>(
compression_type_string_map,
*(reinterpret_cast<const CompressionType*>(opt_address)), value);
case OptionType::kVectorCompressionType:
return SerializeVectorCompressionType(
*(reinterpret_cast<const std::vector<CompressionType>*>(opt_address)),
value);
break;
case OptionType::kSliceTransform: {
const auto* slice_transform_ptr =
reinterpret_cast<const std::shared_ptr<const SliceTransform>*>(
@ -700,58 +633,17 @@ Status StringToMap(const std::string& opts_str,
return Status::InvalidArgument("Empty key found");
}
// skip space after '=' and look for '{' for possible nested options
pos = eq_pos + 1;
while (pos < opts.size() && isspace(opts[pos])) {
++pos;
}
// Empty value at the end
if (pos >= opts.size()) {
(*opts_map)[key] = "";
break;
}
if (opts[pos] == '{') {
int count = 1;
size_t brace_pos = pos + 1;
while (brace_pos < opts.size()) {
if (opts[brace_pos] == '{') {
++count;
} else if (opts[brace_pos] == '}') {
--count;
if (count == 0) {
break;
}
}
++brace_pos;
}
// found the matching closing brace
if (count == 0) {
(*opts_map)[key] = trim(opts.substr(pos + 1, brace_pos - pos - 1));
// skip all whitespace and move to the next ';'
// brace_pos points to the next position after the matching '}'
pos = brace_pos + 1;
while (pos < opts.size() && isspace(opts[pos])) {
++pos;
}
if (pos < opts.size() && opts[pos] != ';') {
return Status::InvalidArgument(
"Unexpected chars after nested options");
}
++pos;
} else {
return Status::InvalidArgument(
"Mismatched curly braces for nested options");
}
std::string value;
Status s = OptionTypeInfo::NextToken(opts, ';', eq_pos + 1, &pos, &value);
if (!s.ok()) {
return s;
} else {
size_t sc_pos = opts.find(';', pos);
if (sc_pos == std::string::npos) {
(*opts_map)[key] = trim(opts.substr(pos));
// It either ends with a trailing semi-colon or the last key-value pair
(*opts_map)[key] = value;
if (pos == std::string::npos) {
break;
} else {
(*opts_map)[key] = trim(opts.substr(pos, sc_pos - pos));
pos++;
}
pos = sc_pos + 1;
}
}
@ -1086,6 +978,59 @@ std::unordered_map<std::string, CompactionStopStyle>
{"kCompactionStopStyleSimilarSize", kCompactionStopStyleSimilarSize},
{"kCompactionStopStyleTotalSize", kCompactionStopStyleTotalSize}};
// Extracts the next token from "opts", starting the search at "pos".
//
// Leading whitespace is skipped. If the token starts with '{', everything up
// to the matching '}' (nested braces are counted) is returned without the
// outer braces, and delimiters inside the braces are ignored. Otherwise the
// token runs up to the next delimiter or to the end of the string. The token
// is trimmed in both cases.
//
// @param opts      The string to tokenize.
// @param delimiter The character separating tokens.
// @param pos       The position in opts at which to start looking.
// @param end       On success:
//                  - std::string::npos if only whitespace was left, or if an
//                    unbraced token runs to the end of opts;
//                  - for an unbraced token, the position of the delimiter
//                    that terminates it;
//                  - for a braced token, the position of the first
//                    non-whitespace character after the closing brace, which
//                    is either the delimiter or opts.size().
// @param token     Receives the token ("" if nothing was left to read).
// @return OK on success.
// @return InvalidArgument if the braces do not match, or if a braced token is
//         followed by something other than whitespace and the delimiter.
Status OptionTypeInfo::NextToken(const std::string& opts, char delimiter,
                                 size_t pos, size_t* end, std::string* token) {
  // isspace() has undefined behavior for negative arguments other than EOF,
  // so every (possibly signed) char is converted to unsigned char first.
  while (pos < opts.size() && isspace(static_cast<unsigned char>(opts[pos]))) {
    ++pos;
  }
  // Empty value at the end
  if (pos >= opts.size()) {
    *token = "";
    *end = std::string::npos;
    return Status::OK();
  } else if (opts[pos] == '{') {
    // Scan forward for the brace that closes the one at "pos".
    int count = 1;
    size_t brace_pos = pos + 1;
    while (brace_pos < opts.size()) {
      if (opts[brace_pos] == '{') {
        ++count;
      } else if (opts[brace_pos] == '}') {
        --count;
        if (count == 0) {
          break;
        }
      }
      ++brace_pos;
    }
    // found the matching closing brace
    if (count == 0) {
      // brace_pos is the position of the matching '}'; the token is the text
      // strictly between the two braces.
      *token = trim(opts.substr(pos + 1, brace_pos - pos - 1));
      // skip all whitespace and move to the next delimiter
      pos = brace_pos + 1;
      while (pos < opts.size() &&
             isspace(static_cast<unsigned char>(opts[pos]))) {
        ++pos;
      }
      if (pos < opts.size() && opts[pos] != delimiter) {
        return Status::InvalidArgument("Unexpected chars after nested options");
      }
      *end = pos;
    } else {
      return Status::InvalidArgument(
          "Mismatched curly braces for nested options");
    }
  } else {
    *end = opts.find(delimiter, pos);
    if (*end == std::string::npos) {
      // No further delimiter: the token is the remainder of the string
      *token = trim(opts.substr(pos));
    } else {
      *token = trim(opts.substr(pos, *end - pos));
    }
  }
  return Status::OK();
}
Status OptionTypeInfo::Parse(const ConfigOptions& config_options,
const std::string& opt_name,
const std::string& opt_value,
@ -1258,8 +1203,6 @@ static bool AreOptionsEqual(OptionType type, const char* this_offset,
GetUnaligned(reinterpret_cast<const int64_t*>(that_offset), &v2);
return (v1 == v2);
}
case OptionType::kVectorInt:
return IsOptionEqual<std::vector<int> >(this_offset, that_offset);
case OptionType::kUInt32T:
return IsOptionEqual<uint32_t>(this_offset, that_offset);
case OptionType::kUInt64T: {
@ -1279,9 +1222,6 @@ static bool AreOptionsEqual(OptionType type, const char* this_offset,
case OptionType::kDouble:
return AreEqualDoubles(*reinterpret_cast<const double*>(this_offset),
*reinterpret_cast<const double*>(that_offset));
case OptionType::kVectorCompressionType:
return IsOptionEqual<std::vector<CompressionType> >(this_offset,
that_offset);
case OptionType::kCompactionStyle:
return IsOptionEqual<CompactionStyle>(this_offset, that_offset);
case OptionType::kCompactionStopStyle:

@ -3441,6 +3441,48 @@ TEST_F(OptionTypeInfoTest, TestStruct) {
ASSERT_EQ(e1.j, 22);
ASSERT_EQ(e1.b.s, "66");
}
// Exercises OptionTypeInfo::Vector<std::string>: parsing a separated string
// into a vector, comparing vectors, tokens wrapped in braces, and a custom
// separator with nested braces.
TEST_F(OptionTypeInfoTest, TestVectorType) {
// Vector of strings using the default ':' separator.
OptionTypeInfo vec_info = OptionTypeInfo::Vector<std::string>(
0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0,
{0, OptionType::kString});
std::vector<std::string> vec1, vec2;
std::string mismatch;
ConfigOptions config_options;
TestAndCompareOption(config_options, vec_info, "v", "a:b:c:d", &vec1, &vec2);
ASSERT_EQ(vec1.size(), 4);
ASSERT_EQ(vec1[0], "a");
ASSERT_EQ(vec1[1], "b");
ASSERT_EQ(vec1[2], "c");
ASSERT_EQ(vec1[3], "d");
// Change one element so that vec1 and vec2 differ; the mismatch is expected
// to be reported under the option name.
vec1[3] = "e";
ASSERT_FALSE(vec_info.AreEqual(config_options, "v",
reinterpret_cast<char*>(&vec1),
reinterpret_cast<char*>(&vec2), &mismatch));
ASSERT_EQ(mismatch, "v");
// Test vectors with inner brackets
TestAndCompareOption(config_options, vec_info, "v", "a:{b}:c:d", &vec1,
&vec2);
ASSERT_EQ(vec1.size(), 4);
ASSERT_EQ(vec1[0], "a");
ASSERT_EQ(vec1[1], "b");
ASSERT_EQ(vec1[2], "c");
ASSERT_EQ(vec1[3], "d");
// Vector of strings using '|' as the separator.
OptionTypeInfo bar_info = OptionTypeInfo::Vector<std::string>(
0, OptionVerificationType::kNormal, OptionTypeFlags::kNone, 0,
{0, OptionType::kString}, '|');
// NOTE(review): this call passes vec_info (':' separator), so "x|y|z" is not
// split on '|' here. bar_info may have been intended -- confirm.
TestAndCompareOption(config_options, vec_info, "v", "x|y|z", &vec1, &vec2);
// Test vectors with inner vector
TestAndCompareOption(config_options, bar_info, "v",
"a|{b1|b2}|{c1|c2|{d1|d2}}", &vec1, &vec2);
// Only the outermost braces are removed; nested braces stay in the element.
ASSERT_EQ(vec1.size(), 3);
ASSERT_EQ(vec1[0], "a");
ASSERT_EQ(vec1[1], "b1|b2");
ASSERT_EQ(vec1[2], "c1|c2|{d1|d2}");
}
#endif // !ROCKSDB_LITE
} // namespace ROCKSDB_NAMESPACE

@ -14,13 +14,13 @@
#include "rocksdb/status.h"
namespace ROCKSDB_NAMESPACE {
class OptionTypeInfo;
enum class OptionType {
kBoolean,
kInt,
kInt32T,
kInt64T,
kVectorInt,
kUInt,
kUInt32T,
kUInt64T,
@ -31,7 +31,6 @@ enum class OptionType {
kCompactionPri,
kSliceTransform,
kCompressionType,
kVectorCompressionType,
kTableFactory,
kComparator,
kCompactionFilter,
@ -46,6 +45,7 @@ enum class OptionType {
kEnv,
kEnum,
kStruct,
kVector,
kUnknown,
};
@ -123,6 +123,23 @@ bool SerializeEnum(const std::unordered_map<std::string, T>& type_map,
return false;
}
// Forward declaration; the definition follows the OptionTypeInfo class later
// in this header. Splits "value" on "separator" and parses each token with
// elem_info into "result".
template <typename T>
Status ParseVector(const ConfigOptions& config_options,
const OptionTypeInfo& elem_info, char separator,
const std::string& name, const std::string& value,
std::vector<T>* result);
// Forward declaration; the definition follows the OptionTypeInfo class later
// in this header. Serializes each element of "vec" with elem_info and joins
// the results with "separator" into "value".
template <typename T>
Status SerializeVector(const ConfigOptions& config_options,
const OptionTypeInfo& elem_info, char separator,
const std::string& name, const std::vector<T>& vec,
std::string* value);
// Forward declaration; the definition follows the OptionTypeInfo class later
// in this header. Compares vec1 and vec2 element by element using elem_info.
template <typename T>
bool VectorsAreEqual(const ConfigOptions& config_options,
const OptionTypeInfo& elem_info, const std::string& name,
const std::vector<T>& vec1, const std::vector<T>& vec2,
std::string* mismatch);
// Function for converting a option string value into its underlying
// representation in "addr"
// On success, Status::OK is returned and addr is set to the parsed form
@ -354,6 +371,38 @@ class OptionTypeInfo {
});
}
// Creates an OptionTypeInfo of type kVector describing an option that is
// stored as a std::vector<T>.
//
// The returned info parses, serializes and compares the vector by delegating
// to ParseVector<T>, SerializeVector<T> and VectorsAreEqual<T>. elem_info and
// separator are captured by value by the installed callbacks.
//
// @param _offset         Offset of the vector within its options struct.
// @param _verification   How the option is verified.
// @param _flags          Flags for the option (e.g. kMutable).
// @param _mutable_offset Offset within the mutable options struct.
// @param elem_info       Describes how a single element is handled.
// @param separator       Character placed between elements (':' by default).
template <typename T>
static OptionTypeInfo Vector(int _offset,
                             OptionVerificationType _verification,
                             OptionTypeFlags _flags, int _mutable_offset,
                             const OptionTypeInfo& elem_info,
                             char separator = ':') {
  // String -> vector<T> stored at "addr".
  auto parse_vector = [elem_info, separator](
                          const ConfigOptions& opts, const std::string& name,
                          const std::string& value, char* addr) {
    return ParseVector<T>(opts, elem_info, separator, name, value,
                          reinterpret_cast<std::vector<T>*>(addr));
  };
  // vector<T> stored at "addr" -> string.
  auto serialize_vector = [elem_info, separator](
                              const ConfigOptions& opts,
                              const std::string& name, const char* addr,
                              std::string* value) {
    const auto* elems = reinterpret_cast<const std::vector<T>*>(addr);
    return SerializeVector<T>(opts, elem_info, separator, name, *elems, value);
  };
  // Element-wise comparison of the vectors stored at "addr1" and "addr2".
  auto vectors_equal = [elem_info](const ConfigOptions& opts,
                                   const std::string& name, const char* addr1,
                                   const char* addr2, std::string* mismatch) {
    const auto* lhs = reinterpret_cast<const std::vector<T>*>(addr1);
    const auto* rhs = reinterpret_cast<const std::vector<T>*>(addr2);
    return VectorsAreEqual<T>(opts, elem_info, name, *lhs, *rhs, mismatch);
  };
  return OptionTypeInfo(_offset, OptionType::kVector, _verification, _flags,
                        _mutable_offset, parse_vector, serialize_vector,
                        vectors_equal);
}
bool IsEnabled(OptionTypeFlags otf) const { return (flags_ & otf) == otf; }
bool IsMutable() const { return IsEnabled(OptionTypeFlags::kMutable); }
@ -483,6 +532,26 @@ class OptionTypeInfo {
const std::unordered_map<std::string, OptionTypeInfo>& opt_map,
std::string* elem_name);
// Returns the next token marked by the delimiter from "opts" after start in
// token and updates end to the position where that token stops. Delimiters
// inside of braces are ignored. Returns OK if a token is found and an error if
// the input opts string is mis-formatted.
// Given "a=AA;b=BB;" start=2 and delimiter=';', token is "AA" and end is the
// position of the ';' that follows "AA".
// Given "{a=A;b=B}", the token would be "a=A;b=B".
//
// @param opts The string in which to find the next token
// @param delimiter The delimiter between tokens
// @param start The position in opts to start looking for the token
// @param end Returns the end position in opts of the token, or
//            std::string::npos if no further delimiter follows an unbraced
//            token or nothing but whitespace was left
// @param token Returns the token
// @return OK if a token was found
// @return InvalidArgument if the braces mismatch
// (e.g. "{a={b=c;}" ) -- missing closing brace
// @return InvalidArgument if an expected delimiter is not found
// e.g. "{a=b}c=d;" -- missing delimiter before "c"
static Status NextToken(const std::string& opts, char delimiter, size_t start,
size_t* end, std::string* token);
private:
// The optional function to convert a string to its representation
ParseFunc parse_func_;
@ -497,4 +566,131 @@ class OptionTypeInfo {
OptionVerificationType verification_;
OptionTypeFlags flags_;
};
// Converts a separated string into the elements of a vector.
//
// The input is split into tokens with OptionTypeInfo::NextToken (so tokens
// wrapped in braces may contain the separator), and every token is parsed
// into a T according to elem_info. For example, value "1:2:3:4:5" with an
// elem_info that parses integers yields the vector {1,2,3,4,5}.
//
// @param config_options Controls how the option value is parsed.
// @param elem_info Controls how individual tokens in value are parsed
// @param separator Character separating tokens in value (':' in the above
//                  example)
// @param name The name associated with this vector option
// @param value The input string to parse into tokens
// @param result Returns the parsed elements. It is cleared first; if parsing
//               fails it holds the elements parsed before the failure.
// @return OK if the value was successfully parsed
// @return Otherwise the first non-OK status reported by
//         OptionTypeInfo::NextToken (e.g. InvalidArgument for an improperly
//         formed value) or by elem_info.Parse for a token
template <typename T>
Status ParseVector(const ConfigOptions& config_options,
                   const OptionTypeInfo& elem_info, char separator,
                   const std::string& name, const std::string& value,
                   std::vector<T>* result) {
  result->clear();
  Status status;
  size_t token_start = 0;
  size_t token_end = 0;
  // Stop once the input is exhausted or NextToken reports that no further
  // separator exists (token_end == npos).
  while (status.ok() && token_start < value.size() &&
         token_end != std::string::npos) {
    std::string token;
    status = OptionTypeInfo::NextToken(value, separator, token_start,
                                       &token_end, &token);
    if (!status.ok()) {
      break;
    }
    T elem;
    status = elem_info.Parse(config_options, name, token,
                             reinterpret_cast<char*>(&elem));
    if (!status.ok()) {
      break;
    }
    result->emplace_back(elem);
    token_start = token_end + 1;
  }
  return status;
}
// Serializes the input vector into its output value. Elements are
// separated by the separator character. This function converts all of the
// elements in vec into their serialized form, using elem_info to perform the
// serialization.
// For example, if vec contains the integers 1,2,3,4,5 and elem_info
// serializes integers, the output would be 1:2:3:4:5 for separator ':'.
// An element whose serialized form itself contains the separator is wrapped
// in braces so that it can be read back as a single token. If the combined
// result contains '=', the whole value is wrapped in braces.
// @param config_options Controls how the option value is serialized.
// @param elem_info Controls how individual elements are serialized
// @param separator Character separating elements in value (':' in the above
//                  example)
// @param name The name associated with this vector option
// @param vec The input vector to serialize
// @param value The output string of serialized elements; only written when
//              every element was serialized successfully
// @return OK if the vector was successfully serialized
// @return Otherwise the first non-OK status returned by elem_info.Serialize
//         for an element
template <typename T>
Status SerializeVector(const ConfigOptions& config_options,
                       const OptionTypeInfo& elem_info, char separator,
                       const std::string& name, const std::vector<T>& vec,
                       std::string* value) {
  std::string result;
  // Elements are serialized with ';' as the delimiter for any nested options,
  // independent of the delimiter configured by the caller.
  ConfigOptions embedded = config_options;
  embedded.delimiter = ";";
  for (size_t i = 0; i < vec.size(); ++i) {
    std::string elem_str;
    Status s = elem_info.Serialize(
        embedded, name, reinterpret_cast<const char*>(&vec[i]), &elem_str);
    if (!s.ok()) {
      return s;
    } else {
      if (i > 0) {
        result += separator;
      }
      // If the element contains embedded separators, put it inside of
      // brackets. The check must look at the element itself: the accumulated
      // result contains a separator for every element after the first, which
      // would wrap all of them and never wrap the first one.
      if (elem_str.find(separator) != std::string::npos) {
        result += "{" + elem_str + "}";
      } else {
        result += elem_str;
      }
    }
  }
  if (result.find('=') != std::string::npos) {
    *value = "{" + result + "}";
  } else {
    *value = result;
  }
  return Status::OK();
}
// Checks whether two vectors are equal.
// Vectors of different sizes are never equal. Vectors of the same size are
// compared element by element, in order, using elem_info; the comparison
// stops at the first pair of elements that does not match.
//
// @param config_options Controls how the vectors are compared.
// @param elem_info Controls how individual elements in the vectors are compared
// @param name The name associated with this vector option
// @param vec1,vec2 The vectors to compare.
// @param mismatch Set to name if the sizes differ. For an element mismatch it
//                 is passed on to elem_info.AreEqual, which is expected to
//                 fill it in.
// @return true If vec1 and vec2 are "equal", false otherwise
template <typename T>
bool VectorsAreEqual(const ConfigOptions& config_options,
                     const OptionTypeInfo& elem_info, const std::string& name,
                     const std::vector<T>& vec1, const std::vector<T>& vec2,
                     std::string* mismatch) {
  const size_t count = vec1.size();
  if (count != vec2.size()) {
    *mismatch = name;
    return false;
  }
  for (size_t idx = 0; idx != count; ++idx) {
    const char* lhs = reinterpret_cast<const char*>(&vec1[idx]);
    const char* rhs = reinterpret_cast<const char*>(&vec2[idx]);
    const bool same =
        elem_info.AreEqual(config_options, name, lhs, rhs, mismatch);
    if (!same) {
      return false;
    }
  }
  return true;
}
} // namespace ROCKSDB_NAMESPACE

Loading…
Cancel
Save