diff --git a/CMakeLists.txt b/CMakeLists.txt index 63217b888..5d9b4b64a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -836,6 +836,7 @@ set(SOURCES util/random.cc util/rate_limiter.cc util/ribbon_config.cc + util/regex.cc util/slice.cc util/file_checksum_helper.cc util/status.cc diff --git a/TARGETS b/TARGETS index 214e35e45..76266e74a 100644 --- a/TARGETS +++ b/TARGETS @@ -355,6 +355,7 @@ cpp_library( "util/murmurhash.cc", "util/random.cc", "util/rate_limiter.cc", + "util/regex.cc", "util/ribbon_config.cc", "util/slice.cc", "util/status.cc", @@ -676,6 +677,7 @@ cpp_library( "util/murmurhash.cc", "util/random.cc", "util/rate_limiter.cc", + "util/regex.cc", "util/ribbon_config.cc", "util/slice.cc", "util/status.cc", diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index b22010528..a9e6922f9 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -8,7 +8,6 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include -#include #include "db/db_test_util.h" #include "port/stack_trace.h" @@ -69,11 +68,10 @@ TEST_F(DBBasicTest, UniqueSession) { ASSERT_EQ(sid2, sid4); // Expected compact format for session ids (see notes in implementation) - std::regex expected("[0-9A-Z]{20}"); - const std::string match("match"); - EXPECT_EQ(match, std::regex_replace(sid1, expected, match)); - EXPECT_EQ(match, std::regex_replace(sid2, expected, match)); - EXPECT_EQ(match, std::regex_replace(sid3, expected, match)); + TestRegex expected("[0-9A-Z]{20}"); + EXPECT_MATCHES_REGEX(sid1, expected); + EXPECT_MATCHES_REGEX(sid2, expected); + EXPECT_MATCHES_REGEX(sid3, expected); #ifndef ROCKSDB_LITE Close(); diff --git a/include/rocksdb/utilities/object_registry.h b/include/rocksdb/utilities/object_registry.h index 946a26705..31ed1abfb 100644 --- a/include/rocksdb/utilities/object_registry.h +++ b/include/rocksdb/utilities/object_registry.h @@ -10,12 +10,12 @@ #include #include #include -#include #include #include #include #include "rocksdb/status.h" +#include "rocksdb/utilities/regex.h" namespace ROCKSDB_NAMESPACE { class Logger; @@ -55,14 +55,23 @@ class ObjectLibrary { }; // End class Entry // An Entry containing a FactoryFunc for creating new Objects + // + // !!!!!! WARNING !!!!!!: The implementation currently uses std::regex, which + // has terrible performance in some cases, including possible crash due to + // stack overflow. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61582 + // for example. Avoid complicated regexes as much as possible. template class FactoryEntry : public Entry { public: FactoryEntry(const std::string& name, FactoryFunc f) - : Entry(name), pattern_(std::move(name)), factory_(std::move(f)) {} + : Entry(name), factory_(std::move(f)) { + // FIXME: the API needs to expose this failure mode. For now, bad regexes + // will match nothing. + Regex::Parse(name, ®ex_).PermitUncheckedError(); + } ~FactoryEntry() override {} bool matches(const std::string& target) const override { - return std::regex_match(target, pattern_); + return regex_.Matches(target); } // Creates a new T object. T* NewFactoryObject(const std::string& target, std::unique_ptr* guard, @@ -71,7 +80,7 @@ class ObjectLibrary { } private: - std::regex pattern_; // The pattern for this entry + Regex regex_; // The pattern for this entry FactoryFunc factory_; }; // End class FactoryEntry public: @@ -152,6 +161,9 @@ class ObjectRegistry { // pattern that matches the provided "target" string according to // std::regex_match. // + // WARNING: some regexes are problematic for std::regex; see + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61582 for example + // // If no registered functions match, returns nullptr. If multiple functions // match, the factory function used is unspecified. // diff --git a/include/rocksdb/utilities/regex.h b/include/rocksdb/utilities/regex.h new file mode 100644 index 000000000..ac2431237 --- /dev/null +++ b/include/rocksdb/utilities/regex.h @@ -0,0 +1,48 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +#pragma once + +#ifndef ROCKSDB_LITE + +#include +#include + +#include "rocksdb/status.h" + +namespace ROCKSDB_NAMESPACE { + +// A wrapper for parsed regular expressions. The regex syntax and matching is +// compatible with std::regex. +// +// !!!!!! WARNING !!!!!!: The implementation currently uses std::regex, which +// has terrible performance in some cases, including possible crash due to +// stack overflow. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61582 +// for example. Avoid use in production as much as possible. +// +// Internal note: see also TestRegex +class Regex { + public: + // Note: Cannot be constructed with a pattern, so that syntax errors can + // be handled without using exceptions. + + // Parse returns OK and saves to `out` when the pattern is valid regex + // syntax (modified ECMAScript), or else returns InvalidArgument. + // See https://en.cppreference.com/w/cpp/regex/ecmascript + static Status Parse(const char *pattern, Regex *out); + static Status Parse(const std::string &pattern, Regex *out); + + // Checks that the whole of str is matched by this regex. If called on a + // default-constructed Regex, will trigger assertion failure in DEBUG build + // or return false in release build. + bool Matches(const std::string &str) const; + + private: + class Impl; + std::shared_ptr impl_; // shared_ptr for simple implementation +}; +} // namespace ROCKSDB_NAMESPACE + +#endif // ROCKSDB_LITE diff --git a/src.mk b/src.mk index 0506beeb7..2dab0010b 100644 --- a/src.mk +++ b/src.mk @@ -220,6 +220,7 @@ LIB_SOURCES = \ util/random.cc \ util/rate_limiter.cc \ util/ribbon_config.cc \ + util/regex.cc \ util/slice.cc \ util/file_checksum_helper.cc \ util/status.cc \ diff --git a/test_util/testharness.cc b/test_util/testharness.cc index d8650dafb..899d39cc7 100644 --- a/test_util/testharness.cc +++ b/test_util/testharness.cc @@ -8,6 +8,8 @@ // found in the LICENSE file. See the AUTHORS file for names of contributors. #include "test_util/testharness.h" + +#include #include #include @@ -60,5 +62,49 @@ int RandomSeed() { return result; } +TestRegex::TestRegex(const std::string& pattern) + : impl_(std::make_shared(pattern)), pattern_(pattern) {} +TestRegex::TestRegex(const char* pattern) + : impl_(std::make_shared(pattern)), pattern_(pattern) {} + +const std::string& TestRegex::GetPattern() const { return pattern_; } + +// Sorry about code duplication with regex.cc, but it doesn't support LITE +// due to exception handling +class TestRegex::Impl : public std::regex { + public: + using std::regex::basic_regex; +}; + +bool TestRegex::Matches(const std::string& str) const { + if (impl_) { + return std::regex_match(str, *impl_); + } else { + // Should not call Matches on unset Regex + assert(false); + return false; + } +} + +::testing::AssertionResult AssertMatchesRegex(const char* str_expr, + const char* pattern_expr, + const std::string& str, + const TestRegex& pattern) { + if (pattern.Matches(str)) { + return ::testing::AssertionSuccess(); + } else if (TestRegex("\".*\"").Matches(pattern_expr)) { + // constant regex string + return ::testing::AssertionFailure() + << str << " (" << str_expr << ")" << std::endl + << "does not match regex " << pattern.GetPattern(); + } else { + // runtime regex string + return ::testing::AssertionFailure() + << str << " (" << str_expr << ")" << std::endl + << "does not match regex" << std::endl + << pattern.GetPattern() << " (" << pattern_expr << ")"; + } +} + } // namespace test } // namespace ROCKSDB_NAMESPACE diff --git a/test_util/testharness.h b/test_util/testharness.h index 739f32cb9..eb104b326 100644 --- a/test_util/testharness.h +++ b/test_util/testharness.h @@ -14,6 +14,7 @@ #else #include #endif +#include // A "skipped" test has a specific meaning in Facebook infrastructure: the // test is in good shape and should be run, but something about the @@ -78,5 +79,40 @@ int RandomSeed(); #define EXPECT_OK(s) \ EXPECT_PRED_FORMAT1(ROCKSDB_NAMESPACE::test::AssertStatus, s) #define EXPECT_NOK(s) EXPECT_FALSE((s).ok()) + +// Useful for testing +// * No need to deal with Status like in Regex public API +// * No triggering lint reports on use of std::regex in tests +// * Available in LITE (unlike public API) +class TestRegex { + public: + // These throw on bad pattern + /*implicit*/ TestRegex(const std::string& pattern); + /*implicit*/ TestRegex(const char* pattern); + + // Checks that the whole of str is matched by this regex + bool Matches(const std::string& str) const; + + const std::string& GetPattern() const; + + private: + class Impl; + std::shared_ptr impl_; // shared_ptr for simple implementation + std::string pattern_; +}; + +::testing::AssertionResult AssertMatchesRegex(const char* str_expr, + const char* pattern_expr, + const std::string& str, + const TestRegex& pattern); + +#define ASSERT_MATCHES_REGEX(str, pattern) \ + ASSERT_PRED_FORMAT2(ROCKSDB_NAMESPACE::test::AssertMatchesRegex, str, pattern) +#define EXPECT_MATCHES_REGEX(str, pattern) \ + EXPECT_PRED_FORMAT2(ROCKSDB_NAMESPACE::test::AssertMatchesRegex, str, pattern) + } // namespace test + +using test::TestRegex; + } // namespace ROCKSDB_NAMESPACE diff --git a/util/regex.cc b/util/regex.cc new file mode 100644 index 000000000..7aa4c52ae --- /dev/null +++ b/util/regex.cc @@ -0,0 +1,50 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +// This source code is licensed under both the GPLv2 (found in the +// COPYING file in the root directory) and Apache 2.0 License +// (found in the LICENSE.Apache file in the root directory). + +// LITE not supported here in part because of exception handling +#ifndef ROCKSDB_LITE + +#include "rocksdb/utilities/regex.h" + +#include +#include + +namespace ROCKSDB_NAMESPACE { + +// This section would change for alternate underlying implementations other +// than std::regex. +#if 1 +class Regex::Impl : public std::regex { + public: + using std::regex::basic_regex; +}; + +bool Regex::Matches(const std::string &str) const { + if (impl_) { + return std::regex_match(str, *impl_); + } else { + // Should not call Matches on unset Regex + assert(false); + return false; + } +} + +Status Regex::Parse(const std::string &pattern, Regex *out) { + try { + out->impl_.reset(new Impl(pattern)); + return Status::OK(); + } catch (const std::regex_error &e) { + return Status::InvalidArgument(e.what()); + } +} +#endif + +Status Regex::Parse(const char *pattern, Regex *out) { + return Parse(std::string(pattern), out); +} + +} // namespace ROCKSDB_NAMESPACE + +#endif // ROCKSDB_LITE diff --git a/util/slice_test.cc b/util/slice_test.cc index 4226768f3..e3997ca33 100644 --- a/util/slice_test.cc +++ b/util/slice_test.cc @@ -5,10 +5,13 @@ #include "rocksdb/slice.h" +#include + #include "port/port.h" #include "port/stack_trace.h" #include "rocksdb/data_structure.h" #include "rocksdb/types.h" +#include "rocksdb/utilities/regex.h" #include "test_util/testharness.h" #include "test_util/testutil.h" @@ -157,6 +160,7 @@ TEST_F(PinnableSliceTest, Move) { ASSERT_EQ(2, res); } +// ***************************************************************** // // Unit test for SmallEnumSet class SmallEnumSetTest : public testing::Test { public: @@ -173,6 +177,35 @@ TEST_F(SmallEnumSetTest, SmallSetTest) { ASSERT_FALSE(fs.Contains(FileType::kDBLockFile)); } +// ***************************************************************** // +// Unit test for Regex +#ifndef ROCKSDB_LITE +TEST(RegexTest, ParseEtc) { + Regex r; + ASSERT_OK(Regex::Parse("[abc]{5}", &r)); + ASSERT_TRUE(r.Matches("abcba")); + ASSERT_FALSE(r.Matches("abcb")); // too short + ASSERT_FALSE(r.Matches("abcbaa")); // too long + + ASSERT_OK(Regex::Parse(".*foo.*", &r)); + ASSERT_TRUE(r.Matches("123forfoodie456")); + ASSERT_FALSE(r.Matches("123forfodie456")); + // Ensure copy operator + Regex r2; + r2 = r; + ASSERT_TRUE(r2.Matches("123forfoodie456")); + ASSERT_FALSE(r2.Matches("123forfodie456")); + // Ensure copy constructor + Regex r3{r}; + ASSERT_TRUE(r3.Matches("123forfoodie456")); + ASSERT_FALSE(r3.Matches("123forfodie456")); + + ASSERT_TRUE(Regex::Parse("*foo.*", &r).IsInvalidArgument()); + ASSERT_TRUE(Regex::Parse("[abc", &r).IsInvalidArgument()); + ASSERT_TRUE(Regex::Parse("[abc]{1", &r).IsInvalidArgument()); +} +#endif // ROCKSDB_LITE + } // namespace ROCKSDB_NAMESPACE int main(int argc, char** argv) { diff --git a/utilities/backupable/backupable_db_test.cc b/utilities/backupable/backupable_db_test.cc index 97ed3aec1..a5374025f 100644 --- a/utilities/backupable/backupable_db_test.cc +++ b/utilities/backupable/backupable_db_test.cc @@ -15,7 +15,6 @@ #include #include #include -#include #include #include @@ -907,7 +906,7 @@ class BackupEngineTest : public testing::Test { } void AssertDirectoryFilesMatchRegex(const std::string& dir, - const std::regex& pattern, + const TestRegex& pattern, const std::string& file_type, int minimum_count) { std::vector children; @@ -915,8 +914,7 @@ class BackupEngineTest : public testing::Test { int found_count = 0; for (const auto& child : children) { if (EndsWith(child.name, file_type)) { - ASSERT_TRUE(std::regex_match(child.name, pattern)) - << "File name " << child.name << " does not match regex."; + ASSERT_MATCHES_REGEX(child.name, pattern); ++found_count; } } @@ -1764,7 +1762,7 @@ TEST_F(BackupEngineTest, FlushCompactDuringBackupCheckpoint) { if (sopt == kShareWithChecksum) { // Ensure we actually got DB manifest checksums by inspecting // shared_checksum file names for hex checksum component - std::regex expected("[^_]+_[0-9A-F]{8}_[^_]+.sst"); + TestRegex expected("[^_]+_[0-9A-F]{8}_[^_]+.sst"); std::vector children; const std::string dir = backupdir_ + "/shared_checksum"; ASSERT_OK(file_manager_->GetChildrenFileAttributes(dir, &children)); @@ -1772,8 +1770,7 @@ TEST_F(BackupEngineTest, FlushCompactDuringBackupCheckpoint) { if (child.size_bytes == 0) { continue; } - const std::string match("match"); - EXPECT_EQ(match, std::regex_replace(child.name, expected, match)); + EXPECT_MATCHES_REGEX(child.name, expected); } } */ @@ -2000,7 +1997,7 @@ TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsNewNaming) { FillDB(db_.get(), 0, keys_iteration); CloseDBAndBackupEngine(); - static const std::map option_to_expected = { + static const std::map option_to_expected = { {kLegacyCrc32cAndFileSize, "[0-9]+_[0-9]+_[0-9]+[.]sst"}, // kFlagIncludeFileSize redundant here {kLegacyCrc32cAndFileSize | kFlagIncludeFileSize, @@ -2010,7 +2007,7 @@ TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsNewNaming) { "[0-9]+_s[0-9A-Z]{20}_[0-9]+[.]sst"}, }; - const std::string blobfile_pattern = "[0-9]+_[0-9]+_[0-9]+[.]blob"; + const TestRegex blobfile_pattern = "[0-9]+_[0-9]+_[0-9]+[.]blob"; for (const auto& pair : option_to_expected) { CloseAndReopenDB(); @@ -2019,15 +2016,15 @@ TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsNewNaming) { ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); CloseDBAndBackupEngine(); AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2); - AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", - std::regex(pair.second), ".sst", - 1 /* minimum_count */); - if (std::string::npos != pair.second.find("_[0-9]+[.]sst")) { + AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", pair.second, + ".sst", 1 /* minimum_count */); + if (std::string::npos != pair.second.GetPattern().find("_[0-9]+[.]sst")) { AssertDirectoryFilesSizeIndicators(backupdir_ + "/shared_checksum", 1 /* minimum_count */); } + AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", - std::regex(blobfile_pattern), ".blob", + blobfile_pattern, ".blob", 1 /* minimum_count */); } } @@ -2051,9 +2048,9 @@ TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsOldFileNaming) { CloseDBAndBackupEngine(); // Old names should always be used on old files - const std::regex expected("[0-9]+_[0-9]+_[0-9]+[.]sst"); + const TestRegex sstfile_pattern("[0-9]+_[0-9]+_[0-9]+[.]sst"); - const std::string blobfile_pattern = "[0-9]+_[0-9]+_[0-9]+[.]blob"; + const TestRegex blobfile_pattern = "[0-9]+_[0-9]+_[0-9]+[.]blob"; for (ShareFilesNaming option : {kNamingDefault, kUseDbSessionId}) { CloseAndReopenDB(); @@ -2062,10 +2059,11 @@ TEST_F(BackupEngineTest, ShareTableFilesWithChecksumsOldFileNaming) { ASSERT_OK(backup_engine_->CreateNewBackup(db_.get())); CloseDBAndBackupEngine(); AssertBackupConsistency(1, 0, keys_iteration, keys_iteration * 2); - AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", expected, - ".sst", 1 /* minimum_count */); AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", - std::regex(blobfile_pattern), ".blob", + sstfile_pattern, ".sst", + 1 /* minimum_count */); + AssertDirectoryFilesMatchRegex(backupdir_ + "/shared_checksum", + blobfile_pattern, ".blob", 1 /* minimum_count */); } diff --git a/utilities/persistent_cache/block_cache_tier.cc b/utilities/persistent_cache/block_cache_tier.cc index baefa8771..1ab420251 100644 --- a/utilities/persistent_cache/block_cache_tier.cc +++ b/utilities/persistent_cache/block_cache_tier.cc @@ -6,7 +6,6 @@ #include "utilities/persistent_cache/block_cache_tier.h" -#include #include #include