From 51778612c9cf4cb842eed10f270ec0ed29ff22f1 Mon Sep 17 00:00:00 2001 From: Ewout Prangsma Date: Mon, 26 Jun 2017 16:52:06 -0700 Subject: [PATCH] Encryption at rest support Summary: This PR adds support for encrypting data stored by RocksDB when written to disk. It adds an `EncryptedEnv` override of the `Env` class with matching overrides for sequential&random access files. The encryption itself is done through a configurable `EncryptionProvider`. This class creates is asked to create `BlockAccessCipherStream` for a file. This is where the actual encryption/decryption is being done. Currently there is a Counter mode implementation of `BlockAccessCipherStream` with a `ROT13` block cipher (NOTE the `ROT13` is for demo purposes only!!). The Counter operation mode uses an initial counter & random initialization vector (IV). Both are created randomly for each file and stored in a 4K (default size) block that is prefixed to that file. The `EncryptedEnv` implementation is such that clients of the `Env` class do not see this prefix (nor data, nor in filesize). The largest part of the prefix block is also encrypted, and there is room left for implementation specific settings/values/keys in there. To test the encryption, the `DBTestBase` class has been extended to consider a new environment variable called `ENCRYPTED_ENV`. If set, the test will setup a encrypted instance of the `Env` class to use for all tests. Typically you would run it like this: ``` ENCRYPTED_ENV=1 make check_some ``` There is also an added test that checks that some data inserted into the database is or is not "visible" on disk. With `ENCRYPTED_ENV` active it must not find plain text strings, with `ENCRYPTED_ENV` unset, it must find the plain text strings. Closes https://github.com/facebook/rocksdb/pull/2424 Differential Revision: D5322178 Pulled By: sdwilsh fbshipit-source-id: 253b0a9c2c498cc98f580df7f2623cbf7678a27f --- CMakeLists.txt | 1 + Makefile | 4 + TARGETS | 2 + db/db_basic_test.cc | 3 + db/db_encryption_test.cc | 96 ++++ db/db_options_test.cc | 1 + db/db_range_del_test.cc | 3 + db/db_test2.cc | 1 + db/db_test_util.cc | 21 +- db/db_test_util.h | 3 + db/listener_test.cc | 9 + env/env_encryption.cc | 911 +++++++++++++++++++++++++++++++ include/rocksdb/env.h | 4 + include/rocksdb/env_encryption.h | 198 +++++++ src.mk | 2 + 15 files changed, 1253 insertions(+), 6 deletions(-) create mode 100644 db/db_encryption_test.cc create mode 100644 env/env_encryption.cc create mode 100644 include/rocksdb/env_encryption.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 4227c1367..39b135a69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -380,6 +380,7 @@ set(SOURCES db/write_thread.cc env/env.cc env/env_chroot.cc + env/env_encryption.cc env/env_hdfs.cc env/mock_env.cc memtable/alloc_tracker.cc diff --git a/Makefile b/Makefile index 4c48beac4..20e012174 100644 --- a/Makefile +++ b/Makefile @@ -323,6 +323,7 @@ EXPOBJECTS = $(EXP_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL) TESTS = \ db_basic_test \ + db_encryption_test \ db_test2 \ external_sst_file_basic_test \ auto_roll_logger_test \ @@ -1030,6 +1031,9 @@ slice_transform_test: util/slice_transform_test.o $(LIBOBJECTS) $(TESTHARNESS) db_basic_test: db/db_basic_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) +db_encryption_test: db/db_encryption_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) + $(AM_LINK) + db_test: db/db_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS) $(AM_LINK) diff --git a/TARGETS b/TARGETS index fa0be6be1..e8bd6cc51 100644 --- a/TARGETS +++ b/TARGETS @@ -105,6 +105,7 @@ cpp_library( "db/write_thread.cc", "env/env.cc", "env/env_chroot.cc", + "env/env_encryption.cc", "env/env_hdfs.cc", "env/env_posix.cc", "env/io_posix.cc", @@ -446,6 +447,7 @@ ROCKS_TESTS = [['merger_test', 'table/merger_test.cc', 'serial'], ['options_util_test', 'utilities/options/options_util_test.cc', 'serial'], ['dynamic_bloom_test', 'util/dynamic_bloom_test.cc', 'serial'], ['db_basic_test', 'db/db_basic_test.cc', 'serial'], + ['db_encryption_test', 'db/db_encryption_test.cc', 'serial'], ['db_merge_operator_test', 'db/db_merge_operator_test.cc', 'serial'], ['manual_compaction_test', 'db/manual_compaction_test.cc', 'parallel'], ['delete_scheduler_test', 'util/delete_scheduler_test.cc', 'serial'], diff --git a/db/db_basic_test.cc b/db/db_basic_test.cc index 179c7ef9b..5427a08f5 100644 --- a/db/db_basic_test.cc +++ b/db/db_basic_test.cc @@ -824,6 +824,9 @@ TEST_F(DBBasicTest, ChecksumTest) { // sense to run #ifndef OS_WIN TEST_F(DBBasicTest, MmapAndBufferOptions) { + if (!IsMemoryMappedAccessSupported()) { + return; + } Options options = CurrentOptions(); options.use_direct_reads = true; diff --git a/db/db_encryption_test.cc b/db/db_encryption_test.cc new file mode 100644 index 000000000..49c432f39 --- /dev/null +++ b/db/db_encryption_test.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// This source code is also licensed under the GPLv2 license found in the +// COPYING file in the root directory of this source tree. +// +#include "db/db_test_util.h" +#include "port/stack_trace.h" +#include "rocksdb/perf_context.h" +#if !defined(ROCKSDB_LITE) +#include "util/sync_point.h" +#endif +#include +#include + +namespace rocksdb { + +class DBEncryptionTest : public DBTestBase { + public: + DBEncryptionTest() : DBTestBase("/db_encryption_test") {} +}; + +#ifndef ROCKSDB_LITE + +TEST_F(DBEncryptionTest, CheckEncrypted) { + ASSERT_OK(Put("foo567", "v1.fetdq")); + ASSERT_OK(Put("bar123", "v2.dfgkjdfghsd")); + Close(); + + // Open all files and look for the values we've put in there. + // They should not be found if encrypted, otherwise + // they should be found. + std::vector fileNames; + auto status = env_->GetChildren(dbname_, &fileNames); + ASSERT_OK(status); + + auto defaultEnv = Env::Default(); + int hits = 0; + for (auto it = fileNames.begin() ; it != fileNames.end(); ++it) { + if ((*it == "..") || (*it == ".")) { + continue; + } + auto filePath = dbname_ + "/" + *it; + unique_ptr seqFile; + auto envOptions = EnvOptions(CurrentOptions()); + status = defaultEnv->NewSequentialFile(filePath, &seqFile, envOptions); + ASSERT_OK(status); + + uint64_t fileSize; + status = defaultEnv->GetFileSize(filePath, &fileSize); + ASSERT_OK(status); + + std::string scratch; + scratch.reserve(fileSize); + Slice data; + status = seqFile->Read(fileSize, &data, (char*)scratch.data()); + ASSERT_OK(status); + + if (data.ToString().find("foo567") != std::string::npos) { + hits++; + //std::cout << "Hit in " << filePath << "\n"; + } + if (data.ToString().find("v1.fetdq") != std::string::npos) { + hits++; + //std::cout << "Hit in " << filePath << "\n"; + } + if (data.ToString().find("bar123") != std::string::npos) { + hits++; + //std::cout << "Hit in " << filePath << "\n"; + } + if (data.ToString().find("v2.dfgkjdfghsd") != std::string::npos) { + hits++; + //std::cout << "Hit in " << filePath << "\n"; + } + if (data.ToString().find("dfgk") != std::string::npos) { + hits++; + //std::cout << "Hit in " << filePath << "\n"; + } + } + if (encrypted_env_) { + ASSERT_EQ(hits, 0); + } else { + ASSERT_GE(hits, 4); + } +} + +#endif // ROCKSDB_LITE + +} // namespace rocksdb + +int main(int argc, char** argv) { + rocksdb::port::InstallStackTraceHandler(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/db/db_options_test.cc b/db/db_options_test.cc index f10b58cd8..1d43f6b3c 100644 --- a/db/db_options_test.cc +++ b/db/db_options_test.cc @@ -416,6 +416,7 @@ TEST_F(DBOptionsTest, DeleteObsoleteFilesPeriodChange) { TEST_F(DBOptionsTest, MaxOpenFilesChange) { SpecialEnv env(env_); Options options; + options.env = CurrentOptions().env; options.max_open_files = -1; Reopen(options); diff --git a/db/db_range_del_test.cc b/db/db_range_del_test.cc index fcc43d3f7..26e71e8f2 100644 --- a/db/db_range_del_test.cc +++ b/db/db_range_del_test.cc @@ -29,6 +29,9 @@ class DBRangeDelTest : public DBTestBase { // ROCKSDB_LITE #ifndef ROCKSDB_LITE TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) { + if (!IsMemoryMappedAccessSupported()) { + return; + } Options opts = CurrentOptions(); opts.table_factory.reset(new PlainTableFactory()); opts.prefix_extractor.reset(NewNoopTransform()); diff --git a/db/db_test2.cc b/db/db_test2.cc index b2b814961..b1e1da4e0 100644 --- a/db/db_test2.cc +++ b/db/db_test2.cc @@ -1030,6 +1030,7 @@ TEST_F(DBTest2, PresetCompressionDict) { const int kNumL0Files = 5; Options options; + options.env = CurrentOptions().env; // Make sure to use any custom env that the test is configured with. options.allow_concurrent_memtable_write = false; options.arena_block_size = kBlockSizeBytes; options.compaction_style = kCompactionStyleUniversal; diff --git a/db/db_test_util.cc b/db/db_test_util.cc index e199ac526..8de51b18c 100644 --- a/db/db_test_util.cc +++ b/db/db_test_util.cc @@ -9,6 +9,7 @@ #include "db/db_test_util.h" #include "db/forward_iterator.h" +#include "rocksdb/env_encryption.h" namespace rocksdb { @@ -42,9 +43,12 @@ SpecialEnv::SpecialEnv(Env* base) table_write_callback_ = nullptr; } +ROT13BlockCipher rot13Cipher_(16); + DBTestBase::DBTestBase(const std::string path) : mem_env_(!getenv("MEM_ENV") ? nullptr : new MockEnv(Env::Default())), - env_(new SpecialEnv(mem_env_ ? mem_env_ : Env::Default())), + encrypted_env_(!getenv("ENCRYPTED_ENV") ? nullptr : NewEncryptedEnv(mem_env_ ? mem_env_ : Env::Default(), new CTREncryptionProvider(rot13Cipher_))), + env_(new SpecialEnv(encrypted_env_ ? encrypted_env_ : (mem_env_ ? mem_env_ : Env::Default()))), option_config_(kDefault) { env_->SetBackgroundThreads(1, Env::LOW); env_->SetBackgroundThreads(1, Env::HIGH); @@ -281,6 +285,7 @@ Options DBTestBase::GetOptions( "NewWritableFile:O_DIRECT"); #endif + bool can_allow_mmap = IsMemoryMappedAccessSupported(); switch (option_config) { #ifndef ROCKSDB_LITE case kHashSkipList: @@ -291,14 +296,14 @@ Options DBTestBase::GetOptions( case kPlainTableFirstBytePrefix: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewFixedPrefixTransform(1)); - options.allow_mmap_reads = true; + options.allow_mmap_reads = can_allow_mmap; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; case kPlainTableCappedPrefix: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewCappedPrefixTransform(8)); - options.allow_mmap_reads = true; + options.allow_mmap_reads = can_allow_mmap; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; @@ -312,7 +317,7 @@ Options DBTestBase::GetOptions( case kPlainTableAllBytesPrefix: options.table_factory.reset(new PlainTableFactory()); options.prefix_extractor.reset(NewNoopTransform()); - options.allow_mmap_reads = true; + options.allow_mmap_reads = can_allow_mmap; options.max_sequential_skip_in_iterations = 999999; set_block_based_table_factory = false; break; @@ -364,7 +369,7 @@ Options DBTestBase::GetOptions( options.wal_dir = alternative_wal_dir_; // mmap reads should be orthogonal to WalDir setting, so we piggyback to // this option config to test mmap reads as well - options.allow_mmap_reads = true; + options.allow_mmap_reads = can_allow_mmap; break; case kManifestFileSize: options.max_manifest_file_size = 50; // 50 bytes @@ -384,7 +389,7 @@ Options DBTestBase::GetOptions( options.num_levels = 8; break; case kCompressedBlockCache: - options.allow_mmap_writes = true; + options.allow_mmap_writes = can_allow_mmap; table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024); break; case kInfiniteMaxOpenFiles: @@ -598,6 +603,10 @@ bool DBTestBase::IsDirectIOSupported() { return s.ok(); } +bool DBTestBase::IsMemoryMappedAccessSupported() const { + return (!encrypted_env_); +} + Status DBTestBase::Flush(int cf) { if (cf == 0) { return db_->Flush(FlushOptions()); diff --git a/db/db_test_util.h b/db/db_test_util.h index d8f9ba8d7..14d91ec2e 100644 --- a/db/db_test_util.h +++ b/db/db_test_util.h @@ -671,6 +671,7 @@ class DBTestBase : public testing::Test { std::string alternative_wal_dir_; std::string alternative_db_log_dir_; MockEnv* mem_env_; + Env* encrypted_env_; SpecialEnv* env_; DB* db_; std::vector handles_; @@ -774,6 +775,8 @@ class DBTestBase : public testing::Test { bool IsDirectIOSupported(); + bool IsMemoryMappedAccessSupported() const; + Status Flush(int cf = 0); Status Put(const Slice& k, const Slice& v, WriteOptions wo = WriteOptions()); diff --git a/db/listener_test.cc b/db/listener_test.cc index ba993e7b0..7ab49d074 100644 --- a/db/listener_test.cc +++ b/db/listener_test.cc @@ -114,6 +114,7 @@ TEST_F(EventListenerTest, OnSingleDBCompactionTest) { const int kNumL0Files = 4; Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.write_buffer_size = kEntrySize * kEntriesPerBuffer; options.compaction_style = kCompactionStyleLevel; @@ -233,6 +234,7 @@ class TestFlushListener : public EventListener { TEST_F(EventListenerTest, OnSingleDBFlushTest) { Options options; + options.env = CurrentOptions().env; options.write_buffer_size = k110KB; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; @@ -269,6 +271,7 @@ TEST_F(EventListenerTest, OnSingleDBFlushTest) { TEST_F(EventListenerTest, MultiCF) { Options options; + options.env = CurrentOptions().env; options.write_buffer_size = k110KB; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; @@ -304,6 +307,7 @@ TEST_F(EventListenerTest, MultiCF) { TEST_F(EventListenerTest, MultiDBMultiListeners) { Options options; + options.env = CurrentOptions().env; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // ROCKSDB_USING_THREAD_STATUS @@ -386,6 +390,7 @@ TEST_F(EventListenerTest, MultiDBMultiListeners) { TEST_F(EventListenerTest, DisableBGCompaction) { Options options; + options.env = CurrentOptions().env; #ifdef ROCKSDB_USING_THREAD_STATUS options.enable_thread_tracking = true; #endif // ROCKSDB_USING_THREAD_STATUS @@ -433,6 +438,7 @@ class TestCompactionReasonListener : public EventListener { TEST_F(EventListenerTest, CompactionReasonLevel) { Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); @@ -498,6 +504,7 @@ TEST_F(EventListenerTest, CompactionReasonLevel) { TEST_F(EventListenerTest, CompactionReasonUniversal) { Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); @@ -559,6 +566,7 @@ TEST_F(EventListenerTest, CompactionReasonUniversal) { TEST_F(EventListenerTest, CompactionReasonFIFO) { Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.memtable_factory.reset( new SpecialSkipListFactory(DBTestBase::kNumKeysByGenerateNewRandomFile)); @@ -781,6 +789,7 @@ TEST_F(EventListenerTest, ColumnFamilyHandleDeletionStartedListenerTest) { auto listener = std::make_shared(cfs); Options options; + options.env = CurrentOptions().env; options.create_if_missing = true; options.listeners.push_back(listener); CreateAndReopenWithCF(cfs, options); diff --git a/env/env_encryption.cc b/env/env_encryption.cc new file mode 100644 index 000000000..9c9fcd3fc --- /dev/null +++ b/env/env_encryption.cc @@ -0,0 +1,911 @@ +// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// This source code is also licensed under the GPLv2 license found in the +// COPYING file in the root directory of this source tree. + +#ifndef ROCKSDB_LITE + +#include +#include +#include + +#include "rocksdb/env_encryption.h" +#include "util/aligned_buffer.h" +#include "util/coding.h" +#include "util/random.h" + +#endif + +namespace rocksdb { + +#ifndef ROCKSDB_LITE + +class EncryptedSequentialFile : public SequentialFile { + private: + std::unique_ptr file_; + std::unique_ptr stream_; + uint64_t offset_; + size_t prefixLength_; + + public: + // Default ctor. Given underlying sequential file is supposed to be at + // offset == prefixLength. + EncryptedSequentialFile(SequentialFile* f, BlockAccessCipherStream* s, size_t prefixLength) + : file_(f), stream_(s), offset_(prefixLength), prefixLength_(prefixLength) { + } + + // Read up to "n" bytes from the file. "scratch[0..n-1]" may be + // written by this routine. Sets "*result" to the data that was + // read (including if fewer than "n" bytes were successfully read). + // May set "*result" to point at data in "scratch[0..n-1]", so + // "scratch[0..n-1]" must be live when "*result" is used. + // If an error was encountered, returns a non-OK status. + // + // REQUIRES: External synchronization + virtual Status Read(size_t n, Slice* result, char* scratch) override { + assert(scratch); + Status status = file_->Read(n, result, scratch); + if (!status.ok()) { + return status; + } + status = stream_->Decrypt(offset_, (char*)result->data(), result->size()); + offset_ += result->size(); // We've already ready data from disk, so update offset_ even if decryption fails. + return status; + } + + // Skip "n" bytes from the file. This is guaranteed to be no + // slower that reading the same data, but may be faster. + // + // If end of file is reached, skipping will stop at the end of the + // file, and Skip will return OK. + // + // REQUIRES: External synchronization + virtual Status Skip(uint64_t n) override { + auto status = file_->Skip(n); + if (!status.ok()) { + return status; + } + offset_ += n; + return status; + } + + // Indicates the upper layers if the current SequentialFile implementation + // uses direct IO. + virtual bool use_direct_io() const override { + return file_->use_direct_io(); + } + + // Use the returned alignment value to allocate + // aligned buffer for Direct I/O + virtual size_t GetRequiredBufferAlignment() const override { + return file_->GetRequiredBufferAlignment(); + } + + // Remove any kind of caching of data from the offset to offset+length + // of this file. If the length is 0, then it refers to the end of file. + // If the system is not caching the file contents, then this is a noop. + virtual Status InvalidateCache(size_t offset, size_t length) override { + return file_->InvalidateCache(offset + prefixLength_, length); + } + + // Positioned Read for direct I/O + // If Direct I/O enabled, offset, n, and scratch should be properly aligned + virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result, char* scratch) override { + assert(scratch); + offset += prefixLength_; // Skip prefix + auto status = file_->PositionedRead(offset, n, result, scratch); + if (!status.ok()) { + return status; + } + offset_ = offset + result->size(); + status = stream_->Decrypt(offset, (char*)result->data(), result->size()); + return status; + } + +}; + +// A file abstraction for randomly reading the contents of a file. +class EncryptedRandomAccessFile : public RandomAccessFile { + private: + std::unique_ptr file_; + std::unique_ptr stream_; + size_t prefixLength_; + + public: + EncryptedRandomAccessFile(RandomAccessFile* f, BlockAccessCipherStream* s, size_t prefixLength) + : file_(f), stream_(s), prefixLength_(prefixLength) { } + + // Read up to "n" bytes from the file starting at "offset". + // "scratch[0..n-1]" may be written by this routine. Sets "*result" + // to the data that was read (including if fewer than "n" bytes were + // successfully read). May set "*result" to point at data in + // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when + // "*result" is used. If an error was encountered, returns a non-OK + // status. + // + // Safe for concurrent use by multiple threads. + // If Direct I/O enabled, offset, n, and scratch should be aligned properly. + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { + assert(scratch); + offset += prefixLength_; + auto status = file_->Read(offset, n, result, scratch); + if (!status.ok()) { + return status; + } + status = stream_->Decrypt(offset, (char*)result->data(), result->size()); + return status; + } + + // Readahead the file starting from offset by n bytes for caching. + virtual Status Prefetch(uint64_t offset, size_t n) override { + //return Status::OK(); + return file_->Prefetch(offset + prefixLength_, n); + } + + // Tries to get an unique ID for this file that will be the same each time + // the file is opened (and will stay the same while the file is open). + // Furthermore, it tries to make this ID at most "max_size" bytes. If such an + // ID can be created this function returns the length of the ID and places it + // in "id"; otherwise, this function returns 0, in which case "id" + // may not have been modified. + // + // This function guarantees, for IDs from a given environment, two unique ids + // cannot be made equal to eachother by adding arbitrary bytes to one of + // them. That is, no unique ID is the prefix of another. + // + // This function guarantees that the returned ID will not be interpretable as + // a single varint. + // + // Note: these IDs are only valid for the duration of the process. + virtual size_t GetUniqueId(char* id, size_t max_size) const override { + return file_->GetUniqueId(id, max_size); + }; + + virtual void Hint(AccessPattern pattern) override { + file_->Hint(pattern); + } + + // Indicates the upper layers if the current RandomAccessFile implementation + // uses direct IO. + virtual bool use_direct_io() const override { + return file_->use_direct_io(); + } + + // Use the returned alignment value to allocate + // aligned buffer for Direct I/O + virtual size_t GetRequiredBufferAlignment() const override { + return file_->GetRequiredBufferAlignment(); + } + + // Remove any kind of caching of data from the offset to offset+length + // of this file. If the length is 0, then it refers to the end of file. + // If the system is not caching the file contents, then this is a noop. + virtual Status InvalidateCache(size_t offset, size_t length) override { + return file_->InvalidateCache(offset + prefixLength_, length); + } +}; + +// A file abstraction for sequential writing. The implementation +// must provide buffering since callers may append small fragments +// at a time to the file. +class EncryptedWritableFile : public WritableFileWrapper { + private: + std::unique_ptr file_; + std::unique_ptr stream_; + size_t prefixLength_; + + public: + // Default ctor. Prefix is assumed to be written already. + EncryptedWritableFile(WritableFile* f, BlockAccessCipherStream* s, size_t prefixLength) + : WritableFileWrapper(f), file_(f), stream_(s), prefixLength_(prefixLength) { } + + Status Append(const Slice& data) override { + AlignedBuffer buf; + Status status; + Slice dataToAppend(data); + if (data.size() > 0) { + auto offset = file_->GetFileSize(); // size including prefix + // Encrypt in cloned buffer + buf.Alignment(GetRequiredBufferAlignment()); + buf.AllocateNewBuffer(data.size()); + memmove(buf.BufferStart(), data.data(), data.size()); + status = stream_->Encrypt(offset, buf.BufferStart(), data.size()); + if (!status.ok()) { + return status; + } + dataToAppend = Slice(buf.BufferStart(), data.size()); + } + status = file_->Append(dataToAppend); + if (!status.ok()) { + return status; + } + return status; + } + + Status PositionedAppend(const Slice& data, uint64_t offset) override { + AlignedBuffer buf; + Status status; + Slice dataToAppend(data); + offset += prefixLength_; + if (data.size() > 0) { + // Encrypt in cloned buffer + buf.Alignment(GetRequiredBufferAlignment()); + buf.AllocateNewBuffer(data.size()); + memmove(buf.BufferStart(), data.data(), data.size()); + status = stream_->Encrypt(offset, buf.BufferStart(), data.size()); + if (!status.ok()) { + return status; + } + dataToAppend = Slice(buf.BufferStart(), data.size()); + } + status = file_->PositionedAppend(dataToAppend, offset); + if (!status.ok()) { + return status; + } + return status; + } + + // Indicates the upper layers if the current WritableFile implementation + // uses direct IO. + virtual bool use_direct_io() const override { return file_->use_direct_io(); } + + // Use the returned alignment value to allocate + // aligned buffer for Direct I/O + virtual size_t GetRequiredBufferAlignment() const override { return file_->GetRequiredBufferAlignment(); } + + /* + * Get the size of valid data in the file. + */ + virtual uint64_t GetFileSize() override { + return file_->GetFileSize() - prefixLength_; + } + + // Truncate is necessary to trim the file to the correct size + // before closing. It is not always possible to keep track of the file + // size due to whole pages writes. The behavior is undefined if called + // with other writes to follow. + virtual Status Truncate(uint64_t size) override { + return file_->Truncate(size + prefixLength_); + } + + // Remove any kind of caching of data from the offset to offset+length + // of this file. If the length is 0, then it refers to the end of file. + // If the system is not caching the file contents, then this is a noop. + // This call has no effect on dirty pages in the cache. + virtual Status InvalidateCache(size_t offset, size_t length) override { + return file_->InvalidateCache(offset + prefixLength_, length); + } + + // Sync a file range with disk. + // offset is the starting byte of the file range to be synchronized. + // nbytes specifies the length of the range to be synchronized. + // This asks the OS to initiate flushing the cached data to disk, + // without waiting for completion. + // Default implementation does nothing. + virtual Status RangeSync(uint64_t offset, uint64_t nbytes) override { + return file_->RangeSync(offset + prefixLength_, nbytes); + } + + // PrepareWrite performs any necessary preparation for a write + // before the write actually occurs. This allows for pre-allocation + // of space on devices where it can result in less file + // fragmentation and/or less waste from over-zealous filesystem + // pre-allocation. + virtual void PrepareWrite(size_t offset, size_t len) override { + file_->PrepareWrite(offset + prefixLength_, len); + } + + // Pre-allocates space for a file. + virtual Status Allocate(uint64_t offset, uint64_t len) override { + return file_->Allocate(offset + prefixLength_, len); + } +}; + +// A file abstraction for random reading and writing. +class EncryptedRandomRWFile : public RandomRWFile { + private: + std::unique_ptr file_; + std::unique_ptr stream_; + size_t prefixLength_; + + public: + EncryptedRandomRWFile(RandomRWFile* f, BlockAccessCipherStream* s, size_t prefixLength) + : file_(f), stream_(s), prefixLength_(prefixLength) {} + + // Indicates if the class makes use of direct I/O + // If false you must pass aligned buffer to Write() + virtual bool use_direct_io() const override { return file_->use_direct_io(); } + + // Use the returned alignment value to allocate + // aligned buffer for Direct I/O + virtual size_t GetRequiredBufferAlignment() const override { + return file_->GetRequiredBufferAlignment(); + } + + // Write bytes in `data` at offset `offset`, Returns Status::OK() on success. + // Pass aligned buffer when use_direct_io() returns true. + virtual Status Write(uint64_t offset, const Slice& data) override { + AlignedBuffer buf; + Status status; + Slice dataToWrite(data); + offset += prefixLength_; + if (data.size() > 0) { + // Encrypt in cloned buffer + buf.Alignment(GetRequiredBufferAlignment()); + buf.AllocateNewBuffer(data.size()); + memmove(buf.BufferStart(), data.data(), data.size()); + status = stream_->Encrypt(offset, buf.BufferStart(), data.size()); + if (!status.ok()) { + return status; + } + dataToWrite = Slice(buf.BufferStart(), data.size()); + } + status = file_->Write(offset, dataToWrite); + return status; + } + + // Read up to `n` bytes starting from offset `offset` and store them in + // result, provided `scratch` size should be at least `n`. + // Returns Status::OK() on success. + virtual Status Read(uint64_t offset, size_t n, Slice* result, char* scratch) const override { + assert(scratch); + offset += prefixLength_; + auto status = file_->Read(offset, n, result, scratch); + if (!status.ok()) { + return status; + } + status = stream_->Decrypt(offset, (char*)result->data(), result->size()); + return status; + } + + virtual Status Flush() override { + return file_->Flush(); + } + + virtual Status Sync() override { + return file_->Sync(); + } + + virtual Status Fsync() override { + return file_->Fsync(); + } + + virtual Status Close() override { + return file_->Close(); + } +}; + +// EncryptedEnv implements an Env wrapper that adds encryption to files stored on disk. +class EncryptedEnv : public EnvWrapper { + public: + EncryptedEnv(Env* base_env, EncryptionProvider *provider) + : EnvWrapper(base_env) { + provider_ = provider; + } + + // NewSequentialFile opens a file for sequential reading. + virtual Status NewSequentialFile(const std::string& fname, + std::unique_ptr* result, + const EnvOptions& options) override { + result->reset(); + if (options.use_mmap_reads) { + return Status::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + auto status = EnvWrapper::NewSequentialFile(fname, &underlying, options); + if (!status.ok()) { + return status; + } + // Read prefix (if needed) + AlignedBuffer prefixBuf; + Slice prefixSlice; + size_t prefixLength = provider_->GetPrefixLength(); + if (prefixLength > 0) { + // Read prefix + prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); + prefixBuf.AllocateNewBuffer(prefixLength); + status = underlying->Read(prefixLength, &prefixSlice, prefixBuf.BufferStart()); + if (!status.ok()) { + return status; + } + } + // Create cipher stream + std::unique_ptr stream; + status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); + if (!status.ok()) { + return status; + } + (*result) = std::unique_ptr(new EncryptedSequentialFile(underlying.release(), stream.release(), prefixLength)); + return Status::OK(); + } + + // NewRandomAccessFile opens a file for random read access. + virtual Status NewRandomAccessFile(const std::string& fname, + unique_ptr* result, + const EnvOptions& options) override { + result->reset(); + if (options.use_mmap_reads) { + return Status::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + auto status = EnvWrapper::NewRandomAccessFile(fname, &underlying, options); + if (!status.ok()) { + return status; + } + // Read prefix (if needed) + AlignedBuffer prefixBuf; + Slice prefixSlice; + size_t prefixLength = provider_->GetPrefixLength(); + if (prefixLength > 0) { + // Read prefix + prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); + prefixBuf.AllocateNewBuffer(prefixLength); + status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart()); + if (!status.ok()) { + return status; + } + } + // Create cipher stream + std::unique_ptr stream; + status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); + if (!status.ok()) { + return status; + } + (*result) = std::unique_ptr(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength)); + return Status::OK(); + } + + // NewWritableFile opens a file for sequential writing. + virtual Status NewWritableFile(const std::string& fname, + unique_ptr* result, + const EnvOptions& options) override { + result->reset(); + if (options.use_mmap_writes) { + return Status::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + Status status = EnvWrapper::NewWritableFile(fname, &underlying, options); + if (!status.ok()) { + return status; + } + // Initialize & write prefix (if needed) + AlignedBuffer prefixBuf; + Slice prefixSlice; + size_t prefixLength = provider_->GetPrefixLength(); + if (prefixLength > 0) { + // Initialize prefix + prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); + prefixBuf.AllocateNewBuffer(prefixLength); + provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength); + prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength); + // Write prefix + status = underlying->Append(prefixSlice); + if (!status.ok()) { + return status; + } + } + // Create cipher stream + std::unique_ptr stream; + status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); + if (!status.ok()) { + return status; + } + (*result) = std::unique_ptr(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength)); + return Status::OK(); + } + + // Create an object that writes to a new file with the specified + // name. Deletes any existing file with the same name and creates a + // new file. On success, stores a pointer to the new file in + // *result and returns OK. On failure stores nullptr in *result and + // returns non-OK. + // + // The returned file will only be accessed by one thread at a time. + virtual Status ReopenWritableFile(const std::string& fname, + unique_ptr* result, + const EnvOptions& options) override { + result->reset(); + if (options.use_mmap_writes) { + return Status::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + Status status = EnvWrapper::ReopenWritableFile(fname, &underlying, options); + if (!status.ok()) { + return status; + } + // Initialize & write prefix (if needed) + AlignedBuffer prefixBuf; + Slice prefixSlice; + size_t prefixLength = provider_->GetPrefixLength(); + if (prefixLength > 0) { + // Initialize prefix + prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); + prefixBuf.AllocateNewBuffer(prefixLength); + provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength); + prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength); + // Write prefix + status = underlying->Append(prefixSlice); + if (!status.ok()) { + return status; + } + } + // Create cipher stream + std::unique_ptr stream; + status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); + if (!status.ok()) { + return status; + } + (*result) = std::unique_ptr(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength)); + return Status::OK(); + } + + // Reuse an existing file by renaming it and opening it as writable. + virtual Status ReuseWritableFile(const std::string& fname, + const std::string& old_fname, + unique_ptr* result, + const EnvOptions& options) override { + result->reset(); + if (options.use_mmap_writes) { + return Status::InvalidArgument(); + } + // Open file using underlying Env implementation + std::unique_ptr underlying; + Status status = EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options); + if (!status.ok()) { + return status; + } + // Initialize & write prefix (if needed) + AlignedBuffer prefixBuf; + Slice prefixSlice; + size_t prefixLength = provider_->GetPrefixLength(); + if (prefixLength > 0) { + // Initialize prefix + prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); + prefixBuf.AllocateNewBuffer(prefixLength); + provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength); + prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength); + // Write prefix + status = underlying->Append(prefixSlice); + if (!status.ok()) { + return status; + } + } + // Create cipher stream + std::unique_ptr stream; + status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); + if (!status.ok()) { + return status; + } + (*result) = std::unique_ptr(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength)); + return Status::OK(); + } + + // Open `fname` for random read and write, if file dont exist the file + // will be created. On success, stores a pointer to the new file in + // *result and returns OK. On failure returns non-OK. + // + // The returned file will only be accessed by one thread at a time. + virtual Status NewRandomRWFile(const std::string& fname, + unique_ptr* result, + const EnvOptions& options) override { + result->reset(); + if (options.use_mmap_reads || options.use_mmap_writes) { + return Status::InvalidArgument(); + } + // Check file exists + bool isNewFile = !FileExists(fname).ok(); + + // Open file using underlying Env implementation + std::unique_ptr underlying; + Status status = EnvWrapper::NewRandomRWFile(fname, &underlying, options); + if (!status.ok()) { + return status; + } + // Read or Initialize & write prefix (if needed) + AlignedBuffer prefixBuf; + Slice prefixSlice; + size_t prefixLength = provider_->GetPrefixLength(); + if (prefixLength > 0) { + prefixBuf.Alignment(underlying->GetRequiredBufferAlignment()); + prefixBuf.AllocateNewBuffer(prefixLength); + if (!isNewFile) { + // File already exists, read prefix + status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart()); + if (!status.ok()) { + return status; + } + } else { + // File is new, initialize & write prefix + provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength); + prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength); + // Write prefix + status = underlying->Write(0, prefixSlice); + if (!status.ok()) { + return status; + } + } + } + // Create cipher stream + std::unique_ptr stream; + status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream); + if (!status.ok()) { + return status; + } + (*result) = std::unique_ptr(new EncryptedRandomRWFile(underlying.release(), stream.release(), prefixLength)); + return Status::OK(); + } + + // Store in *result the attributes of the children of the specified directory. + // In case the implementation lists the directory prior to iterating the files + // and files are concurrently deleted, the deleted files will be omitted from + // result. + // The name attributes are relative to "dir". + // Original contents of *results are dropped. + // Returns OK if "dir" exists and "*result" contains its children. + // NotFound if "dir" does not exist, the calling process does not have + // permission to access "dir", or if "dir" is invalid. + // IOError if an IO Error was encountered + virtual Status GetChildrenFileAttributes(const std::string& dir, std::vector* result) override { + auto status = EnvWrapper::GetChildrenFileAttributes(dir, result); + if (!status.ok()) { + return status; + } + size_t prefixLength = provider_->GetPrefixLength(); + for (auto it = std::begin(*result); it!=std::end(*result); ++it) { + assert(it->size_bytes >= prefixLength); + it->size_bytes -= prefixLength; + } + return Status::OK(); + } + + // Store the size of fname in *file_size. + virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) override { + auto status = EnvWrapper::GetFileSize(fname, file_size); + if (!status.ok()) { + return status; + } + size_t prefixLength = provider_->GetPrefixLength(); + assert(*file_size >= prefixLength); + *file_size -= prefixLength; + return Status::OK(); + } + + private: + EncryptionProvider *provider_; +}; + + +// Returns an Env that encrypts data when stored on disk and decrypts data when +// read from disk. +Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider) { + return new EncryptedEnv(base_env, provider); +} + +// Encrypt one or more (partial) blocks of data at the file offset. +// Length of data is given in dataSize. +Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t dataSize) { + // Calculate block index + auto blockSize = BlockSize(); + uint64_t blockIndex = fileOffset / blockSize; + size_t blockOffset = fileOffset % blockSize; + unique_ptr blockBuffer; + + std::string scratch; + AllocateScratch(scratch); + + // Encrypt individual blocks. + while (1) { + char *block = data; + size_t n = std::min(dataSize, blockSize - blockOffset); + if (n != blockSize) { + // We're not encrypting a full block. + // Copy data to blockBuffer + if (!blockBuffer.get()) { + // Allocate buffer + blockBuffer = unique_ptr(new char[blockSize]); + } + block = blockBuffer.get(); + // Copy plain data to block buffer + memmove(block + blockOffset, data, n); + } + auto status = EncryptBlock(blockIndex, block, (char*)scratch.data()); + if (!status.ok()) { + return status; + } + if (block != data) { + // Copy encrypted data back to `data`. + memmove(data, block + blockOffset, n); + } + dataSize -= n; + if (dataSize == 0) { + return Status::OK(); + } + data += n; + blockOffset = 0; + blockIndex++; + } +} + +// Decrypt one or more (partial) blocks of data at the file offset. +// Length of data is given in dataSize. +Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t dataSize) { + // Calculate block index + auto blockSize = BlockSize(); + uint64_t blockIndex = fileOffset / blockSize; + size_t blockOffset = fileOffset % blockSize; + unique_ptr blockBuffer; + + std::string scratch; + AllocateScratch(scratch); + + // Decrypt individual blocks. + while (1) { + char *block = data; + size_t n = std::min(dataSize, blockSize - blockOffset); + if (n != blockSize) { + // We're not decrypting a full block. + // Copy data to blockBuffer + if (!blockBuffer.get()) { + // Allocate buffer + blockBuffer = unique_ptr(new char[blockSize]); + } + block = blockBuffer.get(); + // Copy encrypted data to block buffer + memmove(block + blockOffset, data, n); + } + auto status = DecryptBlock(blockIndex, block, (char*)scratch.data()); + if (!status.ok()) { + return status; + } + if (block != data) { + // Copy decrypted data back to `data`. + memmove(data, block + blockOffset, n); + } + dataSize -= n; + if (dataSize == 0) { + return Status::OK(); + } + data += n; + blockOffset = 0; + blockIndex++; + } +} + +// Encrypt a block of data. +// Length of data is equal to BlockSize(). +Status ROT13BlockCipher::Encrypt(char *data) { + for (size_t i = 0; i < blockSize_; ++i) { + data[i] += 13; + } + return Status::OK(); +} + +// Decrypt a block of data. +// Length of data is equal to BlockSize(). +Status ROT13BlockCipher::Decrypt(char *data) { + return Encrypt(data); +} + +// Allocate scratch space which is passed to EncryptBlock/DecryptBlock. +void CTRCipherStream::AllocateScratch(std::string& scratch) { + auto blockSize = cipher_.BlockSize(); + scratch.reserve(blockSize); +} + +// Encrypt a block of data at the given block index. +// Length of data is equal to BlockSize(); +Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char *data, char* scratch) { + + // Create nonce + counter + auto blockSize = cipher_.BlockSize(); + memmove(scratch, iv_.data(), blockSize); + EncodeFixed64(scratch, blockIndex + initialCounter_); + + // Encrypt nonce+counter + auto status = cipher_.Encrypt(scratch); + if (!status.ok()) { + return status; + } + + // XOR data with ciphertext. + for (size_t i = 0; i < blockSize; i++) { + data[i] = data[i] ^ scratch[i]; + } + return Status::OK(); +} + +// Decrypt a block of data at the given block index. +// Length of data is equal to BlockSize(); +Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char *data, char* scratch) { + // For CTR decryption & encryption are the same + return EncryptBlock(blockIndex, data, scratch); +} + +// GetPrefixLength returns the length of the prefix that is added to every file +// and used for storing encryption options. +// For optimal performance, the prefix length should be a multiple of +// the a page size. +size_t CTREncryptionProvider::GetPrefixLength() { + return defaultPrefixLength; +} + +// decodeCTRParameters decodes the initial counter & IV from the given +// (plain text) prefix. +static void decodeCTRParameters(const char *prefix, size_t blockSize, uint64_t &initialCounter, Slice &iv) { + // First block contains 64-bit initial counter + initialCounter = DecodeFixed64(prefix); + // Second block contains IV + iv = Slice(prefix + blockSize, blockSize); +} + +// CreateNewPrefix initialized an allocated block of prefix memory +// for a new file. +Status CTREncryptionProvider::CreateNewPrefix(const std::string& fname, char *prefix, size_t prefixLength) { + // Create & seed rnd. + Random rnd((uint32_t)Env::Default()->NowMicros()); + // Fill entire prefix block with random values. + for (size_t i = 0; i < prefixLength; i++) { + prefix[i] = rnd.Uniform(256) & 0xFF; + } + // Take random data to extract initial counter & IV + auto blockSize = cipher_.BlockSize(); + uint64_t initialCounter; + Slice prefixIV; + decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV); + + // Now populate the rest of the prefix, starting from the third block. + PopulateSecretPrefixPart(prefix + (2 * blockSize), prefixLength - (2 * blockSize), blockSize); + + // Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial counter & IV unencrypted) + CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter); + auto status = cipherStream.Encrypt(0, prefix + (2 * blockSize), prefixLength - (2 * blockSize)); + if (!status.ok()) { + return status; + } + return Status::OK(); +} + +// PopulateSecretPrefixPart initializes the data into a new prefix block +// in plain text. +// Returns the amount of space (starting from the start of the prefix) +// that has been initialized. +size_t CTREncryptionProvider::PopulateSecretPrefixPart(char *prefix, size_t prefixLength, size_t blockSize) { + // Nothing to do here, put in custom data in override when needed. + return 0; +} + +Status CTREncryptionProvider::CreateCipherStream(const std::string& fname, const EnvOptions& options, Slice &prefix, unique_ptr* result) { + // Read plain text part of prefix. + auto blockSize = cipher_.BlockSize(); + uint64_t initialCounter; + Slice iv; + decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv); + + // Decrypt the encrypted part of the prefix, starting from block 2 (block 0, 1 with initial counter & IV are unencrypted) + CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter); + auto status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize), prefix.size() - (2 * blockSize)); + if (!status.ok()) { + return status; + } + + // Create cipher stream + return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv, prefix, result); +} + +// CreateCipherStreamFromPrefix creates a block access cipher stream for a file given +// given name and options. The given prefix is already decrypted. +Status CTREncryptionProvider::CreateCipherStreamFromPrefix(const std::string& fname, const EnvOptions& options, + uint64_t initialCounter, const Slice& iv, const Slice& prefix, unique_ptr* result) { + (*result) = unique_ptr(new CTRCipherStream(cipher_, iv.data(), initialCounter)); + return Status::OK(); +} + +#endif // ROCKSDB_LITE + +} // namespace rocksdb diff --git a/include/rocksdb/env.h b/include/rocksdb/env.h index 49c981433..bcfd24c1f 100644 --- a/include/rocksdb/env.h +++ b/include/rocksdb/env.h @@ -1055,6 +1055,10 @@ class EnvWrapper : public Env { return target_->GetThreadID(); } + std::string GenerateUniqueId() override { + return target_->GenerateUniqueId(); + } + private: Env* target_; }; diff --git a/include/rocksdb/env_encryption.h b/include/rocksdb/env_encryption.h new file mode 100644 index 000000000..764fffba7 --- /dev/null +++ b/include/rocksdb/env_encryption.h @@ -0,0 +1,198 @@ +// Copyright (c) 2016-present, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. +// This source code is also licensed under the GPLv2 license found in the +// COPYING file in the root directory of this source tree. + +#pragma once + +#if !defined(ROCKSDB_LITE) + +#include + +#include "env.h" + +namespace rocksdb { + +class EncryptionProvider; + +// Returns an Env that encrypts data when stored on disk and decrypts data when +// read from disk. +Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider); + +// BlockAccessCipherStream is the base class for any cipher stream that +// supports random access at block level (without requiring data from other blocks). +// E.g. CTR (Counter operation mode) supports this requirement. +class BlockAccessCipherStream { + public: + virtual ~BlockAccessCipherStream() {}; + + // BlockSize returns the size of each block supported by this cipher stream. + virtual size_t BlockSize() = 0; + + // Encrypt one or more (partial) blocks of data at the file offset. + // Length of data is given in dataSize. + virtual Status Encrypt(uint64_t fileOffset, char *data, size_t dataSize); + + // Decrypt one or more (partial) blocks of data at the file offset. + // Length of data is given in dataSize. + virtual Status Decrypt(uint64_t fileOffset, char *data, size_t dataSize); + + protected: + // Allocate scratch space which is passed to EncryptBlock/DecryptBlock. + virtual void AllocateScratch(std::string&) = 0; + + // Encrypt a block of data at the given block index. + // Length of data is equal to BlockSize(); + virtual Status EncryptBlock(uint64_t blockIndex, char *data, char* scratch) = 0; + + // Decrypt a block of data at the given block index. + // Length of data is equal to BlockSize(); + virtual Status DecryptBlock(uint64_t blockIndex, char *data, char* scratch) = 0; +}; + +// BlockCipher +class BlockCipher { + public: + virtual ~BlockCipher() {}; + + // BlockSize returns the size of each block supported by this cipher stream. + virtual size_t BlockSize() = 0; + + // Encrypt a block of data. + // Length of data is equal to BlockSize(). + virtual Status Encrypt(char *data) = 0; + + // Decrypt a block of data. + // Length of data is equal to BlockSize(). + virtual Status Decrypt(char *data) = 0; +}; + +// Implements a BlockCipher using ROT13. +// +// Note: This is a sample implementation of BlockCipher, +// it is NOT considered safe and should NOT be used in production. +class ROT13BlockCipher : public BlockCipher { + private: + size_t blockSize_; + public: + ROT13BlockCipher(size_t blockSize) + : blockSize_(blockSize) {} + virtual ~ROT13BlockCipher() {}; + + // BlockSize returns the size of each block supported by this cipher stream. + virtual size_t BlockSize() override { return blockSize_; } + + // Encrypt a block of data. + // Length of data is equal to BlockSize(). + virtual Status Encrypt(char *data) override; + + // Decrypt a block of data. + // Length of data is equal to BlockSize(). + virtual Status Decrypt(char *data) override; +}; + +// CTRCipherStream implements BlockAccessCipherStream using an +// Counter operations mode. +// See https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation +// +// Note: This is a possible implementation of BlockAccessCipherStream, +// it is considered suitable for use. +class CTRCipherStream final : public BlockAccessCipherStream { + private: + BlockCipher& cipher_; + std::string iv_; + uint64_t initialCounter_; + public: + CTRCipherStream(BlockCipher& c, const char *iv, uint64_t initialCounter) + : cipher_(c), iv_(iv, c.BlockSize()), initialCounter_(initialCounter) {}; + virtual ~CTRCipherStream() {}; + + // BlockSize returns the size of each block supported by this cipher stream. + virtual size_t BlockSize() override { return cipher_.BlockSize(); } + + protected: + // Allocate scratch space which is passed to EncryptBlock/DecryptBlock. + virtual void AllocateScratch(std::string&) override; + + // Encrypt a block of data at the given block index. + // Length of data is equal to BlockSize(); + virtual Status EncryptBlock(uint64_t blockIndex, char *data, char *scratch) override; + + // Decrypt a block of data at the given block index. + // Length of data is equal to BlockSize(); + virtual Status DecryptBlock(uint64_t blockIndex, char *data, char *scratch) override; +}; + +// The encryption provider is used to create a cipher stream for a specific file. +// The returned cipher stream will be used for actual encryption/decryption +// actions. +class EncryptionProvider { + public: + virtual ~EncryptionProvider() {}; + + // GetPrefixLength returns the length of the prefix that is added to every file + // and used for storing encryption options. + // For optimal performance, the prefix length should be a multiple of + // the a page size. + virtual size_t GetPrefixLength() = 0; + + // CreateNewPrefix initialized an allocated block of prefix memory + // for a new file. + virtual Status CreateNewPrefix(const std::string& fname, char *prefix, size_t prefixLength) = 0; + + // CreateCipherStream creates a block access cipher stream for a file given + // given name and options. + virtual Status CreateCipherStream(const std::string& fname, const EnvOptions& options, + Slice& prefix, unique_ptr* result) = 0; +}; + +// This encryption provider uses a CTR cipher stream, with a given block cipher +// and IV. +// +// Note: This is a possible implementation of EncryptionProvider, +// it is considered suitable for use, provided a safe BlockCipher is used. +class CTREncryptionProvider : public EncryptionProvider { + private: + BlockCipher& cipher_; + protected: + const static size_t defaultPrefixLength = 4096; + + public: + CTREncryptionProvider(BlockCipher& c) + : cipher_(c) {}; + virtual ~CTREncryptionProvider() {} + + // GetPrefixLength returns the length of the prefix that is added to every file + // and used for storing encryption options. + // For optimal performance, the prefix length should be a multiple of + // the a page size. + virtual size_t GetPrefixLength() override; + + // CreateNewPrefix initialized an allocated block of prefix memory + // for a new file. + virtual Status CreateNewPrefix(const std::string& fname, char *prefix, size_t prefixLength) override; + + // CreateCipherStream creates a block access cipher stream for a file given + // given name and options. + virtual Status CreateCipherStream(const std::string& fname, const EnvOptions& options, + Slice& prefix, unique_ptr* result) override; + + protected: + // PopulateSecretPrefixPart initializes the data into a new prefix block + // that will be encrypted. This function will store the data in plain text. + // It will be encrypted later (before written to disk). + // Returns the amount of space (starting from the start of the prefix) + // that has been initialized. + virtual size_t PopulateSecretPrefixPart(char *prefix, size_t prefixLength, size_t blockSize); + + // CreateCipherStreamFromPrefix creates a block access cipher stream for a file given + // given name and options. The given prefix is already decrypted. + virtual Status CreateCipherStreamFromPrefix(const std::string& fname, const EnvOptions& options, + uint64_t initialCounter, const Slice& iv, const Slice& prefix, unique_ptr* result); +}; + +} // namespace rocksdb + +#endif // !defined(ROCKSDB_LITE) diff --git a/src.mk b/src.mk index 732bf3a5a..7ada535b6 100644 --- a/src.mk +++ b/src.mk @@ -57,6 +57,7 @@ LIB_SOURCES = \ db/write_thread.cc \ env/env.cc \ env/env_chroot.cc \ + env/env_encryption.cc \ env/env_hdfs.cc \ env/env_posix.cc \ env/io_posix.cc \ @@ -241,6 +242,7 @@ MAIN_SOURCES = \ db/db_compaction_filter_test.cc \ db/db_compaction_test.cc \ db/db_dynamic_level_test.cc \ + db/db_encryption_test.cc \ db/db_flush_test.cc \ db/db_inplace_update_test.cc \ db/db_io_failure_test.cc \