From 60bf2b7d4a78f2477ba2ecc231c0963080c5362d Mon Sep 17 00:00:00 2001 From: Simha Venkataramaiah Date: Thu, 22 Aug 2013 14:32:53 -0700 Subject: [PATCH] Add APIs to query SST file metadata and to delete specific SST files Summary: An api to query the level, key ranges, size etc for each SST file and an api to delete a specific file from the db and all associated state in the bookkeeping datastructures. Notes: Editing the manifest version does not release the obsolete files right away. However deleting the file directly will mess up the iterator. We may need a more aggressive/timely file deletion api. I have used std::unique_ptr - will switch to boost:: since this is external. thoughts? Unit test is fragile right now as it expects the compaction at certain levels. Test Plan: unittest Reviewers: dhruba, vamsi, emayanke CC: zshao, leveldb, haobo Task ID: # Blame Rev: --- Makefile | 6 +- db/db_impl.cc | 58 +++++++++++++ db/db_impl.h | 4 + db/deletefile_test.cc | 188 ++++++++++++++++++++++++++++++++++++++++++ db/version_set.cc | 33 ++++++++ db/version_set.h | 6 ++ include/leveldb/db.h | 29 +++++++ 7 files changed, 323 insertions(+), 1 deletion(-) create mode 100644 db/deletefile_test.cc diff --git a/Makefile b/Makefile index f2e2a0f2b..b4051f9dc 100644 --- a/Makefile +++ b/Makefile @@ -64,7 +64,8 @@ TESTS = \ ttl_test \ version_edit_test \ version_set_test \ - write_batch_test + write_batch_test\ + deletefile_test TOOLS = \ sst_dump \ @@ -266,6 +267,9 @@ write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) merge_test: db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) + $(MEMENVLIBRARY) : $(MEMENVOBJECTS) rm -f $@ $(AR) -rs $@ $(MEMENVOBJECTS) diff --git a/db/db_impl.cc b/db/db_impl.cc index 
5ea8ef33e..f76cf6bf9 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -2951,6 +2951,64 @@ inline void DBImpl::DelayLoggingAndReset() { } } +Status DBImpl::DeleteFile(std::string name) { + uint64_t number; + FileType type; + if (!ParseFileName(name, &number, &type) || + (type != kTableFile)) { + Log(options_.info_log, "DeleteFile #%lld FAILED. Invalid file name\n", + static_cast(number)); + return Status::InvalidArgument("Invalid file name"); + } + + int level; + FileMetaData metadata; + int maxlevel = NumberLevels(); + VersionEdit edit(maxlevel); + MutexLock l(&mutex_); + Status status = + versions_->GetMetadataForFile(number, &level, &metadata); + if (!status.ok()) { + Log(options_.info_log, "DeleteFile #%lld FAILED. File not found\n", + static_cast(number)); + return Status::InvalidArgument("File not found"); + } + assert((level > 0) && (level < maxlevel)); + + // If the file is being compacted no need to delete. + if (metadata.being_compacted) { + Log(options_.info_log, + "DeleteFile #%lld Skipped. File about to be compacted\n", + static_cast(number)); + return Status::OK(); + } + + // Only the files in the last level can be deleted externally. + // This is to make sure that any deletion tombstones are not + // lost. Check that the level passed is the last level. + for (int i = level + 1; i < maxlevel; i++) { + if (versions_->NumLevelFiles(i) != 0) { + Log(options_.info_log, + "DeleteFile #%lld FAILED. 
File not in last level\n", + static_cast(number)); + return Status::InvalidArgument("File not in last level"); + } + } + + edit.DeleteFile(level, number); + status = versions_->LogAndApply(&edit, &mutex_); + if (status.ok()) { + DeleteObsoleteFiles(); + } + return status; +} + +void DBImpl::GetLiveFilesMetaData( + std::vector *metadata) { + MutexLock l(&mutex_); + return versions_->GetLiveFilesMetaData(metadata); +} + // Default implementations of convenience methods that subclasses of DB // can call if they wish Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { diff --git a/db/db_impl.h b/db/db_impl.h index cea8041e4..f5625eb14 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -78,6 +78,10 @@ class DBImpl : public DB { virtual SequenceNumber GetLatestSequenceNumber(); virtual Status GetUpdatesSince(SequenceNumber seq_number, unique_ptr* iter); + virtual Status DeleteFile(std::string name); + + virtual void GetLiveFilesMetaData( + std::vector *metadata); // Extra methods (for testing) that are not in the public DB interface diff --git a/db/deletefile_test.cc b/db/deletefile_test.cc new file mode 100644 index 000000000..639137246 --- /dev/null +++ b/db/deletefile_test.cc @@ -0,0 +1,188 @@ +// Copyright (c) 2011 The LevelDB Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. See the AUTHORS file for names of contributors. 
+ +#include "leveldb/db.h" +#include "db/db_impl.h" +#include "db/filename.h" +#include "db/version_set.h" +#include "db/write_batch_internal.h" +#include "util/testharness.h" +#include "util/testutil.h" +#include "boost/lexical_cast.hpp" +#include "leveldb/env.h" +#include +#include +#include +#include + +namespace leveldb { + +class DeleteFileTest { + public: + std::string dbname_; + Options options_; + DB* db_; + Env* env_; + int numlevels_; + + DeleteFileTest() { + db_ = nullptr; + env_ = Env::Default(); + options_.write_buffer_size = 1024*1024*1000; + options_.target_file_size_base = 1024*1024*1000; + options_.max_bytes_for_level_base = 1024*1024*1000; + dbname_ = test::TmpDir() + "/deletefile_test"; + DestroyDB(dbname_, options_); + numlevels_ = 7; + ASSERT_OK(ReopenDB(true)); + } + + Status ReopenDB(bool create) { + delete db_; + if (create) { + DestroyDB(dbname_, options_); + } + db_ = nullptr; + options_.create_if_missing = create; + return DB::Open(options_, dbname_, &db_); + } + + void CloseDB() { + delete db_; + } + + void AddKeys(int numkeys, int startkey = 0) { + WriteOptions options; + options.sync = false; + ReadOptions roptions; + for (int i = startkey; i < (numkeys + startkey) ; i++) { + std::string temp = boost::lexical_cast(i); + Slice key(temp); + Slice value(temp); + ASSERT_OK(db_->Put(options, key, value)); + } + } + + int numKeysInLevels( + std::vector &metadata, + std::vector *keysperlevel = nullptr) { + + if (keysperlevel != nullptr) { + keysperlevel->resize(numlevels_); + } + + int numKeys = 0; + for (size_t i = 0; i < metadata.size(); i++) { + int startkey = atoi(metadata[i].smallestkey.c_str()); + int endkey = atoi(metadata[i].largestkey.c_str()); + int numkeysinfile = (endkey - startkey + 1); + numKeys += numkeysinfile; + if (keysperlevel != nullptr) { + (*keysperlevel)[(int)metadata[i].level] += numkeysinfile; + } + fprintf(stderr, "level %d name %s smallest %s largest %s\n", + metadata[i].level, metadata[i].name.c_str(), + 
metadata[i].smallestkey.c_str(), + metadata[i].largestkey.c_str()); + } + return numKeys; + } + + void CreateTwoLevels() { + AddKeys(50000, 10000); + DBImpl* dbi = reinterpret_cast(db_); + ASSERT_OK(dbi->TEST_CompactMemTable()); + ASSERT_OK(dbi->TEST_WaitForCompactMemTable()); + + AddKeys(50000, 10000); + ASSERT_OK(dbi->TEST_CompactMemTable()); + ASSERT_OK(dbi->TEST_WaitForCompactMemTable()); + } + +}; + +TEST(DeleteFileTest, AddKeysAndQueryLevels) { + CreateTwoLevels(); + std::vector metadata; + std::vector keysinlevel; + db_->GetLiveFilesMetaData(&metadata); + + std::string level1file = ""; + int level1keycount = 0; + std::string level2file = ""; + int level2keycount = 0; + int level1index = 0; + int level2index = 1; + + ASSERT_EQ((int)metadata.size(), 2); + if (metadata[0].level == 2) { + level1index = 1; + level2index = 0; + } + + level1file = metadata[level1index].name; + int startkey = atoi(metadata[level1index].smallestkey.c_str()); + int endkey = atoi(metadata[level1index].largestkey.c_str()); + level1keycount = (endkey - startkey + 1); + level2file = metadata[level2index].name; + startkey = atoi(metadata[level2index].smallestkey.c_str()); + endkey = atoi(metadata[level2index].largestkey.c_str()); + level2keycount = (endkey - startkey + 1); + + // Controlled setup. Levels 1 and 2 should both have 50K keys. + // This is a little fragile as it depends on the current + // compaction heuristics. + ASSERT_EQ(level1keycount, 50000); + ASSERT_EQ(level2keycount, 50000); + + Status status = db_->DeleteFile("0.sst"); + ASSERT_TRUE(status.IsInvalidArgument()); + + // intermediate level files cannot be deleted. + status = db_->DeleteFile(level1file); + ASSERT_TRUE(status.IsInvalidArgument()); + + // Lowest level file deletion should succeed.
+ ASSERT_OK(db_->DeleteFile(level2file)); + + CloseDB(); +} + + +TEST(DeleteFileTest, DeleteFileWithIterator) { + CreateTwoLevels(); + ReadOptions options; + Iterator* it = db_->NewIterator(options); + std::vector metadata; + db_->GetLiveFilesMetaData(&metadata); + + std::string level2file = ""; + + ASSERT_EQ((int)metadata.size(), 2); + if (metadata[0].level == 1) { + level2file = metadata[1].name; + } else { + level2file = metadata[0].name; + } + + Status status = db_->DeleteFile(level2file); + fprintf(stderr, "Deletion status %s: %s\n", + level2file.c_str(), status.ToString().c_str()); + ASSERT_TRUE(status.ok()); + it->SeekToFirst(); + int numKeysIterated = 0; + while(it->Valid()) { + numKeysIterated++; + it->Next(); + } + ASSERT_EQ(numKeysIterated, 50000); + delete it; + CloseDB(); +} +} //namespace leveldb + +int main(int argc, char** argv) { + return leveldb::test::RunAllTests(); +} + diff --git a/db/version_set.cc b/db/version_set.cc index 7e3e5d0ec..29741ba14 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -2637,6 +2637,39 @@ void VersionSet::SetupOtherInputs(Compaction* c) { c->edit_->SetCompactPointer(level, largest); } +Status VersionSet::GetMetadataForFile( + uint64_t number, + int *filelevel, + FileMetaData *meta) { + for (int level = 0; level < NumberLevels(); level++) { + const std::vector& files = current_->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + if (files[i]->number == number) { + *meta = *files[i]; + *filelevel = level; + return Status::OK(); + } + } + } + return Status::NotFound("File not present in any level"); +} + +void VersionSet::GetLiveFilesMetaData( + std::vector * metadata) { + for (int level = 0; level < NumberLevels(); level++) { + const std::vector& files = current_->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + LiveFileMetaData filemetadata; + filemetadata.name = TableFileName("", files[i]->number); + filemetadata.level = level; + filemetadata.size = files[i]->file_size; + 
filemetadata.smallestkey = files[i]->smallest.user_key().ToString(); + filemetadata.largestkey = files[i]->largest.user_key().ToString(); + metadata->push_back(filemetadata); + } + } +} + Compaction* VersionSet::CompactRange( int level, const InternalKey* begin, diff --git a/db/version_set.h b/db/version_set.h index f122ea444..ddfd17cdc 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -405,6 +405,12 @@ class VersionSet { double MaxBytesForLevel(int level); + Status GetMetadataForFile( + uint64_t number, int *filelevel, FileMetaData *metadata); + + void GetLiveFilesMetaData( + std::vector *metadata); + private: class Builder; struct ManifestWriter; diff --git a/include/leveldb/db.h b/include/leveldb/db.h index d0316ee4f..2298e4597 100644 --- a/include/leveldb/db.h +++ b/include/leveldb/db.h @@ -28,6 +28,20 @@ struct WriteOptions; struct FlushOptions; class WriteBatch; +// Metadata associated with each SST file. +struct LiveFileMetaData { + // Name of the file + std::string name; + // Level at which this file resides. + int level; + // File size in bytes. + size_t size; + // Smallest user defined key in the file. + std::string smallestkey; + // Largest user defined key in the file. + std::string largestkey; +}; + // Abstract handle to particular state of a DB. // A Snapshot is an immutable object and can therefore be safely // accessed from multiple threads without any external synchronization. @@ -223,6 +237,8 @@ class DB { // Allow compactions to delete obselete files. virtual Status EnableFileDeletions() = 0; + // THIS METHOD IS DEPRECATED. Use GetLiveFilesMetaData to get more + // detailed information on the live files. // Retrieve the list of all files in the database. The files are // relative to the dbname and are not absolute paths. This list // can be used to generate a backup. The valid size of the manifest @@ -256,6 +272,19 @@ class DB { // an update is read.
virtual Status GetUpdatesSince(SequenceNumber seq_number, unique_ptr* iter) = 0; + + // Delete the named file from the db directory and update the internal + // state to reflect that. + virtual Status DeleteFile(std::string name) { + return Status::OK(); + } + + // Returns a list of all table files with their level, start key + // and end key + virtual void GetLiveFilesMetaData( + std::vector *metadata) { + } + private: // No copying allowed DB(const DB&);