Add APIs to query SST file metadata and to delete specific SST files

Summary: Adds an API to query the level, key range, size, etc. of each SST file, and an API to delete a specific file from the db together with all associated state in the bookkeeping data structures.

Notes: Editing the manifest version does not release the obsolete files right away. However deleting the file directly will mess up the iterator. We may need a more aggressive/timely file deletion api.

I have used std::unique_ptr - will switch to boost:: since this is external. thoughts?

Unit test is fragile right now as it expects the compaction at certain levels.

Test Plan: unittest

Reviewers: dhruba, vamsi, emayanke

CC: zshao, leveldb, haobo

Task ID: #

Blame Rev:
main
Simha Venkataramaiah 11 years ago
parent bc8eed12d9
commit 60bf2b7d4a
  1. 6
      Makefile
  2. 58
      db/db_impl.cc
  3. 4
      db/db_impl.h
  4. 188
      db/deletefile_test.cc
  5. 33
      db/version_set.cc
  6. 6
      db/version_set.h
  7. 29
      include/leveldb/db.h

@ -64,7 +64,8 @@ TESTS = \
ttl_test \ ttl_test \
version_edit_test \ version_edit_test \
version_set_test \ version_set_test \
write_batch_test write_batch_test\
deletefile_test
TOOLS = \ TOOLS = \
sst_dump \ sst_dump \
@ -266,6 +267,9 @@ write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
merge_test: db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS) merge_test: db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) $(CXX) db/merge_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
# Link the DeleteFile/GetLiveFilesMetaData unit test. $(COVERAGEFLAGS) is
# passed so this binary is linked like the neighbouring merge_test rule.
deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
	$(CXX) db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS)
$(MEMENVLIBRARY) : $(MEMENVOBJECTS) $(MEMENVLIBRARY) : $(MEMENVOBJECTS)
rm -f $@ rm -f $@
$(AR) -rs $@ $(MEMENVOBJECTS) $(AR) -rs $@ $(MEMENVOBJECTS)

@ -2951,6 +2951,64 @@ inline void DBImpl::DelayLoggingAndReset() {
} }
} }
// Removes the table file called `name` from the current version and then
// triggers obsolete-file cleanup.
//
// Returns:
//   - InvalidArgument if `name` does not parse as a table file name, if no
//     level contains a file with that number, or if any higher-numbered level
//     still holds files (the target is then not in the last populated level).
//   - OK without deleting anything if the file is marked being_compacted.
//   - Otherwise the status of applying the version edit.
Status DBImpl::DeleteFile(std::string name) {
  // Initialized so it is never read indeterminate if parsing fails.
  uint64_t number = 0;
  FileType type;
  if (!ParseFileName(name, &number, &type) ||
      (type != kTableFile)) {
    // The parsed number is meaningless on this path, so report the name the
    // caller actually passed in.
    Log(options_.info_log, "DeleteFile %s FAILED. Invalid file name\n",
        name.c_str());
    return Status::InvalidArgument("Invalid file name");
  }

  int level;
  FileMetaData metadata;
  int maxlevel = NumberLevels();
  VersionEdit edit(maxlevel);

  // Everything below reads or mutates version state, so take mutex_ first.
  MutexLock l(&mutex_);
  Status status =
      versions_->GetMetadataForFile(number, &level, &metadata);
  if (!status.ok()) {
    Log(options_.info_log, "DeleteFile #%lld FAILED. File not found\n",
        static_cast<unsigned long long>(number));
    return Status::InvalidArgument("File not found");
  }
  // NOTE(review): GetMetadataForFile also searches level 0, so a level-0 file
  // trips this assert in debug builds and falls through to the last-level
  // check in release builds. Confirm whether level-0 files should be rejected
  // with an explicit error instead.
  assert((level > 0) && (level < maxlevel));

  // If the file is being compacted no need to delete.
  if (metadata.being_compacted) {
    Log(options_.info_log,
        "DeleteFile #%lld Skipped. File about to be compacted\n",
        static_cast<unsigned long long>(number));
    return Status::OK();
  }

  // Only the files in the last level can be deleted externally.
  // This is to make sure that any deletion tombstones are not
  // lost. Check that the level passed is the last level.
  for (int i = level + 1; i < maxlevel; i++) {
    if (versions_->NumLevelFiles(i) != 0) {
      Log(options_.info_log,
          "DeleteFile #%lld FAILED. File not in last level\n",
          static_cast<unsigned long long>(number));
      return Status::InvalidArgument("File not in last level");
    }
  }

  // Record the removal in a version edit and apply it; files are only
  // cleaned up once the edit has been applied successfully.
  edit.DeleteFile(level, number);
  status = versions_->LogAndApply(&edit, &mutex_);
  if (status.ok()) {
    DeleteObsoleteFiles();
  }
  return status;
}
// Appends one LiveFileMetaData entry per table file in the current version
// to `metadata`. The version set is queried with mutex_ held.
void DBImpl::GetLiveFilesMetaData(
    std::vector<LiveFileMetaData> *metadata) {
  MutexLock guard(&mutex_);
  versions_->GetLiveFilesMetaData(metadata);
}
// Default implementations of convenience methods that subclasses of DB // Default implementations of convenience methods that subclasses of DB
// can call if they wish // can call if they wish
Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) { Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {

@ -78,6 +78,10 @@ class DBImpl : public DB {
virtual SequenceNumber GetLatestSequenceNumber(); virtual SequenceNumber GetLatestSequenceNumber();
virtual Status GetUpdatesSince(SequenceNumber seq_number, virtual Status GetUpdatesSince(SequenceNumber seq_number,
unique_ptr<TransactionLogIterator>* iter); unique_ptr<TransactionLogIterator>* iter);
// Deletes the table file called `name` from the database (overrides the
// default in DB). Returns a non-OK status when the file cannot be deleted.
virtual Status DeleteFile(std::string name);
// Appends metadata (name, level, size, smallest/largest user key) for each
// table file in the current version to `metadata`.
virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData> *metadata);
// Extra methods (for testing) that are not in the public DB interface // Extra methods (for testing) that are not in the public DB interface

@ -0,0 +1,188 @@
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "leveldb/db.h"
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "util/testharness.h"
#include "util/testutil.h"
#include "boost/lexical_cast.hpp"
#include "leveldb/env.h"
#include <vector>
#include <boost/algorithm/string.hpp>
#include <stdlib.h>
#include <map>
namespace leveldb {
// Test fixture for DB::DeleteFile and DB::GetLiveFilesMetaData.
//
// The constructor wipes and recreates a scratch database under the test tmp
// dir. Buffer and file-size options are set very large so that the fixture
// decides when memtables are flushed (see CreateTwoLevels).
class DeleteFileTest {
 public:
  std::string dbname_;
  Options options_;
  DB* db_;
  Env* env_;
  int numlevels_;

  DeleteFileTest() {
    db_ = nullptr;
    env_ = Env::Default();
    options_.write_buffer_size = 1024*1024*1000;
    options_.target_file_size_base = 1024*1024*1000;
    options_.max_bytes_for_level_base = 1024*1024*1000;
    dbname_ = test::TmpDir() + "/deletefile_test";
    DestroyDB(dbname_, options_);
    numlevels_ = 7;
    ASSERT_OK(ReopenDB(true));
  }

  // Releases the database even when a test body does not call CloseDB().
  ~DeleteFileTest() {
    CloseDB();
  }

  // Closes any open database and opens dbname_ again. When `create` is true
  // the on-disk database is destroyed first and recreated on open.
  Status ReopenDB(bool create) {
    delete db_;
    if (create) {
      DestroyDB(dbname_, options_);
    }
    db_ = nullptr;
    options_.create_if_missing = create;
    return DB::Open(options_, dbname_, &db_);
  }

  // Closes the database. Safe to call more than once: the pointer is cleared
  // so later CloseDB()/ReopenDB() calls do not delete it a second time.
  void CloseDB() {
    delete db_;
    db_ = nullptr;
  }

  // Writes `numkeys` entries whose key and value are both the decimal text of
  // the integers startkey, startkey + 1, ...
  void AddKeys(int numkeys, int startkey = 0) {
    WriteOptions options;
    options.sync = false;
    for (int i = startkey; i < (numkeys + startkey) ; i++) {
      std::string temp = boost::lexical_cast<std::string>(i);
      Slice key(temp);
      Slice value(temp);
      ASSERT_OK(db_->Put(options, key, value));
    }
  }

  // Estimates the number of keys described by `metadata` as
  // (largest - smallest + 1) per file, treating the keys as decimal integers.
  // Prints one line per file to stderr. If `keysperlevel` is non-null it is
  // reset to numlevels_ zeroed slots and filled with the per-level totals.
  // Returns the total across all files.
  int numKeysInLevels(
    std::vector<LiveFileMetaData> &metadata,
    std::vector<int> *keysperlevel = nullptr) {

    if (keysperlevel != nullptr) {
      // assign() rather than resize() so stale counts from a reused vector
      // are not carried into this tally.
      keysperlevel->assign(numlevels_, 0);
    }

    int numKeys = 0;
    for (size_t i = 0; i < metadata.size(); i++) {
      int startkey = atoi(metadata[i].smallestkey.c_str());
      int endkey = atoi(metadata[i].largestkey.c_str());
      int numkeysinfile = (endkey - startkey + 1);
      numKeys += numkeysinfile;
      if (keysperlevel != nullptr) {
        (*keysperlevel)[metadata[i].level] += numkeysinfile;
      }
      fprintf(stderr, "level %d name %s smallest %s largest %s\n",
              metadata[i].level, metadata[i].name.c_str(),
              metadata[i].smallestkey.c_str(),
              metadata[i].largestkey.c_str());
    }
    return numKeys;
  }

  // Writes the same 50000 keys (10000..59999) twice, flushing the memtable
  // after each batch, so that two table files are produced.
  void CreateTwoLevels() {
    AddKeys(50000, 10000);
    // db_ was created by DB::Open; the TEST_* hooks live on DBImpl.
    DBImpl* dbi = static_cast<DBImpl*>(db_);
    ASSERT_OK(dbi->TEST_CompactMemTable());
    ASSERT_OK(dbi->TEST_WaitForCompactMemTable());

    AddKeys(50000, 10000);
    ASSERT_OK(dbi->TEST_CompactMemTable());
    ASSERT_OK(dbi->TEST_WaitForCompactMemTable());
  }

};
// Builds two table files, checks the metadata reported for each, and verifies
// that DeleteFile rejects a bogus name and a non-last-level file but accepts
// the file in the last populated level.
TEST(DeleteFileTest, AddKeysAndQueryLevels) {
  CreateTwoLevels();
  std::vector<LiveFileMetaData> metadata;
  db_->GetLiveFilesMetaData(&metadata);
  ASSERT_EQ((int)metadata.size(), 2);

  // The two entries may come back in either order; work out which entry is
  // the level-2 file and treat the other one as the level-1 file.
  int level1index = 0;
  int level2index = 1;
  if (metadata[0].level == 2) {
    level1index = 1;
    level2index = 0;
  }

  std::string level1file = metadata[level1index].name;
  int startkey = atoi(metadata[level1index].smallestkey.c_str());
  int endkey = atoi(metadata[level1index].largestkey.c_str());
  int level1keycount = (endkey - startkey + 1);

  std::string level2file = metadata[level2index].name;
  startkey = atoi(metadata[level2index].smallestkey.c_str());
  endkey = atoi(metadata[level2index].largestkey.c_str());
  int level2keycount = (endkey - startkey + 1);

  // Controlled setup. Levels 1 and 2 should each hold a file spanning 50K
  // keys. This is a little fragile as it depends on the current
  // compaction heuristics.
  ASSERT_EQ(level1keycount, 50000);
  ASSERT_EQ(level2keycount, 50000);

  // A name that does not correspond to any live table file is rejected.
  Status status = db_->DeleteFile("0.sst");
  ASSERT_TRUE(status.IsInvalidArgument());

  // intermediate level files cannot be deleted.
  status = db_->DeleteFile(level1file);
  ASSERT_TRUE(status.IsInvalidArgument());

  // Lowest level file deletion should succeed.
  ASSERT_OK(db_->DeleteFile(level2file));

  CloseDB();
}
// Opens an iterator, deletes the last-level file underneath it, and checks
// that the iterator still walks all 50000 keys.
TEST(DeleteFileTest, DeleteFileWithIterator) {
  CreateTwoLevels();

  // The iterator is created before the deletion on purpose.
  ReadOptions read_opts;
  Iterator* iter = db_->NewIterator(read_opts);

  std::vector<LiveFileMetaData> files;
  db_->GetLiveFilesMetaData(&files);
  ASSERT_EQ((int)files.size(), 2);

  // Pick whichever of the two entries is not the level-1 file.
  const std::string level2file =
      (files[0].level == 1) ? files[1].name : files[0].name;

  Status status = db_->DeleteFile(level2file);
  fprintf(stderr, "Deletion status %s: %s\n",
          level2file.c_str(), status.ToString().c_str());
  ASSERT_TRUE(status.ok());

  // Count every entry the iterator yields.
  int visited = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    visited++;
  }
  ASSERT_EQ(visited, 50000);

  delete iter;
  CloseDB();
}
} //namespace leveldb
// Test binary entry point: runs every registered test and returns the
// harness result as the process exit code. argc/argv are not used.
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

@ -2637,6 +2637,39 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
c->edit_->SetCompactPointer(level, largest); c->edit_->SetCompactPointer(level, largest);
} }
// Looks for the file numbered `number` in every level of the current version,
// starting at level 0. On a match, copies the file's metadata into `*meta`,
// stores the level in `*filelevel` and returns OK; otherwise returns NotFound
// and leaves both outputs untouched.
Status VersionSet::GetMetadataForFile(
    uint64_t number,
    int *filelevel,
    FileMetaData *meta) {
  for (int lvl = 0; lvl < NumberLevels(); ++lvl) {
    for (const FileMetaData* candidate : current_->files_[lvl]) {
      if (candidate->number != number) {
        continue;
      }
      *meta = *candidate;
      *filelevel = lvl;
      return Status::OK();
    }
  }
  return Status::NotFound("File not present in any level");
}
void VersionSet::GetLiveFilesMetaData(
std::vector<LiveFileMetaData> * metadata) {
for (int level = 0; level < NumberLevels(); level++) {
const std::vector<FileMetaData*>& files = current_->files_[level];
for (size_t i = 0; i < files.size(); i++) {
LiveFileMetaData filemetadata;
filemetadata.name = TableFileName("", files[i]->number);
filemetadata.level = level;
filemetadata.size = files[i]->file_size;
filemetadata.smallestkey = files[i]->smallest.user_key().ToString();
filemetadata.largestkey = files[i]->largest.user_key().ToString();
metadata->push_back(filemetadata);
}
}
}
Compaction* VersionSet::CompactRange( Compaction* VersionSet::CompactRange(
int level, int level,
const InternalKey* begin, const InternalKey* begin,

@ -405,6 +405,12 @@ class VersionSet {
double MaxBytesForLevel(int level); double MaxBytesForLevel(int level);
// Finds the file numbered `number` in the current version. On success stores
// its level in `*filelevel`, copies its metadata into `*metadata` and returns
// OK; returns NotFound if no level contains the file.
Status GetMetadataForFile(
uint64_t number, int *filelevel, FileMetaData *metadata);
// Appends a LiveFileMetaData record for every file in the current version
// to `metadata`.
void GetLiveFilesMetaData(
std::vector<LiveFileMetaData> *metadata);
private: private:
class Builder; class Builder;
struct ManifestWriter; struct ManifestWriter;

@ -28,6 +28,20 @@ struct WriteOptions;
struct FlushOptions; struct FlushOptions;
class WriteBatch; class WriteBatch;
// Metadata associated with each SST file.
// The numeric fields are zero-initialized so that a default-constructed
// record never exposes indeterminate values.
struct LiveFileMetaData {
  // Name of the file
  std::string name;
  // Level at which this file resides.
  int level = 0;
  // File size in bytes.
  size_t size = 0;
  // Smallest user defined key in the file.
  std::string smallestkey;
  // Largest user defined key in the file.
  std::string largestkey;
};
// Abstract handle to particular state of a DB. // Abstract handle to particular state of a DB.
// A Snapshot is an immutable object and can therefore be safely // A Snapshot is an immutable object and can therefore be safely
// accessed from multiple threads without any external synchronization. // accessed from multiple threads without any external synchronization.
@ -223,6 +237,8 @@ class DB {
// Allow compactions to delete obselete files. // Allow compactions to delete obselete files.
virtual Status EnableFileDeletions() = 0; virtual Status EnableFileDeletions() = 0;
// THIS METHOD IS DEPRECATED. Use GetLiveFilesMetaData to get more
// detailed information on the live files.
// Retrieve the list of all files in the database. The files are // Retrieve the list of all files in the database. The files are
// relative to the dbname and are not absolute paths. This list // relative to the dbname and are not absolute paths. This list
// can be used to generate a backup. The valid size of the manifest // can be used to generate a backup. The valid size of the manifest
@ -256,6 +272,19 @@ class DB {
// an update is read. // an update is read.
virtual Status GetUpdatesSince(SequenceNumber seq_number, virtual Status GetUpdatesSince(SequenceNumber seq_number,
unique_ptr<TransactionLogIterator>* iter) = 0; unique_ptr<TransactionLogIterator>* iter) = 0;
// Delete the file name from the db directory and update the internal
// state to reflect that.
// The default implementation deletes nothing, so it reports NotSupported
// rather than claiming success; implementations that can delete files
// override this.
virtual Status DeleteFile(std::string name) {
  return Status::NotSupported("DeleteFile is not implemented");
}
// Returns a list of all table files with their level, start key
// and end key
// The default implementation is a no-op: it leaves `metadata` unchanged.
virtual void GetLiveFilesMetaData(
std::vector<LiveFileMetaData> *metadata) {
}
private: private:
// No copying allowed // No copying allowed
DB(const DB&); DB(const DB&);

Loading…
Cancel
Save