A number of bugfixes:

- Added DB::CompactRange() method.

  Changed manual compaction code so it breaks up compactions of
  big ranges into smaller compactions.

  Changed the code that pushes the output of memtable compactions
  to higher levels to obey the grandparent constraint: i.e., we
  must never have a single file in level L that overlaps too
  much data in level L+1 (to avoid very expensive compactions of that
  file from level L into level L+1).

  Added code to pretty-print internal keys.

- Fixed bug where we would not detect overlap with files in
  level-0 because we were incorrectly using binary search
  on an array of files with overlapping ranges.

  Added "leveldb.sstables" property that can be used to dump
  all of the sstables and ranges that make up the db state.

- Removing post_write_snapshot support.  Email to leveldb mailing
  list brought up no users, just confusion from one person about
  what it meant.

- Fixing static_cast char to unsigned on BIG_ENDIAN platforms.

  Fixes Issue 35 and Issue 36.

- Comment clarification to address leveldb Issue 37.

- Change license in posix_logger.h to match other files.

- Fixed a build problem where uint32 was used instead of uint32_t.

Sync with upstream @24408625
main
Gabor Cselle 13 years ago
parent 26db4d971a
commit 299ccedfec
  1. 5
      build_detect_platform
  2. 6
      db/corruption_test.cc
  3. 15
      db/db_bench.cc
  4. 86
      db/db_impl.cc
  5. 14
      db/db_impl.h
  6. 157
      db/db_test.cc
  7. 12
      db/dbformat.cc
  8. 2
      db/dbformat.h
  9. 14
      db/version_edit.cc
  10. 199
      db/version_set.cc
  11. 38
      db/version_set.h
  12. 57
      db/version_set_test.cc
  13. 22
      doc/index.html
  14. 15
      include/leveldb/db.h
  15. 8
      include/leveldb/env.h
  16. 15
      include/leveldb/options.h
  17. 8
      util/coding.h
  18. 5
      util/posix_logger.h

@ -35,11 +35,6 @@ case `uname -s` in
echo "PLATFORM_CFLAGS=-D_REENTRANT -DOS_FREEBSD" >> build_config.mk echo "PLATFORM_CFLAGS=-D_REENTRANT -DOS_FREEBSD" >> build_config.mk
echo "PLATFORM_LDFLAGS=-lpthread" >> build_config.mk echo "PLATFORM_LDFLAGS=-lpthread" >> build_config.mk
;; ;;
GNU/kFreeBSD)
PLATFORM=OS_FREEBSD
echo "PLATFORM_CFLAGS=-pthread -DOS_FREEBSD" >> build_config.mk
echo "PLATFORM_LDFLAGS=-lpthread -lrt" >> build_config.mk
;;
*) *)
echo "Unknown platform!" echo "Unknown platform!"
exit 1 exit 1

@ -229,8 +229,8 @@ TEST(CorruptionTest, TableFile) {
Build(100); Build(100);
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable(); dbi->TEST_CompactMemTable();
dbi->TEST_CompactRange(0, "", "~"); dbi->TEST_CompactRange(0, NULL, NULL);
dbi->TEST_CompactRange(1, "", "~"); dbi->TEST_CompactRange(1, NULL, NULL);
Corrupt(kTableFile, 100, 1); Corrupt(kTableFile, 100, 1);
Check(99, 99); Check(99, 99);
@ -278,7 +278,7 @@ TEST(CorruptionTest, CorruptedDescriptor) {
ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello"));
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
dbi->TEST_CompactMemTable(); dbi->TEST_CompactMemTable();
dbi->TEST_CompactRange(0, "", "~"); dbi->TEST_CompactRange(0, NULL, NULL);
Corrupt(kDescriptorFile, 0, 1000); Corrupt(kDescriptorFile, 0, 1000);
Status s = TryReopen(); Status s = TryReopen();

@ -796,20 +796,7 @@ class Benchmark {
} }
void Compact(ThreadState* thread) { void Compact(ThreadState* thread) {
DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); db_->CompactRange(NULL, NULL);
dbi->TEST_CompactMemTable();
int max_level_with_files = 1;
for (int level = 1; level < config::kNumLevels; level++) {
std::string property;
char name[100];
snprintf(name, sizeof(name), "leveldb.num-files-at-level%d", level);
if (db_->GetProperty(name, &property) && atoi(property.c_str()) > 0) {
max_level_with_files = level;
}
}
for (int level = 0; level < max_level_with_files; level++) {
dbi->TEST_CompactRange(level, "", "~");
}
} }
void PrintStats() { void PrintStats() {

@ -454,13 +454,8 @@ Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
if (s.ok() && meta.file_size > 0) { if (s.ok() && meta.file_size > 0) {
const Slice min_user_key = meta.smallest.user_key(); const Slice min_user_key = meta.smallest.user_key();
const Slice max_user_key = meta.largest.user_key(); const Slice max_user_key = meta.largest.user_key();
if (base != NULL && !base->OverlapInLevel(0, min_user_key, max_user_key)) { if (base != NULL) {
// Push the new sstable to a higher level if possible to reduce level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
// expensive manifest file ops.
while (level < config::kMaxMemCompactLevel &&
!base->OverlapInLevel(level + 1, min_user_key, max_user_key)) {
level++;
}
} }
edit->AddFile(level, meta.number, meta.file_size, edit->AddFile(level, meta.number, meta.file_size,
meta.smallest, meta.largest); meta.smallest, meta.largest);
@ -506,26 +501,56 @@ Status DBImpl::CompactMemTable() {
return s; return s;
} }
void DBImpl::TEST_CompactRange( void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
int level, int max_level_with_files = 1;
const std::string& begin, {
const std::string& end) { MutexLock l(&mutex_);
Version* base = versions_->current();
for (int level = 1; level < config::kNumLevels; level++) {
if (base->OverlapInLevel(level, begin, end)) {
max_level_with_files = level;
}
}
}
TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
for (int level = 0; level < max_level_with_files; level++) {
TEST_CompactRange(level, begin, end);
}
}
void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
assert(level >= 0); assert(level >= 0);
assert(level + 1 < config::kNumLevels); assert(level + 1 < config::kNumLevels);
InternalKey begin_storage, end_storage;
ManualCompaction manual;
manual.level = level;
manual.done = false;
if (begin == NULL) {
manual.begin = NULL;
} else {
begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
manual.begin = &begin_storage;
}
if (end == NULL) {
manual.end = NULL;
} else {
end_storage = InternalKey(*end, 0, static_cast<ValueType>(0));
manual.end = &end_storage;
}
MutexLock l(&mutex_); MutexLock l(&mutex_);
while (!manual.done) {
while (manual_compaction_ != NULL) { while (manual_compaction_ != NULL) {
bg_cv_.Wait(); bg_cv_.Wait();
} }
ManualCompaction manual;
manual.level = level;
manual.begin = begin;
manual.end = end;
manual_compaction_ = &manual; manual_compaction_ = &manual;
MaybeScheduleCompaction(); MaybeScheduleCompaction();
while (manual_compaction_ == &manual) { while (manual_compaction_ == &manual) {
bg_cv_.Wait(); bg_cv_.Wait();
} }
}
} }
Status DBImpl::TEST_CompactMemTable() { Status DBImpl::TEST_CompactMemTable() {
@ -590,12 +615,20 @@ void DBImpl::BackgroundCompaction() {
Compaction* c; Compaction* c;
bool is_manual = (manual_compaction_ != NULL); bool is_manual = (manual_compaction_ != NULL);
InternalKey manual_end;
if (is_manual) { if (is_manual) {
const ManualCompaction* m = manual_compaction_; ManualCompaction* m = manual_compaction_;
c = versions_->CompactRange( c = versions_->CompactRange(m->level, m->begin, m->end);
m->done = (c == NULL);
if (c != NULL) {
manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
}
Log(options_.info_log,
"Manual compaction at level-%d from %s .. %s; will stop at %s\n",
m->level, m->level,
InternalKey(m->begin, kMaxSequenceNumber, kValueTypeForSeek), (m->begin ? m->begin->DebugString().c_str() : "(begin)"),
InternalKey(m->end, 0, static_cast<ValueType>(0))); (m->end ? m->end->DebugString().c_str() : "(end)"),
(m->done ? "(end)" : manual_end.DebugString().c_str()));
} else { } else {
c = versions_->PickCompaction(); c = versions_->PickCompaction();
} }
@ -638,7 +671,13 @@ void DBImpl::BackgroundCompaction() {
} }
if (is_manual) { if (is_manual) {
// Mark it as done ManualCompaction* m = manual_compaction_;
if (!m->done) {
// We only compacted part of the requested range. Update *m
// to the range that is left to be compacted.
m->tmp_storage = manual_end;
m->begin = &m->tmp_storage;
}
manual_compaction_ = NULL; manual_compaction_ = NULL;
} }
} }
@ -1109,10 +1148,6 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
versions_->SetLastSequence(last_sequence); versions_->SetLastSequence(last_sequence);
} }
if (options.post_write_snapshot != NULL) {
*options.post_write_snapshot =
status.ok() ? snapshots_.New(last_sequence) : NULL;
}
ReleaseLoggingResponsibility(&self); ReleaseLoggingResponsibility(&self);
return status; return status;
} }
@ -1225,6 +1260,9 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
} }
} }
return true; return true;
} else if (in == "sstables") {
*value = versions_->current()->DebugString();
return true;
} }
return false; return false;

@ -38,14 +38,12 @@ class DBImpl : public DB {
virtual void ReleaseSnapshot(const Snapshot* snapshot); virtual void ReleaseSnapshot(const Snapshot* snapshot);
virtual bool GetProperty(const Slice& property, std::string* value); virtual bool GetProperty(const Slice& property, std::string* value);
virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes); virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
virtual void CompactRange(const Slice* begin, const Slice* end);
// Extra methods (for testing) that are not in the public DB interface // Extra methods (for testing) that are not in the public DB interface
// Compact any files in the named level that overlap [begin,end] // Compact any files in the named level that overlap [*begin,*end]
void TEST_CompactRange( void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
int level,
const std::string& begin,
const std::string& end);
// Force current memtable contents to be compacted. // Force current memtable contents to be compacted.
Status TEST_CompactMemTable(); Status TEST_CompactMemTable();
@ -145,8 +143,10 @@ class DBImpl : public DB {
// Information for a manual compaction // Information for a manual compaction
struct ManualCompaction { struct ManualCompaction {
int level; int level;
std::string begin; bool done;
std::string end; const InternalKey* begin; // NULL means beginning of key range
const InternalKey* end; // NULL means end of key range
InternalKey tmp_storage; // Used to keep track of compaction progress
}; };
ManualCompaction* manual_compaction_; ManualCompaction* manual_compaction_;

@ -195,6 +195,23 @@ class DBTest {
return result; return result;
} }
// Summarizes the number of table files in each level as a comma-separated
// list such as "2,1,1".  Everything after the last level that holds at least
// one file is trimmed, so a database with no table files yields "".
std::string FilesPerLevel() {
  std::string summary;
  int keep_len = 0;  // Length of "summary" through the last non-empty level
  int level = 0;
  while (level < config::kNumLevels) {
    const int num_files = NumTableFilesAtLevel(level);
    char piece[100];
    // Every entry except the first is preceded by a separator.
    const char* separator = "";
    if (level != 0) {
      separator = ",";
    }
    snprintf(piece, sizeof(piece), "%s%d", separator, num_files);
    summary.append(piece);
    if (num_files > 0) {
      keep_len = summary.size();
    }
    level++;
  }
  summary.resize(keep_len);
  return summary;
}
uint64_t Size(const Slice& start, const Slice& limit) { uint64_t Size(const Slice& start, const Slice& limit) {
Range r(start, limit); Range r(start, limit);
uint64_t size; uint64_t size;
@ -203,26 +220,23 @@ class DBTest {
} }
void Compact(const Slice& start, const Slice& limit) { void Compact(const Slice& start, const Slice& limit) {
dbfull()->TEST_CompactMemTable(); db_->CompactRange(&start, &limit);
int max_level_with_files = 1;
for (int level = 1; level < config::kNumLevels; level++) {
if (NumTableFilesAtLevel(level) > 0) {
max_level_with_files = level;
}
} }
for (int level = 0; level < max_level_with_files; level++) {
dbfull()->TEST_CompactRange(level, "", "~"); // Do n memtable compactions, each of which produces an sstable
// covering the range [small,large].
void MakeTables(int n, const std::string& small, const std::string& large) {
for (int i = 0; i < n; i++) {
Put(small, "begin");
Put(large, "end");
dbfull()->TEST_CompactMemTable();
} }
} }
// Prevent pushing of new sstables into deeper levels by adding // Prevent pushing of new sstables into deeper levels by adding
// tables that cover a specified range to all levels. // tables that cover a specified range to all levels.
void FillLevels(const std::string& smallest, const std::string& largest) { void FillLevels(const std::string& smallest, const std::string& largest) {
for (int level = 0; level < config::kNumLevels; level++) { MakeTables(config::kNumLevels, smallest, largest);
Put(smallest, "begin");
Put(largest, "end");
dbfull()->TEST_CompactMemTable();
}
} }
void DumpFileCounts(const char* label) { void DumpFileCounts(const char* label) {
@ -238,6 +252,12 @@ class DBTest {
} }
} }
// Fetches the "leveldb.sstables" property from db_ and returns it.  The
// return value of GetProperty() is not checked; the string stays empty
// unless GetProperty() writes to it.
std::string DumpSSTableList() {
  std::string sstables;
  db_->GetProperty("leveldb.sstables", &sstables);
  return sstables;
}
std::string IterStatus(Iterator* iter) { std::string IterStatus(Iterator* iter) {
std::string result; std::string result;
if (iter->Valid()) { if (iter->Valid()) {
@ -367,7 +387,7 @@ TEST(DBTest, GetEncountersEmptyLevel) {
} }
// Step 2: clear level 1 if necessary. // Step 2: clear level 1 if necessary.
dbfull()->TEST_CompactRange(1, "a", "z"); dbfull()->TEST_CompactRange(1, NULL, NULL);
ASSERT_EQ(NumTableFilesAtLevel(0), 1); ASSERT_EQ(NumTableFilesAtLevel(0), 1);
ASSERT_EQ(NumTableFilesAtLevel(1), 0); ASSERT_EQ(NumTableFilesAtLevel(1), 0);
ASSERT_EQ(NumTableFilesAtLevel(2), 1); ASSERT_EQ(NumTableFilesAtLevel(2), 1);
@ -693,7 +713,7 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) {
// Reopening moves updates to level-0 // Reopening moves updates to level-0
Reopen(&options); Reopen(&options);
dbfull()->TEST_CompactRange(0, "", Key(100000)); dbfull()->TEST_CompactRange(0, NULL, NULL);
ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_GT(NumTableFilesAtLevel(1), 1); ASSERT_GT(NumTableFilesAtLevel(1), 1);
@ -744,7 +764,7 @@ TEST(DBTest, SparseMerge) {
} }
Put("C", "vc"); Put("C", "vc");
dbfull()->TEST_CompactMemTable(); dbfull()->TEST_CompactMemTable();
dbfull()->TEST_CompactRange(0, "A", "Z"); dbfull()->TEST_CompactRange(0, NULL, NULL);
// Make sparse update // Make sparse update
Put("A", "va2"); Put("A", "va2");
@ -755,9 +775,9 @@ TEST(DBTest, SparseMerge) {
// Compactions should not cause us to create a situation where // Compactions should not cause us to create a situation where
// a file overlaps too much data at the next level. // a file overlaps too much data at the next level.
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
dbfull()->TEST_CompactRange(0, "", "z"); dbfull()->TEST_CompactRange(0, NULL, NULL);
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
dbfull()->TEST_CompactRange(1, "", "z"); dbfull()->TEST_CompactRange(1, NULL, NULL);
ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576); ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
} }
@ -808,9 +828,11 @@ TEST(DBTest, ApproximateSizes) {
ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000)); ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000)); ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
dbfull()->TEST_CompactRange(0, std::string cstart_str = Key(compact_start);
Key(compact_start), std::string cend_str = Key(compact_start + 9);
Key(compact_start + 9)); Slice cstart = cstart_str;
Slice cend = cend_str;
dbfull()->TEST_CompactRange(0, &cstart, &cend);
} }
ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(0), 0);
@ -850,7 +872,7 @@ TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000)); ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
dbfull()->TEST_CompactRange(0, Key(0), Key(100)); dbfull()->TEST_CompactRange(0, NULL, NULL);
} }
} }
@ -921,11 +943,12 @@ TEST(DBTest, HiddenValuesAreRemoved) {
ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000)); ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
db_->ReleaseSnapshot(snapshot); db_->ReleaseSnapshot(snapshot);
ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
dbfull()->TEST_CompactRange(0, "", "x"); Slice x("x");
dbfull()->TEST_CompactRange(0, NULL, &x);
ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
ASSERT_EQ(NumTableFilesAtLevel(0), 0); ASSERT_EQ(NumTableFilesAtLevel(0), 0);
ASSERT_GE(NumTableFilesAtLevel(1), 1); ASSERT_GE(NumTableFilesAtLevel(1), 1);
dbfull()->TEST_CompactRange(1, "", "x"); dbfull()->TEST_CompactRange(1, NULL, &x);
ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000)); ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
@ -949,11 +972,12 @@ TEST(DBTest, DeletionMarkers1) {
ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
dbfull()->TEST_CompactRange(last-2, "", "z"); Slice z("z");
dbfull()->TEST_CompactRange(last-2, NULL, &z);
// DEL eliminated, but v1 remains because we aren't compacting that level // DEL eliminated, but v1 remains because we aren't compacting that level
// (DEL can be eliminated because v2 hides v1). // (DEL can be eliminated because v2 hides v1).
ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
dbfull()->TEST_CompactRange(last-1, "", "z"); dbfull()->TEST_CompactRange(last-1, NULL, NULL);
// Merging last-1 w/ last, so we are the base level for "foo", so // Merging last-1 w/ last, so we are the base level for "foo", so
// DEL is removed. (as is v1). // DEL is removed. (as is v1).
ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
@ -976,15 +1000,54 @@ TEST(DBTest, DeletionMarkers2) {
ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2 ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
dbfull()->TEST_CompactRange(last-2, "", "z"); dbfull()->TEST_CompactRange(last-2, NULL, NULL);
// DEL kept: "last" file overlaps // DEL kept: "last" file overlaps
ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
dbfull()->TEST_CompactRange(last-1, "", "z"); dbfull()->TEST_CompactRange(last-1, NULL, NULL);
// Merging last-1 w/ last, so we are the base level for "foo", so // Merging last-1 w/ last, so we are the base level for "foo", so
// DEL is removed. (as is v1). // DEL is removed. (as is v1).
ASSERT_EQ(AllEntriesFor("foo"), "[ ]"); ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
} }
// Regression test: a memtable compaction must notice that its key range
// overlaps existing level-0 files, even though those files have overlapping
// ranges, and therefore must leave its output in level 0.
TEST(DBTest, OverlapInLevel0) {
ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
// Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
ASSERT_OK(Put("100", "v100"));
ASSERT_OK(Put("999", "v999"));
dbfull()->TEST_CompactMemTable();
ASSERT_OK(Delete("100"));
ASSERT_OK(Delete("999"));
dbfull()->TEST_CompactMemTable();
// Expect no level-0 files and one placeholder file in each of levels 1 and 2.
ASSERT_EQ("0,1,1", FilesPerLevel());
// Make files spanning the following ranges in level-0:
// files[0] 200 .. 900
// files[1] 300 .. 500
// Note that files are sorted by smallest key.
ASSERT_OK(Put("300", "v300"));
ASSERT_OK(Put("500", "v500"));
dbfull()->TEST_CompactMemTable();
ASSERT_OK(Put("200", "v200"));
ASSERT_OK(Put("600", "v600"));
ASSERT_OK(Put("900", "v900"));
dbfull()->TEST_CompactMemTable();
// Both new tables stay in level 0 because the placeholders cover 100 .. 999.
ASSERT_EQ("2,1,1", FilesPerLevel());
// Compact away the placeholder files we created initially
dbfull()->TEST_CompactRange(1, NULL, NULL);
dbfull()->TEST_CompactRange(2, NULL, NULL);
// Only the two overlapping level-0 files remain.
ASSERT_EQ("2", FilesPerLevel());
// Do a memtable compaction. Before bug-fix, the compaction would
// not detect the overlap with level-0 files and would incorrectly place
// the deletion in a deeper level.
ASSERT_OK(Delete("600"));
dbfull()->TEST_CompactMemTable();
// The deletion's table must land in level 0 alongside the other two files.
ASSERT_EQ("3", FilesPerLevel());
// The deletion must hide the value written for "600" earlier in this test.
ASSERT_EQ("NOT_FOUND", Get("600"));
}
TEST(DBTest, ComparatorCheck) { TEST(DBTest, ComparatorCheck) {
class NewComparator : public Comparator { class NewComparator : public Comparator {
public: public:
@ -1008,6 +1071,40 @@ TEST(DBTest, ComparatorCheck) {
<< s.ToString(); << s.ToString();
} }
// Exercises manual range compaction with ranges that lie before, after, and
// across the existing table files, and finally with an unbounded range.
// Each expected string is the per-level file count from FilesPerLevel().
TEST(DBTest, ManualCompaction) {
ASSERT_EQ(config::kMaxMemCompactLevel, 2)
<< "Need to update this test to match kMaxMemCompactLevel";
// Three memtable compactions, each producing a table covering "p" .. "q".
MakeTables(3, "p", "q");
ASSERT_EQ("1,1,1", FilesPerLevel());
// Compaction range falls before files
Compact("", "c");
ASSERT_EQ("1,1,1", FilesPerLevel());
// Compaction range falls after files
Compact("r", "z");
ASSERT_EQ("1,1,1", FilesPerLevel());
// Compaction range overlaps files
Compact("p1", "p9");
ASSERT_EQ("0,0,1", FilesPerLevel());
// Populate a different range
MakeTables(3, "c", "e");
ASSERT_EQ("1,1,2", FilesPerLevel());
// Compact just the new range
Compact("b", "f");
ASSERT_EQ("0,0,2", FilesPerLevel());
// Compact all
MakeTables(1, "a", "z");
ASSERT_EQ("0,1,2", FilesPerLevel());
// NULL bounds request compaction of the entire key range.
db_->CompactRange(NULL, NULL);
ASSERT_EQ("0,0,1", FilesPerLevel());
}
TEST(DBTest, DBOpen_Options) { TEST(DBTest, DBOpen_Options) {
std::string dbname = test::TmpDir() + "/db_options_test"; std::string dbname = test::TmpDir() + "/db_options_test";
DestroyDB(dbname, Options()); DestroyDB(dbname, Options());
@ -1187,7 +1284,6 @@ class ModelDB: public DB {
delete reinterpret_cast<const ModelSnapshot*>(snapshot); delete reinterpret_cast<const ModelSnapshot*>(snapshot);
} }
virtual Status Write(const WriteOptions& options, WriteBatch* batch) { virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
assert(options.post_write_snapshot == NULL); // Not supported
class Handler : public WriteBatch::Handler { class Handler : public WriteBatch::Handler {
public: public:
KVMap* map_; KVMap* map_;
@ -1211,6 +1307,9 @@ class ModelDB: public DB {
sizes[i] = 0; sizes[i] = 0;
} }
} }
// Empty implementation: performs no work for any "start"/"end" range.
virtual void CompactRange(const Slice* start, const Slice* end) {
}
private: private:
class ModelIter: public Iterator { class ModelIter: public Iterator {
public: public:

@ -31,6 +31,18 @@ std::string ParsedInternalKey::DebugString() const {
return result; return result;
} }
// Returns a printable form of this internal key.  When the stored
// representation parses as an internal key, the parsed key's own
// DebugString() is returned; otherwise the result is "(bad)" followed by
// the escaped raw representation.
std::string InternalKey::DebugString() const {
  ParsedInternalKey parsed;
  if (!ParseInternalKey(rep_, &parsed)) {
    std::string unparsable = "(bad)";
    unparsable.append(EscapeString(rep_));
    return unparsable;
  }
  return parsed.DebugString();
}
const char* InternalKeyComparator::Name() const { const char* InternalKeyComparator::Name() const {
return "leveldb.InternalKeyComparator"; return "leveldb.InternalKeyComparator";
} }

@ -149,6 +149,8 @@ class InternalKey {
} }
void Clear() { rep_.clear(); } void Clear() { rep_.clear(); }
std::string DebugString() const;
}; };
inline int InternalKeyComparator::Compare( inline int InternalKeyComparator::Compare(

@ -235,9 +235,8 @@ std::string VersionEdit::DebugString() const {
for (size_t i = 0; i < compact_pointers_.size(); i++) { for (size_t i = 0; i < compact_pointers_.size(); i++) {
r.append("\n CompactPointer: "); r.append("\n CompactPointer: ");
AppendNumberTo(&r, compact_pointers_[i].first); AppendNumberTo(&r, compact_pointers_[i].first);
r.append(" '"); r.append(" ");
AppendEscapedStringTo(&r, compact_pointers_[i].second.Encode()); r.append(compact_pointers_[i].second.DebugString());
r.append("'");
} }
for (DeletedFileSet::const_iterator iter = deleted_files_.begin(); for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
iter != deleted_files_.end(); iter != deleted_files_.end();
@ -255,11 +254,10 @@ std::string VersionEdit::DebugString() const {
AppendNumberTo(&r, f.number); AppendNumberTo(&r, f.number);
r.append(" "); r.append(" ");
AppendNumberTo(&r, f.file_size); AppendNumberTo(&r, f.file_size);
r.append(" '"); r.append(" ");
AppendEscapedStringTo(&r, f.smallest.Encode()); r.append(f.smallest.DebugString());
r.append("' .. '"); r.append(" .. ");
AppendEscapedStringTo(&r, f.largest.Encode()); r.append(f.largest.DebugString());
r.append("'");
} }
r.append("\n}\n"); r.append("\n}\n");
return r; return r;

@ -41,6 +41,14 @@ static uint64_t MaxFileSizeForLevel(int level) {
return kTargetFileSize; // We could vary per level to reduce number of files? return kTargetFileSize; // We could vary per level to reduce number of files?
} }
// Returns the sum of file_size over every entry of "files" (0 when "files"
// is empty).
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  int64_t total = 0;
  for (std::vector<FileMetaData*>::const_iterator it = files.begin();
       it != files.end(); ++it) {
    total += (*it)->file_size;
  }
  return total;
}
namespace { namespace {
std::string IntSetToString(const std::set<uint64_t>& s) { std::string IntSetToString(const std::set<uint64_t>& s) {
std::string result = "{"; std::string result = "{";
@ -96,17 +104,55 @@ int FindFile(const InternalKeyComparator& icmp,
return right; return right;
} }
// Returns true iff *user_key orders strictly after the largest user key
// stored in *f, as judged by "ucmp".  A NULL user_key stands for a position
// before all keys, so it is never after a file.
static bool AfterFile(const Comparator* ucmp,
                      const Slice* user_key, const FileMetaData* f) {
  if (user_key == NULL) {
    return false;
  }
  const int order = ucmp->Compare(*user_key, f->largest.user_key());
  return order > 0;
}
// Returns true iff *user_key orders strictly before the smallest user key
// stored in *f, as judged by "ucmp".  A NULL user_key stands for a position
// after all keys, so it is never before a file.
static bool BeforeFile(const Comparator* ucmp,
                       const Slice* user_key, const FileMetaData* f) {
  if (user_key == NULL) {
    return false;
  }
  const int order = ucmp->Compare(*user_key, f->smallest.user_key());
  return order < 0;
}
bool SomeFileOverlapsRange( bool SomeFileOverlapsRange(
const InternalKeyComparator& icmp, const InternalKeyComparator& icmp,
bool disjoint_sorted_files,
const std::vector<FileMetaData*>& files, const std::vector<FileMetaData*>& files,
const Slice& smallest_user_key, const Slice* smallest_user_key,
const Slice& largest_user_key) { const Slice* largest_user_key) {
const Comparator* ucmp = icmp.user_comparator();
if (!disjoint_sorted_files) {
// Need to check against all files
for (int i = 0; i < files.size(); i++) {
const FileMetaData* f = files[i];
if (AfterFile(ucmp, smallest_user_key, f) ||
BeforeFile(ucmp, largest_user_key, f)) {
// No overlap
} else {
return true; // Overlap
}
}
return false;
}
// Binary search over file list
uint32_t index = 0;
if (smallest_user_key != NULL) {
// Find the earliest possible internal key for smallest_user_key // Find the earliest possible internal key for smallest_user_key
InternalKey small(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek); InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek);
const uint32_t index = FindFile(icmp, files, small.Encode()); index = FindFile(icmp, files, small.Encode());
return ((index < files.size()) && }
icmp.user_comparator()->Compare(
largest_user_key, files[index]->smallest.user_key()) >= 0); if (index >= files.size()) {
// beginning of range is after all files, so no overlap.
return false;
}
return !BeforeFile(ucmp, largest_user_key, files[index]);
} }
// An internal iterator. For a given version/level pair, yields // An internal iterator. For a given version/level pair, yields
@ -358,11 +404,64 @@ void Version::Unref() {
} }
bool Version::OverlapInLevel(int level, bool Version::OverlapInLevel(int level,
const Slice* smallest_user_key,
const Slice* largest_user_key) {
return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level],
smallest_user_key, largest_user_key);
}
int Version::PickLevelForMemTableOutput(
const Slice& smallest_user_key, const Slice& smallest_user_key,
const Slice& largest_user_key) { const Slice& largest_user_key) {
return SomeFileOverlapsRange(vset_->icmp_, files_[level], int level = 0;
smallest_user_key, if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) {
largest_user_key); // Push to next level if there is no overlap in next level,
// and the #bytes overlapping in the level after that are limited.
InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
std::vector<FileMetaData*> overlaps;
while (level < config::kMaxMemCompactLevel) {
if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
break;
}
GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
const int64_t sum = TotalFileSize(overlaps);
if (sum > kMaxGrandParentOverlapBytes) {
break;
}
level++;
}
}
return level;
}
// Store in "*inputs" all files in "level" that overlap [begin,end]
void Version::GetOverlappingInputs(
int level,
const InternalKey* begin,
const InternalKey* end,
std::vector<FileMetaData*>* inputs) {
inputs->clear();
Slice user_begin, user_end;
if (begin != NULL) {
user_begin = begin->user_key();
}
if (end != NULL) {
user_end = end->user_key();
}
const Comparator* user_cmp = vset_->icmp_.user_comparator();
for (size_t i = 0; i < files_[level].size(); i++) {
FileMetaData* f = files_[level][i];
if (begin != NULL &&
user_cmp->Compare(f->largest.user_key(), user_begin) < 0) {
// "f" is completely before specified range; skip it
} else if (end != NULL &&
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
// "f" is completely after specified range; skip it
} else {
inputs->push_back(f);
}
}
} }
std::string Version::DebugString() const { std::string Version::DebugString() const {
@ -381,11 +480,11 @@ std::string Version::DebugString() const {
AppendNumberTo(&r, files[i]->number); AppendNumberTo(&r, files[i]->number);
r.push_back(':'); r.push_back(':');
AppendNumberTo(&r, files[i]->file_size); AppendNumberTo(&r, files[i]->file_size);
r.append("['"); r.append("[");
AppendEscapedStringTo(&r, files[i]->smallest.Encode()); r.append(files[i]->smallest.DebugString());
r.append("' .. '"); r.append(" .. ");
AppendEscapedStringTo(&r, files[i]->largest.Encode()); r.append(files[i]->largest.DebugString());
r.append("']\n"); r.append("]\n");
} }
} }
return r; return r;
@ -540,8 +639,8 @@ class VersionSet::Builder {
const InternalKey& this_begin = v->files_[level][i]->smallest; const InternalKey& this_begin = v->files_[level][i]->smallest;
if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) { if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) {
fprintf(stderr, "overlapping ranges in same level %s vs. %s\n", fprintf(stderr, "overlapping ranges in same level %s vs. %s\n",
EscapeString(prev_end.Encode()).c_str(), prev_end.DebugString().c_str(),
EscapeString(this_begin.Encode()).c_str()); this_begin.DebugString().c_str());
abort(); abort();
} }
} }
@ -814,14 +913,6 @@ void VersionSet::MarkFileNumberUsed(uint64_t number) {
} }
} }
// Adds up the file_size field of each element of "files" and returns the
// total; an empty vector gives 0.
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  int64_t total = 0;
  size_t idx = 0;
  const size_t count = files.size();
  while (idx < count) {
    total += files[idx]->file_size;
    ++idx;
  }
  return total;
}
void VersionSet::Finalize(Version* v) { void VersionSet::Finalize(Version* v) {
// Precomputed best level for next compaction // Precomputed best level for next compaction
int best_level = -1; int best_level = -1;
@ -967,7 +1058,8 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
for (int level = 1; level < config::kNumLevels - 1; level++) { for (int level = 1; level < config::kNumLevels - 1; level++) {
for (size_t i = 0; i < current_->files_[level].size(); i++) { for (size_t i = 0; i < current_->files_[level].size(); i++) {
const FileMetaData* f = current_->files_[level][i]; const FileMetaData* f = current_->files_[level][i];
GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps); current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest,
&overlaps);
const int64_t sum = TotalFileSize(overlaps); const int64_t sum = TotalFileSize(overlaps);
if (sum > result) { if (sum > result) {
result = sum; result = sum;
@ -977,27 +1069,6 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
return result; return result;
} }
// Store in "*inputs" all files in "level" that overlap [begin,end]
void VersionSet::GetOverlappingInputs(
int level,
const InternalKey& begin,
const InternalKey& end,
std::vector<FileMetaData*>* inputs) {
inputs->clear();
Slice user_begin = begin.user_key();
Slice user_end = end.user_key();
const Comparator* user_cmp = icmp_.user_comparator();
for (size_t i = 0; i < current_->files_[level].size(); i++) {
FileMetaData* f = current_->files_[level][i];
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
// Either completely before or after range; skip it
} else {
inputs->push_back(f);
}
}
}
// Stores the minimal range that covers all entries in inputs in // Stores the minimal range that covers all entries in inputs in
// *smallest, *largest. // *smallest, *largest.
// REQUIRES: inputs is not empty // REQUIRES: inputs is not empty
@ -1113,7 +1184,7 @@ Compaction* VersionSet::PickCompaction() {
// Note that the next call will discard the file we placed in // Note that the next call will discard the file we placed in
// c->inputs_[0] earlier and replace it with an overlapping set // c->inputs_[0] earlier and replace it with an overlapping set
// which will include the picked file. // which will include the picked file.
GetOverlappingInputs(0, smallest, largest, &c->inputs_[0]); current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]);
assert(!c->inputs_[0].empty()); assert(!c->inputs_[0].empty());
} }
@ -1127,7 +1198,7 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
InternalKey smallest, largest; InternalKey smallest, largest;
GetRange(c->inputs_[0], &smallest, &largest); GetRange(c->inputs_[0], &smallest, &largest);
GetOverlappingInputs(level+1, smallest, largest, &c->inputs_[1]); current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]);
// Get entire range covered by compaction // Get entire range covered by compaction
InternalKey all_start, all_limit; InternalKey all_start, all_limit;
@ -1137,12 +1208,13 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
// changing the number of "level+1" files we pick up. // changing the number of "level+1" files we pick up.
if (!c->inputs_[1].empty()) { if (!c->inputs_[1].empty()) {
std::vector<FileMetaData*> expanded0; std::vector<FileMetaData*> expanded0;
GetOverlappingInputs(level, all_start, all_limit, &expanded0); current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0);
if (expanded0.size() > c->inputs_[0].size()) { if (expanded0.size() > c->inputs_[0].size()) {
InternalKey new_start, new_limit; InternalKey new_start, new_limit;
GetRange(expanded0, &new_start, &new_limit); GetRange(expanded0, &new_start, &new_limit);
std::vector<FileMetaData*> expanded1; std::vector<FileMetaData*> expanded1;
GetOverlappingInputs(level+1, new_start, new_limit, &expanded1); current_->GetOverlappingInputs(level+1, &new_start, &new_limit,
&expanded1);
if (expanded1.size() == c->inputs_[1].size()) { if (expanded1.size() == c->inputs_[1].size()) {
Log(options_->info_log, Log(options_->info_log,
"Expanding@%d %d+%d to %d+%d\n", "Expanding@%d %d+%d to %d+%d\n",
@ -1163,14 +1235,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
// Compute the set of grandparent files that overlap this compaction // Compute the set of grandparent files that overlap this compaction
// (parent == level+1; grandparent == level+2) // (parent == level+1; grandparent == level+2)
if (level + 2 < config::kNumLevels) { if (level + 2 < config::kNumLevels) {
GetOverlappingInputs(level + 2, all_start, all_limit, &c->grandparents_); current_->GetOverlappingInputs(level + 2, &all_start, &all_limit,
&c->grandparents_);
} }
if (false) { if (false) {
Log(options_->info_log, "Compacting %d '%s' .. '%s'", Log(options_->info_log, "Compacting %d '%s' .. '%s'",
level, level,
EscapeString(smallest.Encode()).c_str(), smallest.DebugString().c_str(),
EscapeString(largest.Encode()).c_str()); largest.DebugString().c_str());
} }
// Update the place where we will do the next compaction for this level. // Update the place where we will do the next compaction for this level.
@ -1183,14 +1256,26 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
Compaction* VersionSet::CompactRange( Compaction* VersionSet::CompactRange(
int level, int level,
const InternalKey& begin, const InternalKey* begin,
const InternalKey& end) { const InternalKey* end) {
std::vector<FileMetaData*> inputs; std::vector<FileMetaData*> inputs;
GetOverlappingInputs(level, begin, end, &inputs); current_->GetOverlappingInputs(level, begin, end, &inputs);
if (inputs.empty()) { if (inputs.empty()) {
return NULL; return NULL;
} }
// Avoid compacting too much in one shot in case the range is large.
const uint64_t limit = MaxFileSizeForLevel(level);
uint64_t total = 0;
for (int i = 0; i < inputs.size(); i++) {
uint64_t s = inputs[i]->file_size;
total += s;
if (total >= limit) {
inputs.resize(i + 1);
break;
}
}
Compaction* c = new Compaction(level); Compaction* c = new Compaction(level);
c->input_version_ = current_; c->input_version_ = current_;
c->input_version_->Ref(); c->input_version_->Ref();

@ -43,12 +43,17 @@ extern int FindFile(const InternalKeyComparator& icmp,
const Slice& key); const Slice& key);
// Returns true iff some file in "files" overlaps the user key range // Returns true iff some file in "files" overlaps the user key range
// [smallest,largest]. // [*smallest,*largest].
// smallest==NULL represents a key smaller than all keys in the DB.
// largest==NULL represents a key larger than all keys in the DB.
// REQUIRES: If disjoint_sorted_files, files[] contains disjoint ranges
// in sorted order.
extern bool SomeFileOverlapsRange( extern bool SomeFileOverlapsRange(
const InternalKeyComparator& icmp, const InternalKeyComparator& icmp,
bool disjoint_sorted_files,
const std::vector<FileMetaData*>& files, const std::vector<FileMetaData*>& files,
const Slice& smallest_user_key, const Slice* smallest_user_key,
const Slice& largest_user_key); const Slice* largest_user_key);
class Version { class Version {
public: public:
@ -77,10 +82,23 @@ class Version {
void Ref(); void Ref();
void Unref(); void Unref();
void GetOverlappingInputs(
int level,
const InternalKey* begin, // NULL means before all keys
const InternalKey* end, // NULL means after all keys
std::vector<FileMetaData*>* inputs);
// Returns true iff some file in the specified level overlaps // Returns true iff some file in the specified level overlaps
// some part of [smallest_user_key,largest_user_key]. // some part of [*smallest_user_key,*largest_user_key].
// smallest_user_key==NULL represents a key smaller than all keys in the DB.
// largest_user_key==NULL represents a key larger than all keys in the DB.
bool OverlapInLevel(int level, bool OverlapInLevel(int level,
const Slice& smallest_user_key, const Slice* smallest_user_key,
const Slice* largest_user_key);
// Return the level at which we should place a new memtable compaction
// result that covers the range [smallest_user_key,largest_user_key].
int PickLevelForMemTableOutput(const Slice& smallest_user_key,
const Slice& largest_user_key); const Slice& largest_user_key);
int NumFiles(int level) const { return files_[level].size(); } int NumFiles(int level) const { return files_[level].size(); }
@ -192,8 +210,8 @@ class VersionSet {
// the result. // the result.
Compaction* CompactRange( Compaction* CompactRange(
int level, int level,
const InternalKey& begin, const InternalKey* begin,
const InternalKey& end); const InternalKey* end);
// Return the maximum overlapping data (in bytes) at next level for any // Return the maximum overlapping data (in bytes) at next level for any
// file at a level >= 1. // file at a level >= 1.
@ -232,12 +250,6 @@ class VersionSet {
void Finalize(Version* v); void Finalize(Version* v);
void GetOverlappingInputs(
int level,
const InternalKey& begin,
const InternalKey& end,
std::vector<FileMetaData*>* inputs);
void GetRange(const std::vector<FileMetaData*>& inputs, void GetRange(const std::vector<FileMetaData*>& inputs,
InternalKey* smallest, InternalKey* smallest,
InternalKey* largest); InternalKey* largest);

@ -12,6 +12,9 @@ namespace leveldb {
class FindFileTest { class FindFileTest {
public: public:
std::vector<FileMetaData*> files_; std::vector<FileMetaData*> files_;
bool disjoint_sorted_files_;
FindFileTest() : disjoint_sorted_files_(true) { }
~FindFileTest() { ~FindFileTest() {
for (int i = 0; i < files_.size(); i++) { for (int i = 0; i < files_.size(); i++) {
@ -37,13 +40,20 @@ class FindFileTest {
bool Overlaps(const char* smallest, const char* largest) { bool Overlaps(const char* smallest, const char* largest) {
InternalKeyComparator cmp(BytewiseComparator()); InternalKeyComparator cmp(BytewiseComparator());
return SomeFileOverlapsRange(cmp, files_, smallest, largest); Slice s(smallest != NULL ? smallest : "");
Slice l(largest != NULL ? largest : "");
return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_,
(smallest != NULL ? &s : NULL),
(largest != NULL ? &l : NULL));
} }
}; };
TEST(FindFileTest, Empty) { TEST(FindFileTest, Empty) {
ASSERT_EQ(0, Find("foo")); ASSERT_EQ(0, Find("foo"));
ASSERT_TRUE(! Overlaps("a", "z")); ASSERT_TRUE(! Overlaps("a", "z"));
ASSERT_TRUE(! Overlaps(NULL, "z"));
ASSERT_TRUE(! Overlaps("a", NULL));
ASSERT_TRUE(! Overlaps(NULL, NULL));
} }
TEST(FindFileTest, Single) { TEST(FindFileTest, Single) {
@ -67,6 +77,13 @@ TEST(FindFileTest, Single) {
ASSERT_TRUE(Overlaps("p1", "z")); ASSERT_TRUE(Overlaps("p1", "z"));
ASSERT_TRUE(Overlaps("q", "q")); ASSERT_TRUE(Overlaps("q", "q"));
ASSERT_TRUE(Overlaps("q", "q1")); ASSERT_TRUE(Overlaps("q", "q1"));
ASSERT_TRUE(! Overlaps(NULL, "j"));
ASSERT_TRUE(! Overlaps("r", NULL));
ASSERT_TRUE(Overlaps(NULL, "p"));
ASSERT_TRUE(Overlaps(NULL, "p1"));
ASSERT_TRUE(Overlaps("q", NULL));
ASSERT_TRUE(Overlaps(NULL, NULL));
} }
@ -108,6 +125,26 @@ TEST(FindFileTest, Multiple) {
ASSERT_TRUE(Overlaps("450", "500")); ASSERT_TRUE(Overlaps("450", "500"));
} }
TEST(FindFileTest, MultipleNullBoundaries) {
Add("150", "200");
Add("200", "250");
Add("300", "350");
Add("400", "450");
ASSERT_TRUE(! Overlaps(NULL, "149"));
ASSERT_TRUE(! Overlaps("451", NULL));
ASSERT_TRUE(Overlaps(NULL, NULL));
ASSERT_TRUE(Overlaps(NULL, "150"));
ASSERT_TRUE(Overlaps(NULL, "199"));
ASSERT_TRUE(Overlaps(NULL, "200"));
ASSERT_TRUE(Overlaps(NULL, "201"));
ASSERT_TRUE(Overlaps(NULL, "400"));
ASSERT_TRUE(Overlaps(NULL, "800"));
ASSERT_TRUE(Overlaps("100", NULL));
ASSERT_TRUE(Overlaps("200", NULL));
ASSERT_TRUE(Overlaps("449", NULL));
ASSERT_TRUE(Overlaps("450", NULL));
}
TEST(FindFileTest, OverlapSequenceChecks) { TEST(FindFileTest, OverlapSequenceChecks) {
Add("200", "200", 5000, 3000); Add("200", "200", 5000, 3000);
ASSERT_TRUE(! Overlaps("199", "199")); ASSERT_TRUE(! Overlaps("199", "199"));
@ -117,6 +154,24 @@ TEST(FindFileTest, OverlapSequenceChecks) {
ASSERT_TRUE(Overlaps("200", "210")); ASSERT_TRUE(Overlaps("200", "210"));
} }
TEST(FindFileTest, OverlappingFiles) {
Add("150", "600");
Add("400", "500");
disjoint_sorted_files_ = false;
ASSERT_TRUE(! Overlaps("100", "149"));
ASSERT_TRUE(! Overlaps("601", "700"));
ASSERT_TRUE(Overlaps("100", "150"));
ASSERT_TRUE(Overlaps("100", "200"));
ASSERT_TRUE(Overlaps("100", "300"));
ASSERT_TRUE(Overlaps("100", "400"));
ASSERT_TRUE(Overlaps("100", "500"));
ASSERT_TRUE(Overlaps("375", "400"));
ASSERT_TRUE(Overlaps("450", "450"));
ASSERT_TRUE(Overlaps("450", "500"));
ASSERT_TRUE(Overlaps("450", "700"));
ASSERT_TRUE(Overlaps("600", "700"));
}
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {

@ -193,7 +193,7 @@ that a read should operate on a particular version of the DB state.
If <code>ReadOptions::snapshot</code> is NULL, the read will operate on an If <code>ReadOptions::snapshot</code> is NULL, the read will operate on an
implicit snapshot of the current state. implicit snapshot of the current state.
<p> <p>
Snapshots typically are created by the DB::GetSnapshot() method: Snapshots are created by the DB::GetSnapshot() method:
<p> <p>
<pre> <pre>
leveldb::ReadOptions options; leveldb::ReadOptions options;
@ -208,26 +208,6 @@ Note that when a snapshot is no longer needed, it should be released
using the DB::ReleaseSnapshot interface. This allows the using the DB::ReleaseSnapshot interface. This allows the
implementation to get rid of state that was being maintained just to implementation to get rid of state that was being maintained just to
support reading as of that snapshot. support reading as of that snapshot.
<p>
A Write operation can also return a snapshot that
represents the state of the database just after applying a particular
set of updates:
<p>
<pre>
leveldb::Snapshot* snapshot;
leveldb::WriteOptions write_options;
write_options.post_write_snapshot = &amp;snapshot;
leveldb::Status status = db-&gt;Write(write_options, ...);
... perform other mutations to db ...
leveldb::ReadOptions read_options;
read_options.snapshot = snapshot;
leveldb::Iterator* iter = db-&gt;NewIterator(read_options);
... read as of the state just after the Write call returned ...
delete iter;
db-&gt;ReleaseSnapshot(snapshot);
</pre>
<h1>Slice</h1> <h1>Slice</h1>
<p> <p>
The return value of the <code>it->key()</code> and <code>it->value()</code> calls above The return value of the <code>it->key()</code> and <code>it->value()</code> calls above

@ -112,6 +112,8 @@ class DB {
// where <N> is an ASCII representation of a level number (e.g. "0"). // where <N> is an ASCII representation of a level number (e.g. "0").
// "leveldb.stats" - returns a multi-line string that describes statistics // "leveldb.stats" - returns a multi-line string that describes statistics
// about the internal operation of the DB. // about the internal operation of the DB.
// "leveldb.sstables" - returns a multi-line string that describes all
// of the sstables that make up the db contents.
virtual bool GetProperty(const Slice& property, std::string* value) = 0; virtual bool GetProperty(const Slice& property, std::string* value) = 0;
// For each i in [0,n-1], store in "sizes[i]", the approximate // For each i in [0,n-1], store in "sizes[i]", the approximate
@ -125,8 +127,17 @@ class DB {
virtual void GetApproximateSizes(const Range* range, int n, virtual void GetApproximateSizes(const Range* range, int n,
uint64_t* sizes) = 0; uint64_t* sizes) = 0;
// Possible extensions: // Compact the underlying storage for the key range [*begin,*end].
// (1) Add a method to compact a range of keys // In particular, deleted and overwritten versions are discarded,
// and the data is rearranged to reduce the cost of operations
// needed to access the data. This operation should typically only
// be invoked by users who understand the underlying implementation.
//
// begin==NULL is treated as a key before all keys in the database.
// end==NULL is treated as a key after all keys in the database.
// Therefore the following call will compact the entire database:
// db->CompactRange(NULL, NULL);
virtual void CompactRange(const Slice* begin, const Slice* end) = 0;
private: private:
// No copying allowed // No copying allowed

@ -160,6 +160,8 @@ class SequentialFile {
// Read up to "n" bytes from the file. "scratch[0..n-1]" may be // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
// written by this routine. Sets "*result" to the data that was // written by this routine. Sets "*result" to the data that was
// read (including if fewer than "n" bytes were successfully read). // read (including if fewer than "n" bytes were successfully read).
// May set "*result" to point at data in "scratch[0..n-1]", so
// "scratch[0..n-1]" must be live when "*result" is used.
// If an error was encountered, returns a non-OK status. // If an error was encountered, returns a non-OK status.
// //
// REQUIRES: External synchronization // REQUIRES: External synchronization
@ -184,8 +186,10 @@ class RandomAccessFile {
// Read up to "n" bytes from the file starting at "offset". // Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result" // "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were // to the data that was read (including if fewer than "n" bytes were
// successfully read). If an error was encountered, returns a // successfully read). May set "*result" to point at data in
// non-OK status. // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
// "*result" is used. If an error was encountered, returns a non-OK
// status.
// //
// Safe for concurrent use by multiple threads. // Safe for concurrent use by multiple threads.
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,

@ -177,21 +177,8 @@ struct WriteOptions {
// Default: false // Default: false
bool sync; bool sync;
// If "post_write_snapshot" is non-NULL, and the write succeeds,
// *post_write_snapshot will be modified to point to a snapshot of
// the DB state immediately after this write. The caller must call
// DB::ReleaseSnapshot(*post_write_snapshot) when the
// snapshot is no longer needed.
//
// If "post_write_snapshot" is non-NULL, and the write fails,
// *post_write_snapshot will be set to NULL.
//
// Default: NULL
const Snapshot** post_write_snapshot;
WriteOptions() WriteOptions()
: sync(false), : sync(false) {
post_write_snapshot(NULL) {
} }
}; };

@ -62,10 +62,10 @@ inline uint32_t DecodeFixed32(const char* ptr) {
memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
return result; return result;
} else { } else {
return ((static_cast<uint32_t>(ptr[0])) return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
| (static_cast<uint32_t>(ptr[1]) << 8) | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
| (static_cast<uint32_t>(ptr[2]) << 16) | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
| (static_cast<uint32_t>(ptr[3]) << 24)); | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
} }
} }

@ -1,5 +1,6 @@
// Copyright 2011 Google Inc. All Rights Reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Author: sanjay@google.com (Sanjay Ghemawat) // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// //
// Logger implementation that can be shared by all environments // Logger implementation that can be shared by all environments
// where enough posix functionality is available. // where enough posix functionality is available.

Loading…
Cancel
Save