From b694b16105644d726b4efe8eb831c2fda9b93772 Mon Sep 17 00:00:00 2001 From: agchou Date: Wed, 12 Mar 2014 12:06:58 -0700 Subject: [PATCH 1/8] Fix copyright year Fix outdated copyright year (update to 2014) The copyright year was out of date. Copyright notices must reflect the current year. This commit updates the listed year to 2014. see: http://www.copyright.gov/circs/circ01.pdf for more info --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 716ad9e74..b13290186 100644 --- a/LICENSE +++ b/LICENSE @@ -2,7 +2,7 @@ BSD License For rocksdb software -Copyright (c) 2013, Facebook, Inc. +Copyright (c) 2014, Facebook, Inc. All rights reserved. --------------------------------------------------------------------- From b9c78d2db6cbd15c87146c915b27375a41495a7d Mon Sep 17 00:00:00 2001 From: Caio SBA Date: Fri, 14 Mar 2014 22:44:35 +0000 Subject: [PATCH 2/8] Make it compile on Debian/GCC 4.7 --- db/log_test.cc | 8 ++++---- util/env_test.cc | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/db/log_test.cc b/db/log_test.cc index b28343e63..6577a6a9c 100644 --- a/db/log_test.cc +++ b/db/log_test.cc @@ -451,7 +451,7 @@ TEST(LogTest, TruncatedTrailingRecordIsIgnored) { ShrinkSize(4); // Drop all payload as well as a header byte ASSERT_EQ("EOF", Read()); // Truncated last record is ignored, not treated as an error - ASSERT_EQ(0, DroppedBytes()); + ASSERT_EQ(0U, DroppedBytes()); ASSERT_EQ("", ReportMessage()); } @@ -470,7 +470,7 @@ TEST(LogTest, BadLengthAtEndIsIgnored) { Write("foo"); ShrinkSize(1); ASSERT_EQ("EOF", Read()); - ASSERT_EQ(0, DroppedBytes()); + ASSERT_EQ(0U, DroppedBytes()); ASSERT_EQ("", ReportMessage()); } @@ -528,7 +528,7 @@ TEST(LogTest, MissingLastIsIgnored) { ShrinkSize(14); ASSERT_EQ("EOF", Read()); ASSERT_EQ("", ReportMessage()); - ASSERT_EQ(0, DroppedBytes()); + ASSERT_EQ(0U, DroppedBytes()); } TEST(LogTest, PartialLastIsIgnored) { @@ -537,7 +537,7 @@ TEST(LogTest, PartialLastIsIgnored) { ShrinkSize(1); ASSERT_EQ("EOF", Read()); ASSERT_EQ("", ReportMessage()); - ASSERT_EQ(0, DroppedBytes()); + ASSERT_EQ(0U, DroppedBytes()); } TEST(LogTest, ErrorJoinsRecords) { diff --git a/util/env_test.cc b/util/env_test.cc index e17027a39..892586478 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -172,8 +172,8 @@ TEST(EnvPosixTest, TwoPools) { env_->SetBackgroundThreads(kLowPoolSize); env_->SetBackgroundThreads(kHighPoolSize, Env::Priority::HIGH); - ASSERT_EQ(0, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ(0, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); + ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); + ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); // schedule same number of jobs in each pool for (int i = 0; i < kJobs; i++) { @@ -182,10 +182,10 @@ TEST(EnvPosixTest, TwoPools) { } // Wait a short while for the jobs to be dispatched. Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ(kJobs - kLowPoolSize, env_->GetThreadPoolQueueLen()); - ASSERT_EQ(kJobs - kLowPoolSize, + ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), env_->GetThreadPoolQueueLen()); + ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ(kJobs - kHighPoolSize, + ASSERT_EQ((unsigned int)(kJobs - kHighPoolSize), env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); // wait for all jobs to finish @@ -194,8 +194,8 @@ TEST(EnvPosixTest, TwoPools) { env_->SleepForMicroseconds(kDelayMicros); } - ASSERT_EQ(0, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); - ASSERT_EQ(0, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); + ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::LOW)); + ASSERT_EQ(0U, env_->GetThreadPoolQueueLen(Env::Priority::HIGH)); } bool IsSingleVarint(const std::string& s) { @@ -296,18 +296,18 @@ TEST(EnvPosixTest, AllocateTest) { struct stat f_stat; stat(fname.c_str(), &f_stat); - ASSERT_EQ(data.size(), f_stat.st_size); + ASSERT_EQ((unsigned int)data.size(), f_stat.st_size); // verify that blocks are preallocated - ASSERT_EQ(kPreallocateSize / kBlockSize, f_stat.st_blocks); + ASSERT_EQ((unsigned int)(kPreallocateSize / kBlockSize), f_stat.st_blocks); // close the file, should deallocate the blocks wfile.reset(); stat(fname.c_str(), &f_stat); - ASSERT_EQ(data.size(), f_stat.st_size); + ASSERT_EQ((unsigned int)data.size(), f_stat.st_size); // verify that preallocated blocks were deallocated on file close size_t data_blocks_pages = ((data.size() + kPageSize - 1) / kPageSize); - ASSERT_EQ(data_blocks_pages * kPageSize / kBlockSize, f_stat.st_blocks); + ASSERT_EQ((unsigned int)(data_blocks_pages * kPageSize / kBlockSize), f_stat.st_blocks); } #endif From f234dfd8fbebe483298ee015214dcf8fbcd28342 Mon Sep 17 00:00:00 2001 From: Caio SBA Date: Fri, 14 Mar 2014 23:56:58 +0000 Subject: [PATCH 3/8] Breaking line --- util/env_test.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/util/env_test.cc b/util/env_test.cc index 892586478..00d813fa4 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -182,7 +182,8 @@ TEST(EnvPosixTest, TwoPools) { } // Wait a short while for the jobs to be dispatched. Env::Default()->SleepForMicroseconds(kDelayMicros); - ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), env_->GetThreadPoolQueueLen()); + ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), + env_->GetThreadPoolQueueLen()); ASSERT_EQ((unsigned int)(kJobs - kLowPoolSize), env_->GetThreadPoolQueueLen(Env::Priority::LOW)); ASSERT_EQ((unsigned int)(kJobs - kHighPoolSize), From 453ec52ca1c39c1af9bf2db01510dc9a54c210ff Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Fri, 14 Mar 2014 18:36:47 -0700 Subject: [PATCH 4/8] journal log_number correctly in MANIFEST Summary: Here is what it can cause probelm: There is one memtable flush and one compaction. Both call LogAndApply(). If both edits are applied in the same batch with flush edit first and the compaction edit followed. LogAndApplyHelper() will assign compaction edit current VersionSet's log number(which should be smaller than the log number from flush edit). It cause log_numbers in MANIFEST to be not monotonic increasing, which violates the assume Recover() makes. What is more is after comitting to MANIFEST file, log_number_ in VersionSet is updated to the log_number from the last edit, which is the compaction one. It ends up not updating the log_number. Test Plan: make whitebox_crash_test got another assertion about iter->valid(), not sure if that is related to this. Reviewers: igor, haobo Reviewed By: igor CC: leveldb Differential Revision: https://reviews.facebook.net/D16875 --- db/version_set.cc | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 14df94471..226cc881e 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1507,9 +1507,20 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, ManifestWriter* last_writer = &w; assert(!manifest_writers_.empty()); assert(manifest_writers_.front() == &w); + + uint64_t max_log_number_in_batch = 0; for (const auto& writer : manifest_writers_) { last_writer = writer; LogAndApplyHelper(&builder, v, writer->edit, mu); + if (edit->has_log_number_) { + // When batch commit of manifest writes, we could have multiple flush and + // compaction edits. A flush edit has a bigger log number than what + // VersionSet has while a compaction edit does not have a log number. + // In this case, we want to make sure the largest log number is updated + // to VersionSet + max_log_number_in_batch = + std::max(max_log_number_in_batch, edit->log_number_); + } batch_edits.push_back(writer->edit); } builder.SaveTo(v); @@ -1640,7 +1651,10 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, if (s.ok()) { manifest_file_size_ = new_manifest_file_size; AppendVersion(v); - log_number_ = edit->log_number_; + if (max_log_number_in_batch != 0) { + assert(log_number_ < max_log_number_in_batch); + log_number_ = max_log_number_in_batch; + } prev_log_number_ = edit->prev_log_number_; } else { @@ -1678,9 +1692,9 @@ void VersionSet::LogAndApplyHelper(Builder* builder, Version* v, if (edit->has_log_number_) { assert(edit->log_number_ >= log_number_); assert(edit->log_number_ < next_file_number_); - } else { - edit->SetLogNumber(log_number_); } + // If the edit does not have log number, it must be generated + // from a compaction if (!edit->has_prev_log_number_) { edit->SetPrevLogNumber(prev_log_number_); @@ -1772,7 +1786,15 @@ Status VersionSet::Recover() { builder.Apply(&edit); + // Only a flush's edit or a new snapshot can write log number during + // LogAndApply. Since memtables are flushed and inserted into + // manifest_writers_ queue in order, the log number in MANIFEST file + // should be monotonically increasing. if (edit.has_log_number_) { + if (have_log_number && log_number > edit.log_number_) { + s = Status::Corruption("log number decreases"); + break; + } log_number = edit.log_number_; have_log_number = true; } @@ -2072,6 +2094,7 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { f->smallest_seqno, f->largest_seqno); } } + edit.SetLogNumber(log_number_); std::string record; edit.EncodeTo(&record); From 0cf6c8f7cec664e3be16a25e01198f5160632273 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Sat, 15 Mar 2014 23:30:43 -0700 Subject: [PATCH 5/8] fix: use the correct edit when comparing log_number Summary: In the last fix, I forgot to point to the writer when comparing edit, which is apparently not correct. Test Plan: still running make whitebox_crash_test Reviewers: igor, haobo, igor2 Reviewed By: igor2 CC: leveldb Differential Revision: https://reviews.facebook.net/D16911 --- db/version_set.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index 226cc881e..5c4043116 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1512,14 +1512,14 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu, for (const auto& writer : manifest_writers_) { last_writer = writer; LogAndApplyHelper(&builder, v, writer->edit, mu); - if (edit->has_log_number_) { + if (writer->edit->has_log_number_) { // When batch commit of manifest writes, we could have multiple flush and // compaction edits. A flush edit has a bigger log number than what // VersionSet has while a compaction edit does not have a log number. // In this case, we want to make sure the largest log number is updated // to VersionSet max_log_number_in_batch = - std::max(max_log_number_in_batch, edit->log_number_); + std::max(max_log_number_in_batch, writer->edit->log_number_); } batch_edits.push_back(writer->edit); } From f9d0530213da1a88a53d2476557309183dcce715 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 17 Mar 2014 09:41:41 -0700 Subject: [PATCH 6/8] Don't care about signed/unsigned compare Summary: We need to stop these: https://github.com/facebook/rocksdb/pull/99 https://github.com/facebook/rocksdb/pull/83 Test Plan: no Reviewers: dhruba, haobo, sdong, ljin, yhchiang Reviewed By: ljin CC: leveldb Differential Revision: https://reviews.facebook.net/D16905 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a7fcbf0cd..2e05e6513 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ else PLATFORM_CCFLAGS += $(JEMALLOC_INCLUDE) -DHAVE_JEMALLOC endif -WARNING_FLAGS = -Wall -Werror +WARNING_FLAGS = -Wall -Werror -Wno-sign-compare CFLAGS += -g $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) CXXFLAGS += -g $(WARNING_FLAGS) -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) -Woverloaded-virtual From c61c9830d4b9e4eed66bd8e013788f861ab88d2e Mon Sep 17 00:00:00 2001 From: sdong Date: Fri, 14 Mar 2014 16:06:38 -0700 Subject: [PATCH 7/8] Fix a bug that Prev() can hang. Summary: Prev() now can hang when there is a key with more than max_skipped number of appearance internally but all of them are newer than the sequence ID to seek. Add unit tests to confirm the bug and fix it. Test Plan: make all check Reviewers: igor, haobo Reviewed By: igor CC: ljin, yhchiang, leveldb Differential Revision: https://reviews.facebook.net/D16899 --- db/db_iter.cc | 10 +++++--- db/db_test.cc | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 3 deletions(-) diff --git a/db/db_iter.cc b/db/db_iter.cc index b8d9038a1..e7491f7e3 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -379,10 +379,10 @@ void DBIter::FindPrevUserEntry() { uint64_t num_skipped = 0; ValueType value_type = kTypeDeletion; + bool saved_key_valid = true; if (iter_->Valid()) { do { ParsedInternalKey ikey; - bool saved_key_cleared = false; if (ParseKey(&ikey) && ikey.sequence <= sequence_) { if ((value_type != kTypeDeletion) && user_comparator_->Compare(ikey.user_key, saved_key_) < 0) { @@ -393,7 +393,7 @@ void DBIter::FindPrevUserEntry() { if (value_type == kTypeDeletion) { saved_key_.clear(); ClearSavedValue(); - saved_key_cleared = true; + saved_key_valid = false; } else { Slice raw_value = iter_->value(); if (saved_value_.capacity() > raw_value.size() + 1048576) { @@ -403,13 +403,17 @@ void DBIter::FindPrevUserEntry() { SaveKey(ExtractUserKey(iter_->key()), &saved_key_); saved_value_.assign(raw_value.data(), raw_value.size()); } + } else { + // In the case of ikey.sequence > sequence_, we might have already + // iterated to a different user key. + saved_key_valid = false; } num_skipped++; // If we have sequentially iterated via numerous keys and still not // found the prev user-key, then it is better to seek so that we can // avoid too many key comparisons. We seek to the first occurence of // our current key by looking for max sequence number. - if (!saved_key_cleared && num_skipped > max_skip_) { + if (saved_key_valid && num_skipped > max_skip_) { num_skipped = 0; std::string last_key; AppendInternalKey(&last_key, diff --git a/db/db_test.cc b/db/db_test.cc index c508c666d..19b95540b 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -1368,6 +1368,75 @@ TEST(DBTest, IterSeekBeforePrev) { delete iter; } +TEST(DBTest, IterNextWithNewerSeq) { + ASSERT_OK(Put("0", "0")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + ASSERT_OK(Put("d", "e")); + auto iter = db_->NewIterator(ReadOptions()); + + // Create a key that needs to be skipped for Seq too new + for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; + i++) { + ASSERT_OK(Put("b", "f")); + } + + iter->Seek(Slice("a")); + ASSERT_EQ(IterStatus(iter), "a->b"); + iter->Next(); + ASSERT_EQ(IterStatus(iter), "c->d"); + delete iter; +} + +TEST(DBTest, IterPrevWithNewerSeq) { + ASSERT_OK(Put("0", "0")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + ASSERT_OK(Put("d", "e")); + auto iter = db_->NewIterator(ReadOptions()); + + // Create a key that needs to be skipped for Seq too new + for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; + i++) { + ASSERT_OK(Put("b", "f")); + } + + iter->Seek(Slice("d")); + ASSERT_EQ(IterStatus(iter), "d->e"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "c->d"); + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->b"); + + iter->Prev(); + delete iter; +} + +TEST(DBTest, IterPrevWithNewerSeq2) { + ASSERT_OK(Put("0", "0")); + dbfull()->Flush(FlushOptions()); + ASSERT_OK(Put("a", "b")); + ASSERT_OK(Put("c", "d")); + ASSERT_OK(Put("d", "e")); + auto iter = db_->NewIterator(ReadOptions()); + iter->Seek(Slice("c")); + ASSERT_EQ(IterStatus(iter), "c->d"); + + // Create a key that needs to be skipped for Seq too new + for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1; + i++) { + ASSERT_OK(Put("b", "f")); + } + + iter->Prev(); + ASSERT_EQ(IterStatus(iter), "a->b"); + + iter->Prev(); + delete iter; +} + TEST(DBTest, IterEmpty) { do { Iterator* iter = db_->NewIterator(ReadOptions()); From 9b8a2b52d451dd1a162ef34c59df907f4b476b59 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Mon, 17 Mar 2014 10:02:46 -0700 Subject: [PATCH 8/8] No prefix iterator in db_stress Summary: We're trying to deprecate prefix iterators, so no need to test them in db_stress Test Plan: ran it Reviewers: ljin Reviewed By: ljin CC: leveldb Differential Revision: https://reviews.facebook.net/D16917 --- tools/db_stress.cc | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tools/db_stress.cc b/tools/db_stress.cc index b7b8e91ea..a47a03933 100644 --- a/tools/db_stress.cc +++ b/tools/db_stress.cc @@ -952,10 +952,10 @@ class StressTest { prefixes[i].resize(FLAGS_prefix_size); prefix_slices[i] = Slice(prefixes[i]); readoptionscopy[i] = readoptions; - readoptionscopy[i].prefix = &prefix_slices[i]; + readoptionscopy[i].prefix_seek = true; readoptionscopy[i].snapshot = snapshot; iters[i] = db_->NewIterator(readoptionscopy[i]); - iters[i]->SeekToFirst(); + iters[i]->Seek(prefix_slices[i]); } int count = 0; @@ -1103,11 +1103,11 @@ class StressTest { // prefix if (!FLAGS_test_batches_snapshots) { Slice prefix = Slice(key.data(), FLAGS_prefix_size); - read_opts.prefix = &prefix; + read_opts.prefix_seek = true; Iterator* iter = db_->NewIterator(read_opts); int64_t count = 0; - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - assert(iter->key().starts_with(prefix)); + for (iter->Seek(prefix); + iter->Valid() && iter->key().starts_with(prefix); iter->Next()) { ++count; } assert(count <= @@ -1121,7 +1121,6 @@ class StressTest { } else { MultiPrefixScan(thread, read_opts, key); } - read_opts.prefix = nullptr; } else if (prefixBound <= prob_op && prob_op < writeBound) { // OPERATION write uint32_t value_base = thread->rand.Next();