From ba6dac0e809b249532a7465f71a00ccda307161d Mon Sep 17 00:00:00 2001 From: "dgrogan@chromium.org" Date: Wed, 20 Apr 2011 22:48:11 +0000 Subject: [PATCH] @20776309 * env_chromium.cc should not export symbols. * Fix MSVC warnings. * Removed large value support. * Fix broken reference to documentation file git-svn-id: https://leveldb.googlecode.com/svn/trunk@24 62dab493-f737-651d-591e-8d6aee1b9529 --- Makefile | 5 - README | 4 +- TODO | 4 +- db/builder.cc | 9 -- db/builder.h | 6 +- db/corruption_test.cc | 26 +--- db/db_bench.cc | 22 ---- db/db_impl.cc | 217 +++++--------------------------- db/db_impl.h | 23 ---- db/db_iter.cc | 101 +-------------- db/db_test.cc | 251 ++++++-------------------------------- db/dbformat.cc | 65 ---------- db/dbformat.h | 53 +------- db/dbformat_test.cc | 15 --- db/filename.cc | 19 --- db/filename.h | 16 +-- db/filename_test.cc | 76 ++++-------- db/log_writer.cc | 4 +- db/repair.cc | 40 ++---- db/version_edit.cc | 43 +------ db/version_edit.h | 18 --- db/version_edit_test.cc | 6 +- db/version_set.cc | 129 +++----------------- db/version_set.h | 28 +---- db/write_batch.cc | 16 --- db/write_batch_internal.h | 4 - db/write_batch_test.cc | 23 ---- doc/impl.html | 13 +- doc/index.html | 11 -- include/leveldb/options.h | 12 +- leveldb.gyp | 12 -- port/port_android.h | 8 -- port/port_chromium.h | 7 -- port/port_example.h | 5 - port/port_posix.h | 5 - table/block.cc | 4 +- table/block_builder.cc | 2 +- table/format.cc | 4 +- util/arena.cc | 2 +- util/coding.cc | 2 +- util/comparator.cc | 2 +- util/logging.cc | 2 +- util/options.cc | 1 - util/random.h | 2 +- 44 files changed, 152 insertions(+), 1165 deletions(-) diff --git a/Makefile b/Makefile index 756970150..43ac23d5f 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,6 @@ LIBOBJECTS = \ ./db/version_set.o \ ./db/write_batch.o \ ./port/port_posix.o \ - ./port/sha1_portable.o \ ./table/block.o \ ./table/block_builder.o \ ./table/format.o \ @@ -63,7 +62,6 @@ TESTS = \ env_test \ filename_test \ log_test \ - sha1_test \ skiplist_test \ table_test \ version_edit_test \ @@ -115,9 +113,6 @@ log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CC) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ -sha1_test: port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS) - $(CC) $(LDFLAGS) port/sha1_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ - skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CC) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ diff --git a/README b/README index c97e43c8c..3618adeee 100644 --- a/README +++ b/README @@ -2,10 +2,10 @@ leveldb: A key-value store Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) The code under this directory implements a system for maintaining a -persistent key/value store. +persistent key/value store. See doc/index.html for more explanation. -See doc/db_layout.txt for a brief overview of the implementation. +See doc/impl.html for a brief overview of the implementation. The public interface is in include/*.h. Callers should not include or rely on the details of any other header files in this package. Those diff --git a/TODO b/TODO index 2f848b863..ce8143906 100644 --- a/TODO +++ b/TODO @@ -8,7 +8,7 @@ db object stores, etc. can be done in the background anyway, so probably not that important. -api changes? -- Efficient large value reading and writing +api changes: +- Make it wrappable Faster Get implementation diff --git a/db/builder.cc b/db/builder.cc index 6c8e6b8ef..9f132d78a 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -38,15 +38,6 @@ Status BuildTable(const std::string& dbname, for (; iter->Valid(); iter->Next()) { Slice key = iter->key(); meta->largest.DecodeFrom(key); - if (ExtractValueType(key) == kTypeLargeValueRef) { - if (iter->value().size() != LargeValueRef::ByteSize()) { - s = Status::Corruption("invalid indirect reference hash value (L0)"); - break; - } - edit->AddLargeValueRef(LargeValueRef::FromRef(iter->value()), - meta->number, - iter->key()); - } builder->Add(key, iter->value()); } diff --git a/db/builder.h b/db/builder.h index 4efcb04a5..5dd17b666 100644 --- a/db/builder.h +++ b/db/builder.h @@ -20,9 +20,9 @@ class VersionEdit; // Build a Table file from the contents of *iter. The generated file // will be named according to meta->number. On success, the rest of // *meta will be filled with metadata about the generated table, and -// large value refs and the added file information will be added to -// *edit. If no data is present in *iter, meta->file_size will be set -// to zero, and no Table file will be produced. +// the file information will be added to *edit. If no data is present +// in *iter, meta->file_size will be set to zero, and no Table file +// will be produced. extern Status BuildTable(const std::string& dbname, Env* env, const Options& options, diff --git a/db/corruption_test.cc b/db/corruption_test.cc index 63d8d8bca..12d176ef2 100644 --- a/db/corruption_test.cc +++ b/db/corruption_test.cc @@ -121,11 +121,10 @@ class CorruptionTest { std::vector filenames; ASSERT_OK(env_.GetChildren(dbname_, &filenames)); uint64_t number; - LargeValueRef large_ref; FileType type; std::vector candidates; for (int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &large_ref, &type) && + if (ParseFileName(filenames[i], &number, &type) && type == filetype) { candidates.push_back(dbname_ + "/" + filenames[i]); } @@ -276,29 +275,6 @@ TEST(CorruptionTest, SequenceNumberRecovery) { ASSERT_EQ("v6", v); } -TEST(CorruptionTest, LargeValueRecovery) { - Options options; - options.large_value_threshold = 10000; - Reopen(&options); - - Random rnd(301); - std::string big; - ASSERT_OK(db_->Put(WriteOptions(), - "foo", test::RandomString(&rnd, 100000, &big))); - std::string v; - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ(big, v); - - RepairDB(); - Reopen(); - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ(big, v); - - Reopen(); - ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); - ASSERT_EQ(big, v); -} - TEST(CorruptionTest, CorruptedDescriptor) { ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); DBImpl* dbi = reinterpret_cast(db_); diff --git a/db/db_bench.cc b/db/db_bench.cc index 849ebfa66..d1cbdc01c 100644 --- a/db/db_bench.cc +++ b/db/db_bench.cc @@ -28,7 +28,6 @@ // readreverse -- read N values in reverse order // readrandom -- read N values in random order // crc32c -- repeated crc32c of 4K of data -// sha1 -- repeated SHA1 computation over 4K of data // Meta operations: // compact -- Compact the entire DB // stats -- Print DB stats @@ -48,7 +47,6 @@ static const char* FLAGS_benchmarks = "readreverse," "fill100K," "crc32c," - "sha1," "snappycomp," "snappyuncomp," ; @@ -366,8 +364,6 @@ class Benchmark { Compact(); } else if (name == Slice("crc32c")) { Crc32c(4096, "(4K per op)"); - } else if (name == Slice("sha1")) { - SHA1(4096, "(4K per op)"); } else if (name == Slice("snappycomp")) { SnappyCompress(); } else if (name == Slice("snappyuncomp")) { @@ -406,24 +402,6 @@ class Benchmark { message_ = label; } - void SHA1(int size, const char* label) { - // SHA1 about 100MB of data total - std::string data(size, 'x'); - int64_t bytes = 0; - char sha1[20]; - while (bytes < 100 * 1048576) { - port::SHA1_Hash(data.data(), size, sha1); - FinishedSingleOp(); - bytes += size; - } - - // Print so result is not dead - fprintf(stderr, "... sha1=%02x...\r", static_cast(sha1[0])); - - bytes_ = bytes; - message_ = label; - } - void SnappyCompress() { Slice input = gen_.Generate(Options().block_size); int64_t bytes = 0; diff --git a/db/db_impl.cc b/db/db_impl.cc index d01223682..3b9e04e83 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -81,8 +81,8 @@ class NullWritableFile : public WritableFile { // Fix user-supplied options to be reasonable template static void ClipToRange(T* ptr, V minvalue, V maxvalue) { - if (*ptr > maxvalue) *ptr = maxvalue; - if (*ptr < minvalue) *ptr = minvalue; + if (static_cast(*ptr) > maxvalue) *ptr = maxvalue; + if (static_cast(*ptr) < minvalue) *ptr = minvalue; } Options SanitizeOptions(const std::string& dbname, const InternalKeyComparator* icmp, @@ -91,7 +91,6 @@ Options SanitizeOptions(const std::string& dbname, result.comparator = icmp; ClipToRange(&result.max_open_files, 20, 50000); ClipToRange(&result.write_buffer_size, 64<<10, 1<<30); - ClipToRange(&result.large_value_threshold, 16<<10, 1<<30); ClipToRange(&result.block_size, 1<<10, 4<<20); if (result.info_log == NULL) { // Open a log file in the same directory as the db @@ -213,15 +212,12 @@ void DBImpl::DeleteObsoleteFiles() { std::set live = pending_outputs_; versions_->AddLiveFiles(&live); - versions_->CleanupLargeValueRefs(live); - std::vector filenames; env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose uint64_t number; - LargeValueRef large_ref; FileType type; - for (int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &large_ref, &type)) { + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { bool keep = true; switch (type) { case kLogFile: @@ -241,9 +237,6 @@ void DBImpl::DeleteObsoleteFiles() { // be recorded in pending_outputs_, which is inserted into "live" keep = (live.find(number) != live.end()); break; - case kLargeValueFile: - keep = versions_->LargeValueIsLive(large_ref); - break; case kCurrentFile: case kDBLockFile: case kInfoLogFile: @@ -599,7 +592,7 @@ void DBImpl::CleanupCompaction(CompactionState* compact) { assert(compact->outfile == NULL); } delete compact->outfile; - for (int i = 0; i < compact->outputs.size(); i++) { + for (size_t i = 0; i < compact->outputs.size(); i++) { const CompactionState::Output& out = compact->outputs[i]; pending_outputs_.erase(out.number); } @@ -695,7 +688,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) { // Add compaction outputs compact->compaction->AddInputDeletions(compact->compaction->edit()); const int level = compact->compaction->level(); - for (int i = 0; i < compact->outputs.size(); i++) { + for (size_t i = 0; i < compact->outputs.size(); i++) { const CompactionState::Output& out = compact->outputs[i]; compact->compaction->edit()->AddFile( level + 1, @@ -710,7 +703,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) { DeleteObsoleteFiles(); } else { // Discard any files we may have created during this failed compaction - for (int i = 0; i < compact->outputs.size(); i++) { + for (size_t i = 0; i < compact->outputs.size(); i++) { env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number)); } } @@ -811,7 +804,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { " Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, " "%d smallest_snapshot: %d", ikey.user_key.ToString().c_str(), - (int)ikey.sequence, ikey.type, kTypeLargeValueRef, drop, + (int)ikey.sequence, ikey.type, kTypeValue, drop, compact->compaction->IsBaseLevelForKey(ikey.user_key), (int)last_sequence_for_key, (int)compact->smallest_snapshot); #endif @@ -828,26 +821,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { compact->current_output()->smallest.DecodeFrom(key); } compact->current_output()->largest.DecodeFrom(key); - - if (ikey.type == kTypeLargeValueRef) { - if (input->value().size() != LargeValueRef::ByteSize()) { - if (options_.paranoid_checks) { - status = Status::Corruption("invalid large value ref"); - break; - } else { - Log(env_, options_.info_log, - "compaction found invalid large value ref"); - } - } else { - compact->compaction->edit()->AddLargeValueRef( - LargeValueRef::FromRef(input->value()), - compact->current_output()->number, - input->key()); - compact->builder->Add(key, input->value()); - } - } else { - compact->builder->Add(key, input->value()); - } + compact->builder->Add(key, input->value()); // Close output file if it is big enough if (compact->builder->FileSize() >= @@ -881,7 +855,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { stats.bytes_read += compact->compaction->input(which, i)->file_size; } } - for (int i = 0; i < compact->outputs.size(); i++) { + for (size_t i = 0; i < compact->outputs.size(); i++) { stats.bytes_written += compact->outputs[i].file_size; } @@ -985,40 +959,27 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) { Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) { Status status; - - WriteBatch* final = NULL; - { - MutexLock l(&mutex_); - status = MakeRoomForWrite(false); // May temporarily release lock and wait - - uint64_t last_sequence = versions_->LastSequence(); - if (status.ok()) { - status = HandleLargeValues(last_sequence + 1, updates, &final); + MutexLock l(&mutex_); + status = MakeRoomForWrite(false); // May temporarily release lock and wait + uint64_t last_sequence = versions_->LastSequence(); + if (status.ok()) { + WriteBatchInternal::SetSequence(updates, last_sequence + 1); + last_sequence += WriteBatchInternal::Count(updates); + versions_->SetLastSequence(last_sequence); + + // Add to log and apply to memtable + status = log_->AddRecord(WriteBatchInternal::Contents(updates)); + if (status.ok() && options.sync) { + status = logfile_->Sync(); } if (status.ok()) { - WriteBatchInternal::SetSequence(final, last_sequence + 1); - last_sequence += WriteBatchInternal::Count(final); - versions_->SetLastSequence(last_sequence); - - // Add to log and apply to memtable - status = log_->AddRecord(WriteBatchInternal::Contents(final)); - if (status.ok() && options.sync) { - status = logfile_->Sync(); - } - if (status.ok()) { - status = WriteBatchInternal::InsertInto(final, mem_); - } - } - - if (options.post_write_snapshot != NULL) { - *options.post_write_snapshot = - status.ok() ? snapshots_.New(last_sequence) : NULL; + status = WriteBatchInternal::InsertInto(updates, mem_); } } - if (final != updates) { - delete final; + if (options.post_write_snapshot != NULL) { + *options.post_write_snapshot = + status.ok() ? snapshots_.New(last_sequence) : NULL; } - return status; } @@ -1070,124 +1031,6 @@ Status DBImpl::MakeRoomForWrite(bool force) { return s; } -bool DBImpl::HasLargeValues(const WriteBatch& batch) const { - if (WriteBatchInternal::ByteSize(&batch) >= options_.large_value_threshold) { - for (WriteBatchInternal::Iterator it(batch); !it.Done(); it.Next()) { - if (it.op() == kTypeValue && - it.value().size() >= options_.large_value_threshold) { - return true; - } - } - } - return false; -} - -// Given "raw_value", determines the appropriate compression format to use -// and stores the data that should be written to the large value file in -// "*file_bytes", and sets "*ref" to the appropriate large value reference. -// May use "*scratch" as backing store for "*file_bytes". -void DBImpl::MaybeCompressLargeValue( - const Slice& raw_value, - Slice* file_bytes, - std::string* scratch, - LargeValueRef* ref) { - switch (options_.compression) { - case kSnappyCompression: { - if (port::Snappy_Compress(raw_value.data(), raw_value.size(), scratch) && - (scratch->size() < (raw_value.size() / 8) * 7)) { - *file_bytes = *scratch; - *ref = LargeValueRef::Make(raw_value, kSnappyCompression); - return; - } - - // Less than 12.5% compression: just leave as uncompressed data - break; - } - case kNoCompression: - // Use default code outside of switch - break; - } - // Store as uncompressed data - *file_bytes = raw_value; - *ref = LargeValueRef::Make(raw_value, kNoCompression); -} - -Status DBImpl::HandleLargeValues(SequenceNumber assigned_seq, - WriteBatch* updates, - WriteBatch** final) { - if (!HasLargeValues(*updates)) { - // Fast path: no large values found - *final = updates; - } else { - // Copy *updates to a new WriteBatch, replacing the references to - *final = new WriteBatch; - SequenceNumber seq = assigned_seq; - for (WriteBatchInternal::Iterator it(*updates); !it.Done(); it.Next()) { - switch (it.op()) { - case kTypeValue: - if (it.value().size() < options_.large_value_threshold) { - (*final)->Put(it.key(), it.value()); - } else { - std::string scratch; - Slice file_bytes; - LargeValueRef large_ref; - MaybeCompressLargeValue( - it.value(), &file_bytes, &scratch, &large_ref); - InternalKey ikey(it.key(), seq, kTypeLargeValueRef); - if (versions_->RegisterLargeValueRef( - large_ref, versions_->LogNumber(), ikey)) { - // TODO(opt): avoid holding the lock here (but be careful about - // another thread doing a Write and switching logs or - // having us get a different "assigned_seq" value). - - uint64_t tmp_number = versions_->NewFileNumber(); - pending_outputs_.insert(tmp_number); - std::string tmp = TempFileName(dbname_, tmp_number); - WritableFile* file; - Status s = env_->NewWritableFile(tmp, &file); - if (!s.ok()) { - return s; // Caller will delete *final - } - - file->Append(file_bytes); - - s = file->Close(); - delete file; - - if (s.ok()) { - const std::string fname = - LargeValueFileName(dbname_, large_ref); - s = env_->RenameFile(tmp, fname); - } else { - Log(env_, options_.info_log, "Write large value: %s", - s.ToString().c_str()); - } - pending_outputs_.erase(tmp_number); - - if (!s.ok()) { - env_->DeleteFile(tmp); // Cleanup; intentionally ignoring error - return s; // Caller will delete *final - } - } - - // Put an indirect reference in the write batch in place - // of large value - WriteBatchInternal::PutLargeValueRef(*final, it.key(), large_ref); - } - break; - case kTypeLargeValueRef: - return Status::Corruption("Corrupted write batch"); - break; - case kTypeDeletion: - (*final)->Delete(it.key()); - break; - } - seq = seq + 1; - } - } - return Status::OK(); -} - bool DBImpl::GetProperty(const Slice& property, std::string* value) { value->clear(); @@ -1205,7 +1048,8 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { return false; } else { char buf[100]; - snprintf(buf, sizeof(buf), "%d", versions_->NumLevelFiles(level)); + snprintf(buf, sizeof(buf), "%d", + versions_->NumLevelFiles(static_cast(level))); *value = buf; return true; } @@ -1325,10 +1169,9 @@ Status DestroyDB(const std::string& dbname, const Options& options) { Status result = env->LockFile(LockFileName(dbname), &lock); if (result.ok()) { uint64_t number; - LargeValueRef large_ref; FileType type; - for (int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &large_ref, &type)) { + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { Status del = env->DeleteFile(dbname + "/" + filenames[i]); if (result.ok() && !del.ok()) { result = del; diff --git a/db/db_impl.h b/db/db_impl.h index 1f685f011..7699d8cc0 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -92,29 +92,6 @@ class DBImpl : public DB { Status WriteLevel0Table(MemTable* mem, VersionEdit* edit); Status MakeRoomForWrite(bool force /* compact even if there is room? */); - bool HasLargeValues(const WriteBatch& batch) const; - - // Process data in "*updates" and return a status. "assigned_seq" - // is the sequence number assigned to the first mod in "*updates". - // If no large values are encountered, "*final" is set to "updates". - // If large values were encountered, registers the references of the - // large values with the VersionSet, writes the large values to - // files (if appropriate), and allocates a new WriteBatch with the - // large values replaced with indirect references and stores a - // pointer to the new WriteBatch in *final. If *final != updates on - // return, then the client should delete *final when no longer - // needed. Returns OK on success, and an appropriate error - // otherwise. - Status HandleLargeValues(SequenceNumber assigned_seq, - WriteBatch* updates, - WriteBatch** final); - - // Helper routine for HandleLargeValues - void MaybeCompressLargeValue( - const Slice& raw_value, - Slice* file_bytes, - std::string* scratch, - LargeValueRef* ref); struct CompactionState; diff --git a/db/db_iter.cc b/db/db_iter.cc index 31c2a38f5..0be18fffa 100644 --- a/db/db_iter.cc +++ b/db/db_iter.cc @@ -53,13 +53,11 @@ class DBIter: public Iterator { user_comparator_(cmp), iter_(iter), sequence_(s), - large_(NULL), direction_(kForward), valid_(false) { } virtual ~DBIter() { delete iter_; - delete large_; } virtual bool Valid() const { return valid_; } virtual Slice key() const { @@ -68,20 +66,10 @@ class DBIter: public Iterator { } virtual Slice value() const { assert(valid_); - Slice raw_value = (direction_ == kForward) ? iter_->value() : saved_value_; - if (large_ == NULL) { - return raw_value; - } else { - MutexLock l(&large_->mutex); - if (!large_->produced) { - ReadIndirectValue(raw_value); - } - return large_->value; - } + return (direction_ == kForward) ? iter_->value() : saved_value_; } virtual Status status() const { if (status_.ok()) { - if (large_ != NULL && !large_->status.ok()) return large_->status; return iter_->status(); } else { return status_; @@ -95,29 +83,14 @@ class DBIter: public Iterator { virtual void SeekToLast(); private: - struct Large { - port::Mutex mutex; - std::string value; - bool produced; - Status status; - }; - void FindNextUserEntry(bool skipping, std::string* skip); void FindPrevUserEntry(); bool ParseKey(ParsedInternalKey* key); - void ReadIndirectValue(Slice ref) const; inline void SaveKey(const Slice& k, std::string* dst) { dst->assign(k.data(), k.size()); } - inline void ForgetLargeValue() { - if (large_ != NULL) { - delete large_; - large_ = NULL; - } - } - inline void ClearSavedValue() { if (saved_value_.capacity() > 1048576) { std::string empty; @@ -136,7 +109,6 @@ class DBIter: public Iterator { Status status_; std::string saved_key_; // == current key when direction_==kReverse std::string saved_value_; // == current raw value when direction_==kReverse - Large* large_; // Non-NULL if value is an indirect reference Direction direction_; bool valid_; @@ -156,7 +128,6 @@ inline bool DBIter::ParseKey(ParsedInternalKey* ikey) { void DBIter::Next() { assert(valid_); - ForgetLargeValue(); if (direction_ == kReverse) { // Switch directions? direction_ = kForward; @@ -185,7 +156,6 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { // Loop until we hit an acceptable entry to yield assert(iter_->Valid()); assert(direction_ == kForward); - assert(large_ == NULL); do { ParsedInternalKey ikey; if (ParseKey(&ikey) && ikey.sequence <= sequence_) { @@ -197,17 +167,12 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { skipping = true; break; case kTypeValue: - case kTypeLargeValueRef: if (skipping && user_comparator_->Compare(ikey.user_key, *skip) <= 0) { // Entry hidden } else { valid_ = true; saved_key_.clear(); - if (ikey.type == kTypeLargeValueRef) { - large_ = new Large; - large_->produced = false; - } return; } break; @@ -221,7 +186,6 @@ void DBIter::FindNextUserEntry(bool skipping, std::string* skip) { void DBIter::Prev() { assert(valid_); - ForgetLargeValue(); if (direction_ == kForward) { // Switch directions? // iter_ is pointing at the current entry. Scan backwards until @@ -249,7 +213,6 @@ void DBIter::Prev() { void DBIter::FindPrevUserEntry() { assert(direction_ == kReverse); - assert(large_ == NULL); ValueType value_type = kTypeDeletion; if (iter_->Valid()) { @@ -286,16 +249,11 @@ void DBIter::FindPrevUserEntry() { direction_ = kForward; } else { valid_ = true; - if (value_type == kTypeLargeValueRef) { - large_ = new Large; - large_->produced = false; - } } } void DBIter::Seek(const Slice& target) { direction_ = kForward; - ForgetLargeValue(); ClearSavedValue(); saved_key_.clear(); AppendInternalKey( @@ -310,7 +268,6 @@ void DBIter::Seek(const Slice& target) { void DBIter::SeekToFirst() { direction_ = kForward; - ForgetLargeValue(); ClearSavedValue(); iter_->SeekToFirst(); if (iter_->Valid()) { @@ -322,67 +279,11 @@ void DBIter::SeekToFirst() { void DBIter::SeekToLast() { direction_ = kReverse; - ForgetLargeValue(); ClearSavedValue(); iter_->SeekToLast(); FindPrevUserEntry(); } -void DBIter::ReadIndirectValue(Slice ref) const { - assert(!large_->produced); - large_->produced = true; - LargeValueRef large_ref; - if (ref.size() != LargeValueRef::ByteSize()) { - large_->status = Status::Corruption("malformed large value reference"); - return; - } - memcpy(large_ref.data, ref.data(), LargeValueRef::ByteSize()); - std::string fname = LargeValueFileName(*dbname_, large_ref); - RandomAccessFile* file; - Status s = env_->NewRandomAccessFile(fname, &file); - uint64_t file_size = 0; - if (s.ok()) { - s = env_->GetFileSize(fname, &file_size); - } - if (s.ok()) { - uint64_t value_size = large_ref.ValueSize(); - large_->value.resize(value_size); - Slice result; - s = file->Read(0, file_size, &result, - const_cast(large_->value.data())); - if (s.ok()) { - if (result.size() == file_size) { - switch (large_ref.compression_type()) { - case kNoCompression: { - if (result.data() != large_->value.data()) { - large_->value.assign(result.data(), result.size()); - } - break; - } - case kSnappyCompression: { - std::string uncompressed; - if (port::Snappy_Uncompress(result.data(), result.size(), - &uncompressed) && - uncompressed.size() == large_ref.ValueSize()) { - swap(uncompressed, large_->value); - } else { - s = Status::Corruption( - "Unable to read entire compressed large value file"); - } - } - } - } else { - s = Status::Corruption("Unable to read entire large value file"); - } - } - delete file; // Ignore errors on closing - } - if (!s.ok()) { - large_->value.clear(); - large_->status = s; - } -} - } // anonymous namespace Iterator* NewDBIterator( diff --git a/db/db_test.cc b/db/db_test.cc index 04de3313e..f828e3dce 100644 --- a/db/db_test.cc +++ b/db/db_test.cc @@ -119,9 +119,6 @@ class DBTest { case kTypeValue: result += iter->value().ToString(); break; - case kTypeLargeValueRef: - result += "LARGEVALUE(" + EscapeString(iter->value()) + ")"; - break; case kTypeDeletion: result += "DEL"; break; @@ -153,26 +150,6 @@ class DBTest { return size; } - std::set LargeValueFiles() const { - // Return the set of large value files that exist in the database - std::vector filenames; - env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose - uint64_t number; - LargeValueRef large_ref; - FileType type; - std::set live; - for (int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &large_ref, &type) && - type == kLargeValueFile) { - fprintf(stderr, " live: %s\n", - LargeValueRefToFilenameString(large_ref).c_str()); - live.insert(large_ref); - } - } - fprintf(stderr, "Found %d live large value files\n", (int)live.size()); - return live; - } - void Compact(const Slice& start, const Slice& limit) { dbfull()->TEST_CompactMemTable(); int max_level_with_files = 1; @@ -471,7 +448,6 @@ TEST(DBTest, MinorCompactionsHappen) { TEST(DBTest, RecoverWithLargeLog) { { Options options; - options.large_value_threshold = 1048576; Reopen(&options); ASSERT_OK(Put("big1", std::string(200000, '1'))); ASSERT_OK(Put("big2", std::string(200000, '2'))); @@ -484,7 +460,6 @@ TEST(DBTest, RecoverWithLargeLog) { // we flush table files in the middle of a large log file. Options options; options.write_buffer_size = 100000; - options.large_value_threshold = 1048576; Reopen(&options); ASSERT_EQ(NumTableFilesAtLevel(0), 3); ASSERT_EQ(std::string(200000, '1'), Get("big1")); @@ -497,7 +472,6 @@ TEST(DBTest, RecoverWithLargeLog) { TEST(DBTest, CompactionsGenerateMultipleFiles) { Options options; options.write_buffer_size = 100000000; // Large write buffer - options.large_value_threshold = 1048576; Reopen(&options); Random rnd(301); @@ -570,65 +544,53 @@ static bool Between(uint64_t val, uint64_t low, uint64_t high) { } TEST(DBTest, ApproximateSizes) { - for (int test = 0; test < 2; test++) { - // test==0: default large_value_threshold - // test==1: 1 MB large_value_threshold - Options options; - options.large_value_threshold = (test == 0) ? 65536 : 1048576; - options.write_buffer_size = 100000000; // Large write buffer - options.compression = kNoCompression; - DestroyAndReopen(); + Options options; + options.write_buffer_size = 100000000; // Large write buffer + options.compression = kNoCompression; + DestroyAndReopen(); - ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); - Reopen(&options); - ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); + ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); + Reopen(&options); + ASSERT_TRUE(Between(Size("", "xyz"), 0, 0)); - // Write 8MB (80 values, each 100K) - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - const int N = 80; - Random rnd(301); - for (int i = 0; i < N; i++) { - ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000))); - } - if (test == 1) { - // 0 because GetApproximateSizes() does not account for memtable space for - // non-large values - ASSERT_TRUE(Between(Size("", Key(50)), 0, 0)); - } else { - ASSERT_TRUE(Between(Size("", Key(50)), 100000*50, 100000*50 + 10000)); - ASSERT_TRUE(Between(Size(Key(20), Key(30)), - 100000*10, 100000*10 + 10000)); - } + // Write 8MB (80 values, each 100K) + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + const int N = 80; + Random rnd(301); + for (int i = 0; i < N; i++) { + ASSERT_OK(Put(Key(i), RandomString(&rnd, 100000))); + } - // Check sizes across recovery by reopening a few times - for (int run = 0; run < 3; run++) { - Reopen(&options); - - for (int compact_start = 0; compact_start < N; compact_start += 10) { - for (int i = 0; i < N; i += 10) { - ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000)); - ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), - 100000 * (i+1), 100000 * (i+1) + 10000)); - ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), - 100000 * 10, 100000 * 10 + 10000)); - } - ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000)); - ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000)); + // 0 because GetApproximateSizes() does not account for memtable space + ASSERT_TRUE(Between(Size("", Key(50)), 0, 0)); - dbfull()->TEST_CompactRange(0, - Key(compact_start), - Key(compact_start + 9)); + // Check sizes across recovery by reopening a few times + for (int run = 0; run < 3; run++) { + Reopen(&options); + + for (int compact_start = 0; compact_start < N; compact_start += 10) { + for (int i = 0; i < N; i += 10) { + ASSERT_TRUE(Between(Size("", Key(i)), 100000*i, 100000*i + 10000)); + ASSERT_TRUE(Between(Size("", Key(i)+".suffix"), + 100000 * (i+1), 100000 * (i+1) + 10000)); + ASSERT_TRUE(Between(Size(Key(i), Key(i+10)), + 100000 * 10, 100000 * 10 + 10000)); } + ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000)); + ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000)); - ASSERT_EQ(NumTableFilesAtLevel(0), 0); - ASSERT_GT(NumTableFilesAtLevel(1), 0); + dbfull()->TEST_CompactRange(0, + Key(compact_start), + Key(compact_start + 9)); } + + ASSERT_EQ(NumTableFilesAtLevel(0), 0); + ASSERT_GT(NumTableFilesAtLevel(1), 0); } } TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) { Options options; - options.large_value_threshold = 65536; options.compression = kNoCompression; Reopen(); @@ -801,146 +763,6 @@ TEST(DBTest, ComparatorCheck) { << s.ToString(); } -static bool LargeValuesOK(DBTest* db, - const std::set& expected) { - std::set actual = db->LargeValueFiles(); - if (actual.size() != expected.size()) { - fprintf(stderr, "Sets differ in size: %d vs %d\n", - (int)actual.size(), (int)expected.size()); - return false; - } - for (std::set::const_iterator it = expected.begin(); - it != expected.end(); - ++it) { - if (actual.count(*it) != 1) { - fprintf(stderr, " key '%s' not found in actual set\n", - LargeValueRefToFilenameString(*it).c_str()); - return false; - } - } - return true; -} - -TEST(DBTest, LargeValues1) { - Options options; - options.large_value_threshold = 10000; - Reopen(&options); - - Random rnd(301); - - std::string big1; - test::CompressibleString(&rnd, 1.0, 100000, &big1); // Not compressible - std::set expected; - - ASSERT_OK(Put("big1", big1)); - expected.insert(LargeValueRef::Make(big1, kNoCompression)); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(Delete("big1")); - ASSERT_TRUE(LargeValuesOK(this, expected)); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - // No handling of deletion markers on memtable compactions, so big1 remains - ASSERT_TRUE(LargeValuesOK(this, expected)); - - dbfull()->TEST_CompactRange(0, "", "z"); - expected.erase(LargeValueRef::Make(big1, kNoCompression)); - ASSERT_TRUE(LargeValuesOK(this, expected)); -} - -static bool SnappyCompressionSupported() { - std::string out; - Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; - return port::Snappy_Compress(in.data(), in.size(), &out); -} - -TEST(DBTest, LargeValues2) { - Options options; - options.large_value_threshold = 10000; - Reopen(&options); - - Random rnd(301); - - std::string big1, big2; - test::CompressibleString(&rnd, 1.0, 20000, &big1); // Not compressible - test::CompressibleString(&rnd, 0.6, 40000, &big2); // Compressible - std::set expected; - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(Put("big1", big1)); - expected.insert(LargeValueRef::Make(big1, kNoCompression)); - ASSERT_EQ(big1, Get("big1")); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(Put("big2", big2)); - ASSERT_EQ(big2, Get("big2")); - if (SnappyCompressionSupported()) { - expected.insert(LargeValueRef::Make(big2, kSnappyCompression)); - } else { - expected.insert(LargeValueRef::Make(big2, kNoCompression)); - } - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - dbfull()->TEST_CompactRange(0, "", "z"); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(Put("big2", big2)); - ASSERT_OK(Put("big2_b", big2)); - ASSERT_EQ(big1, Get("big1")); - ASSERT_EQ(big2, Get("big2")); - ASSERT_EQ(big2, Get("big2_b")); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(Delete("big1")); - ASSERT_EQ("NOT_FOUND", Get("big1")); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - ASSERT_TRUE(LargeValuesOK(this, expected)); - dbfull()->TEST_CompactRange(0, "", "z"); - expected.erase(LargeValueRef::Make(big1, kNoCompression)); - ASSERT_TRUE(LargeValuesOK(this, expected)); - dbfull()->TEST_CompactRange(1, "", "z"); - - ASSERT_OK(Delete("big2")); - ASSERT_EQ("NOT_FOUND", Get("big2")); - ASSERT_EQ(big2, Get("big2_b")); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - ASSERT_TRUE(LargeValuesOK(this, expected)); - dbfull()->TEST_CompactRange(0, "", "z"); - ASSERT_TRUE(LargeValuesOK(this, expected)); - - // Make sure the large value refs survive a reload and compactions after - // the reload. - Reopen(); - ASSERT_TRUE(LargeValuesOK(this, expected)); - ASSERT_OK(Put("foo", "bar")); - ASSERT_OK(dbfull()->TEST_CompactMemTable()); - dbfull()->TEST_CompactRange(0, "", "z"); - ASSERT_TRUE(LargeValuesOK(this, expected)); -} - -TEST(DBTest, LargeValues3) { - // Make sure we don't compress values if - Options options; - options.large_value_threshold = 10000; - options.compression = kNoCompression; - Reopen(&options); - - Random rnd(301); - - std::string big1 = std::string(100000, 'x'); // Very compressible - std::set expected; - - ASSERT_OK(Put("big1", big1)); - ASSERT_EQ(big1, Get("big1")); - expected.insert(LargeValueRef::Make(big1, kNoCompression)); - ASSERT_TRUE(LargeValuesOK(this, expected)); -} - - TEST(DBTest, DBOpen_Options) { std::string dbname = test::TmpDir() + "/db_options_test"; DestroyDB(dbname, Options()); @@ -1025,9 +847,6 @@ class ModelDB: public DB { case kTypeValue: map_[it.key().ToString()] = it.value().ToString(); break; - case kTypeLargeValueRef: - assert(false); // Should not occur - break; case kTypeDeletion: map_.erase(it.key().ToString()); break; diff --git a/db/dbformat.cc b/db/dbformat.cc index 2664eb43f..c12c1387c 100644 --- a/db/dbformat.cc +++ b/db/dbformat.cc @@ -84,69 +84,4 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const { } } -LargeValueRef LargeValueRef::Make(const Slice& value, CompressionType ctype) { - LargeValueRef result; - port::SHA1_Hash(value.data(), value.size(), &result.data[0]); - EncodeFixed64(&result.data[20], value.size()); - result.data[28] = static_cast(ctype); - return result; -} - -std::string LargeValueRefToFilenameString(const LargeValueRef& h) { - assert(sizeof(h.data) == LargeValueRef::ByteSize()); - assert(sizeof(h.data) == 29); // So we can hardcode the array size of buf - static const char tohex[] = "0123456789abcdef"; - char buf[20*2]; - for (int i = 0; i < 20; i++) { - buf[2*i] = tohex[(h.data[i] >> 4) & 0xf]; - buf[2*i+1] = tohex[h.data[i] & 0xf]; - } - std::string result = std::string(buf, sizeof(buf)); - result += "-"; - result += NumberToString(h.ValueSize()); - result += "-"; - result += NumberToString(static_cast(h.compression_type())); - return result; -} - -static uint32_t hexvalue(char c) { - if (c >= '0' && c <= '9') { - return c - '0'; - } else if (c >= 'A' && c <= 'F') { - return 10 + c - 'A'; - } else { - assert(c >= 'a' && c <= 'f'); - return 10 + c - 'a'; - } -} - -bool FilenameStringToLargeValueRef(const Slice& s, LargeValueRef* h) { - Slice in = s; - if (in.size() < 40) { - return false; - } - for (int i = 0; i < 20; i++) { - if (!isxdigit(in[i*2]) || !isxdigit(in[i*2+1])) { - return false; - } - unsigned char c = (hexvalue(in[i*2])<<4) | hexvalue(in[i*2+1]); - h->data[i] = c; - } - in.remove_prefix(40); - uint64_t value_size, ctype; - - if (ConsumeChar(&in, '-') && - ConsumeDecimalNumber(&in, &value_size) && - ConsumeChar(&in, '-') && - ConsumeDecimalNumber(&in, &ctype) && - in.empty() && - (ctype <= kSnappyCompression)) { - EncodeFixed64(&h->data[20], value_size); - h->data[28] = static_cast(ctype); - return true; - } else { - return false; - } -} - } diff --git a/db/dbformat.h b/db/dbformat.h index 5f117f95f..d58366526 100644 --- a/db/dbformat.h +++ b/db/dbformat.h @@ -29,7 +29,6 @@ class InternalKey; enum ValueType { kTypeDeletion = 0x0, kTypeValue = 0x1, - kTypeLargeValueRef = 0x2, }; // kValueTypeForSeek defines the ValueType that should be passed when // constructing a ParsedInternalKey object for seeking to a particular @@ -37,7 +36,7 @@ enum ValueType { // and the value type is embedded as the low 8 bits in the sequence // number in internal keys, we need to use the highest-numbered // ValueType, not the lowest). -static const ValueType kValueTypeForSeek = kTypeLargeValueRef; +static const ValueType kValueTypeForSeek = kTypeValue; typedef uint64_t SequenceNumber; @@ -139,54 +138,6 @@ inline int InternalKeyComparator::Compare( return Compare(a.Encode(), b.Encode()); } -// LargeValueRef is a 160-bit hash value (20 bytes), plus an 8 byte -// uncompressed size, and a 1 byte CompressionType code. An -// encoded form of it is embedded in the filenames of large value -// files stored in the database, and the raw binary form is stored as -// the iter->value() result for values of type kTypeLargeValueRef in -// the table and log files that make up the database. -struct LargeValueRef { - char data[29]; - - // Initialize a large value ref for the given data - static LargeValueRef Make(const Slice& data, - CompressionType compression_type); - - // Initialize a large value ref from a serialized, 29-byte reference value - static LargeValueRef FromRef(const Slice& ref) { - LargeValueRef result; - assert(ref.size() == sizeof(result.data)); - memcpy(result.data, ref.data(), sizeof(result.data)); - return result; - } - - // Return the number of bytes in a LargeValueRef (not the - // number of bytes in the value referenced). - static size_t ByteSize() { return sizeof(LargeValueRef().data); } - - // Return the number of bytes in the value referenced by "*this". - uint64_t ValueSize() const { return DecodeFixed64(&data[20]); } - - CompressionType compression_type() const { - return static_cast(data[28]); - } - - bool operator==(const LargeValueRef& b) const { - return memcmp(data, b.data, sizeof(data)) == 0; - } - bool operator<(const LargeValueRef& b) const { - return memcmp(data, b.data, sizeof(data)) < 0; - } -}; - -// Convert the large value ref to a human-readable string suitable -// for embedding in a large value filename. -extern std::string LargeValueRefToFilenameString(const LargeValueRef& h); - -// Parse the large value filename string in "input" and store it in -// "*h". If successful, returns true. Otherwise returns false. -extern bool FilenameStringToLargeValueRef(const Slice& in, LargeValueRef* ref); - inline bool ParseInternalKey(const Slice& internal_key, ParsedInternalKey* result) { const size_t n = internal_key.size(); @@ -196,7 +147,7 @@ inline bool ParseInternalKey(const Slice& internal_key, result->sequence = num >> 8; result->type = static_cast(c); result->user_key = Slice(internal_key.data(), n - 8); - return (c <= static_cast(kTypeLargeValueRef)); + return (c <= static_cast(kTypeValue)); } } diff --git a/db/dbformat_test.cc b/db/dbformat_test.cc index 702cbb490..57c557803 100644 --- a/db/dbformat_test.cc +++ b/db/dbformat_test.cc @@ -76,9 +76,6 @@ TEST(FormatTest, InternalKeyShortSeparator) { ASSERT_EQ(IKey("foo", 100, kTypeValue), Shorten(IKey("foo", 100, kTypeValue), IKey("foo", 100, kTypeDeletion))); - ASSERT_EQ(IKey("foo", 100, kTypeValue), - Shorten(IKey("foo", 100, kTypeValue), - IKey("foo", 100, kTypeLargeValueRef))); // When user keys are misordered ASSERT_EQ(IKey("foo", 100, kTypeValue), @@ -108,18 +105,6 @@ TEST(FormatTest, InternalKeyShortestSuccessor) { ShortSuccessor(IKey("\xff\xff", 100, kTypeValue))); } -TEST(FormatTest, SHA1) { - // Check that we are computing the same value as sha1. - // Note that the last two numbers are the length of the input and the - // compression type. - ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-0", // SHA1, uncompr - LargeValueRefToFilenameString( - LargeValueRef::Make("hello", kNoCompression))); - ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-1", // SHA1, lwcompr - LargeValueRefToFilenameString( - LargeValueRef::Make("hello", kSnappyCompression))); -} - } int main(int argc, char** argv) { diff --git a/db/filename.cc b/db/filename.cc index d21918c87..b3a917c69 100644 --- a/db/filename.cc +++ b/db/filename.cc @@ -30,14 +30,6 @@ std::string TableFileName(const std::string& name, uint64_t number) { return MakeFileName(name, number, "sst"); } -std::string LargeValueFileName(const std::string& name, - const LargeValueRef& large_ref) { - std::string result = name + "/"; - result += LargeValueRefToFilenameString(large_ref); - result += ".val"; - return result; -} - std::string DescriptorFileName(const std::string& dbname, uint64_t number) { assert(number > 0); char buf[100]; @@ -75,11 +67,9 @@ std::string OldInfoLogFileName(const std::string& dbname) { // dbname/LOG // dbname/LOG.old // dbname/MANIFEST-[0-9]+ -// dbname/[0-9a-f]{20}-[0-9]+-[0-9]+.val // dbname/[0-9]+.(log|sst) bool ParseFileName(const std::string& fname, uint64_t* number, - LargeValueRef* large_ref, FileType* type) { Slice rest(fname); if (rest == "CURRENT") { @@ -91,15 +81,6 @@ bool ParseFileName(const std::string& fname, } else if (rest == "LOG" || rest == "LOG.old") { *number = 0; *type = kInfoLogFile; - } else if (rest.size() >= 4 && - Slice(rest.data() + rest.size() - 4, 4) == ".val") { - LargeValueRef h; - if (!FilenameStringToLargeValueRef(Slice(rest.data(), rest.size() - 4), - &h)) { - return false; - } - *large_ref = h; - *type = kLargeValueFile; } else if (rest.starts_with("MANIFEST-")) { rest.remove_prefix(strlen("MANIFEST-")); uint64_t num; diff --git a/db/filename.h b/db/filename.h index 81ab2fc1e..6a997441e 100644 --- a/db/filename.h +++ b/db/filename.h @@ -16,13 +16,11 @@ namespace leveldb { class Env; -struct LargeValueRef; enum FileType { kLogFile, kDBLockFile, kTableFile, - kLargeValueFile, kDescriptorFile, kCurrentFile, kTempFile, @@ -39,12 +37,6 @@ extern std::string LogFileName(const std::string& dbname, uint64_t number); // "dbname". extern std::string TableFileName(const std::string& dbname, uint64_t number); -// Return the name of the large value file with the specified large -// value reference in the db named by "dbname". The result will be -// prefixed with "dbname". -extern std::string LargeValueFileName(const std::string& dbname, - const LargeValueRef& large_ref); - // Return the name of the descriptor file for the db named by // "dbname" and the specified incarnation number. The result will be // prefixed with "dbname". @@ -71,14 +63,10 @@ extern std::string InfoLogFileName(const std::string& dbname); extern std::string OldInfoLogFileName(const std::string& dbname); // If filename is a leveldb file, store the type of the file in *type. -// If *type is kLargeValueFile, then the large value reference data -// from the filename is stored in "*large_ref. For all other types of -// files, the number encoded in the filename is stored in *number. If -// the filename was successfully parsed, returns true. Else return -// false. +// The number encoded in the filename is stored in *number. If the +// filename was successfully parsed, returns true. Else return false. extern bool ParseFileName(const std::string& filename, uint64_t* number, - LargeValueRef* large_ref, FileType* type); // Make the CURRENT file point to the descriptor file with the diff --git a/db/filename_test.cc b/db/filename_test.cc index 4d2a91ef1..2f61e8d42 100644 --- a/db/filename_test.cc +++ b/db/filename_test.cc @@ -17,42 +17,29 @@ TEST(FileNameTest, Parse) { Slice db; FileType type; uint64_t number; - LargeValueRef large_ref; // Successful parses static struct { const char* fname; uint64_t number; - const char* large_ref; FileType type; } cases[] = { - { "100.log", 100, "", kLogFile }, - { "0.log", 0, "", kLogFile }, - { "0.sst", 0, "", kTableFile }, - { "CURRENT", 0, "", kCurrentFile }, - { "LOCK", 0, "", kDBLockFile }, - { "MANIFEST-2", 2, "", kDescriptorFile }, - { "MANIFEST-7", 7, "", kDescriptorFile }, - { "LOG", 0, "", kInfoLogFile }, - { "LOG.old", 0, "", kInfoLogFile }, - { "18446744073709551615.log", 18446744073709551615ull, "", - kLogFile }, - { "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0.val", 0, - "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-1234-0", kLargeValueFile }, - { "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0.val", 0, - "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-10000000000-0", - kLargeValueFile }, + { "100.log", 100, kLogFile }, + { "0.log", 0, kLogFile }, + { "0.sst", 0, kTableFile }, + { "CURRENT", 0, kCurrentFile }, + { "LOCK", 0, kDBLockFile }, + { "MANIFEST-2", 2, kDescriptorFile }, + { "MANIFEST-7", 7, kDescriptorFile }, + { "LOG", 0, kInfoLogFile }, + { "LOG.old", 0, kInfoLogFile }, + { "18446744073709551615.log", 18446744073709551615ull, kLogFile }, }; for (int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) { std::string f = cases[i].fname; - ASSERT_TRUE(ParseFileName(f, &number, &large_ref, &type)) << f; + ASSERT_TRUE(ParseFileName(f, &number, &type)) << f; ASSERT_EQ(cases[i].type, type) << f; - if (type == kLargeValueFile) { - ASSERT_EQ(cases[i].large_ref, LargeValueRefToFilenameString(large_ref)) - << f; - } else { - ASSERT_EQ(cases[i].number, number) << f; - } + ASSERT_EQ(cases[i].number, number) << f; } // Errors @@ -78,75 +65,54 @@ TEST(FileNameTest, Parse) { "184467440737095516150.log", "100", "100.", - "100.lop", - "100.val", - ".val", - "123456789012345678901234567890123456789-12340.val", - "1234567890123456789012345678901234567-123-0.val", - "12345678901234567890123456789012345678902-100-1-.val", - // Overflow on value size - "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000000000000000000-1.val", - // '03.val' is a bad compression type - "2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2e2323-100000-3.val" }; + "100.lop" + }; for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) { std::string f = errors[i]; - ASSERT_TRUE(!ParseFileName(f, &number, &large_ref, &type)) << f; + ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f; }; } TEST(FileNameTest, Construction) { uint64_t number; FileType type; - LargeValueRef large_ref; std::string fname; fname = CurrentFileName("foo"); ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); + ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); ASSERT_EQ(0, number); ASSERT_EQ(kCurrentFile, type); fname = LockFileName("foo"); ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); + ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); ASSERT_EQ(0, number); ASSERT_EQ(kDBLockFile, type); fname = LogFileName("foo", 192); ASSERT_EQ("foo/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); + ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); ASSERT_EQ(192, number); ASSERT_EQ(kLogFile, type); fname = TableFileName("bar", 200); ASSERT_EQ("bar/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); + ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); ASSERT_EQ(200, number); ASSERT_EQ(kTableFile, type); fname = DescriptorFileName("bar", 100); ASSERT_EQ("bar/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); + ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); ASSERT_EQ(100, number); ASSERT_EQ(kDescriptorFile, type); fname = TempFileName("tmp", 999); ASSERT_EQ("tmp/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); + ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &type)); ASSERT_EQ(999, number); ASSERT_EQ(kTempFile, type); - - for (int i = 0; i <= kSnappyCompression; i++) { - CompressionType ctype = static_cast(i); - std::string value = "abcdef"; - LargeValueRef real_large_ref = LargeValueRef::Make(Slice(value), ctype); - fname = LargeValueFileName("tmp", real_large_ref); - ASSERT_EQ("tmp/", std::string(fname.data(), 4)); - ASSERT_TRUE(ParseFileName(fname.c_str() + 4, &number, &large_ref, &type)); - ASSERT_TRUE(real_large_ref == large_ref); - ASSERT_EQ(kLargeValueFile, type); - ASSERT_EQ(large_ref.compression_type(), ctype); - } } } diff --git a/db/log_writer.cc b/db/log_writer.cc index 18ca37a7f..169685105 100644 --- a/db/log_writer.cc +++ b/db/log_writer.cc @@ -46,9 +46,9 @@ Status Writer::AddRecord(const Slice& slice) { } // Invariant: we never leave < kHeaderSize bytes in a block. - const int avail = kBlockSize - block_offset_ - kHeaderSize; - assert(avail >= 0); + assert(kBlockSize - block_offset_ - kHeaderSize >= 0); + const size_t avail = kBlockSize - block_offset_ - kHeaderSize; const size_t fragment_length = (left < avail) ? left : avail; RecordType type; diff --git a/db/repair.cc b/db/repair.cc index 014e00e7e..c8e7b9e5f 100644 --- a/db/repair.cc +++ b/db/repair.cc @@ -6,8 +6,7 @@ // (1) Any log files are first converted to tables // (2) We scan every table to compute // (a) smallest/largest for the table -// (b) large value refs from the table -// (c) largest sequence number in the table +// (b) largest sequence number in the table // (3) We generate descriptor contents: // - log number is set to zero // - next-file-number is set to 1 + largest file number we found @@ -22,9 +21,8 @@ // (c) For each table: if it overlaps earlier table, place in level-0, // else place in level-M. // Possible optimization 2: -// Store per-table metadata (smallest, largest, largest-seq#, -// large-value-refs, ...) in the table's meta section to speed up -// ScanTable. +// Store per-table metadata (smallest, largest, largest-seq#, ...) +// in the table's meta section to speed up ScanTable. #include "db/builder.h" #include "db/db_impl.h" @@ -73,7 +71,7 @@ class Repairer { } if (status.ok()) { unsigned long long bytes = 0; - for (int i = 0; i < tables_.size(); i++) { + for (size_t i = 0; i < tables_.size(); i++) { bytes += tables_[i].meta.file_size; } Log(env_, options_.info_log, @@ -119,13 +117,10 @@ class Repairer { } uint64_t number; - LargeValueRef large_ref; FileType type; - for (int i = 0; i < filenames.size(); i++) { - if (ParseFileName(filenames[i], &number, &large_ref, &type)) { - if (type == kLargeValueFile) { - // Will be picked up when we process a Table that points to it - } else if (type == kDescriptorFile) { + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type)) { + if (type == kDescriptorFile) { manifests_.push_back(filenames[i]); } else { if (number + 1 > next_file_number_) { @@ -145,7 +140,7 @@ class Repairer { } void ConvertLogFilesToTables() { - for (int i = 0; i < logs_.size(); i++) { + for (size_t i = 0; i < logs_.size(); i++) { std::string logname = LogFileName(dbname_, logs_[i]); Status status = ConvertLogToTable(logs_[i]); if (!status.ok()) { @@ -239,7 +234,7 @@ class Repairer { void ExtractMetaData() { std::vector kept; - for (int i = 0; i < table_numbers_.size(); i++) { + for (size_t i = 0; i < table_numbers_.size(); i++) { TableInfo t; t.meta.number = table_numbers_[i]; Status status = ScanTable(&t); @@ -283,17 +278,6 @@ class Repairer { if (parsed.sequence > t->max_sequence) { t->max_sequence = parsed.sequence; } - - if (ExtractValueType(key) == kTypeLargeValueRef) { - if (iter->value().size() != LargeValueRef::ByteSize()) { - Log(env_, options_.info_log, "Table #%llu: bad large value ref", - (unsigned long long) t->meta.number); - } else { - edit_.AddLargeValueRef(LargeValueRef::FromRef(iter->value()), - t->meta.number, - key); - } - } } if (!iter->status().ok()) { status = iter->status(); @@ -316,7 +300,7 @@ class Repairer { } SequenceNumber max_sequence = 0; - for (int i = 0; i < tables_.size(); i++) { + for (size_t i = 0; i < tables_.size(); i++) { if (max_sequence < tables_[i].max_sequence) { max_sequence = tables_[i].max_sequence; } @@ -327,7 +311,7 @@ class Repairer { edit_.SetNextFile(next_file_number_); edit_.SetLastSequence(max_sequence); - for (int i = 0; i < tables_.size(); i++) { + for (size_t i = 0; i < tables_.size(); i++) { // TODO(opt): separate out into multiple levels const TableInfo& t = tables_[i]; edit_.AddFile(0, t.meta.number, t.meta.file_size, @@ -351,7 +335,7 @@ class Repairer { env_->DeleteFile(tmp); } else { // Discard older manifests - for (int i = 0; i < manifests_.size(); i++) { + for (size_t i = 0; i < manifests_.size(); i++) { ArchiveFile(dbname_ + "/" + manifests_[i]); } diff --git a/db/version_edit.cc b/db/version_edit.cc index 689dbe037..3941271eb 100644 --- a/db/version_edit.cc +++ b/db/version_edit.cc @@ -19,7 +19,7 @@ enum Tag { kCompactPointer = 5, kDeletedFile = 6, kNewFile = 7, - kLargeValueRef = 8, + // 8 was used for large value refs kPrevLogNumber = 9, }; @@ -36,7 +36,6 @@ void VersionEdit::Clear() { has_last_sequence_ = false; deleted_files_.clear(); new_files_.clear(); - large_refs_added_.clear(); } void VersionEdit::EncodeTo(std::string* dst) const { @@ -61,7 +60,7 @@ void VersionEdit::EncodeTo(std::string* dst) const { PutVarint64(dst, last_sequence_); } - for (int i = 0; i < compact_pointers_.size(); i++) { + for (size_t i = 0; i < compact_pointers_.size(); i++) { PutVarint32(dst, kCompactPointer); PutVarint32(dst, compact_pointers_[i].first); // level PutLengthPrefixedSlice(dst, compact_pointers_[i].second.Encode()); @@ -75,7 +74,7 @@ void VersionEdit::EncodeTo(std::string* dst) const { PutVarint64(dst, iter->second); // file number } - for (int i = 0; i < new_files_.size(); i++) { + for (size_t i = 0; i < new_files_.size(); i++) { const FileMetaData& f = new_files_[i].second; PutVarint32(dst, kNewFile); PutVarint32(dst, new_files_[i].first); // level @@ -84,15 +83,6 @@ void VersionEdit::EncodeTo(std::string* dst) const { PutLengthPrefixedSlice(dst, f.smallest.Encode()); PutLengthPrefixedSlice(dst, f.largest.Encode()); } - - for (int i = 0; i < large_refs_added_.size(); i++) { - const VersionEdit::Large& l = large_refs_added_[i]; - PutVarint32(dst, kLargeValueRef); - PutLengthPrefixedSlice(dst, - Slice(l.large_ref.data, LargeValueRef::ByteSize())); - PutVarint64(dst, l.fnum); - PutLengthPrefixedSlice(dst, l.internal_key.Encode()); - } } static bool GetInternalKey(Slice* input, InternalKey* dst) { @@ -127,7 +117,6 @@ Status VersionEdit::DecodeFrom(const Slice& src) { uint64_t number; FileMetaData f; Slice str; - Large large; InternalKey key; while (msg == NULL && GetVarint32(&input, &tag)) { @@ -203,18 +192,6 @@ Status VersionEdit::DecodeFrom(const Slice& src) { } break; - case kLargeValueRef: - if (GetLengthPrefixedSlice(&input, &str) && - (str.size() == LargeValueRef::ByteSize()) && - GetVarint64(&input, &large.fnum) && - GetInternalKey(&input, &large.internal_key)) { - large.large_ref = LargeValueRef::FromRef(str); - large_refs_added_.push_back(large); - } else { - msg = "large ref"; - } - break; - default: msg = "unknown tag"; break; @@ -255,7 +232,7 @@ std::string VersionEdit::DebugString() const { r.append("\n LastSeq: "); AppendNumberTo(&r, last_sequence_); } - for (int i = 0; i < compact_pointers_.size(); i++) { + for (size_t i = 0; i < compact_pointers_.size(); i++) { r.append("\n CompactPointer: "); AppendNumberTo(&r, compact_pointers_[i].first); r.append(" '"); @@ -270,7 +247,7 @@ std::string VersionEdit::DebugString() const { r.append(" "); AppendNumberTo(&r, iter->second); } - for (int i = 0; i < new_files_.size(); i++) { + for (size_t i = 0; i < new_files_.size(); i++) { const FileMetaData& f = new_files_[i].second; r.append("\n AddFile: "); AppendNumberTo(&r, new_files_[i].first); @@ -284,16 +261,6 @@ std::string VersionEdit::DebugString() const { AppendEscapedStringTo(&r, f.largest.Encode()); r.append("'"); } - for (int i = 0; i < large_refs_added_.size(); i++) { - const VersionEdit::Large& l = large_refs_added_[i]; - r.append("\n LargeRef: "); - AppendNumberTo(&r, l.fnum); - r.append(" "); - r.append(LargeValueRefToFilenameString(l.large_ref)); - r.append(" '"); - AppendEscapedStringTo(&r, l.internal_key.Encode()); - r.append("'"); - } r.append("\n}\n"); return r; } diff --git a/db/version_edit.h b/db/version_edit.h index 7e417b567..ab874da14 100644 --- a/db/version_edit.h +++ b/db/version_edit.h @@ -75,18 +75,6 @@ class VersionEdit { deleted_files_.insert(std::make_pair(level, file)); } - // Record that a large value with the specified large_ref was - // written to the output file numbered "fnum" - void AddLargeValueRef(const LargeValueRef& large_ref, - uint64_t fnum, - const Slice& internal_key) { - large_refs_added_.resize(large_refs_added_.size() + 1); - Large* large = &(large_refs_added_.back()); - large->large_ref = large_ref; - large->fnum = fnum; - large->internal_key.DecodeFrom(internal_key); - } - void EncodeTo(std::string* dst) const; Status DecodeFrom(const Slice& src); @@ -111,12 +99,6 @@ class VersionEdit { std::vector< std::pair > compact_pointers_; DeletedFileSet deleted_files_; std::vector< std::pair > new_files_; - struct Large { - LargeValueRef large_ref; - uint64_t fnum; - InternalKey internal_key; - }; - std::vector large_refs_added_; }; } diff --git a/db/version_edit_test.cc b/db/version_edit_test.cc index 6906ec3a8..67959f7f5 100644 --- a/db/version_edit_test.cc +++ b/db/version_edit_test.cc @@ -26,13 +26,9 @@ TEST(VersionEditTest, EncodeDecode) { for (int i = 0; i < 4; i++) { TestEncodeDecode(edit); edit.AddFile(3, kBig + 300 + i, kBig + 400 + i, - InternalKey("foo", kBig + 500 + i, kTypeLargeValueRef), + InternalKey("foo", kBig + 500 + i, kTypeValue), InternalKey("zoo", kBig + 600 + i, kTypeDeletion)); edit.DeleteFile(4, kBig + 700 + i); - edit.AddLargeValueRef(LargeValueRef::Make("big", kNoCompression), - kBig + 800 + i, "foobar"); - edit.AddLargeValueRef(LargeValueRef::Make("big2", kSnappyCompression), - kBig + 801 + i, "baz"); edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue)); } diff --git a/db/version_set.cc b/db/version_set.cc index 31f79bb60..c439f49ef 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -58,7 +58,7 @@ std::string IntSetToString(const std::set& s) { Version::~Version() { assert(refs_ == 0); for (int level = 0; level < config::kNumLevels; level++) { - for (int i = 0; i < files_[level].size(); i++) { + for (size_t i = 0; i < files_[level].size(); i++) { FileMetaData* f = files_[level][i]; assert(f->refs >= 0); f->refs--; @@ -134,7 +134,7 @@ class Version::LevelFileNumIterator : public Iterator { private: const InternalKeyComparator icmp_; const std::vector* const flist_; - int index_; + uint32_t index_; // Backing store for value(). Holds the file number and size. mutable char value_buf_[16]; @@ -164,7 +164,7 @@ Iterator* Version::NewConcatenatingIterator(const ReadOptions& options, void Version::AddIterators(const ReadOptions& options, std::vector* iters) { // Merge all level zero files together since they may overlap - for (int i = 0; i < files_[0].size(); i++) { + for (size_t i = 0; i < files_[0].size(); i++) { iters->push_back( vset_->table_cache_->NewIterator( options, files_[0][i]->number, files_[0][i]->file_size)); @@ -201,7 +201,7 @@ std::string Version::DebugString() const { AppendNumberTo(&r, level); r.push_back(':'); const std::vector& files = files_[level]; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { r.push_back(' '); AppendNumberTo(&r, files[i]->number); r.push_back(':'); @@ -232,7 +232,7 @@ class VersionSet::Builder { : vset_(vset) { for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = base->files_[level]; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { FileMetaData* f = files[i]; f->refs++; files_[level].insert(std::make_pair(f->number, f)); @@ -258,7 +258,7 @@ class VersionSet::Builder { // Apply all of the edits in *edit to the current state. void Apply(VersionEdit* edit) { // Update compaction pointers - for (int i = 0; i < edit->compact_pointers_.size(); i++) { + for (size_t i = 0; i < edit->compact_pointers_.size(); i++) { const int level = edit->compact_pointers_[i].first; vset_->compact_pointer_[level] = edit->compact_pointers_[i].second.Encode().ToString(); @@ -284,19 +284,13 @@ class VersionSet::Builder { } // Add new files - for (int i = 0; i < edit->new_files_.size(); i++) { + for (size_t i = 0; i < edit->new_files_.size(); i++) { const int level = edit->new_files_[i].first; FileMetaData* f = new FileMetaData(edit->new_files_[i].second); f->refs = 1; assert(files_[level].count(f->number) == 0); files_[level].insert(std::make_pair(f->number, f)); } - - // Add large value refs - for (int i = 0; i < edit->large_refs_added_.size(); i++) { - const VersionEdit::Large& l = edit->large_refs_added_[i]; - vset_->RegisterLargeValueRef(l.large_ref, l.fnum, l.internal_key); - } } // Save the current state in *v. @@ -545,7 +539,7 @@ Status VersionSet::Recover() { static int64_t TotalFileSize(const std::vector& files) { int64_t sum = 0; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { sum += files[i]->file_size; } return sum; @@ -610,25 +604,12 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { // Save files for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = current_->files_[level]; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { const FileMetaData* f = files[i]; edit.AddFile(level, f->number, f->file_size, f->smallest, f->largest); } } - // Save large value refs - for (LargeValueMap::const_iterator it = large_value_refs_.begin(); - it != large_value_refs_.end(); - ++it) { - const LargeValueRef& ref = it->first; - const LargeReferencesSet& pointers = it->second; - for (LargeReferencesSet::const_iterator j = pointers.begin(); - j != pointers.end(); - ++j) { - edit.AddLargeValueRef(ref, j->first, j->second); - } - } - std::string record; edit.EncodeTo(&record); return log->AddRecord(record); @@ -651,7 +632,7 @@ Status VersionSet::SortLevel(Version* v, uint64_t level) { if (result.ok() && level > 0) { // There should be no overlap - for (int i = 1; i < v->files_[level].size(); i++) { + for (size_t i = 1; i < v->files_[level].size(); i++) { const InternalKey& prev_end = v->files_[level][i-1]->largest; const InternalKey& this_begin = v->files_[level][i]->smallest; if (icmp_.Compare(prev_end, this_begin) >= 0) { @@ -676,7 +657,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { uint64_t result = 0; for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = v->files_[level]; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { if (icmp_.Compare(files[i]->largest, ikey) <= 0) { // Entire file is before "ikey", so just add the file size result += files[i]->file_size; @@ -701,83 +682,9 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { } } } - - // Add in large value files which are references from internal keys - // stored in the table files - // - // TODO(opt): this is O(# large values in db). If this becomes too slow, - // we could store an auxiliary data structure indexed by internal key - for (LargeValueMap::const_iterator it = large_value_refs_.begin(); - it != large_value_refs_.end(); - ++it) { - const LargeValueRef& lref = it->first; - for (LargeReferencesSet::const_iterator it2 = it->second.begin(); - it2 != it->second.end(); - ++it2) { - if (icmp_.Compare(it2->second, ikey.Encode()) <= 0) { - // Internal key for large value is before our key of interest - result += lref.ValueSize(); - } - } - } - - return result; } -bool VersionSet::RegisterLargeValueRef(const LargeValueRef& large_ref, - uint64_t fnum, - const InternalKey& internal_key) { - LargeReferencesSet* refs = &large_value_refs_[large_ref]; - bool is_first = refs->empty(); - refs->insert(make_pair(fnum, internal_key.Encode().ToString())); - return is_first; -} - -void VersionSet::CleanupLargeValueRefs(const std::set& live_tables) { - for (LargeValueMap::iterator it = large_value_refs_.begin(); - it != large_value_refs_.end(); - ) { - LargeReferencesSet* refs = &it->second; - for (LargeReferencesSet::iterator ref_it = refs->begin(); - ref_it != refs->end(); - ) { - if (ref_it->first != log_number_ && // Not in log file - ref_it->first != prev_log_number_ && // Not in prev log - live_tables.count(ref_it->first) == 0) { // Not in a live table - // No longer live: erase - LargeReferencesSet::iterator to_erase = ref_it; - ++ref_it; - refs->erase(to_erase); - } else { - // Still live: leave this reference alone - ++ref_it; - } - } - if (refs->empty()) { - // No longer any live references to this large value: remove from - // large_value_refs - Log(env_, options_->info_log, "large value is dead: '%s'", - LargeValueRefToFilenameString(it->first).c_str()); - LargeValueMap::iterator to_erase = it; - ++it; - large_value_refs_.erase(to_erase); - } else { - ++it; - } - } -} - -bool VersionSet::LargeValueIsLive(const LargeValueRef& large_ref) { - LargeValueMap::iterator it = large_value_refs_.find(large_ref); - if (it == large_value_refs_.end()) { - return false; - } else { - assert(!it->second.empty()); - return true; - } -} - void VersionSet::MaybeDeleteOldVersions() { // Note: it is important to delete versions in order since a newer // version with zero refs may be holding a pointer to a memtable @@ -793,7 +700,7 @@ void VersionSet::AddLiveFiles(std::set* live) { for (Version* v = oldest_; v != NULL; v = v->next_) { for (int level = 0; level < config::kNumLevels; level++) { const std::vector& files = v->files_[level]; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { live->insert(files[i]->number); } } @@ -810,7 +717,7 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() { int64_t result = 0; std::vector overlaps; for (int level = 0; level < config::kNumLevels - 1; level++) { - for (int i = 0; i < current_->files_[level].size(); i++) { + for (size_t i = 0; i < current_->files_[level].size(); i++) { const FileMetaData* f = current_->files_[level][i]; GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps); const int64_t sum = TotalFileSize(overlaps); @@ -832,7 +739,7 @@ void VersionSet::GetOverlappingInputs( Slice user_begin = begin.user_key(); Slice user_end = end.user_key(); const Comparator* user_cmp = icmp_.user_comparator(); - for (int i = 0; i < current_->files_[level].size(); i++) { + for (size_t i = 0; i < current_->files_[level].size(); i++) { FileMetaData* f = current_->files_[level][i]; if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 || user_cmp->Compare(f->smallest.user_key(), user_end) > 0) { @@ -852,7 +759,7 @@ void VersionSet::GetRange(const std::vector& inputs, assert(!inputs.empty()); smallest->Clear(); largest->Clear(); - for (int i = 0; i < inputs.size(); i++) { + for (size_t i = 0; i < inputs.size(); i++) { FileMetaData* f = inputs[i]; if (i == 0) { *smallest = f->smallest; @@ -895,7 +802,7 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { if (!c->inputs_[which].empty()) { if (c->level() + which == 0) { const std::vector& files = c->inputs_[which]; - for (int i = 0; i < files.size(); i++) { + for (size_t i = 0; i < files.size(); i++) { list[num++] = table_cache_->NewIterator( options, files[i]->number, files[i]->file_size); } @@ -927,7 +834,7 @@ Compaction* VersionSet::PickCompaction() { c->input_version_->Ref(); // Pick the first file that comes after compact_pointer_[level] - for (int i = 0; i < current_->files_[level].size(); i++) { + for (size_t i = 0; i < current_->files_[level].size(); i++) { FileMetaData* f = current_->files_[level][i]; if (compact_pointer_[level].empty() || icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) { @@ -1062,7 +969,7 @@ bool Compaction::IsTrivialMove() const { void Compaction::AddInputDeletions(VersionEdit* edit) { for (int which = 0; which < 2; which++) { - for (int i = 0; i < inputs_[which].size(); i++) { + for (size_t i = 0; i < inputs_[which].size(); i++) { edit->DeleteFile(level_ + which, inputs_[which][i]->number); } } diff --git a/db/version_set.h b/db/version_set.h index e1c5a4b91..e37751300 100644 --- a/db/version_set.h +++ b/db/version_set.h @@ -171,22 +171,6 @@ class VersionSet { // "key" as of version "v". uint64_t ApproximateOffsetOf(Version* v, const InternalKey& key); - // Register a reference to a large value with the specified - // large_ref from the specified file number. Returns "true" if this - // is the first recorded reference to the "large_ref" value in the - // database, and false otherwise. - bool RegisterLargeValueRef(const LargeValueRef& large_ref, - uint64_t filenum, - const InternalKey& internal_key); - - // Cleanup the large value reference state by eliminating any - // references from files that are not includes in either "live_tables" - // or the current log. - void CleanupLargeValueRefs(const std::set& live_tables); - - // Returns true if a large value with the given reference is live. - bool LargeValueIsLive(const LargeValueRef& large_ref); - private: class Builder; @@ -237,14 +221,6 @@ class VersionSet { Version* current_; // Pointer to the last (newest) list entry Version* oldest_; // Pointer to the first (oldest) list entry - // Map from large value reference to the set of - // values containing references to the value. We keep the - // internal key as a std::string rather than as an InternalKey because - // we want to be able to easily use a set. - typedef std::set > LargeReferencesSet; - typedef std::map LargeValueMap; - LargeValueMap large_value_refs_; - // Per-level key at which the next compaction at that level should start. // Either an empty string, or a valid InternalKey. std::string compact_pointer_[config::kNumLevels]; @@ -313,7 +289,7 @@ class Compaction { // State used to check for number of of overlapping grandparent files // (parent == level_ + 1, grandparent == level_ + 2) std::vector grandparents_; - int grandparent_index_; // Index in grandparent_starts_ + size_t grandparent_index_; // Index in grandparent_starts_ bool seen_key_; // Some output key has been seen int64_t overlapped_bytes_; // Bytes of overlap between current output // and grandparent files @@ -324,7 +300,7 @@ class Compaction { // is that we are positioned at one of the file ranges for each // higher level than the ones involved in this compaction (i.e. for // all L >= level_ + 2). - int level_ptrs_[config::kNumLevels]; + size_t level_ptrs_[config::kNumLevels]; }; } diff --git a/db/write_batch.cc b/db/write_batch.cc index e84e54889..d5615289a 100644 --- a/db/write_batch.cc +++ b/db/write_batch.cc @@ -8,7 +8,6 @@ // data: record[count] // record := // kTypeValue varstring varstring | -// kTypeLargeValueRef varstring varstring | // kTypeDeletion varstring // varstring := // len: varint32 @@ -58,16 +57,6 @@ void WriteBatch::Put(const Slice& key, const Slice& value) { PutLengthPrefixedSlice(&rep_, value); } -void WriteBatchInternal::PutLargeValueRef(WriteBatch* b, - const Slice& key, - const LargeValueRef& large_ref) { - WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1); - b->rep_.push_back(static_cast(kTypeLargeValueRef)); - PutLengthPrefixedSlice(&b->rep_, key); - PutLengthPrefixedSlice(&b->rep_, - Slice(large_ref.data, sizeof(large_ref.data))); -} - void WriteBatch::Delete(const Slice& key) { WriteBatchInternal::SetCount(this, WriteBatchInternal::Count(this) + 1); rep_.push_back(static_cast(kTypeDeletion)); @@ -87,10 +76,6 @@ Status WriteBatchInternal::InsertInto(const WriteBatch* b, case kTypeValue: memtable->Add(it.sequence_number(), kTypeValue, it.key(), it.value()); break; - case kTypeLargeValueRef: - memtable->Add(it.sequence_number(), kTypeLargeValueRef, - it.key(), it.value()); - break; } found++; } @@ -134,7 +119,6 @@ void WriteBatchInternal::Iterator::GetNextEntry() { input_.remove_prefix(1); switch (tag) { case kTypeValue: - case kTypeLargeValueRef: if (GetLengthPrefixedSlice(&input_, &key_) && GetLengthPrefixedSlice(&input_, &value_)) { op_ = static_cast(tag); diff --git a/db/write_batch_internal.h b/db/write_batch_internal.h index ea28e2d6a..ab0a823a3 100644 --- a/db/write_batch_internal.h +++ b/db/write_batch_internal.h @@ -13,10 +13,6 @@ namespace leveldb { // WriteBatch that we don't want in the public WriteBatch interface. class WriteBatchInternal { public: - static void PutLargeValueRef(WriteBatch* batch, - const Slice& key, - const LargeValueRef& large_ref); - // Return the number of entries in the batch. static int Count(const WriteBatch* batch); diff --git a/db/write_batch_test.cc b/db/write_batch_test.cc index deb841108..2bf1134a6 100644 --- a/db/write_batch_test.cc +++ b/db/write_batch_test.cc @@ -29,13 +29,6 @@ static std::string PrintContents(WriteBatch* b) { state.append(iter->value().ToString()); state.append(")"); break; - case kTypeLargeValueRef: - state.append("PutRef("); - state.append(ikey.user_key.ToString()); - state.append(", "); - state.append(iter->value().ToString()); - state.append(")"); - break; case kTypeDeletion: state.append("Delete("); state.append(ikey.user_key.ToString()); @@ -74,22 +67,6 @@ TEST(WriteBatchTest, Multiple) { PrintContents(&batch)); } -TEST(WriteBatchTest, PutIndirect) { - WriteBatch batch; - batch.Put(Slice("baz"), Slice("boo")); - LargeValueRef h; - for (int i = 0; i < LargeValueRef::ByteSize(); i++) { - h.data[i] = (i < 20) ? 'a' : 'b'; - } - WriteBatchInternal::PutLargeValueRef(&batch, Slice("foo"), h); - WriteBatchInternal::SetSequence(&batch, 100); - ASSERT_EQ(100, WriteBatchInternal::Sequence(&batch)); - ASSERT_EQ(2, WriteBatchInternal::Count(&batch)); - ASSERT_EQ("Put(baz, boo)@100" - "PutRef(foo, aaaaaaaaaaaaaaaaaaaabbbbbbbbb)@101", - PrintContents(&batch)); -} - TEST(WriteBatchTest, Corruption) { WriteBatch batch; batch.Put(Slice("foo"), Slice("bar")); diff --git a/doc/impl.html b/doc/impl.html index b190d2c11..dd09fea17 100644 --- a/doc/impl.html +++ b/doc/impl.html @@ -57,15 +57,6 @@ These merges have the effect of gradually migrating new updates from the young level to the largest level using only bulk reads and writes (i.e., minimizing expensive seeks). -

Large value files

-

-Each large value (greater than 64KB by default) is placed in a large -value file (*.val) of its own. An entry is maintained in the log -and/or sorted tables that maps from the corresponding key to the -name of this large value file. The name of the large value file -is derived from a SHA1 hash of the value and its length so that -identical values share the same file. -

Manifest

A MANIFEST file lists the set of sorted tables that make up each @@ -220,9 +211,7 @@ So maybe even the sharding is not necessary on modern filesystems? compaction and at the end of recovery. It finds the names of all files in the database. It deletes all log files that are not the current log file. It deletes all table files that are not referenced -from some level and are not the output of an active compaction. It -deletes all large value files that are not referenced from any live -table or log file. +from some level and are not the output of an active compaction. diff --git a/doc/index.html b/doc/index.html index 2a83fc37a..c2312b7fb 100644 --- a/doc/index.html +++ b/doc/index.html @@ -412,17 +412,6 @@ We might want to prefix filename keys with one letter (say '/') and over just the metadata do not force us to fetch and cache bulky file contents.

-

Large Values

-

-leveldb has special treatment of large values (by default, a value -of length greater than or equal to 64K is considered large, though a -field in Options can be used to adjust this threshold). Each such -large value is placed in a separate operating system file, and the -normal database blocks just contain pointers to such files. -

-Furthermore, if the same large value occurs multiple times in a single -database, it will be stored just once. -

Checksums

leveldb associates checksums with all data it stores in the file system. diff --git a/include/leveldb/options.h b/include/leveldb/options.h index 87d388e73..a94651f8c 100644 --- a/include/leveldb/options.h +++ b/include/leveldb/options.h @@ -86,16 +86,6 @@ struct Options { // Default: 1000 int max_open_files; - // Handle values larger than "large_value_threshold" bytes - // specially, by writing them into their own files (to avoid - // compaction overhead) and doing content-based elimination of - // duplicate values to save space. - // - // We recommend against changing this value. - // - // Default: 64K - size_t large_value_threshold; - // Control over blocks (user data is stored in a set of blocks, and // a block is the unit of reading from disk). @@ -110,7 +100,7 @@ struct Options { // compression is enabled. This parameter can be changed dynamically. // // Default: 4K - int block_size; + size_t block_size; // Number of keys between restart points for delta encoding of keys. // This parameter can be changed dynamically. Most clients should diff --git a/leveldb.gyp b/leveldb.gyp index d10ac339f..20d1b1d58 100644 --- a/leveldb.gyp +++ b/leveldb.gyp @@ -96,8 +96,6 @@ 'port/port_example.h', 'port/port_posix.cc', 'port/port_posix.h', - 'port/sha1_portable.cc', - 'port/sha1_portable.h', 'table/block.cc', 'table/block.h', 'table/block_builder.cc', @@ -267,16 +265,6 @@ 'db/log_test.cc', ], }, - { - 'target_name': 'leveldb_sha1_test', - 'type': 'executable', - 'dependencies': [ - 'leveldb_testutil', - ], - 'sources': [ - 'port/sha1_test.cc', - ], - }, { 'target_name': 'leveldb_skiplist_test', 'type': 'executable', diff --git a/port/port_android.h b/port/port_android.h index 8680951b0..13df9c943 100644 --- a/port/port_android.h +++ b/port/port_android.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -134,13 +133,6 @@ inline bool Snappy_Uncompress( return false; } -inline void SHA1_Hash(const char* data, size_t len, char* hash_array) { - SHA1_CTX sha1_ctx; - SHA1Init(&sha1_ctx); - SHA1Update(&sha1_ctx, (const u_char*)data, len); - SHA1Final((u_char*)hash_array, &sha1_ctx); -} - inline uint64_t ThreadIdentifier() { pthread_t tid = pthread_self(); uint64_t r = 0; diff --git a/port/port_chromium.h b/port/port_chromium.h index e349f8f9f..1851e6ec1 100644 --- a/port/port_chromium.h +++ b/port/port_chromium.h @@ -13,7 +13,6 @@ #include "base/atomicops.h" #include "base/basictypes.h" #include "base/logging.h" -#include "base/sha1.h" #include "base/synchronization/condition_variable.h" #include "base/synchronization/lock.h" @@ -83,12 +82,6 @@ class AtomicPointer { } }; -inline void SHA1_Hash(const char* data, size_t len, char* hash_array) { - return ::base::SHA1HashBytes(reinterpret_cast(data), - len, - reinterpret_cast(hash_array)); -} - bool Snappy_Compress(const char* input, size_t input_length, std::string* output); bool Snappy_Uncompress(const char* input_data, size_t input_length, diff --git a/port/port_example.h b/port/port_example.h index cf72617b6..8a624f346 100644 --- a/port/port_example.h +++ b/port/port_example.h @@ -89,11 +89,6 @@ class AtomicPointer { void NoBarrier_Store(void* v); }; -// ------------------ Checksumming ------------------- - -// Store a 160-bit hash of "data[0..len-1]" in "hash_array[0]..hash_array[19]" -extern void SHA1_Hash(const char* data, size_t len, char* hash_array); - // ------------------ Compression ------------------- // Store the snappy compression of "input[0,input_length-1]" in *output. diff --git a/port/port_posix.h b/port/port_posix.h index 7adbc018b..c158db1ff 100644 --- a/port/port_posix.h +++ b/port/port_posix.h @@ -13,7 +13,6 @@ #include #include #include -#include "port/sha1_portable.h" namespace leveldb { namespace port { @@ -73,10 +72,6 @@ class AtomicPointer { } }; -inline void SHA1_Hash(const char* data, size_t len, char* hash_array) { - SHA1_Hash_Portable(data, len, hash_array); -} - // TODO(gabor): Implement actual compress inline bool Snappy_Compress(const char* input, size_t input_length, std::string* output) { diff --git a/table/block.cc b/table/block.cc index 0525d2d15..92b28776d 100644 --- a/table/block.cc +++ b/table/block.cc @@ -62,7 +62,9 @@ static inline const char* DecodeEntry(const char* p, const char* limit, if ((p = GetVarint32Ptr(p, limit, value_length)) == NULL) return NULL; } - if (limit - p < (*non_shared + *value_length)) return NULL; + if (static_cast(limit - p) < (*non_shared + *value_length)) { + return NULL; + } return p; } diff --git a/table/block_builder.cc b/table/block_builder.cc index ae18b36c1..dc958c80c 100644 --- a/table/block_builder.cc +++ b/table/block_builder.cc @@ -62,7 +62,7 @@ size_t BlockBuilder::CurrentSizeEstimate() const { Slice BlockBuilder::Finish() { // Append restart array - for (int i = 0; i < restarts_.size(); i++) { + for (size_t i = 0; i < restarts_.size(); i++) { PutFixed32(&buffer_, restarts_[i]); } PutFixed32(&buffer_, restarts_.size()); diff --git a/table/format.cc b/table/format.cc index 8c6b0f373..63971dbe9 100644 --- a/table/format.cc +++ b/table/format.cc @@ -36,7 +36,7 @@ void Footer::EncodeTo(std::string* dst) const { metaindex_handle_.EncodeTo(dst); index_handle_.EncodeTo(dst); dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding - PutFixed32(dst, static_cast(kTableMagicNumber)); + PutFixed32(dst, static_cast(kTableMagicNumber & 0xffffffffu)); PutFixed32(dst, static_cast(kTableMagicNumber >> 32)); assert(dst->size() == original_size + kEncodedLength); } @@ -71,7 +71,7 @@ Status ReadBlock(RandomAccessFile* file, // Read the block contents as well as the type/crc footer. // See table_builder.cc for the code that built this structure. - size_t n = handle.size(); + size_t n = static_cast(handle.size()); char* buf = new char[n + kBlockTrailerSize]; Slice contents; Status s = file->Read(handle.offset(), n + kBlockTrailerSize, &contents, buf); diff --git a/util/arena.cc b/util/arena.cc index 4bf6e36ae..40ab99d92 100644 --- a/util/arena.cc +++ b/util/arena.cc @@ -16,7 +16,7 @@ Arena::Arena() { } Arena::~Arena() { - for (int i = 0; i < blocks_.size(); i++) { + for (size_t i = 0; i < blocks_.size(); i++) { delete[] blocks_[i]; } } diff --git a/util/coding.cc b/util/coding.cc index 680e2ad76..14f21f79c 100644 --- a/util/coding.cc +++ b/util/coding.cc @@ -85,7 +85,7 @@ char* EncodeVarint64(char* dst, uint64_t v) { *(ptr++) = (v & (B-1)) | B; v >>= 7; } - *(ptr++) = v; + *(ptr++) = static_cast(v); return reinterpret_cast(ptr); } diff --git a/util/comparator.cc b/util/comparator.cc index e2b27e3cb..cc2b2635d 100644 --- a/util/comparator.cc +++ b/util/comparator.cc @@ -51,7 +51,7 @@ class BytewiseComparatorImpl : public Comparator { virtual void FindShortSuccessor(std::string* key) const { // Find first character that can be incremented size_t n = key->size(); - for (int i = 0; i < n; i++) { + for (size_t i = 0; i < n; i++) { const uint8_t byte = (*key)[i]; if (byte != static_cast(0xff)) { (*key)[i] = byte + 1; diff --git a/util/logging.cc b/util/logging.cc index 5c9bd4abd..760d33572 100644 --- a/util/logging.cc +++ b/util/logging.cc @@ -20,7 +20,7 @@ void AppendNumberTo(std::string* str, uint64_t num) { } void AppendEscapedStringTo(std::string* str, const Slice& value) { - for (int i = 0; i < value.size(); i++) { + for (size_t i = 0; i < value.size(); i++) { char c = value[i]; if (c >= ' ' && c <= '~') { str->push_back(c); diff --git a/util/options.cc b/util/options.cc index 29272fe1a..0ea5c98c9 100644 --- a/util/options.cc +++ b/util/options.cc @@ -18,7 +18,6 @@ Options::Options() info_log(NULL), write_buffer_size(4<<20), max_open_files(1000), - large_value_threshold(65536), block_cache(NULL), block_size(4096), block_restart_interval(16), diff --git a/util/random.h b/util/random.h index 2d458e87a..d886b4ec1 100644 --- a/util/random.h +++ b/util/random.h @@ -29,7 +29,7 @@ class Random { uint64_t product = seed_ * A; // Compute (product % M) using the fact that ((x << 31) % M) == x. - seed_ = (product >> 31) + (product & M); + seed_ = static_cast((product >> 31) + (product & M)); // The first reduction may overflow by 1 bit, so we may need to // repeat. mod == M is not possible; using > allows the faster // sign-bit-based test.