Pull from upstream.

git-svn-id: https://leveldb.googlecode.com/svn/trunk@14 62dab493-f737-651d-591e-8d6aee1b9529
15 years ago · 8303bb1b33
parent 6d243ebf79
commit 8303bb1b33
21 changed files with 118 additions and 113 deletions
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@ -866,7 +866,7 @@ Iterator* DBImpl::TEST_NewInternalIterator() {
  return NewInternalIterator(ReadOptions(), &ignored);
 }

-int64 DBImpl::TEST_MaxNextLevelOverlappingBytes() {
+int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
  MutexLock l(&mutex_);
  return versions_->MaxNextLevelOverlappingBytes();
 }
@ -989,11 +989,11 @@ void DBImpl::MaybeCompressLargeValue(
    std::string* scratch,
    LargeValueRef* ref) {
  switch (options_.compression) {
-    case kLightweightCompression: {
-      port::Lightweight_Compress(raw_value.data(), raw_value.size(), scratch);
-      if (scratch->size() < (raw_value.size() / 8) * 7) {
+    case kSnappyCompression: {
+      if (port::Snappy_Compress(raw_value.data(), raw_value.size(), scratch) &&
+          (scratch->size() < (raw_value.size() / 8) * 7)) {
        *file_bytes = *scratch;
-        *ref = LargeValueRef::Make(raw_value, kLightweightCompression);
+        *ref = LargeValueRef::Make(raw_value, kSnappyCompression);
        return;
      }

--- a/db/db_impl.h
+++ b/db/db_impl.h
@ -57,7 +57,7 @@ class DBImpl : public DB {

  // Return the maximum overlapping data (in bytes) at next level for any
  // file at a level >= 1.
-  int64 TEST_MaxNextLevelOverlappingBytes();
+  int64_t TEST_MaxNextLevelOverlappingBytes();

 private:
  friend class DB;
--- a/db/db_iter.cc
+++ b/db/db_iter.cc
@ -374,10 +374,10 @@ void DBIter::ReadIndirectValue() const {
            }
            break;
          }
-          case kLightweightCompression: {
+          case kSnappyCompression: {
            std::string uncompressed;
-            if (port::Lightweight_Uncompress(result.data(), result.size(),
-                                       &uncompressed) &&
+            if (port::Snappy_Uncompress(result.data(), result.size(),
+                                        &uncompressed) &&
                uncompressed.size() == large_ref.ValueSize()) {
              swap(uncompressed, large_->value);
            } else {
--- a/db/db_test.cc
+++ b/db/db_test.cc
@ -674,6 +674,12 @@ TEST(DBTest, LargeValues1) {
  ASSERT_TRUE(LargeValuesOK(this, expected));
 }

+static bool SnappyCompressionSupported() {
+  std::string out;
+  Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  return port::Snappy_Compress(in.data(), in.size(), &out);
+}
+
 TEST(DBTest, LargeValues2) {
  Options options;
  options.large_value_threshold = 10000;
@ -694,12 +700,11 @@ TEST(DBTest, LargeValues2) {

  ASSERT_OK(Put("big2", big2));
  ASSERT_EQ(big2, Get("big2"));
-#if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_CHROMIUM)
-  // TODO(sanjay) Reenable after compression support is added
-  expected.insert(LargeValueRef::Make(big2, kNoCompression));
-#else
-  expected.insert(LargeValueRef::Make(big2, kLightweightCompression));
-#endif
+  if (SnappyCompressionSupported()) {
+    expected.insert(LargeValueRef::Make(big2, kSnappyCompression));
+  } else {
+    expected.insert(LargeValueRef::Make(big2, kNoCompression));
+  }
  ASSERT_TRUE(LargeValuesOK(this, expected));

  ASSERT_OK(dbfull()->TEST_CompactMemTable());
--- a/db/dbformat.cc
+++ b/db/dbformat.cc
@ -140,7 +140,7 @@ bool FilenameStringToLargeValueRef(const Slice& s, LargeValueRef* h) {
      ConsumeChar(&in, '-') &&
      ConsumeDecimalNumber(&in, &ctype) &&
      in.empty() &&
-      (ctype <= kLightweightCompression)) {
+      (ctype <= kSnappyCompression)) {
    EncodeFixed64(&h->data[20], value_size);
    h->data[28] = static_cast<unsigned char>(ctype);
    return true;
--- a/db/dbformat_test.cc
+++ b/db/dbformat_test.cc
@ -117,7 +117,7 @@ TEST(FormatTest, SHA1) {
                LargeValueRef::Make("hello", kNoCompression)));
  ASSERT_EQ("aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d-5-1",  // SHA1, lwcompr
            LargeValueRefToFilenameString(
-                LargeValueRef::Make("hello", kLightweightCompression)));
+                LargeValueRef::Make("hello", kSnappyCompression)));
 }

 }
--- a/db/filename_test.cc
+++ b/db/filename_test.cc
@ -136,7 +136,7 @@ TEST(FileNameTest, Construction) {
  ASSERT_EQ(999, number);
  ASSERT_EQ(kTempFile, type);

-  for (int i = 0; i <= kLightweightCompression; i++) {
+  for (int i = 0; i <= kSnappyCompression; i++) {
    CompressionType ctype = static_cast<CompressionType>(i);
    std::string value = "abcdef";
    LargeValueRef real_large_ref = LargeValueRef::Make(Slice(value), ctype);
--- a/db/version_edit_test.cc
+++ b/db/version_edit_test.cc
@ -31,7 +31,7 @@ TEST(VersionEditTest, EncodeDecode) {
    edit.DeleteFile(4, kBig + 700 + i);
    edit.AddLargeValueRef(LargeValueRef::Make("big", kNoCompression),
                          kBig + 800 + i, "foobar");
-    edit.AddLargeValueRef(LargeValueRef::Make("big2", kLightweightCompression),
+    edit.AddLargeValueRef(LargeValueRef::Make("big2", kSnappyCompression),
                          kBig + 801 + i, "baz");
    edit.SetCompactPointer(i, InternalKey("x", kBig + 900 + i, kTypeValue));
  }
--- a/db/version_set.cc
+++ b/db/version_set.cc
@ -20,9 +20,11 @@

 namespace leveldb {

-// Maximum number of overlaps in grandparent (i.e., level+2) before we
+static const int kTargetFileSize = 2 * 1048576;
+
+// Maximum bytes of overlaps in grandparent (i.e., level+2) before we
 // stop building a single file in a level->level+1 compaction.
-static const int kMaxGrandParentFiles = 10;
+static const int64_t kMaxGrandParentOverlapBytes = 10 * kTargetFileSize;

 static double MaxBytesForLevel(int level) {
  if (level == 0) {
@ -38,7 +40,7 @@ static double MaxBytesForLevel(int level) {
 }

 static uint64_t MaxFileSizeForLevel(int level) {
-  return 2 << 20;       // We could vary per level to reduce number of files?
+  return kTargetFileSize;  // We could vary per level to reduce number of files?
 }

 namespace {
@ -755,17 +757,22 @@ void VersionSet::AddLiveFiles(std::set<uint64_t>* live) {
  }
 }

-int64 VersionSet::MaxNextLevelOverlappingBytes() {
-  int64 result = 0;
+static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
+  int64_t sum = 0;
+  for (int i = 0; i < files.size(); i++) {
+    sum += files[i]->file_size;
+  }
+  return sum;
+}
+
+int64_t VersionSet::MaxNextLevelOverlappingBytes() {
+  int64_t result = 0;
  std::vector<FileMetaData*> overlaps;
  for (int level = 0; level < config::kNumLevels - 1; level++) {
    for (int i = 0; i < current_->files_[level].size(); i++) {
      const FileMetaData* f = current_->files_[level][i];
      GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
-      int64 sum = 0;
-      for (int j = 0; j < overlaps.size(); j++) {
-        sum += overlaps[j]->file_size;
-      }
+      const int64_t sum = TotalFileSize(overlaps);
      if (sum > result) {
        result = sum;
      }
@ -989,7 +996,8 @@ Compaction::Compaction(int level)
      max_output_file_size_(MaxFileSizeForLevel(level)),
      input_version_(NULL),
      grandparent_index_(0),
-      output_start_(-1) {
+      seen_key_(false),
+      overlapped_bytes_(0) {
  for (int i = 0; i < config::kNumLevels; i++) {
    level_ptrs_[i] = 0;
  }
@ -1002,12 +1010,12 @@ Compaction::~Compaction() {
 }

 bool Compaction::IsTrivialMove() const {
-  // Avoid a move if there are lots of overlapping grandparent files.
+  // Avoid a move if there is lots of overlapping grandparent data.
  // Otherwise, the move could create a parent file that will require
  // a very expensive merge later on.
-  return (num_input_files(0) == 1
-          && num_input_files(1) == 0
-          && grandparents_.size() <= kMaxGrandParentFiles);
+  return (num_input_files(0) == 1 &&
+          num_input_files(1) == 0 &&
+          TotalFileSize(grandparents_) <= kMaxGrandParentOverlapBytes);
 }

 void Compaction::AddInputDeletions(VersionEdit* edit) {
@ -1044,17 +1052,16 @@ bool Compaction::ShouldStopBefore(const InternalKey& key) {
  const InternalKeyComparator* icmp = &input_version_->vset_->icmp_;
  while (grandparent_index_ < grandparents_.size() &&
      icmp->Compare(key, grandparents_[grandparent_index_]->largest) > 0) {
+    if (seen_key_) {
+      overlapped_bytes_ += grandparents_[grandparent_index_]->file_size;
+    }
    grandparent_index_++;
  }
+  seen_key_ = true;

-  // First call?
-  if (output_start_ < 0) {
-    output_start_ = grandparent_index_;
-  }
-
-  if (grandparent_index_ - output_start_ + 1 > kMaxGrandParentFiles) {
-    // Too many overlaps for current output; start new output
-    output_start_ = grandparent_index_;
+  if (overlapped_bytes_ > kMaxGrandParentOverlapBytes) {
+    // Too much overlap for current output; start new output
+    overlapped_bytes_ = 0;
    return true;
  } else {
    return false;
--- a/db/version_set.h
+++ b/db/version_set.h
@ -141,7 +141,7 @@ class VersionSet {

  // Return the maximum overlapping data (in bytes) at next level for any
  // file at a level >= 1.
-  int64 MaxNextLevelOverlappingBytes();
+  int64_t MaxNextLevelOverlappingBytes();

  // Create an iterator that reads over the compaction inputs for "*c".
  // The caller should delete the iterator when no longer needed.
@ -298,8 +298,10 @@ class Compaction {
  // State used to check for number of of overlapping grandparent files
  // (parent == level_ + 1, grandparent == level_ + 2)
  std::vector<FileMetaData*> grandparents_;
-  int grandparent_index_;   // Index in grandparent_starts_
-  int output_start_;        // Index in grandparent_starts_ where output started
+  int grandparent_index_;     // Index in grandparent_starts_
+  bool seen_key_;             // Some output key has been seen
+  int64_t overlapped_bytes_;  // Bytes of overlap between current output
+                              // and grandparent files

  // State for implementing IsBaseLevelForKey

--- a/include/options.h
+++ b/include/options.h
@ -22,8 +22,8 @@ class WritableFile;
 enum CompressionType {
  // NOTE: do not change the values of existing entries, as these are
  // part of the persistent format on disk.
-  kNoCompression           = 0x0,
-  kLightweightCompression  = 0x1,
+  kNoCompression     = 0x0,
+  kSnappyCompression = 0x1,
 };

 // Options to control the behavior of a database (passed to DB::Open)
@ -122,16 +122,16 @@ struct Options {
  // Compress blocks using the specified compression algorithm.  This
  // parameter can be changed dynamically.
  //
-  // Default: kLightweightCompression, which gives lightweight but fast
+  // Default: kSnappyCompression, which gives lightweight but fast
  // compression.
  //
-  // Typical speeds of kLightweightCompression on an Intel(R) Core(TM)2 2.4GHz:
+  // Typical speeds of kSnappyCompression on an Intel(R) Core(TM)2 2.4GHz:
  //    ~200-500MB/s compression
  //    ~400-800MB/s decompression
  // Note that these speeds are significantly faster than most
  // persistent storage speeds, and therefore it is typically never
  // worth switching to kNoCompression.  Even if the input data is
-  // incompressible, the kLightweightCompression implementation will
+  // incompressible, the kSnappyCompression implementation will
  // efficiently detect that and will switch to uncompressed mode.
  CompressionType compression;

--- a/port/port_android.h
+++ b/port/port_android.h
@ -82,29 +82,20 @@ class AtomicPointer {
  }
 };

-/**
- * TODO(gabor): Implement actual compress
- *     This is a hack - it just copies input to output.
- *     No actual compression occurs.
- */
-inline void Lightweight_Compress(
+// TODO(gabor): Implement actual compress
+inline bool Snappy_Compress(
    const char* input,
    size_t input_length,
    std::string* output) {
-  output->copy((char*)input,0,input_length);
+  return false;
 }

-/**
- * TODO(gabor): Implement actual compress
- *     This is a hack - it just copies input to output.
- *     No actual uncompression occurs.
- */
-inline bool Lightweight_Uncompress(
+// TODO(gabor): Implement actual uncompress
+inline bool Snappy_Uncompress(
    const char* input_data,
    size_t input_length,
    std::string* output) {
-  output->copy((char*)input_data,0,input_length);
-  return (bool)1;
+  return false;
 }

 inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
--- a/port/port_chromium.cc
+++ b/port/port_chromium.cc
@ -49,20 +49,21 @@ void CondVar::SignalAll() {
  cv_.Broadcast();
 }

-void Lightweight_Compress(const char* input, size_t input_length,
-                          std::string* output) {
+bool Snappy_Compress(const char* input, size_t input_length,
+                     std::string* output) {
 #if defined(USE_SNAPPY)
  output->resize(snappy::MaxCompressedLength(input_length));
  size_t outlen;
  snappy::RawCompress(input, input_length, &(*output)[0], &outlen);
  output->resize(outlen);
+  return true;
 #else
-  output->assign(input, input_length);
+  return false;
 #endif
 }

-bool Lightweight_Uncompress(const char* input_data, size_t input_length,
-                            std::string* output) {
+bool Snappy_Uncompress(const char* input_data, size_t input_length,
+                       std::string* output) {
 #if defined(USE_SNAPPY)
  size_t ulength;
  if (!snappy::GetUncompressedLength(input_data, input_length, &ulength)) {
@ -71,8 +72,7 @@ bool Lightweight_Uncompress(const char* input_data, size_t input_length,
  output->resize(ulength);
  return snappy::RawUncompress(input_data, input_length, &(*output)[0]);
 #else
-  output->assign(input_data, input_length);
-  return true;
+  return false;
 #endif
 }

--- a/port/port_chromium.h
+++ b/port/port_chromium.h
@ -89,10 +89,10 @@ inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
                               reinterpret_cast<unsigned char*>(hash_array));
 }

-void Lightweight_Compress(const char* input, size_t input_length,
-                          std::string* output);
-bool Lightweight_Uncompress(const char* input_data, size_t input_length,
-                            std::string* output);
+bool Snappy_Compress(const char* input, size_t input_length,
+                     std::string* output);
+bool Snappy_Uncompress(const char* input_data, size_t input_length,
+                       std::string* output);

 inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
  return false;
--- a/port/port_example.h
+++ b/port/port_example.h
@ -96,15 +96,16 @@ extern void SHA1_Hash(const char* data, size_t len, char* hash_array);

 // ------------------ Compression -------------------

-// Store the lightweight compression of "input[0,input_length-1]" in *output.
-extern void Lightweight_Compress(const char* input, size_t input_length,
-                           std::string* output);
+// Store the snappy compression of "input[0,input_length-1]" in *output.
+// Returns false if snappy is not supported by this port.
+extern bool Snappy_Compress(const char* input, size_t input_length,
+                            std::string* output);

-// Attempt to lightweight uncompress input[0,input_length-1] into *output.
+// Attempt to snappy uncompress input[0,input_length-1] into *output.
 // Returns true if successful, false if the input is invalid lightweight
 // compressed data.
-extern bool Lightweight_Uncompress(const char* input_data, size_t input_length,
-                             std::string* output);
+extern bool Snappy_Uncompress(const char* input_data, size_t input_length,
+                              std::string* output);

 // ------------------ Miscellaneous -------------------

--- a/port/port_posix.h
+++ b/port/port_posix.h
@ -77,25 +77,16 @@ inline void SHA1_Hash(const char* data, size_t len, char* hash_array) {
  SHA1_Hash_Portable(data, len, hash_array);
 }

-/**
- * TODO(gabor): Implement actual compress
- *     This is a hack - it just copies input to output.
- *     No actual compression occurs.
- */
-inline void Lightweight_Compress(const char* input, size_t input_length,
-                           std::string* output) {
-  output->assign(input, input_length);
+// TODO(gabor): Implement actual compress
+inline bool Snappy_Compress(const char* input, size_t input_length,
+                            std::string* output) {
+  return false;
 }

-/**
- * TODO(gabor): Implement actual uncompress
- *     This is a hack - it just copies input to output.
- *     No actual uncompression occurs.
- */
-inline bool Lightweight_Uncompress(const char* input_data, size_t input_length,
-                             std::string* output) {
-  output->assign(input_data, input_length);
-  return true;
+// TODO(gabor): Implement actual uncompress
+inline bool Snappy_Uncompress(const char* input_data, size_t input_length,
+                              std::string* output) {
+  return false;
 }

 inline bool GetHeapProfile(void (*func)(void*, const char*, int), void* arg) {
--- a/table/format.cc
+++ b/table/format.cc
@ -106,9 +106,9 @@ Status ReadBlock(RandomAccessFile* file,

      // Ok
      break;
-    case kLightweightCompression: {
+    case kSnappyCompression: {
      std::string decompressed;
-      if (!port::Lightweight_Uncompress(data, n, &decompressed)) {
+      if (!port::Snappy_Uncompress(data, n, &decompressed)) {
        delete[] buf;
        s = Status::Corruption("corrupted compressed block contents");
        return s;
--- a/table/table_builder.cc
+++ b/table/table_builder.cc
@ -138,11 +138,14 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
      block_contents = raw;
      break;

-    case kLightweightCompression: {
-      port::Lightweight_Compress(raw.data(), raw.size(), &r->compressed_output);
-      block_contents = r->compressed_output;
-      if (block_contents.size() >= raw.size() - (raw.size() / 8u)) {
-        // Compressed less than 12.5%, so just store uncompressed form
+    case kSnappyCompression: {
+      std::string* compressed = &r->compressed_output;
+      if (port::Snappy_Compress(raw.data(), raw.size(), compressed) &&
+          compressed->size() < raw.size() - (raw.size() / 8u)) {
+        block_contents = *compressed;
+      } else {
+        // Snappy not supported, or compressed less than 12.5%, so just
+        // store uncompressed form
        block_contents = raw;
        type = kNoCompression;
      }
--- a/table/table_test.cc
+++ b/table/table_test.cc
@ -800,12 +800,17 @@ TEST(TableTest, ApproximateOffsetOfPlain) {

 }

+static bool SnappyCompressionSupported() {
+  std::string out;
+  Slice in = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
+  return port::Snappy_Compress(in.data(), in.size(), &out);
+}
+
 TEST(TableTest, ApproximateOffsetOfCompressed) {
-#if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_CHROMIUM)
-  // Compression not supported yet, so skip this test.
-  // TODO(sanjay) Reenable after compression support is added
-  return;
-#endif
+  if (!SnappyCompressionSupported()) {
+    fprintf(stderr, "skipping compression tests\n");
+    return;
+  }

  Random rnd(301);
  TableConstructor c(BytewiseComparator());
@ -818,7 +823,7 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
  KVMap kvmap;
  Options options;
  options.block_size = 1024;
-  options.compression = kLightweightCompression;
+  options.compression = kSnappyCompression;
  c.Finish(options, &keys, &kvmap);

  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
--- a/util/env_chromium.cc
+++ b/util/env_chromium.cc
@ -326,7 +326,7 @@ class ChromiumEnv : public Env {

  virtual Status GetFileSize(const std::string& fname, uint64_t* size) {
    Status s;
-    int64 signed_size;
+    int64_t signed_size;
    if (!::file_util::GetFileSize(CreateFilePath(fname), &signed_size)) {
      *size = 0;
      s = Status::IOError(fname, "Could not determine file size.");
--- a/util/options.cc
+++ b/util/options.cc
@ -22,7 +22,7 @@ Options::Options()
      block_cache(NULL),
      block_size(8192),
      block_restart_interval(16),
-      compression(kLightweightCompression) {
+      compression(kSnappyCompression) {
 }