Summary: Some Mongo+Rocks datasets in Parse's environment are not doing compactions very frequently. During the quiet period (with no IO), we'd like to schedule compactions so that our reads become faster. Also, aggressively compacting during quiet periods helps when write bursts happen. In addition, we also want to compact files that contain deleted key ranges (like old oplog keys). All of this is currently not possible with CompactRange() because it's single-threaded and blocks all other compactions from happening. Running CompactRange() risks blocking writes because we generate too many Level 0 files before the compaction is over. Stopping writes is very dangerous because they hold transaction locks. We tried running manual compaction once on Mongo+Rocks and everything fell apart. MarkForCompaction() solves all of those problems. This is a very lightweight manual compaction. It is lower priority than automatic compactions, which means it shouldn't interfere with the background process keeping the LSM tree clean. However, if no automatic compactions need to be run (or we have extra background threads available), we will start compacting files that are marked for compaction. Test Plan: added a new unit test Reviewers: yhchiang, rven, MarkCallaghan, sdong Reviewed By: sdong Subscribers: yoshinorim, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D37083 main
parent
acf8a4141d
commit
6059bdf86a
@ -0,0 +1,59 @@ |
||||
// Copyright (c) 2013, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
//
|
||||
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style license that can be
|
||||
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
||||
|
||||
#include "db/db_impl.h" |
||||
|
||||
#ifndef __STDC_FORMAT_MACROS |
||||
#define __STDC_FORMAT_MACROS |
||||
#endif |
||||
|
||||
#include <vector> |
||||
|
||||
#include "db/column_family.h" |
||||
#include "db/version_set.h" |
||||
#include "rocksdb/status.h" |
||||
|
||||
namespace rocksdb { |
||||
|
||||
#ifndef ROCKSDB_LITE |
||||
// Flags every file of `column_family` that GetOverlappingInputs() reports for
// the user-key range [*begin, *end] as marked_for_compaction, then recomputes
// the compaction score and calls SchedulePendingCompaction() and
// MaybeScheduleFlushOrCompaction(). This function does not run a compaction
// itself.
//
// column_family: handle to operate on; it is downcast to
//                ColumnFamilyHandleImpl and must actually be one.
// begin, end:    range bounds as user keys. A nullptr bound is forwarded to
//                GetOverlappingInputs() as nullptr (presumably "unbounded on
//                that side" -- confirm against VersionStorageInfo).
//
// Returns Status::OK() unconditionally.
Status DBImpl::SuggestCompactRange(ColumnFamilyHandle* column_family,
                                   const Slice* begin, const Slice* end) {
  // Downcast within the handle hierarchy: static_cast is the appropriate
  // named cast here (reinterpret_cast would skip any pointer adjustment).
  auto* cfh = static_cast<ColumnFamilyHandleImpl*>(column_family);
  auto cfd = cfh->cfd();

  // Wrap the user-key bounds in InternalKeys, the key type that
  // GetOverlappingInputs() is called with. A bound that was not supplied
  // stays nullptr.
  InternalKey start_key, end_key;
  const InternalKey* range_begin = nullptr;
  const InternalKey* range_end = nullptr;
  if (begin != nullptr) {
    start_key = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
    range_begin = &start_key;
  }
  if (end != nullptr) {
    end_key = InternalKey(*end, 0, static_cast<ValueType>(0));
    range_end = &end_key;
  }

  {
    // mutex_ is held while the current version's files are inspected and
    // flagged, and while the follow-up work is scheduled.
    InstrumentedMutexLock l(&mutex_);
    auto vstorage = cfd->current()->storage_info();
    for (int level = 0; level < vstorage->num_non_empty_levels(); ++level) {
      std::vector<FileMetaData*> inputs;
      vstorage->GetOverlappingInputs(level, range_begin, range_end, &inputs);
      for (FileMetaData* f : inputs) {
        f->marked_for_compaction = true;
      }
    }
    // Since we have some more files to compact, we should also recompute
    // compaction score
    vstorage->ComputeCompactionScore(*cfd->GetLatestMutableCFOptions(),
                                     CompactionOptionsFIFO());
    SchedulePendingCompaction(cfd);
    MaybeScheduleFlushOrCompaction();
  }
  return Status::OK();
}
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
} // namespace rocksdb
|
@ -0,0 +1,38 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#include "rocksdb/experimental.h" |
||||
|
||||
#include "db/db_impl.h" |
||||
|
||||
namespace rocksdb { |
||||
namespace experimental { |
||||
|
||||
#ifndef ROCKSDB_LITE |
||||
|
||||
// Public entry point: forwards the request to DBImpl::SuggestCompactRange()
// when `db` is (or derives from) DBImpl. Any other DB object -- including a
// null pointer, for which dynamic_cast yields nullptr -- is rejected with
// Status::NotSupported.
Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family,
                           const Slice* begin, const Slice* end) {
  if (auto* impl = dynamic_cast<DBImpl*>(db)) {
    return impl->SuggestCompactRange(column_family, begin, end);
  }
  return Status::NotSupported("Didn't recognize DB object");
}
||||
|
||||
#else // ROCKSDB_LITE
|
||||
|
||||
// ROCKSDB_LITE stub: the feature is compiled out, so every call reports
// Status::NotSupported. Parameter names are commented out because none of
// them is used, which would otherwise trigger -Wunused-parameter.
Status SuggestCompactRange(DB* /*db*/, ColumnFamilyHandle* /*column_family*/,
                           const Slice* /*begin*/, const Slice* /*end*/) {
  return Status::NotSupported("Not supported in RocksDB LITE");
}
||||
|
||||
#endif // ROCKSDB_LITE
|
||||
|
||||
// Convenience overload that targets the DB's default column family.
//
// A null `db` is not dereferenced here; it is forwarded together with a null
// column family so that the four-argument overload reports the problem
// through its returned Status instead of this function crashing.
Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end) {
  ColumnFamilyHandle* default_cf =
      (db != nullptr) ? db->DefaultColumnFamily() : nullptr;
  return SuggestCompactRange(db, default_cf, begin, end);
}
||||
|
||||
} // namespace experimental
|
||||
} // namespace rocksdb
|
@ -0,0 +1,20 @@ |
||||
// Copyright (c) 2014, Facebook, Inc. All rights reserved.
|
||||
// This source code is licensed under the BSD-style license found in the
|
||||
// LICENSE file in the root directory of this source tree. An additional grant
|
||||
// of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
#pragma once |
||||
|
||||
#include "rocksdb/db.h" |
||||
#include "rocksdb/status.h" |
||||
|
||||
namespace rocksdb { |
||||
namespace experimental { |
||||
|
||||
// Suggests (rather than forces) compaction of the files that overlap the
// user-key range [*begin, *end]. `begin` and `end` are passed as pointers and
// may be nullptr (presumably meaning "no bound on that side" -- confirm
// against the implementation).
// The four-argument form operates on `column_family`; the three-argument form
// operates on db->DefaultColumnFamily().
// Returns Status::NotSupported() when `db` is not a DBImpl or when the
// library is built with ROCKSDB_LITE.
// Supported only for Leveled compaction
|
||||
Status SuggestCompactRange(DB* db, ColumnFamilyHandle* column_family, |
||||
const Slice* begin, const Slice* end); |
||||
Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end); |
||||
|
||||
} // namespace experimental
|
||||
} // namespace rocksdb
|
Loading…
Reference in new issue