Implement max_size in BlobStore

Summary:
I added max_size option in blobstore. Since we now know the maximum number of buckets we'll ever use, we can allocate an array of buckets and access its elements without use of any locks! Common case Get doesn't lock anything now.

However, benchmarks with a 16KB block size show no measurable impact on speed.

Test Plan: unittests + benchmark

Reviewers: dhruba, haobo, kailiu

Reviewed By: dhruba

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13641
main
Igor Canadi 12 years ago
parent 2fb361ad98
commit cb8a7302e4
  1. 41
      util/blob_store.cc
  2. 15
      util/blob_store.h
  3. 28
      util/blob_store_test.cc

@ -106,21 +106,27 @@ bool FreeList::Overlap(const Blob &blob) const {
BlobStore::BlobStore(const string& directory, BlobStore::BlobStore(const string& directory,
uint64_t block_size, uint64_t block_size,
uint32_t blocks_per_bucket, uint32_t blocks_per_bucket,
uint32_t max_buckets,
Env* env) : Env* env) :
directory_(directory), directory_(directory),
block_size_(block_size), block_size_(block_size),
blocks_per_bucket_(blocks_per_bucket), blocks_per_bucket_(blocks_per_bucket),
env_(env) { env_(env),
max_buckets_(max_buckets) {
env_->CreateDirIfMissing(directory_); env_->CreateDirIfMissing(directory_);
storage_options_.use_mmap_writes = false; storage_options_.use_mmap_writes = false;
storage_options_.use_mmap_reads = false; storage_options_.use_mmap_reads = false;
buckets_size_ = 0;
buckets_ = new unique_ptr<RandomRWFile>[max_buckets_];
CreateNewBucket(); CreateNewBucket();
} }
BlobStore::~BlobStore() { BlobStore::~BlobStore() {
// TODO we don't care about recovery for now // TODO we don't care about recovery for now
delete [] buckets_;
} }
Status BlobStore::Put(const Slice& value, Blob* blob) { Status BlobStore::Put(const Slice& value, Blob* blob) {
@ -129,13 +135,12 @@ Status BlobStore::Put(const Slice& value, Blob* blob) {
if (!s.ok()) { if (!s.ok()) {
return s; return s;
} }
ReadLock l(&buckets_mutex_);
size_t size_left = value.size(); size_t size_left = value.size();
uint64_t offset = 0; // in bytes, not blocks uint64_t offset = 0; // in bytes, not blocks
for (auto chunk : blob->chunks) { for (auto chunk : blob->chunks) {
uint64_t write_size = min(chunk.size * block_size_, size_left); uint64_t write_size = min(chunk.size * block_size_, size_left);
assert(chunk.bucket_id < buckets_.size()); assert(chunk.bucket_id < buckets_size_);
s = buckets_[chunk.bucket_id].get()->Write(chunk.offset * block_size_, s = buckets_[chunk.bucket_id].get()->Write(chunk.offset * block_size_,
Slice(value.data() + offset, Slice(value.data() + offset,
write_size)); write_size));
@ -164,18 +169,19 @@ Status BlobStore::Put(const Slice& value, Blob* blob) {
Status BlobStore::Get(const Blob& blob, Status BlobStore::Get(const Blob& blob,
string* value) const { string* value) const {
ReadLock l(&buckets_mutex_); {
// assert that it doesn't overlap with free list
// assert that it doesn't overlap with free list // it will get compiled out for release
// it will get compiled out for release MutexLock l(&free_list_mutex_);
assert(!free_list_.Overlap(blob)); assert(!free_list_.Overlap(blob));
}
value->resize(blob.Size() * block_size_); value->resize(blob.Size() * block_size_);
uint64_t offset = 0; // in bytes, not blocks uint64_t offset = 0; // in bytes, not blocks
for (auto chunk : blob.chunks) { for (auto chunk : blob.chunks) {
Slice result; Slice result;
assert(chunk.bucket_id < buckets_.size()); assert(chunk.bucket_id < buckets_size_);
Status s; Status s;
s = buckets_[chunk.bucket_id].get()->Read(chunk.offset * block_size_, s = buckets_[chunk.bucket_id].get()->Read(chunk.offset * block_size_,
chunk.size * block_size_, chunk.size * block_size_,
@ -200,8 +206,7 @@ Status BlobStore::Delete(const Blob& blob) {
} }
Status BlobStore::Sync() { Status BlobStore::Sync() {
ReadLock l(&buckets_mutex_); for (size_t i = 0; i < buckets_size_; ++i) {
for (size_t i = 0; i < buckets_.size(); ++i) {
Status s = buckets_[i].get()->Sync(); Status s = buckets_[i].get()->Sync();
if (!s.ok()) { if (!s.ok()) {
return s; return s;
@ -228,10 +233,13 @@ Status BlobStore::Allocate(uint32_t blocks, Blob* blob) {
// called with free_list_mutex_ held // called with free_list_mutex_ held
Status BlobStore::CreateNewBucket() { Status BlobStore::CreateNewBucket() {
WriteLock l(&buckets_mutex_); MutexLock l(&buckets_mutex_);
int new_bucket_id;
new_bucket_id = buckets_.size(); if (buckets_size_ >= max_buckets_) {
buckets_.push_back(unique_ptr<RandomRWFile>()); return Status::IOError("Max size exceeded\n");
}
int new_bucket_id = buckets_size_;
char fname[200]; char fname[200];
sprintf(fname, "%s/%d.bs", directory_.c_str(), new_bucket_id); sprintf(fname, "%s/%d.bs", directory_.c_str(), new_bucket_id);
@ -240,7 +248,6 @@ Status BlobStore::CreateNewBucket() {
&buckets_[new_bucket_id], &buckets_[new_bucket_id],
storage_options_); storage_options_);
if (!s.ok()) { if (!s.ok()) {
buckets_.erase(buckets_.begin() + new_bucket_id);
return s; return s;
} }
@ -249,6 +256,8 @@ Status BlobStore::CreateNewBucket() {
// (also, tmpfs does not support allocate) // (also, tmpfs does not support allocate)
buckets_[new_bucket_id].get()->Allocate(0, block_size_ * blocks_per_bucket_); buckets_[new_bucket_id].get()->Allocate(0, block_size_ * blocks_per_bucket_);
buckets_size_ = new_bucket_id + 1;
return free_list_.Free(Blob(new_bucket_id, 0, blocks_per_bucket_)); return free_list_.Free(Blob(new_bucket_id, 0, blocks_per_bucket_));
} }

@ -102,10 +102,14 @@ class BlobStore {
// Bucket is a device or a file that we use to store the blobs. // Bucket is a device or a file that we use to store the blobs.
// If we don't have enough blocks to allocate a new blob, we will // If we don't have enough blocks to allocate a new blob, we will
// try to create a new file or device. // try to create a new file or device.
// max_buckets - maximum number of buckets BlobStore will create
// BlobStore max size in bytes is
// max_buckets * blocks_per_bucket * block_size
// env - env for creating new files // env - env for creating new files
BlobStore(const std::string& directory, BlobStore(const std::string& directory,
uint64_t block_size, uint64_t block_size,
uint32_t blocks_per_bucket, uint32_t blocks_per_bucket,
uint32_t max_buckets,
Env* env); Env* env);
~BlobStore(); ~BlobStore();
@ -134,10 +138,15 @@ class BlobStore {
EnvOptions storage_options_; EnvOptions storage_options_;
// protected by free_list_mutex_ // protected by free_list_mutex_
FreeList free_list_; FreeList free_list_;
// free_list_mutex_ is locked BEFORE buckets_mutex_
mutable port::Mutex free_list_mutex_; mutable port::Mutex free_list_mutex_;
// protected by buckets mutex // protected by buckets_mutex_
std::vector<unique_ptr<RandomRWFile>> buckets_; // array of buckets
mutable port::RWMutex buckets_mutex_; unique_ptr<RandomRWFile>* buckets_;
// number of buckets in the array
uint32_t buckets_size_;
uint32_t max_buckets_;
mutable port::Mutex buckets_mutex_;
// Calls FreeList allocate. If free list can't allocate // Calls FreeList allocate. If free list can't allocate
// new blob, creates new bucket and tries again // new blob, creates new bucket and tries again

@ -37,6 +37,7 @@ TEST(BlobStoreTest, SanityTest) {
BlobStore blob_store(test::TmpDir() + "/blob_store_test", BlobStore blob_store(test::TmpDir() + "/blob_store_test",
block_size, block_size,
blocks_per_file, blocks_per_file,
1000,
Env::Default()); Env::Default());
string buf; string buf;
@ -98,6 +99,7 @@ TEST(BlobStoreTest, FragmentedChunksTest) {
BlobStore blob_store(test::TmpDir() + "/blob_store_test", BlobStore blob_store(test::TmpDir() + "/blob_store_test",
block_size, block_size,
blocks_per_file, blocks_per_file,
1000,
Env::Default()); Env::Default());
string buf; string buf;
@ -138,6 +140,7 @@ TEST(BlobStoreTest, CreateAndStoreTest) {
BlobStore blob_store(test::TmpDir() + "/blob_store_test", BlobStore blob_store(test::TmpDir() + "/blob_store_test",
block_size, block_size,
blocks_per_file, blocks_per_file,
10000,
Env::Default()); Env::Default());
vector<pair<Blob, string>> ranges; vector<pair<Blob, string>> ranges;
@ -165,6 +168,31 @@ TEST(BlobStoreTest, CreateAndStoreTest) {
ASSERT_OK(blob_store.Sync()); ASSERT_OK(blob_store.Sync());
} }
TEST(BlobStoreTest, MaxSizeTest) {
const uint64_t block_size = 10;
const uint32_t blocks_per_file = 100;
const int max_buckets = 10;
Random random(5);
BlobStore blob_store(test::TmpDir() + "/blob_store_test",
block_size,
blocks_per_file,
max_buckets,
Env::Default());
string buf;
for (int i = 0; i < max_buckets; ++i) {
test::RandomString(&random, 1000, &buf);
Blob r;
ASSERT_OK(blob_store.Put(Slice(buf), &r));
}
test::RandomString(&random, 1000, &buf);
Blob r;
// should fail because max size
Status s = blob_store.Put(Slice(buf), &r);
ASSERT_EQ(s.ok(), false);
}
} // namespace rocksdb } // namespace rocksdb
int main(int argc, char** argv) { int main(int argc, char** argv) {

Loading…
Cancel
Save