Fixed cache key for block cache

Summary:
Added a function to `RandomAccessFile` to generate a unique ID for that file. Currently only `PosixRandomAccessFile` implements this behaviour, and only on Linux.

Changed how key is generated in `Table::BlockReader`.

Added tests to check that the unique ID is stable, unique, and not a prefix of another unique ID. Added tests to verify that `Table` uses the cache more efficiently, in particular that cached blocks remain usable after a table is reopened.
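
For illustration, a minimal standalone sketch of the new cache-key scheme (an example under stated assumptions, not code from this change): the file's unique ID becomes the key prefix when the file can provide one, the block cache's `NewId()` value is the fallback prefix, and the block offset is appended as a varint. The helper names `AppendVarint64` and `MakeBlockCacheKey` are hypothetical; the real code uses `EncodeVarint64` from `util/coding.h`.

    // Sketch only: mirrors the idea behind the new cache keys without using
    // the leveldb headers. AppendVarint64 reimplements leveldb-style varints.
    #include <cstdint>
    #include <iostream>
    #include <string>

    // Append a 64-bit value as a varint (7 bits per byte, high bit set on all
    // bytes except the last).
    static void AppendVarint64(std::string* dst, uint64_t v) {
      while (v >= 128) {
        dst->push_back(static_cast<char>(v | 128));
        v >>= 7;
      }
      dst->push_back(static_cast<char>(v));
    }

    // Build a cache key for one block: the file's unique ID (or a per-process
    // fallback ID handed out by the cache) followed by the block's offset.
    // Because no unique ID is a prefix of another, distinct (file, offset)
    // pairs cannot produce the same key.
    std::string MakeBlockCacheKey(const std::string& file_unique_id,
                                  uint64_t fallback_cache_id,
                                  uint64_t block_offset) {
      std::string key;
      if (!file_unique_id.empty()) {
        key = file_unique_id;                    // e.g. dev, inode, generation
      } else {
        AppendVarint64(&key, fallback_cache_id); // cache->NewId() equivalent
      }
      AppendVarint64(&key, block_offset);
      return key;
    }

    int main() {
      std::string id;                 // stands in for GetUniqueId() output
      AppendVarint64(&id, 2049);      // device
      AppendVarint64(&id, 123456);    // inode
      AppendVarint64(&id, 7);         // file generation/version
      std::string key = MakeBlockCacheKey(id, 0, 4096);
      std::cout << "cache key is " << key.size() << " bytes\n";
      return 0;
    }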

Test Plan: make check

Reviewers: chip, vamsi, dhruba

Reviewed By: chip

CC: leveldb

Differential Revision: https://reviews.facebook.net/D8145
Branch: main
Author: Kosie van der Merwe (12 years ago)
Parent: 2c3565285e
Commit: 4dcc0c89f4
Changed files:
  1. include/leveldb/env.h (20 changed lines)
  2. include/leveldb/table.h (6 changed lines)
  3. table/table.cc (53 changed lines)
  4. table/table_test.cc (87 changed lines)
  5. util/coding.h (4 changed lines)
  6. util/env_posix.cc (34 changed lines)
  7. util/env_test.cc (142 changed lines)

include/leveldb/env.h
@@ -223,6 +223,26 @@ class RandomAccessFile {
   // Safe for concurrent use by multiple threads.
   virtual Status Read(uint64_t offset, size_t n, Slice* result,
                       char* scratch) const = 0;
+
+  // Tries to get a unique ID for this file that will be the same each time
+  // the file is opened (and will stay the same while the file is open).
+  // Furthermore, it tries to make this ID at most "max_size" bytes. If such
+  // an ID can be created this function returns the length of the ID and
+  // places it in "id"; otherwise, this function returns 0, in which case "id"
+  // may or may not have been modified.
+  //
+  // This function guarantees, for IDs from a given environment, that two
+  // unique IDs cannot be made equal to each other by adding arbitrary bytes
+  // to one of them. That is, no unique ID is the prefix of another.
+  //
+  // This function guarantees that the returned ID will not be interpretable
+  // as a single varint.
+  //
+  // Note: these IDs are only valid for the duration of the process.
+  virtual size_t GetUniqueId(char* id, size_t max_size) const {
+    return 0; // Default implementation to prevent issues with backwards
+              // compatibility.
+  };
 };

 // A file abstraction for sequential writing.  The implementation

include/leveldb/table.h
@@ -58,6 +58,10 @@ class Table {
   // be close to the file length.
   uint64_t ApproximateOffsetOf(const Slice& key) const;

+  // Returns true if the block for the specified key is in cache.
+  // REQUIRES: key is in this table.
+  bool TEST_KeyInCache(const ReadOptions& options, const Slice& key);
+
  private:
   struct Rep;
   Rep* rep_;
@@ -80,6 +84,8 @@ class Table {
   void ReadMeta(const Footer& footer);
   void ReadFilter(const Slice& filter_handle_value);

+  static void SetupCacheKeyPrefix(Rep* rep);
+
   // No copying allowed
   Table(const Table&);
   void operator=(const Table&);

table/table.cc
@@ -18,6 +18,11 @@
 namespace leveldb {

+// The longest the prefix of the cache key used to identify blocks can be.
+// We are using the fact that we know for Posix files the unique ID is three
+// varints.
+const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1;
+
 struct Table::Rep {
   ~Rep() {
     delete filter;
@@ -28,7 +33,8 @@ struct Table::Rep {
   Options options;
   Status status;
   unique_ptr<RandomAccessFile> file;
-  uint64_t cache_id;
+  char cache_key_prefix[kMaxCacheKeyPrefixSize];
+  size_t cache_key_prefix_size;
   FilterBlockReader* filter;
   const char* filter_data;
@@ -36,6 +42,25 @@ struct Table::Rep {
   Block* index_block;
 };

+// Helper function to set up the cache key's prefix for the Table.
+void Table::SetupCacheKeyPrefix(Rep* rep) {
+  assert(kMaxCacheKeyPrefixSize >= 10);
+  rep->cache_key_prefix_size = 0;
+  if (rep->options.block_cache) {
+    rep->cache_key_prefix_size = rep->file->GetUniqueId(rep->cache_key_prefix,
+                                                        kMaxCacheKeyPrefixSize);
+    if (rep->cache_key_prefix_size == 0) {
+      // If the prefix wasn't generated or was too long, we create one from
+      // the cache.
+      char* end = EncodeVarint64(rep->cache_key_prefix,
+                                 rep->options.block_cache->NewId());
+      rep->cache_key_prefix_size =
+          static_cast<size_t>(end - rep->cache_key_prefix);
+    }
+  }
+}
+
 Status Table::Open(const Options& options,
                    unique_ptr<RandomAccessFile>&& file,
                    uint64_t size,
@@ -80,7 +105,7 @@ Status Table::Open(const Options& options,
   rep->file = std::move(file);
   rep->metaindex_handle = footer.metaindex_handle();
   rep->index_block = index_block;
-  rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
+  SetupCacheKeyPrefix(rep);
   rep->filter_data = NULL;
   rep->filter = NULL;
   table->reset(new Table(rep));
@@ -179,10 +204,15 @@ Iterator* Table::BlockReader(void* arg,
   if (s.ok()) {
     BlockContents contents;
     if (block_cache != NULL) {
-      char cache_key_buffer[16];
-      EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
-      EncodeFixed64(cache_key_buffer+8, handle.offset());
-      Slice key(cache_key_buffer, sizeof(cache_key_buffer));
+      char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
+      const size_t cache_key_prefix_size = table->rep_->cache_key_prefix_size;
+      assert(cache_key_prefix_size != 0);
+      assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
+      memcpy(cache_key, table->rep_->cache_key_prefix,
+             cache_key_prefix_size);
+      char* end = EncodeVarint64(cache_key + cache_key_prefix_size,
+                                 handle.offset());
+      Slice key(cache_key, static_cast<size_t>(end-cache_key));
       cache_handle = block_cache->Lookup(key);
       if (cache_handle != NULL) {
         block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
@@ -274,6 +304,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
   return s;
 }

+void SaveDidIO(void* arg, const Slice& key, const Slice& value, bool didIO) {
+  *reinterpret_cast<bool*>(arg) = didIO;
+}
+
+bool Table::TEST_KeyInCache(const ReadOptions& options, const Slice& key) {
+  // We use InternalGet() as it has the logic that checks whether we read the
+  // block from disk or not.
+  bool didIO = false;
+  Status s = InternalGet(options, key, &didIO, SaveDidIO);
+  assert(s.ok());
+  return !didIO;
+}
+
 uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
   Iterator* index_iter =

table/table_test.cc
@@ -109,8 +109,8 @@ class StringSink: public WritableFile {
 class StringSource: public RandomAccessFile {
  public:
-  StringSource(const Slice& contents)
-      : contents_(contents.data(), contents.size()) {
+  StringSource(const Slice& contents, uint64_t uniq_id)
+      : contents_(contents.data(), contents.size()), uniq_id_(uniq_id) {
   }

   virtual ~StringSource() { }
@@ -130,8 +130,20 @@ class StringSource: public RandomAccessFile {
     return Status::OK();
   }

+  virtual size_t GetUniqueId(char* id, size_t max_size) const {
+    if (max_size < 20) {
+      return 0;
+    }
+
+    char* rid = id;
+    rid = EncodeVarint64(rid, uniq_id_);
+    rid = EncodeVarint64(rid, 0);
+    return static_cast<size_t>(rid-id);
+  }
+
  private:
   std::string contents_;
+  uint64_t uniq_id_;
 };

 typedef std::map<std::string, std::string, anon::STLLessThan> KVMap;
@@ -228,8 +240,8 @@ class TableConstructor: public Constructor {
   }

   virtual Status FinishImpl(const Options& options, const KVMap& data) {
     Reset();
-    StringSink sink;
-    TableBuilder builder(options, &sink);
+    sink_.reset(new StringSink());
+    TableBuilder builder(options, sink_.get());
     for (KVMap::const_iterator it = data.begin();
          it != data.end();
@@ -240,15 +252,13 @@ class TableConstructor: public Constructor {
     Status s = builder.Finish();
     ASSERT_TRUE(s.ok()) << s.ToString();
-    ASSERT_EQ(sink.contents().size(), builder.FileSize());
+    ASSERT_EQ(sink_->contents().size(), builder.FileSize());

     // Open the table
-    source_.reset(new StringSource(sink.contents()));
-    Options table_options;
-    table_options.comparator = options.comparator;
-    table_options.compression_opts = options.compression_opts;
-    return Table::Open(table_options, std::move(source_),
-                       sink.contents().size(), &table_);
+    uniq_id_ = cur_uniq_id_++;
+    source_.reset(new StringSource(sink_->contents(), uniq_id_));
+    return Table::Open(options, std::move(source_),
+                       sink_->contents().size(), &table_);
   }

   virtual Iterator* NewIterator() const {
@@ -259,17 +269,34 @@ class TableConstructor: public Constructor {
     return table_->ApproximateOffsetOf(key);
   }

+  virtual Status Reopen(const Options& options) {
+    source_.reset(new StringSource(sink_->contents(), uniq_id_));
+    return Table::Open(options, std::move(source_),
+                       sink_->contents().size(), &table_);
+  }
+
+  virtual Table* table() {
+    return table_.get();
+  }
+
  private:
   void Reset() {
+    uniq_id_ = 0;
     table_.reset();
+    sink_.reset();
     source_.reset();
   }

+  uint64_t uniq_id_;
+  unique_ptr<StringSink> sink_;
   unique_ptr<StringSource> source_;
   unique_ptr<Table> table_;

   TableConstructor();
+
+  static uint64_t cur_uniq_id_;
 };

+uint64_t TableConstructor::cur_uniq_id_ = 1;
+
 // A helper class that converts internal format keys into user keys
 class KeyConvertingIterator: public Iterator {
@@ -892,6 +919,44 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
 }

+TEST(TableTest, BlockCacheLeak) {
+  // Check that when we reopen a table we don't lose access to blocks already
+  // in the cache. This test checks whether the Table actually makes use of
+  // the unique ID from the file.
+
+  Options opt;
+  opt.block_size = 1024;
+  opt.compression = kNoCompression;
+  opt.block_cache = NewLRUCache(16*1024*1024); // big enough so we don't ever
+                                               // lose cached values.
+
+  TableConstructor c(BytewiseComparator());
+  c.Add("k01", "hello");
+  c.Add("k02", "hello2");
+  c.Add("k03", std::string(10000, 'x'));
+  c.Add("k04", std::string(200000, 'x'));
+  c.Add("k05", std::string(300000, 'x'));
+  c.Add("k06", "hello3");
+  c.Add("k07", std::string(100000, 'x'));
+  std::vector<std::string> keys;
+  KVMap kvmap;
+  c.Finish(opt, &keys, &kvmap);
+
+  unique_ptr<Iterator> iter(c.NewIterator());
+  iter->SeekToFirst();
+  while (iter->Valid()) {
+    iter->key();
+    iter->value();
+    iter->Next();
+  }
+  ASSERT_OK(iter->status());
+
+  ASSERT_OK(c.Reopen(opt));
+  for (const std::string& key: keys) {
+    ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key));
+  }
+}
+
 } // namespace leveldb

 int main(int argc, char** argv) {

util/coding.h
@@ -18,6 +18,10 @@
 namespace leveldb {

+// The maximum length of a varint in bytes for 32 and 64 bits respectively.
+const unsigned int kMaxVarint32Length = 5;
+const unsigned int kMaxVarint64Length = 10;
+
 // Standard Put... routines append to a string
 extern void PutFixed32(std::string* dst, uint32_t value);
 extern void PutFixed64(std::string* dst, uint64_t value);

util/env_posix.cc
@@ -11,18 +11,23 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <time.h>
 #include <unistd.h>
+#if defined(OS_LINUX)
+#include <linux/fs.h>
+#endif
 #if defined(LEVELDB_PLATFORM_ANDROID)
 #include <sys/stat.h>
 #endif
 #include "leveldb/env.h"
 #include "leveldb/slice.h"
 #include "port/port.h"
+#include "util/coding.h"
 #include "util/logging.h"
 #include "util/posix_logger.h"
@@ -108,6 +113,35 @@ class PosixRandomAccessFile: public RandomAccessFile {
     }
     return s;
   }

+#if defined(OS_LINUX)
+  virtual size_t GetUniqueId(char* id, size_t max_size) const {
+    // TODO: possibly allow this function to handle tighter bounds.
+    if (max_size < kMaxVarint64Length*3) {
+      return 0;
+    }
+
+    struct stat buf;
+    int result = fstat(fd_, &buf);
+    if (result == -1) {
+      return 0;
+    }
+
+    long version = 0;
+    result = ioctl(fd_, FS_IOC_GETVERSION, &version);
+    if (result == -1) {
+      return 0;
+    }
+    uint64_t uversion = (uint64_t)version;
+
+    char* rid = id;
+    rid = EncodeVarint64(rid, buf.st_dev);
+    rid = EncodeVarint64(rid, buf.st_ino);
+    rid = EncodeVarint64(rid, uversion);
+    assert(rid >= id);
+    return static_cast<size_t>(rid-id);
+  }
+#endif
 };

 // mmap() based random-access

util/env_test.cc
@@ -4,7 +4,9 @@
 #include "leveldb/env.h"

+#include <unordered_set>
 #include "port/port.h"
+#include "util/coding.h"
 #include "util/testharness.h"

 namespace leveldb {
@@ -97,6 +99,146 @@ TEST(EnvPosixTest, StartThread) {
   ASSERT_EQ(state.val, 3);
 }

+bool IsSingleVarint(const std::string& s) {
+  Slice slice(s);
+
+  uint64_t v;
+  if (!GetVarint64(&slice, &v)) {
+    return false;
+  }
+
+  return slice.size() == 0;
+}
+
+bool IsUniqueIDValid(const std::string& s) {
+  return !s.empty() && !IsSingleVarint(s);
+}
+
+const size_t MAX_ID_SIZE = 100;
+char temp_id[MAX_ID_SIZE];
+
+TEST(EnvPosixTest, RandomAccessUniqueID) {
+  // Create file.
+  std::string fname = test::TmpDir() + "/" + "testfile";
+  unique_ptr<WritableFile> wfile;
+  ASSERT_OK(env_->NewWritableFile(fname, &wfile));
+
+  unique_ptr<RandomAccessFile> file;
+
+  // Get Unique ID
+  ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
+  size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
+  ASSERT_TRUE(id_size > 0);
+  std::string unique_id1(temp_id, id_size);
+  ASSERT_TRUE(IsUniqueIDValid(unique_id1));
+
+  // Get Unique ID again
+  ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
+  id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
+  ASSERT_TRUE(id_size > 0);
+  std::string unique_id2(temp_id, id_size);
+  ASSERT_TRUE(IsUniqueIDValid(unique_id2));
+
+  // Get Unique ID again after waiting some time.
+  env_->SleepForMicroseconds(1000000);
+  ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
+  id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
+  ASSERT_TRUE(id_size > 0);
+  std::string unique_id3(temp_id, id_size);
+  ASSERT_TRUE(IsUniqueIDValid(unique_id3));
+
+  // Check IDs are the same.
+  ASSERT_EQ(unique_id1, unique_id2);
+  ASSERT_EQ(unique_id2, unique_id3);
+
+  // Delete the file
+  env_->DeleteFile(fname);
+}
+
+// Returns true if any of the strings in ss are a prefix of another string
+// in ss.
+bool HasPrefix(const std::unordered_set<std::string>& ss) {
+  for (const std::string& s: ss) {
+    if (s.empty()) {
+      return true;
+    }
+    for (size_t i = 1; i < s.size(); ++i) {
+      if (ss.count(s.substr(0, i)) != 0) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+TEST(EnvPosixTest, RandomAccessUniqueIDConcurrent) {
+  // Check whether a bunch of concurrently existing files have unique IDs.
+
+  // Create the files
+  std::vector<std::string> fnames;
+  for (int i = 0; i < 1000; ++i) {
+    fnames.push_back(test::TmpDir() + "/" + "testfile" + std::to_string(i));
+
+    // Create file.
+    unique_ptr<WritableFile> wfile;
+    ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile));
+  }
+
+  // Collect and check whether the IDs are unique.
+  std::unordered_set<std::string> ids;
+  for (const std::string fname: fnames) {
+    unique_ptr<RandomAccessFile> file;
+    std::string unique_id;
+
+    ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
+    size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
+    ASSERT_TRUE(id_size > 0);
+    unique_id = std::string(temp_id, id_size);
+    ASSERT_TRUE(IsUniqueIDValid(unique_id));
+
+    ASSERT_TRUE(ids.count(unique_id) == 0);
+    ids.insert(unique_id);
+  }
+
+  // Delete the files
+  for (const std::string fname: fnames) {
+    ASSERT_OK(env_->DeleteFile(fname));
+  }
+
+  ASSERT_TRUE(!HasPrefix(ids));
+}
+
+TEST(EnvPosixTest, RandomAccessUniqueIDDeletes) {
+  std::string fname = test::TmpDir() + "/" + "testfile";
+
+  // Check that after a file is deleted we don't get the same ID again in a
+  // new file.
+  std::unordered_set<std::string> ids;
+  for (int i = 0; i < 1000; ++i) {
+    // Create file.
+    {
+      unique_ptr<WritableFile> wfile;
+      ASSERT_OK(env_->NewWritableFile(fname, &wfile));
+    }
+
+    // Get Unique ID
+    std::string unique_id;
+    {
+      unique_ptr<RandomAccessFile> file;
+      ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
+      size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
+      ASSERT_TRUE(id_size > 0);
+      unique_id = std::string(temp_id, id_size);
+    }
+    ASSERT_TRUE(IsUniqueIDValid(unique_id));
+    ASSERT_TRUE(ids.count(unique_id) == 0);
+    ids.insert(unique_id);
+
+    // Delete the file
+    ASSERT_OK(env_->DeleteFile(fname));
+  }
+
+  ASSERT_TRUE(!HasPrefix(ids));
+}
+
 } // namespace leveldb

 int main(int argc, char** argv) {
