Fixed cache key for block cache

Summary:
Added function to `RandomAccessFile` to generate an unique ID for that file. Currently only `PosixRandomAccessFile` has this behaviour implemented and only on Linux.

Changed how key is generated in `Table::BlockReader`.

Added tests to check whether the unique ID is stable, unique and not a prefix of another unique ID. Added tests to see that `Table` uses the cache more efficiently.

Test Plan: make check

Reviewers: chip, vamsi, dhruba

Reviewed By: chip

CC: leveldb

Differential Revision: https://reviews.facebook.net/D8145
main
Kosie van der Merwe 12 years ago
parent 2c3565285e
commit 4dcc0c89f4
  1. 20
      include/leveldb/env.h
  2. 6
      include/leveldb/table.h
  3. 53
      table/table.cc
  4. 87
      table/table_test.cc
  5. 4
      util/coding.h
  6. 34
      util/env_posix.cc
  7. 142
      util/env_test.cc

@ -223,6 +223,26 @@ class RandomAccessFile {
// Safe for concurrent use by multiple threads.
virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const = 0;
// Tries to get an unique ID for this file that will be the same each time
// the file is opened (and will stay the same while the file is open).
// Furthermore, it tries to make this ID at most "max_size" bytes. If such an
// ID can be created this function returns the length of the ID and places it
// in "id"; otherwise, this function returns 0, in which case "id" may more
// may not have been modified.
//
// This function guarantees, for IDs from a given environment, two unique ids
// cannot be made equal to eachother by adding arbitrary bytes to one of
// them. That is, no unique ID is the prefix of another.
//
// This function guarantees that the returned ID will not be interpretable as
// a single varint.
//
// Note: these IDs are only valid for the duration of the process.
virtual size_t GetUniqueId(char* id, size_t max_size) const {
return 0; // Default implementation to prevent issues with backwards
// compatibility.
};
};
// A file abstraction for sequential writing. The implementation

@ -58,6 +58,10 @@ class Table {
// be close to the file length.
uint64_t ApproximateOffsetOf(const Slice& key) const;
// Returns true if the block for the specified key is in cache.
// REQUIRES: key is in this table.
bool TEST_KeyInCache(const ReadOptions& options, const Slice& key);
private:
struct Rep;
Rep* rep_;
@ -80,6 +84,8 @@ class Table {
void ReadMeta(const Footer& footer);
void ReadFilter(const Slice& filter_handle_value);
static void SetupCacheKeyPrefix(Rep* rep);
// No copying allowed
Table(const Table&);
void operator=(const Table&);

@ -18,6 +18,11 @@
namespace leveldb {
// The longest the prefix of the cache key used to identify blocks can be.
// We are using the fact that we know for Posix files the unique ID is three
// varints.
const size_t kMaxCacheKeyPrefixSize = kMaxVarint64Length*3+1;
struct Table::Rep {
~Rep() {
delete filter;
@ -28,7 +33,8 @@ struct Table::Rep {
Options options;
Status status;
unique_ptr<RandomAccessFile> file;
uint64_t cache_id;
char cache_key_prefix[kMaxCacheKeyPrefixSize];
size_t cache_key_prefix_size;
FilterBlockReader* filter;
const char* filter_data;
@ -36,6 +42,25 @@ struct Table::Rep {
Block* index_block;
};
// Helper function to setup the cache key's prefix for the Table.
void Table::SetupCacheKeyPrefix(Rep* rep) {
assert(kMaxCacheKeyPrefixSize >= 10);
rep->cache_key_prefix_size = 0;
if (rep->options.block_cache) {
rep->cache_key_prefix_size = rep->file->GetUniqueId(rep->cache_key_prefix,
kMaxCacheKeyPrefixSize);
if (rep->cache_key_prefix_size == 0) {
// If the prefix wasn't generated or was too long, we create one from the
// cache.
char* end = EncodeVarint64(rep->cache_key_prefix,
rep->options.block_cache->NewId());
rep->cache_key_prefix_size =
static_cast<size_t>(end - rep->cache_key_prefix);
}
}
}
Status Table::Open(const Options& options,
unique_ptr<RandomAccessFile>&& file,
uint64_t size,
@ -80,7 +105,7 @@ Status Table::Open(const Options& options,
rep->file = std::move(file);
rep->metaindex_handle = footer.metaindex_handle();
rep->index_block = index_block;
rep->cache_id = (options.block_cache ? options.block_cache->NewId() : 0);
SetupCacheKeyPrefix(rep);
rep->filter_data = NULL;
rep->filter = NULL;
table->reset(new Table(rep));
@ -179,10 +204,15 @@ Iterator* Table::BlockReader(void* arg,
if (s.ok()) {
BlockContents contents;
if (block_cache != NULL) {
char cache_key_buffer[16];
EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
EncodeFixed64(cache_key_buffer+8, handle.offset());
Slice key(cache_key_buffer, sizeof(cache_key_buffer));
char cache_key[kMaxCacheKeyPrefixSize + kMaxVarint64Length];
const size_t cache_key_prefix_size = table->rep_->cache_key_prefix_size;
assert(cache_key_prefix_size != 0);
assert(cache_key_prefix_size <= kMaxCacheKeyPrefixSize);
memcpy(cache_key, table->rep_->cache_key_prefix,
cache_key_prefix_size);
char* end = EncodeVarint64(cache_key + cache_key_prefix_size,
handle.offset());
Slice key(cache_key, static_cast<size_t>(end-cache_key));
cache_handle = block_cache->Lookup(key);
if (cache_handle != NULL) {
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
@ -274,6 +304,17 @@ Status Table::InternalGet(const ReadOptions& options, const Slice& k,
return s;
}
void SaveDidIO(void* arg, const Slice& key, const Slice& value, bool didIO) {
*reinterpret_cast<bool*>(arg) = didIO;
}
bool Table::TEST_KeyInCache(const ReadOptions& options, const Slice& key) {
// We use InternalGet() as it has logic that checks whether we read the
// block from the disk or not.
bool didIO = false;
Status s = InternalGet(options, key, &didIO, SaveDidIO);
assert(s.ok());
return !didIO;
}
uint64_t Table::ApproximateOffsetOf(const Slice& key) const {
Iterator* index_iter =

@ -109,8 +109,8 @@ class StringSink: public WritableFile {
class StringSource: public RandomAccessFile {
public:
StringSource(const Slice& contents)
: contents_(contents.data(), contents.size()) {
StringSource(const Slice& contents, uint64_t uniq_id)
: contents_(contents.data(), contents.size()), uniq_id_(uniq_id) {
}
virtual ~StringSource() { }
@ -130,8 +130,20 @@ class StringSource: public RandomAccessFile {
return Status::OK();
}
virtual size_t GetUniqueId(char* id, size_t max_size) const {
if (max_size < 20) {
return 0;
}
char* rid = id;
rid = EncodeVarint64(rid, uniq_id_);
rid = EncodeVarint64(rid, 0);
return static_cast<size_t>(rid-id);
}
private:
std::string contents_;
uint64_t uniq_id_;
};
typedef std::map<std::string, std::string, anon::STLLessThan> KVMap;
@ -228,8 +240,8 @@ class TableConstructor: public Constructor {
}
virtual Status FinishImpl(const Options& options, const KVMap& data) {
Reset();
StringSink sink;
TableBuilder builder(options, &sink);
sink_.reset(new StringSink());
TableBuilder builder(options, sink_.get());
for (KVMap::const_iterator it = data.begin();
it != data.end();
@ -240,15 +252,13 @@ class TableConstructor: public Constructor {
Status s = builder.Finish();
ASSERT_TRUE(s.ok()) << s.ToString();
ASSERT_EQ(sink.contents().size(), builder.FileSize());
ASSERT_EQ(sink_->contents().size(), builder.FileSize());
// Open the table
source_.reset(new StringSource(sink.contents()));
Options table_options;
table_options.comparator = options.comparator;
table_options.compression_opts = options.compression_opts;
return Table::Open(table_options, std::move(source_),
sink.contents().size(), &table_);
uniq_id_ = cur_uniq_id_++;
source_.reset(new StringSource(sink_->contents(), uniq_id_));
return Table::Open(options, std::move(source_),
sink_->contents().size(), &table_);
}
virtual Iterator* NewIterator() const {
@ -259,17 +269,34 @@ class TableConstructor: public Constructor {
return table_->ApproximateOffsetOf(key);
}
virtual Status Reopen(const Options& options) {
source_.reset(new StringSource(sink_->contents(), uniq_id_));
return Table::Open(options, std::move(source_),
sink_->contents().size(), &table_);
}
virtual Table* table() {
return table_.get();
}
private:
void Reset() {
uniq_id_ = 0;
table_.reset();
sink_.reset();
source_.reset();
}
uint64_t uniq_id_;
unique_ptr<StringSink> sink_;
unique_ptr<StringSource> source_;
unique_ptr<Table> table_;
TableConstructor();
static uint64_t cur_uniq_id_;
};
uint64_t TableConstructor::cur_uniq_id_ = 1;
// A helper class that converts internal format keys into user keys
class KeyConvertingIterator: public Iterator {
@ -892,6 +919,44 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
}
TEST(TableTest, BlockCacheLeak) {
// Check that when we reopen a table we don't lose access to blocks already
// in the cache. This test checks whether the Table actually makes use of the
// unique ID from the file.
Options opt;
opt.block_size = 1024;
opt.compression = kNoCompression;
opt.block_cache = NewLRUCache(16*1024*1024); // big enough so we don't ever
// lose cached values.
TableConstructor c(BytewiseComparator());
c.Add("k01", "hello");
c.Add("k02", "hello2");
c.Add("k03", std::string(10000, 'x'));
c.Add("k04", std::string(200000, 'x'));
c.Add("k05", std::string(300000, 'x'));
c.Add("k06", "hello3");
c.Add("k07", std::string(100000, 'x'));
std::vector<std::string> keys;
KVMap kvmap;
c.Finish(opt, &keys, &kvmap);
unique_ptr<Iterator> iter(c.NewIterator());
iter->SeekToFirst();
while (iter->Valid()) {
iter->key();
iter->value();
iter->Next();
}
ASSERT_OK(iter->status());
ASSERT_OK(c.Reopen(opt));
for (const std::string& key: keys) {
ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key));
}
}
} // namespace leveldb
int main(int argc, char** argv) {

@ -18,6 +18,10 @@
namespace leveldb {
// The maximum length of a varint in bytes for 32 and 64 bits respectively.
const unsigned int kMaxVarint32Length = 5;
const unsigned int kMaxVarint64Length = 10;
// Standard Put... routines append to a string
extern void PutFixed32(std::string* dst, uint32_t value);
extern void PutFixed64(std::string* dst, uint64_t value);

@ -11,18 +11,23 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#if defined(OS_LINUX)
#include <linux/fs.h>
#endif
#if defined(LEVELDB_PLATFORM_ANDROID)
#include <sys/stat.h>
#endif
#include "leveldb/env.h"
#include "leveldb/slice.h"
#include "port/port.h"
#include "util/coding.h"
#include "util/logging.h"
#include "util/posix_logger.h"
@ -108,6 +113,35 @@ class PosixRandomAccessFile: public RandomAccessFile {
}
return s;
}
#if defined(OS_LINUX)
virtual size_t GetUniqueId(char* id, size_t max_size) const {
// TODO: possibly allow this function to handle tighter bounds.
if (max_size < kMaxVarint64Length*3) {
return 0;
}
struct stat buf;
int result = fstat(fd_, &buf);
if (result == -1) {
return 0;
}
long version = 0;
result = ioctl(fd_, FS_IOC_GETVERSION, &version);
if (result == -1) {
return 0;
}
uint64_t uversion = (uint64_t)version;
char* rid = id;
rid = EncodeVarint64(rid, buf.st_dev);
rid = EncodeVarint64(rid, buf.st_ino);
rid = EncodeVarint64(rid, uversion);
assert(rid >= id);
return static_cast<size_t>(rid-id);
}
#endif
};
// mmap() based random-access

@ -4,7 +4,9 @@
#include "leveldb/env.h"
#include <unordered_set>
#include "port/port.h"
#include "util/coding.h"
#include "util/testharness.h"
namespace leveldb {
@ -97,6 +99,146 @@ TEST(EnvPosixTest, StartThread) {
ASSERT_EQ(state.val, 3);
}
bool IsSingleVarint(const std::string& s) {
Slice slice(s);
uint64_t v;
if (!GetVarint64(&slice, &v)) {
return false;
}
return slice.size() == 0;
}
bool IsUniqueIDValid(const std::string& s) {
return !s.empty() && !IsSingleVarint(s);
}
const size_t MAX_ID_SIZE = 100;
char temp_id[MAX_ID_SIZE];
TEST(EnvPosixTest, RandomAccessUniqueID) {
// Create file.
std::string fname = test::TmpDir() + "/" + "testfile";
unique_ptr<WritableFile> wfile;
ASSERT_OK(env_->NewWritableFile(fname, &wfile));
unique_ptr<RandomAccessFile> file;
// Get Unique ID
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
ASSERT_TRUE(id_size > 0);
std::string unique_id1(temp_id, id_size);
ASSERT_TRUE(IsUniqueIDValid(unique_id1));
// Get Unique ID again
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
ASSERT_TRUE(id_size > 0);
std::string unique_id2(temp_id, id_size);
ASSERT_TRUE(IsUniqueIDValid(unique_id2));
// Get Unique ID again after waiting some time.
env_->SleepForMicroseconds(1000000);
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
ASSERT_TRUE(id_size > 0);
std::string unique_id3(temp_id, id_size);
ASSERT_TRUE(IsUniqueIDValid(unique_id3));
// Check IDs are the same.
ASSERT_EQ(unique_id1, unique_id2);
ASSERT_EQ(unique_id2, unique_id3);
// Delete the file
env_->DeleteFile(fname);
}
// Returns true if any of the strings in ss are the prefix of another string.
bool HasPrefix(const std::unordered_set<std::string>& ss) {
for (const std::string& s: ss) {
if (s.empty()) {
return true;
}
for (size_t i = 1; i < s.size(); ++i) {
if (ss.count(s.substr(0, i)) != 0) {
return true;
}
}
}
return false;
}
TEST(EnvPosixTest, RandomAccessUniqueIDConcurrent) {
// Check whether a bunch of concurrently existing files have unique IDs.
// Create the files
std::vector<std::string> fnames;
for (int i = 0; i < 1000; ++i) {
fnames.push_back(test::TmpDir() + "/" + "testfile" + std::to_string(i));
// Create file.
unique_ptr<WritableFile> wfile;
ASSERT_OK(env_->NewWritableFile(fnames[i], &wfile));
}
// Collect and check whether the IDs are unique.
std::unordered_set<std::string> ids;
for (const std::string fname: fnames) {
unique_ptr<RandomAccessFile> file;
std::string unique_id;
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
ASSERT_TRUE(id_size > 0);
unique_id = std::string(temp_id, id_size);
ASSERT_TRUE(IsUniqueIDValid(unique_id));
ASSERT_TRUE(ids.count(unique_id) == 0);
ids.insert(unique_id);
}
// Delete the files
for (const std::string fname: fnames) {
ASSERT_OK(env_->DeleteFile(fname));
}
ASSERT_TRUE(!HasPrefix(ids));
}
TEST(EnvPosixTest, RandomAccessUniqueIDDeletes) {
std::string fname = test::TmpDir() + "/" + "testfile";
// Check that after file is deleted we don't get same ID again in a new file.
std::unordered_set<std::string> ids;
for (int i = 0; i < 1000; ++i) {
// Create file.
{
unique_ptr<WritableFile> wfile;
ASSERT_OK(env_->NewWritableFile(fname, &wfile));
}
// Get Unique ID
std::string unique_id;
{
unique_ptr<RandomAccessFile> file;
ASSERT_OK(env_->NewRandomAccessFile(fname, &file));
size_t id_size = file->GetUniqueId(temp_id, MAX_ID_SIZE);
ASSERT_TRUE(id_size > 0);
unique_id = std::string(temp_id, id_size);
}
ASSERT_TRUE(IsUniqueIDValid(unique_id));
ASSERT_TRUE(ids.count(unique_id) == 0);
ids.insert(unique_id);
// Delete the file
ASSERT_OK(env_->DeleteFile(fname));
}
ASSERT_TRUE(!HasPrefix(ids));
}
} // namespace leveldb
int main(int argc, char** argv) {

Loading…
Cancel
Save