Implement APIs in the Environment to clear out pages in the OS cache.

Summary:
Added a new API to the Environment that allows clearing unneeded
pages out of the OS cache. This will be helpful when the compressed
block cache replaces the OS cache.

Test Plan: EnvPosixTest.InvalidateCache

Reviewers: haobo

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D13041
main
Dhruba Borthakur 11 years ago
parent 9262061b0d
commit 87d6eb2f6b
  1. 23
      include/rocksdb/env.h
  2. 52
      util/env_posix.cc
  3. 40
      util/env_test.cc

@ -41,7 +41,7 @@ struct EnvOptions {
EnvOptions(); EnvOptions();
// construct from Options // construct from Options
EnvOptions(const Options& options); explicit EnvOptions(const Options& options);
// If true, then allow caching of data in environment buffers // If true, then allow caching of data in environment buffers
bool use_os_buffer; bool use_os_buffer;
@ -253,6 +253,13 @@ class SequentialFile {
// //
// REQUIRES: External synchronization // REQUIRES: External synchronization
virtual Status Skip(uint64_t n) = 0; virtual Status Skip(uint64_t n) = 0;
// Drop any cached data for the byte range [offset, offset+length) of this
// file. A length of 0 means the range extends from offset to the end of the
// file. If the system is not caching the file contents, this is a no-op.
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Base-class fallback: this default performs no cache operation and
  // simply reports NotSupported; subclasses may override it.
  Status unsupported = Status::NotSupported("InvalidateCache not supported.");
  return unsupported;
}
}; };
// A file abstraction for randomly reading the contents of a file. // A file abstraction for randomly reading the contents of a file.
@ -298,6 +305,12 @@ class RandomAccessFile {
virtual void Hint(AccessPattern pattern) {} virtual void Hint(AccessPattern pattern) {}
// Drop any cached data for the byte range [offset, offset+length) of this
// file. A length of 0 means the range extends from offset to the end of the
// file. If the system is not caching the file contents, this is a no-op.
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Base-class fallback: this default performs no cache operation and
  // simply reports NotSupported; subclasses may override it.
  Status unsupported = Status::NotSupported("InvalidateCache not supported.");
  return unsupported;
}
}; };
// A file abstraction for sequential writing. The implementation // A file abstraction for sequential writing. The implementation
@ -347,6 +360,14 @@ class WritableFile {
*block_size = preallocation_block_size_; *block_size = preallocation_block_size_;
} }
// Drop any cached data for the byte range [offset, offset+length) of this
// file. A length of 0 means the range extends from offset to the end of the
// file. If the system is not caching the file contents, this is a no-op.
// This call has no effect on dirty pages in the cache.
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Base-class fallback: this default performs no cache operation and
  // simply reports NotSupported; subclasses may override it.
  Status unsupported = Status::NotSupported("InvalidateCache not supported.");
  return unsupported;
}
protected: protected:
// PrepareWrite performs any necessary preparation for a write // PrepareWrite performs any necessary preparation for a write
// before the write actually occurs. This allows for pre-allocation // before the write actually occurs. This allows for pre-allocation

@ -137,6 +137,15 @@ class PosixSequentialFile: public SequentialFile {
} }
return Status::OK(); return Status::OK();
} }
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Advise the kernel that the cached pages for [offset, offset+length) of
  // fd_ are no longer needed (POSIX_FADV_DONTNEED); a length of 0 extends the
  // range to the end of the file.
  // Returns OK on success, otherwise an IOError tagged with filename_.
  const int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
  if (ret != 0) {
    // posix_fadvise() reports failure through its return value and does not
    // set errno, so the error number to surface is `ret`, not errno.
    return IOError(filename_, ret);
  }
  return Status::OK();
}
}; };
// pread() based random-access // pread() based random-access
@ -223,20 +232,30 @@ class PosixRandomAccessFile: public RandomAccessFile {
} }
} }
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Advise the kernel that the cached pages for [offset, offset+length) of
  // fd_ are no longer needed (POSIX_FADV_DONTNEED); a length of 0 extends the
  // range to the end of the file.
  // Returns OK on success, otherwise an IOError tagged with filename_.
  const int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
  if (ret != 0) {
    // posix_fadvise() reports failure through its return value and does not
    // set errno, so the error number to surface is `ret`, not errno.
    return IOError(filename_, ret);
  }
  return Status::OK();
}
}; };
// mmap() based random-access // mmap() based random-access
class PosixMmapReadableFile: public RandomAccessFile { class PosixMmapReadableFile: public RandomAccessFile {
private: private:
int fd_;
std::string filename_; std::string filename_;
void* mmapped_region_; void* mmapped_region_;
size_t length_; size_t length_;
public: public:
// base[0,length-1] contains the mmapped contents of the file. // base[0,length-1] contains the mmapped contents of the file.
PosixMmapReadableFile(const std::string& fname, void* base, size_t length, PosixMmapReadableFile(const int fd, const std::string& fname,
void* base, size_t length,
const EnvOptions& options) const EnvOptions& options)
: filename_(fname), mmapped_region_(base), length_(length) { : fd_(fd), filename_(fname), mmapped_region_(base), length_(length) {
assert(options.use_mmap_reads); assert(options.use_mmap_reads);
assert(options.use_os_buffer); assert(options.use_os_buffer);
} }
@ -253,6 +272,14 @@ class PosixMmapReadableFile: public RandomAccessFile {
} }
return s; return s;
} }
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Advise the kernel that the cached pages for [offset, offset+length) of
  // fd_ are no longer needed (POSIX_FADV_DONTNEED); a length of 0 extends the
  // range to the end of the file.
  // Returns OK on success, otherwise an IOError tagged with filename_.
  // NOTE(review): fd_ is the descriptor handed to the constructor; confirm
  // the creator keeps it open for this object's lifetime, otherwise this
  // call would operate on a closed or reused descriptor.
  const int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
  if (ret != 0) {
    // posix_fadvise() reports failure through its return value and does not
    // set errno, so the error number to surface is `ret`, not errno.
    return IOError(filename_, ret);
  }
  return Status::OK();
}
}; };
// We preallocate up to an extra megabyte and use memcpy to append new // We preallocate up to an extra megabyte and use memcpy to append new
@ -480,6 +507,15 @@ class PosixMmapFile : public WritableFile {
return file_offset_ + used; return file_offset_ + used;
} }
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Advise the kernel that the cached pages for [offset, offset+length) of
  // fd_ are no longer needed (POSIX_FADV_DONTNEED); a length of 0 extends the
  // range to the end of the file.
  // Returns OK on success, otherwise an IOError tagged with filename_.
  const int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
  if (ret != 0) {
    // posix_fadvise() reports failure through its return value and does not
    // set errno, so the error number to surface is `ret`, not errno.
    return IOError(filename_, ret);
  }
  return Status::OK();
}
#ifdef OS_LINUX #ifdef OS_LINUX
virtual Status Allocate(off_t offset, off_t len) { virtual Status Allocate(off_t offset, off_t len) {
TEST_KILL_RANDOM(leveldb_kill_odds); TEST_KILL_RANDOM(leveldb_kill_odds);
@ -644,6 +680,15 @@ class PosixWritableFile : public WritableFile {
return filesize_; return filesize_;
} }
virtual Status InvalidateCache(size_t offset, size_t length) {
  // Advise the kernel that the cached pages for [offset, offset+length) of
  // fd_ are no longer needed (POSIX_FADV_DONTNEED); a length of 0 extends the
  // range to the end of the file.
  // Returns OK on success, otherwise an IOError tagged with filename_.
  const int ret = posix_fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
  if (ret != 0) {
    // posix_fadvise() reports failure through its return value and does not
    // set errno, so the error number to surface is `ret`, not errno.
    return IOError(filename_, ret);
  }
  return Status::OK();
}
#ifdef OS_LINUX #ifdef OS_LINUX
virtual Status Allocate(off_t offset, off_t len) { virtual Status Allocate(off_t offset, off_t len) {
TEST_KILL_RANDOM(leveldb_kill_odds); TEST_KILL_RANDOM(leveldb_kill_odds);
@ -768,7 +813,8 @@ class PosixEnv : public Env {
if (s.ok()) { if (s.ok()) {
void* base = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0); void* base = mmap(nullptr, size, PROT_READ, MAP_SHARED, fd, 0);
if (base != MAP_FAILED) { if (base != MAP_FAILED) {
result->reset(new PosixMmapReadableFile(fname, base, size, options)); result->reset(new PosixMmapReadableFile(fd, fname, base,
size, options));
} else { } else {
s = IOError(fname, errno); s = IOError(fname, errno);
} }

@ -317,6 +317,46 @@ TEST(EnvPosixTest, RandomAccessUniqueIDDeletes) {
ASSERT_TRUE(!HasPrefix(ids)); ASSERT_TRUE(!HasPrefix(ids));
} }
// Writes a small file, then checks that InvalidateCache() succeeds on a
// writable, a random-access and a sequential handle to it, both for an
// explicit range and for length 0 (through end of file).
TEST(EnvPosixTest, InvalidateCache) {
  const EnvOptions soptions;
  std::string fname = test::TmpDir() + "/" + "testfile";

  // Create file.
  {
    unique_ptr<WritableFile> wfile;
    ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));
    ASSERT_OK(wfile->Append(Slice("Hello world")));
    ASSERT_OK(wfile->InvalidateCache(0, 0));
    ASSERT_OK(wfile->Close());
  }

  // Random Read
  {
    unique_ptr<RandomAccessFile> file;
    char scratch[100];
    Slice result;
    ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
    ASSERT_OK(file->Read(0, 11, &result, scratch));
    // Verify through `result`, not `scratch`: Read() reports the bytes it
    // produced via the Slice and is not obliged to have filled scratch.
    ASSERT_EQ(result.size(), 11U);
    ASSERT_EQ(memcmp(result.data(), "Hello world", 11), 0);
    ASSERT_OK(file->InvalidateCache(0, 11));
    ASSERT_OK(file->InvalidateCache(0, 0));
  }

  // Sequential Read
  {
    unique_ptr<SequentialFile> file;
    char scratch[100];
    Slice result;
    ASSERT_OK(env_->NewSequentialFile(fname, &file, soptions));
    ASSERT_OK(file->Read(11, &result, scratch));
    // Same as above: check the returned Slice and its length.
    ASSERT_EQ(result.size(), 11U);
    ASSERT_EQ(memcmp(result.data(), "Hello world", 11), 0);
    ASSERT_OK(file->InvalidateCache(0, 11));
    ASSERT_OK(file->InvalidateCache(0, 0));
  }

  // Delete the file
  ASSERT_OK(env_->DeleteFile(fname));
}
} // namespace leveldb } // namespace leveldb
int main(int argc, char** argv) { int main(int argc, char** argv) {

Loading…
Cancel
Save