From 44f0ff31c21164685a6cd25a2beb944767c39e46 Mon Sep 17 00:00:00 2001 From: Lei Jin Date: Wed, 29 Oct 2014 12:24:49 -0700 Subject: [PATCH] use fallocate(FALLOC_FL_PUNCH_HOLE) to release unused blocks at the end of file Summary: ftruncate does not always free preallocated unused space at the end of file. In some cases, we pin more disk space than we should. Test Plan: env_test Reviewers: sdong, rven, yhchiang, igor Reviewed By: igor Subscribers: nkg-, dhruba, leveldb Differential Revision: https://reviews.facebook.net/D25641 --- util/env_posix.cc | 24 +++++++++++++++++++----- util/env_test.cc | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/util/env_posix.cc b/util/env_posix.cc index 76ba4a6bd..84c9e558e 100644 --- a/util/env_posix.cc +++ b/util/env_posix.cc @@ -737,14 +737,28 @@ class PosixWritableFile : public WritableFile { GetPreallocationStatus(&block_size, &last_allocated_block); if (last_allocated_block > 0) { // trim the extra space preallocated at the end of the file - int dummy __attribute__((unused)); - dummy = ftruncate(fd_, filesize_); // ignore errors + // NOTE(ljin): we probably don't want to surface failure as an IOError, + // but it will be nice to log these errors. + ftruncate(fd_, filesize_); +#ifdef ROCKSDB_FALLOCATE_PRESENT + // in some file systems, ftruncate only trims trailing space if the + // new file size is smaller than the current size. Calling fallocate + // with FALLOC_FL_PUNCH_HOLE flag to explicitly release these unused + // blocks. FALLOC_FL_PUNCH_HOLE is supported on at least the following + // filesystems: + // XFS (since Linux 2.6.38) + // ext4 (since Linux 3.0) + // Btrfs (since Linux 3.7) + // tmpfs (since Linux 3.5) + // We ignore error since failure of this operation does not affect + // correctness. 
+ fallocate(fd_, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, + filesize_, block_size * last_allocated_block - filesize_); +#endif } if (close(fd_) < 0) { - if (s.ok()) { - s = IOError(filename_, errno); - } + s = IOError(filename_, errno); } fd_ = -1; return s; diff --git a/util/env_test.cc b/util/env_test.cc index 3d7a9a4db..48e7d353d 100644 --- a/util/env_test.cc +++ b/util/env_test.cc @@ -518,7 +518,7 @@ TEST(EnvPosixTest, AllocateTest) { // allocate 100 MB size_t kPreallocateSize = 100 * 1024 * 1024; size_t kBlockSize = 512; - std::string data = "test"; + std::string data(1024 * 1024, 'a'); wfile->SetPreallocationBlockSize(kPreallocateSize); ASSERT_OK(wfile->Append(Slice(data))); ASSERT_OK(wfile->Flush()); @@ -540,7 +540,7 @@ TEST(EnvPosixTest, AllocateTest) { stat(fname.c_str(), &f_stat); ASSERT_EQ((unsigned int)data.size(), f_stat.st_size); // verify that preallocated blocks were deallocated on file close - ASSERT_GT(st_blocks, f_stat.st_blocks); + ASSERT_EQ((f_stat.st_size + kBlockSize - 1) / kBlockSize, f_stat.st_blocks); } #endif // ROCKSDB_FALLOCATE_PRESENT