Env function for bulk metadata retrieval

Summary:
Added a new Env function, GetChildrenFileAttributes, which returns the
filename and size for each file in the provided directory. The default
implementation retrieves the attributes sequentially using the existing
GetChildren and GetFileSize functions. In the next diff I'll make HdfsEnv
override this function to use libhdfs's bulk get function.

This won't work on Windows because the default implementation joins paths
with a hard-coded "/" separator, so WinEnv returns NotSupported for now.

Test Plan:
new unit test

  $ ./env_test --gtest_filter=EnvPosixTest.ConsistentChildrenAttributes

Reviewers: yhchiang, sdong

Reviewed By: sdong

Subscribers: IslamAbdelRahman, dhruba, leveldb

Differential Revision: https://reviews.facebook.net/D53781
main
Andrew Kryczka 9 years ago
parent 4a8cbf4e31
commit 59b3ee658f
  1. 21
      include/rocksdb/env.h
  2. 10
      port/win/env_win.cc
  3. 26
      util/env.cc
  4. 36
      util/env_test.cc

@ -103,6 +103,14 @@ struct EnvOptions {
class Env { class Env {
public: public:
// Per-file attributes reported by GetChildrenFileAttributes().
struct FileAttributes {
// File name, relative to the directory that was listed
std::string name;
// Size of file in bytes
uint64_t size_bytes;
};
Env() : thread_status_updater_(nullptr) {} Env() : thread_status_updater_(nullptr) {}
virtual ~Env(); virtual ~Env();
@ -177,6 +185,15 @@ class Env {
virtual Status GetChildren(const std::string& dir, virtual Status GetChildren(const std::string& dir,
std::vector<std::string>* result) = 0; std::vector<std::string>* result) = 0;
// Store in *result the attributes of the children of the specified directory.
// In case the implementation lists the directory prior to iterating the files
// and files are concurrently deleted, the deleted files will be omitted from
// result.
// The name attributes are relative to "dir".
// Original contents of *result are dropped.
// Unlike GetChildren(), this is not pure virtual: Env supplies a default
// implementation built on GetChildren() and GetFileSize(), which subclasses
// may override with a bulk lookup.
virtual Status GetChildrenFileAttributes(const std::string& dir,
std::vector<FileAttributes>* result);
// Delete the named file. // Delete the named file.
virtual Status DeleteFile(const std::string& fname) = 0; virtual Status DeleteFile(const std::string& fname) = 0;
@ -789,6 +806,10 @@ class EnvWrapper : public Env {
std::vector<std::string>* r) override { std::vector<std::string>* r) override {
return target_->GetChildren(dir, r); return target_->GetChildren(dir, r);
} }
// Forwards the bulk attribute lookup unchanged to the wrapped target_ Env.
Status GetChildrenFileAttributes(
const std::string& dir, std::vector<FileAttributes>* result) override {
return target_->GetChildrenFileAttributes(dir, result);
}
Status DeleteFile(const std::string& f) override { Status DeleteFile(const std::string& f) override {
return target_->DeleteFile(f); return target_->DeleteFile(f);
} }

@ -1415,6 +1415,11 @@ class WinEnv : public Env {
return status; return status;
} }
// Bulk retrieval of child file attributes is not available on Windows yet, so
// this override always reports NotSupported and leaves *result untouched.
//
// The signature must match Env::GetChildrenFileAttributes exactly (method
// name and Env::FileAttributes element type) for `override` to bind, and an
// in-class member declaration must not carry an `Env::` qualifier.
virtual Status GetChildrenFileAttributes(
    const std::string& dir, std::vector<FileAttributes>* result) override {
  return Status::NotSupported("Not supported in WinEnv");
}
virtual Status CreateDir(const std::string& name) override { virtual Status CreateDir(const std::string& name) override {
Status result; Status result;
@ -1723,9 +1728,8 @@ class WinEnv : public Env {
virtual Status GetHostName(char* name, uint64_t len) override { virtual Status GetHostName(char* name, uint64_t len) override {
Status s; Status s;
DWORD nSize = DWORD nSize = static_cast<DWORD>(
static_cast<DWORD>(std::min<uint64_t>(len, std::min<uint64_t>(len, std::numeric_limits<DWORD>::max()));
std::numeric_limits<DWORD>::max()));
if (!::GetComputerNameA(name, &nSize)) { if (!::GetComputerNameA(name, &nSize)) {
auto lastError = GetLastError(); auto lastError = GetLastError();

@ -38,6 +38,32 @@ Status Env::ReuseWritableFile(const std::string& fname,
return NewWritableFile(fname, result, options); return NewWritableFile(fname, result, options);
} }
// Default implementation: list the directory with GetChildren(), then query
// each entry's size with GetFileSize(), one file at a time.
//
// dir:    directory whose children are inspected.
// result: must be non-null; on success holds one FileAttributes per child
//         that could still be sized, with names relative to "dir".
//
// Returns the failing status if the listing fails, or if a size lookup fails
// for a file that FileExists() does not report as NotFound; otherwise OK.
Status Env::GetChildrenFileAttributes(const std::string& dir,
                                      std::vector<FileAttributes>* result) {
  assert(result != nullptr);

  std::vector<std::string> names;
  Status status = GetChildren(dir, &names);
  if (!status.ok()) {
    return status;
  }

  // Size the output for the worst case up front, fill it front-to-back, and
  // trim the unused tail once every child has been visited.
  result->resize(names.size());
  size_t kept = 0;
  for (std::string& child : names) {
    const std::string full_path = dir + "/" + child;
    FileAttributes& slot = (*result)[kept];
    status = GetFileSize(full_path, &slot.size_bytes);
    if (status.ok()) {
      slot.name = std::move(child);
      ++kept;
    } else if (!FileExists(full_path).IsNotFound()) {
      // A genuine failure, not a file that vanished after the listing.
      return status;
    }
    // Otherwise the file was deleted since we listed the directory: skip it
    // and reuse the same slot for the next child.
  }
  result->resize(kept);
  return Status::OK();
}
SequentialFile::~SequentialFile() { SequentialFile::~SequentialFile() {
} }

@ -935,6 +935,42 @@ TEST_F(EnvPosixTest, Preallocation) {
ASSERT_EQ(last_allocated_block, 7UL); ASSERT_EQ(last_allocated_block, 7UL);
} }
// Test that the two ways to get children file attributes (in bulk or
// individually) behave consistently.
//
// Creates kNumChildren files in the test temp directory, where file i holds
// 4 * i bytes, then checks that GetChildrenFileAttributes() lists every file
// with the same size that GetFileSize() reports for it.
TEST_F(EnvPosixTest, ConsistentChildrenAttributes) {
  const EnvOptions soptions;
  const int kNumChildren = 10;
  // Grows by the 4-byte string "test" after each file is written.
  std::string data;
  for (int i = 0; i < kNumChildren; ++i) {
    std::ostringstream oss;
    oss << test::TmpDir() << "/testfile_" << i;
    const std::string path = oss.str();
    unique_ptr<WritableFile> file;
    ASSERT_OK(env_->NewWritableFile(path, &file, soptions));
    // Check the write itself; an unnoticed failure would otherwise only show
    // up later as a confusing size mismatch.
    ASSERT_OK(file->Append(data));
    data.append("test");
  }

  std::vector<Env::FileAttributes> file_attrs;
  ASSERT_OK(env_->GetChildrenFileAttributes(test::TmpDir(), &file_attrs));
  for (int i = 0; i < kNumChildren; ++i) {
    std::ostringstream oss;
    oss << "testfile_" << i;
    const std::string name = oss.str();
    const std::string path = test::TmpDir() + "/" + name;

    // The bulk result carries names relative to the listed directory.
    auto file_attrs_iter = std::find_if(
        file_attrs.begin(), file_attrs.end(),
        [&name](const Env::FileAttributes& fm) { return fm.name == name; });
    ASSERT_TRUE(file_attrs_iter != file_attrs.end());

    // Compare as uint64_t to avoid a signed/unsigned comparison in ASSERT_EQ.
    const uint64_t expected_size = static_cast<uint64_t>(4 * i);
    uint64_t size = 0;
    ASSERT_OK(env_->GetFileSize(path, &size));
    ASSERT_EQ(size, expected_size);
    ASSERT_EQ(size, file_attrs_iter->size_bytes);
  }
}
// Test that all WritableFileWrapper forwards all calls to WritableFile. // Test that all WritableFileWrapper forwards all calls to WritableFile.
TEST_F(EnvPosixTest, WritableFileWrapper) { TEST_F(EnvPosixTest, WritableFileWrapper) {
class Base : public WritableFile { class Base : public WritableFile {

Loading…
Cancel
Save