Improve EnvHdfs

Summary: Copy improvements from fbcode's version of EnvHdfs to our open-source version. It brings in some important bug fixes: sequential reads now loop until the requested bytes are read or EOF/error is hit, failed opens are detected via isValid() without leaking the file object, unexpected hdfsExists() return codes raise an error instead of being misreported, and the signatures are brought up to date with the current Env interface (std::unique_ptr results plus EnvOptions). A short usage sketch of the updated interface appears just before the diff below.

Test Plan: compiles

Reviewers: dhruba, haobo, sdong

Reviewed By: haobo

CC: leveldb

Differential Revision: https://reviews.facebook.net/D18711
Branch: main
Author: Igor Canadi, 11 years ago
Parent: f4574449e9
Commit: eea73226e9

Changed files:
  1. hdfs/env_hdfs.h (22 changed lines)
  2. util/env_hdfs.cc (117 changed lines)
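Before the diff, a minimal caller-side sketch (not part of this commit) of the updated Env interface. It assumes the RocksDB Env/SequentialFile API of this era; the helper name ReadFirstBlock and the 4 KB scratch buffer are illustrative only:

    #include <memory>
    #include <string>

    #include "rocksdb/env.h"
    #include "rocksdb/slice.h"
    #include "rocksdb/status.h"

    // Hypothetical helper: open a file through any Env (e.g. an HdfsEnv)
    // and read its first 4 KB via the new unique_ptr/EnvOptions signatures.
    rocksdb::Status ReadFirstBlock(rocksdb::Env* env, const std::string& fname) {
      std::unique_ptr<rocksdb::SequentialFile> file;
      rocksdb::EnvOptions options;
      rocksdb::Status s = env->NewSequentialFile(fname, &file, options);
      if (!s.ok()) {
        return s;  // e.g. IOError when the HDFS open fails the isValid() check
      }
      char scratch[4096];
      rocksdb::Slice data;
      // With the fixed HdfsReadableFile::Read, this loops over hdfsRead()
      // until 4096 bytes are read, EOF is reached, or an error occurs.
      return file->Read(sizeof(scratch), &data, scratch);
    }

The same pattern applies to NewRandomAccessFile, NewWritableFile and NewLogger in the diff that follows.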

hdfs/env_hdfs.h

@@ -18,9 +18,6 @@
 namespace rocksdb {
 
-static const std::string kProto = "hdfs://";
-static const std::string pathsep = "/";
-
 // Thrown during execution when there is an issue with the supplied
 // arguments.
 class HdfsUsageException : public std::exception { };
@@ -58,20 +55,23 @@ class HdfsEnv : public Env {
   }
 
   virtual Status NewSequentialFile(const std::string& fname,
-                                   SequentialFile** result);
+                                   std::unique_ptr<SequentialFile>* result,
+                                   const EnvOptions& options);
 
   virtual Status NewRandomAccessFile(const std::string& fname,
-                                     RandomAccessFile** result);
+                                     std::unique_ptr<RandomAccessFile>* result,
+                                     const EnvOptions& options);
 
   virtual Status NewWritableFile(const std::string& fname,
-                                 WritableFile** result);
+                                 std::unique_ptr<WritableFile>* result,
+                                 const EnvOptions& options);
 
   virtual Status NewRandomRWFile(const std::string& fname,
-                                 unique_ptr<RandomRWFile>* result,
+                                 std::unique_ptr<RandomRWFile>* result,
                                  const EnvOptions& options);
 
   virtual Status NewDirectory(const std::string& name,
-                              unique_ptr<Directory>* result);
+                              std::unique_ptr<Directory>* result);
 
   virtual bool FileExists(const std::string& fname);
@@ -97,7 +97,8 @@ class HdfsEnv : public Env {
   virtual Status UnlockFile(FileLock* lock);
 
-  virtual Status NewLogger(const std::string& fname, Logger** result);
+  virtual Status NewLogger(const std::string& fname,
+                           std::shared_ptr<Logger>* result);
 
   virtual void Schedule(void (*function)(void* arg), void* arg,
                         Priority pri = LOW) {
@@ -161,6 +162,9 @@ class HdfsEnv : public Env {
   // object here so that we can use posix timers,
   // posix threads, etc.
 
+  static const std::string kProto;
+  static const std::string pathsep;
+
   /**
    * If the URI is specified of the form hdfs://server:port/path,
    * then connect to the specified cluster

util/env_hdfs.cc

@@ -18,6 +18,9 @@
 #include "hdfs/hdfs.h"
 #include "hdfs/env_hdfs.h"
 
+#define HDFS_EXISTS 0
+#define HDFS_DOESNT_EXIST 1
+
 //
 // This file defines an HDFS environment for rocksdb. It uses the libhdfs
 // api to access HDFS. All HDFS files created by one instance of rocksdb
@@ -39,7 +42,8 @@ static Logger* mylog = nullptr;
 
 // Used for reading a file from HDFS. It implements both sequential-read
 // access methods as well as random read access methods.
-class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAccessFile {
+class HdfsReadableFile : virtual public SequentialFile,
+                         virtual public RandomAccessFile {
  private:
   hdfsFS fileSys_;
   std::string filename_;
@@ -73,17 +77,34 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAccessFile
     Status s;
     Log(mylog, "[hdfs] HdfsReadableFile reading %s %ld\n",
        filename_.c_str(), n);
-    size_t bytes_read = hdfsRead(fileSys_, hfile_, scratch, (tSize)n);
-    Log(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str());
-    *result = Slice(scratch, bytes_read);
-    if (bytes_read < n) {
-      if (feof()) {
-        // We leave status as ok if we hit the end of the file
-      } else {
-        // A partial read with an error: return a non-ok status
-        s = IOError(filename_, errno);
-      }
-    }
+
+    char* buffer = scratch;
+    size_t total_bytes_read = 0;
+    tSize bytes_read = 0;
+    tSize remaining_bytes = (tSize)n;
+
+    // Read a total of n bytes repeatedly until we hit error or eof
+    while (remaining_bytes > 0) {
+      bytes_read = hdfsRead(fileSys_, hfile_, buffer, remaining_bytes);
+      if (bytes_read <= 0) {
+        break;
+      }
+      assert(bytes_read <= remaining_bytes);
+      total_bytes_read += bytes_read;
+      remaining_bytes -= bytes_read;
+      buffer += bytes_read;
+    }
+    assert(total_bytes_read <= n);
+
+    Log(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str());
+
+    if (bytes_read < 0) {
+      s = IOError(filename_, errno);
+    } else {
+      *result = Slice(scratch, total_bytes_read);
+    }
     return s;
   }
@@ -139,8 +160,7 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAccessFile
       size = pFileInfo->mSize;
       hdfsFreeFileInfo(pFileInfo, 1);
     } else {
-      throw rocksdb::HdfsFatalException("fileSize on unknown file " +
-                                        filename_);
+      throw HdfsFatalException("fileSize on unknown file " + filename_);
     }
     return size;
   }
@@ -236,9 +256,8 @@ class HdfsLogger : public Logger {
   uint64_t (*gettid_)(); // Return the thread id for the current thread
 
  public:
-  HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)(),
-             const InfoLogLevel log_level = InfoLogLevel::ERROR)
-      : Logger(log_level), file_(f), gettid_(gettid) {
+  HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)())
+      : file_(f), gettid_(gettid) {
     Log(mylog, "[hdfs] HdfsLogger opened %s\n",
         file_->getName().c_str());
   }
@@ -324,40 +343,52 @@ class HdfsLogger : public Logger {
 // Finally, the hdfs environment
 
+const std::string HdfsEnv::kProto = "hdfs://";
+const std::string HdfsEnv::pathsep = "/";
+
 // open a file for sequential reading
 Status HdfsEnv::NewSequentialFile(const std::string& fname,
-                                  SequentialFile** result) {
+                                  unique_ptr<SequentialFile>* result,
+                                  const EnvOptions& options) {
+  result->reset();
   HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname);
-  if (f == nullptr) {
+  if (f == nullptr || !f->isValid()) {
+    delete f;
     *result = nullptr;
     return IOError(fname, errno);
   }
-  *result = dynamic_cast<SequentialFile*>(f);
+  result->reset(dynamic_cast<SequentialFile*>(f));
   return Status::OK();
 }
 
 // open a file for random reading
 Status HdfsEnv::NewRandomAccessFile(const std::string& fname,
-                                    RandomAccessFile** result) {
+                                    unique_ptr<RandomAccessFile>* result,
+                                    const EnvOptions& options) {
+  result->reset();
   HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname);
-  if (f == nullptr) {
+  if (f == nullptr || !f->isValid()) {
+    delete f;
     *result = nullptr;
     return IOError(fname, errno);
   }
-  *result = dynamic_cast<RandomAccessFile*>(f);
+  result->reset(dynamic_cast<RandomAccessFile*>(f));
   return Status::OK();
 }
 
 // create a new file for writing
 Status HdfsEnv::NewWritableFile(const std::string& fname,
-                                WritableFile** result) {
+                                unique_ptr<WritableFile>* result,
+                                const EnvOptions& options) {
+  result->reset();
   Status s;
   HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname);
   if (f == nullptr || !f->isValid()) {
+    delete f;
     *result = nullptr;
     return IOError(fname, errno);
   }
-  *result = dynamic_cast<WritableFile*>(f);
+  result->reset(dynamic_cast<WritableFile*>(f));
   return Status::OK();
 }
@@ -367,24 +398,30 @@ Status HdfsEnv::NewRandomRWFile(const std::string& fname,
   return Status::NotSupported("NewRandomRWFile not supported on HdfsEnv");
 }
 
-virtual Status NewDirectory(const std::string& name,
-                            unique_ptr<Directory>* result) {
-  return Status::NotSupported("NewDirectory not yet supported on HdfsEnv");
+Status HdfsEnv::NewDirectory(const std::string& name,
+                             unique_ptr<Directory>* result) {
+  return Status::NotSupported("NewDirectory not supported on HdfsEnv");
 }
 
 bool HdfsEnv::FileExists(const std::string& fname) {
   int value = hdfsExists(fileSys_, fname.c_str());
-  if (value == 0) {
+  switch (value) {
+    case HDFS_EXISTS:
       return true;
+    case HDFS_DOESNT_EXIST:
+      return false;
+    default:  // anything else should be an error
+      Log(mylog, "FileExists hdfsExists call failed");
+      throw HdfsFatalException("hdfsExists call failed with error " +
+                               std::to_string(value) + ".\n");
   }
-  return false;
 }
 
 Status HdfsEnv::GetChildren(const std::string& path,
                             std::vector<std::string>* result) {
   int value = hdfsExists(fileSys_, path.c_str());
   switch (value) {
-    case 0: {
+    case HDFS_EXISTS: {  // directory exists
      int numEntries = 0;
      hdfsFileInfo* pHdfsFileInfo = 0;
      pHdfsFileInfo = hdfsListDirectory(fileSys_, path.c_str(), &numEntries);
@@ -402,15 +439,17 @@ Status HdfsEnv::GetChildren(const std::string& path,
      } else {
        // numEntries < 0 indicates error
        Log(mylog, "hdfsListDirectory call failed with error ");
-        throw HdfsFatalException("hdfsListDirectory call failed negative error.\n");
+        throw HdfsFatalException(
+            "hdfsListDirectory call failed negative error.\n");
      }
      break;
    }
-    case 1:  // directory does not exist, exit
+    case HDFS_DOESNT_EXIST:  // directory does not exist, exit
      break;
    default:  // anything else should be an error
-      Log(mylog, "hdfsListDirectory call failed with error ");
-      throw HdfsFatalException("hdfsListDirectory call failed with error.\n");
+      Log(mylog, "GetChildren hdfsExists call failed");
+      throw HdfsFatalException("hdfsExists call failed with error " +
+                               std::to_string(value) + ".\n");
   }
   return Status::OK();
 }
@@ -432,10 +471,15 @@ Status HdfsEnv::CreateDir(const std::string& name) {
 
 Status HdfsEnv::CreateDirIfMissing(const std::string& name) {
   const int value = hdfsExists(fileSys_, name.c_str());
   //  Not atomic. state might change b/w hdfsExists and CreateDir.
-  if (value == 0) {
+  switch (value) {
+    case HDFS_EXISTS:
       return Status::OK();
-  } else {
+    case HDFS_DOESNT_EXIST:
       return CreateDir(name);
+    default:  // anything else should be an error
+      Log(mylog, "CreateDirIfMissing hdfsExists call failed");
+      throw HdfsFatalException("hdfsExists call failed with error " +
+                               std::to_string(value) + ".\n");
   }
 };
@@ -492,11 +536,12 @@ Status HdfsEnv::NewLogger(const std::string& fname,
                           shared_ptr<Logger>* result) {
   HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname);
   if (f == nullptr || !f->isValid()) {
+    delete f;
     *result = nullptr;
     return IOError(fname, errno);
   }
   HdfsLogger* h = new HdfsLogger(f, &HdfsEnv::gettid);
-  *result = h;
+  result->reset(h);
   if (mylog == nullptr) {
     // mylog = h; // uncomment this for detailed logging
   }
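With the stricter hdfsExists() handling above, an unexpected return code from HDFS now surfaces as an exception rather than being reported as a missing file. A caller-side sketch (not part of this commit; SafeFileExists is an illustrative name, and it assumes HdfsFatalException derives from std::exception as the exception classes in hdfs/env_hdfs.h do):

    #include <exception>
    #include <string>

    #include "hdfs/env_hdfs.h"

    // Hypothetical wrapper: distinguish "definitely missing" from
    // "HDFS reported an error while checking".
    bool SafeFileExists(rocksdb::HdfsEnv* env, const std::string& fname) {
      try {
        return env->FileExists(fname);
      } catch (const std::exception& e) {
        // A cluster or connection problem, not proof the file is absent.
        // Real callers would log e.what() and propagate an error instead.
        return false;
      }
    }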
