@ -18,6 +18,9 @@
# include "hdfs/hdfs.h"
# include "hdfs/env_hdfs.h"
# define HDFS_EXISTS 0
# define HDFS_DOESNT_EXIST 1
//
// This file defines an HDFS environment for rocksdb. It uses the libhdfs
// api to access HDFS. All HDFS files created by one instance of rocksdb
@ -39,7 +42,8 @@ static Logger* mylog = nullptr;
// Used for reading a file from HDFS. It implements both sequential-read
// access methods as well as random read access methods.
class HdfsReadableFile : virtual public SequentialFile , virtual public RandomAccessFile {
class HdfsReadableFile : virtual public SequentialFile ,
virtual public RandomAccessFile {
private :
hdfsFS fileSys_ ;
std : : string filename_ ;
@ -73,17 +77,34 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAcce
Status s ;
Log ( mylog , " [hdfs] HdfsReadableFile reading %s %ld \n " ,
filename_ . c_str ( ) , n ) ;
size_t bytes_read = hdfsRead ( fileSys_ , hfile_ , scratch , ( tSize ) n ) ;
char * buffer = scratch ;
size_t total_bytes_read = 0 ;
tSize bytes_read = 0 ;
tSize remaining_bytes = ( tSize ) n ;
// Read a total of n bytes repeatedly until we hit error or eof
while ( remaining_bytes > 0 ) {
bytes_read = hdfsRead ( fileSys_ , hfile_ , buffer , remaining_bytes ) ;
if ( bytes_read < = 0 ) {
break ;
}
assert ( bytes_read < = remaining_bytes ) ;
total_bytes_read + = bytes_read ;
remaining_bytes - = bytes_read ;
buffer + = bytes_read ;
}
assert ( total_bytes_read < = n ) ;
Log ( mylog , " [hdfs] HdfsReadableFile read %s \n " , filename_ . c_str ( ) ) ;
* result = Slice ( scratch , bytes_read ) ;
if ( bytes_read < n ) {
if ( feof ( ) ) {
// We leave status as ok if we hit the end of the file
} else {
// A partial read with an error: return a non-ok status
if ( bytes_read < 0 ) {
s = IOError ( filename_ , errno ) ;
} else {
* result = Slice ( scratch , total_bytes_read ) ;
}
}
return s ;
}
@ -139,8 +160,7 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAcce
size = pFileInfo - > mSize ;
hdfsFreeFileInfo ( pFileInfo , 1 ) ;
} else {
throw rocksdb : : HdfsFatalException ( " fileSize on unknown file " +
filename_ ) ;
throw HdfsFatalException ( " fileSize on unknown file " + filename_ ) ;
}
return size ;
}
@ -236,9 +256,8 @@ class HdfsLogger : public Logger {
uint64_t ( * gettid_ ) ( ) ; // Return the thread id for the current thread
public :
// Wrap an already-open HdfsWritableFile as the log sink.
// gettid is a function returning the current thread's id
// (presumably used when formatting log lines -- the Logv body is
// outside this view; TODO confirm).
HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)())
    : file_(f), gettid_(gettid) {
  Log(mylog, "[hdfs] HdfsLogger opened %s\n",
      file_->getName().c_str());
}
@ -324,40 +343,52 @@ class HdfsLogger : public Logger {
// Finally, the hdfs environment
const std : : string HdfsEnv : : kProto = " hdfs:// " ;
const std : : string HdfsEnv : : pathsep = " / " ;
// open a file for sequential reading
// Open an HDFS file for sequential reading.
// On success *result owns the new file.  On failure (allocation failed
// or the underlying hdfsOpenFile did not produce a valid handle) the
// file object is destroyed, *result is left empty, and an IOError built
// from errno is returned.
Status HdfsEnv::NewSequentialFile(const std::string& fname,
                                  unique_ptr<SequentialFile>* result,
                                  const EnvOptions& options) {
  result->reset();
  HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname);
  if (f == nullptr || !f->isValid()) {
    delete f;  // delete of nullptr is a no-op, so this covers both cases
    *result = nullptr;
    return IOError(fname, errno);
  }
  result->reset(dynamic_cast<SequentialFile*>(f));
  return Status::OK();
}
// open a file for random reading
// Open an HDFS file for random-access reading.
// Mirrors NewSequentialFile: on success *result owns the new file,
// on failure the object is destroyed and an errno-based IOError is
// returned with *result left empty.
Status HdfsEnv::NewRandomAccessFile(const std::string& fname,
                                    unique_ptr<RandomAccessFile>* result,
                                    const EnvOptions& options) {
  result->reset();
  HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname);
  if (f == nullptr || !f->isValid()) {
    delete f;  // safe on nullptr
    *result = nullptr;
    return IOError(fname, errno);
  }
  result->reset(dynamic_cast<RandomAccessFile*>(f));
  return Status::OK();
}
// create a new file for writing
// Create a new HDFS file for writing.
// On success *result owns the new file; on failure the partially
// constructed object is destroyed and an errno-based IOError is
// returned with *result left empty.
Status HdfsEnv::NewWritableFile(const std::string& fname,
                                unique_ptr<WritableFile>* result,
                                const EnvOptions& options) {
  result->reset();
  HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname);
  if (f == nullptr || !f->isValid()) {
    delete f;  // safe on nullptr
    *result = nullptr;
    return IOError(fname, errno);
  }
  result->reset(dynamic_cast<WritableFile*>(f));
  return Status::OK();
}
@ -367,24 +398,30 @@ Status HdfsEnv::NewRandomRWFile(const std::string& fname,
return Status : : NotSupported ( " NewRandomRWFile not supported on HdfsEnv " ) ;
}
// Directory objects are not implemented for HDFS.
Status HdfsEnv::NewDirectory(const std::string& name,
                             unique_ptr<Directory>* result) {
  return Status::NotSupported("NewDirectory not supported on HdfsEnv");
}
bool HdfsEnv : : FileExists ( const std : : string & fname ) {
int value = hdfsExists ( fileSys_ , fname . c_str ( ) ) ;
if ( value = = 0 ) {
switch ( value ) {
case HDFS_EXISTS :
return true ;
}
case HDFS_DOESNT_EXIST :
return false ;
default : // anything else should be an error
Log ( mylog , " FileExists hdfsExists call failed " ) ;
throw HdfsFatalException ( " hdfsExists call failed with error " +
std : : to_string ( value ) + " . \n " ) ;
}
}
Status HdfsEnv : : GetChildren ( const std : : string & path ,
std : : vector < std : : string > * result ) {
int value = hdfsExists ( fileSys_ , path . c_str ( ) ) ;
switch ( value ) {
case 0 : {
case HDFS_EXISTS : { // directory exists
int numEntries = 0 ;
hdfsFileInfo * pHdfsFileInfo = 0 ;
pHdfsFileInfo = hdfsListDirectory ( fileSys_ , path . c_str ( ) , & numEntries ) ;
@ -402,15 +439,17 @@ Status HdfsEnv::GetChildren(const std::string& path,
} else {
// numEntries < 0 indicates error
Log ( mylog , " hdfsListDirectory call failed with error " ) ;
throw HdfsFatalException ( " hdfsListDirectory call failed negative error. \n " ) ;
throw HdfsFatalException (
" hdfsListDirectory call failed negative error. \n " ) ;
}
break ;
}
case 1 : // directory does not exist, exit
case HDFS_DOESNT_EXIST : // directory does not exist, exit
break ;
default : // anything else should be an error
Log ( mylog , " hdfsListDirectory call failed with error " ) ;
throw HdfsFatalException ( " hdfsListDirectory call failed with error. \n " ) ;
Log ( mylog , " GetChildren hdfsExists call failed " ) ;
throw HdfsFatalException ( " hdfsExists call failed with error " +
std : : to_string ( value ) + " . \n " ) ;
}
return Status : : OK ( ) ;
}
@ -432,10 +471,15 @@ Status HdfsEnv::CreateDir(const std::string& name) {
// Create directory `name` unless it already exists.
// NOTE: not atomic -- the directory state may change between the
// hdfsExists() probe and the CreateDir() call.
// As in FileExists, an hdfsExists() result other than HDFS_EXISTS or
// HDFS_DOESNT_EXIST indicates a failure talking to HDFS and is
// surfaced as a fatal exception.
Status HdfsEnv::CreateDirIfMissing(const std::string& name) {
  const int value = hdfsExists(fileSys_, name.c_str());
  switch (value) {
    case HDFS_EXISTS:
      return Status::OK();
    case HDFS_DOESNT_EXIST:
      return CreateDir(name);
    default:  // anything else should be an error
      Log(mylog, "CreateDirIfMissing hdfsExists call failed");
      throw HdfsFatalException("hdfsExists call failed with error " +
                               std::to_string(value) + ".\n");
  }
}
@ -492,11 +536,12 @@ Status HdfsEnv::NewLogger(const std::string& fname,
shared_ptr < Logger > * result ) {
HdfsWritableFile * f = new HdfsWritableFile ( fileSys_ , fname ) ;
if ( f = = nullptr | | ! f - > isValid ( ) ) {
delete f ;
* result = nullptr ;
return IOError ( fname , errno ) ;
}
HdfsLogger * h = new HdfsLogger ( f , & HdfsEnv : : gettid ) ;
* result = h ;
result - > reset ( h ) ;
if ( mylog = = nullptr ) {
// mylog = h; // uncomment this for detailed logging
}