Upstream changes.

git-svn-id: https://leveldb.googlecode.com/svn/trunk@16 62dab493-f737-651d-591e-8d6aee1b9529
main
jorlow@chromium.org 14 years ago
parent e11bdf1935
commit e2da744e12
  1. 4
      db/builder.cc
  2. 4
      db/db_bench.cc
  3. 4
      db/db_impl.cc
  4. 5
      db/db_iter.cc
  5. 6
      db/log_reader.cc
  6. 11
      db/log_writer.cc
  7. 2
      db/repair.cc
  8. 3
      db/table_cache.cc
  9. 15
      db/table_cache.h
  10. 22
      db/version_set.cc
  11. 15
      doc/log_format.txt
  12. 3
      include/env.h
  13. 6
      include/table.h
  14. 2
      table/table.cc
  15. 4
      table/table_test.cc
  16. 16
      util/env_chromium.cc
  17. 16
      util/env_posix.cc

@ -74,7 +74,9 @@ Status BuildTable(const std::string& dbname,
if (s.ok()) { if (s.ok()) {
// Verify that the table is usable // Verify that the table is usable
Iterator* it = table_cache->NewIterator(ReadOptions(), meta->number); Iterator* it = table_cache->NewIterator(ReadOptions(),
meta->number,
meta->file_size);
s = it->status(); s = it->status();
delete it; delete it;
} }

@ -354,7 +354,7 @@ class Benchmark {
private: private:
void Crc32c(int size, const char* label) { void Crc32c(int size, const char* label) {
// Checksum about 500MB of data total // Checksum about 500MB of data total
string data(size, 'x'); std::string data(size, 'x');
int64_t bytes = 0; int64_t bytes = 0;
uint32_t crc = 0; uint32_t crc = 0;
while (bytes < 500 * 1048576) { while (bytes < 500 * 1048576) {
@ -371,7 +371,7 @@ class Benchmark {
void SHA1(int size, const char* label) { void SHA1(int size, const char* label) {
// SHA1 about 100MB of data total // SHA1 about 100MB of data total
string data(size, 'x'); std::string data(size, 'x');
int64_t bytes = 0; int64_t bytes = 0;
char sha1[20]; char sha1[20];
while (bytes < 100 * 1048576) { while (bytes < 100 * 1048576) {

@ -642,7 +642,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
if (s.ok() && current_entries > 0) { if (s.ok() && current_entries > 0) {
// Verify that the table is usable // Verify that the table is usable
Iterator* iter = table_cache_->NewIterator(ReadOptions(),output_number); Iterator* iter = table_cache_->NewIterator(ReadOptions(),
output_number,
current_bytes);
s = iter->status(); s = iter->status();
delete iter; delete iter;
if (s.ok()) { if (s.ok()) {

@ -340,8 +340,11 @@ void DBIter::ReadIndirectValue(Slice ref) const {
std::string fname = LargeValueFileName(*dbname_, large_ref); std::string fname = LargeValueFileName(*dbname_, large_ref);
RandomAccessFile* file; RandomAccessFile* file;
Status s = env_->NewRandomAccessFile(fname, &file); Status s = env_->NewRandomAccessFile(fname, &file);
uint64_t file_size = 0;
if (s.ok()) {
s = env_->GetFileSize(fname, &file_size);
}
if (s.ok()) { if (s.ok()) {
uint64_t file_size = file->Size();
uint64_t value_size = large_ref.ValueSize(); uint64_t value_size = large_ref.ValueSize();
large_->value.resize(value_size); large_->value.resize(value_size);
Slice result; Slice result;

@ -105,7 +105,7 @@ void Reader::ReportDrop(size_t bytes, const char* reason) {
unsigned int Reader::ReadPhysicalRecord(Slice* result) { unsigned int Reader::ReadPhysicalRecord(Slice* result) {
while (true) { while (true) {
if (buffer_.size() <= kHeaderSize) { if (buffer_.size() < kHeaderSize) {
if (!eof_) { if (!eof_) {
// Last read was a full read, so this is a trailer to skip // Last read was a full read, so this is a trailer to skip
buffer_.clear(); buffer_.clear();
@ -124,12 +124,10 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) {
} else if (buffer_.size() == 0) { } else if (buffer_.size() == 0) {
// End of file // End of file
return kEof; return kEof;
} else if (buffer_.size() < kHeaderSize) { } else {
ReportDrop(buffer_.size(), "truncated record at end of file"); ReportDrop(buffer_.size(), "truncated record at end of file");
buffer_.clear(); buffer_.clear();
return kEof; return kEof;
} else {
// We have a trailing zero-length record. Fall through and check it.
} }
} }

@ -35,18 +35,19 @@ Status Writer::AddRecord(const Slice& slice) {
do { do {
const int leftover = kBlockSize - block_offset_; const int leftover = kBlockSize - block_offset_;
assert(leftover >= 0); assert(leftover >= 0);
if (leftover <= kHeaderSize) { if (leftover < kHeaderSize) {
// Switch to a new block // Switch to a new block
if (leftover > 0) { if (leftover > 0) {
// Fill the trailer // Fill the trailer (literal below relies on kHeaderSize being 7)
dest_->Append(Slice("\x00\x00\x00\x00\x00\x00\x00", leftover)); assert(kHeaderSize == 7);
dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
} }
block_offset_ = 0; block_offset_ = 0;
} }
// Invariant: we never leave <= kHeaderSize bytes in a block. // Invariant: we never leave < kHeaderSize bytes in a block.
const int avail = kBlockSize - block_offset_ - kHeaderSize; const int avail = kBlockSize - block_offset_ - kHeaderSize;
assert(avail > 0); assert(avail >= 0);
const size_t fragment_length = (left < avail) ? left : avail; const size_t fragment_length = (left < avail) ? left : avail;

@ -261,7 +261,7 @@ class Repairer {
Status status = env_->GetFileSize(fname, &t->meta.file_size); Status status = env_->GetFileSize(fname, &t->meta.file_size);
if (status.ok()) { if (status.ok()) {
Iterator* iter = table_cache_->NewIterator( Iterator* iter = table_cache_->NewIterator(
ReadOptions(), t->meta.number); ReadOptions(), t->meta.number, t->meta.file_size);
bool empty = true; bool empty = true;
ParsedInternalKey parsed; ParsedInternalKey parsed;
t->max_sequence = 0; t->max_sequence = 0;

@ -44,6 +44,7 @@ TableCache::~TableCache() {
Iterator* TableCache::NewIterator(const ReadOptions& options, Iterator* TableCache::NewIterator(const ReadOptions& options,
uint64_t file_number, uint64_t file_number,
uint64_t file_size,
Table** tableptr) { Table** tableptr) {
if (tableptr != NULL) { if (tableptr != NULL) {
*tableptr = NULL; *tableptr = NULL;
@ -59,7 +60,7 @@ Iterator* TableCache::NewIterator(const ReadOptions& options,
Table* table = NULL; Table* table = NULL;
Status s = env_->NewRandomAccessFile(fname, &file); Status s = env_->NewRandomAccessFile(fname, &file);
if (s.ok()) { if (s.ok()) {
s = Table::Open(*options_, file, &table); s = Table::Open(*options_, file, file_size, &table);
} }
if (!s.ok()) { if (!s.ok()) {

@ -23,15 +23,16 @@ class TableCache {
TableCache(const std::string& dbname, const Options* options, int entries); TableCache(const std::string& dbname, const Options* options, int entries);
~TableCache(); ~TableCache();
// Get an iterator for the specified file number and return it. If // Return an iterator for the specified file number (the corresponding
// "tableptr" is non-NULL, also sets "*tableptr" to point to the // file length must be exactly "file_size" bytes). If "tableptr" is
// Table object underlying the returned iterator, or NULL if no // non-NULL, also sets "*tableptr" to point to the Table object
// Table object underlies the returned iterator. The returned // underlying the returned iterator, or NULL if no Table object underlies
// "*tableptr" object is owned by the cache and should not be // the returned iterator. The returned "*tableptr" object is owned by
// deleted, and is valid for as long as the returned iterator is // the cache and should not be deleted, and is valid for as long as the
// live. // returned iterator is live.
Iterator* NewIterator(const ReadOptions& options, Iterator* NewIterator(const ReadOptions& options,
uint64_t file_number, uint64_t file_number,
uint64_t file_size,
Table** tableptr = NULL); Table** tableptr = NULL);
// Evict any entry for the specified file number // Evict any entry for the specified file number

@ -75,8 +75,8 @@ Version::~Version() {
// An internal iterator. For a given version/level pair, yields // An internal iterator. For a given version/level pair, yields
// information about the files in the level. For a given entry, key() // information about the files in the level. For a given entry, key()
// is the largest key that occurs in the file, and value() is an // is the largest key that occurs in the file, and value() is an
// 8-byte value containing the file number of the file, encoding using // 16-byte value containing the file number and file size, both
// EncodeFixed64. // encoded using EncodeFixed64.
class Version::LevelFileNumIterator : public Iterator { class Version::LevelFileNumIterator : public Iterator {
public: public:
LevelFileNumIterator(const Version* version, LevelFileNumIterator(const Version* version,
@ -129,6 +129,7 @@ class Version::LevelFileNumIterator : public Iterator {
Slice value() const { Slice value() const {
assert(Valid()); assert(Valid());
EncodeFixed64(value_buf_, (*flist_)[index_]->number); EncodeFixed64(value_buf_, (*flist_)[index_]->number);
EncodeFixed64(value_buf_+8, (*flist_)[index_]->file_size);
return Slice(value_buf_, sizeof(value_buf_)); return Slice(value_buf_, sizeof(value_buf_));
} }
virtual Status status() const { return Status::OK(); } virtual Status status() const { return Status::OK(); }
@ -137,18 +138,21 @@ class Version::LevelFileNumIterator : public Iterator {
const std::vector<FileMetaData*>* const flist_; const std::vector<FileMetaData*>* const flist_;
int index_; int index_;
mutable char value_buf_[8]; // Used for encoding the file number for value() // Backing store for value(). Holds the file number and size.
mutable char value_buf_[16];
}; };
static Iterator* GetFileIterator(void* arg, static Iterator* GetFileIterator(void* arg,
const ReadOptions& options, const ReadOptions& options,
const Slice& file_value) { const Slice& file_value) {
TableCache* cache = reinterpret_cast<TableCache*>(arg); TableCache* cache = reinterpret_cast<TableCache*>(arg);
if (file_value.size() != 8) { if (file_value.size() != 16) {
return NewErrorIterator( return NewErrorIterator(
Status::Corruption("FileReader invoked with unexpected value")); Status::Corruption("FileReader invoked with unexpected value"));
} else { } else {
return cache->NewIterator(options, DecodeFixed64(file_value.data())); return cache->NewIterator(options,
DecodeFixed64(file_value.data()),
DecodeFixed64(file_value.data() + 8));
} }
} }
@ -164,7 +168,8 @@ void Version::AddIterators(const ReadOptions& options,
// Merge all level zero files together since they may overlap // Merge all level zero files together since they may overlap
for (int i = 0; i < files_[0].size(); i++) { for (int i = 0; i < files_[0].size(); i++) {
iters->push_back( iters->push_back(
vset_->table_cache_->NewIterator(options, files_[0][i]->number)); vset_->table_cache_->NewIterator(
options, files_[0][i]->number, files_[0][i]->file_size));
} }
// For levels > 0, we can use a concatenating iterator that sequentially // For levels > 0, we can use a concatenating iterator that sequentially
@ -650,7 +655,7 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) {
// approximate offset of "ikey" within the table. // approximate offset of "ikey" within the table.
Table* tableptr; Table* tableptr;
Iterator* iter = table_cache_->NewIterator( Iterator* iter = table_cache_->NewIterator(
ReadOptions(), files[i]->number, &tableptr); ReadOptions(), files[i]->number, files[i]->file_size, &tableptr);
if (tableptr != NULL) { if (tableptr != NULL) {
result += tableptr->ApproximateOffsetOf(ikey.Encode()); result += tableptr->ApproximateOffsetOf(ikey.Encode());
} }
@ -855,7 +860,8 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
if (c->level() + which == 0) { if (c->level() + which == 0) {
const std::vector<FileMetaData*>& files = c->inputs_[which]; const std::vector<FileMetaData*>& files = c->inputs_[which];
for (int i = 0; i < files.size(); i++) { for (int i = 0; i < files.size(); i++) {
list[num++] = table_cache_->NewIterator(options, files[i]->number); list[num++] = table_cache_->NewIterator(
options, files[i]->number, files[i]->file_size);
} }
} else { } else {
// Create concatenating iterator for the files from this level // Create concatenating iterator for the files from this level

@ -9,12 +9,15 @@ Each block consists of a sequence of records:
type: uint8 // One of FULL, FIRST, MIDDLE, LAST type: uint8 // One of FULL, FIRST, MIDDLE, LAST
data: uint8[length] data: uint8[length]
A record never starts within the last seven bytes of a block. Any A record never starts within the last six bytes of a block (since it
leftover bytes here form the trailer, which must consist entirely of won't fit). Any leftover bytes here form the trailer, which must
zero bytes and must be skipped by readers. In particular, even if consist entirely of zero bytes and must be skipped by readers.
there are exactly seven bytes left in the block, and a zero-length
user record is added (which will fit in these seven bytes), the writer Aside: if exactly seven bytes are left in the current block, and a new
must skip these trailer bytes and add the record to the next block. non-zero length record is added, the writer must emit a FIRST record
(which contains zero bytes of user data) to fill up the trailing seven
bytes of the block and then emit all of the user data in subsequent
blocks.
More types may be added in the future. Some Readers may skip record More types may be added in the future. Some Readers may skip record
types they do not understand, others may report that some data was types they do not understand, others may report that some data was

@ -168,9 +168,6 @@ class RandomAccessFile {
RandomAccessFile() { } RandomAccessFile() { }
virtual ~RandomAccessFile(); virtual ~RandomAccessFile();
// Return the length of this file in bytes.
virtual uint64_t Size() const = 0;
// Read up to "n" bytes from the file starting at "offset". // Read up to "n" bytes from the file starting at "offset".
// "scratch[0..n-1]" may be written by this routine. Sets "*result" // "scratch[0..n-1]" may be written by this routine. Sets "*result"
// to the data that was read (including if fewer than "n" bytes were // to the data that was read (including if fewer than "n" bytes were

@ -20,8 +20,9 @@ struct ReadOptions;
// immutable and persistent. // immutable and persistent.
class Table { class Table {
public: public:
// Attempt to open the table that is stored in "file", and read the // Attempt to open the table that is stored in bytes [0..file_size)
// metadata entries necessary to allow retrieving data from the table. // of "file", and read the metadata entries necessary to allow
// retrieving data from the table.
// //
// If successful, returns ok and sets "*table" to the newly opened // If successful, returns ok and sets "*table" to the newly opened
// table. The client should delete "*table" when no longer needed. // table. The client should delete "*table" when no longer needed.
@ -33,6 +34,7 @@ class Table {
// *file must remain live while this Table is in use. // *file must remain live while this Table is in use.
static Status Open(const Options& options, static Status Open(const Options& options,
RandomAccessFile* file, RandomAccessFile* file,
uint64_t file_size,
Table** table); Table** table);
~Table(); ~Table();

@ -29,9 +29,9 @@ struct Table::Rep {
Status Table::Open(const Options& options, Status Table::Open(const Options& options,
RandomAccessFile* file, RandomAccessFile* file,
uint64_t size,
Table** table) { Table** table) {
*table = NULL; *table = NULL;
const uint64_t size = file->Size();
if (size < Footer::kEncodedLength) { if (size < Footer::kEncodedLength) {
return Status::InvalidArgument("file is too short to be an sstable"); return Status::InvalidArgument("file is too short to be an sstable");
} }

@ -110,7 +110,7 @@ class StringSource: public RandomAccessFile {
virtual ~StringSource() { } virtual ~StringSource() { }
virtual uint64_t Size() const { return contents_.size(); } uint64_t Size() const { return contents_.size(); }
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const { char* scratch) const {
@ -246,7 +246,7 @@ class TableConstructor: public Constructor {
source_ = new StringSource(sink.contents()); source_ = new StringSource(sink.contents());
Options table_options; Options table_options;
table_options.comparator = options.comparator; table_options.comparator = options.comparator;
return Table::Open(table_options, source_, &table_); return Table::Open(table_options, source_, sink.contents().size(), &table_);
} }
virtual size_t NumBytes() const { return source_->Size(); } virtual size_t NumBytes() const { return source_->Size(); }

@ -144,17 +144,13 @@ class ChromiumSequentialFile: public SequentialFile {
class ChromiumRandomAccessFile: public RandomAccessFile { class ChromiumRandomAccessFile: public RandomAccessFile {
private: private:
std::string filename_; std::string filename_;
uint64_t size_;
::base::PlatformFile file_; ::base::PlatformFile file_;
public: public:
ChromiumRandomAccessFile(const std::string& fname, uint64_t size, ChromiumRandomAccessFile(const std::string& fname, ::base::PlatformFile file)
::base::PlatformFile file) : filename_(fname), file_(file) { }
: filename_(fname), size_(size), file_(file) { }
virtual ~ChromiumRandomAccessFile() { ::base::ClosePlatformFile(file_); } virtual ~ChromiumRandomAccessFile() { ::base::ClosePlatformFile(file_); }
virtual uint64_t Size() const { return size_; }
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const { char* scratch) const {
Status s; Status s;
@ -256,13 +252,7 @@ class ChromiumEnv : public Env {
*result = NULL; *result = NULL;
return Status::IOError(fname, PlatformFileErrorString(error_code)); return Status::IOError(fname, PlatformFileErrorString(error_code));
} }
::base::PlatformFileInfo info; *result = new ChromiumRandomAccessFile(fname, file);
if (!::base::GetPlatformFileInfo(file, &info)) {
*result = NULL;
::base::ClosePlatformFile(file);
return Status::IOError(fname, PlatformFileErrorString(error_code));
}
*result = new ChromiumRandomAccessFile(fname, info.size, file);
return Status::OK(); return Status::OK();
} }

@ -57,16 +57,13 @@ class PosixSequentialFile: public SequentialFile {
class PosixRandomAccessFile: public RandomAccessFile { class PosixRandomAccessFile: public RandomAccessFile {
private: private:
std::string filename_; std::string filename_;
uint64_t size_;
int fd_; int fd_;
public: public:
PosixRandomAccessFile(const std::string& fname, uint64_t size, int fd) PosixRandomAccessFile(const std::string& fname, int fd)
: filename_(fname), size_(size), fd_(fd) { } : filename_(fname), fd_(fd) { }
virtual ~PosixRandomAccessFile() { close(fd_); } virtual ~PosixRandomAccessFile() { close(fd_); }
virtual uint64_t Size() const { return size_; }
virtual Status Read(uint64_t offset, size_t n, Slice* result, virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const { char* scratch) const {
Status s; Status s;
@ -286,14 +283,7 @@ class PosixEnv : public Env {
*result = NULL; *result = NULL;
return Status::IOError(fname, strerror(errno)); return Status::IOError(fname, strerror(errno));
} }
struct stat sbuf; *result = new PosixRandomAccessFile(fname, fd);
if (fstat(fd, &sbuf) != 0) {
*result = NULL;
Status s = Status::IOError(fname, strerror(errno));
close(fd);
return s;
}
*result = new PosixRandomAccessFile(fname, sbuf.st_size, fd);
return Status::OK(); return Status::OK();
} }

Loading…
Cancel
Save