diff --git a/db/builder.cc b/db/builder.cc index 1cd2ea8f9..b3bf894ef 100644 --- a/db/builder.cc +++ b/db/builder.cc @@ -18,6 +18,7 @@ #include "rocksdb/table.h" #include "rocksdb/env.h" #include "rocksdb/iterator.h" +#include "rocksdb/options.h" #include "table/block_based_table_builder.h" #include "util/stop_watch.h" @@ -26,9 +27,9 @@ namespace rocksdb { class TableFactory; TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, - int level, const bool enable_compression) { - return options.table_factory->GetTableBuilder(options, file, level, - enable_compression); + CompressionType compression_type) { + return options.table_factory->GetTableBuilder(options, file, + compression_type); } Status BuildTable(const std::string& dbname, @@ -63,8 +64,8 @@ Status BuildTable(const std::string& dbname, return s; } - TableBuilder* builder = GetTableBuilder(options, file.get(), 0, - enable_compression); + TableBuilder* builder = GetTableBuilder(options, file.get(), + options.compression); // the first key is the smallest key Slice key = iter->key(); diff --git a/db/builder.h b/db/builder.h index 17f4aa380..c5810d952 100644 --- a/db/builder.h +++ b/db/builder.h @@ -9,6 +9,7 @@ #include "rocksdb/comparator.h" #include "rocksdb/status.h" #include "rocksdb/types.h" +#include "rocksdb/options.h" namespace rocksdb { @@ -23,8 +24,9 @@ class VersionEdit; class TableBuilder; class WritableFile; + extern TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, - int level, const bool enable_compression); + CompressionType compression_type); // Build a Table file from the contents of *iter. The generated file // will be named according to meta->number. 
On success, the rest of diff --git a/db/db_impl.cc b/db/db_impl.cc index b4961f31f..75e25b0cd 100644 --- a/db/db_impl.cc +++ b/db/db_impl.cc @@ -211,6 +211,27 @@ Options SanitizeOptions(const std::string& dbname, return result; } +CompressionType GetCompressionType(const Options& options, int level, + const bool enable_compression) { + if (!enable_compression) { + // disable compression + return kNoCompression; + } + // If the user has specified a different compression level for each level, + // then pick the compression for that level. + if (!options.compression_per_level.empty()) { + const int n = options.compression_per_level.size() - 1; + // It is possible for level_ to be -1; in that case, we use level + // 0's compression. This occurs mostly in backwards compatibility + // situations when the builder doesn't know what level the file + // belongs to. Likewise, if level_ is beyond the end of the + // specified compression levels, use the last value. + return options.compression_per_level[std::max(0, std::min(level, n))]; + } else { + return options.compression; + } +} + DBImpl::DBImpl(const Options& options, const std::string& dbname) : env_(options.env), dbname_(dbname), @@ -1774,10 +1795,12 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { compact->outfile->SetPreallocationBlockSize( 1.1 * versions_->MaxFileSizeForLevel(compact->compaction->output_level())); + CompressionType compression_type = GetCompressionType( + options_, compact->compaction->output_level(), + compact->compaction->enable_compression()); + compact->builder.reset( - GetTableBuilder(options_, compact->outfile.get(), - compact->compaction->output_level(), - compact->compaction->enable_compression())); + GetTableBuilder(options_, compact->outfile.get(), compression_type)); } return s; } diff --git a/db/db_impl.h b/db/db_impl.h index e769d19f5..556be2c77 100644 --- a/db/db_impl.h +++ b/db/db_impl.h @@ -444,4 +444,13 @@ extern Options SanitizeOptions(const std::string& db, 
const InternalFilterPolicy* ipolicy, const Options& src); + +// Determine compression type, based on user options, level of the output +// file and whether compression is disabled. +// If enable_compression is false, then compression is always disabled no +// matter what the values of the other two parameters are. +// Otherwise, the compression type is determined based on options and level. +CompressionType GetCompressionType(const Options& options, int level, + const bool enable_compression); + } // namespace rocksdb diff --git a/db/simple_table_db_test.cc b/db/simple_table_db_test.cc index 4253ce647..c0fb42c9a 100644 --- a/db/simple_table_db_test.cc +++ b/db/simple_table_db_test.cc @@ -60,11 +60,10 @@ namespace rocksdb { // | index_block_offset (8 bytes) | // +------------------------------+ - // SimpleTable is a simple table format for UNIT TEST ONLY. It is not built // as production quality. -class SimpleTable : public Table { - public: +class SimpleTableReader: public TableReader { +public: // Attempt to open the table that is stored in bytes [0..file_size) // of "file", and read the metadata entries necessary to allow // retrieving data from the table. @@ -77,19 +76,16 @@ class SimpleTable : public Table { // for the duration of the returned table's lifetime. // // *file must remain live while this Table is in use. 
- static Status Open(const Options& options, - const EnvOptions& soptions, - unique_ptr&& file, - uint64_t file_size, - unique_ptr* table); + static Status Open(const Options& options, const EnvOptions& soptions, + unique_ptr && file, uint64_t file_size, + unique_ptr* table_reader); bool PrefixMayMatch(const Slice& internal_prefix) override; Iterator* NewIterator(const ReadOptions&) override; Status Get( - const ReadOptions&, const Slice& key, - void* arg, + const ReadOptions&, const Slice& key, void* arg, bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool), void (*mark_key_may_exist)(void*) = nullptr) override; @@ -101,13 +97,13 @@ class SimpleTable : public Table { TableStats& GetTableStats() override; - ~SimpleTable(); + ~SimpleTableReader(); - private: +private: struct Rep; Rep* rep_; - explicit SimpleTable(Rep* rep) { + explicit SimpleTableReader(Rep* rep) { rep_ = rep; } friend class TableCache; @@ -116,51 +112,51 @@ class SimpleTable : public Table { Status GetOffset(const Slice& target, uint64_t* offset); // No copying allowed - explicit SimpleTable(const Table&) = delete; - void operator=(const Table&) = delete; + explicit SimpleTableReader(const TableReader&) = delete; + void operator=(const TableReader&) = delete; }; // Iterator to iterate SimpleTable class SimpleTableIterator: public Iterator { public: - explicit SimpleTableIterator(SimpleTable* table); + explicit SimpleTableIterator(SimpleTableReader* table); ~SimpleTableIterator(); - bool Valid() const; + bool Valid() const; - void SeekToFirst(); + void SeekToFirst(); - void SeekToLast(); + void SeekToLast(); - void Seek(const Slice& target); + void Seek(const Slice& target); - void Next(); + void Next(); - void Prev(); + void Prev(); - Slice key() const; + Slice key() const; - Slice value() const; + Slice value() const; - Status status() const; + Status status() const; private: - SimpleTable* table_; - uint64_t offset_; - uint64_t next_offset_; - Slice key_; - Slice value_; - char 
tmp_str_[4]; - char* key_str_; - char* value_str_; - int value_str_len_; - Status status_; - // No copying allowed - SimpleTableIterator(const SimpleTableIterator&) = delete; - void operator=(const Iterator&) = delete; + SimpleTableReader* table_; + uint64_t offset_; + uint64_t next_offset_; + Slice key_; + Slice value_; + char tmp_str_[4]; + char* key_str_; + char* value_str_; + int value_str_len_; + Status status_; + // No copying allowed + SimpleTableIterator(const SimpleTableIterator&) = delete; + void operator=(const Iterator&) = delete; }; -struct SimpleTable::Rep { +struct SimpleTableReader::Rep { ~Rep() { } Rep(const EnvOptions& storage_options, uint64_t index_start_offset, @@ -186,13 +182,15 @@ struct SimpleTable::Rep { } }; -SimpleTable::~SimpleTable() { +SimpleTableReader::~SimpleTableReader() { delete rep_; } -Status SimpleTable::Open(const Options& options, const EnvOptions& soptions, - unique_ptr && file, uint64_t size, - unique_ptr
* table) { +Status SimpleTableReader::Open(const Options& options, + const EnvOptions& soptions, + unique_ptr && file, + uint64_t size, + unique_ptr* table_reader) { char footer_space[Rep::offset_length]; Slice footer_input; Status s = file->Read(size - Rep::offset_length, Rep::offset_length, @@ -202,33 +200,33 @@ Status SimpleTable::Open(const Options& options, const EnvOptions& soptions, int num_entries = (size - Rep::offset_length - index_start_offset) / (Rep::GetInternalKeyLength() + Rep::offset_length); - SimpleTable::Rep* rep = new SimpleTable::Rep(soptions, index_start_offset, - num_entries); - + SimpleTableReader::Rep* rep = new SimpleTableReader::Rep(soptions, + index_start_offset, + num_entries); rep->file = std::move(file); rep->options = options; - table->reset(new SimpleTable(rep)); + table_reader->reset(new SimpleTableReader(rep)); } return s; } -void SimpleTable::SetupForCompaction() { +void SimpleTableReader::SetupForCompaction() { } -TableStats& SimpleTable::GetTableStats() { +TableStats& SimpleTableReader::GetTableStats() { return rep_->table_stats; } -bool SimpleTable::PrefixMayMatch(const Slice& internal_prefix) { +bool SimpleTableReader::PrefixMayMatch(const Slice& internal_prefix) { return true; } -Iterator* SimpleTable::NewIterator(const ReadOptions& options) { +Iterator* SimpleTableReader::NewIterator(const ReadOptions& options) { return new SimpleTableIterator(this); } -Status SimpleTable::GetOffset(const Slice& target, uint64_t* offset) { +Status SimpleTableReader::GetOffset(const Slice& target, uint64_t* offset) { uint32_t left = 0; uint32_t right = rep_->num_entries - 1; char key_chars[Rep::GetInternalKeyLength()]; @@ -281,9 +279,10 @@ Status SimpleTable::GetOffset(const Slice& target, uint64_t* offset) { return s; } -Status SimpleTable::Get(const ReadOptions& options, const Slice& k, void* arg, - bool (*saver)(void*, const Slice&, const Slice&, bool), - void (*mark_key_may_exist)(void*)) { +Status SimpleTableReader::Get( + const 
ReadOptions& options, const Slice& k, void* arg, + bool (*saver)(void*, const Slice&, const Slice&, bool), + void (*mark_key_may_exist)(void*)) { Status s; SimpleTableIterator* iter = new SimpleTableIterator(this); for (iter->Seek(k); iter->Valid(); iter->Next()) { @@ -296,18 +295,18 @@ Status SimpleTable::Get(const ReadOptions& options, const Slice& k, void* arg, return s; } -bool SimpleTable::TEST_KeyInCache(const ReadOptions& options, - const Slice& key) { +bool SimpleTableReader::TEST_KeyInCache(const ReadOptions& options, + const Slice& key) { return false; } -uint64_t SimpleTable::ApproximateOffsetOf(const Slice& key) { +uint64_t SimpleTableReader::ApproximateOffsetOf(const Slice& key) { return 0; } -SimpleTableIterator::SimpleTableIterator(SimpleTable* table) : +SimpleTableIterator::SimpleTableIterator(SimpleTableReader* table) : table_(table) { - key_str_ = new char[table->rep_->GetInternalKeyLength()]; + key_str_ = new char[SimpleTableReader::Rep::GetInternalKeyLength()]; value_str_len_ = -1; SeekToFirst(); } @@ -346,7 +345,7 @@ void SimpleTableIterator::Next() { return; } Slice result; - int internal_key_size = table_->rep_->GetInternalKeyLength(); + int internal_key_size = SimpleTableReader::Rep::GetInternalKeyLength(); Status s = table_->rep_->file->Read(next_offset_, internal_key_size, &result, key_str_); @@ -389,14 +388,15 @@ Status SimpleTableIterator::status() const { return status_; } -class SimpleTableBuilder : public TableBuilder { - public: +class SimpleTableBuilder: public TableBuilder { +public: // Create a builder that will store the contents of the table it is // building in *file. Does not close the file. It is up to the // caller to close the file after calling Finish(). The output file // will be part of level specified by 'level'. A value of -1 means // that the caller does not know which level the output file will reside. 
- SimpleTableBuilder(const Options& options, WritableFile* file, int level=-1); + SimpleTableBuilder(const Options& options, WritableFile* file, + CompressionType compression_type); // REQUIRES: Either Finish() or Abandon() has been called. ~SimpleTableBuilder(); @@ -428,7 +428,7 @@ class SimpleTableBuilder : public TableBuilder { // Finish() call, returns the size of the final generated file. uint64_t FileSize() const override; - private: +private: struct Rep; Rep* rep_; @@ -457,25 +457,25 @@ struct SimpleTableBuilder::Rep { std::string index; - Rep(const Options& opt, WritableFile* f) - : options(opt), - file(f) { + Rep(const Options& opt, WritableFile* f) : + options(opt), file(f) { } ~Rep() { } }; SimpleTableBuilder::SimpleTableBuilder(const Options& options, - WritableFile* file, int level) - : TableBuilder(level), rep_(new SimpleTableBuilder::Rep(options, file)) { + WritableFile* file, + CompressionType compression_type) : + rep_(new SimpleTableBuilder::Rep(options, file)) { } SimpleTableBuilder::~SimpleTableBuilder() { - delete(rep_); + delete (rep_); } void SimpleTableBuilder::Add(const Slice& key, const Slice& value) { - assert((int) key.size() == Rep::GetInternalKeyLength()); + assert((int ) key.size() == Rep::GetInternalKeyLength()); // Update index rep_->index.append(key.data(), key.size()); @@ -531,204 +531,50 @@ uint64_t SimpleTableBuilder::FileSize() const { return rep_->offset; } -class SimpleTableFactory : public TableFactory { - public: - ~SimpleTableFactory() {} - SimpleTableFactory() {} +class SimpleTableFactory: public TableFactory { +public: + ~SimpleTableFactory() { + } + SimpleTableFactory() { + } const char* Name() const override { return "SimpleTable"; } - Status OpenTable(const Options& options, const EnvOptions& soptions, - unique_ptr && file, uint64_t file_size, - unique_ptr
* table) const; + Status GetTableReader(const Options& options, const EnvOptions& soptions, + unique_ptr && file, + uint64_t file_size, + unique_ptr* table_reader) const; TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, - int level, const bool enable_compression) const; + CompressionType compression_type) const; }; -Status SimpleTableFactory::OpenTable(const Options& options, - const EnvOptions& soptions, - unique_ptr && file, - uint64_t file_size, - unique_ptr
* table) const { +Status SimpleTableFactory::GetTableReader( + const Options& options, const EnvOptions& soptions, + unique_ptr && file, uint64_t file_size, + unique_ptr* table_reader) const { - return SimpleTable::Open(options, soptions, std::move(file), file_size, - table); + return SimpleTableReader::Open(options, soptions, std::move(file), file_size, + table_reader); } TableBuilder* SimpleTableFactory::GetTableBuilder( - const Options& options, WritableFile* file, int level, - const bool enable_compression) const { - return new SimpleTableBuilder(options, file, level); + const Options& options, WritableFile* file, + CompressionType compression_type) const { + return new SimpleTableBuilder(options, file, compression_type); } - -namespace anon { -class AtomicCounter { - private: - port::Mutex mu_; - int count_; - public: - AtomicCounter() : count_(0) { } - void Increment() { - MutexLock l(&mu_); - count_++; - } - int Read() { - MutexLock l(&mu_); - return count_; - } - void Reset() { - MutexLock l(&mu_); - count_ = 0; - } -}; - -} - -// Special Env used to delay background operations -class SpecialEnv : public EnvWrapper { - public: - // sstable Sync() calls are blocked while this pointer is non-nullptr. - port::AtomicPointer delay_sstable_sync_; - - // Simulate no-space errors while this pointer is non-nullptr. 
- port::AtomicPointer no_space_; - - // Simulate non-writable file system while this pointer is non-nullptr - port::AtomicPointer non_writable_; - - // Force sync of manifest files to fail while this pointer is non-nullptr - port::AtomicPointer manifest_sync_error_; - - // Force write to manifest files to fail while this pointer is non-nullptr - port::AtomicPointer manifest_write_error_; - - bool count_random_reads_; - anon::AtomicCounter random_read_counter_; - - anon::AtomicCounter sleep_counter_; - - explicit SpecialEnv(Env* base) : EnvWrapper(base) { - delay_sstable_sync_.Release_Store(nullptr); - no_space_.Release_Store(nullptr); - non_writable_.Release_Store(nullptr); - count_random_reads_ = false; - manifest_sync_error_.Release_Store(nullptr); - manifest_write_error_.Release_Store(nullptr); - } - - Status NewWritableFile(const std::string& f, unique_ptr* r, - const EnvOptions& soptions) { - class SSTableFile : public WritableFile { - private: - SpecialEnv* env_; - unique_ptr base_; - - public: - SSTableFile(SpecialEnv* env, unique_ptr&& base) - : env_(env), - base_(std::move(base)) { - } - Status Append(const Slice& data) { - if (env_->no_space_.Acquire_Load() != nullptr) { - // Drop writes on the floor - return Status::OK(); - } else { - return base_->Append(data); - } - } - Status Close() { return base_->Close(); } - Status Flush() { return base_->Flush(); } - Status Sync() { - while (env_->delay_sstable_sync_.Acquire_Load() != nullptr) { - env_->SleepForMicroseconds(100000); - } - return base_->Sync(); - } - }; - class ManifestFile : public WritableFile { - private: - SpecialEnv* env_; - unique_ptr base_; - public: - ManifestFile(SpecialEnv* env, unique_ptr&& b) - : env_(env), base_(std::move(b)) { } - Status Append(const Slice& data) { - if (env_->manifest_write_error_.Acquire_Load() != nullptr) { - return Status::IOError("simulated writer error"); - } else { - return base_->Append(data); - } - } - Status Close() { return base_->Close(); } - Status 
Flush() { return base_->Flush(); } - Status Sync() { - if (env_->manifest_sync_error_.Acquire_Load() != nullptr) { - return Status::IOError("simulated sync error"); - } else { - return base_->Sync(); - } - } - }; - - if (non_writable_.Acquire_Load() != nullptr) { - return Status::IOError("simulated write error"); - } - - Status s = target()->NewWritableFile(f, r, soptions); - if (s.ok()) { - if (strstr(f.c_str(), ".sst") != nullptr) { - r->reset(new SSTableFile(this, std::move(*r))); - } else if (strstr(f.c_str(), "MANIFEST") != nullptr) { - r->reset(new ManifestFile(this, std::move(*r))); - } - } - return s; - } - - Status NewRandomAccessFile(const std::string& f, - unique_ptr* r, - const EnvOptions& soptions) { - class CountingFile : public RandomAccessFile { - private: - unique_ptr target_; - anon::AtomicCounter* counter_; - public: - CountingFile(unique_ptr&& target, - anon::AtomicCounter* counter) - : target_(std::move(target)), counter_(counter) { - } - virtual Status Read(uint64_t offset, size_t n, Slice* result, - char* scratch) const { - counter_->Increment(); - return target_->Read(offset, n, result, scratch); - } - }; - - Status s = target()->NewRandomAccessFile(f, r, soptions); - if (s.ok() && count_random_reads_) { - r->reset(new CountingFile(std::move(*r), &random_read_counter_)); - } - return s; - } - - virtual void SleepForMicroseconds(int micros) { - sleep_counter_.Increment(); - target()->SleepForMicroseconds(micros); - } -}; - class SimpleTableDBTest { - protected: - public: +protected: +public: std::string dbname_; - SpecialEnv* env_; + Env* env_; DB* db_; Options last_options_; - SimpleTableDBTest() : env_(new SpecialEnv(Env::Default())) { + SimpleTableDBTest() : + env_(Env::Default()) { dbname_ = test::TmpDir() + "/simple_table_db_test"; ASSERT_OK(DestroyDB(dbname_, Options())); db_ = nullptr; @@ -738,7 +584,6 @@ class SimpleTableDBTest { ~SimpleTableDBTest() { delete db_; ASSERT_OK(DestroyDB(dbname_, Options())); - delete env_; } // Return 
the current option configuration. @@ -813,81 +658,6 @@ class SimpleTableDBTest { return result; } - // Return a string that contains all key,value pairs in order, - // formatted like "(k1->v1)(k2->v2)". - std::string Contents() { - std::vector forward; - std::string result; - Iterator* iter = db_->NewIterator(ReadOptions()); - for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { - std::string s = IterStatus(iter); - result.push_back('('); - result.append(s); - result.push_back(')'); - forward.push_back(s); - } - - // Check reverse iteration results are the reverse of forward results - unsigned int matched = 0; - for (iter->SeekToLast(); iter->Valid(); iter->Prev()) { - ASSERT_LT(matched, forward.size()); - ASSERT_EQ(IterStatus(iter), forward[forward.size() - matched - 1]); - matched++; - } - ASSERT_EQ(matched, forward.size()); - - delete iter; - return result; - } - - std::string AllEntriesFor(const Slice& user_key) { - Iterator* iter = dbfull()->TEST_NewInternalIterator(); - InternalKey target(user_key, kMaxSequenceNumber, kTypeValue); - iter->Seek(target.Encode()); - std::string result; - if (!iter->status().ok()) { - result = iter->status().ToString(); - } else { - result = "[ "; - bool first = true; - while (iter->Valid()) { - ParsedInternalKey ikey; - if (!ParseInternalKey(iter->key(), &ikey)) { - result += "CORRUPTED"; - } else { - if (last_options_.comparator->Compare(ikey.user_key, user_key) != 0) { - break; - } - if (!first) { - result += ", "; - } - first = false; - switch (ikey.type) { - case kTypeValue: - result += iter->value().ToString(); - break; - case kTypeMerge: - // keep it the same as kTypeValue for testing kMergePut - result += iter->value().ToString(); - break; - case kTypeDeletion: - result += "DEL"; - break; - case kTypeLogData: - assert(false); - break; - } - } - iter->Next(); - } - if (!first) { - result += " "; - } - result += "]"; - } - delete iter; - return result; - } int NumTableFilesAtLevel(int level) { std::string property; @@ 
-897,14 +667,6 @@ class SimpleTableDBTest { return atoi(property.c_str()); } - int TotalTableFiles() { - int result = 0; - for (int level = 0; level < db_->NumberLevels(); level++) { - result += NumTableFilesAtLevel(level); - } - return result; - } - // Return spread of files per level std::string FilesPerLevel() { std::string result; @@ -922,71 +684,6 @@ class SimpleTableDBTest { return result; } - int CountFiles() { - std::vector files; - env_->GetChildren(dbname_, &files); - - std::vector logfiles; - if (dbname_ != last_options_.wal_dir) { - env_->GetChildren(last_options_.wal_dir, &logfiles); - } - - return static_cast(files.size() + logfiles.size()); - } - - int CountLiveFiles() { - std::vector files; - uint64_t manifest_file_size; - db_->GetLiveFiles(files, &manifest_file_size); - return files.size(); - } - - uint64_t Size(const Slice& start, const Slice& limit) { - Range r(start, limit); - uint64_t size; - db_->GetApproximateSizes(&r, 1, &size); - return size; - } - - void Compact(const Slice& start, const Slice& limit) { - db_->CompactRange(&start, &limit); - } - - // Do n memtable compactions, each of which produces an sstable - // covering the range [small,large]. - void MakeTables(int n, const std::string& small, const std::string& large) { - for (int i = 0; i < n; i++) { - Put(small, "begin"); - Put(large, "end"); - dbfull()->TEST_FlushMemTable(); - } - } - - // Prevent pushing of new sstables into deeper levels by adding - // tables that cover a specified range to all levels. 
- void FillLevels(const std::string& smallest, const std::string& largest) { - MakeTables(db_->NumberLevels(), smallest, largest); - } - - void DumpFileCounts(const char* label) { - fprintf(stderr, "---\n%s:\n", label); - fprintf(stderr, "maxoverlap: %lld\n", - static_cast( - dbfull()->TEST_MaxNextLevelOverlappingBytes())); - for (int level = 0; level < db_->NumberLevels(); level++) { - int num = NumTableFilesAtLevel(level); - if (num > 0) { - fprintf(stderr, " level %3d : %d files\n", level, num); - } - } - } - - std::string DumpSSTableList() { - std::string property; - db_->GetProperty("rocksdb.sstables", &property); - return property; - } - std::string IterStatus(Iterator* iter) { std::string result; if (iter->Valid()) { @@ -996,26 +693,6 @@ class SimpleTableDBTest { } return result; } - - Options OptionsForLogIterTest() { - Options options = CurrentOptions(); - options.create_if_missing = true; - options.WAL_ttl_seconds = 1000; - return options; - } - - std::unique_ptr OpenTransactionLogIter( - const SequenceNumber seq) { - unique_ptr iter; - Status status = dbfull()->GetUpdatesSince(seq, &iter); - ASSERT_OK(status); - ASSERT_TRUE(iter->Valid()); - return std::move(iter); - } - - std::string DummyString(size_t len, char c = 'a') { - return std::string(len, c); - } }; TEST(SimpleTableDBTest, Empty) { @@ -1077,7 +754,7 @@ static std::string RandomString(Random* rnd, int len) { TEST(SimpleTableDBTest, CompactionTrigger) { Options options = CurrentOptions(); - options.write_buffer_size = 100<<10; //100KB + options.write_buffer_size = 100 << 10; //100KB options.num_levels = 3; options.max_mem_compaction_level = 0; options.level0_file_num_compaction_trigger = 3; @@ -1085,9 +762,8 @@ TEST(SimpleTableDBTest, CompactionTrigger) { Random rnd(301); - for (int num = 0; - num < options.level0_file_num_compaction_trigger - 1; - num++) { + for (int num = 0; num < options.level0_file_num_compaction_trigger - 1; + num++) { std::vector values; // Write 120KB (12 values, each 
10K) for (int i = 0; i < 12; i++) { diff --git a/db/table_cache.cc b/db/table_cache.cc index 2482ee0f9..a1f466b5a 100644 --- a/db/table_cache.cc +++ b/db/table_cache.cc @@ -19,8 +19,8 @@ namespace rocksdb { static void DeleteEntry(const Slice& key, void* value) { - Table* table = reinterpret_cast(value); - delete table; + TableReader* table_reader = reinterpret_cast(value); + delete table_reader; } static void UnrefEntry(void* arg1, void* arg2) { @@ -63,7 +63,7 @@ Status TableCache::FindTable(const EnvOptions& toptions, } std::string fname = TableFileName(dbname_, file_number); unique_ptr file; - unique_ptr
table; + unique_ptr table_reader; s = env_->NewRandomAccessFile(fname, &file, toptions); RecordTick(options_->statistics, NO_FILE_OPENS); if (s.ok()) { @@ -71,19 +71,19 @@ Status TableCache::FindTable(const EnvOptions& toptions, file->Hint(RandomAccessFile::RANDOM); } StopWatch sw(env_, options_->statistics, TABLE_OPEN_IO_MICROS); - s = options_->table_factory->OpenTable(*options_, toptions, - std::move(file), - file_size, &table); + s = options_->table_factory->GetTableReader(*options_, toptions, + std::move(file), file_size, + &table_reader); } if (!s.ok()) { - assert(table == nullptr); + assert(table_reader == nullptr); RecordTick(options_->statistics, NO_FILE_ERRORS); // We do not cache error results so that if the error is transient, // or somebody repairs the file, we recover automatically. } else { assert(file.get() == nullptr); - *handle = cache_->Insert(key, table.release(), 1, &DeleteEntry); + *handle = cache_->Insert(key, table_reader.release(), 1, &DeleteEntry); } } return s; @@ -93,10 +93,10 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, const EnvOptions& toptions, uint64_t file_number, uint64_t file_size, - Table** tableptr, + TableReader** table_reader_ptr, bool for_compaction) { - if (tableptr != nullptr) { - *tableptr = nullptr; + if (table_reader_ptr != nullptr) { + *table_reader_ptr = nullptr; } Cache::Handle* handle = nullptr; @@ -106,16 +106,16 @@ Iterator* TableCache::NewIterator(const ReadOptions& options, return NewErrorIterator(s); } - Table* table = - reinterpret_cast(cache_->Value(handle)); - Iterator* result = table->NewIterator(options); + TableReader* table_reader = + reinterpret_cast(cache_->Value(handle)); + Iterator* result = table_reader->NewIterator(options); result->RegisterCleanup(&UnrefEntry, cache_.get(), handle); - if (tableptr != nullptr) { - *tableptr = table; + if (table_reader_ptr != nullptr) { + *table_reader_ptr = table_reader; } if (for_compaction) { - table->SetupForCompaction(); + 
table_reader->SetupForCompaction(); } return result; @@ -134,8 +134,8 @@ Status TableCache::Get(const ReadOptions& options, &handle, table_io, options.read_tier == kBlockCacheTier); if (s.ok()) { - Table* t = - reinterpret_cast(cache_->Value(handle)); + TableReader* t = + reinterpret_cast(cache_->Value(handle)); s = t->Get(options, k, arg, saver, mark_key_may_exist); cache_->Release(handle); } else if (options.read_tier && s.IsIncomplete()) { @@ -156,8 +156,8 @@ bool TableCache::PrefixMayMatch(const ReadOptions& options, file_size, &handle, table_io); bool may_match = true; if (s.ok()) { - Table* t = - reinterpret_cast(cache_->Value(handle)); + TableReader* t = + reinterpret_cast(cache_->Value(handle)); may_match = t->PrefixMayMatch(internal_prefix); cache_->Release(handle); } diff --git a/db/table_cache.h b/db/table_cache.h index 135b04435..4b225af9b 100644 --- a/db/table_cache.h +++ b/db/table_cache.h @@ -39,7 +39,7 @@ class TableCache { const EnvOptions& toptions, uint64_t file_number, uint64_t file_size, - Table** tableptr = nullptr, + TableReader** table_reader_ptr = nullptr, bool for_compaction = false); // If a seek to internal key "k" in specified file finds an entry, diff --git a/db/table_stats_collector_test.cc b/db/table_stats_collector_test.cc index 16387ed3a..52f4b4bb8 100644 --- a/db/table_stats_collector_test.cc +++ b/db/table_stats_collector_test.cc @@ -20,7 +20,7 @@ namespace rocksdb { class TableStatsTest { private: - unique_ptr
table_; + unique_ptr table_reader_; }; // TODO(kailiu) the following classes should be moved to some more general @@ -88,22 +88,21 @@ void MakeBuilder( std::unique_ptr* builder) { writable->reset(new FakeWritableFile); builder->reset( - options.table_factory->GetTableBuilder(options, writable->get(), 0, - true) - ); + options.table_factory->GetTableBuilder(options, writable->get(), + options.compression)); } void OpenTable( const Options& options, const std::string& contents, - std::unique_ptr
* table) { + std::unique_ptr* table_reader) { std::unique_ptr file(new FakeRandomeAccessFile(contents)); - auto s = options.table_factory->OpenTable( + auto s = options.table_factory->GetTableReader( options, EnvOptions(), std::move(file), contents.size(), - table + table_reader ); ASSERT_OK(s); } @@ -176,9 +175,9 @@ TEST(TableStatsTest, CustomizedTableStatsCollector) { ASSERT_OK(builder->Finish()); // -- Step 2: Open table - std::unique_ptr
table; - OpenTable(options, writable->contents(), &table); - const auto& stats = table->GetTableStats().user_collected_stats; + std::unique_ptr table_reader; + OpenTable(options, writable->contents(), &table_reader); + const auto& stats = table_reader->GetTableStats().user_collected_stats; ASSERT_EQ("Rocksdb", stats.at("TableStatsTest")); @@ -234,9 +233,9 @@ TEST(TableStatsTest, InternalKeyStatsCollector) { ASSERT_OK(builder->Finish()); - std::unique_ptr
table; - OpenTable(options, writable->contents(), &table); - const auto& stats = table->GetTableStats().user_collected_stats; + std::unique_ptr table_reader; + OpenTable(options, writable->contents(), &table_reader); + const auto& stats = table_reader->GetTableStats().user_collected_stats; uint64_t deleted = 0; Slice key(stats.at(InternalKeyTableStatsNames::kDeletedKeys)); diff --git a/db/version_set.cc b/db/version_set.cc index 38d6c79f1..04e5c6753 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -1920,12 +1920,12 @@ uint64_t VersionSet::ApproximateOffsetOf(Version* v, const InternalKey& ikey) { } else { // "ikey" falls in the range for this table. Add the // approximate offset of "ikey" within the table. - Table* tableptr; + TableReader* table_reader_ptr; Iterator* iter = table_cache_->NewIterator( ReadOptions(), storage_options_, files[i]->number, - files[i]->file_size, &tableptr); - if (tableptr != nullptr) { - result += tableptr->ApproximateOffsetOf(ikey.Encode()); + files[i]->file_size, &table_reader_ptr); + if (table_reader_ptr != nullptr) { + result += table_reader_ptr->ApproximateOffsetOf(ikey.Encode()); } delete iter; } diff --git a/include/rocksdb/options.h b/include/rocksdb/options.h index c158d3f38..296c41020 100644 --- a/include/rocksdb/options.h +++ b/include/rocksdb/options.h @@ -19,7 +19,6 @@ #include "rocksdb/universal_compaction.h" #include "rocksdb/memtablerep.h" #include "rocksdb/slice_transform.h" -#include "rocksdb/table.h" namespace rocksdb { diff --git a/include/rocksdb/table.h b/include/rocksdb/table.h index 244c91fb7..f1b3632e8 100644 --- a/include/rocksdb/table.h +++ b/include/rocksdb/table.h @@ -13,6 +13,7 @@ #include "rocksdb/env.h" #include "rocksdb/iterator.h" #include "rocksdb/table_stats.h" +#include "rocksdb/options.h" namespace rocksdb { @@ -31,22 +32,8 @@ using std::unique_ptr; // external synchronization, but if any of the threads may call a // non-const method, all threads accessing the same TableBuilder must use // 
external synchronization. - class TableBuilder { public: - // Create a builder that will store the contents of the table it is - // building in *file. Does not close the file. It is up to the - // caller to close the file after calling Finish(). The output file - // will be part of level specified by 'level'. A value of -1 means - // that the caller does not know which level the output file will reside. - // - // If enable_compression=true, this table will follow the compression - // setting given in parameter options. If enable_compression=false, the - // table will not be compressed. - explicit TableBuilder(int level = -1, const bool enable_compression = true) : - level_(level) { - } - // REQUIRES: Either Finish() or Abandon() has been called. virtual ~TableBuilder() {} @@ -74,17 +61,14 @@ class TableBuilder { // Size of the file generated so far. If invoked after a successful // Finish() call, returns the size of the final generated file. virtual uint64_t FileSize() const = 0; - -protected: - int level_; }; // A Table is a sorted map from strings to strings. Tables are // immutable and persistent. A Table may be safely accessed from // multiple threads without external synchronization. -class Table { +class TableReader { public: - virtual ~Table() {} + virtual ~TableReader() {} // Determine whether there is a chance that the current table file // contains the key a key starting with iternal_prefix. The specific @@ -116,29 +100,25 @@ class Table { virtual TableStats& GetTableStats() = 0; - // Get function issued to look for specific key. - // The table will search the first entry in the table whose user key - // matches key, and pass it to the call back function handle_result, - // with the first argument to be parameter arg, and the last bool - // parameter to be whether an I/O is issued. - // mark_key_may_exist call back is called when it is configured to be + // Calls (*result_handler)(handle_context, ...) 
repeatedly, starting with + // the entry found after a call to Seek(key), until result_handler returns + // false, where k is the actual internal key for a row found and v is the + // value of the key. didIO is true if I/O is involved in the operation. May + // not make such a call if filter policy says that key is not present. + // + // mark_key_may_exist_handler needs to be called when it is configured to be // memory only and the key is not found in the block cache, with - // the parameter to be arg. + // the parameter to be handle_context. + // + // readOptions is the options for the read + // key is the key to search for virtual Status Get( - const ReadOptions&, const Slice& key, - void* arg, - bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool), - void (*mark_key_may_exist)(void*) = nullptr) = 0; -}; - -struct TableStatsNames { - static const std::string kDataSize; - static const std::string kIndexSize; - static const std::string kRawKeySize; - static const std::string kRawValueSize; - static const std::string kNumDataBlocks; - static const std::string kNumEntries; - static const std::string kFilterPolicy; + const ReadOptions& readOptions, + const Slice& key, + void* handle_context, + bool (*result_handler)(void* handle_context, const Slice& k, + const Slice& v, bool didIO), + void (*mark_key_may_exist_handler)(void* handle_context) = nullptr) = 0; }; // A base class for table factories @@ -146,7 +126,7 @@ class TableFactory { public: virtual ~TableFactory() {} - // The name of the comparator. + // The type of the table. // // The client of this package should switch to a new name whenever // the table format implementation changes.
// - // OpenTable() is called in two places: + // GetTableReader() is called in two places: // (1) TableCache::FindTable() calls the function when table cache miss // and cache the table object returned. // (1) SstFileReader (for SST Dump) opens the table and dump the table // contents using the interator of the table. - virtual Status OpenTable(const Options& options, - const EnvOptions& soptions, - unique_ptr&& file, - uint64_t file_size, - unique_ptr
* table) const = 0; + // options and soptions are options. options is the general options. + // Multiple configuration options can be accessed from there, including and + // not limited to block cache and key comparators. + // file is a file handler to handle the file for the table + // file_size is the physical file size of the file + // table_reader is the output table reader + virtual Status GetTableReader( + const Options& options, const EnvOptions& soptions, + unique_ptr && file, uint64_t file_size, + unique_ptr* table_reader) const = 0; // Return a table builder to write to a file for this table type. // @@ -182,8 +167,14 @@ class TableFactory { // by calling BuildTable()) // (4) When running Repairer, it creates a table builder to convert logs to // SST files (In Repairer::ConvertLogToTable() by calling BuildTable()) + // + // options is the general options. Multiple configuration options can be + // accessed from there, including and not limited to compression options. + // file is a handle of a writable file. It is the caller's responsibility to + // keep the file open and close the file after closing the table builder. + // compression_type is the compression type to use in this table.
virtual TableBuilder* GetTableBuilder( - const Options& options, WritableFile* file, int level, - const bool enable_compression) const = 0; + const Options& options, WritableFile* file, + CompressionType compression_type) const = 0; }; } // namespace rocksdb diff --git a/table/block_based_table_builder.cc b/table/block_based_table_builder.cc index 1b4db69f2..fde6c81e8 100644 --- a/table/block_based_table_builder.cc +++ b/table/block_based_table_builder.cc @@ -17,7 +17,7 @@ #include "rocksdb/env.h" #include "rocksdb/filter_policy.h" #include "rocksdb/options.h" -#include "table/block_based_table.h" +#include "table/block_based_table_reader.h" #include "table/block_builder.h" #include "table/filter_block.h" #include "table/format.h" @@ -81,8 +81,7 @@ struct BlockBasedTableBuilder::Rep { BlockBuilder data_block; BlockBuilder index_block; std::string last_key; - // Whether enable compression in this table. - bool enable_compression; + CompressionType compression_type; uint64_t num_entries = 0; uint64_t num_data_blocks = 0; @@ -107,13 +106,13 @@ struct BlockBasedTableBuilder::Rep { std::string compressed_output; - Rep(const Options& opt, WritableFile* f, bool enable_compression) + Rep(const Options& opt, WritableFile* f, CompressionType compression_type) : options(opt), index_block_options(opt), file(f), data_block(&options), index_block(1, index_block_options.comparator), - enable_compression(enable_compression), + compression_type(compression_type), filter_block(opt.filter_policy == nullptr ? 
nullptr : new FilterBlockBuilder(opt)), pending_index_entry(false) { @@ -121,9 +120,9 @@ struct BlockBasedTableBuilder::Rep { }; BlockBasedTableBuilder::BlockBasedTableBuilder(const Options& options, - WritableFile* file, int level, - const bool enable_compression) - : TableBuilder(level), rep_(new Rep(options, file, enable_compression)) { + WritableFile* file, + CompressionType compression_type) + : rep_(new Rep(options, file, compression_type)) { if (rep_->filter_block != nullptr) { rep_->filter_block->StartBlock(0); } @@ -220,26 +219,7 @@ void BlockBasedTableBuilder::WriteBlock(BlockBuilder* block, Slice block_contents; std::string* compressed = &r->compressed_output; - CompressionType type; - if (!r->enable_compression) { - // disable compression - type = kNoCompression; - } else { - // If the use has specified a different compression level for each level, - // then pick the compresison for that level. - if (!r->options.compression_per_level.empty()) { - const int n = r->options.compression_per_level.size(); - // It is possible for level_ to be -1; in that case, we use level - // 0's compression. This occurs mostly in backwards compatibility - // situations when the builder doesn't know what level the file - // belongs to. Likewise, if level_ is beyond the end of the - // specified compression levels, use the last value. 
- type = r->options.compression_per_level[std::max(0, - std::min(level_, n))]; - } else { - type = r->options.compression; - } - } + CompressionType type = r->compression_type; switch (type) { case kNoCompression: block_contents = raw; @@ -376,19 +356,21 @@ Status BlockBasedTableBuilder::Finish() { BytewiseSortedMap stats; // Add basic stats - AddStats(stats, TableStatsNames::kRawKeySize, r->raw_key_size); - AddStats(stats, TableStatsNames::kRawValueSize, r->raw_value_size); - AddStats(stats, TableStatsNames::kDataSize, r->data_size); + AddStats(stats, BlockBasedTableStatsNames::kRawKeySize, r->raw_key_size); + AddStats(stats, BlockBasedTableStatsNames::kRawValueSize, + r->raw_value_size); + AddStats(stats, BlockBasedTableStatsNames::kDataSize, r->data_size); AddStats( stats, - TableStatsNames::kIndexSize, + BlockBasedTableStatsNames::kIndexSize, r->index_block.CurrentSizeEstimate() + kBlockTrailerSize ); - AddStats(stats, TableStatsNames::kNumEntries, r->num_entries); - AddStats(stats, TableStatsNames::kNumDataBlocks, r->num_data_blocks); + AddStats(stats, BlockBasedTableStatsNames::kNumEntries, r->num_entries); + AddStats(stats, BlockBasedTableStatsNames::kNumDataBlocks, + r->num_data_blocks); if (r->filter_block != nullptr) { stats.insert(std::make_pair( - TableStatsNames::kFilterPolicy, + BlockBasedTableStatsNames::kFilterPolicy, r->options.filter_policy->Name() )); } diff --git a/table/block_based_table_builder.h b/table/block_based_table_builder.h index b551e49a0..b7c82b68f 100644 --- a/table/block_based_table_builder.h +++ b/table/block_based_table_builder.h @@ -11,24 +11,22 @@ #include #include "rocksdb/options.h" #include "rocksdb/status.h" +#include "rocksdb/table.h" namespace rocksdb { class BlockBuilder; class BlockHandle; class WritableFile; -class TableBuilder; class BlockBasedTableBuilder : public TableBuilder { public: // Create a builder that will store the contents of the table it is // building in *file. Does not close the file. 
It is up to the - // caller to close the file after calling Finish(). The output file - // will be part of level specified by 'level'. A value of -1 means - // that the caller does not know which level the output file will reside. + // caller to close the file after calling Finish(). BlockBasedTableBuilder(const Options& options, WritableFile* file, - int level = -1, const bool enable_compression = true); + CompressionType compression_type); // REQUIRES: Either Finish() or Abandon() has been called. ~BlockBasedTableBuilder(); diff --git a/table/block_based_table_factory.cc b/table/block_based_table_factory.cc index c2c3a79ba..0944c7f56 100644 --- a/table/block_based_table_factory.cc +++ b/table/block_based_table_factory.cc @@ -13,24 +13,22 @@ #include #include #include "table/block_based_table_builder.h" -#include "table/block_based_table.h" +#include "table/block_based_table_reader.h" #include "port/port.h" namespace rocksdb { -Status BlockBasedTableFactory::OpenTable(const Options& options, - const EnvOptions& soptions, - unique_ptr && file, - uint64_t file_size, - unique_ptr
* table) const { - +Status BlockBasedTableFactory::GetTableReader( + const Options& options, const EnvOptions& soptions, + unique_ptr && file, uint64_t file_size, + unique_ptr* table_reader) const { return BlockBasedTable::Open(options, soptions, std::move(file), file_size, - table); + table_reader); } TableBuilder* BlockBasedTableFactory::GetTableBuilder( - const Options& options, WritableFile* file, int level, - const bool enable_compression) const { - return new BlockBasedTableBuilder(options, file, level, enable_compression); + const Options& options, WritableFile* file, + CompressionType compression_type) const { + return new BlockBasedTableBuilder(options, file, compression_type); } } // namespace rocksdb diff --git a/table/block_based_table_factory.h b/table/block_based_table_factory.h index fa2cafbc0..677979f4e 100644 --- a/table/block_based_table_factory.h +++ b/table/block_based_table_factory.h @@ -36,12 +36,13 @@ public: const char* Name() const override { return "BlockBasedTable"; } - Status OpenTable(const Options& options, const EnvOptions& soptions, - unique_ptr && file, uint64_t file_size, - unique_ptr
* table) const override; + Status GetTableReader(const Options& options, const EnvOptions& soptions, + unique_ptr && file, + uint64_t file_size, + unique_ptr* table_reader) const override; TableBuilder* GetTableBuilder(const Options& options, WritableFile* file, - int level, const bool enable_compression) const + CompressionType compression_type) const override; }; diff --git a/table/block_based_table.cc b/table/block_based_table_reader.cc similarity index 89% rename from table/block_based_table.cc rename to table/block_based_table_reader.cc index 3105a044f..bec087033 100644 --- a/table/block_based_table.cc +++ b/table/block_based_table_reader.cc @@ -7,7 +7,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. -#include "table/block_based_table.h" +#include "table/block_based_table_reader.h" #include "db/dbformat.h" @@ -113,8 +113,8 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions, unique_ptr && file, uint64_t size, - unique_ptr
* table) { - table->reset(); + unique_ptr* table_reader) { + table_reader->reset(); if (size < Footer::kEncodedLength) { return Status::InvalidArgument("file is too short to be an sstable"); } @@ -151,8 +151,8 @@ Status BlockBasedTable::Open(const Options& options, SetupCacheKeyPrefix(rep); rep->filter_data = nullptr; rep->filter = nullptr; - table->reset(new BlockBasedTable(rep)); - ((BlockBasedTable*) (table->get()))->ReadMeta(footer); + table_reader->reset(new BlockBasedTable(rep)); + ((BlockBasedTable*) (table_reader->get()))->ReadMeta(footer); } else { if (index_block) delete index_block; } @@ -275,12 +275,12 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) { auto& table_stats = rep->table_stats; // All pre-defined stats of type uint64_t std::unordered_map predefined_uint64_stats = { - { TableStatsNames::kDataSize, &table_stats.data_size }, - { TableStatsNames::kIndexSize, &table_stats.index_size }, - { TableStatsNames::kRawKeySize, &table_stats.raw_key_size }, - { TableStatsNames::kRawValueSize, &table_stats.raw_value_size }, - { TableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks }, - { TableStatsNames::kNumEntries, &table_stats.num_entries }, + { BlockBasedTableStatsNames::kDataSize, &table_stats.data_size }, + { BlockBasedTableStatsNames::kIndexSize, &table_stats.index_size }, + { BlockBasedTableStatsNames::kRawKeySize, &table_stats.raw_key_size }, + { BlockBasedTableStatsNames::kRawValueSize, &table_stats.raw_value_size }, + { BlockBasedTableStatsNames::kNumDataBlocks, &table_stats.num_data_blocks}, + { BlockBasedTableStatsNames::kNumEntries, &table_stats.num_entries }, }; std::string last_key; @@ -313,7 +313,7 @@ Status BlockBasedTable::ReadStats(const Slice& handle_value, Rep* rep) { continue; } *(pos->second) = val; - } else if (key == TableStatsNames::kFilterPolicy) { + } else if (key == BlockBasedTableStatsNames::kFilterPolicy) { table_stats.filter_policy_name = raw_val.ToString(); } else { // handle user-collected 
@@ -464,7 +464,7 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) { // we're past end of file may_match = false; } else if (ExtractUserKey(iiter->key()).starts_with( - ExtractUserKey(internal_prefix))) { + ExtractUserKey(internal_prefix))) { // we need to check for this subtle case because our only // guarantee is that "the key is a string >= last key in that data // block" according to the doc/table_format.txt spec. @@ -497,10 +497,6 @@ bool BlockBasedTable::PrefixMayMatch(const Slice& internal_prefix) { return may_match; } -Iterator* Table::NewIterator(const ReadOptions& options) { - return nullptr; -} - Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) { if (options.prefix) { InternalKey internal_prefix(*options.prefix, 0, kTypeValue); @@ -517,21 +513,23 @@ Iterator* BlockBasedTable::NewIterator(const ReadOptions& options) { options, rep_->soptions); } -Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k, - void* arg, - bool (*saver)(void*, const Slice&, const Slice&, - bool), - void (*mark_key_may_exist)(void*)) { +Status BlockBasedTable::Get( + const ReadOptions& readOptions, + const Slice& key, + void* handle_context, + bool (*result_handler)(void* handle_context, const Slice& k, + const Slice& v, bool didIO), + void (*mark_key_may_exist_handler)(void* handle_context)) { Status s; Iterator* iiter = rep_->index_block->NewIterator(rep_->options.comparator); bool done = false; - for (iiter->Seek(k); iiter->Valid() && !done; iiter->Next()) { + for (iiter->Seek(key); iiter->Valid() && !done; iiter->Next()) { Slice handle_value = iiter->value(); FilterBlockReader* filter = rep_->filter; BlockHandle handle; if (filter != nullptr && handle.DecodeFrom(&handle_value).ok() && - !filter->KeyMayMatch(handle.offset(), k)) { + !filter->KeyMayMatch(handle.offset(), key)) { // Not found // TODO: think about interaction with Merge. If a user key cannot // cross one data block, we should be fine. 
@@ -540,19 +538,20 @@ Status BlockBasedTable::Get(const ReadOptions& options, const Slice& k, } else { bool didIO = false; std::unique_ptr block_iter( - BlockReader(this, options, iiter->value(), &didIO)); + BlockReader(this, readOptions, iiter->value(), &didIO)); - if (options.read_tier && block_iter->status().IsIncomplete()) { + if (readOptions.read_tier && block_iter->status().IsIncomplete()) { // couldn't get block from block_cache // Update Saver.state to Found because we are only looking for whether // we can guarantee the key is not there when "no_io" is set - (*mark_key_may_exist)(arg); + (*mark_key_may_exist_handler)(handle_context); break; } // Call the *saver function on each entry/block until it returns false - for (block_iter->Seek(k); block_iter->Valid(); block_iter->Next()) { - if (!(*saver)(arg, block_iter->key(), block_iter->value(), didIO)) { + for (block_iter->Seek(key); block_iter->Valid(); block_iter->Next()) { + if (!(*result_handler)(handle_context, block_iter->key(), + block_iter->value(), didIO)) { done = true; break; } @@ -611,12 +610,17 @@ uint64_t BlockBasedTable::ApproximateOffsetOf(const Slice& key) { const std::string BlockBasedTable::kFilterBlockPrefix = "filter."; const std::string BlockBasedTable::kStatsBlock = "rocksdb.stats"; -const std::string TableStatsNames::kDataSize = "rocksdb.data.size"; -const std::string TableStatsNames::kIndexSize = "rocksdb.index.size"; -const std::string TableStatsNames::kRawKeySize = "rocksdb.raw.key.size"; -const std::string TableStatsNames::kRawValueSize = "rocksdb.raw.value.size"; -const std::string TableStatsNames::kNumDataBlocks = "rocksdb.num.data.blocks"; -const std::string TableStatsNames::kNumEntries = "rocksdb.num.entries"; -const std::string TableStatsNames::kFilterPolicy = "rocksdb.filter.policy"; +const std::string BlockBasedTableStatsNames::kDataSize = "rocksdb.data.size"; +const std::string BlockBasedTableStatsNames::kIndexSize = "rocksdb.index.size"; +const std::string 
BlockBasedTableStatsNames::kRawKeySize = + "rocksdb.raw.key.size"; +const std::string BlockBasedTableStatsNames::kRawValueSize = + "rocksdb.raw.value.size"; +const std::string BlockBasedTableStatsNames::kNumDataBlocks = + "rocksdb.num.data.blocks"; +const std::string BlockBasedTableStatsNames::kNumEntries = + "rocksdb.num.entries"; +const std::string BlockBasedTableStatsNames::kFilterPolicy = + "rocksdb.filter.policy"; } // namespace rocksdb diff --git a/table/block_based_table.h b/table/block_based_table_reader.h similarity index 77% rename from table/block_based_table.h rename to table/block_based_table_reader.h index e1ff04940..848c55655 100644 --- a/table/block_based_table.h +++ b/table/block_based_table_reader.h @@ -24,14 +24,14 @@ struct Options; class RandomAccessFile; struct ReadOptions; class TableCache; -class Table; +class TableReader; using std::unique_ptr; // A Table is a sorted map from strings to strings. Tables are // immutable and persistent. A Table may be safely accessed from // multiple threads without external synchronization. -class BlockBasedTable : public Table { +class BlockBasedTable : public TableReader { public: static const std::string kFilterBlockPrefix; static const std::string kStatsBlock; @@ -40,19 +40,17 @@ class BlockBasedTable : public Table { // of "file", and read the metadata entries necessary to allow // retrieving data from the table. // - // If successful, returns ok and sets "*table" to the newly opened - // table. The client should delete "*table" when no longer needed. - // If there was an error while initializing the table, sets "*table" - // to nullptr and returns a non-ok status. Does not take ownership of - // "*source", but the client must ensure that "source" remains live - // for the duration of the returned table's lifetime. + // If successful, returns ok and sets "*table_reader" to the newly opened + // table. The client should delete "*table_reader" when no longer needed. 
+ // If there was an error while initializing the table, sets "*table_reader" + // to nullptr and returns a non-ok status. // // *file must remain live while this Table is in use. static Status Open(const Options& options, const EnvOptions& soptions, unique_ptr&& file, uint64_t file_size, - unique_ptr
* table); + unique_ptr* table_reader); bool PrefixMayMatch(const Slice& internal_prefix) override; @@ -62,10 +60,13 @@ class BlockBasedTable : public Table { Iterator* NewIterator(const ReadOptions&) override; Status Get( - const ReadOptions&, const Slice& key, - void* arg, - bool (*handle_result)(void* arg, const Slice& k, const Slice& v, bool), - void (*mark_key_may_exist)(void*) = nullptr) override; + const ReadOptions& readOptions, + const Slice& key, + void* handle_context, + bool (*result_handler)(void* handle_context, const Slice& k, + const Slice& v, bool didIO), + void (*mark_key_may_exist_handler)(void* handle_context) = nullptr) + override; // Given a key, return an approximate byte offset in the file where // the data for that key begins (or would begin if the key were @@ -115,8 +116,18 @@ class BlockBasedTable : public Table { } // No copying allowed - explicit BlockBasedTable(const Table&) = delete; - void operator=(const Table&) = delete; + explicit BlockBasedTable(const TableReader&) = delete; + void operator=(const TableReader&) = delete; +}; + +struct BlockBasedTableStatsNames { + static const std::string kDataSize; + static const std::string kIndexSize; + static const std::string kRawKeySize; + static const std::string kRawValueSize; + static const std::string kNumDataBlocks; + static const std::string kNumEntries; + static const std::string kFilterPolicy; }; } // namespace rocksdb diff --git a/table/table_test.cc b/table/table_test.cc index 161edd0b1..0b080673c 100644 --- a/table/table_test.cc +++ b/table/table_test.cc @@ -22,7 +22,7 @@ #include "table/block.h" #include "table/block_builder.h" #include "table/format.h" -#include "table/block_based_table.h" +#include "table/block_based_table_reader.h" #include "table/block_based_table_builder.h" #include "util/random.h" #include "util/testharness.h" @@ -250,7 +250,7 @@ class BlockBasedTableConstructor: public Constructor { virtual Status FinishImpl(const Options& options, const KVMap& data) { 
Reset(); sink_.reset(new StringSink()); - BlockBasedTableBuilder builder(options, sink_.get()); + BlockBasedTableBuilder builder(options, sink_.get(), options.compression); for (KVMap::const_iterator it = data.begin(); it != data.end(); @@ -267,36 +267,36 @@ class BlockBasedTableConstructor: public Constructor { uniq_id_ = cur_uniq_id_++; source_.reset(new StringSource(sink_->contents(), uniq_id_)); unique_ptr table_factory; - return options.table_factory->OpenTable(options, soptions, - std::move(source_), - sink_->contents().size(), - &table_); + return options.table_factory->GetTableReader(options, soptions, + std::move(source_), + sink_->contents().size(), + &table_reader_); } virtual Iterator* NewIterator() const { - return table_->NewIterator(ReadOptions()); + return table_reader_->NewIterator(ReadOptions()); } uint64_t ApproximateOffsetOf(const Slice& key) const { - return table_->ApproximateOffsetOf(key); + return table_reader_->ApproximateOffsetOf(key); } virtual Status Reopen(const Options& options) { source_.reset(new StringSource(sink_->contents(), uniq_id_)); - return options.table_factory->OpenTable(options, soptions, - std::move(source_), - sink_->contents().size(), - &table_); + return options.table_factory->GetTableReader(options, soptions, + std::move(source_), + sink_->contents().size(), + &table_reader_); } - virtual Table* table() { - return table_.get(); + virtual TableReader* table_reader() { + return table_reader_.get(); } private: void Reset() { uniq_id_ = 0; - table_.reset(); + table_reader_.reset(); sink_.reset(); source_.reset(); } @@ -304,7 +304,7 @@ class BlockBasedTableConstructor: public Constructor { uint64_t uniq_id_; unique_ptr sink_; unique_ptr source_; - unique_ptr
table_; + unique_ptr table_reader_; BlockBasedTableConstructor(); @@ -883,7 +883,7 @@ TEST(TableTest, BasicTableStats) { c.Finish(options, &keys, &kvmap); - auto& stats = c.table()->GetTableStats(); + auto& stats = c.table_reader()->GetTableStats(); ASSERT_EQ(kvmap.size(), stats.num_entries); auto raw_key_size = kvmap.size() * 2ul; @@ -918,7 +918,7 @@ TEST(TableTest, FilterPolicyNameStats) { options.filter_policy = filter_policy.get(); c.Finish(options, &keys, &kvmap); - auto& stats = c.table()->GetTableStats(); + auto& stats = c.table_reader()->GetTableStats(); ASSERT_EQ("rocksdb.BuiltinBloomFilter", stats.filter_policy_name); } @@ -960,7 +960,7 @@ TEST(TableTest, IndexSizeStat) { c.Finish(options, &ks, &kvmap); auto index_size = - c.table()->GetTableStats().index_size; + c.table_reader()->GetTableStats().index_size; ASSERT_GT(index_size, last_index_size); last_index_size = index_size; } @@ -985,7 +985,7 @@ TEST(TableTest, NumBlockStat) { c.Finish(options, &ks, &kvmap); ASSERT_EQ( kvmap.size(), - c.table()->GetTableStats().num_data_blocks + c.table_reader()->GetTableStats().num_data_blocks ); } @@ -1100,7 +1100,7 @@ TEST(TableTest, BlockCacheLeak) { ASSERT_OK(c.Reopen(opt)); for (const std::string& key: keys) { - ASSERT_TRUE(c.table()->TEST_KeyInCache(ReadOptions(), key)); + ASSERT_TRUE(c.table_reader()->TEST_KeyInCache(ReadOptions(), key)); } } diff --git a/tools/sst_dump.cc b/tools/sst_dump.cc index 6ae059201..c89b8e372 100644 --- a/tools/sst_dump.cc +++ b/tools/sst_dump.cc @@ -63,7 +63,7 @@ Status SstFileReader::ReadSequential(bool print_kv, bool has_to, const std::string& to_key) { - unique_ptr
table; + unique_ptr table_reader; InternalKeyComparator internal_comparator_(BytewiseComparator()); Options table_options; table_options.comparator = &internal_comparator_; @@ -76,14 +76,15 @@ Status SstFileReader::ReadSequential(bool print_kv, uint64_t file_size; table_options.env->GetFileSize(file_name_, &file_size); unique_ptr table_factory; - s = table_options.table_factory->OpenTable(table_options, soptions_, - std::move(file), file_size, - &table); + s = table_options.table_factory->GetTableReader(table_options, soptions_, + std::move(file), file_size, + &table_reader); if(!s.ok()) { return s; } - Iterator* iter = table->NewIterator(ReadOptions(verify_checksum_, false)); + Iterator* iter = table_reader->NewIterator(ReadOptions(verify_checksum_, + false)); uint64_t i = 0; if (has_from) { InternalKey ikey(from_key, kMaxSequenceNumber, kValueTypeForSeek);