// Copyright (c) 2011-present, Facebook, Inc. All rights reserved. // This source code is licensed under both the GPLv2 (found in the // COPYING file in the root directory) and Apache 2.0 License // (found in the LICENSE.Apache file in the root directory). #pragma once #include <string> #include <unordered_set> #include <vector> #include "db/column_family.h" #include "db/dbformat.h" #include "db/internal_stats.h" #include "db/snapshot_impl.h" #include "options/db_options.h" #include "rocksdb/db.h" #include "rocksdb/env.h" #include "rocksdb/sst_file_writer.h" #include "util/autovector.h" namespace rocksdb { struct IngestedFileInfo { // External file path std::string external_file_path; // Smallest user key in external file std::string smallest_user_key; // Largest user key in external file std::string largest_user_key; // Sequence number for keys in external file SequenceNumber original_seqno; // Offset of the global sequence number field in the file, will // be zero if version is 1 (global seqno is not supported) size_t global_seqno_offset; // External file size uint64_t file_size; // total number of keys in external file uint64_t num_entries; // Id of column family this file shoule be ingested into uint32_t cf_id; // TableProperties read from external file TableProperties table_properties; // Version of external file int version; // FileDescriptor for the file inside the DB FileDescriptor fd; // file path that we picked for file inside the DB std::string internal_file_path = ""; // Global sequence number that we picked for the file inside the DB SequenceNumber assigned_seqno = 0; // Level inside the DB we picked for the external file. int picked_level = 0; InternalKey smallest_internal_key() const { return InternalKey(smallest_user_key, assigned_seqno, ValueType::kTypeValue); } InternalKey largest_internal_key() const { return InternalKey(largest_user_key, assigned_seqno, ValueType::kTypeValue); } }; class ExternalSstFileIngestionJob { public: ExternalSstFileIngestionJob( Env* env, VersionSet* versions, ColumnFamilyData* cfd, const ImmutableDBOptions& db_options, const EnvOptions& env_options, SnapshotList* db_snapshots, const IngestExternalFileOptions& ingestion_options) : env_(env), versions_(versions), cfd_(cfd), db_options_(db_options), env_options_(env_options), db_snapshots_(db_snapshots), ingestion_options_(ingestion_options), job_start_time_(env_->NowMicros()) {} // Prepare the job by copying external files into the DB. Status Prepare(const std::vector<std::string>& external_files_paths); // Check if we need to flush the memtable before running the ingestion job // This will be true if the files we are ingesting are overlapping with any // key range in the memtable. // REQUIRES: Mutex held Status NeedsFlush(bool* flush_needed); // Will execute the ingestion job and prepare edit() to be applied. // REQUIRES: Mutex held Status Run(); // Update column family stats. // REQUIRES: Mutex held void UpdateStats(); // Cleanup after successful/failed job void Cleanup(const Status& status); VersionEdit* edit() { return &edit_; } const autovector<IngestedFileInfo>& files_to_ingest() const { return files_to_ingest_; } private: // Open the external file and populate `file_to_ingest` with all the // external information we need to ingest this file. Status GetIngestedFileInfo(const std::string& external_file, IngestedFileInfo* file_to_ingest); // Check if the files we are ingesting overlap with any memtable. // REQUIRES: Mutex held Status IngestedFilesOverlapWithMemtables(SuperVersion* sv, bool* overlap); // Assign `file_to_ingest` the appropriate sequence number and the lowest // possible level that it can be ingested to according to compaction_style. // REQUIRES: Mutex held Status AssignLevelAndSeqnoForIngestedFile(SuperVersion* sv, bool force_global_seqno, CompactionStyle compaction_style, IngestedFileInfo* file_to_ingest, SequenceNumber* assigned_seqno); // File that we want to ingest behind always goes to the lowest level; // we just check that it fits in the level, that DB allows ingest_behind, // and that we don't have 0 seqnums at the upper levels. // REQUIRES: Mutex held Status CheckLevelForIngestedBehindFile(IngestedFileInfo* file_to_ingest); // Set the file global sequence number to `seqno` Status AssignGlobalSeqnoForIngestedFile(IngestedFileInfo* file_to_ingest, SequenceNumber seqno); // Check if `file_to_ingest` key range overlap with the range `iter` represent // REQUIRES: Mutex held Status IngestedFileOverlapWithIteratorRange( const IngestedFileInfo* file_to_ingest, InternalIterator* iter, bool* overlap); // Check if `file_to_ingest` key range overlap with level // REQUIRES: Mutex held Status IngestedFileOverlapWithLevel(SuperVersion* sv, IngestedFileInfo* file_to_ingest, int lvl, bool* overlap_with_level); // Check if `file_to_ingest` can fit in level `level` // REQUIRES: Mutex held bool IngestedFileFitInLevel(const IngestedFileInfo* file_to_ingest, int level); Env* env_; VersionSet* versions_; ColumnFamilyData* cfd_; const ImmutableDBOptions& db_options_; const EnvOptions& env_options_; SnapshotList* db_snapshots_; autovector<IngestedFileInfo> files_to_ingest_; const IngestExternalFileOptions& ingestion_options_; VersionEdit edit_; uint64_t job_start_time_; }; } // namespace rocksdb