|  |  | @ -5,6 +5,7 @@ | 
			
		
	
		
		
			
				
					
					|  |  |  | #include "table/plain_table_reader.h" |  |  |  | #include "table/plain_table_reader.h" | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | #include <string> |  |  |  | #include <string> | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | #include <vector> | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | #include "db/dbformat.h" |  |  |  | #include "db/dbformat.h" | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -35,7 +36,7 @@ namespace rocksdb { | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | namespace { |  |  |  | namespace { | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | inline uint32_t GetSliceHash(Slice const& s) { |  |  |  | inline uint32_t GetSliceHash(const Slice& s) { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   return Hash(s.data(), s.size(), 397) ; |  |  |  |   return Hash(s.data(), s.size(), 397) ; | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -43,6 +44,12 @@ inline uint32_t GetBucketIdFromHash(uint32_t hash, uint32_t num_buckets) { | 
			
		
	
		
		
			
				
					
					|  |  |  |   return hash % num_buckets; |  |  |  |   return hash % num_buckets; | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | // Safely getting a uint32_t element from a char array, where, starting from
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | // `base`, every 4 bytes are considered as an fixed 32 bit integer.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | inline uint32_t GetFixed32Element(const char* base, size_t offset) { | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   return DecodeFixed32(base + offset * sizeof(uint32_t)); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | }  // namespace
 |  |  |  | }  // namespace
 | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | // Iterator to iterate IndexedTable
 |  |  |  | // Iterator to iterate IndexedTable
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -84,13 +91,14 @@ class PlainTableIterator : public Iterator { | 
			
		
	
		
		
			
				
					
					|  |  |  | }; |  |  |  | }; | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | extern const uint64_t kPlainTableMagicNumber; |  |  |  | extern const uint64_t kPlainTableMagicNumber; | 
			
		
	
		
		
			
				
					
					|  |  |  | PlainTableReader::PlainTableReader(const EnvOptions& storage_options, |  |  |  | PlainTableReader::PlainTableReader( | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                    const InternalKeyComparator& icomparator, |  |  |  |     const Options& options, unique_ptr<RandomAccessFile>&& file, | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                    uint64_t file_size, int bloom_bits_per_key, |  |  |  |     const EnvOptions& storage_options, const InternalKeyComparator& icomparator, | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                    double hash_table_ratio, |  |  |  |     uint64_t file_size, int bloom_bits_per_key, double hash_table_ratio, | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                    size_t index_sparseness, |  |  |  |     size_t index_sparseness, const TableProperties* table_properties) | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                    const TableProperties* table_properties) |  |  |  |     : options_(options), | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     : soptions_(storage_options), |  |  |  |       soptions_(storage_options), | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |       file_(std::move(file)), | 
			
		
	
		
		
			
				
					
					|  |  |  |       internal_comparator_(icomparator), |  |  |  |       internal_comparator_(icomparator), | 
			
		
	
		
		
			
				
					
					|  |  |  |       file_size_(file_size), |  |  |  |       file_size_(file_size), | 
			
		
	
		
		
			
				
					
					|  |  |  |       kHashTableRatio(hash_table_ratio), |  |  |  |       kHashTableRatio(hash_table_ratio), | 
			
		
	
	
		
		
			
				
					|  |  | @ -98,12 +106,11 @@ PlainTableReader::PlainTableReader(const EnvOptions& storage_options, | 
			
		
	
		
		
			
				
					
					|  |  |  |       kIndexIntervalForSamePrefixKeys(index_sparseness), |  |  |  |       kIndexIntervalForSamePrefixKeys(index_sparseness), | 
			
		
	
		
		
			
				
					
					|  |  |  |       table_properties_(table_properties), |  |  |  |       table_properties_(table_properties), | 
			
		
	
		
		
			
				
					
					|  |  |  |       data_end_offset_(table_properties_->data_size), |  |  |  |       data_end_offset_(table_properties_->data_size), | 
			
		
	
		
		
			
				
					
					|  |  |  |       user_key_len_(table_properties->fixed_key_len) {} |  |  |  |       user_key_len_(table_properties->fixed_key_len) { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   assert(kHashTableRatio >= 0.0); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | PlainTableReader::~PlainTableReader() { |  |  |  | PlainTableReader::~PlainTableReader() { | 
			
		
	
		
		
			
				
					
					|  |  |  |   delete[] hash_table_; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   delete[] sub_index_; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   delete bloom_; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | Status PlainTableReader::Open( |  |  |  | Status PlainTableReader::Open( | 
			
		
	
	
		
		
			
				
					|  |  | @ -126,10 +133,8 @@ Status PlainTableReader::Open( | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader( |  |  |  |   std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader( | 
			
		
	
		
		
			
				
					
					|  |  |  |       soptions, internal_comparator, file_size, bloom_bits_per_key, |  |  |  |       options, std::move(file), soptions, internal_comparator, file_size, | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |       hash_table_ratio, index_sparseness, props)); |  |  |  |       bloom_bits_per_key, hash_table_ratio, index_sparseness, props)); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   new_reader->file_ = std::move(file); |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   new_reader->options_ = options; |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   // -- Populate Index
 |  |  |  |   // -- Populate Index
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   s = new_reader->PopulateIndex(); |  |  |  |   s = new_reader->PopulateIndex(); | 
			
		
	
	
		
		
			
				
					|  |  | @ -198,6 +203,9 @@ class PlainTableReader::IndexRecordList { | 
			
		
	
		
		
			
				
					
					|  |  |  |     return result; |  |  |  |     return result; | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   // Each group in `groups_` contains fix-sized records (determined by
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   // kNumRecordsPerGroup). Which can help us minimize the cost if resizing
 | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |   // occurs.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const size_t kNumRecordsPerGroup; |  |  |  |   const size_t kNumRecordsPerGroup; | 
			
		
	
		
		
			
				
					
					|  |  |  |   IndexRecord* current_group_; |  |  |  |   IndexRecord* current_group_; | 
			
		
	
		
		
			
				
					
					|  |  |  |   // List of arrays allocated
 |  |  |  |   // List of arrays allocated
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -206,12 +214,11 @@ class PlainTableReader::IndexRecordList { | 
			
		
	
		
		
			
				
					
					|  |  |  | }; |  |  |  | }; | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, |  |  |  | Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, | 
			
		
	
		
		
			
				
					
					|  |  |  |                                                  int* num_prefixes, |  |  |  |                                                  int* num_prefixes) const { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                                  DynamicBloom* bloom_) const { |  |  |  |  | 
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   Slice prev_key_prefix_slice; |  |  |  |   Slice prev_key_prefix_slice; | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t prev_key_prefix_hash = 0; |  |  |  |   uint32_t prev_key_prefix_hash = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t pos = data_start_offset_; |  |  |  |   uint32_t pos = data_start_offset_; | 
			
		
	
		
		
			
				
					
					|  |  |  |   int key_index_within_prefix = 0; |  |  |  |   int num_keys_per_prefix = 0; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   bool is_first_record = true; |  |  |  |   bool is_first_record = true; | 
			
		
	
		
		
			
				
					
					|  |  |  |   HistogramImpl keys_per_prefix_hist; |  |  |  |   HistogramImpl keys_per_prefix_hist; | 
			
		
	
		
		
			
				
					
					|  |  |  |   // Need map to be ordered to make sure sub indexes generated
 |  |  |  |   // Need map to be ordered to make sure sub indexes generated
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -222,7 +229,7 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t key_offset = pos; |  |  |  |     uint32_t key_offset = pos; | 
			
		
	
		
		
			
				
					
					|  |  |  |     ParsedInternalKey key; |  |  |  |     ParsedInternalKey key; | 
			
		
	
		
		
			
				
					
					|  |  |  |     Slice value_slice; |  |  |  |     Slice value_slice; | 
			
		
	
		
		
			
				
					
					|  |  |  |     Status s = Next(pos, &key, &value_slice, &pos); |  |  |  |     Status s = Next(&pos, &key, &value_slice); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     if (!s.ok()) { |  |  |  |     if (!s.ok()) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       return s; |  |  |  |       return s; | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
	
		
		
			
				
					|  |  | @ -235,22 +242,22 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (is_first_record || prev_key_prefix_slice != key_prefix_slice) { |  |  |  |     if (is_first_record || prev_key_prefix_slice != key_prefix_slice) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       ++(*num_prefixes); |  |  |  |       ++(*num_prefixes); | 
			
		
	
		
		
			
				
					
					|  |  |  |       if (!is_first_record) { |  |  |  |       if (!is_first_record) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         keys_per_prefix_hist.Add(key_index_within_prefix); |  |  |  |         keys_per_prefix_hist.Add(num_keys_per_prefix); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       } |  |  |  |       } | 
			
		
	
		
		
			
				
					
					|  |  |  |       key_index_within_prefix = 0; |  |  |  |       num_keys_per_prefix = 0; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       prev_key_prefix_slice = key_prefix_slice; |  |  |  |       prev_key_prefix_slice = key_prefix_slice; | 
			
		
	
		
		
			
				
					
					|  |  |  |       prev_key_prefix_hash = GetSliceHash(key_prefix_slice); |  |  |  |       prev_key_prefix_hash = GetSliceHash(key_prefix_slice); | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (kIndexIntervalForSamePrefixKeys == 0 || |  |  |  |     if (kIndexIntervalForSamePrefixKeys == 0 || | 
			
		
	
		
		
			
				
					
					|  |  |  |         key_index_within_prefix++ % kIndexIntervalForSamePrefixKeys == 0) { |  |  |  |         num_keys_per_prefix++ % kIndexIntervalForSamePrefixKeys == 0) { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       // Add an index key for every kIndexIntervalForSamePrefixKeys keys
 |  |  |  |       // Add an index key for every kIndexIntervalForSamePrefixKeys keys
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       record_list->AddRecord(prev_key_prefix_hash, key_offset); |  |  |  |       record_list->AddRecord(prev_key_prefix_hash, key_offset); | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |     is_first_record = false; |  |  |  |     is_first_record = false; | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   keys_per_prefix_hist.Add(key_index_within_prefix); |  |  |  |   keys_per_prefix_hist.Add(num_keys_per_prefix); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   Log(options_.info_log, "Number of Keys per prefix Histogram: %s", |  |  |  |   Log(options_.info_log, "Number of Keys per prefix Histogram: %s", | 
			
		
	
		
		
			
				
					
					|  |  |  |       keys_per_prefix_hist.ToString().c_str()); |  |  |  |       keys_per_prefix_hist.ToString().c_str()); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -258,36 +265,35 @@ Status PlainTableReader::PopulateIndexRecordList(IndexRecordList* record_list, | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) { |  |  |  | void PlainTableReader::AllocateIndexAndBloom(int num_prefixes) { | 
			
		
	
		
		
			
				
					
					|  |  |  |   delete[] hash_table_; |  |  |  |   index_.reset(); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (options_.prefix_extractor != nullptr) { |  |  |  |   if (options_.prefix_extractor != nullptr) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey; |  |  |  |     uint32_t bloom_total_bits = num_prefixes * kBloomBitsPerKey; | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (bloom_total_bits > 0) { |  |  |  |     if (bloom_total_bits > 0) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       bloom_ = new DynamicBloom(bloom_total_bits); |  |  |  |       bloom_.reset(new DynamicBloom(bloom_total_bits)); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (options_.prefix_extractor == nullptr || kHashTableRatio <= 0) { |  |  |  |   if (options_.prefix_extractor == nullptr || kHashTableRatio <= 0) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     // Fall back to pure binary search if the user fails to specify a prefix
 |  |  |  |     // Fall back to pure binary search if the user fails to specify a prefix
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     // extractor.
 |  |  |  |     // extractor.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     hash_table_size_ = 1; |  |  |  |     index_size_ = 1; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } else { |  |  |  |   } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |     double hash_table_size_multipier = 1.0 / kHashTableRatio; |  |  |  |     double hash_table_size_multipier = 1.0 / kHashTableRatio; | 
			
		
	
		
		
			
				
					
					|  |  |  |     hash_table_size_ = num_prefixes * hash_table_size_multipier + 1; |  |  |  |     index_size_ = num_prefixes * hash_table_size_multipier + 1; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  |   hash_table_ = new uint32_t[hash_table_size_]; |  |  |  |   index_.reset(new uint32_t[index_size_]); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | size_t PlainTableReader::BucketizeIndexesAndFillBloom( |  |  |  | size_t PlainTableReader::BucketizeIndexesAndFillBloom( | 
			
		
	
		
		
			
				
					
					|  |  |  |     IndexRecordList& record_list, int num_prefixes, |  |  |  |     IndexRecordList* record_list, std::vector<IndexRecord*>* hash_to_offsets, | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     std::vector<IndexRecord*>* hash_to_offsets, |  |  |  |  | 
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     std::vector<uint32_t>* bucket_count) { |  |  |  |     std::vector<uint32_t>* bucket_count) { | 
			
		
	
		
		
			
				
					
					|  |  |  |   size_t sub_index_size_needed = 0; |  |  |  |   size_t sub_index_size_needed = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   bool first = true; |  |  |  |   bool first = true; | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t prev_hash = 0; |  |  |  |   uint32_t prev_hash = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   size_t num_records = record_list.GetNumRecords(); |  |  |  |   size_t num_records = record_list->GetNumRecords(); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   for (size_t i = 0; i < num_records; i++) { |  |  |  |   for (size_t i = 0; i < num_records; i++) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     IndexRecord* index_record = record_list.At(i); |  |  |  |     IndexRecord* index_record = record_list->At(i); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     uint32_t cur_hash = index_record->hash; |  |  |  |     uint32_t cur_hash = index_record->hash; | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (first || prev_hash != cur_hash) { |  |  |  |     if (first || prev_hash != cur_hash) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       prev_hash = cur_hash; |  |  |  |       prev_hash = cur_hash; | 
			
		
	
	
		
		
			
				
					|  |  | @ -296,7 +302,7 @@ size_t PlainTableReader::BucketizeIndexesAndFillBloom( | 
			
		
	
		
		
			
				
					
					|  |  |  |         bloom_->AddHash(cur_hash); |  |  |  |         bloom_->AddHash(cur_hash); | 
			
		
	
		
		
			
				
					
					|  |  |  |       } |  |  |  |       } | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t bucket = GetBucketIdFromHash(cur_hash, hash_table_size_); |  |  |  |     uint32_t bucket = GetBucketIdFromHash(cur_hash, index_size_); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket]; |  |  |  |     IndexRecord* prev_bucket_head = (*hash_to_offsets)[bucket]; | 
			
		
	
		
		
			
				
					
					|  |  |  |     index_record->next = prev_bucket_head; |  |  |  |     index_record->next = prev_bucket_head; | 
			
		
	
		
		
			
				
					
					|  |  |  |     (*hash_to_offsets)[bucket] = index_record; |  |  |  |     (*hash_to_offsets)[bucket] = index_record; | 
			
		
	
	
		
		
			
				
					|  |  | @ -326,27 +332,24 @@ void PlainTableReader::FillIndexes( | 
			
		
	
		
		
			
				
					
					|  |  |  |   size_t buffer_size = 8 * 8; |  |  |  |   size_t buffer_size = 8 * 8; | 
			
		
	
		
		
			
				
					
					|  |  |  |   size_t buffer_used = 0; |  |  |  |   size_t buffer_used = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   sub_index_size_needed += buffer_size; |  |  |  |   sub_index_size_needed += buffer_size; | 
			
		
	
		
		
			
				
					
					|  |  |  |   sub_index_ = new char[sub_index_size_needed]; |  |  |  |   sub_index_.reset(new char[sub_index_size_needed]); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   size_t sub_index_offset = 0; |  |  |  |   size_t sub_index_offset = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   char* prev_ptr; |  |  |  |   for (int i = 0; i < index_size_; i++) { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   char* cur_ptr; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t* sub_index_ptr; |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |   for (int i = 0; i < hash_table_size_; i++) { |  |  |  |  | 
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     uint32_t num_keys_for_bucket = bucket_count[i]; |  |  |  |     uint32_t num_keys_for_bucket = bucket_count[i]; | 
			
		
	
		
		
			
				
					
					|  |  |  |     switch (num_keys_for_bucket) { |  |  |  |     switch (num_keys_for_bucket) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     case 0: |  |  |  |     case 0: | 
			
		
	
		
		
			
				
					
					|  |  |  |       // No key for bucket
 |  |  |  |       // No key for bucket
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       hash_table_[i] = data_end_offset_; |  |  |  |       index_[i] = data_end_offset_; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       break; |  |  |  |       break; | 
			
		
	
		
		
			
				
					
					|  |  |  |     case 1: |  |  |  |     case 1: | 
			
		
	
		
		
			
				
					
					|  |  |  |       // point directly to the file offset
 |  |  |  |       // point directly to the file offset
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       hash_table_[i] = hash_to_offsets[i]->offset; |  |  |  |       index_[i] = hash_to_offsets[i]->offset; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       break; |  |  |  |       break; | 
			
		
	
		
		
			
				
					
					|  |  |  |     default: |  |  |  |     default: | 
			
		
	
		
		
			
				
					
					|  |  |  |       // point to second level indexes.
 |  |  |  |       // point to second level indexes.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       hash_table_[i] = sub_index_offset | kSubIndexMask; |  |  |  |       index_[i] = sub_index_offset | kSubIndexMask; | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |       prev_ptr = sub_index_ + sub_index_offset; |  |  |  |       char* prev_ptr = &sub_index_[sub_index_offset]; | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |       cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket); |  |  |  |       char* cur_ptr = EncodeVarint32(prev_ptr, num_keys_for_bucket); | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       sub_index_offset += (cur_ptr - prev_ptr); |  |  |  |       sub_index_offset += (cur_ptr - prev_ptr); | 
			
		
	
		
		
			
				
					
					|  |  |  |       if (cur_ptr - prev_ptr > 2 |  |  |  |       if (cur_ptr - prev_ptr > 2 | 
			
		
	
		
		
			
				
					
					|  |  |  |           || (cur_ptr - prev_ptr == 2 && num_keys_for_bucket <= 127)) { |  |  |  |           || (cur_ptr - prev_ptr == 2 && num_keys_for_bucket <= 127)) { | 
			
		
	
	
		
		
			
				
					|  |  | @ -359,17 +362,16 @@ void PlainTableReader::FillIndexes( | 
			
		
	
		
		
			
				
					
					|  |  |  |           sub_index_size_needed += buffer_size; |  |  |  |           sub_index_size_needed += buffer_size; | 
			
		
	
		
		
			
				
					
					|  |  |  |           buffer_size *= 2; |  |  |  |           buffer_size *= 2; | 
			
		
	
		
		
			
				
					
					|  |  |  |           char* new_sub_index = new char[sub_index_size_needed]; |  |  |  |           char* new_sub_index = new char[sub_index_size_needed]; | 
			
		
	
		
		
			
				
					
					|  |  |  |           memcpy(new_sub_index, sub_index_, sub_index_offset); |  |  |  |           memcpy(new_sub_index, sub_index_.get(), sub_index_offset); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |           delete[] sub_index_; |  |  |  |           sub_index_.reset(new_sub_index); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |           sub_index_ = new_sub_index; |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         } |  |  |  |         } | 
			
		
	
		
		
			
				
					
					|  |  |  |       } |  |  |  |       } | 
			
		
	
		
		
			
				
					
					|  |  |  |       sub_index_ptr = (uint32_t*) (sub_index_ + sub_index_offset); |  |  |  |       char* sub_index_pos = &sub_index_[sub_index_offset]; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       IndexRecord* record = hash_to_offsets[i]; |  |  |  |       IndexRecord* record = hash_to_offsets[i]; | 
			
		
	
		
		
			
				
					
					|  |  |  |       int j; |  |  |  |       int j; | 
			
		
	
		
		
			
				
					
					|  |  |  |       for (j = num_keys_for_bucket - 1; j >= 0 && record; |  |  |  |       for (j = num_keys_for_bucket - 1; j >= 0 && record; | 
			
		
	
		
		
			
				
					
					|  |  |  |            j--, record = record->next) { |  |  |  |            j--, record = record->next) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         sub_index_ptr[j] = record->offset; |  |  |  |         EncodeFixed32(sub_index_pos + j * sizeof(uint32_t), record->offset); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       } |  |  |  |       } | 
			
		
	
		
		
			
				
					
					|  |  |  |       assert(j == -1 && record == nullptr); |  |  |  |       assert(j == -1 && record == nullptr); | 
			
		
	
		
		
			
				
					
					|  |  |  |       sub_index_offset += kOffsetLen * num_keys_for_bucket; |  |  |  |       sub_index_offset += kOffsetLen * num_keys_for_bucket; | 
			
		
	
	
		
		
			
				
					|  |  | @ -378,7 +380,7 @@ void PlainTableReader::FillIndexes( | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   Log(options_.info_log, "hash table size: %d, suffix_map length %zu", |  |  |  |   Log(options_.info_log, "hash table size: %d, suffix_map length %zu", | 
			
		
	
		
		
			
				
					
					|  |  |  |       hash_table_size_, sub_index_size_needed); |  |  |  |       index_size_, sub_index_size_needed); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | Status PlainTableReader::PopulateIndex() { |  |  |  | Status PlainTableReader::PopulateIndex() { | 
			
		
	
	
		
		
			
				
					|  |  | @ -405,11 +407,11 @@ Status PlainTableReader::PopulateIndex() { | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (IsTotalOrderMode()) { |  |  |  |   if (IsTotalOrderMode()) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey; |  |  |  |     uint32_t num_bloom_bits = table_properties_->num_entries * kBloomBitsPerKey; | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (num_bloom_bits > 0) { |  |  |  |     if (num_bloom_bits > 0) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       bloom_ = new DynamicBloom(num_bloom_bits); |  |  |  |       bloom_.reset(new DynamicBloom(num_bloom_bits)); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   s = PopulateIndexRecordList(&record_list, &num_prefixes, bloom_); |  |  |  |   s = PopulateIndexRecordList(&record_list, &num_prefixes); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   if (!s.ok()) { |  |  |  |   if (!s.ok()) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     return s; |  |  |  |     return s; | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
	
		
		
			
				
					|  |  | @ -419,10 +421,10 @@ Status PlainTableReader::PopulateIndex() { | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   // Bucketize all the index records to a temp data structure, in which for
 |  |  |  |   // Bucketize all the index records to a temp data structure, in which for
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   // each bucket, we generate a linked list of IndexRecord, in reversed order.
 |  |  |  |   // each bucket, we generate a linked list of IndexRecord, in reversed order.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   std::vector<IndexRecord*> hash_to_offsets(hash_table_size_, nullptr); |  |  |  |   std::vector<IndexRecord*> hash_to_offsets(index_size_, nullptr); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   std::vector<uint32_t> bucket_count(hash_table_size_, 0); |  |  |  |   std::vector<uint32_t> bucket_count(index_size_, 0); | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   size_t sub_index_size_needed = BucketizeIndexesAndFillBloom( |  |  |  |   size_t sub_index_size_needed = BucketizeIndexesAndFillBloom( | 
			
		
	
		
		
			
				
					
					|  |  |  |       record_list, num_prefixes, &hash_to_offsets, &bucket_count); |  |  |  |       &record_list, &hash_to_offsets, &bucket_count); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   // From the temp data structure, populate indexes.
 |  |  |  |   // From the temp data structure, populate indexes.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   FillIndexes(sub_index_size_needed, hash_to_offsets, bucket_count); |  |  |  |   FillIndexes(sub_index_size_needed, hash_to_offsets, bucket_count); | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -431,16 +433,16 @@ Status PlainTableReader::PopulateIndex() { | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, |  |  |  | Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, | 
			
		
	
		
		
			
				
					
					|  |  |  |                                    uint32_t prefix_hash, bool& prefix_matched, |  |  |  |                                    uint32_t prefix_hash, bool& prefix_matched, | 
			
		
	
		
		
			
				
					
					|  |  |  |                                    uint32_t* ret_offset) const { |  |  |  |                                    uint32_t* offset) const { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   prefix_matched = false; |  |  |  |   prefix_matched = false; | 
			
		
	
		
		
			
				
					
					|  |  |  |   int bucket = GetBucketIdFromHash(prefix_hash, hash_table_size_); |  |  |  |   int bucket = GetBucketIdFromHash(prefix_hash, index_size_); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t bucket_value = hash_table_[bucket]; |  |  |  |   uint32_t bucket_value = index_[bucket]; | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   if (bucket_value == data_end_offset_) { |  |  |  |   if (bucket_value == data_end_offset_) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     *ret_offset = data_end_offset_; |  |  |  |     *offset = data_end_offset_; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     return Status::OK(); |  |  |  |     return Status::OK(); | 
			
		
	
		
		
			
				
					
					|  |  |  |   } else if ((bucket_value & kSubIndexMask) == 0) { |  |  |  |   } else if ((bucket_value & kSubIndexMask) == 0) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     // point directly to the file
 |  |  |  |     // point directly to the file
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     *ret_offset = bucket_value; |  |  |  |     *offset = bucket_value; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     return Status::OK(); |  |  |  |     return Status::OK(); | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
	
		
		
			
				
					|  |  | @ -448,11 +450,9 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t low = 0; |  |  |  |   uint32_t low = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint64_t prefix_index_offset = bucket_value ^ kSubIndexMask; |  |  |  |   uint64_t prefix_index_offset = bucket_value ^ kSubIndexMask; | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const char* index_ptr = sub_index_ + prefix_index_offset; |  |  |  |   const char* index_ptr = &sub_index_[prefix_index_offset]; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   uint32_t upper_bound = 0; |  |  |  |   uint32_t upper_bound = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   const uint32_t* base_ptr = (const uint32_t*) GetVarint32Ptr(index_ptr, |  |  |  |   const char* base_ptr = GetVarint32Ptr(index_ptr, index_ptr + 4, &upper_bound); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                                               index_ptr + 4, |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |                                                               &upper_bound); |  |  |  |  | 
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   uint32_t high = upper_bound; |  |  |  |   uint32_t high = upper_bound; | 
			
		
	
		
		
			
				
					
					|  |  |  |   ParsedInternalKey mid_key; |  |  |  |   ParsedInternalKey mid_key; | 
			
		
	
		
		
			
				
					
					|  |  |  |   ParsedInternalKey parsed_target; |  |  |  |   ParsedInternalKey parsed_target; | 
			
		
	
	
		
		
			
				
					|  |  | @ -463,7 +463,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, | 
			
		
	
		
		
			
				
					
					|  |  |  |   // The key is between [low, high). Do a binary search between it.
 |  |  |  |   // The key is between [low, high). Do a binary search between it.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   while (high - low > 1) { |  |  |  |   while (high - low > 1) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t mid = (high + low) / 2; |  |  |  |     uint32_t mid = (high + low) / 2; | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t file_offset = base_ptr[mid]; |  |  |  |     uint32_t file_offset = GetFixed32Element(base_ptr, mid); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     size_t tmp; |  |  |  |     size_t tmp; | 
			
		
	
		
		
			
				
					
					|  |  |  |     Status s = ReadKey(file_data_.data() + file_offset, &mid_key, &tmp); |  |  |  |     Status s = ReadKey(file_data_.data() + file_offset, &mid_key, &tmp); | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (!s.ok()) { |  |  |  |     if (!s.ok()) { | 
			
		
	
	
		
		
			
				
					|  |  | @ -477,7 +477,7 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, | 
			
		
	
		
		
			
				
					
					|  |  |  |         // Happen to have found the exact key or target is smaller than the
 |  |  |  |         // Happen to have found the exact key or target is smaller than the
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         // first key after base_offset.
 |  |  |  |         // first key after base_offset.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |         prefix_matched = true; |  |  |  |         prefix_matched = true; | 
			
		
	
		
		
			
				
					
					|  |  |  |         *ret_offset = file_offset; |  |  |  |         *offset = file_offset; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |         return Status::OK(); |  |  |  |         return Status::OK(); | 
			
		
	
		
		
			
				
					
					|  |  |  |       } else { |  |  |  |       } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |         high = mid; |  |  |  |         high = mid; | 
			
		
	
	
		
		
			
				
					|  |  | @ -489,19 +489,19 @@ Status PlainTableReader::GetOffset(const Slice& target, const Slice& prefix, | 
			
		
	
		
		
			
				
					
					|  |  |  |   // to the wrong prefix.
 |  |  |  |   // to the wrong prefix.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   ParsedInternalKey low_key; |  |  |  |   ParsedInternalKey low_key; | 
			
		
	
		
		
			
				
					
					|  |  |  |   size_t tmp; |  |  |  |   size_t tmp; | 
			
		
	
		
		
			
				
					
					|  |  |  |   uint32_t low_key_offset = base_ptr[low]; |  |  |  |   uint32_t low_key_offset = GetFixed32Element(base_ptr, low); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   Status s = ReadKey(file_data_.data() + low_key_offset, &low_key, &tmp); |  |  |  |   Status s = ReadKey(file_data_.data() + low_key_offset, &low_key, &tmp); | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (GetPrefix(low_key) == prefix) { |  |  |  |   if (GetPrefix(low_key) == prefix) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     prefix_matched = true; |  |  |  |     prefix_matched = true; | 
			
		
	
		
		
			
				
					
					|  |  |  |     *ret_offset = low_key_offset; |  |  |  |     *offset = low_key_offset; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } else if (low + 1 < upper_bound) { |  |  |  |   } else if (low + 1 < upper_bound) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     // There is possible a next prefix, return it
 |  |  |  |     // There is possible a next prefix, return it
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     prefix_matched = false; |  |  |  |     prefix_matched = false; | 
			
		
	
		
		
			
				
					
					|  |  |  |     *ret_offset = base_ptr[low + 1]; |  |  |  |     *offset = GetFixed32Element(base_ptr, low + 1); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } else { |  |  |  |   } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |     // target is larger than a key of the last prefix in this bucket
 |  |  |  |     // target is larger than a key of the last prefix in this bucket
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     // but with a different prefix. Key does not exist.
 |  |  |  |     // but with a different prefix. Key does not exist.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |     *ret_offset = data_end_offset_; |  |  |  |     *offset = data_end_offset_; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  |   return Status::OK(); |  |  |  |   return Status::OK(); | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
	
		
		
			
				
					|  |  | @ -514,23 +514,23 @@ Slice PlainTableReader::GetPrefix(const ParsedInternalKey& target) const { | 
			
		
	
		
		
			
				
					
					|  |  |  |   return GetPrefixFromUserKey(target.user_key); |  |  |  |   return GetPrefixFromUserKey(target.user_key); | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, |  |  |  | Status PlainTableReader::ReadKey(const char* start, ParsedInternalKey* key, | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |                                  size_t* bytes_read) const { |  |  |  |                                  size_t* bytes_read) const { | 
			
		
	
		
		
			
				
					
					|  |  |  |   const char* key_ptr = nullptr; |  |  |  |   const char* key_ptr = nullptr; | 
			
		
	
		
		
			
				
					
					|  |  |  |   *bytes_read = 0; |  |  |  |   *bytes_read = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   size_t user_key_size = 0; |  |  |  |   size_t user_key_size = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (IsFixedLength()) { |  |  |  |   if (IsFixedLength()) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     user_key_size = user_key_len_; |  |  |  |     user_key_size = user_key_len_; | 
			
		
	
		
		
			
				
					
					|  |  |  |     key_ptr = row_ptr; |  |  |  |     key_ptr = start; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } else { |  |  |  |   } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |     uint32_t tmp_size = 0; |  |  |  |     uint32_t tmp_size = 0; | 
			
		
	
		
		
			
				
					
					|  |  |  |     key_ptr = GetVarint32Ptr(row_ptr, file_data_.data() + data_end_offset_, |  |  |  |     key_ptr = | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                              &tmp_size); |  |  |  |         GetVarint32Ptr(start, file_data_.data() + data_end_offset_, &tmp_size); | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     if (key_ptr == nullptr) { |  |  |  |     if (key_ptr == nullptr) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       return Status::Corruption("Unable to read the next key"); |  |  |  |       return Status::Corruption("Unable to read the next key"); | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |     user_key_size = (size_t)tmp_size; |  |  |  |     user_key_size = (size_t)tmp_size; | 
			
		
	
		
		
			
				
					
					|  |  |  |     *bytes_read = key_ptr - row_ptr; |  |  |  |     *bytes_read = key_ptr - start; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (key_ptr + user_key_size + 1 >= file_data_.data() + data_end_offset_) { |  |  |  |   if (key_ptr + user_key_size + 1 >= file_data_.data() + data_end_offset_) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     return Status::Corruption("Unable to read the next key"); |  |  |  |     return Status::Corruption("Unable to read the next key"); | 
			
		
	
	
		
		
			
				
					|  |  | @ -543,7 +543,7 @@ Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, | 
			
		
	
		
		
			
				
					
					|  |  |  |     key->type = kTypeValue; |  |  |  |     key->type = kTypeValue; | 
			
		
	
		
		
			
				
					
					|  |  |  |     *bytes_read += user_key_size + 1; |  |  |  |     *bytes_read += user_key_size + 1; | 
			
		
	
		
		
			
				
					
					|  |  |  |   } else { |  |  |  |   } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (row_ptr + user_key_size + 8 >= file_data_.data() + data_end_offset_) { |  |  |  |     if (start + user_key_size + 8 >= file_data_.data() + data_end_offset_) { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |       return Status::Corruption("Unable to read the next key"); |  |  |  |       return Status::Corruption("Unable to read the next key"); | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |     if (!ParseInternalKey(Slice(key_ptr, user_key_size + 8), key)) { |  |  |  |     if (!ParseInternalKey(Slice(key_ptr, user_key_size + 8), key)) { | 
			
		
	
	
		
		
			
				
					|  |  | @ -555,29 +555,28 @@ Status PlainTableReader::ReadKey(const char* row_ptr, ParsedInternalKey* key, | 
			
		
	
		
		
			
				
					
					|  |  |  |   return Status::OK(); |  |  |  |   return Status::OK(); | 
			
		
	
		
		
			
				
					
					|  |  |  | } |  |  |  | } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  | Status PlainTableReader::Next(uint32_t offset, ParsedInternalKey* key, |  |  |  | Status PlainTableReader::Next(uint32_t* offset, ParsedInternalKey* key, | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                               Slice* value, uint32_t* next_offset) const { |  |  |  |                               Slice* value) const { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   if (offset == data_end_offset_) { |  |  |  |   if (*offset == data_end_offset_) { | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |     *next_offset = data_end_offset_; |  |  |  |     *offset = data_end_offset_; | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     return Status::OK(); |  |  |  |     return Status::OK(); | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (offset > data_end_offset_) { |  |  |  |   if (*offset > data_end_offset_) { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     return Status::Corruption("Offset is out of file size"); |  |  |  |     return Status::Corruption("Offset is out of file size"); | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   const char* row_ptr = file_data_.data() + offset; |  |  |  |   const char* start = file_data_.data() + *offset; | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   size_t bytes_for_key; |  |  |  |   size_t bytes_for_key; | 
			
		
	
		
		
			
				
					
					|  |  |  |   Status s = ReadKey(row_ptr, key, &bytes_for_key); |  |  |  |   Status s = ReadKey(start, key, &bytes_for_key); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   uint32_t value_size; |  |  |  |   uint32_t value_size; | 
			
		
	
		
		
			
				
					
					|  |  |  |   const char* value_ptr = GetVarint32Ptr(row_ptr + bytes_for_key, |  |  |  |   const char* value_ptr = GetVarint32Ptr( | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                          file_data_.data() + data_end_offset_, |  |  |  |       start + bytes_for_key, file_data_.data() + data_end_offset_, &value_size); | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |                                          &value_size); |  |  |  |  | 
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |   if (value_ptr == nullptr) { |  |  |  |   if (value_ptr == nullptr) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     return Status::Corruption("Error reading value length."); |  |  |  |     return Status::Corruption("Error reading value length."); | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  |   *next_offset = offset + (value_ptr - row_ptr) + value_size; |  |  |  |   *offset = *offset + (value_ptr - start) + value_size; | 
			
				
				
			
		
	
		
		
			
				
					
					|  |  |  |   if (*next_offset > data_end_offset_) { |  |  |  |   if (*offset > data_end_offset_) { | 
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     return Status::Corruption("Reach end of file when reading value"); |  |  |  |     return Status::Corruption("Reach end of file when reading value"); | 
			
		
	
		
		
			
				
					
					|  |  |  |   } |  |  |  |   } | 
			
		
	
		
		
			
				
					
					|  |  |  |   *value = Slice(value_ptr, value_size); |  |  |  |   *value = Slice(value_ptr, value_size); | 
			
		
	
	
		
		
			
				
					|  |  | @ -624,7 +623,7 @@ Status PlainTableReader::Get(const ReadOptions& ro, const Slice& target, | 
			
		
	
		
		
			
				
					
					|  |  |  | 
 |  |  |  | 
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   Slice found_value; |  |  |  |   Slice found_value; | 
			
		
	
		
		
			
				
					
					|  |  |  |   while (offset < data_end_offset_) { |  |  |  |   while (offset < data_end_offset_) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     Status s = Next(offset, &found_key, &found_value, &offset); |  |  |  |     Status s = Next(&offset, &found_key, &found_value); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     if (!s.ok()) { |  |  |  |     if (!s.ok()) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       return s; |  |  |  |       return s; | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
	
		
		
			
				
					|  |  | @ -680,7 +679,7 @@ void PlainTableIterator::SeekToLast() { | 
			
		
	
		
		
			
				
					
					|  |  |  | void PlainTableIterator::Seek(const Slice& target) { |  |  |  | void PlainTableIterator::Seek(const Slice& target) { | 
			
		
	
		
		
			
				
					
					|  |  |  |   // If the user doesn't set prefix seek option and we are not able to do a
 |  |  |  |   // If the user doesn't set prefix seek option and we are not able to do a
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   // total Seek(). assert failure.
 |  |  |  |   // total Seek(). assert failure.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (!use_prefix_seek_ && table_->hash_table_size_ > 1) { |  |  |  |   if (!use_prefix_seek_ && table_->index_size_ > 1) { | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     assert(false); |  |  |  |     assert(false); | 
			
		
	
		
		
			
				
					
					|  |  |  |     status_ = Status::NotSupported( |  |  |  |     status_ = Status::NotSupported( | 
			
		
	
		
		
			
				
					
					|  |  |  |         "PlainTable cannot issue non-prefix seek unless in total order mode."); |  |  |  |         "PlainTable cannot issue non-prefix seek unless in total order mode."); | 
			
		
	
	
		
		
			
				
					|  |  | @ -736,7 +735,7 @@ void PlainTableIterator::Next() { | 
			
		
	
		
		
			
				
					
					|  |  |  |   if (offset_ < table_->data_end_offset_) { |  |  |  |   if (offset_ < table_->data_end_offset_) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     Slice tmp_slice; |  |  |  |     Slice tmp_slice; | 
			
		
	
		
		
			
				
					
					|  |  |  |     ParsedInternalKey parsed_key; |  |  |  |     ParsedInternalKey parsed_key; | 
			
		
	
		
		
			
				
					
					|  |  |  |     status_ = table_->Next(next_offset_, &parsed_key, &value_, &next_offset_); |  |  |  |     status_ = table_->Next(&next_offset_, &parsed_key, &value_); | 
			
				
				
			
		
	
		
		
	
		
		
			
				
					
					|  |  |  |     if (status_.ok()) { |  |  |  |     if (status_.ok()) { | 
			
		
	
		
		
			
				
					
					|  |  |  |       // Make a copy in this case. TODO optimize.
 |  |  |  |       // Make a copy in this case. TODO optimize.
 | 
			
		
	
		
		
			
				
					
					|  |  |  |       tmp_str_.clear(); |  |  |  |       tmp_str_.clear(); | 
			
		
	
	
		
		
			
				
					|  |  | 
 |