@ -4872,84 +4872,134 @@ Status VersionSet::WriteSnapshot(log::Writer* log) { 
			
		
	
		
			
				
					// (a,b) then (b,c) then (c,d). Knowing this, an optimization is possible where
  
			
		
	
		
			
				
					// we avoid doing binary search for the keys b and c twice and instead somehow
  
			
		
	
		
			
				
					// maintain state of where they first appear in the files.
  
			
		
	
		
			
				
					uint64_t  VersionSet : : ApproximateSize ( Version *  v ,  const  Slice &  start ,  
			
		
	
		
			
				
					uint64_t  VersionSet : : ApproximateSize ( const  SizeApproximationOptions &  options ,  
			
		
	
		
			
				
					                                     Version *  v ,  const  Slice &  start ,   
			
		
	
		
			
				
					                                     const  Slice &  end ,  int  start_level ,   
			
		
	
		
			
				
					                                     int  end_level ,  TableReaderCaller  caller )  {   
			
		
	
		
			
				
					  const  auto &  icmp  =  v - > cfd_ - > internal_comparator ( ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  // pre-condition
   
			
		
	
		
			
				
					  assert ( v - > cfd_ - > internal_comparator ( ) . Compare ( start ,  end )  < =  0 ) ;   
			
		
	
		
			
				
					  assert ( icmp . Compare ( start ,  end )  < =  0 ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  uint64_t  size  =  0 ;   
			
		
	
		
			
				
					  uint64_t  total_full_size  =  0 ;   
			
		
	
		
			
				
					  const  auto *  vstorage  =  v - > storage_info ( ) ;   
			
		
	
		
			
				
					  end_level  =  end_level  = =  - 1   
			
		
	
		
			
				
					                  ?  vstorage - > num_non_empty_levels ( )    
			
		
	
		
			
				
					                  :  std : : min ( end_level ,  vstorage - > num_non_empty_levels ( ) ) ;   
			
		
	
		
			
				
					  const  int  num_non_empty_levels  =  vstorage - > num_non_empty_levels ( ) ;   
			
		
	
		
			
				
					  end_level  =  ( end_level  = =  - 1 )  ?  num_non_empty_levels   
			
		
	
		
			
				
					                                 :  std : : min ( end_level ,  num_non_empty_levels ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  assert ( start_level  < =  end_level ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  for  ( int  level  =  start_level ;  level  <  end_level ;  level + + )  {   
			
		
	
		
			
				
					  // Outline of the optimization that uses options.files_size_error_margin.
   
			
		
	
		
			
				
					  // When approximating the files total size that is used to store a keys range,
   
			
		
	
		
			
				
					  // we first sum up the sizes of the files that fully fall into the range.
   
			
		
	
		
			
				
					  // Then we sum up the sizes of all the files that may intersect with the range
   
			
		
	
		
			
				
					  // (this includes all files in L0 as well). Then, if total_intersecting_size
   
			
		
	
		
			
				
					  // is smaller than total_full_size * options.files_size_error_margin - we can
   
			
		
	
		
			
				
					  // infer that the intersecting files have a sufficiently negligible
   
			
		
	
		
			
				
					  // contribution to the total size, and we can approximate the storage required
   
			
		
	
		
			
				
					  // for the keys in range as just half of the intersecting_files_size.
   
			
		
	
		
			
				
					  // E.g., if the value of files_size_error_margin is 0.1, then the error of the
   
			
		
	
		
			
				
					  // approximation is limited to only ~10% of the total size of files that fully
   
			
		
	
		
			
				
					  // fall into the keys range. In such case, this helps to avoid a costly
   
			
		
	
		
			
				
					  // process of binary searching the intersecting files that is required only
   
			
		
	
		
			
				
					  // for a more precise calculation of the total size.
   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  autovector < FdWithKeyRange * ,  32 >  first_files ;   
			
		
	
		
			
				
					  autovector < FdWithKeyRange * ,  16 >  last_files ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  // scan all the levels
   
			
		
	
		
			
				
					  for  ( int  level  =  start_level ;  level  <  end_level ;  + + level )  {   
			
		
	
		
			
				
					    const  LevelFilesBrief &  files_brief  =  vstorage - > LevelFilesBrief ( level ) ;   
			
		
	
		
			
				
					    if  ( ! files_brief . num_files )  {   
			
		
	
		
			
				
					    if  ( files_brief . num_files  = =  0 )  {   
			
		
	
		
			
				
					      // empty level, skip exploration
   
			
		
	
		
			
				
					      continue ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    if  ( ! level )  {   
			
		
	
		
			
				
					      // level 0 data is sorted order, handle the use case explicitly
   
			
		
	
		
			
				
					      size  + =  ApproximateSizeLevel0 ( v ,  files_brief ,  start ,  end ,  caller ) ;   
			
		
	
		
			
				
					    if  ( level  = =  0 )  {   
			
		
	
		
			
				
					      // level 0 files are not in sorted order, we need to iterate through
   
			
		
	
		
			
				
					      // the list to compute the total bytes that require scanning,
   
			
		
	
		
			
				
					      // so handle the case explicitly (similarly to first_files case)
   
			
		
	
		
			
				
					      for  ( size_t  i  =  0 ;  i  <  files_brief . num_files ;  i + + )  {   
			
		
	
		
			
				
					        first_files . push_back ( & files_brief . files [ i ] ) ;   
			
		
	
		
			
				
					      }   
			
		
	
		
			
				
					      continue ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    assert ( level  >  0 ) ;   
			
		
	
		
			
				
					    assert ( files_brief . num_files  >  0 ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    // identify the file position for starting key
   
			
		
	
		
			
				
					    const  uint64_t  idx_start  =  FindFileInRange (   
			
		
	
		
			
				
					        v - > cfd_ - > internal_comparator ( ) ,  files_brief ,  start ,   
			
		
	
		
			
				
					        /*start=*/ 0 ,  static_cast < uint32_t > ( files_brief . num_files  -  1 ) ) ;   
			
		
	
		
			
				
					    assert ( idx_start  <  files_brief . num_files ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    // scan all files from the starting position until the ending position
   
			
		
	
		
			
				
					    // inferred from the sorted order
   
			
		
	
		
			
				
					    for  ( uint64_t  i  =  idx_start ;  i  <  files_brief . num_files ;  i + + )  {   
			
		
	
		
			
				
					      uint64_t  val ;   
			
		
	
		
			
				
					      val  =  ApproximateSize ( v ,  files_brief . files [ i ] ,  end ,  caller ) ;   
			
		
	
		
			
				
					      if  ( ! val )  {   
			
		
	
		
			
				
					        // the files after this will not have the range
   
			
		
	
		
			
				
					        break ;   
			
		
	
		
			
				
					      }   
			
		
	
		
			
				
					    // identify the file position for start key
   
			
		
	
		
			
				
					    const  int  idx_start  =   
			
		
	
		
			
				
					        FindFileInRange ( icmp ,  files_brief ,  start ,  0 ,   
			
		
	
		
			
				
					                        static_cast < uint32_t > ( files_brief . num_files  -  1 ) ) ;   
			
		
	
		
			
				
					    assert ( static_cast < size_t > ( idx_start )  <  files_brief . num_files ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					      size  + =  val ;   
			
		
	
		
			
				
					    // identify the file position for end key
   
			
		
	
		
			
				
					    int  idx_end  =  idx_start ;   
			
		
	
		
			
				
					    if  ( icmp . Compare ( files_brief . files [ idx_end ] . largest_key ,  end )  <  0 )  {   
			
		
	
		
			
				
					      idx_end  =   
			
		
	
		
			
				
					          FindFileInRange ( icmp ,  files_brief ,  end ,  idx_start ,   
			
		
	
		
			
				
					                          static_cast < uint32_t > ( files_brief . num_files  -  1 ) ) ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					    assert ( idx_end  > =  idx_start  & &   
			
		
	
		
			
				
					           static_cast < size_t > ( idx_end )  <  files_brief . num_files ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					      if  ( i  = =  idx_start )  {   
			
		
	
		
			
				
					        // subtract the bytes needed to be scanned to get to the starting
   
			
		
	
		
			
				
					        // key
   
			
		
	
		
			
				
					        val  =  ApproximateSize ( v ,  files_brief . files [ i ] ,  start ,  caller ) ;   
			
		
	
		
			
				
					        assert ( size  > =  val ) ;   
			
		
	
		
			
				
					        size  - =  val ;   
			
		
	
		
			
				
					      }   
			
		
	
		
			
				
					    // scan all files from the starting index to the ending index
   
			
		
	
		
			
				
					    // (inferred from the sorted order)
   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    // first scan all the intermediate full files (excluding first and last)
   
			
		
	
		
			
				
					    for  ( int  i  =  idx_start  +  1 ;  i  <  idx_end ;  + + i )  {   
			
		
	
		
			
				
					      uint64_t  file_size  =  files_brief . files [ i ] . fd . GetFileSize ( ) ;   
			
		
	
		
			
				
					      // The entire file falls into the range, so we can just take its size.
   
			
		
	
		
			
				
					      assert ( file_size  = =   
			
		
	
		
			
				
					             ApproximateSize ( v ,  files_brief . files [ i ] ,  end ,  caller ) ) ;   
			
		
	
		
			
				
					      total_full_size  + =  file_size ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    // save the first and the last files (which may be the same file), so we
   
			
		
	
		
			
				
					    // can scan them later.
   
			
		
	
		
			
				
					    first_files . push_back ( & files_brief . files [ idx_start ] ) ;   
			
		
	
		
			
				
					    if  ( idx_start  ! =  idx_end )  {   
			
		
	
		
			
				
					      // we need to estimate size for both files, only if they are different
   
			
		
	
		
			
				
					      last_files . push_back ( & files_brief . files [ idx_end ] ) ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					  }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  return  size ;   
			
		
	
		
			
				
					}  
			
		
	
		
			
				
					  // The sum of all file sizes that intersect the [start, end] keys range.
   
			
		
	
		
			
				
					  uint64_t  total_intersecting_size  =  0 ;   
			
		
	
		
			
				
					  for  ( const  auto *  file_ptr  :  first_files )  {   
			
		
	
		
			
				
					    total_intersecting_size  + =  file_ptr - > fd . GetFileSize ( ) ;   
			
		
	
		
			
				
					  }   
			
		
	
		
			
				
					  for  ( const  auto *  file_ptr  :  last_files )  {   
			
		
	
		
			
				
					    total_intersecting_size  + =  file_ptr - > fd . GetFileSize ( ) ;   
			
		
	
		
			
				
					  }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					uint64_t  VersionSet : : ApproximateSizeLevel0 ( Version *  v ,  
			
		
	
		
			
				
					                                           const  LevelFilesBrief &  files_brief ,   
			
		
	
		
			
				
					                                           const  Slice &  key_start ,   
			
		
	
		
			
				
					                                           const  Slice &  key_end ,   
			
		
	
		
			
				
					                                           TableReaderCaller  caller )  {   
			
		
	
		
			
				
					  // level 0 files are not in sorted order, we need to iterate through
   
			
		
	
		
			
				
					  // the list to compute the total bytes that require scanning
   
			
		
	
		
			
				
					  uint64_t  size  =  0 ;   
			
		
	
		
			
				
					  for  ( size_t  i  =  0 ;  i  <  files_brief . num_files ;  i + + )  {   
			
		
	
		
			
				
					    const  uint64_t  start  =   
			
		
	
		
			
				
					        ApproximateSize ( v ,  files_brief . files [ i ] ,  key_start ,  caller ) ;   
			
		
	
		
			
				
					    const  uint64_t  end  =   
			
		
	
		
			
				
					        ApproximateSize ( v ,  files_brief . files [ i ] ,  key_end ,  caller ) ;   
			
		
	
		
			
				
					    assert ( end  > =  start ) ;   
			
		
	
		
			
				
					    size  + =  end  -  start ;   
			
		
	
		
			
				
					  // Now scan all the first & last files at each level, and estimate their size.
   
			
		
	
		
			
				
					  // If the total_intersecting_size is less than X% of the total_full_size - we
   
			
		
	
		
			
				
					  // want to approximate the result in order to avoid the costly binary search
   
			
		
	
		
			
				
					  // inside ApproximateSize. We use half of file size as an approximation below.
   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  const  double  margin  =  options . files_size_error_margin ;   
			
		
	
		
			
				
					  if  ( margin  >  0  & &  total_intersecting_size  <   
			
		
	
		
			
				
					                        static_cast < uint64_t > ( total_full_size  *  margin ) )  {   
			
		
	
		
			
				
					    total_full_size  + =  total_intersecting_size  /  2 ;   
			
		
	
		
			
				
					  }  else  {   
			
		
	
		
			
				
					    // Estimate for all the first files, at each level
   
			
		
	
		
			
				
					    for  ( const  auto  file_ptr  :  first_files )  {   
			
		
	
		
			
				
					      total_full_size  + =  ApproximateSize ( v ,  * file_ptr ,  end ,  caller ) ;   
			
		
	
		
			
				
					      // subtract the bytes needed to be scanned to get to the starting key
   
			
		
	
		
			
				
					      uint64_t  val  =  ApproximateSize ( v ,  * file_ptr ,  start ,  caller ) ;   
			
		
	
		
			
				
					      assert ( total_full_size  > =  val ) ;   
			
		
	
		
			
				
					      total_full_size  - =  val ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					    // Estimate for all the last files, at each level
   
			
		
	
		
			
				
					    for  ( const  auto  file_ptr  :  last_files )  {   
			
		
	
		
			
				
					      total_full_size  + =  ApproximateSize ( v ,  * file_ptr ,  end ,  caller ) ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					  }   
			
		
	
		
			
				
					  return  size ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  return  total_full_size ;   
			
		
	
		
			
				
					}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					uint64_t  VersionSet : : ApproximateSize ( Version *  v ,  const  FdWithKeyRange &  f ,  
			
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
				@ -4957,12 +5007,13 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, 
			
		
	
		
			
				
					                                     TableReaderCaller  caller )  {   
			
		
	
		
			
				
					  // pre-condition
   
			
		
	
		
			
				
					  assert ( v ) ;   
			
		
	
		
			
				
					  const  auto &  icmp  =  v - > cfd_ - > internal_comparator ( ) ;   
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					  uint64_t  result  =  0 ;   
			
		
	
		
			
				
					  if  ( v - > cfd_ - > internal_comparator ( ) . Compare ( f . largest_key ,  key )  < =  0 )  {   
			
		
	
		
			
				
					  if  ( icmp . Compare ( f . largest_key ,  key )  < =  0 )  {   
			
		
	
		
			
				
					    // Entire file is before "key", so just add the file size
   
			
		
	
		
			
				
					    result  =  f . fd . GetFileSize ( ) ;   
			
		
	
		
			
				
					  }  else  if  ( v - > cfd_ - > internal_comparator ( ) . Compare ( f . smallest_key ,  key )  >  0 )  {   
			
		
	
		
			
				
					  }  else  if  ( icmp . Compare ( f . smallest_key ,  key )  >  0 )  {   
			
		
	
		
			
				
					    // Entire file is after "key", so ignore
   
			
		
	
		
			
				
					    result  =  0 ;   
			
		
	
		
			
				
					  }  else  {   
			
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
				@ -4971,7 +5022,7 @@ uint64_t VersionSet::ApproximateSize(Version* v, const FdWithKeyRange& f, 
			
		
	
		
			
				
					    TableCache *  table_cache  =  v - > cfd_ - > table_cache ( ) ;   
			
		
	
		
			
				
					    if  ( table_cache  ! =  nullptr )  {   
			
		
	
		
			
				
					      result  =  table_cache - > ApproximateOffsetOf (   
			
		
	
		
			
				
					          key ,  f . file_metadata - > fd ,  caller ,  v - > cfd ( ) - > internal_comparator ( ) ,   
			
		
	
		
			
				
					          key ,  f . file_metadata - > fd ,  caller ,  icmp ,   
			
		
	
		
			
				
					          v - > GetMutableCFOptions ( ) . prefix_extractor . get ( ) ) ;   
			
		
	
		
			
				
					    }   
			
		
	
		
			
				
					  }