// Copyright (c) 2022-2024 Niko Bonnieure, Par le Peuple, NextGraph.org developers
// All rights reserved.
// Licensed under the Apache License, Version 2.0
// or the MIT license, at your option.
// All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.

//! SmallFile and RandomAccessFile objects

use core::fmt;
use std::cmp::min;
use std::collections::HashMap;
use std::sync::Arc;

use chacha20::cipher::{KeyIvInit, StreamCipher};
use chacha20::ChaCha20;
use zeroize::Zeroize;

use crate::block_storage::*;
use crate::errors::*;
#[allow(unused_imports)]
use crate::log::*;
use crate::object::*;
use crate::store::Store;
use crate::types::*;

/// File errors
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum FileError {
    /// Missing blocks
    MissingBlocks(Vec<BlockId>),
    /// Missing root key
    MissingRootKey,
    /// Invalid BlockId encountered in the tree
    InvalidBlockId,
    /// Too many or too few children of a block
    InvalidChildren,
    /// Number of keys does not match number of children of a block
    InvalidKeys,
    /// Invalid CommitHeader object content
    InvalidHeader,
    /// Error deserializing content of a block
    BlockDeserializeError,
    /// Error deserializing content of the RandomAccessFileMeta
    MetaDeserializeError,
    /// Files are immutable, you cannot modify them and this one was already saved once. Create a new File for your new data (and delete the old one if needed)
    AlreadySaved,
    /// File is too big
    TooBig,
    NotFound,
    StorageError,
    EndOfFile,
    InvalidArgument,
    NotAFile,
}

impl From<StorageError> for FileError {
    fn from(e: StorageError) -> Self {
        match e {
            StorageError::NotFound => FileError::NotFound,
            _ => FileError::StorageError,
        }
    }
}

impl From<ObjectParseError> for FileError {
    fn from(e: ObjectParseError) -> Self {
        match e {
            _ => FileError::BlockDeserializeError,
        }
    }
}

pub trait ReadFile {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError>;

    fn get_all_blocks_ids(&self) -> Result<Vec<BlockId>, FileError>;
}

/// A File in memory (read access only)
pub struct File<'a> {
    internal: Box<dyn ReadFile + 'a>,
    blocks_ids: Vec<BlockId>,
}

impl<'a> File<'a> {
    pub fn open(id: ObjectId, key: SymKey, store: Arc<Store>) -> Result<File<'a>, FileError> {
        let root_block = store.get(&id)?;

        if root_block.children().len() == 2
            && *root_block.content().commit_header_obj() == CommitHeaderObject::RandomAccess
        {
            Ok(File {
                internal: Box::new(RandomAccessFile::open(id, key, store)?),
                blocks_ids: vec![],
            })
        } else {
            let obj = Object::load(id, Some(key), &store)?;
            match obj.content_v0()? {
                ObjectContentV0::SmallFile(small_file) => Ok(File {
                    internal: Box::new(small_file),
                    blocks_ids: obj.block_ids(),
                }),
                _ => Err(FileError::NotAFile),
            }
        }
    }
}

impl<'a> ReadFile for File<'a> {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        self.internal.read(pos, size)
    }

    fn get_all_blocks_ids(&self) -> Result<Vec<BlockId>, FileError> {
        if self.blocks_ids.len() > 0 {
            Ok(self.blocks_ids.to_vec())
        } else {
            self.internal.get_all_blocks_ids()
        }
    }
}

impl ReadFile for SmallFile {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        match self {
            Self::V0(v0) => v0.read(pos, size),
        }
    }

    fn get_all_blocks_ids(&self) -> Result<Vec<BlockId>, FileError> {
        unimplemented!();
    }
}

impl ReadFile for SmallFileV0 {
    fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
        if size == 0 {
            return Err(FileError::InvalidArgument);
        }
        if pos + size > self.content.len() {
            return Err(FileError::EndOfFile);
        }
        Ok(self.content[pos..pos + size].to_vec())
    }

    fn get_all_blocks_ids(&self) -> Result<Vec<BlockId>, FileError> {
        unimplemented!();
    }
}
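// A minimal usage sketch (illustrative only, not part of the API): since
// `read` returns at most one block per call, a full sequential read loops
// until `EndOfFile`. The function name and the 1 MiB request size are
// arbitrary; the id/key/store are assumed to come from a previous save().
#[allow(dead_code)]
fn example_read_to_end(
    id: ObjectId,
    key: SymKey,
    store: Arc<Store>,
) -> Result<Vec<u8>, FileError> {
    let file = RandomAccessFile::open(id, key, store)?;
    let mut buf: Vec<u8> = Vec::new();
    let mut pos: usize = 0;
    loop {
        // ask for a lot: the file clamps the size and returns at most one block
        match file.read(pos, 1_048_576) {
            Ok(chunk) => {
                pos += chunk.len();
                buf.extend_from_slice(&chunk);
            }
            Err(FileError::EndOfFile) => break,
            Err(e) => return Err(e),
        }
    }
    Ok(buf)
}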
/// A RandomAccessFile in memory. This is not used to serialize data.
pub struct RandomAccessFile {
    //storage: Arc<&'a dyn BlockStorage>,
    store: Arc<Store>,
    /// accurate once saved or opened
    meta: RandomAccessFileMeta,

    //meta_object_id: Option<ObjectId>,
    //content_block_id: Option<BlockId>,
    /// keeps the deduplicated blocks' IDs, used for async writes
    block_contents: HashMap<BlockKey, BlockId>,

    /// Blocks of the Object (nodes of the tree). Only used when writing asynchronously, before saving.
    blocks: Vec<(BlockId, BlockKey)>,

    /// When an id is present, the File is opened in Read mode, and cannot be saved.
    id: Option<ObjectId>,
    key: Option<SymKey>,

    content_block: Option<(BlockId, BlockKey)>,

    // used for writes
    conv_key: Option<[u8; 32]>,
    remainder: Vec<u8>,
    size: usize,
}

impl ReadFile for RandomAccessFile {
    fn get_all_blocks_ids(&self) -> Result<Vec<BlockId>, FileError> {
        if self.id.is_none() {
            unimplemented!();
        }
        let mut res = Vec::with_capacity(4);
        let _: Vec<()> = self
            .blocks
            .iter()
            .map(|(id, _)| res.push(id.clone()))
            .collect();

        recurse_tree(
            &self.store,
            self.content_block.as_ref().unwrap().clone(),
            &mut res,
            self.meta.depth(),
        )?;

        fn recurse_tree(
            store: &Store,
            current_block_id_key: (Digest, SymKey),
            res: &mut Vec<BlockId>,
            level: u8,
        ) -> Result<(), FileError> {
            res.push(current_block_id_key.0);
            if level > 0 {
                let tree_block = store.get(&current_block_id_key.0)?;
                let (children, content) = tree_block.read(&current_block_id_key.1)?;
                if children.is_empty() || content.len() > 0 {
                    return Err(FileError::BlockDeserializeError);
                }
                for child in children {
                    recurse_tree(store, child, res, level - 1)?;
                }
            }
            Ok(())
        }

        Ok(res)
    }

    /// reads at most one block from the file. the returned vector should be tested for size. it might be smaller than what you asked for.
    /// `pos`ition can be anywhere in the file.
    //TODO: parallelize decryption on multi threads (cores)
    fn read(&self, pos: usize, mut size: usize) -> Result<Vec<u8>, FileError> {
        if size == 0 {
            return Err(FileError::InvalidArgument);
        }
        if self.id.is_some() {
            let total = self.meta.total_size() as usize;
            if pos > total {
                return Err(FileError::EndOfFile);
            }
            size = min(total - pos, size);
            let mut current_block_id_key = self.content_block.as_ref().unwrap().clone();

            let depth = self.meta.depth();
            let arity = self.meta.arity();

            let mut level_pos = pos;
            for level in 0..depth {
                let tree_block = self.store.get(&current_block_id_key.0)?;
                let (children, content) = tree_block.read(&current_block_id_key.1)?;
                if children.is_empty() || content.len() > 0 {
                    return Err(FileError::BlockDeserializeError);
                }
                let factor = (arity as usize).pow(depth as u32 - level as u32 - 1)
                    * self.meta.chunk_size() as usize;
                let level_index = pos / factor;
                if level_index >= children.len() {
                    return Err(FileError::EndOfFile);
                }
                current_block_id_key = (children[level_index]).clone();
                level_pos = pos % factor;
            }

            let content_block = self.store.get(&current_block_id_key.0)?;
            //log_debug!("CONTENT BLOCK SIZE {}", content_block.size());

            let (children, content) = content_block.read(&current_block_id_key.1)?;
            if children.is_empty() && content.len() > 0 {
                //log_debug!("CONTENT SIZE {}", content.len());
                if level_pos >= content.len() {
                    return Err(FileError::EndOfFile);
                }
                let end = min(content.len(), level_pos + size);
                return Ok(content[level_pos..end].to_vec());
            } else {
                return Err(FileError::BlockDeserializeError);
            }
        } else {
            // hasn't been saved yet, so we can use self.blocks as a flat array, and the remainder too
            let factor = self.meta.chunk_size() as usize;
            let index = pos / factor;
            let level_pos = pos % factor;
            let remainder_pos = self.blocks.len() * factor;
            if pos >= remainder_pos {
                let pos_in_remainder = pos - remainder_pos;
                if self.remainder.len() > 0 && pos_in_remainder < self.remainder.len() {
                    let end = min(self.remainder.len(), pos_in_remainder + size);
                    return Ok(self.remainder[pos_in_remainder..end].to_vec());
                } else {
                    return Err(FileError::EndOfFile);
                }
            }
            //log_debug!("{} {} {} {}", index, self.blocks.len(), factor, level_pos);
            if index >= self.blocks.len() {
                return Err(FileError::EndOfFile);
            }
            let block = &self.blocks[index];
            let content_block = self.store.get(&block.0)?;
            let (children, content) = content_block.read(&block.1)?;
            if children.is_empty() && content.len() > 0 {
                //log_debug!("CONTENT SIZE {}", content.len());
                if level_pos >= content.len() {
                    return Err(FileError::EndOfFile);
                }
                let end = min(content.len(), level_pos + size);
                return Ok(content[level_pos..end].to_vec());
            } else {
                return Err(FileError::BlockDeserializeError);
            }
        }
    }
}
impl RandomAccessFile {
    pub fn meta(&self) -> &RandomAccessFileMeta {
        &self.meta
    }

    pub fn id(&self) -> &Option<ObjectId> {
        &self.id
    }

    pub fn key(&self) -> &Option<SymKey> {
        &self.key
    }
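    // How block encryption works here (see make_block below): the block key
    // is a keyed BLAKE3 hash of the plaintext, keyed with the file's
    // convergence key. Identical chunks therefore yield identical keys and
    // block IDs, which is what makes deduplication via `already_existing`
    // possible, and the zero nonce is safe because each key encrypts exactly
    // one plaintext.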
    fn make_block(
        mut content: Vec<u8>,
        conv_key: &[u8; blake3::OUT_LEN],
        children: Vec<BlockId>,
        already_existing: &mut HashMap<BlockKey, BlockId>,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
        let key_hash = blake3::keyed_hash(conv_key, &content);

        let key_slice = key_hash.as_bytes();
        let key = SymKey::ChaCha20Key(key_slice.clone());
        let it = already_existing.get(&key);
        if it.is_some() {
            return Ok((*it.unwrap(), key));
        }

        let nonce = [0u8; 12];
        let mut cipher = ChaCha20::new(key_slice.into(), &nonce.into());
        // encrypt the content in place
        cipher.apply_keystream(content.as_mut_slice());

        let mut block = Block::new_random_access(children, content, None);
        //log_debug!(">>> make_block random access: {}", block.id());
        //log_debug!("!! children: ({}) {:?}", children.len(), children);

        let id = block.get_and_save_id();
        already_existing.insert(key.clone(), id);
        //log_debug!("putting *** {}", id);
        store.put(&block)?;
        Ok((id, key))
    }

    fn make_parent_block(
        conv_key: &[u8; blake3::OUT_LEN],
        children: Vec<(BlockId, BlockKey)>,
        already_existing: &mut HashMap<BlockKey, BlockId>,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
        let mut ids: Vec<BlockId> = Vec::with_capacity(children.len());
        let mut keys: Vec<BlockKey> = Vec::with_capacity(children.len());
        children.iter().for_each(|child| {
            ids.push(child.0);
            keys.push(child.1.clone());
        });
        let content = ChunkContentV0::InternalNode(keys);
        let content_ser = serde_bare::to_vec(&content).unwrap();

        Self::make_block(content_ser, conv_key, ids, already_existing, store)
    }

    /// Build tree from leaves, returns parent nodes
    fn make_tree(
        already_existing: &mut HashMap<BlockKey, BlockId>,
        leaves: &[(BlockId, BlockKey)],
        conv_key: &ChaCha20Key,
        arity: u16,
        store: &Store,
    ) -> Result<(BlockId, BlockKey), StorageError> {
        let mut parents: Vec<(BlockId, BlockKey)> = vec![];
        let mut chunks = leaves.chunks(arity as usize);
        while let Some(nodes) = chunks.next() {
            //log_debug!("making parent");
            parents.push(Self::make_parent_block(
                conv_key,
                nodes.to_vec(),
                already_existing,
                store,
            )?);
        }
        //log_debug!("level with {} parents", parents.len());

        if 1 < parents.len() {
            return Self::make_tree(already_existing, parents.as_slice(), conv_key, arity, store);
        }
        Ok(parents[0].clone())
    }

    /// returns the content_block id/key pair, and the root_block id/key pair
    fn save_(
        already_existing: &mut HashMap<BlockKey, BlockId>,
        blocks: &[(BlockId, BlockKey)],
        meta: &mut RandomAccessFileMeta,
        conv_key: &ChaCha20Key,
        store: &Store,
    ) -> Result<((BlockId, BlockKey), (BlockId, BlockKey)), FileError> {
        let leaf_blocks_nbr = blocks.len();
        let arity = meta.arity();

        let mut depth: u8 = u8::MAX;
        for i in 0..u8::MAX {
            if leaf_blocks_nbr <= (arity as usize).pow(i.into()) {
                depth = i;
                break;
            }
        }
        if depth == u8::MAX {
            return Err(FileError::TooBig);
        }
        meta.set_depth(depth);
        //log_debug!("depth={} leaves={}", depth, leaf_blocks_nbr);

        let content_block = if depth == 0 {
            assert!(blocks.len() == 1);
            blocks[0].clone()
        } else {
            // we create the tree
            Self::make_tree(already_existing, &blocks, &conv_key, arity, store)?
        };

        let meta_object = Object::new_with_convergence_key(
            ObjectContent::V0(ObjectContentV0::RandomAccessFileMeta(meta.clone())),
            None,
            store_valid_value_size(meta.chunk_size() as usize),
            conv_key,
        );
        //log_debug!("saving meta object");
        _ = meta_object.save(store)?;

        // creating the root block that contains as first child the meta_object, and as second child the content_block.
        // it is added to storage in make_parent_block
        //log_debug!("saving root block");
        let root_block = Self::make_parent_block(
            conv_key,
            vec![
                (meta_object.id(), meta_object.key().unwrap()),
                content_block.clone(),
            ],
            already_existing,
            store,
        )?;
        Ok((content_block, root_block))
    }
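    // Capacity arithmetic used by save_() above: a tree of depth d holds up
    // to arity^d leaf chunks of chunk_size bytes. For the small-block values
    // used in the tests below (arity 61, chunk_size 4084), depth 1 tops out
    // around 249 KB, depth 3 around 900 MB, and depth 4 around 52 GB.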
    /// Creates a new file based on a content that is fully known at the time of creation.
    ///
    /// If you want to stream the content progressively into the new file,
    /// you should use new_empty(), write() and save() instead.
    pub fn new_from_slice(
        content: &[u8],
        block_size: usize,
        content_type: String,
        metadata: Vec<u8>,
        store: Arc<Store>,
    ) -> Result<RandomAccessFile, FileError> {
        //let max_block_size = store_max_value_size();
        let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;

        let arity = (valid_block_size / CHILD_SIZE) as u16;

        let total_size = content.len() as u64;

        let mut conv_key = Object::convergence_key(&store);

        let mut blocks: Vec<(BlockId, BlockKey)> = vec![];

        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();

        //log_debug!("making the leaves");
        for chunk in content.chunks(valid_block_size) {
            let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
            let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
            blocks.push(Self::make_block(
                content_ser,
                &conv_key,
                vec![],
                &mut already_existing,
                &store,
            )?);
        }
        assert_eq!(
            (total_size as usize + valid_block_size - 1) / valid_block_size,
            blocks.len()
        );

        let mut meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
            content_type,
            metadata,
            chunk_size: valid_block_size as u32,
            total_size,
            arity,
            depth: 0,
        });

        let (content_block, root_block) =
            Self::save_(&mut already_existing, &blocks, &mut meta, &conv_key, &store)?;

        conv_key.zeroize();

        Ok(Self {
            store,
            meta,
            block_contents: HashMap::new(), // not used in this case
            blocks: vec![],                 // not used in this case
            id: Some(root_block.0.clone()),
            key: Some(root_block.1.clone()),
            content_block: Some(content_block),
            conv_key: None,    // not used in this case
            remainder: vec![], // not used in this case
            size: 0,           // not used in this case
        })
    }

    pub fn new_empty(
        block_size: usize,
        content_type: String,
        metadata: Vec<u8>,
        store: Arc<Store>,
    ) -> Self {
        let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;
        let arity = (valid_block_size / CHILD_SIZE) as u16;

        let meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
            content_type,
            metadata,
            chunk_size: valid_block_size as u32,
            arity,
            total_size: 0, // will be filled in later, during save
            depth: 0,      // will be filled in later, during save
        });

        Self {
            store: Arc::clone(&store),
            meta,
            block_contents: HashMap::new(),
            blocks: vec![],
            id: None,
            key: None,
            content_block: None,
            conv_key: Some(Object::convergence_key(&store)),
            remainder: vec![],
            size: 0,
        }
    }
    /// Appends some data at the end of the file currently created with new_empty() and not saved yet.
    /// You can call it many times. Don't forget to eventually call save().
    pub fn write(&mut self, data: &[u8]) -> Result<(), FileError> {
        if self.id.is_some() {
            return Err(FileError::AlreadySaved);
        }
        let remainder = self.remainder.len();
        let chunk_size = self.meta.chunk_size() as usize;
        let mut pos: usize = 0;
        let conv_key = self.conv_key.unwrap();

        // TODO: provide an option to search in storage for already existing blocks, when resuming a previously aborted write
        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();

        if remainder > 0 {
            if data.len() >= chunk_size - remainder {
                let mut new_block = Vec::with_capacity(chunk_size);
                new_block.append(&mut self.remainder);
                pos = chunk_size - remainder;
                self.size += chunk_size;
                //log_debug!("size += chunk_size {} {}", self.size, chunk_size);
                new_block.extend(data[0..pos].iter());
                assert_eq!(new_block.len(), chunk_size);
                let data_chunk = ChunkContentV0::DataChunk(new_block);
                let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
                self.blocks.push(Self::make_block(
                    content_ser,
                    &conv_key,
                    vec![],
                    &mut already_existing,
                    &self.store,
                )?);
            } else {
                // not enough data to create a new block
                self.remainder.extend(data.iter());
                return Ok(());
            }
        } else if data.len() < chunk_size {
            self.remainder = Vec::from(data);
            return Ok(());
        }

        for chunk in data[pos..].chunks(chunk_size) {
            if chunk.len() == chunk_size {
                self.size += chunk_size;
                //log_debug!("size += chunk_size {} {}", self.size, chunk_size);
                let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
                let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
                self.blocks.push(Self::make_block(
                    content_ser,
                    &conv_key,
                    vec![],
                    &mut already_existing,
                    &self.store,
                )?);
            } else {
                self.remainder = Vec::from(chunk);
                return Ok(());
            }
        }
        Ok(())
    }

    pub fn save(&mut self) -> Result<BlockId, FileError> {
        if self.id.is_some() {
            return Err(FileError::AlreadySaved);
        }
        // save the remainder, if any.
        if self.remainder.len() > 0 {
            self.size += self.remainder.len();
            //log_debug!("size += remainder {} {}", self.size, self.remainder.len());
            let mut remainder = Vec::with_capacity(self.remainder.len());
            remainder.append(&mut self.remainder);
            let data_chunk = ChunkContentV0::DataChunk(remainder);
            let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
            self.blocks.push(Self::make_block(
                content_ser,
                &self.conv_key.unwrap(),
                vec![],
                &mut HashMap::new(),
                &self.store,
            )?);
        }

        self.meta.set_total_size(self.size as u64);

        let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();
        let (content_block, root_block) = Self::save_(
            &mut already_existing,
            &self.blocks,
            &mut self.meta,
            self.conv_key.as_ref().unwrap(),
            &self.store,
        )?;

        self.conv_key.as_mut().unwrap().zeroize();
        self.conv_key = None;

        self.id = Some(root_block.0);
        self.key = Some(root_block.1.clone());
        self.content_block = Some(content_block);

        self.blocks = vec![];
        self.blocks.shrink_to_fit();

        Ok(root_block.0)
    }

    pub fn reference(&self) -> Option<ObjectRef> {
        if self.key.is_some() && self.id.is_some() {
            Some(ObjectRef::from_id_key(
                self.id.unwrap(),
                self.key.as_ref().unwrap().clone(),
            ))
        } else {
            None
        }
    }
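    // Shape of a saved file: the root block has exactly two children. The
    // first is the Object holding the RandomAccessFileMeta, the second is
    // the content block (the single leaf at depth 0, or the top internal
    // node of the tree otherwise). open() below relies on this shape.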
    /// Opens a file for read purpose.
    pub fn open(
        id: ObjectId,
        key: SymKey,
        store: Arc<Store>,
    ) -> Result<RandomAccessFile, FileError> {
        // load root block
        let root_block = store.get(&id)?;

        if root_block.children().len() != 2
            || *root_block.content().commit_header_obj() != CommitHeaderObject::RandomAccess
        {
            return Err(FileError::BlockDeserializeError);
        }

        let (root_sub_blocks, _) = root_block.read(&key)?;

        // load meta object (first one in root block)
        let meta_object = Object::load(
            root_sub_blocks[0].0,
            Some(root_sub_blocks[0].1.clone()),
            &store,
        )?;

        let meta = match meta_object.content_v0()? {
            ObjectContentV0::RandomAccessFileMeta(meta) => meta,
            _ => return Err(FileError::InvalidChildren),
        };

        Ok(RandomAccessFile {
            store,
            meta,
            block_contents: HashMap::new(), // not used in this case
            blocks: vec![(id, SymKey::nil()), (root_sub_blocks[0].0, SymKey::nil())], // not used in this case
            id: Some(id),
            key: Some(key),
            content_block: Some(root_sub_blocks[1].clone()),
            conv_key: None,
            remainder: vec![],
            size: 0,
        })
    }

    pub fn blocks(&self) -> impl Iterator<Item = Block> + '_ {
        self.blocks
            .iter()
            .map(|key| self.store.get(&key.0).unwrap())
    }

    /// Size once encoded, before deduplication. Only available before save()
    pub fn size(&self) -> usize {
        let mut total = 0;
        self.blocks().for_each(|b| total += b.size());
        total
    }

    /// Real size on disk
    pub fn dedup_size(&self) -> usize {
        let mut total = 0;
        self.block_contents
            .values()
            .for_each(|b| total += self.store.get(b).unwrap().size());
        total
    }

    pub fn depth(&self) -> Result<u8, ObjectParseError> {
        Ok(self.meta.depth())
        // unimplemented!();
        // if self.key().is_none() {
        //     return Err(ObjectParseError::MissingRootKey);
        // }
        // let parents = vec![(self.id(), self.key().unwrap())];
        // Self::collect_leaves(
        //     &self.blocks,
        //     &parents,
        //     self.blocks.len() - 1,
        //     &mut None,
        //     &mut None,
        //     &self.block_contents,
        // )
    }
}

impl fmt::Display for RandomAccessFile {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(
            f,
            "====== File ID {}",
            self.id
                .map_or("NOT SAVED".to_string(), |i| format!("{}", i))
        )?;
        writeln!(
            f,
            "== Key: {}",
            self.key
                .as_ref()
                .map_or("None".to_string(), |k| format!("{}", k))
        )?;
        writeln!(f, "== depth: {}", self.meta.depth())?;
        writeln!(f, "== arity: {}", self.meta.arity())?;
        writeln!(f, "== chunk_size: {}", self.meta.chunk_size())?;
        writeln!(f, "== total_size: {}", self.meta.total_size())?;
        writeln!(f, "== content_type: {}", self.meta.content_type())?;
        writeln!(f, "== metadata len: {}", self.meta.metadata().len())?;
        if self.id.is_none() {
            writeln!(f, "== blocks to save: {}", self.blocks.len())?;
        }
        Ok(())
    }
}
#[cfg(test)]
mod test {

    use time::Instant;

    use crate::file::*;
    use std::io::BufReader;
    use std::io::Read;

    /// Checks that a content that does fit in one block creates a depth of 0
    #[test]
    pub fn test_depth_0() {
        let block_size = store_max_value_size();
        //store_valid_value_size(0)

        ////// 1 MB of data!
        let data_size = block_size - BLOCK_EXTRA;

        let store = Store::dummy_public_v0();
        log_debug!("creating 1MB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating random access file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            block_size,
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let id = file.id.as_ref().unwrap().clone();

        let file_size = file.size();
        log_debug!("file size to save : {}", file_size);
        log_debug!("data size: {}", data_size);

        let read_content = file.read(0, data_size).expect("reading all");
        assert_eq!(read_content, content);

        let read_content2 = file.read(0, data_size + 1);
        assert_eq!(read_content2.unwrap().len(), 1048564);

        let read_content = file.read(data_size - 9, 9).expect("reading end");
        assert_eq!(read_content, vec![99, 99, 99, 99, 99, 99, 99, 99, 99]);

        let read_content = file.read(data_size - 9, 10);
        assert_eq!(read_content, Ok(vec![99, 99, 99, 99, 99, 99, 99, 99, 99]));

        // log_debug!(
        //     "overhead: {} - {}%",
        //     file_size - data_size,
        //     ((file_size - data_size) * 100) as f32 / data_size as f32
        // );

        // let dedup_size = file.dedup_size();
        // log_debug!(
        //     "dedup compression: {} - {}%",
        //     data_size - dedup_size,
        //     ((data_size - dedup_size) * 100) as f32 / data_size as f32
        // );

        // log_debug!("number of blocks : {}", file.blocks.len());
        // assert_eq!(
        //     file.blocks.len(),
        //     MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
        // );

        assert_eq!(file.depth(), Ok(0));
        assert_eq!(store.len(), Ok(3));

        let file = RandomAccessFile::open(id, file.key.unwrap(), store).expect("re open");
        log_debug!("{}", file);

        let read_content = file.read(0, data_size).expect("reading all after re open");
        assert_eq!(read_content, content);
    }

    /// Checks a content that doesn't fit in all the children of the first level of the tree
    #[ignore]
    #[test]
    pub fn test_depth_1() {
        const MAX_ARITY_LEAVES: usize = 15887;
        const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;

        ////// 16 GB of data!
        let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 16GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating random access file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_max_value_size(),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let _id = file.id.as_ref().unwrap().clone();

        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth(), Ok(1));

        assert_eq!(store.len(), Ok(4));
    }
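    /// A minimal sketch of the write() buffering behavior (the 11-byte
    /// payload and the test name are arbitrary): a write smaller than
    /// chunk_size stays in the remainder, is readable before save(), and is
    /// flushed as the last block by save().
    #[test]
    pub fn test_write_remainder() {
        let store = Store::dummy_public_v0();

        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        );

        let data = b"hello world";
        file.write(data).expect("write small data");

        // before save, the bytes live in the remainder and can be read back
        assert_eq!(
            file.read(0, data.len()).expect("read remainder"),
            data.to_vec()
        );

        file.save().expect("save");

        // after save, the remainder has been flushed into the single content block
        assert_eq!(file.meta.total_size() as usize, data.len());
        assert_eq!(
            file.read(0, data.len()).expect("read after save"),
            data.to_vec()
        );

        // files are immutable once saved
        assert_eq!(file.write(b"more"), Err(FileError::AlreadySaved));
    }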
    /// Checks a content that doesn't fit in all the children of the first level of the tree
    #[ignore]
    #[test]
    pub fn test_depth_2() {
        const MAX_ARITY_LEAVES: usize = 15887;
        const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;

        ////// 16 GB of data!
        let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE + 1;

        let store = Store::dummy_public_v0();
        log_debug!("creating 16GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_max_value_size(),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);
        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth().unwrap(), 2);

        assert_eq!(store.len(), Ok(7));
    }

    /// Checks a content that doesn't fit in all the children of the first level of the tree
    #[test]
    pub fn test_depth_3() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 900 MB of data!
        let data_size =
            MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 900MB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);

        let read_content = file.read(0, data_size).expect("reading all");
        assert_eq!(read_content.len(), MAX_DATA_PAYLOAD_SIZE);

        let read_content = file.read(9000, 10000).expect("reading 10k");
        assert_eq!(read_content, vec![99; 3252]);

        // log_debug!("data size: {}", data_size);
        // log_debug!(
        //     "overhead: {} - {}%",
        //     file_size - data_size,
        //     ((file_size - data_size) * 100) as f32 / data_size as f32
        // );

        // let dedup_size = file.dedup_size();
        // log_debug!(
        //     "dedup compression: {} - {}%",
        //     data_size - dedup_size,
        //     ((data_size - dedup_size) * 100) as f32 / data_size as f32
        // );

        // log_debug!("number of blocks : {}", file.blocks.len());
        // assert_eq!(
        //     file.blocks.len(),
        //     MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
        // );

        assert_eq!(file.depth().unwrap(), 3);
        assert_eq!(store.len(), Ok(6));
    }
    /// Checks a content that doesn't fit in all the children of the first level of the tree
    #[ignore]
    #[test]
    pub fn test_depth_4() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 52GB of data!
        let data_size = MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_DATA_PAYLOAD_SIZE;

        let store = Store::dummy_public_v0();
        log_debug!("creating 52GB of data");
        let content: Vec<u8> = vec![99; data_size];

        log_debug!("creating file with that data");
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);
        log_debug!("data size: {}", data_size);

        assert_eq!(file.depth().unwrap(), 4);
        assert_eq!(store.len(), Ok(7));
    }

    /// Test async write to a file all at once
    #[test]
    pub fn test_write_all_at_once() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            store,
        );
        log_debug!("{}", file);

        file.write(&img_buffer).expect("write all at once");

        // !!! all those tests work only because store_max_value_size() is bigger than the actual size of the JPEG file, so it fits in one block.
        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            img_buffer
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            img_buffer
        );

        // // reading too far, well behind the size of the JPG
        // assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }
    /// Test async write to a file by increments
    #[test]
    pub fn test_write_by_increments() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            store,
        );
        log_debug!("{}", file);

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            img_buffer
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            img_buffer
        );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // reading one byte after the end of the file size.
        assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        // this works only because store_max_value_size() is bigger than the actual size of the JPEG file, so it fits in one block.
        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }
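    /// A minimal sketch of read() argument checking on a file that has not
    /// been written to yet: a zero size is rejected, and any position is
    /// past the end of the (still empty) content.
    #[test]
    pub fn test_read_empty_file() {
        let store = Store::dummy_public_v0();

        let file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "text/plain".to_string(),
            vec![],
            store,
        );

        assert_eq!(file.read(0, 0), Err(FileError::InvalidArgument));
        assert_eq!(file.read(0, 1), Err(FileError::EndOfFile));
        assert_eq!(file.read(1000, 1), Err(FileError::EndOfFile));
    }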
    /// Test async write to a file by increments, with small blocks
    #[test]
    pub fn test_write_by_increments_small_blocks() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );
        log_debug!("{}", file);

        let first_block_content = img_buffer[0..4084].to_vec();

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }
        log_debug!("{}", file);

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            first_block_content
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            first_block_content
        );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");
        log_debug!("{}", file);

        assert_eq!(img_buffer.len(), file.meta.total_size() as usize);

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, first_block_content);

        // // asking too much, not receiving an error, as we know the total size of file, and return what we can
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }

    /// Test async write to a file all at once, with small blocks
    #[test]
    pub fn test_write_all_at_once_small_blocks() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let first_block_content = img_buffer[0..4084].to_vec();

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );
        log_debug!("{}", file);

        file.write(&img_buffer).expect("write all at once");

        assert_eq!(
            file.read(0, img_buffer.len()).expect("read before save"),
            first_block_content
        );

        // asking too much, receiving just enough
        assert_eq!(
            file.read(0, img_buffer.len() + 1)
                .expect("read before save"),
            first_block_content
        );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));

        file.save().expect("save");

        let res = file.read(0, img_buffer.len()).expect("read all");
        assert_eq!(res, first_block_content);

        let res = file.read(10, img_buffer.len() - 10).expect("read all");
        assert_eq!(res, first_block_content[10..].to_vec());

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
    }
    /// Test depth 4 with 52GB of data, but using write in small increments, so the memory burden on the system will be minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_write_small() {
        const MAX_ARITY_LEAVES: usize = 61;
        const MAX_DATA_PAYLOAD_SIZE: usize = 4084;

        ////// 52GB of data!
        let data_size = MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_ARITY_LEAVES
            * MAX_DATA_PAYLOAD_SIZE;

        // chunks of 5 MB
        let chunk_nbr = data_size / 5000000;
        let last_chunk = data_size % 5000000;

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );
        log_debug!("{}", file);

        let chunk = vec![99; 5000000];
        let last_chunk = vec![99; last_chunk];

        for _i in 0..chunk_nbr {
            file.write(&chunk).expect("write a chunk");
        }

        file.write(&last_chunk).expect("write last chunk");
        log_debug!("{}", file);

        file.save().expect("save");
        log_debug!("{}", file);

        let file_size = file.size();
        log_debug!("file size: {}", file_size);
        log_debug!("data size: {}", data_size);

        assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 4);
        assert_eq!(store.len(), Ok(7));
    }

    /// Test open
    #[test]
    pub fn test_open() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let store = Store::dummy_public_v0();

        log_debug!("creating file with the JPG content");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(), //store_valid_value_size(0),//
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );
        log_debug!("{}", file);

        for chunk in img_buffer.chunks(1000) {
            file.write(chunk).expect("write a chunk");
        }

        file.save().expect("save");

        let file2 = RandomAccessFile::open(file.id().unwrap(), file.key.unwrap(), store)
            .expect("reopen file");

        // this works only because store_max_value_size() is bigger than the actual size of the JPEG file, so it fits in one block.
        let res = file2.read(0, img_buffer.len()).expect("read all");

        log_debug!("{}", file2);

        assert_eq!(res, img_buffer);

        // // asking too much, receiving an error, as now we know the total size of file, and we check it
        // assert_eq!(
        //     file2.read(0, img_buffer.len() + 1),
        //     Err(FileError::EndOfFile)
        // );

        // reading too far, well behind the size of the JPG
        assert_eq!(file2.read(100000, 1), Err(FileError::EndOfFile));

        assert_eq!(file2.read(10000, 1).expect("read after save"), vec![41]);

        // // reading one byte after the end of the file size.
        // assert_eq!(file2.read(29454, 1), Err(FileError::EndOfFile));

        assert_eq!(file2.read(29454, 0), Err(FileError::InvalidArgument));
    }
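    /// A minimal sketch of the unified File::open() entry point on a
    /// RandomAccessFile created with new_from_slice() (the content and
    /// content type are arbitrary, small enough to fit in one block).
    #[test]
    pub fn test_open_unified_file() {
        let store = Store::dummy_public_v0();

        let content = vec![5u8; 1000];
        let file: RandomAccessFile = RandomAccessFile::new_from_slice(
            &content,
            store_valid_value_size(0),
            "application/octet-stream".to_string(),
            vec![],
            Arc::clone(&store),
        )
        .expect("new_from_slice");

        // a saved file exposes an ObjectRef made of its id and key
        assert!(file.reference().is_some());

        // File::open() recognizes the root block shape and routes to RandomAccessFile
        let file2 = File::open(
            file.id().unwrap(),
            file.key().as_ref().unwrap().clone(),
            store,
        )
        .expect("open");

        let res = file2.read(0, content.len()).expect("read all");
        assert_eq!(res, content);
    }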
    /// Test read of a small JPEG file
    #[test]
    pub fn test_read_small_file() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let len = img_buffer.len();
        let content = ObjectContent::new_file_v0_with_content(img_buffer.clone(), "image/jpeg");

        let max_object_size = store_max_value_size();
        let store = Store::dummy_public_v0();
        let mut obj = Object::new(content, None, max_object_size, &store);

        log_debug!("{}", obj);

        let _ = obj.save_in_test(&store).expect("save");

        let file = File::open(obj.id(), obj.key().unwrap(), store).expect("open");

        let res = file.read(0, len).expect("read all");

        assert_eq!(res, img_buffer);
    }

    /// Test read of a JPEG file with random access
    #[test]
    pub fn test_read_random_access_file() {
        let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
        let mut reader = BufReader::new(f);
        let mut img_buffer: Vec<u8> = Vec::new();
        reader
            .read_to_end(&mut img_buffer)
            .expect("read of test.jpg");

        let len = img_buffer.len();

        let max_object_size = store_max_value_size();
        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            max_object_size,
            "image/jpeg".to_string(),
            vec![],
            Arc::clone(&store),
        );

        file.write(&img_buffer).expect("write all");
        log_debug!("{}", file);

        file.save().expect("save");
        log_debug!("{}", file);

        let file = File::open(
            file.id().unwrap(),
            file.key().as_ref().unwrap().clone(),
            store,
        )
        .expect("open");

        // this only works because we chose a big block size (1MB) so the small JPG file fits in one block.
        // if not, we would have to call read repeatedly and append the results into a buffer, in order to get the full file
        let res = file.read(0, len).expect("read all");

        assert_eq!(res, img_buffer);
    }

    /// Test depth 4, but using write in increments, so the memory burden on the system will be minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_big_write_small() {
        let encoding_big_file = Instant::now();

        let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
        let mut reader = BufReader::new(f);

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_valid_value_size(0),
            "image/jpeg".to_string(),
            vec![],
            store,
        );
        log_debug!("{}", file);

        let mut chunk = [0u8; 1000000];

        loop {
            let size = reader.read(&mut chunk).expect("read a chunk");
            //log_debug!("{}", size);
            file.write(&chunk[0..size]).expect("write a chunk");
            if size != 1000000 {
                break;
            }
        }
        log_debug!("{}", file);

        file.save().expect("save");
        log_debug!("{}", file);

        log_debug!("data size: {}", file.meta.total_size());
        //assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 4);

        log_debug!(
            "encoding_big_file took: {} s",
            encoding_big_file.elapsed().as_seconds_f32()
        );
    }

    /// Test depth 1 with 2.7GB of data, but using write in increments, so the memory burden on the system will be minimal
    #[ignore]
    #[test]
    pub fn test_depth_4_big_write_big() {
        let encoding_big_file = Instant::now();

        let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
        let mut reader = BufReader::new(f);

        let store = Store::dummy_public_v0();

        log_debug!("creating empty file");
        let mut file: RandomAccessFile = RandomAccessFile::new_empty(
            store_max_value_size(),
            "image/jpeg".to_string(),
            vec![],
            store,
        );
        log_debug!("{}", file);

        let mut chunk = [0u8; 2000000];

        loop {
            let size = reader.read(&mut chunk).expect("read a chunk");
            //log_debug!("{}", size);
            file.write(&chunk[0..size]).expect("write a chunk");
            if size != 2000000 {
                break;
            }
        }
        log_debug!("{}", file);

        file.save().expect("save");
        log_debug!("{}", file);

        log_debug!("data size: {}", file.meta.total_size());
        //assert_eq!(data_size, file.meta.total_size() as usize);

        assert_eq!(file.depth().unwrap(), 1);

        log_debug!(
            "encoding_big_file took: {} s",
            encoding_big_file.elapsed().as_seconds_f32()
        );
    }
}