From f03b6bcc10f84ce19887356e6a6a866ccc5408d1 Mon Sep 17 00:00:00 2001 From: Niko PLP Date: Thu, 21 Mar 2024 22:39:33 +0200 Subject: [PATCH] RandomAccessFile --- p2p-repo/src/block.rs | 36 + p2p-repo/src/file.rs | 1537 ++++++++++++++++++++++++++++++++++++++++ p2p-repo/src/lib.rs | 2 + p2p-repo/src/object.rs | 63 +- p2p-repo/src/store.rs | 18 +- p2p-repo/src/types.rs | 81 ++- 6 files changed, 1704 insertions(+), 33 deletions(-) create mode 100644 p2p-repo/src/file.rs diff --git a/p2p-repo/src/block.rs b/p2p-repo/src/block.rs index 10d1861..1eba501 100644 --- a/p2p-repo/src/block.rs +++ b/p2p-repo/src/block.rs @@ -40,6 +40,26 @@ impl BlockV0 { b } + pub fn new_random_access( + children: Vec, + content: Vec, + key: Option, + ) -> BlockV0 { + let bc = BlockContentV0 { + children, + commit_header: CommitHeaderObject::RandomAccess, + encrypted_content: content, + }; + let mut b = BlockV0 { + id: None, + key, + content: BlockContent::V0(bc), + commit_header_key: None, + }; + b.id = Some(b.compute_id()); + b + } + /// Compute the ID pub fn compute_id(&self) -> BlockId { let ser = serde_bare::to_vec(&self.content).unwrap(); @@ -92,6 +112,14 @@ impl Block { Block::V0(BlockV0::new(children, header_ref, content, key)) } + pub fn new_random_access( + children: Vec, + content: Vec, + key: Option, + ) -> Block { + Block::V0(BlockV0::new_random_access(children, content, key)) + } + pub fn size(&self) -> usize { serde_bare::to_vec(&self).unwrap().len() } @@ -128,6 +156,13 @@ impl Block { } } + /// Get the content + pub fn content(&self) -> &BlockContent { + match self { + Block::V0(b) => &b.content, + } + } + /// Get the encrypted content pub fn encrypted_content(&self) -> &Vec { match self { @@ -148,6 +183,7 @@ impl Block { Block::V0(b) => match b.commit_header_key.as_ref() { Some(key) => match b.content.commit_header_obj() { CommitHeaderObject::None => None, + CommitHeaderObject::RandomAccess => None, _ => Some(CommitHeaderRef { obj: b.content.commit_header_obj().clone(), key: key.clone(), diff --git a/p2p-repo/src/file.rs b/p2p-repo/src/file.rs new file mode 100644 index 0000000..4eb6f7f --- /dev/null +++ b/p2p-repo/src/file.rs @@ -0,0 +1,1537 @@ +// Copyright (c) 2022-2024 Niko Bonnieure, Par le Peuple, NextGraph.org developers +// All rights reserved. +// Licensed under the Apache License, Version 2.0 +// +// or the MIT license , +// at your option. All files in the project carrying such +// notice may not be copied, modified, or distributed except +// according to those terms. + +//! File and RandomAccessFile objects + +use core::fmt; +use std::cmp::min; +use std::collections::HashMap; + +use chacha20::cipher::{KeyIvInit, StreamCipher}; +use chacha20::ChaCha20; + +use crate::errors::*; +use crate::log::*; +use crate::object::*; +use crate::store::*; +use crate::types::*; + +/// File errors +#[derive(Debug, PartialEq)] +pub enum FileError { + /// Missing blocks + MissingBlocks(Vec), + /// Missing root key + MissingRootKey, + /// Invalid BlockId encountered in the tree + InvalidBlockId, + /// Too many or too few children of a block + InvalidChildren, + /// Number of keys does not match number of children of a block + InvalidKeys, + /// Invalid CommitHeader object content + InvalidHeader, + /// Error deserializing content of a block + BlockDeserializeError, + /// Error deserializing content of the RandomAccessFileMeta + MetaDeserializeError, + /// Files are immutable, you cannot modify them and this one was already saved once. 
Create a new File for your new data (and delete the old one if needed) + AlreadySaved, + /// File is too big + TooBig, + NotFound, + StorageError, + EndOfFile, + InvalidArgument, +} + +impl From for FileError { + fn from(e: StorageError) -> Self { + match e { + StorageError::NotFound => FileError::NotFound, + _ => FileError::StorageError, + } + } +} + +impl From for FileError { + fn from(e: ObjectParseError) -> Self { + match e { + _ => FileError::BlockDeserializeError, + } + } +} + +/// A RandomAccessFile in memory. This is not used to serialize data +pub struct RandomAccessFile<'a> { + //storage: Arc<&'a dyn RepoStore>, + storage: &'a Box, + /// accurate once saved or opened + meta: RandomAccessFileMeta, + + //meta_object_id: Option, + //content_block_id: Option, + /// keeps the deduplicated blocks' IDs, used for async writes + block_contents: HashMap, + + /// Blocks of the Object (nodes of the tree). Only used when writing asynchronously, before saving. + blocks: Vec<(BlockId, BlockKey)>, + + /// When an id is present, the File is opened in Read mode, and cannot be saved. + id: Option, + key: Option, + + content_block: Option<(BlockId, BlockKey)>, + + // used for writes + conv_key: Option<[u8; 32]>, + remainder: Vec, + size: usize, +} + +impl<'a> RandomAccessFile<'a> { + pub fn meta(&self) -> &RandomAccessFileMeta { + &self.meta + } + + pub fn id(&self) -> &Option { + &self.id + } + + pub fn key(&self) -> &Option { + &self.key + } + + fn make_block( + mut content: Vec, + conv_key: &[u8; blake3::OUT_LEN], + children: Vec, + already_existing: &mut HashMap, + storage: &Box, + ) -> Result<(BlockId, BlockKey), StorageError> { + let key_hash = blake3::keyed_hash(conv_key, &content); + + let key_slice = key_hash.as_bytes(); + let key = SymKey::ChaCha20Key(key_slice.clone()); + let it = already_existing.get(&key); + if it.is_some() { + return Ok((*it.unwrap(), key)); + } + let nonce = [0u8; 12]; + let mut cipher = ChaCha20::new(key_slice.into(), &nonce.into()); + //let mut content_enc = Vec::from(content); + let mut content_enc_slice = &mut content.as_mut_slice(); + cipher.apply_keystream(&mut content_enc_slice); + + let mut block = Block::new_random_access(children, content, None); + //log_debug!(">>> make_block random access: {}", block.id()); + //log_debug!("!! 
children: ({}) {:?}", children.len(), children); + + let id = block.get_and_save_id(); + already_existing.insert(key.clone(), id); + //log_debug!("putting *** {}", id); + storage.put(&block)?; + Ok((id, key)) + } + + fn read_block( + block: Block, + key: &SymKey, + ) -> Result<(Vec<(BlockId, BlockKey)>, Vec), ObjectParseError> { + match block { + Block::V0(b) => { + // decrypt content in place (this is why we have to clone first) + let mut content_dec = b.content.encrypted_content().clone(); + match key { + SymKey::ChaCha20Key(key) => { + let nonce = [0u8; 12]; + let mut cipher = ChaCha20::new(key.into(), &nonce.into()); + let mut content_dec_slice = &mut content_dec.as_mut_slice(); + cipher.apply_keystream(&mut content_dec_slice); + } + } + + // deserialize content + let content: ChunkContentV0; + match serde_bare::from_slice(content_dec.as_slice()) { + Ok(c) => content = c, + Err(e) => { + log_debug!("Block deserialize error: {}", e); + return Err(ObjectParseError::BlockDeserializeError); + } + } + // parse content + match content { + ChunkContentV0::InternalNode(keys) => { + let b_children = b.children(); + if keys.len() != b_children.len() { + log_debug!( + "Invalid keys length: got {}, expected {}", + keys.len(), + b_children.len() + ); + log_debug!("!!! children: {:?}", b_children); + log_debug!("!!! keys: {:?}", keys); + return Err(ObjectParseError::InvalidKeys); + } + let mut children = Vec::with_capacity(b_children.len()); + for (id, key) in b_children.iter().zip(keys.iter()) { + children.push((id.clone(), key.clone())); + } + Ok((children, vec![])) + } + ChunkContentV0::DataChunk(chunk) => Ok((vec![], chunk)), + } + } + } + } + + fn make_parent_block( + conv_key: &[u8; blake3::OUT_LEN], + children: Vec<(BlockId, BlockKey)>, + already_existing: &mut HashMap, + storage: &Box, + ) -> Result<(BlockId, BlockKey), StorageError> { + let mut ids: Vec = Vec::with_capacity(children.len()); + let mut keys: Vec = Vec::with_capacity(children.len()); + children.iter().for_each(|child| { + ids.push(child.0); + keys.push(child.1.clone()); + }); + let content = ChunkContentV0::InternalNode(keys); + let content_ser = serde_bare::to_vec(&content).unwrap(); + + Self::make_block(content_ser, conv_key, ids, already_existing, storage) + } + + /// Build tree from leaves, returns parent nodes + fn make_tree( + already_existing: &mut HashMap, + leaves: &[(BlockId, BlockKey)], + conv_key: &ChaCha20Key, + arity: u16, + storage: &'a Box, + ) -> Result<(BlockId, BlockKey), StorageError> { + let mut parents: Vec<(BlockId, BlockKey)> = vec![]; + let mut chunks = leaves.chunks(arity as usize); + while let Some(nodes) = chunks.next() { + //log_debug!("making parent"); + parents.push(Self::make_parent_block( + conv_key, + nodes.to_vec(), + already_existing, + storage, + )?); + } + //log_debug!("level with {} parents", parents.len()); + + if 1 < parents.len() { + return Self::make_tree( + already_existing, + parents.as_slice(), + conv_key, + arity, + storage, + ); + } + Ok(parents[0].clone()) + } + + /// returns content_block id/key pair, and root_block id/key pair + fn save_( + already_existing: &mut HashMap, + blocks: &[(BlockId, BlockKey)], + meta: &mut RandomAccessFileMeta, + conv_key: &ChaCha20Key, + storage: &'a Box, + ) -> Result<((BlockId, BlockKey), (BlockId, BlockKey)), FileError> { + let leaf_blocks_nbr = blocks.len(); + let arity = meta.arity(); + + let mut depth: u8 = u8::MAX; + for i in 0..u8::MAX { + if leaf_blocks_nbr <= (arity as usize).pow(i.into()) { + depth = i; + break; + } + } + if depth == 
u8::MAX { + return Err(FileError::TooBig); + } + meta.set_depth(depth); + //log_debug!("depth={} leaves={}", depth, leaf_blocks_nbr); + + let content_block = if depth == 0 { + assert!(blocks.len() == 1); + blocks[0].clone() + } else { + // we create the tree + Self::make_tree(already_existing, &blocks, &conv_key, arity, storage)? + }; + + let meta_object = Object::new_with_convergence_key( + ObjectContent::V0(ObjectContentV0::RandomAccessFileMeta(meta.clone())), + None, + store_valid_value_size(meta.chunk_size() as usize), + conv_key, + ); + //log_debug!("saving meta object"); + meta_object.save(storage)?; + + // creating the root block that contains as first child the meta_object, and as second child the content_block + // it is added to storage in make_parent_block + //log_debug!("saving root block"); + let root_block = Self::make_parent_block( + conv_key, + vec![ + (meta_object.id(), meta_object.key().unwrap()), + content_block.clone(), + ], + already_existing, + storage, + )?; + Ok((content_block, root_block)) + } + + /// Creates a new file based on a content that is fully known at the time of creation. + /// If you want to stream progressively the content into the new file, you should use new_empty(), write() and save() instead + pub fn new_from_slice( + content: &[u8], + block_size: usize, + content_type: String, + metadata: Vec, + store: &StoreRepo, + store_secret: &ReadCapSecret, + storage: &'a Box, + ) -> Result, FileError> { + //let max_block_size = store_max_value_size(); + let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA; + + let arity = ((valid_block_size) / CHILD_SIZE) as u16; + + let total_size = content.len() as u64; + + let conv_key = Object::convergence_key(store, store_secret); + + let mut blocks: Vec<(BlockId, BlockKey)> = vec![]; + + let mut already_existing: HashMap = HashMap::new(); + + //log_debug!("making the leaves"); + for chunck in content.chunks(valid_block_size) { + let data_chunk = ChunkContentV0::DataChunk(chunck.to_vec()); + let content_ser = serde_bare::to_vec(&data_chunk).unwrap(); + blocks.push(Self::make_block( + content_ser, + &conv_key, + vec![], + &mut already_existing, + storage, + )?); + } + assert_eq!( + (total_size as usize + valid_block_size - 1) / valid_block_size, + blocks.len() + ); + + let mut meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 { + content_type, + metadata, + chunk_size: valid_block_size as u32, + total_size, + arity, + depth: 0, + }); + + let (content_block, root_block) = Self::save_( + &mut already_existing, + &blocks, + &mut meta, + &conv_key, + storage, + )?; + + Ok(Self { + storage, + meta, + block_contents: HashMap::new(), // not used in this case + blocks: vec![], // not used in this case + id: Some(root_block.0.clone()), + key: Some(root_block.1.clone()), + content_block: Some(content_block), + conv_key: None, // not used in this case + remainder: vec![], // not used in this case + size: 0, // not used in this case + }) + } + + pub fn new_empty( + block_size: usize, + content_type: String, + metadata: Vec, + store: &StoreRepo, + store_secret: &ReadCapSecret, + storage: &'a Box, + ) -> Self { + let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA; + + let arity = ((valid_block_size) / CHILD_SIZE) as u16; + + let meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 { + content_type, + metadata, + chunk_size: valid_block_size as u32, + arity, + total_size: 0, // will be filled in later, during save + depth: 0, // will be filled in later, during save + }); + + Self { + 
storage, + meta, + block_contents: HashMap::new(), + blocks: vec![], + id: None, + key: None, + content_block: None, + conv_key: Some(Object::convergence_key(store, store_secret)), + remainder: vec![], + size: 0, + } + } + + /// Appends some data at the end of the file currently created with new_empty() and not saved yet. + /// you can call it many times. Don't forget to eventually call save() + pub fn write(&mut self, data: &[u8]) -> Result<(), FileError> { + if self.id.is_some() { + return Err(FileError::AlreadySaved); + } + let remainder = self.remainder.len(); + let chunk_size = self.meta.chunk_size() as usize; + let mut pos: usize = 0; + let conv_key = self.conv_key.unwrap(); + // TODO: provide an option to search in storage for already existing, when doing a resume of previously aborted write + let mut already_existing: HashMap = HashMap::new(); + + if remainder > 0 { + if data.len() >= chunk_size - remainder { + let mut new_block = Vec::with_capacity(chunk_size); + new_block.append(&mut self.remainder); + pos = chunk_size - remainder; + self.size += chunk_size; + //log_debug!("size += chunk_size {} {}", self.size, chunk_size); + new_block.extend(data[0..pos].iter()); + assert_eq!(new_block.len(), chunk_size); + let data_chunk = ChunkContentV0::DataChunk(new_block); + let content_ser = serde_bare::to_vec(&data_chunk).unwrap(); + self.blocks.push(Self::make_block( + content_ser, + &conv_key, + vec![], + &mut already_existing, + self.storage, + )?); + } else { + // not enough data to create a new block + self.remainder.extend(data.iter()); + return Ok(()); + } + } else if data.len() < chunk_size { + self.remainder = Vec::from(data); + return Ok(()); + } + + for chunck in data[pos..].chunks(chunk_size) { + if chunck.len() == chunk_size { + self.size += chunk_size; + //log_debug!("size += chunk_size {} {}", self.size, chunk_size); + let data_chunk = ChunkContentV0::DataChunk(chunck.to_vec()); + let content_ser = serde_bare::to_vec(&data_chunk).unwrap(); + self.blocks.push(Self::make_block( + content_ser, + &conv_key, + vec![], + &mut already_existing, + self.storage, + )?); + } else { + self.remainder = Vec::from(chunck); + return Ok(()); + } + } + Ok(()) + } + + pub fn save(&mut self) -> Result<(), FileError> { + if self.id.is_some() { + return Err(FileError::AlreadySaved); + } + // save the remainder, if any. + if self.remainder.len() > 0 { + self.size += self.remainder.len(); + //log_debug!("size += remainder {} {}", self.size, self.remainder.len()); + let mut remainder = Vec::with_capacity(self.remainder.len()); + remainder.append(&mut self.remainder); + let data_chunk = ChunkContentV0::DataChunk(remainder); + let content_ser = serde_bare::to_vec(&data_chunk).unwrap(); + self.blocks.push(Self::make_block( + content_ser, + &self.conv_key.unwrap(), + vec![], + &mut HashMap::new(), + self.storage, + )?); + } + + self.meta.set_total_size(self.size as u64); + + let mut already_existing: HashMap = HashMap::new(); + let (content_block, root_block) = Self::save_( + &mut already_existing, + &self.blocks, + &mut self.meta, + self.conv_key.as_ref().unwrap(), + self.storage, + )?; + + self.id = Some(root_block.0); + self.key = Some(root_block.1.clone()); + self.content_block = Some(content_block); + + self.blocks = vec![]; + self.blocks.shrink_to_fit(); + + Ok(()) + } + + /// Opens a file for read purpose. 
+ pub fn open( + id: ObjectId, + key: SymKey, + storage: &'a Box, + ) -> Result, FileError> { + // load root block + let root_block = storage.get(&id)?; + + if root_block.children().len() != 2 + || *root_block.content().commit_header_obj() != CommitHeaderObject::RandomAccess + { + return Err(FileError::BlockDeserializeError); + } + + let (root_sub_blocks, _) = Self::read_block(root_block, &key)?; + + // load meta object (first one in root block) + let meta_object = Object::load( + root_sub_blocks[0].0, + Some(root_sub_blocks[0].1.clone()), + storage, + )?; + + let meta = match meta_object.content_v0()? { + ObjectContentV0::RandomAccessFileMeta(meta) => meta, + _ => return Err(FileError::InvalidChildren), + }; + + Ok(RandomAccessFile { + storage, + meta, + block_contents: HashMap::new(), // not used in this case + blocks: vec![], // not used in this case + id: Some(id), + key: Some(key), + content_block: Some(root_sub_blocks[1].clone()), + conv_key: None, + remainder: vec![], + size: 0, + }) + } + + /// reads at most one block from the file. the returned vector should be tested for size. it might be smaller than what you asked for. + /// `pos`ition can be anywhere in the file. + pub fn read(&self, pos: usize, size: usize) -> Result, FileError> { + if size == 0 { + return Err(FileError::InvalidArgument); + } + if self.id.is_some() { + if pos + size > self.meta.total_size() as usize { + return Err(FileError::EndOfFile); + } + let mut current_block_id_key = self.content_block.as_ref().unwrap().clone(); + + let depth = self.meta.depth(); + let arity = self.meta.arity(); + + let mut level_pos = pos; + for level in 0..depth { + let tree_block = self.storage.get(¤t_block_id_key.0)?; + let (children, content) = Self::read_block(tree_block, ¤t_block_id_key.1)?; + if children.len() == 0 || content.len() > 0 { + return Err(FileError::BlockDeserializeError); + } + let factor = (arity as usize).pow(depth as u32 - level as u32 - 1) + * self.meta.chunk_size() as usize; + let level_index = pos / factor; + if level_index >= children.len() { + return Err(FileError::EndOfFile); + } + current_block_id_key = (children[level_index]).clone(); + level_pos = pos as usize % factor; + } + + let content_block = self.storage.get(¤t_block_id_key.0)?; + //log_debug!("CONTENT BLOCK SIZE {}", content_block.size()); + + let (children, content) = Self::read_block(content_block, ¤t_block_id_key.1)?; + + if children.len() == 0 && content.len() > 0 { + //log_debug!("CONTENT SIZE {}", content.len()); + + if level_pos >= content.len() { + return Err(FileError::EndOfFile); + } + let end = min(content.len(), level_pos + size); + return Ok(content[level_pos..end].to_vec()); + } else { + return Err(FileError::BlockDeserializeError); + } + } else { + // hasn't been saved yet, we can use the self.blocks as a flat array and the remainder too + let factor = self.meta.chunk_size() as usize; + let index = pos / factor as usize; + let level_pos = pos % factor as usize; + let remainder_pos = self.blocks.len() * factor; + if pos >= remainder_pos { + let pos_in_remainder = pos - remainder_pos; + if self.remainder.len() > 0 && pos_in_remainder < self.remainder.len() { + let end = min(self.remainder.len(), pos_in_remainder + size); + return Ok(self.remainder[pos_in_remainder..end].to_vec()); + } else { + return Err(FileError::EndOfFile); + } + } + //log_debug!("{} {} {} {}", index, self.blocks.len(), factor, level_pos); + if index >= self.blocks.len() { + return Err(FileError::EndOfFile); + } + let block = &self.blocks[index]; + let 
content_block = self.storage.get(&block.0)?; + let (children, content) = Self::read_block(content_block, &block.1)?; + if children.len() == 0 && content.len() > 0 { + //log_debug!("CONTENT SIZE {}", content.len()); + + if level_pos >= content.len() { + return Err(FileError::EndOfFile); + } + let end = min(content.len(), level_pos + size); + return Ok(content[level_pos..end].to_vec()); + } else { + return Err(FileError::BlockDeserializeError); + } + } + } + + pub fn blocks(&self) -> impl Iterator + '_ { + self.blocks + .iter() + .map(|key| self.storage.get(&key.0).unwrap()) + } + + /// Size once encoded, before deduplication. Only available before save() + pub fn size(&self) -> usize { + let mut total = 0; + self.blocks().for_each(|b| total += b.size()); + total + } + + /// Real size on disk + pub fn dedup_size(&self) -> usize { + let mut total = 0; + self.block_contents + .values() + .for_each(|b| total += self.storage.get(b).unwrap().size()); + total + } + + pub fn depth(&self) -> Result { + Ok(self.meta.depth()) + + // unimplemented!(); + // if self.key().is_none() { + // return Err(ObjectParseError::MissingRootKey); + // } + // let parents = vec![(self.id(), self.key().unwrap())]; + // Self::collect_leaves( + // &self.blocks, + // &parents, + // self.blocks.len() - 1, + // &mut None, + // &mut None, + // &self.block_contents, + // ) + } +} + +impl fmt::Display for RandomAccessFile<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "====== File ID {}", + self.id + .map_or("NOT SAVED".to_string(), |i| format!("{}", i)) + )?; + writeln!( + f, + "== Key: {}", + self.key + .as_ref() + .map_or("None".to_string(), |k| format!("{}", k)) + )?; + writeln!(f, "== depth: {}", self.meta.depth())?; + writeln!(f, "== arity: {}", self.meta.arity())?; + writeln!(f, "== chunk_size: {}", self.meta.chunk_size())?; + writeln!(f, "== total_size: {}", self.meta.total_size())?; + writeln!(f, "== content_type: {}", self.meta.content_type())?; + writeln!(f, "== metadata len: {}", self.meta.metadata().len())?; + if self.id.is_none() { + writeln!(f, "== blocks to save: {}", self.blocks.len())?; + } + Ok(()) + } +} + +#[cfg(test)] +mod test { + + use time::Instant; + + use crate::file::*; + use std::io::BufReader; + use std::io::Read; + + struct Test<'a> { + storage: Box, + } + + impl<'a> Test<'a> { + fn storage(s: impl RepoStore + 'a) -> Self { + Test { + storage: Box::new(s), + } + } + fn s(&self) -> &Box { + &self.storage + } + } + + /// Checks that a content that does fit in one block, creates an arity of 0 + #[test] + pub fn test_depth_0() { + let block_size = store_max_value_size(); + //store_valid_value_size(0) + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + //let storage: Arc<&dyn RepoStore> = Arc::new(&hashmap_storage); + + ////// 1 MB of data! 
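+ // Note: with store_max_value_size() as the block size, this payload fits in a single
+ // data chunk, so the expected tree depth is 0 and storage should end up with exactly
+ // 3 blocks (the data chunk, the meta object, and the root block), as asserted below.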
+ let data_size = block_size - BLOCK_EXTRA; + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + log_debug!("creating 1MB of data"); + let content: Vec = vec![99; data_size]; + + log_debug!("creating random access file with that data"); + let file: RandomAccessFile = RandomAccessFile::new_from_slice( + &content, + block_size, + "text/plain".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ) + .expect("new_from_slice"); + log_debug!("{}", file); + + let id = file.id.as_ref().unwrap().clone(); + + let file_size = file.size(); + log_debug!("file size to save : {}", file_size); + + log_debug!("data size: {}", data_size); + + let read_content = file.read(0, data_size).expect("reading all"); + assert_eq!(read_content, content); + + let read_content2 = file.read(0, data_size + 1); + assert_eq!(read_content2, Err(FileError::EndOfFile)); + + let read_content = file.read(data_size - 9, 9).expect("reading end"); + assert_eq!(read_content, vec![99, 99, 99, 99, 99, 99, 99, 99, 99]); + + let read_content = file.read(data_size - 9, 10); + assert_eq!(read_content, Err(FileError::EndOfFile)); + + // log_debug!( + // "overhead: {} - {}%", + // file_size - data_size, + // ((file_size - data_size) * 100) as f32 / data_size as f32 + // ); + + // let dedup_size = file.dedup_size(); + // log_debug!( + // "dedup compression: {} - {}%", + // data_size - dedup_size, + // ((data_size - dedup_size) * 100) as f32 / data_size as f32 + // ); + + // log_debug!("number of blocks : {}", file.blocks.len()); + // assert_eq!( + // file.blocks.len(), + // MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1 + // ); + assert_eq!(file.depth(), Ok(0)); + assert_eq!(t.s().len(), Ok(3)); + + let file = RandomAccessFile::open(id, file.key.unwrap(), t.s()).expect("re open"); + + log_debug!("{}", file); + + let read_content = file.read(0, data_size).expect("reading all after re open"); + assert_eq!(read_content, content); + } + + /// Checks that a content that doesn't fit in all the children of first level in tree + #[test] + pub fn test_depth_1() { + const MAX_ARITY_LEAVES: usize = 15887; + const MAX_DATA_PAYLOAD_SIZE: usize = 1048564; + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + ////// 16 GB of data! + let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE; + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + log_debug!("creating 16GB of data"); + + let content: Vec = vec![99; data_size]; + + log_debug!("creating random access file with that data"); + let file: RandomAccessFile = RandomAccessFile::new_from_slice( + &content, + store_max_value_size(), + "text/plain".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ) + .expect("new_from_slice"); + log_debug!("{}", file); + + let _id = file.id.as_ref().unwrap().clone(); + + log_debug!("data size: {}", data_size); + + assert_eq!(file.depth(), Ok(1)); + + assert_eq!(t.s().len(), Ok(4)); + } + + /// Checks that a content that doesn't fit in all the children of first level in tree + #[test] + pub fn test_depth_2() { + const MAX_ARITY_LEAVES: usize = 15887; + const MAX_DATA_PAYLOAD_SIZE: usize = 1048564; + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + ////// 16 GB of data! 
+ let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE + 1; + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + log_debug!("creating 16GB of data"); + let content: Vec = vec![99; data_size]; + + log_debug!("creating file with that data"); + let file: RandomAccessFile = RandomAccessFile::new_from_slice( + &content, + store_max_value_size(), + "text/plain".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ) + .expect("new_from_slice"); + log_debug!("{}", file); + + let file_size = file.size(); + log_debug!("file size: {}", file_size); + + log_debug!("data size: {}", data_size); + + assert_eq!(file.depth().unwrap(), 2); + + assert_eq!(t.s().len(), Ok(7)); + } + + /// Checks that a content that doesn't fit in all the children of first level in tree + #[test] + pub fn test_depth_3() { + const MAX_ARITY_LEAVES: usize = 61; + const MAX_DATA_PAYLOAD_SIZE: usize = 4084; + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + ////// 900 MB of data! + let data_size = + MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE; + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + log_debug!("creating 900MB of data"); + let content: Vec = vec![99; data_size]; + + log_debug!("creating file with that data"); + let file: RandomAccessFile = RandomAccessFile::new_from_slice( + &content, + store_valid_value_size(0), + "text/plain".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ) + .expect("new_from_slice"); + log_debug!("{}", file); + + let file_size = file.size(); + log_debug!("file size: {}", file_size); + + let read_content = file.read(0, data_size).expect("reading all"); + assert_eq!(read_content.len(), MAX_DATA_PAYLOAD_SIZE); + + let read_content = file.read(9000, 10000).expect("reading 10k"); + assert_eq!(read_content, vec![99; 3252]); + + // log_debug!("data size: {}", data_size); + // log_debug!( + // "overhead: {} - {}%", + // file_size - data_size, + // ((file_size - data_size) * 100) as f32 / data_size as f32 + // ); + + // let dedup_size = file.dedup_size(); + // log_debug!( + // "dedup compression: {} - {}%", + // data_size - dedup_size, + // ((data_size - dedup_size) * 100) as f32 / data_size as f32 + // ); + + // log_debug!("number of blocks : {}", file.blocks.len()); + // assert_eq!( + // file.blocks.len(), + // MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1 + // ); + assert_eq!(file.depth().unwrap(), 3); + + assert_eq!(t.s().len(), Ok(6)); + } + + /// Checks that a content that doesn't fit in all the children of first level in tree + #[test] + pub fn test_depth_4() { + const MAX_ARITY_LEAVES: usize = 61; + const MAX_DATA_PAYLOAD_SIZE: usize = 4084; + + ////// 52GB of data! 
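+ // 61^4 = 13,845,841 leaves of 4,084 bytes (~52 GiB): the largest payload a depth-4 tree
+ // with arity 61 can address, hence the expected depth of 4.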
+ let data_size = MAX_ARITY_LEAVES + * MAX_ARITY_LEAVES + * MAX_ARITY_LEAVES + * MAX_ARITY_LEAVES + * MAX_DATA_PAYLOAD_SIZE; + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + log_debug!("creating 55GB of data"); + let content: Vec = vec![99; data_size]; + + log_debug!("creating file with that data"); + let file: RandomAccessFile = RandomAccessFile::new_from_slice( + &content, + store_valid_value_size(0), + "text/plain".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ) + .expect("new_from_slice"); + + log_debug!("{}", file); + + let file_size = file.size(); + log_debug!("file size: {}", file_size); + + log_debug!("data size: {}", data_size); + + assert_eq!(file.depth().unwrap(), 4); + + assert_eq!(t.s().len(), Ok(7)); + } + + /// Test async write to a file all at once + #[test] + pub fn test_write_all_at_once() { + let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg"); + let mut reader = BufReader::new(f); + let mut img_buffer: Vec = Vec::new(); + reader + .read_to_end(&mut img_buffer) + .expect("read of test.jpg"); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating file with the JPG content"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_max_value_size(), //store_valid_value_size(0),// + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + file.write(&img_buffer).expect("write all at once"); + + // !!! all those tests work only because store_max_value_size() is bigger than the actual size of the JPEG file. so it fits in one block. + + assert_eq!( + file.read(0, img_buffer.len()).expect("read before save"), + img_buffer + ); + + // asking too much, receiving just enough + assert_eq!( + file.read(0, img_buffer.len() + 1) + .expect("read before save"), + img_buffer + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]); + + // reading one byte after the end of the file size. + assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + + file.save().expect("save"); + + let res = file.read(0, img_buffer.len()).expect("read all"); + assert_eq!(res, img_buffer); + + // asking too much, receiving an error, as now we know the total size of file, and we check it + assert_eq!( + file.read(0, img_buffer.len() + 1), + Err(FileError::EndOfFile) + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]); + + // reading one byte after the end of the file size. 
+ assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + } + + /// Test async write to a file by increments + #[test] + pub fn test_write_by_increments() { + let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg"); + let mut reader = BufReader::new(f); + let mut img_buffer: Vec = Vec::new(); + reader + .read_to_end(&mut img_buffer) + .expect("read of test.jpg"); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating file with the JPG content"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_max_value_size(), //store_valid_value_size(0),// + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + for chunk in img_buffer.chunks(1000) { + file.write(chunk).expect("write a chunk"); + } + + assert_eq!( + file.read(0, img_buffer.len()).expect("read before save"), + img_buffer + ); + + // asking too much, receiving just enough + assert_eq!( + file.read(0, img_buffer.len() + 1) + .expect("read before save"), + img_buffer + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]); + + // reading one byte after the end of the file size. + assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + + file.save().expect("save"); + + // this works only because store_max_value_size() is bigger than the actual size of the JPEG file. so it fits in one block. + let res = file.read(0, img_buffer.len()).expect("read all"); + + assert_eq!(res, img_buffer); + + // asking too much, receiving an error, as now we know the total size of file, and we check it + assert_eq!( + file.read(0, img_buffer.len() + 1), + Err(FileError::EndOfFile) + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]); + + // reading one byte after the end of the file size. 
+ assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + } + + /// Test async write to a file by increments small blocks + #[test] + pub fn test_write_by_increments_small_blocks() { + let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg"); + let mut reader = BufReader::new(f); + let mut img_buffer: Vec = Vec::new(); + reader + .read_to_end(&mut img_buffer) + .expect("read of test.jpg"); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating file with the JPG content"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_valid_value_size(0), + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + let first_block_content = img_buffer[0..4084].to_vec(); + + for chunk in img_buffer.chunks(1000) { + file.write(chunk).expect("write a chunk"); + } + + log_debug!("{}", file); + + assert_eq!( + file.read(0, img_buffer.len()).expect("read before save"), + first_block_content + ); + + // asking too much, receiving just enough + assert_eq!( + file.read(0, img_buffer.len() + 1) + .expect("read before save"), + first_block_content + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]); + + // reading one byte after the end of the file size. + assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + + file.save().expect("save"); + + log_debug!("{}", file); + + assert_eq!(img_buffer.len(), file.meta.total_size() as usize); + + let res = file.read(0, img_buffer.len()).expect("read all"); + assert_eq!(res, first_block_content); + + // asking too much, receiving an error, as now we know the total size of file, and we check it + assert_eq!( + file.read(0, img_buffer.len() + 1), + Err(FileError::EndOfFile) + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]); + + // reading one byte after the end of the file size. 
+ assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + } + + /// Test async write to a file all at once + #[test] + pub fn test_write_all_at_once_small_blocks() { + let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg"); + let mut reader = BufReader::new(f); + let mut img_buffer: Vec = Vec::new(); + reader + .read_to_end(&mut img_buffer) + .expect("read of test.jpg"); + + let first_block_content = img_buffer[0..4084].to_vec(); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating file with the JPG content"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_valid_value_size(0), + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + file.write(&img_buffer).expect("write all at once"); + + assert_eq!( + file.read(0, img_buffer.len()).expect("read before save"), + first_block_content + ); + + // asking too much, receiving just enough + assert_eq!( + file.read(0, img_buffer.len() + 1) + .expect("read before save"), + first_block_content + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]); + + // reading one byte after the end of the file size. + assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + + file.save().expect("save"); + + let res = file.read(0, img_buffer.len()).expect("read all"); + assert_eq!(res, first_block_content); + + let res = file.read(10, img_buffer.len() - 10).expect("read all"); + assert_eq!(res, first_block_content[10..].to_vec()); + + // asking too much, receiving an error, as now we know the total size of file, and we check it + assert_eq!( + file.read(0, img_buffer.len() + 1), + Err(FileError::EndOfFile) + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]); + + // reading one byte after the end of the file size. + assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument)); + } + + /// Test depth 4 with 52GB of data, but using write in small increments, so the memory burden on the system will be minimal + #[test] + pub fn test_depth_4_write_small() { + const MAX_ARITY_LEAVES: usize = 61; + const MAX_DATA_PAYLOAD_SIZE: usize = 4084; + + ////// 52GB of data! 
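+ // Same 61^4 * 4,084-byte payload as test_depth_4, but streamed through write() in ~5 MB
+ // increments so the whole ~52 GiB never has to sit in memory at once.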
+ let data_size = MAX_ARITY_LEAVES + * MAX_ARITY_LEAVES + * MAX_ARITY_LEAVES + * MAX_ARITY_LEAVES + * MAX_DATA_PAYLOAD_SIZE; + + // chunks of 5 MB + let chunk_nbr = data_size / 5000000; + let last_chunk = data_size % 5000000; + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating empty file"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_valid_value_size(0), + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + let chunk = vec![99; 5000000]; + let last_chunk = vec![99; last_chunk]; + + for _i in 0..chunk_nbr { + file.write(&chunk).expect("write a chunk"); + } + + file.write(&last_chunk).expect("write last chunk"); + + log_debug!("{}", file); + + file.save().expect("save"); + + log_debug!("{}", file); + + let file_size = file.size(); + log_debug!("file size: {}", file_size); + + log_debug!("data size: {}", data_size); + + assert_eq!(data_size, file.meta.total_size() as usize); + + assert_eq!(file.depth().unwrap(), 4); + + assert_eq!(t.s().len(), Ok(7)); + } + + /// Test open + #[test] + pub fn test_open() { + let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg"); + let mut reader = BufReader::new(f); + let mut img_buffer: Vec = Vec::new(); + reader + .read_to_end(&mut img_buffer) + .expect("read of test.jpg"); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating file with the JPG content"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_max_value_size(), //store_valid_value_size(0),// + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + for chunk in img_buffer.chunks(1000) { + file.write(chunk).expect("write a chunk"); + } + + file.save().expect("save"); + + let file2 = RandomAccessFile::open(file.id().unwrap(), file.key.unwrap(), t.s()) + .expect("reopen file"); + + // this works only because store_max_value_size() is bigger than the actual size of the JPEG file. so it fits in one block. + let res = file2.read(0, img_buffer.len()).expect("read all"); + + log_debug!("{}", file2); + + assert_eq!(res, img_buffer); + + // asking too much, receiving an error, as now we know the total size of file, and we check it + assert_eq!( + file2.read(0, img_buffer.len() + 1), + Err(FileError::EndOfFile) + ); + + // reading too far, well behind the size of the JPG + assert_eq!(file2.read(100000, 1), Err(FileError::EndOfFile)); + + assert_eq!(file2.read(10000, 1).expect("read after save"), vec![41]); + + // reading one byte after the end of the file size. 
+ assert_eq!(file2.read(29454, 1), Err(FileError::EndOfFile)); + + assert_eq!(file2.read(29454, 0), Err(FileError::InvalidArgument)); + } + + /// Test depth 4, but using write in increments, so the memory burden on the system will be minimal + #[test] + pub fn test_depth_4_big_write_small() { + let encoding_big_file = Instant::now(); + + let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file"); + let mut reader = BufReader::new(f); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating empty file"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_valid_value_size(0), + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + let mut chunk = [0u8; 1000000]; + + loop { + let size = reader.read(&mut chunk).expect("read a chunk"); + //log_debug!("{}", size); + file.write(&chunk[0..size]).expect("write a chunk"); + if size != 1000000 { + break; + } + } + + log_debug!("{}", file); + + file.save().expect("save"); + + log_debug!("{}", file); + + log_debug!("data size: {}", file.meta.total_size()); + + //assert_eq!(data_size, file.meta.total_size() as usize); + + assert_eq!(file.depth().unwrap(), 4); + + log_debug!( + "encoding_big_file took: {} s", + encoding_big_file.elapsed().as_seconds_f32() + ); + } + + /// Test depth 4 with 2.7GB of data, but using write in increments, so the memory burden on the system will be minimal + #[test] + pub fn test_depth_4_big_write_big() { + let encoding_big_file = Instant::now(); + + let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file"); + let mut reader = BufReader::new(f); + + let hashmap_storage = HashMapRepoStore::new(); + let t = Test::storage(hashmap_storage); + + let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); + + log_debug!("creating empty file"); + let mut file: RandomAccessFile = RandomAccessFile::new_empty( + store_max_value_size(), + "image/jpeg".to_string(), + vec![], + &store_repo, + &store_secret, + t.s(), + ); + + log_debug!("{}", file); + + let mut chunk = [0u8; 2000000]; + + loop { + let size = reader.read(&mut chunk).expect("read a chunk"); + //log_debug!("{}", size); + file.write(&chunk[0..size]).expect("write a chunk"); + if size != 2000000 { + break; + } + } + + log_debug!("{}", file); + + file.save().expect("save"); + + log_debug!("{}", file); + + log_debug!("data size: {}", file.meta.total_size()); + + //assert_eq!(data_size, file.meta.total_size() as usize); + + assert_eq!(file.depth().unwrap(), 1); + + log_debug!( + "encoding_big_file took: {} s", + encoding_big_file.elapsed().as_seconds_f32() + ); + } +} diff --git a/p2p-repo/src/lib.rs b/p2p-repo/src/lib.rs index 2369127..887a654 100644 --- a/p2p-repo/src/lib.rs +++ b/p2p-repo/src/lib.rs @@ -14,6 +14,8 @@ pub mod block; pub mod object; +pub mod file; + pub mod commit; pub mod branch; diff --git a/p2p-repo/src/object.rs b/p2p-repo/src/object.rs index a8928b9..a6a79a4 100644 --- a/p2p-repo/src/object.rs +++ b/p2p-repo/src/object.rs @@ -21,21 +21,21 @@ use crate::log::*; use crate::store::*; use crate::types::*; -const BLOCK_EXTRA: usize = 12; // 8 is the smallest extra + BLOCK_MAX_DATA_EXTRA -const HEADER_REF_EXTRA: usize = 66; -const HEADER_EMBED_EXTRA: usize = 34; -const CHILD_SIZE: usize = 66; +pub const BLOCK_EXTRA: usize = 12; // 8 is the smallest extra + BLOCK_MAX_DATA_EXTRA +pub const 
HEADER_REF_EXTRA: usize = 66; +pub const HEADER_EMBED_EXTRA: usize = 34; +pub const CHILD_SIZE: usize = 66; -const BLOCK_ID_SIZE: usize = 33; +pub const BLOCK_ID_SIZE: usize = 33; /// Size of serialized SymKey -const BLOCK_KEY_SIZE: usize = 33; +pub const BLOCK_KEY_SIZE: usize = 33; /// Size of serialized Object with deps reference. /// Varint extra bytes when reaching the maximum value we will ever use in one block -const BIG_VARINT_EXTRA: usize = 2; +pub const BIG_VARINT_EXTRA: usize = 2; /// Varint extra bytes when reaching the maximum size of data byte arrays. -const DATA_VARINT_EXTRA: usize = 4; +pub const DATA_VARINT_EXTRA: usize = 4; -const BLOCK_MAX_DATA_EXTRA: usize = 4; +pub const BLOCK_MAX_DATA_EXTRA: usize = 4; #[derive(Debug)] /// An Object in memory. This is not used to serialize data @@ -85,7 +85,7 @@ pub enum ObjectCopyError { } impl Object { - fn convergence_key( + pub(crate) fn convergence_key( store_pubkey: &StoreRepo, store_readcap_secret: &ReadCapSecret, ) -> [u8; blake3::OUT_LEN] { @@ -128,15 +128,13 @@ impl Object { fn make_header_v0( header: CommitHeaderV0, object_size: usize, - store: &StoreRepo, - store_secret: &ReadCapSecret, + conv_key: &ChaCha20Key, ) -> (ObjectRef, Vec) { - let header_obj = Object::new( + let header_obj = Object::new_with_convergence_key( ObjectContent::V0(ObjectContentV0::CommitHeader(CommitHeader::V0(header))), None, object_size, - store, - store_secret, + conv_key, ); let header_ref = ObjectRef { id: header_obj.id(), @@ -148,11 +146,10 @@ impl Object { fn make_header( header: CommitHeader, object_size: usize, - store: &StoreRepo, - store_secret: &ReadCapSecret, + conv_key: &ChaCha20Key, ) -> (ObjectRef, Vec) { match header { - CommitHeader::V0(v0) => Self::make_header_v0(v0, object_size, store, store_secret), + CommitHeader::V0(v0) => Self::make_header_v0(v0, object_size, conv_key), } } @@ -300,6 +297,16 @@ impl Object { block_size: usize, store: &StoreRepo, store_secret: &ReadCapSecret, + ) -> Object { + let conv_key = Self::convergence_key(store, store_secret); + Self::new_with_convergence_key(content, header, block_size, &conv_key) + } + + pub fn new_with_convergence_key( + content: ObjectContent, + mut header: Option, + block_size: usize, + conv_key: &ChaCha20Key, ) -> Object { if header.is_some() && !content.can_have_header() { panic!( @@ -321,13 +328,11 @@ impl Object { let mut blocks: Vec = vec![]; let mut block_contents: HashMap = HashMap::new(); let mut already_existing: HashMap = HashMap::new(); - let conv_key = Self::convergence_key(store, store_secret); let header_prepare = match &header { None => (0 as usize, None, vec![]), Some(h) => { - let block_info = - Self::make_header(h.clone(), valid_block_size, store, store_secret); + let block_info = Self::make_header(h.clone(), valid_block_size, conv_key); if block_info.1.len() == 1 { ( block_info.1[0].encrypted_content().len(), @@ -367,7 +372,7 @@ impl Object { Self::add_block( Self::make_block( content_ser, - &conv_key, + conv_key, vec![], header_ref, &mut already_existing, @@ -386,7 +391,7 @@ impl Object { let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec()); let chunk_ser = serde_bare::to_vec(&data_chunk).unwrap(); Self::add_block( - Self::make_block(chunk_ser, &conv_key, vec![], None, &mut already_existing), + Self::make_block(chunk_ser, conv_key, vec![], None, &mut already_existing), &mut blocks, &mut block_contents, &mut already_existing, @@ -401,7 +406,7 @@ impl Object { &mut block_contents, &mut already_existing, blocks.as_slice(), - &conv_key, + conv_key, 
header_prepare.0, header_prepare.1, header_prepare.2, @@ -489,7 +494,9 @@ impl Object { let header = match root.header_ref() { Some(header_ref) => match header_ref.obj { - CommitHeaderObject::None => panic!("shouldn't happen"), + CommitHeaderObject::None | CommitHeaderObject::RandomAccess => { + panic!("shouldn't happen") + } CommitHeaderObject::Id(id) => { let obj = Object::load(id, Some(header_ref.key.clone()), store)?; match obj.content()? { @@ -539,6 +546,7 @@ impl Object { } Ok(()) } + #[cfg(test)] pub fn save_in_test( &mut self, @@ -891,6 +899,7 @@ impl fmt::Display for ObjectContent { ObjectContentV0::Signature(_) => "Signature", ObjectContentV0::Certificate(_) => "Certificate", ObjectContentV0::File(_) => "File", + ObjectContentV0::RandomAccessFileMeta(_) => "RandomAccessFileMeta", }, ), }; @@ -1300,7 +1309,7 @@ mod test { const MAX_ARITY_LEAVES: usize = 61; const MAX_DATA_PAYLOAD_SIZE: usize = 4084; - ////// 55GB of data! + ////// 52GB of data! let data_size = MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_ARITY_LEAVES @@ -1309,7 +1318,7 @@ mod test { - 12; let (store_repo, store_secret) = StoreRepo::dummy_public_v0(); - log_debug!("creating 55GB of data"); + log_debug!("creating 52GB of data"); let content = ObjectContent::V0(ObjectContentV0::File(File::V0(FileV0 { content_type: "".into(), metadata: vec![], diff --git a/p2p-repo/src/store.rs b/p2p-repo/src/store.rs index 9f17b15..f4facfa 100644 --- a/p2p-repo/src/store.rs +++ b/p2p-repo/src/store.rs @@ -13,9 +13,9 @@ use futures::StreamExt; +use crate::log::*; use crate::types::*; use crate::utils::Receiver; - use std::sync::{Arc, RwLock}; use std::{ cmp::{max, min}, @@ -24,14 +24,17 @@ use std::{ }; pub trait RepoStore: Send + Sync { - /// Load a block from the store. + /// Load a block from the storage. fn get(&self, id: &BlockId) -> Result; - /// Save a block to the store. + /// Save a block to the storage. fn put(&self, block: &Block) -> Result; - /// Delete a block from the store. + /// Delete a block from the storage. fn del(&self, id: &BlockId) -> Result<(Block, usize), StorageError>; + + /// number of Blocks in the storage + fn len(&self) -> Result; } #[derive(Debug, PartialEq)] @@ -91,7 +94,7 @@ const MAX_FACTOR: usize = 256; /// Returns a valid/optimal value size for the entries of the storage backend. pub fn store_valid_value_size(size: usize) -> usize { min( - max(1, (size as f32 / DISK_BLOCK_SIZE as f32).ceil() as usize), + max(1, (size + DISK_BLOCK_SIZE - 1) / DISK_BLOCK_SIZE), MAX_FACTOR, ) * DISK_BLOCK_SIZE } @@ -151,8 +154,13 @@ impl RepoStore for HashMapRepoStore { } } + fn len(&self) -> Result { + Ok(self.get_len()) + } + fn put(&self, block: &Block) -> Result { let id = block.id(); + //log_debug!("PUTTING {}", id); let mut b = block.clone(); b.set_key(None); self.blocks.write().unwrap().insert(id, b); diff --git a/p2p-repo/src/types.rs b/p2p-repo/src/types.rs index a6b899a..be6dfc8 100644 --- a/p2p-repo/src/types.rs +++ b/p2p-repo/src/types.rs @@ -665,6 +665,7 @@ pub enum CommitHeaderObject { Id(ObjectId), EncryptedContent(Vec), None, + RandomAccess, } #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] @@ -706,7 +707,7 @@ pub struct BlockContentV0 { /// is empty if ObjectContent fits in one block or this block is a leaf. 
in both cases, encrypted_content is then not empty pub children: Vec, - /// contains encrypted ChunkContentV0 (entirety, when fitting, or chunks of ObjectContentV0, in DataChunk) used for leafs of the Merkle tree, + /// contains encrypted ChunkContentV0 (entirety, when fitting, or chunks of ObjectContentV0, in DataChunk) used for leaves of the Merkle tree, /// or to store the keys of children (in InternalNode) /// /// Encrypted using convergent encryption with ChaCha20: @@ -1664,6 +1665,83 @@ pub enum File { V0(FileV0), } +/// Random Access File Object +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct RandomAccessFileMetaV0 { + pub content_type: String, + + #[serde(with = "serde_bytes")] + pub metadata: Vec, + + pub total_size: u64, + + pub chunk_size: u32, + + pub arity: u16, + + pub depth: u8, +} + +/// A Random Access file stored in an Object +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub enum RandomAccessFileMeta { + V0(RandomAccessFileMetaV0), +} + +impl RandomAccessFileMeta { + pub fn arity(&self) -> u16 { + match self { + Self::V0(v0) => v0.arity, + } + } + + pub fn depth(&self) -> u8 { + match self { + Self::V0(v0) => v0.depth, + } + } + + pub fn set_depth(&mut self, depth: u8) { + match self { + Self::V0(v0) => { + v0.depth = depth; + } + } + } + + pub fn chunk_size(&self) -> u32 { + match self { + Self::V0(v0) => v0.chunk_size, + } + } + + pub fn total_size(&self) -> u64 { + match self { + Self::V0(v0) => v0.total_size, + } + } + + pub fn set_total_size(&mut self, size: u64) { + match self { + Self::V0(v0) => { + v0.total_size = size; + } + } + } + + pub fn metadata(&self) -> &Vec { + match self { + Self::V0(v0) => &v0.metadata, + } + } + + pub fn content_type(&self) -> &String { + match self { + Self::V0(v0) => &v0.content_type, + } + } +} + /// Immutable data stored encrypted in a Merkle tree V0 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] pub enum ObjectContentV0 { @@ -1674,6 +1752,7 @@ pub enum ObjectContentV0 { Signature(Signature), Certificate(Certificate), File(File), + RandomAccessFileMeta(RandomAccessFileMeta), } /// Immutable data stored encrypted in a Merkle tree
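Usage sketch (not part of the diff above): a minimal caller-side example of the streaming API introduced by this patch, assuming the crate is consumed as `p2p_repo` and that the `HashMapRepoStore` and `StoreRepo::dummy_public_v0()` helpers used in the test module are available to the caller.

// Hypothetical caller-side sketch; module paths assume the p2p_repo crate layout in this patch.
use p2p_repo::file::{FileError, RandomAccessFile};
use p2p_repo::store::{store_max_value_size, HashMapRepoStore, RepoStore};
use p2p_repo::types::StoreRepo;

fn usage_sketch() -> Result<(), FileError> {
    let storage: Box<dyn RepoStore> = Box::new(HashMapRepoStore::new());
    let (store_repo, store_secret) = StoreRepo::dummy_public_v0();

    // Stream content into a new file, then save it.
    let mut file = RandomAccessFile::new_empty(
        store_max_value_size(),
        "text/plain".to_string(),
        vec![],
        &store_repo,
        &store_secret,
        &storage,
    );
    file.write(b"hello ")?;
    file.write(b"world")?;
    file.save()?;

    // Re-open the file by id/key and read 5 bytes starting at position 6.
    let reopened = RandomAccessFile::open(
        file.id().unwrap(),
        file.key().clone().unwrap(),
        &storage,
    )?;
    assert_eq!(reopened.read(6, 5)?, b"world".to_vec());
    Ok(())
}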