nextgraph-rs/ng-repo/src/file.rs

// Copyright (c) 2022-2024 Niko Bonnieure, Par le Peuple, NextGraph.org developers
// All rights reserved.
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.
//! SmallFile and RandomAccessFile objects
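//!
//! A minimal usage sketch (not compiled as a doctest; it assumes a test store such as the
//! `Store::dummy_public_v0()` used in the tests at the bottom of this file):
//!
//! ```ignore
//! // stream some content into a new RandomAccessFile, then save it
//! let mut file = RandomAccessFile::new_empty(
//!     store_max_value_size(),
//!     "text/plain".to_string(),
//!     vec![],
//!     Arc::clone(&store),
//! );
//! file.write(b"hello world")?;
//! let id = file.save()?;
//!
//! // re-open it later (File::open works for both SmallFile and RandomAccessFile objects)
//! let file = File::open(id, file.key().as_ref().unwrap().clone(), store)?;
//! let bytes = file.read(0, 11)?;
//! ```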
use core::fmt;
use std::cmp::min;
use std::collections::HashMap;
use std::sync::Arc;
use chacha20::cipher::{KeyIvInit, StreamCipher};
use chacha20::ChaCha20;
use zeroize::Zeroize;
use crate::block_storage::*;
use crate::errors::*;
#[allow(unused_imports)]
use crate::log::*;
use crate::object::*;
use crate::store::Store;
use crate::types::*;
/// File errors
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum FileError {
/// Missing blocks
MissingBlocks(Vec<BlockId>),
/// Missing root key
MissingRootKey,
/// Invalid BlockId encountered in the tree
InvalidBlockId,
/// Too many or too few children of a block
InvalidChildren,
/// Number of keys does not match number of children of a block
InvalidKeys,
/// Invalid CommitHeader object content
InvalidHeader,
/// Error deserializing content of a block
BlockDeserializeError,
/// Error deserializing content of the RandomAccessFileMeta
MetaDeserializeError,
/// Files are immutable: this one was already saved once, so it cannot be modified. Create a new File for the new data (and delete the old one if needed)
AlreadySaved,
/// File is too big
TooBig,
NotFound,
StorageError,
EndOfFile,
InvalidArgument,
NotAFile,
}
impl From<StorageError> for FileError {
fn from(e: StorageError) -> Self {
match e {
StorageError::NotFound => FileError::NotFound,
_ => FileError::StorageError,
}
}
}
impl From<ObjectParseError> for FileError {
fn from(_: ObjectParseError) -> Self {
FileError::BlockDeserializeError
}
}
/// Read access to a file (implemented by File, SmallFile/SmallFileV0 and RandomAccessFile)
pub trait ReadFile {
/// Reads at most `size` bytes starting at `pos`; the returned vector may be shorter than requested.
fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError>;
/// Returns the IDs of all the blocks of the file (including meta and tree blocks, where applicable).
fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError>;
}
/// A File in memory (read access only)
pub struct File<'a> {
internal: Box<dyn ReadFile + 'a>,
blocks_ids: Vec<BlockId>,
}
impl<'a> File<'a> {
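/// Opens an existing file for reading, given its ObjectId and key.
/// The root block is inspected to decide whether it is a RandomAccessFile or a SmallFile object.
///
/// A minimal sketch (not compiled as a doctest), assuming `id` and `key` reference a previously saved file:
/// ```ignore
/// let file = File::open(id, key, Arc::clone(&store))?;
/// let first_bytes = file.read(0, 1024)?;
/// ```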
pub fn open(id: ObjectId, key: SymKey, store: Arc<Store>) -> Result<File<'a>, FileError> {
let root_block = store.get(&id)?;
if root_block.children().len() == 2
&& *root_block.content().commit_header_obj() == CommitHeaderObject::RandomAccess
{
Ok(File {
internal: Box::new(RandomAccessFile::open(id, key, store)?),
blocks_ids: vec![],
})
} else {
let obj = Object::load(id, Some(key), &store)?;
match obj.content_v0()? {
ObjectContentV0::SmallFile(small_file) => Ok(File {
internal: Box::new(small_file),
blocks_ids: obj.block_ids(),
}),
_ => Err(FileError::NotAFile),
}
}
}
}
impl<'a> ReadFile for File<'a> {
fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
self.internal.read(pos, size)
}
fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
if self.blocks_ids.len() > 0 {
Ok(self.blocks_ids.to_vec())
} else {
self.internal.get_all_blocks_ids()
}
}
}
impl ReadFile for SmallFile {
fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
match self {
Self::V0(v0) => v0.read(pos, size),
}
}
fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
unimplemented!();
}
}
impl ReadFile for SmallFileV0 {
fn read(&self, pos: usize, size: usize) -> Result<Vec<u8>, FileError> {
if size == 0 {
return Err(FileError::InvalidArgument);
}
if pos + size > self.content.len() {
return Err(FileError::EndOfFile);
}
Ok(self.content[pos..pos + size].to_vec())
}
fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
unimplemented!();
}
}
/// A RandomAccessFile in memory. This is not used to serialize data
pub struct RandomAccessFile {
//storage: Arc<&'a dyn BlockStorage>,
store: Arc<Store>,
/// accurate once saved or opened
meta: RandomAccessFileMeta,
//meta_object_id: Option<BlockId>,
//content_block_id: Option<BlockId>,
/// keeps the deduplicated blocks' IDs, used for async writes
block_contents: HashMap<BlockKey, BlockId>,
/// Blocks of the Object (nodes of the tree). Only used when writing asynchronously, before saving.
blocks: Vec<(BlockId, BlockKey)>,
/// When an id is present, the File is opened in Read mode, and cannot be saved.
id: Option<ObjectId>,
key: Option<ObjectKey>,
content_block: Option<(BlockId, BlockKey)>,
// used for writes
conv_key: Option<[u8; 32]>,
remainder: Vec<u8>,
size: usize,
}
impl ReadFile for RandomAccessFile {
fn get_all_blocks_ids(&self) -> Result<Vec<ObjectId>, FileError> {
if self.id.is_none() {
unimplemented!();
}
let mut res: Vec<ObjectId> = self.blocks.iter().map(|(id, _)| id.clone()).collect();
recurse_tree(
&self.store,
self.content_block.as_ref().unwrap().clone(),
&mut res,
self.meta.depth(),
)?;
fn recurse_tree(
store: &Store,
current_block_id_key: (Digest, SymKey),
res: &mut Vec<Digest>,
level: u8,
) -> Result<(), FileError> {
res.push(current_block_id_key.0);
if level > 0 {
let tree_block = store.get(&current_block_id_key.0)?;
let (children, content) = tree_block.read(&current_block_id_key.1)?;
if children.is_empty() || content.len() > 0 {
return Err(FileError::BlockDeserializeError);
}
for child in children {
recurse_tree(store, child, res, level - 1)?;
}
}
Ok(())
}
Ok(res)
}
/// Reads at most one block from the file. The returned vector should be checked for its size:
/// it might be smaller than what was asked for.
/// The `pos`ition can be anywhere in the file.
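///
/// To read a whole saved file, call `read` in a loop and append the returned chunks
/// (a sketch, not compiled as a doctest; `file` is an opened or saved RandomAccessFile):
/// ```ignore
/// let total = file.meta().total_size() as usize;
/// let mut content: Vec<u8> = Vec::with_capacity(total);
/// let mut pos = 0;
/// while pos < total {
///     let chunk = file.read(pos, total - pos)?;
///     pos += chunk.len();
///     content.extend_from_slice(&chunk);
/// }
/// ```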
//TODO: parallelize decryption on multi threads (cores)
fn read(&self, pos: usize, mut size: usize) -> Result<Vec<u8>, FileError> {
if size == 0 {
return Err(FileError::InvalidArgument);
}
if self.id.is_some() {
let total = self.meta.total_size() as usize;
if pos > total {
return Err(FileError::EndOfFile);
}
size = min(total - pos, size);
let mut current_block_id_key = self.content_block.as_ref().unwrap().clone();
let depth = self.meta.depth();
let arity = self.meta.arity();
let mut level_pos = pos;
for level in 0..depth {
let tree_block = self.store.get(&current_block_id_key.0)?;
let (children, content) = tree_block.read(&current_block_id_key.1)?;
if children.is_empty() || content.len() > 0 {
return Err(FileError::BlockDeserializeError);
}
let factor = (arity as usize).pow(depth as u32 - level as u32 - 1)
* self.meta.chunk_size() as usize;
// index of the child within the current subtree, and remaining offset inside that child
let level_index = level_pos / factor;
if level_index >= children.len() {
return Err(FileError::EndOfFile);
}
current_block_id_key = (children[level_index]).clone();
level_pos %= factor;
}
let content_block = self.store.get(&current_block_id_key.0)?;
//log_debug!("CONTENT BLOCK SIZE {}", content_block.size());
let (children, content) = content_block.read(&current_block_id_key.1)?;
if children.is_empty() && content.len() > 0 {
//log_debug!("CONTENT SIZE {}", content.len());
if level_pos >= content.len() {
return Err(FileError::EndOfFile);
}
let end = min(content.len(), level_pos + size);
return Ok(content[level_pos..end].to_vec());
} else {
return Err(FileError::BlockDeserializeError);
}
} else {
// the file hasn't been saved yet: use self.blocks as a flat array of chunks, plus the remainder
let factor = self.meta.chunk_size() as usize;
let index = pos / factor;
let level_pos = pos % factor;
let remainder_pos = self.blocks.len() * factor;
if pos >= remainder_pos {
let pos_in_remainder = pos - remainder_pos;
if self.remainder.len() > 0 && pos_in_remainder < self.remainder.len() {
let end = min(self.remainder.len(), pos_in_remainder + size);
return Ok(self.remainder[pos_in_remainder..end].to_vec());
} else {
return Err(FileError::EndOfFile);
}
}
//log_debug!("{} {} {} {}", index, self.blocks.len(), factor, level_pos);
if index >= self.blocks.len() {
return Err(FileError::EndOfFile);
}
let block = &self.blocks[index];
let content_block = self.store.get(&block.0)?;
let (children, content) = content_block.read(&block.1)?;
if children.is_empty() && content.len() > 0 {
//log_debug!("CONTENT SIZE {}", content.len());
if level_pos >= content.len() {
return Err(FileError::EndOfFile);
}
let end = min(content.len(), level_pos + size);
return Ok(content[level_pos..end].to_vec());
} else {
return Err(FileError::BlockDeserializeError);
}
}
}
}
impl RandomAccessFile {
pub fn meta(&self) -> &RandomAccessFileMeta {
&self.meta
}
pub fn id(&self) -> &Option<ObjectId> {
&self.id
}
pub fn key(&self) -> &Option<ObjectKey> {
&self.key
}
fn make_block(
mut content: Vec<u8>,
conv_key: &[u8; blake3::OUT_LEN],
children: Vec<ObjectId>,
already_existing: &mut HashMap<BlockKey, BlockId>,
store: &Store,
) -> Result<(BlockId, BlockKey), StorageError> {
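// convergent encryption: the block key is a keyed BLAKE3 hash of the plaintext content,
// so identical chunks (within the same store / convergence key) produce identical encrypted
// blocks, which are deduplicated via `already_existing` and in the block storage.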
let key_hash = blake3::keyed_hash(conv_key, &content);
let key_slice = key_hash.as_bytes();
let key = SymKey::ChaCha20Key(key_slice.clone());
if let Some(id) = already_existing.get(&key) {
return Ok((*id, key));
}
let nonce = [0u8; 12];
let mut cipher = ChaCha20::new(key_slice.into(), &nonce.into());
//let mut content_enc = Vec::from(content);
cipher.apply_keystream(content.as_mut_slice());
let mut block = Block::new_random_access(children, content, None);
//log_debug!(">>> make_block random access: {}", block.id());
//log_debug!("!! children: ({}) {:?}", children.len(), children);
let id = block.get_and_save_id();
already_existing.insert(key.clone(), id);
//log_debug!("putting *** {}", id);
store.put(&block)?;
Ok((id, key))
}
fn make_parent_block(
conv_key: &[u8; blake3::OUT_LEN],
children: Vec<(BlockId, BlockKey)>,
already_existing: &mut HashMap<BlockKey, BlockId>,
store: &Store,
) -> Result<(BlockId, BlockKey), StorageError> {
let mut ids: Vec<BlockId> = Vec::with_capacity(children.len());
let mut keys: Vec<BlockKey> = Vec::with_capacity(children.len());
children.iter().for_each(|child| {
ids.push(child.0);
keys.push(child.1.clone());
});
let content = ChunkContentV0::InternalNode(keys);
let content_ser = serde_bare::to_vec(&content).unwrap();
Self::make_block(content_ser, conv_key, ids, already_existing, store)
}
/// Builds the tree bottom-up from the leaves (recursing level by level) and returns the root node
fn make_tree(
already_existing: &mut HashMap<BlockKey, BlockId>,
leaves: &[(BlockId, BlockKey)],
conv_key: &ChaCha20Key,
arity: u16,
store: &Store,
) -> Result<(BlockId, BlockKey), StorageError> {
let mut parents: Vec<(BlockId, BlockKey)> = vec![];
for nodes in leaves.chunks(arity as usize) {
//log_debug!("making parent");
parents.push(Self::make_parent_block(
conv_key,
nodes.to_vec(),
already_existing,
store,
)?);
}
//log_debug!("level with {} parents", parents.len());
if 1 < parents.len() {
return Self::make_tree(already_existing, parents.as_slice(), conv_key, arity, store);
}
Ok(parents[0].clone())
}
/// returns content_block id/key pair, and root_block id/key pair
fn save_(
already_existing: &mut HashMap<BlockKey, BlockId>,
blocks: &[(BlockId, BlockKey)],
meta: &mut RandomAccessFileMeta,
conv_key: &ChaCha20Key,
store: &Store,
) -> Result<((BlockId, BlockKey), (BlockId, BlockKey)), FileError> {
let leaf_blocks_nbr = blocks.len();
let arity = meta.arity();
let mut depth: u8 = u8::MAX;
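// the depth is the smallest d such that arity^d >= number of leaf blocks
// (depth 0 means the whole content fits in a single block, with no internal tree nodes)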
for i in 0..u8::MAX {
if leaf_blocks_nbr <= (arity as usize).pow(i.into()) {
depth = i;
break;
}
}
if depth == u8::MAX {
return Err(FileError::TooBig);
}
meta.set_depth(depth);
//log_debug!("depth={} leaves={}", depth, leaf_blocks_nbr);
let content_block = if depth == 0 {
assert!(blocks.len() == 1);
blocks[0].clone()
} else {
// we create the tree
Self::make_tree(already_existing, &blocks, &conv_key, arity, store)?
};
let meta_object = Object::new_with_convergence_key(
ObjectContent::V0(ObjectContentV0::RandomAccessFileMeta(meta.clone())),
None,
store_valid_value_size(meta.chunk_size() as usize),
conv_key,
);
//log_debug!("saving meta object");
_ = meta_object.save(store)?;
// create the root block: its first child is the meta_object, its second child is the content_block.
// it is added to storage in make_parent_block
//log_debug!("saving root block");
let root_block = Self::make_parent_block(
conv_key,
vec![
(meta_object.id(), meta_object.key().unwrap()),
content_block.clone(),
],
already_existing,
store,
)?;
Ok((content_block, root_block))
}
/// Creates a new file from content that is fully known at the time of creation.
///
/// If you want to stream the content progressively into the new file, use new_empty(), write() and save() instead.
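///
/// A minimal sketch (not compiled as a doctest), assuming a test store such as `Store::dummy_public_v0()`:
/// ```ignore
/// let file = RandomAccessFile::new_from_slice(
///     b"some content",
///     store_max_value_size(),
///     "text/plain".to_string(),
///     vec![],
///     Arc::clone(&store),
/// )?;
/// let file_ref = file.reference().unwrap();
/// ```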
pub fn new_from_slice(
content: &[u8],
block_size: usize,
content_type: String,
metadata: Vec<u8>,
store: Arc<Store>,
) -> Result<RandomAccessFile, FileError> {
//let max_block_size = store_max_value_size();
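// the payload available per block is the valid block size minus the BLOCK_EXTRA encoding overhead;
// the arity is how many child references (CHILD_SIZE bytes each) fit in that payload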
let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;
let arity = ((valid_block_size) / CHILD_SIZE) as u16;
let total_size = content.len() as u64;
let mut conv_key = Object::convergence_key(&store);
let mut blocks: Vec<(BlockId, BlockKey)> = vec![];
let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();
//log_debug!("making the leaves");
for chunk in content.chunks(valid_block_size) {
let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
blocks.push(Self::make_block(
content_ser,
&conv_key,
vec![],
&mut already_existing,
&store,
)?);
}
assert_eq!(
(total_size as usize + valid_block_size - 1) / valid_block_size,
blocks.len()
);
let mut meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
content_type,
metadata,
chunk_size: valid_block_size as u32,
total_size,
arity,
depth: 0,
});
let (content_block, root_block) =
Self::save_(&mut already_existing, &blocks, &mut meta, &conv_key, &store)?;
conv_key.zeroize();
Ok(Self {
store,
meta,
block_contents: HashMap::new(), // not used in this case
blocks: vec![], // not used in this case
id: Some(root_block.0.clone()),
key: Some(root_block.1.clone()),
content_block: Some(content_block),
conv_key: None, // not used in this case
remainder: vec![], // not used in this case
size: 0, // not used in this case
})
}
pub fn new_empty(
block_size: usize,
content_type: String,
metadata: Vec<u8>,
store: Arc<Store>,
) -> Self {
let valid_block_size = store_valid_value_size(block_size) - BLOCK_EXTRA;
let arity = ((valid_block_size) / CHILD_SIZE) as u16;
let meta = RandomAccessFileMeta::V0(RandomAccessFileMetaV0 {
content_type,
metadata,
chunk_size: valid_block_size as u32,
arity,
total_size: 0, // will be filled in later, during save
depth: 0, // will be filled in later, during save
});
Self {
store: Arc::clone(&store),
meta,
block_contents: HashMap::new(),
blocks: vec![],
id: None,
key: None,
content_block: None,
conv_key: Some(Object::convergence_key(&store)),
remainder: vec![],
size: 0,
}
}
/// Appends some data at the end of a file created with new_empty() and not saved yet.
/// You can call it many times. Don't forget to eventually call save().
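///
/// A minimal sketch of the streaming flow (not compiled as a doctest), assuming a test store
/// and a `data: Vec<u8>` buffer holding the content to stream:
/// ```ignore
/// let mut file = RandomAccessFile::new_empty(
///     store_valid_value_size(0),
///     "application/octet-stream".to_string(),
///     vec![],
///     Arc::clone(&store),
/// );
/// for chunk in data.chunks(1_000_000) {
///     file.write(chunk)?;
/// }
/// let id = file.save()?;
/// ```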
pub fn write(&mut self, data: &[u8]) -> Result<(), FileError> {
if self.id.is_some() {
return Err(FileError::AlreadySaved);
}
let remainder = self.remainder.len();
let chunk_size = self.meta.chunk_size() as usize;
let mut pos: usize = 0;
let conv_key = self.conv_key.unwrap();
// TODO: provide an option to search in storage for already existing, when doing a resume of previously aborted write
let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();
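// if a partial chunk (remainder) is pending from a previous write, try to complete it into a full chunk first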
if remainder > 0 {
if data.len() >= chunk_size - remainder {
let mut new_block = Vec::with_capacity(chunk_size);
new_block.append(&mut self.remainder);
pos = chunk_size - remainder;
self.size += chunk_size;
//log_debug!("size += chunk_size {} {}", self.size, chunk_size);
new_block.extend(data[0..pos].iter());
assert_eq!(new_block.len(), chunk_size);
let data_chunk = ChunkContentV0::DataChunk(new_block);
let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
self.blocks.push(Self::make_block(
content_ser,
&conv_key,
vec![],
&mut already_existing,
&self.store,
)?);
} else {
// not enough data to create a new block
self.remainder.extend(data.iter());
return Ok(());
}
} else if data.len() < chunk_size {
self.remainder = Vec::from(data);
return Ok(());
}
for chunk in data[pos..].chunks(chunk_size) {
if chunk.len() == chunk_size {
self.size += chunk_size;
//log_debug!("size += chunk_size {} {}", self.size, chunk_size);
let data_chunk = ChunkContentV0::DataChunk(chunk.to_vec());
let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
self.blocks.push(Self::make_block(
content_ser,
&conv_key,
vec![],
&mut already_existing,
&self.store,
)?);
} else {
self.remainder = Vec::from(chunk);
return Ok(());
}
}
Ok(())
}
pub fn save(&mut self) -> Result<ObjectId, FileError> {
if self.id.is_some() {
return Err(FileError::AlreadySaved);
}
// save the remainder, if any.
if self.remainder.len() > 0 {
self.size += self.remainder.len();
//log_debug!("size += remainder {} {}", self.size, self.remainder.len());
let mut remainder = Vec::with_capacity(self.remainder.len());
remainder.append(&mut self.remainder);
let data_chunk = ChunkContentV0::DataChunk(remainder);
let content_ser = serde_bare::to_vec(&data_chunk).unwrap();
self.blocks.push(Self::make_block(
content_ser,
&self.conv_key.unwrap(),
vec![],
&mut HashMap::new(),
&self.store,
)?);
}
self.meta.set_total_size(self.size as u64);
let mut already_existing: HashMap<BlockKey, BlockId> = HashMap::new();
let (content_block, root_block) = Self::save_(
&mut already_existing,
&self.blocks,
&mut self.meta,
self.conv_key.as_ref().unwrap(),
&self.store,
)?;
self.conv_key.as_mut().unwrap().zeroize();
self.conv_key = None;
self.id = Some(root_block.0);
self.key = Some(root_block.1.clone());
self.content_block = Some(content_block);
self.blocks = vec![];
self.blocks.shrink_to_fit();
Ok(root_block.0)
}
pub fn reference(&self) -> Option<ObjectRef> {
if self.key.is_some() && self.id.is_some() {
Some(ObjectRef::from_id_key(
self.id.unwrap(),
self.key.as_ref().unwrap().clone(),
))
} else {
None
}
}
/// Opens a file for reading.
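///
/// A minimal sketch (not compiled as a doctest), with `id` and `key` obtained from a previous save():
/// ```ignore
/// let file = RandomAccessFile::open(id, key, Arc::clone(&store))?;
/// log_debug!("total size: {}", file.meta().total_size());
/// let chunk = file.read(0, 4096)?;
/// ```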
pub fn open(
id: ObjectId,
key: SymKey,
store: Arc<Store>,
) -> Result<RandomAccessFile, FileError> {
// load root block
let root_block = store.get(&id)?;
if root_block.children().len() != 2
|| *root_block.content().commit_header_obj() != CommitHeaderObject::RandomAccess
{
return Err(FileError::BlockDeserializeError);
}
let (root_sub_blocks, _) = root_block.read(&key)?;
// load meta object (first one in root block)
let meta_object = Object::load(
root_sub_blocks[0].0,
Some(root_sub_blocks[0].1.clone()),
&store,
)?;
let meta = match meta_object.content_v0()? {
ObjectContentV0::RandomAccessFileMeta(meta) => meta,
_ => return Err(FileError::InvalidChildren),
};
Ok(RandomAccessFile {
store,
meta,
block_contents: HashMap::new(), // not used in this case
blocks: vec![(id, SymKey::nil()), (root_sub_blocks[0].0, SymKey::nil())], // only used by get_all_blocks_ids, to report the root block and meta object IDs
id: Some(id),
key: Some(key),
content_block: Some(root_sub_blocks[1].clone()),
conv_key: None,
remainder: vec![],
size: 0,
})
}
pub fn blocks(&self) -> impl Iterator<Item = Block> + '_ {
self.blocks
.iter()
.map(|key| self.store.get(&key.0).unwrap())
}
/// Size once encoded, before deduplication. Only available before save()
pub fn size(&self) -> usize {
let mut total = 0;
self.blocks().for_each(|b| total += b.size());
total
}
/// Real size on disk
pub fn dedup_size(&self) -> usize {
let mut total = 0;
self.block_contents
.values()
.for_each(|b| total += self.store.get(b).unwrap().size());
total
}
pub fn depth(&self) -> Result<u8, NgError> {
Ok(self.meta.depth())
// unimplemented!();
// if self.key().is_none() {
// return Err(ObjectParseError::MissingRootKey);
// }
// let parents = vec![(self.id(), self.key().unwrap())];
// Self::collect_leaves(
// &self.blocks,
// &parents,
// self.blocks.len() - 1,
// &mut None,
// &mut None,
// &self.block_contents,
// )
}
}
impl fmt::Display for RandomAccessFile {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
writeln!(
f,
"====== File ID {}",
self.id
.map_or("NOT SAVED".to_string(), |i| format!("{}", i))
)?;
writeln!(
f,
"== Key: {}",
self.key
.as_ref()
.map_or("None".to_string(), |k| format!("{}", k))
)?;
writeln!(f, "== depth: {}", self.meta.depth())?;
writeln!(f, "== arity: {}", self.meta.arity())?;
writeln!(f, "== chunk_size: {}", self.meta.chunk_size())?;
writeln!(f, "== total_size: {}", self.meta.total_size())?;
writeln!(f, "== content_type: {}", self.meta.content_type())?;
writeln!(f, "== metadata len: {}", self.meta.metadata().len())?;
if self.id.is_none() {
writeln!(f, "== blocks to save: {}", self.blocks.len())?;
}
Ok(())
}
}
#[cfg(test)]
mod test {
use time::Instant;
use crate::file::*;
use std::io::BufReader;
use std::io::Read;
/// Checks that content that fits in one block creates a file of depth 0
#[test]
pub fn test_depth_0() {
let block_size = store_max_value_size();
//store_valid_value_size(0)
////// 1 MB of data!
let data_size = block_size - BLOCK_EXTRA;
let store = Store::dummy_public_v0();
log_debug!("creating 1MB of data");
let content: Vec<u8> = vec![99; data_size];
log_debug!("creating random access file with that data");
let file: RandomAccessFile = RandomAccessFile::new_from_slice(
&content,
block_size,
"text/plain".to_string(),
vec![],
Arc::clone(&store),
)
.expect("new_from_slice");
log_debug!("{}", file);
let id = file.id.as_ref().unwrap().clone();
let file_size = file.size();
log_debug!("file size to save : {}", file_size);
log_debug!("data size: {}", data_size);
let read_content = file.read(0, data_size).expect("reading all");
assert_eq!(read_content, content);
let read_content2 = file.read(0, data_size + 1);
assert_eq!(read_content2.unwrap().len(), 1048564);
let read_content = file.read(data_size - 9, 9).expect("reading end");
assert_eq!(read_content, vec![99, 99, 99, 99, 99, 99, 99, 99, 99]);
let read_content = file.read(data_size - 9, 10);
assert_eq!(read_content, Ok(vec![99, 99, 99, 99, 99, 99, 99, 99, 99]));
// log_debug!(
// "overhead: {} - {}%",
// file_size - data_size,
// ((file_size - data_size) * 100) as f32 / data_size as f32
// );
// let dedup_size = file.dedup_size();
// log_debug!(
// "dedup compression: {} - {}%",
// data_size - dedup_size,
// ((data_size - dedup_size) * 100) as f32 / data_size as f32
// );
// log_debug!("number of blocks : {}", file.blocks.len());
// assert_eq!(
// file.blocks.len(),
// MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
// );
assert_eq!(file.depth(), Ok(0));
assert_eq!(store.len(), Ok(3));
let file = RandomAccessFile::open(id, file.key.unwrap(), store).expect("re open");
log_debug!("{}", file);
let read_content = file.read(0, data_size).expect("reading all after re open");
assert_eq!(read_content, content);
}
/// Checks that content spanning exactly `arity` chunks creates a file of depth 1
#[ignore]
#[test]
pub fn test_depth_1() {
const MAX_ARITY_LEAVES: usize = 15887;
const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;
////// 16 GB of data!
let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;
let store = Store::dummy_public_v0();
log_debug!("creating 16GB of data");
let content: Vec<u8> = vec![99; data_size];
log_debug!("creating random access file with that data");
let file: RandomAccessFile = RandomAccessFile::new_from_slice(
&content,
store_max_value_size(),
"text/plain".to_string(),
vec![],
Arc::clone(&store),
)
.expect("new_from_slice");
log_debug!("{}", file);
let _id = file.id.as_ref().unwrap().clone();
log_debug!("data size: {}", data_size);
assert_eq!(file.depth(), Ok(1));
assert_eq!(store.len(), Ok(4));
}
/// Checks that content slightly larger than `arity` chunks creates a file of depth 2
#[ignore]
#[test]
pub fn test_depth_2() {
const MAX_ARITY_LEAVES: usize = 15887;
const MAX_DATA_PAYLOAD_SIZE: usize = 1048564;
////// 16 GB of data!
let data_size = MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE + 1;
let store = Store::dummy_public_v0();
log_debug!("creating 16GB of data");
let content: Vec<u8> = vec![99; data_size];
log_debug!("creating file with that data");
let file: RandomAccessFile = RandomAccessFile::new_from_slice(
&content,
store_max_value_size(),
"text/plain".to_string(),
vec![],
Arc::clone(&store),
)
.expect("new_from_slice");
log_debug!("{}", file);
let file_size = file.size();
log_debug!("file size: {}", file_size);
log_debug!("data size: {}", data_size);
assert_eq!(file.depth().unwrap(), 2);
assert_eq!(store.len(), Ok(7));
}
/// Checks that content spanning `arity`^3 chunks creates a file of depth 3
#[test]
pub fn test_depth_3() {
const MAX_ARITY_LEAVES: usize = 61;
const MAX_DATA_PAYLOAD_SIZE: usize = 4084;
////// 900 MB of data!
let data_size =
MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_ARITY_LEAVES * MAX_DATA_PAYLOAD_SIZE;
let store = Store::dummy_public_v0();
log_debug!("creating 900MB of data");
let content: Vec<u8> = vec![99; data_size];
log_debug!("creating file with that data");
let file: RandomAccessFile = RandomAccessFile::new_from_slice(
&content,
store_valid_value_size(0),
"text/plain".to_string(),
vec![],
Arc::clone(&store),
)
.expect("new_from_slice");
log_debug!("{}", file);
let file_size = file.size();
log_debug!("file size: {}", file_size);
let read_content = file.read(0, data_size).expect("reading all");
assert_eq!(read_content.len(), MAX_DATA_PAYLOAD_SIZE);
let read_content = file.read(9000, 10000).expect("reading 10k");
assert_eq!(read_content, vec![99; 3252]);
// log_debug!("data size: {}", data_size);
// log_debug!(
// "overhead: {} - {}%",
// file_size - data_size,
// ((file_size - data_size) * 100) as f32 / data_size as f32
// );
// let dedup_size = file.dedup_size();
// log_debug!(
// "dedup compression: {} - {}%",
// data_size - dedup_size,
// ((data_size - dedup_size) * 100) as f32 / data_size as f32
// );
// log_debug!("number of blocks : {}", file.blocks.len());
// assert_eq!(
// file.blocks.len(),
// MAX_ARITY_LEAVES * (MAX_ARITY_LEAVES + 1) * MAX_ARITY_LEAVES + MAX_ARITY_LEAVES + 1
// );
assert_eq!(file.depth().unwrap(), 3);
assert_eq!(store.len(), Ok(6));
}
/// Checks that content spanning `arity`^4 chunks creates a file of depth 4
#[ignore]
#[test]
pub fn test_depth_4() {
const MAX_ARITY_LEAVES: usize = 61;
const MAX_DATA_PAYLOAD_SIZE: usize = 4084;
////// 52GB of data!
let data_size = MAX_ARITY_LEAVES
* MAX_ARITY_LEAVES
* MAX_ARITY_LEAVES
* MAX_ARITY_LEAVES
* MAX_DATA_PAYLOAD_SIZE;
let store = Store::dummy_public_v0();
log_debug!("creating 55GB of data");
let content: Vec<u8> = vec![99; data_size];
log_debug!("creating file with that data");
let file: RandomAccessFile = RandomAccessFile::new_from_slice(
&content,
store_valid_value_size(0),
"text/plain".to_string(),
vec![],
Arc::clone(&store),
)
.expect("new_from_slice");
log_debug!("{}", file);
let file_size = file.size();
log_debug!("file size: {}", file_size);
log_debug!("data size: {}", data_size);
assert_eq!(file.depth().unwrap(), 4);
assert_eq!(store.len(), Ok(7));
}
/// Test async write to a file all at once
#[test]
pub fn test_write_all_at_once() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let store = Store::dummy_public_v0();
log_debug!("creating file with the JPG content");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_max_value_size(), //store_valid_value_size(0),//
"image/jpeg".to_string(),
vec![],
store,
);
log_debug!("{}", file);
file.write(&img_buffer).expect("write all at once");
// !!! all these assertions work only because store_max_value_size() is bigger than the actual size of the JPEG file, so it fits in one block.
assert_eq!(
file.read(0, img_buffer.len()).expect("read before save"),
img_buffer
);
// asking too much, receiving just enough
assert_eq!(
file.read(0, img_buffer.len() + 1)
.expect("read before save"),
img_buffer
);
// // reading too far, well behind the size of the JPG
// assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
file.save().expect("save");
let res = file.read(0, img_buffer.len()).expect("read all");
assert_eq!(res, img_buffer);
// // asking too much, receiving an error, as now we know the total size of file, and we check it
// assert_eq!(
// file.read(0, img_buffer.len() + 1),
// Err(FileError::EndOfFile)
// );
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
}
/// Test async write to a file by increments
#[test]
pub fn test_write_by_increments() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let store = Store::dummy_public_v0();
log_debug!("creating file with the JPG content");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_max_value_size(), //store_valid_value_size(0),//
"image/jpeg".to_string(),
vec![],
store,
);
log_debug!("{}", file);
for chunk in img_buffer.chunks(1000) {
file.write(chunk).expect("write a chunk");
}
assert_eq!(
file.read(0, img_buffer.len()).expect("read before save"),
img_buffer
);
// asking too much, receiving just enough
assert_eq!(
file.read(0, img_buffer.len() + 1)
.expect("read before save"),
img_buffer
);
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);
// reading one byte after the end of the file size.
assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
file.save().expect("save");
// this works only because store_max_value_size() is bigger than the actual size of the JPEG file, so it fits in one block.
let res = file.read(0, img_buffer.len()).expect("read all");
assert_eq!(res, img_buffer);
// // asking too much, receiving an error, as now we know the total size of file, and we check it
// assert_eq!(
// file.read(0, img_buffer.len() + 1),
// Err(FileError::EndOfFile)
// );
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
}
/// Test async write to a file by increments small blocks
#[test]
pub fn test_write_by_increments_small_blocks() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let store = Store::dummy_public_v0();
log_debug!("creating file with the JPG content");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_valid_value_size(0),
"image/jpeg".to_string(),
vec![],
store,
);
log_debug!("{}", file);
let first_block_content = img_buffer[0..4084].to_vec();
for chunk in img_buffer.chunks(1000) {
file.write(chunk).expect("write a chunk");
}
log_debug!("{}", file);
assert_eq!(
file.read(0, img_buffer.len()).expect("read before save"),
first_block_content
);
// asking too much, receiving just enough
assert_eq!(
file.read(0, img_buffer.len() + 1)
.expect("read before save"),
first_block_content
);
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
file.save().expect("save");
log_debug!("{}", file);
assert_eq!(img_buffer.len(), file.meta.total_size() as usize);
let res = file.read(0, img_buffer.len()).expect("read all");
assert_eq!(res, first_block_content);
// // asking too much, not receiving an error, as we know the total size of file, and return what we can
// assert_eq!(
// file.read(0, img_buffer.len() + 1),
// Err(FileError::EndOfFile)
// );
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
}
/// Test async write to a file all at once
#[test]
pub fn test_write_all_at_once_small_blocks() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let first_block_content = img_buffer[0..4084].to_vec();
let store = Store::dummy_public_v0();
log_debug!("creating file with the JPG content");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_valid_value_size(0),
"image/jpeg".to_string(),
vec![],
store,
);
log_debug!("{}", file);
file.write(&img_buffer).expect("write all at once");
assert_eq!(
file.read(0, img_buffer.len()).expect("read before save"),
first_block_content
);
// asking too much, receiving just enough
assert_eq!(
file.read(0, img_buffer.len() + 1)
.expect("read before save"),
first_block_content
);
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read before save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
file.save().expect("save");
let res = file.read(0, img_buffer.len()).expect("read all");
assert_eq!(res, first_block_content);
let res = file.read(10, img_buffer.len() - 10).expect("read all");
assert_eq!(res, first_block_content[10..].to_vec());
// // asking too much, receiving an error, as now we know the total size of file, and we check it
// assert_eq!(
// file.read(0, img_buffer.len() + 1),
// Err(FileError::EndOfFile)
// );
// reading too far, well behind the size of the JPG
assert_eq!(file.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(10000, 1).expect("read after save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file.read(29454, 0), Err(FileError::InvalidArgument));
}
/// Test depth 4 with 52GB of data, but using write in small increments, so the memory burden on the system will be minimal
#[ignore]
#[test]
pub fn test_depth_4_write_small() {
const MAX_ARITY_LEAVES: usize = 61;
const MAX_DATA_PAYLOAD_SIZE: usize = 4084;
////// 52GB of data!
let data_size = MAX_ARITY_LEAVES
* MAX_ARITY_LEAVES
* MAX_ARITY_LEAVES
* MAX_ARITY_LEAVES
* MAX_DATA_PAYLOAD_SIZE;
// chunks of 5 MB
let chunk_nbr = data_size / 5000000;
let last_chunk = data_size % 5000000;
let store = Store::dummy_public_v0();
log_debug!("creating empty file");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_valid_value_size(0),
"image/jpeg".to_string(),
vec![],
Arc::clone(&store),
);
log_debug!("{}", file);
let chunk = vec![99; 5000000];
let last_chunk = vec![99; last_chunk];
for _i in 0..chunk_nbr {
file.write(&chunk).expect("write a chunk");
}
file.write(&last_chunk).expect("write last chunk");
log_debug!("{}", file);
file.save().expect("save");
log_debug!("{}", file);
let file_size = file.size();
log_debug!("file size: {}", file_size);
log_debug!("data size: {}", data_size);
assert_eq!(data_size, file.meta.total_size() as usize);
assert_eq!(file.depth().unwrap(), 4);
assert_eq!(store.len(), Ok(7));
}
/// Test open
#[test]
pub fn test_open() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let store = Store::dummy_public_v0();
log_debug!("creating file with the JPG content");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_max_value_size(), //store_valid_value_size(0),//
"image/jpeg".to_string(),
vec![],
Arc::clone(&store),
);
log_debug!("{}", file);
for chunk in img_buffer.chunks(1000) {
file.write(chunk).expect("write a chunk");
}
file.save().expect("save");
let file2 = RandomAccessFile::open(file.id().unwrap(), file.key.unwrap(), store)
.expect("reopen file");
// this works only because store_max_value_size() is bigger than the actual size of the JPEG file, so it fits in one block.
let res = file2.read(0, img_buffer.len()).expect("read all");
log_debug!("{}", file2);
assert_eq!(res, img_buffer);
// // asking too much, receiving an error, as now we know the total size of file, and we check it
// assert_eq!(
// file2.read(0, img_buffer.len() + 1),
// Err(FileError::EndOfFile)
// );
// reading too far, well behind the size of the JPG
assert_eq!(file2.read(100000, 1), Err(FileError::EndOfFile));
assert_eq!(file2.read(10000, 1).expect("read after save"), vec![41]);
// // reading one byte after the end of the file size.
// assert_eq!(file2.read(29454, 1), Err(FileError::EndOfFile));
assert_eq!(file2.read(29454, 0), Err(FileError::InvalidArgument));
}
/// Test read JPEG file small
#[test]
pub fn test_read_small_file() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let len = img_buffer.len();
let content = ObjectContent::new_file_v0_with_content(img_buffer.clone(), "image/jpeg");
let max_object_size = store_max_value_size();
let store = Store::dummy_public_v0();
let mut obj = Object::new(content, None, max_object_size, &store);
log_debug!("{}", obj);
let _ = obj.save_in_test(&store).expect("save");
let file = File::open(obj.id(), obj.key().unwrap(), store).expect("open");
let res = file.read(0, len).expect("read all");
assert_eq!(res, img_buffer);
}
/// Test read JPEG file random access
#[test]
pub fn test_read_random_access_file() {
let f = std::fs::File::open("tests/test.jpg").expect("open of tests/test.jpg");
let mut reader = BufReader::new(f);
let mut img_buffer: Vec<u8> = Vec::new();
reader
.read_to_end(&mut img_buffer)
.expect("read of test.jpg");
let len = img_buffer.len();
let max_object_size = store_max_value_size();
let store = Store::dummy_public_v0();
log_debug!("creating empty file");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
max_object_size,
"image/jpeg".to_string(),
vec![],
Arc::clone(&store),
);
file.write(&img_buffer).expect("write all");
log_debug!("{}", file);
file.save().expect("save");
log_debug!("{}", file);
let file = File::open(
file.id().unwrap(),
file.key().as_ref().unwrap().clone(),
store,
)
.expect("open");
// this only works because we chose a big block size (1MB) so the small JPG file fits in one block.
// if not, we would have to call read repeatedly and append the results into a buffer, in order to get the full file
let res = file.read(0, len).expect("read all");
assert_eq!(res, img_buffer);
}
/// Test depth 4, but using write in increments, so the memory burden on the system will be minimal
#[ignore]
#[test]
pub fn test_depth_4_big_write_small() {
let encoding_big_file = Instant::now();
let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
let mut reader = BufReader::new(f);
let store = Store::dummy_public_v0();
log_debug!("creating empty file");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_valid_value_size(0),
"image/jpeg".to_string(),
vec![],
store,
);
log_debug!("{}", file);
let mut chunk = [0u8; 1000000];
loop {
let size = reader.read(&mut chunk).expect("read a chunk");
//log_debug!("{}", size);
file.write(&chunk[0..size]).expect("write a chunk");
if size != 1000000 {
break;
}
}
log_debug!("{}", file);
file.save().expect("save");
log_debug!("{}", file);
log_debug!("data size: {}", file.meta.total_size());
//assert_eq!(data_size, file.meta.total_size() as usize);
assert_eq!(file.depth().unwrap(), 4);
log_debug!(
"encoding_big_file took: {} s",
encoding_big_file.elapsed().as_seconds_f32()
);
}
/// Test writing about 2.7GB of data in increments using the maximum block size, so the memory burden on the system stays minimal; the resulting tree has depth 1
#[ignore]
#[test]
pub fn test_depth_4_big_write_big() {
let encoding_big_file = Instant::now();
let f = std::fs::File::open("[enter path of a big file here]").expect("open of a big file");
let mut reader = BufReader::new(f);
let store = Store::dummy_public_v0();
log_debug!("creating empty file");
let mut file: RandomAccessFile = RandomAccessFile::new_empty(
store_max_value_size(),
"image/jpeg".to_string(),
vec![],
store,
);
log_debug!("{}", file);
let mut chunk = [0u8; 2000000];
loop {
let size = reader.read(&mut chunk).expect("read a chunk");
//log_debug!("{}", size);
file.write(&chunk[0..size]).expect("write a chunk");
if size != 2000000 {
break;
}
}
log_debug!("{}", file);
file.save().expect("save");
log_debug!("{}", file);
log_debug!("data size: {}", file.meta.total_size());
//assert_eq!(data_size, file.meta.total_size() as usize);
assert_eq!(file.depth().unwrap(), 1);
log_debug!(
"encoding_big_file took: {} s",
encoding_big_file.elapsed().as_seconds_f32()
);
}
}