|
|
@ -1,6 +1,8 @@ |
|
|
|
use crate::model::{GraphNameRef, NamedOrBlankNodeRef, Quad, QuadRef, TermRef}; |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
|
|
|
|
use crate::model::Quad; |
|
|
|
|
|
|
|
use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; |
|
|
|
use crate::storage::backend::{Reader, Transaction}; |
|
|
|
use crate::storage::backend::{Reader, Transaction}; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use crate::storage::binary_encoder::LATEST_STORAGE_VERSION; |
|
|
|
use crate::storage::binary_encoder::LATEST_STORAGE_VERSION; |
|
|
|
use crate::storage::binary_encoder::{ |
|
|
|
use crate::storage::binary_encoder::{ |
|
|
|
decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple, |
|
|
|
decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple, |
|
|
@ -9,24 +11,30 @@ use crate::storage::binary_encoder::{ |
|
|
|
WRITTEN_TERM_MAX_SIZE, |
|
|
|
WRITTEN_TERM_MAX_SIZE, |
|
|
|
}; |
|
|
|
}; |
|
|
|
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; |
|
|
|
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; |
|
|
|
use crate::storage::numeric_encoder::{ |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
insert_term, Decoder, EncodedQuad, EncodedTerm, StrHash, StrLookup, |
|
|
|
use crate::storage::numeric_encoder::Decoder; |
|
|
|
}; |
|
|
|
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; |
|
|
|
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter}; |
|
|
|
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter}; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::cmp::{max, min}; |
|
|
|
use std::cmp::{max, min}; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::collections::VecDeque; |
|
|
|
use std::collections::VecDeque; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::collections::{HashMap, HashSet}; |
|
|
|
use std::collections::{HashMap, HashSet}; |
|
|
|
use std::error::Error; |
|
|
|
use std::error::Error; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::mem::take; |
|
|
|
use std::mem::take; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::path::{Path, PathBuf}; |
|
|
|
use std::path::{Path, PathBuf}; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::sync::atomic::{AtomicU64, Ordering}; |
|
|
|
use std::sync::atomic::{AtomicU64, Ordering}; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::sync::Arc; |
|
|
|
use std::sync::Arc; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::thread::spawn; |
|
|
|
use std::thread::spawn; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use std::thread::JoinHandle; |
|
|
|
use std::thread::JoinHandle; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
use sysinfo::{System, SystemExt}; |
|
|
|
use sysinfo::{System, SystemExt}; |
|
|
|
|
|
|
|
|
|
|
|
mod backend; |
|
|
|
mod backend; |
|
|
@ -46,15 +54,18 @@ const DSPO_CF: &str = "dspo"; |
|
|
|
const DPOS_CF: &str = "dpos"; |
|
|
|
const DPOS_CF: &str = "dpos"; |
|
|
|
const DOSP_CF: &str = "dosp"; |
|
|
|
const DOSP_CF: &str = "dosp"; |
|
|
|
const GRAPHS_CF: &str = "graphs"; |
|
|
|
const GRAPHS_CF: &str = "graphs"; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
const DEFAULT_CF: &str = "default"; |
|
|
|
const DEFAULT_CF: &str = "default"; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
const DEFAULT_BULK_LOAD_BATCH_SIZE: usize = 1_000_000; |
|
|
|
const DEFAULT_BULK_LOAD_BATCH_SIZE: usize = 1_000_000; |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
const MAX_BULK_LOAD_BATCH_SIZE: usize = 100_000_000; |
|
|
|
const MAX_BULK_LOAD_BATCH_SIZE: usize = 100_000_000; |
|
|
|
|
|
|
|
|
|
|
|
/// Low level storage primitives
|
|
|
|
/// Low level storage primitives
|
|
|
|
#[derive(Clone)] |
|
|
|
#[derive(Clone)] |
|
|
|
pub struct Storage { |
|
|
|
pub struct Storage { |
|
|
|
db: Db, |
|
|
|
db: Db, |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
default_cf: ColumnFamily, |
|
|
|
default_cf: ColumnFamily, |
|
|
|
id2str_cf: ColumnFamily, |
|
|
|
id2str_cf: ColumnFamily, |
|
|
|
spog_cf: ColumnFamily, |
|
|
|
spog_cf: ColumnFamily, |
|
|
@ -74,7 +85,7 @@ impl Storage { |
|
|
|
Self::setup(Db::new(Self::column_families())?) |
|
|
|
Self::setup(Db::new(Self::column_families())?) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn open(path: &Path) -> Result<Self, StorageError> { |
|
|
|
pub fn open(path: &Path) -> Result<Self, StorageError> { |
|
|
|
Self::setup(Db::open(path, Self::column_families())?) |
|
|
|
Self::setup(Db::open(path, Self::column_families())?) |
|
|
|
} |
|
|
|
} |
|
|
@ -150,8 +161,10 @@ impl Storage { |
|
|
|
] |
|
|
|
] |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[allow(clippy::unnecessary_wraps)] |
|
|
|
fn setup(db: Db) -> Result<Self, StorageError> { |
|
|
|
fn setup(db: Db) -> Result<Self, StorageError> { |
|
|
|
let this = Self { |
|
|
|
let this = Self { |
|
|
|
|
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
default_cf: db.column_family(DEFAULT_CF).unwrap(), |
|
|
|
default_cf: db.column_family(DEFAULT_CF).unwrap(), |
|
|
|
id2str_cf: db.column_family(ID2STR_CF).unwrap(), |
|
|
|
id2str_cf: db.column_family(ID2STR_CF).unwrap(), |
|
|
|
spog_cf: db.column_family(SPOG_CF).unwrap(), |
|
|
|
spog_cf: db.column_family(SPOG_CF).unwrap(), |
|
|
@ -166,12 +179,12 @@ impl Storage { |
|
|
|
graphs_cf: db.column_family(GRAPHS_CF).unwrap(), |
|
|
|
graphs_cf: db.column_family(GRAPHS_CF).unwrap(), |
|
|
|
db, |
|
|
|
db, |
|
|
|
}; |
|
|
|
}; |
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
this.migrate()?; |
|
|
|
this.migrate()?; |
|
|
|
Ok(this) |
|
|
|
Ok(this) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
fn migrate(&self) -> Result<(), StorageError> { |
|
|
|
fn migrate(&self) -> Result<(), StorageError> { |
|
|
|
let mut version = self.ensure_version()?; |
|
|
|
let mut version = self.ensure_version()?; |
|
|
|
if version == 0 { |
|
|
|
if version == 0 { |
|
|
@ -211,7 +224,7 @@ impl Storage { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
fn ensure_version(&self) -> Result<u64, StorageError> { |
|
|
|
fn ensure_version(&self) -> Result<u64, StorageError> { |
|
|
|
Ok( |
|
|
|
Ok( |
|
|
|
if let Some(version) = self.db.get(&self.default_cf, b"oxversion")? { |
|
|
|
if let Some(version) = self.db.get(&self.default_cf, b"oxversion")? { |
|
|
@ -225,7 +238,7 @@ impl Storage { |
|
|
|
) |
|
|
|
) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
fn update_version(&self, version: u64) -> Result<(), StorageError> { |
|
|
|
fn update_version(&self, version: u64) -> Result<(), StorageError> { |
|
|
|
self.db |
|
|
|
self.db |
|
|
|
.insert(&self.default_cf, b"oxversion", &version.to_be_bytes())?; |
|
|
|
.insert(&self.default_cf, b"oxversion", &version.to_be_bytes())?; |
|
|
@ -252,7 +265,7 @@ impl Storage { |
|
|
|
}) |
|
|
|
}) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn flush(&self) -> Result<(), StorageError> { |
|
|
|
pub fn flush(&self) -> Result<(), StorageError> { |
|
|
|
self.db.flush(&self.default_cf)?; |
|
|
|
self.db.flush(&self.default_cf)?; |
|
|
|
self.db.flush(&self.gpos_cf)?; |
|
|
|
self.db.flush(&self.gpos_cf)?; |
|
|
@ -267,7 +280,7 @@ impl Storage { |
|
|
|
self.db.flush(&self.id2str_cf) |
|
|
|
self.db.flush(&self.id2str_cf) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn compact(&self) -> Result<(), StorageError> { |
|
|
|
pub fn compact(&self) -> Result<(), StorageError> { |
|
|
|
self.db.compact(&self.default_cf)?; |
|
|
|
self.db.compact(&self.default_cf)?; |
|
|
|
self.db.compact(&self.gpos_cf)?; |
|
|
|
self.db.compact(&self.gpos_cf)?; |
|
|
@ -282,7 +295,7 @@ impl Storage { |
|
|
|
self.db.compact(&self.id2str_cf) |
|
|
|
self.db.compact(&self.id2str_cf) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn backup(&self, target_directory: &Path) -> Result<(), StorageError> { |
|
|
|
pub fn backup(&self, target_directory: &Path) -> Result<(), StorageError> { |
|
|
|
self.db.backup(target_directory) |
|
|
|
self.db.backup(target_directory) |
|
|
|
} |
|
|
|
} |
|
|
@ -607,7 +620,7 @@ impl StorageReader { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> { |
|
|
|
pub fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> { |
|
|
|
Ok(self |
|
|
|
Ok(self |
|
|
|
.storage |
|
|
|
.storage |
|
|
@ -618,31 +631,31 @@ impl StorageReader { |
|
|
|
.map_err(CorruptionError::new)?) |
|
|
|
.map_err(CorruptionError::new)?) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(target_arch = "wasm32")] |
|
|
|
#[cfg(target_family = "wasm")] |
|
|
|
pub fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> { |
|
|
|
pub fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> { |
|
|
|
Ok(self |
|
|
|
Ok(self |
|
|
|
.reader |
|
|
|
.reader |
|
|
|
.get(&self.storage.id2str_cf, &key.to_be_bytes())? |
|
|
|
.get(&self.storage.id2str_cf, &key.to_be_bytes())? |
|
|
|
.map(|v| String::from_utf8(v.into())) |
|
|
|
.map(String::from_utf8) |
|
|
|
.transpose() |
|
|
|
.transpose() |
|
|
|
.map_err(CorruptionError::new)?) |
|
|
|
.map_err(CorruptionError::new)?) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> { |
|
|
|
pub fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> { |
|
|
|
self.storage |
|
|
|
self.storage |
|
|
|
.db |
|
|
|
.db |
|
|
|
.contains_key(&self.storage.id2str_cf, &key.to_be_bytes()) |
|
|
|
.contains_key(&self.storage.id2str_cf, &key.to_be_bytes()) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(target_arch = "wasm32")] |
|
|
|
#[cfg(target_family = "wasm")] |
|
|
|
pub fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> { |
|
|
|
pub fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> { |
|
|
|
self.reader |
|
|
|
self.reader |
|
|
|
.contains_key(&self.storage.id2str_cf, &key.to_be_bytes()) |
|
|
|
.contains_key(&self.storage.id2str_cf, &key.to_be_bytes()) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/// Validates that all the storage invariants held in the data
|
|
|
|
/// Validates that all the storage invariants held in the data
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub fn validate(&self) -> Result<(), StorageError> { |
|
|
|
pub fn validate(&self) -> Result<(), StorageError> { |
|
|
|
// triples
|
|
|
|
// triples
|
|
|
|
let dspo_size = self.dspo_quads(&[]).count(); |
|
|
|
let dspo_size = self.dspo_quads(&[]).count(); |
|
|
@ -752,6 +765,13 @@ impl StorageReader { |
|
|
|
} |
|
|
|
} |
|
|
|
Ok(()) |
|
|
|
Ok(()) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Validates that all the storage invariants held in the data
|
|
|
|
|
|
|
|
#[cfg(target_family = "wasm")] |
|
|
|
|
|
|
|
#[allow(clippy::unused_self, clippy::unnecessary_wraps)] |
|
|
|
|
|
|
|
pub fn validate(&self) -> Result<(), StorageError> { |
|
|
|
|
|
|
|
Ok(()) //TODO
|
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
pub struct ChainedDecodingQuadIterator { |
|
|
|
pub struct ChainedDecodingQuadIterator { |
|
|
@ -975,7 +995,7 @@ impl<'a> StorageWriter<'a> { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
fn insert_str(&mut self, key: &StrHash, value: &str) -> Result<(), StorageError> { |
|
|
|
fn insert_str(&mut self, key: &StrHash, value: &str) -> Result<(), StorageError> { |
|
|
|
if self |
|
|
|
if self |
|
|
|
.storage |
|
|
|
.storage |
|
|
@ -991,7 +1011,7 @@ impl<'a> StorageWriter<'a> { |
|
|
|
) |
|
|
|
) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(target_arch = "wasm32")] |
|
|
|
#[cfg(target_family = "wasm")] |
|
|
|
fn insert_str(&mut self, key: &StrHash, value: &str) -> Result<(), StorageError> { |
|
|
|
fn insert_str(&mut self, key: &StrHash, value: &str) -> Result<(), StorageError> { |
|
|
|
self.transaction.insert( |
|
|
|
self.transaction.insert( |
|
|
|
&self.storage.id2str_cf, |
|
|
|
&self.storage.id2str_cf, |
|
|
@ -1156,7 +1176,7 @@ impl<'a> StorageWriter<'a> { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
pub struct StorageBulkLoader { |
|
|
|
pub struct StorageBulkLoader { |
|
|
|
storage: Storage, |
|
|
|
storage: Storage, |
|
|
|
hooks: Vec<Box<dyn Fn(u64)>>, |
|
|
|
hooks: Vec<Box<dyn Fn(u64)>>, |
|
|
@ -1164,7 +1184,7 @@ pub struct StorageBulkLoader { |
|
|
|
max_memory_size: Option<usize>, |
|
|
|
max_memory_size: Option<usize>, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
impl StorageBulkLoader { |
|
|
|
impl StorageBulkLoader { |
|
|
|
pub fn new(storage: Storage) -> Self { |
|
|
|
pub fn new(storage: Storage) -> Self { |
|
|
|
Self { |
|
|
|
Self { |
|
|
@ -1290,7 +1310,7 @@ impl StorageBulkLoader { |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
struct FileBulkLoader { |
|
|
|
struct FileBulkLoader { |
|
|
|
storage: Storage, |
|
|
|
storage: Storage, |
|
|
|
id2str: HashMap<StrHash, Box<str>>, |
|
|
|
id2str: HashMap<StrHash, Box<str>>, |
|
|
@ -1299,7 +1319,7 @@ struct FileBulkLoader { |
|
|
|
graphs: HashSet<EncodedTerm>, |
|
|
|
graphs: HashSet<EncodedTerm>, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[cfg(not(target_arch = "wasm32"))] |
|
|
|
#[cfg(not(target_family = "wasm"))] |
|
|
|
impl FileBulkLoader { |
|
|
|
impl FileBulkLoader { |
|
|
|
fn new(storage: Storage) -> Self { |
|
|
|
fn new(storage: Storage) -> Self { |
|
|
|
Self { |
|
|
|
Self { |
|
|
|