Breaking: SledStore: use 3 indexes instead of 6 for the default graph

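Factors the binary term/quad encoding code shared by the RocksDB and Sled stores out into a new `binary_encoder` module.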
pull/46/head
Tpt 4 years ago
parent 09d0089910
commit 58a3b34d9f
  1. 530
      lib/src/store/binary_encoder.rs
  2. 2
      lib/src/store/mod.rs
  3. 413
      lib/src/store/numeric_encoder.rs
  4. 131
      lib/src/store/rocksdb.rs
  5. 711
      lib/src/store/sled.rs

@ -0,0 +1,530 @@
use crate::error::invalid_data_error;
use crate::model::xsd::*;
use crate::store::numeric_encoder::StrId;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::hash::Hasher;
use std::io;
use std::io::{Cursor, Read};
use std::mem::size_of;
// Terms and quads as handled by this module: every string they reference is identified by a `StrHash`.
type EncodedTerm = crate::store::numeric_encoder::EncodedTerm<StrHash>;
type EncodedQuad = crate::store::numeric_encoder::EncodedQuad<StrHash>;
/// Capacity reserved per term by the `encode_term*` helpers: one type byte plus two `StrHash`
/// values (the layout written for language-tagged and typed literals).
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
// One-byte type ids. `write_term` pushes one of them in front of each term's payload and
// `read_term` dispatches on it. The two boolean values each have their own id and carry no payload.
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_INLINE_BLANK_NODE_ID: u8 = 2;
const TYPE_NAMED_BLANK_NODE_ID: u8 = 3;
const TYPE_LANG_STRING_LITERAL_ID: u8 = 4;
const TYPE_TYPED_LITERAL_ID: u8 = 5;
const TYPE_STRING_LITERAL: u8 = 6;
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8;
const TYPE_FLOAT_LITERAL: u8 = 9;
const TYPE_DOUBLE_LITERAL: u8 = 10;
const TYPE_INTEGER_LITERAL: u8 = 11;
const TYPE_DECIMAL_LITERAL: u8 = 12;
const TYPE_DATE_TIME_LITERAL: u8 = 13;
const TYPE_DATE_LITERAL: u8 = 14;
const TYPE_TIME_LITERAL: u8 = 15;
const TYPE_DURATION_LITERAL: u8 = 16;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 17;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 18;
/// A [`StrId`] that can be converted to and from a big-endian byte representation.
pub trait SerializableStrId: StrId {
/// NOTE(review): presumably the length in bytes of the serialized id (the `StrHash`
/// implementation returns 16) — confirm against callers.
fn len() -> usize;
/// Builds an id from its big-endian bytes.
fn from_be_bytes(bytes: &[u8]) -> Self;
/// Appends the big-endian bytes of this id to `buffer`.
fn push_be_bytes(&self, buffer: &mut Vec<u8>);
}
/// Identifier of a string: a 128-bit hash, stored as a single `u128`.
///
/// `StrHash::new` computes it with `SipHasher24`.
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
#[repr(transparent)]
pub struct StrHash {
hash: u128,
}
impl StrHash {
    /// Hashes the UTF-8 bytes of `value` with `SipHasher24` and keeps the full 128-bit digest.
    pub fn new(value: &str) -> Self {
        let mut state = SipHasher24::new();
        state.write(value.as_bytes());
        let digest: u128 = state.finish128().into();
        Self { hash: digest }
    }

    /// Rebuilds a hash from its 16-byte big-endian representation.
    #[inline]
    pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
        let hash = u128::from_be_bytes(bytes);
        Self { hash }
    }

    /// Returns the 16-byte big-endian representation of the hash.
    #[inline]
    pub fn to_be_bytes(&self) -> [u8; 16] {
        u128::to_be_bytes(self.hash)
    }
}
impl StrId for StrHash {}
impl SerializableStrId for StrHash {
fn len() -> usize {
16
}
fn from_be_bytes(bytes: &[u8]) -> Self {
let mut hash = [0; 16];
hash.copy_from_slice(bytes);
Self {
hash: u128::from_be_bytes(hash),
}
}
fn push_be_bytes(&self, buffer: &mut Vec<u8>) {
buffer.extend_from_slice(&self.to_be_bytes())
}
}
/// Order in which the terms of a quad are laid out in an encoded buffer.
///
/// Each letter names one term, in reading order: `S` subject, `P` predicate, `O` object,
/// `G` graph name. The `D*` layouts hold only three terms; `decode` gives the resulting quad
/// the default graph as its graph name.
#[derive(Clone, Copy)]
pub enum QuadEncoding {
SPOG,
POSG,
OSPG,
GSPO,
GPOS,
GOSP,
DSPO,
DPOS,
DOSP,
}
impl QuadEncoding {
    /// Decodes `buffer` as a single quad whose terms are stored in the order named by `self`.
    ///
    /// Any error raised while reading a term is returned unchanged.
    pub fn decode(self, buffer: &[u8]) -> Result<EncodedQuad, io::Error> {
        let mut reader = Cursor::new(buffer);
        match self {
            Self::SPOG => reader.read_spog_quad(),
            Self::POSG => reader.read_posg_quad(),
            Self::OSPG => reader.read_ospg_quad(),
            Self::GSPO => reader.read_gspo_quad(),
            Self::GPOS => reader.read_gpos_quad(),
            Self::GOSP => reader.read_gosp_quad(),
            Self::DSPO => reader.read_dspo_quad(),
            Self::DPOS => reader.read_dpos_quad(),
            Self::DOSP => reader.read_dosp_quad(),
        }
    }
}
/// Reads encoded terms, and quads made of consecutive terms, from a byte source.
///
/// In every `read_*_quad` method the struct fields are written in the order the terms are
/// stored: Rust evaluates struct-literal fields in the order written, so the terms are read
/// in exactly that order.
pub trait TermReader {
    /// Reads a single encoded term.
    fn read_term(&mut self) -> Result<EncodedTerm, io::Error>;

    /// Reads subject, predicate, object, then graph name.
    fn read_spog_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            subject: self.read_term()?,
            predicate: self.read_term()?,
            object: self.read_term()?,
            graph_name: self.read_term()?,
        })
    }

    /// Reads predicate, object, subject, then graph name.
    fn read_posg_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            predicate: self.read_term()?,
            object: self.read_term()?,
            subject: self.read_term()?,
            graph_name: self.read_term()?,
        })
    }

    /// Reads object, subject, predicate, then graph name.
    fn read_ospg_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            object: self.read_term()?,
            subject: self.read_term()?,
            predicate: self.read_term()?,
            graph_name: self.read_term()?,
        })
    }

    /// Reads graph name, subject, predicate, then object.
    fn read_gspo_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            graph_name: self.read_term()?,
            subject: self.read_term()?,
            predicate: self.read_term()?,
            object: self.read_term()?,
        })
    }

    /// Reads graph name, predicate, object, then subject.
    fn read_gpos_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            graph_name: self.read_term()?,
            predicate: self.read_term()?,
            object: self.read_term()?,
            subject: self.read_term()?,
        })
    }

    /// Reads graph name, object, subject, then predicate.
    fn read_gosp_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            graph_name: self.read_term()?,
            object: self.read_term()?,
            subject: self.read_term()?,
            predicate: self.read_term()?,
        })
    }

    /// Reads subject, predicate, then object; the graph name is set to the default graph.
    fn read_dspo_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            subject: self.read_term()?,
            predicate: self.read_term()?,
            object: self.read_term()?,
            graph_name: EncodedTerm::DefaultGraph,
        })
    }

    /// Reads predicate, object, then subject; the graph name is set to the default graph.
    fn read_dpos_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            predicate: self.read_term()?,
            object: self.read_term()?,
            subject: self.read_term()?,
            graph_name: EncodedTerm::DefaultGraph,
        })
    }

    /// Reads object, subject, then predicate; the graph name is set to the default graph.
    fn read_dosp_quad(&mut self) -> Result<EncodedQuad, io::Error> {
        Ok(EncodedQuad {
            object: self.read_term()?,
            subject: self.read_term()?,
            predicate: self.read_term()?,
            graph_name: EncodedTerm::DefaultGraph,
        })
    }
}
impl<R: Read> TermReader for R {
    /// Reads one term: a type byte followed by the payload `write_term` emits for that type.
    ///
    /// # Errors
    ///
    /// Returns the `read_exact` error when the input ends before the term is complete, and
    /// the error built by `invalid_data_error` when the type byte is not a known `TYPE_*` id.
    fn read_term(&mut self) -> Result<EncodedTerm, io::Error> {
        let mut type_buffer = [0];
        self.read_exact(&mut type_buffer)?;
        match type_buffer[0] {
            TYPE_DEFAULT_GRAPH_ID => Ok(EncodedTerm::DefaultGraph),
            TYPE_NAMED_NODE_ID => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::NamedNode {
                    iri_id: StrHash::from_be_bytes(buffer),
                })
            }
            TYPE_INLINE_BLANK_NODE_ID => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::InlineBlankNode {
                    id: u128::from_be_bytes(buffer),
                })
            }
            TYPE_NAMED_BLANK_NODE_ID => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::NamedBlankNode {
                    id_id: StrHash::from_be_bytes(buffer),
                })
            }
            TYPE_LANG_STRING_LITERAL_ID => {
                // `write_term` emits the value id first and the language id second: read
                // them back in that same order so the two ids are not swapped on decoding.
                let mut value_buffer = [0; 16];
                self.read_exact(&mut value_buffer)?;
                let mut language_buffer = [0; 16];
                self.read_exact(&mut language_buffer)?;
                Ok(EncodedTerm::LangStringLiteral {
                    value_id: StrHash::from_be_bytes(value_buffer),
                    language_id: StrHash::from_be_bytes(language_buffer),
                })
            }
            TYPE_TYPED_LITERAL_ID => {
                // Same layout as above: `write_term` emits the value id, then the datatype id.
                let mut value_buffer = [0; 16];
                self.read_exact(&mut value_buffer)?;
                let mut datatype_buffer = [0; 16];
                self.read_exact(&mut datatype_buffer)?;
                Ok(EncodedTerm::TypedLiteral {
                    value_id: StrHash::from_be_bytes(value_buffer),
                    datatype_id: StrHash::from_be_bytes(datatype_buffer),
                })
            }
            TYPE_STRING_LITERAL => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::StringLiteral {
                    value_id: StrHash::from_be_bytes(buffer),
                })
            }
            // Booleans carry no payload: the type byte alone encodes the value.
            TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)),
            TYPE_BOOLEAN_LITERAL_FALSE => Ok(EncodedTerm::BooleanLiteral(false)),
            TYPE_FLOAT_LITERAL => {
                let mut buffer = [0; 4];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::FloatLiteral(f32::from_be_bytes(buffer)))
            }
            TYPE_DOUBLE_LITERAL => {
                let mut buffer = [0; 8];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::DoubleLiteral(f64::from_be_bytes(buffer)))
            }
            TYPE_INTEGER_LITERAL => {
                let mut buffer = [0; 8];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::IntegerLiteral(i64::from_be_bytes(buffer)))
            }
            TYPE_DECIMAL_LITERAL => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::DecimalLiteral(Decimal::from_be_bytes(buffer)))
            }
            TYPE_DATE_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::DateLiteral(Date::from_be_bytes(buffer)))
            }
            TYPE_TIME_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::TimeLiteral(Time::from_be_bytes(buffer)))
            }
            TYPE_DATE_TIME_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::DateTimeLiteral(DateTime::from_be_bytes(
                    buffer,
                )))
            }
            TYPE_DURATION_LITERAL => {
                let mut buffer = [0; 24];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::DurationLiteral(Duration::from_be_bytes(
                    buffer,
                )))
            }
            TYPE_YEAR_MONTH_DURATION_LITERAL => {
                let mut buffer = [0; 8];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::YearMonthDurationLiteral(
                    YearMonthDuration::from_be_bytes(buffer),
                ))
            }
            TYPE_DAY_TIME_DURATION_LITERAL => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
                Ok(EncodedTerm::DayTimeDurationLiteral(
                    DayTimeDuration::from_be_bytes(buffer),
                ))
            }
            _ => Err(invalid_data_error("the term buffer has an invalid type id")),
        }
    }
}
/// Appends the terms of `quad` to `sink` in subject, predicate, object, graph name order.
pub fn write_spog_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.subject, quad.predicate, quad.object, quad.graph_name] {
        write_term(sink, term);
    }
}
/// Appends the terms of `quad` to `sink` in predicate, object, subject, graph name order.
pub fn write_posg_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.predicate, quad.object, quad.subject, quad.graph_name] {
        write_term(sink, term);
    }
}
/// Appends the terms of `quad` to `sink` in object, subject, predicate, graph name order.
pub fn write_ospg_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.object, quad.subject, quad.predicate, quad.graph_name] {
        write_term(sink, term);
    }
}
/// Appends the terms of `quad` to `sink` in graph name, subject, predicate, object order.
pub fn write_gspo_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.graph_name, quad.subject, quad.predicate, quad.object] {
        write_term(sink, term);
    }
}
/// Appends the terms of `quad` to `sink` in graph name, predicate, object, subject order.
pub fn write_gpos_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.graph_name, quad.predicate, quad.object, quad.subject] {
        write_term(sink, term);
    }
}
/// Appends the terms of `quad` to `sink` in graph name, object, subject, predicate order.
pub fn write_gosp_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.graph_name, quad.object, quad.subject, quad.predicate] {
        write_term(sink, term);
    }
}
/// Appends subject, predicate and object of `quad` to `sink`; the graph name is not written.
pub fn write_spo_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.subject, quad.predicate, quad.object] {
        write_term(sink, term);
    }
}
/// Appends predicate, object and subject of `quad` to `sink`; the graph name is not written.
pub fn write_pos_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.predicate, quad.object, quad.subject] {
        write_term(sink, term);
    }
}
/// Appends object, subject and predicate of `quad` to `sink`; the graph name is not written.
pub fn write_osp_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
    for &term in &[quad.object, quad.subject, quad.predicate] {
        write_term(sink, term);
    }
}
/// Encodes a single term into a freshly allocated buffer.
pub fn encode_term(t: EncodedTerm) -> Vec<u8> {
    let mut encoded: Vec<u8> = Vec::with_capacity(WRITTEN_TERM_MAX_SIZE);
    write_term(&mut encoded, t);
    encoded
}
/// Encodes `t1` then `t2`, back to back, into a freshly allocated buffer.
pub fn encode_term_pair(t1: EncodedTerm, t2: EncodedTerm) -> Vec<u8> {
    let mut encoded: Vec<u8> = Vec::with_capacity(2 * WRITTEN_TERM_MAX_SIZE);
    for &term in &[t1, t2] {
        write_term(&mut encoded, term);
    }
    encoded
}
/// Encodes `t1`, `t2` then `t3`, back to back, into a freshly allocated buffer.
pub fn encode_term_triple(t1: EncodedTerm, t2: EncodedTerm, t3: EncodedTerm) -> Vec<u8> {
    let mut encoded: Vec<u8> = Vec::with_capacity(3 * WRITTEN_TERM_MAX_SIZE);
    for &term in &[t1, t2, t3] {
        write_term(&mut encoded, term);
    }
    encoded
}
/// Encodes `t1`, `t2`, `t3` then `t4`, back to back, into a freshly allocated buffer.
pub fn encode_term_quad(
    t1: EncodedTerm,
    t2: EncodedTerm,
    t3: EncodedTerm,
    t4: EncodedTerm,
) -> Vec<u8> {
    let mut encoded: Vec<u8> = Vec::with_capacity(4 * WRITTEN_TERM_MAX_SIZE);
    for &term in &[t1, t2, t3, t4] {
        write_term(&mut encoded, term);
    }
    encoded
}
pub fn write_term(sink: &mut Vec<u8>, term: EncodedTerm) {
match term {
EncodedTerm::DefaultGraph => sink.push(TYPE_DEFAULT_GRAPH_ID),
EncodedTerm::NamedNode { iri_id } => {
sink.push(TYPE_NAMED_NODE_ID);
iri_id.push_be_bytes(sink)
}
EncodedTerm::InlineBlankNode { id } => {
sink.push(TYPE_INLINE_BLANK_NODE_ID);
sink.extend_from_slice(&id.to_be_bytes())
}
EncodedTerm::NamedBlankNode { id_id } => {
sink.push(TYPE_NAMED_BLANK_NODE_ID);
id_id.push_be_bytes(sink)
}
EncodedTerm::StringLiteral { value_id } => {
sink.push(TYPE_STRING_LITERAL);
value_id.push_be_bytes(sink)
}
EncodedTerm::LangStringLiteral {
value_id,
language_id,
} => {
sink.push(TYPE_LANG_STRING_LITERAL_ID);
value_id.push_be_bytes(sink);
language_id.push_be_bytes(sink);
}
EncodedTerm::TypedLiteral {
value_id,
datatype_id,
} => {
sink.push(TYPE_TYPED_LITERAL_ID);
value_id.push_be_bytes(sink);
datatype_id.push_be_bytes(sink);
}
EncodedTerm::BooleanLiteral(true) => sink.push(TYPE_BOOLEAN_LITERAL_TRUE),
EncodedTerm::BooleanLiteral(false) => sink.push(TYPE_BOOLEAN_LITERAL_FALSE),
EncodedTerm::FloatLiteral(value) => {
sink.push(TYPE_FLOAT_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DoubleLiteral(value) => {
sink.push(TYPE_DOUBLE_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::IntegerLiteral(value) => {
sink.push(TYPE_INTEGER_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DecimalLiteral(value) => {
sink.push(TYPE_DECIMAL_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DateLiteral(value) => {
sink.push(TYPE_DATE_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::TimeLiteral(value) => {
sink.push(TYPE_TIME_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DateTimeLiteral(value) => {
sink.push(TYPE_DATE_TIME_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DurationLiteral(value) => {
sink.push(TYPE_DURATION_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::YearMonthDurationLiteral(value) => {
sink.push(TYPE_YEAR_MONTH_DURATION_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DayTimeDurationLiteral(value) => {
sink.push(TYPE_DAY_TIME_DURATION_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
}
}

@ -3,6 +3,8 @@
//! They encode a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) //! They encode a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset)
//! and allow querying and updating them using SPARQL. //! and allow querying and updating them using SPARQL.
#[cfg(any(feature = "rocksdb", feature = "sled"))]
mod binary_encoder;
pub mod memory; pub mod memory;
pub(crate) mod numeric_encoder; pub(crate) mod numeric_encoder;
#[cfg(feature = "rocksdb")] #[cfg(feature = "rocksdb")]

@ -4,97 +4,19 @@ use crate::error::invalid_data_error;
use crate::model::xsd::*; use crate::model::xsd::*;
use crate::model::*; use crate::model::*;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use lasso::{Rodeo, Spur};
use rand::random; use rand::random;
use rio_api::model as rio; use rio_api::model as rio;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::Infallible; use std::convert::Infallible;
use std::error::Error; use std::error::Error;
use std::fmt::Debug; use std::fmt::Debug;
use std::hash::Hash; use std::hash::Hash;
use std::hash::Hasher; use std::hash::Hasher;
use std::io::Read;
use std::mem::size_of;
use std::{fmt, io, str}; use std::{fmt, io, str};
pub trait StrId: Eq + Debug + Copy + Hash {} pub trait StrId: Eq + Debug + Copy + Hash {}
pub trait SerializableStrId: StrId {
fn len() -> usize;
fn from_be_bytes(bytes: &[u8]) -> Self;
fn push_be_bytes(&self, buffer: &mut Vec<u8>);
}
#[derive(Eq, PartialEq, Debug, Copy, Clone, Hash)]
#[repr(transparent)]
pub struct StrHash {
hash: u128,
}
impl StrHash {
pub fn new(value: &str) -> Self {
let mut hasher = SipHasher24::new();
hasher.write(value.as_bytes());
Self {
hash: hasher.finish128().into(),
}
}
#[inline]
pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
Self {
hash: u128::from_be_bytes(bytes),
}
}
#[inline]
pub fn to_be_bytes(&self) -> [u8; 16] {
self.hash.to_be_bytes()
}
}
impl StrId for StrHash {}
impl SerializableStrId for StrHash {
fn len() -> usize {
16
}
fn from_be_bytes(bytes: &[u8]) -> Self {
let mut hash = [0; 16];
hash.copy_from_slice(bytes);
Self {
hash: u128::from_be_bytes(hash),
}
}
fn push_be_bytes(&self, buffer: &mut Vec<u8>) {
buffer.extend_from_slice(&self.to_be_bytes())
}
}
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_INLINE_BLANK_NODE_ID: u8 = 2;
const TYPE_NAMED_BLANK_NODE_ID: u8 = 3;
const TYPE_LANG_STRING_LITERAL_ID: u8 = 4;
const TYPE_TYPED_LITERAL_ID: u8 = 5;
const TYPE_STRING_LITERAL: u8 = 6;
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8;
const TYPE_FLOAT_LITERAL: u8 = 9;
const TYPE_DOUBLE_LITERAL: u8 = 10;
const TYPE_INTEGER_LITERAL: u8 = 11;
const TYPE_DECIMAL_LITERAL: u8 = 12;
const TYPE_DATE_TIME_LITERAL: u8 = 13;
const TYPE_DATE_LITERAL: u8 = 14;
const TYPE_TIME_LITERAL: u8 = 15;
const TYPE_DURATION_LITERAL: u8 = 16;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 17;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 18;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum EncodedTerm<I: StrId> { pub enum EncodedTerm<I: StrId> {
DefaultGraph, DefaultGraph,
@ -264,30 +186,6 @@ impl<I: StrId> EncodedTerm<I> {
*self == EncodedTerm::DefaultGraph *self == EncodedTerm::DefaultGraph
} }
fn type_id(&self) -> u8 {
match self {
Self::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID,
Self::NamedNode { .. } => TYPE_NAMED_NODE_ID,
Self::InlineBlankNode { .. } => TYPE_INLINE_BLANK_NODE_ID,
Self::NamedBlankNode { .. } => TYPE_NAMED_BLANK_NODE_ID,
Self::StringLiteral { .. } => TYPE_STRING_LITERAL,
Self::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID,
Self::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID,
Self::BooleanLiteral(true) => TYPE_BOOLEAN_LITERAL_TRUE,
Self::BooleanLiteral(false) => TYPE_BOOLEAN_LITERAL_FALSE,
Self::FloatLiteral(_) => TYPE_FLOAT_LITERAL,
Self::DoubleLiteral(_) => TYPE_DOUBLE_LITERAL,
Self::IntegerLiteral(_) => TYPE_INTEGER_LITERAL,
Self::DecimalLiteral(_) => TYPE_DECIMAL_LITERAL,
Self::DateLiteral(_) => TYPE_DATE_LITERAL,
Self::TimeLiteral(_) => TYPE_TIME_LITERAL,
Self::DateTimeLiteral(_) => TYPE_DATE_TIME_LITERAL,
Self::DurationLiteral(_) => TYPE_DURATION_LITERAL,
Self::YearMonthDurationLiteral(_) => TYPE_YEAR_MONTH_DURATION_LITERAL,
Self::DayTimeDurationLiteral(_) => TYPE_DAY_TIME_DURATION_LITERAL,
}
}
pub fn map_id<J: StrId>(self, mapping: impl Fn(I) -> J) -> EncodedTerm<J> { pub fn map_id<J: StrId>(self, mapping: impl Fn(I) -> J) -> EncodedTerm<J> {
match self { match self {
Self::DefaultGraph { .. } => EncodedTerm::DefaultGraph, Self::DefaultGraph { .. } => EncodedTerm::DefaultGraph,
@ -478,283 +376,6 @@ impl<I: StrId> EncodedQuad<I> {
} }
} }
pub trait TermReader {
fn read_term(&mut self) -> Result<EncodedTerm<StrHash>, io::Error>;
fn read_spog_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let subject = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
let graph_name = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_posg_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let predicate = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
let graph_name = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_ospg_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let object = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
let graph_name = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_gspo_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let graph_name = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_gpos_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let graph_name = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_gosp_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let graph_name = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_dspo_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let subject = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name: EncodedTerm::DefaultGraph,
})
}
fn read_dpos_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let predicate = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name: EncodedTerm::DefaultGraph,
})
}
fn read_dosp_quad(&mut self) -> Result<EncodedQuad<StrHash>, io::Error> {
let object = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name: EncodedTerm::DefaultGraph,
})
}
}
impl<R: Read> TermReader for R {
fn read_term(&mut self) -> Result<EncodedTerm<StrHash>, io::Error> {
let mut type_buffer = [0];
self.read_exact(&mut type_buffer)?;
match type_buffer[0] {
TYPE_DEFAULT_GRAPH_ID => Ok(EncodedTerm::DefaultGraph),
TYPE_NAMED_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedNode {
iri_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_INLINE_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::InlineBlankNode {
id: u128::from_be_bytes(buffer),
})
}
TYPE_NAMED_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedBlankNode {
id_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_LANG_STRING_LITERAL_ID => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::LangStringLiteral {
language_id: StrHash::from_be_bytes(language_buffer),
value_id: StrHash::from_be_bytes(value_buffer),
})
}
TYPE_TYPED_LITERAL_ID => {
let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::TypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer),
value_id: StrHash::from_be_bytes(value_buffer),
})
}
TYPE_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::StringLiteral {
value_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)),
TYPE_BOOLEAN_LITERAL_FALSE => Ok(EncodedTerm::BooleanLiteral(false)),
TYPE_FLOAT_LITERAL => {
let mut buffer = [0; 4];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::FloatLiteral(f32::from_be_bytes(buffer)))
}
TYPE_DOUBLE_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DoubleLiteral(f64::from_be_bytes(buffer)))
}
TYPE_INTEGER_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::IntegerLiteral(i64::from_be_bytes(buffer)))
}
TYPE_DECIMAL_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DecimalLiteral(Decimal::from_be_bytes(buffer)))
}
TYPE_DATE_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DateLiteral(Date::from_be_bytes(buffer)))
}
TYPE_TIME_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::TimeLiteral(Time::from_be_bytes(buffer)))
}
TYPE_DATE_TIME_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DateTimeLiteral(DateTime::from_be_bytes(
buffer,
)))
}
TYPE_DURATION_LITERAL => {
let mut buffer = [0; 24];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DurationLiteral(Duration::from_be_bytes(
buffer,
)))
}
TYPE_YEAR_MONTH_DURATION_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::YearMonthDurationLiteral(
YearMonthDuration::from_be_bytes(buffer),
))
}
TYPE_DAY_TIME_DURATION_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DayTimeDurationLiteral(
DayTimeDuration::from_be_bytes(buffer),
))
}
_ => Err(invalid_data_error("the term buffer has an invalid type id")),
}
}
}
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
pub fn write_term<I: SerializableStrId>(sink: &mut Vec<u8>, term: EncodedTerm<I>) {
sink.push(term.type_id());
match term {
EncodedTerm::DefaultGraph => {}
EncodedTerm::NamedNode { iri_id } => iri_id.push_be_bytes(sink),
EncodedTerm::InlineBlankNode { id } => sink.extend_from_slice(&id.to_be_bytes()),
EncodedTerm::NamedBlankNode { id_id } => id_id.push_be_bytes(sink),
EncodedTerm::StringLiteral { value_id } => value_id.push_be_bytes(sink),
EncodedTerm::LangStringLiteral {
value_id,
language_id,
} => {
value_id.push_be_bytes(sink);
language_id.push_be_bytes(sink);
}
EncodedTerm::TypedLiteral {
value_id,
datatype_id,
} => {
value_id.push_be_bytes(sink);
datatype_id.push_be_bytes(sink);
}
EncodedTerm::BooleanLiteral(_) => {}
EncodedTerm::FloatLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::DoubleLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::IntegerLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::DecimalLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::DateLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::TimeLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::DateTimeLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::DurationLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
EncodedTerm::YearMonthDurationLiteral(value) => {
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DayTimeDurationLiteral(value) => sink.extend_from_slice(&value.to_be_bytes()),
}
}
pub(crate) trait WithStoreError { pub(crate) trait WithStoreError {
//TODO: rename //TODO: rename
type Error: Error + Into<EvaluationError> + 'static; type Error: Error + Into<EvaluationError> + 'static;
@ -776,44 +397,32 @@ pub(crate) trait StrContainer: WithStoreError {
fn insert_str(&mut self, value: &str) -> Result<Self::StrId, Self::Error>; fn insert_str(&mut self, value: &str) -> Result<Self::StrId, Self::Error>;
} }
#[derive(Default)]
pub struct MemoryStrStore { pub struct MemoryStrStore {
id2str: HashMap<StrHash, String>, inner: Rodeo,
} }
impl Default for MemoryStrStore { impl StrId for Spur {}
fn default() -> Self {
Self {
id2str: HashMap::default(),
}
}
}
impl WithStoreError for MemoryStrStore { impl WithStoreError for MemoryStrStore {
type Error = Infallible; type Error = Infallible;
type StrId = StrHash; type StrId = Spur;
} }
impl StrLookup for MemoryStrStore { impl StrLookup for MemoryStrStore {
fn get_str(&self, id: StrHash) -> Result<Option<String>, Infallible> { fn get_str(&self, id: Spur) -> Result<Option<String>, Infallible> {
//TODO: avoid copy by adding a lifetime limit to get_str //TODO: avoid copy by adding a lifetime limit to get_str
Ok(self.id2str.get(&id).cloned()) Ok(self.inner.try_resolve(&id).map(|e| e.to_owned()))
} }
fn get_str_id(&self, value: &str) -> Result<Option<StrHash>, Infallible> { fn get_str_id(&self, value: &str) -> Result<Option<Spur>, Infallible> {
let id = StrHash::new(value); Ok(self.inner.get(value))
Ok(if self.id2str.contains_key(&id) {
Some(id)
} else {
None
})
} }
} }
impl StrContainer for MemoryStrStore { impl StrContainer for MemoryStrStore {
fn insert_str(&mut self, value: &str) -> Result<StrHash, Infallible> { fn insert_str(&mut self, value: &str) -> Result<Spur, Infallible> {
let key = StrHash::new(value); Ok(self.inner.get_or_intern(value))
self.id2str.entry(key).or_insert_with(|| value.to_owned());
Ok(key)
} }
} }

@ -4,9 +4,9 @@ use crate::error::invalid_data_error;
use crate::io::{DatasetFormat, GraphFormat}; use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*; use crate::model::*;
use crate::sparql::{EvaluationError, Query, QueryOptions, QueryResult, SimplePreparedQuery}; use crate::sparql::{EvaluationError, Query, QueryOptions, QueryResult, SimplePreparedQuery};
use crate::store::binary_encoder::*;
use crate::store::numeric_encoder::{ use crate::store::numeric_encoder::{
write_term, Decoder, ReadEncoder, StrContainer, StrHash, StrLookup, TermReader, WithStoreError, Decoder, ReadEncoder, StrContainer, StrLookup, WithStoreError, WriteEncoder,
WriteEncoder, WRITTEN_TERM_MAX_SIZE,
}; };
use crate::store::{ use crate::store::{
dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph, dump_dataset, dump_graph, get_encoded_quad_pattern, load_dataset, load_graph,
@ -16,7 +16,7 @@ use rocksdb::*;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::TryInto; use std::convert::TryInto;
use std::io; use std::io;
use std::io::{BufRead, Cursor, Write}; use std::io::{BufRead, Write};
use std::iter::{once, Once}; use std::iter::{once, Once};
use std::mem::{take, transmute}; use std::mem::{take, transmute};
use std::path::Path; use std::path::Path;
@ -1045,36 +1045,6 @@ fn get_cf<'a>(db: &'a DB, name: &str) -> &'a ColumnFamily {
.expect("A column family that should exist in RocksDB does not exist") .expect("A column family that should exist in RocksDB does not exist")
} }
fn encode_term(t: EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t);
vec
}
fn encode_term_pair(t1: EncodedTerm, t2: EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(2 * WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t1);
write_term(&mut vec, t2);
vec
}
fn encode_term_triple(t1: EncodedTerm, t2: EncodedTerm, t3: EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(3 * WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t1);
write_term(&mut vec, t2);
write_term(&mut vec, t3);
vec
}
fn encode_term_quad(t1: EncodedTerm, t2: EncodedTerm, t3: EncodedTerm, t4: EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(4 * WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t1);
write_term(&mut vec, t2);
write_term(&mut vec, t3);
write_term(&mut vec, t4);
vec
}
struct StaticDBRowIterator { struct StaticDBRowIterator {
iter: DBRawIterator<'static>, iter: DBRawIterator<'static>,
_db: Arc<DB>, // needed to ensure that DB still lives while iter is used _db: Arc<DB>, // needed to ensure that DB still lives while iter is used
@ -1162,96 +1132,6 @@ impl Iterator for DecodingIndexIterator {
} }
} }
fn write_spog_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.subject);
write_term(sink, quad.predicate);
write_term(sink, quad.object);
write_term(sink, quad.graph_name);
}
fn write_posg_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.predicate);
write_term(sink, quad.object);
write_term(sink, quad.subject);
write_term(sink, quad.graph_name);
}
fn write_ospg_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.object);
write_term(sink, quad.subject);
write_term(sink, quad.predicate);
write_term(sink, quad.graph_name);
}
fn write_gspo_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.graph_name);
write_term(sink, quad.subject);
write_term(sink, quad.predicate);
write_term(sink, quad.object);
}
fn write_gpos_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.graph_name);
write_term(sink, quad.predicate);
write_term(sink, quad.object);
write_term(sink, quad.subject);
}
fn write_gosp_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.graph_name);
write_term(sink, quad.object);
write_term(sink, quad.subject);
write_term(sink, quad.predicate);
}
fn write_spo_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.subject);
write_term(sink, quad.predicate);
write_term(sink, quad.object);
}
fn write_pos_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.predicate);
write_term(sink, quad.object);
write_term(sink, quad.subject);
}
fn write_osp_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, quad.object);
write_term(sink, quad.subject);
write_term(sink, quad.predicate);
}
#[derive(Clone, Copy)]
enum QuadEncoding {
SPOG,
POSG,
OSPG,
GSPO,
GPOS,
GOSP,
DSPO,
DPOS,
DOSP,
}
impl QuadEncoding {
fn decode(self, buffer: &[u8]) -> Result<EncodedQuad, io::Error> {
let mut cursor = Cursor::new(&buffer);
match self {
QuadEncoding::SPOG => cursor.read_spog_quad(),
QuadEncoding::POSG => cursor.read_posg_quad(),
QuadEncoding::OSPG => cursor.read_ospg_quad(),
QuadEncoding::GSPO => cursor.read_gspo_quad(),
QuadEncoding::GPOS => cursor.read_gpos_quad(),
QuadEncoding::GOSP => cursor.read_gosp_quad(),
QuadEncoding::DSPO => cursor.read_dspo_quad(),
QuadEncoding::DPOS => cursor.read_dpos_quad(),
QuadEncoding::DOSP => cursor.read_dosp_quad(),
}
}
}
fn map_err(e: Error) -> io::Error { fn map_err(e: Error) -> io::Error {
io::Error::new(io::ErrorKind::Other, e) io::Error::new(io::ErrorKind::Other, e)
} }
@ -1330,6 +1210,7 @@ fn store() -> Result<(), io::Error> {
store.insert(t)?; store.insert(t)?;
} }
assert_eq!(store.len(), 4);
assert_eq!( assert_eq!(
store store
.quads_for_pattern(None, None, None, None) .quads_for_pattern(None, None, None, None)
@ -1464,7 +1345,7 @@ fn store() -> Result<(), io::Error> {
Some(GraphNameRef::DefaultGraph) Some(GraphNameRef::DefaultGraph)
) )
.collect::<Result<Vec<_>, _>>()?, .collect::<Result<Vec<_>, _>>()?,
vec![default_quad.clone()] vec![default_quad]
); );
assert_eq!( assert_eq!(
store store
@ -1475,7 +1356,7 @@ fn store() -> Result<(), io::Error> {
Some(main_g.as_ref()) Some(main_g.as_ref())
) )
.collect::<Result<Vec<_>, _>>()?, .collect::<Result<Vec<_>, _>>()?,
vec![named_quad.clone()] vec![named_quad]
); );
} }

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save