Simplifies term encoding code

pull/171/head
Tpt 3 years ago
parent f9d9530a1b
commit 8606877e33
  1. 70
      lib/src/sparql/dataset.rs
  2. 19
      lib/src/storage/binary_encoder.rs
  3. 52
      lib/src/storage/mod.rs
  4. 246
      lib/src/storage/numeric_encoder.rs

@ -1,10 +1,13 @@
use crate::model::TermRef; use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset; use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup}; use crate::storage::numeric_encoder::{
insert_term_values, EncodedQuad, EncodedTerm, StrHash, StrLookup,
};
use crate::storage::Storage; use crate::storage::Storage;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::Infallible;
use std::iter::empty; use std::iter::empty;
pub(crate) struct DatasetView { pub(crate) struct DatasetView {
@ -139,69 +142,14 @@ impl DatasetView {
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm { pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
let term = term.into(); let term = term.into();
let encoded = term.into(); let encoded = term.into();
self.insert_term_values(term, &encoded); insert_term_values::<Infallible, _>(term, &encoded, |key, value| {
self.insert_str(key, value);
Ok(())
})
.unwrap(); // Can not fail
encoded encoded
} }
fn insert_term_values(&self, term: TermRef<'_>, encoded: &EncodedTerm) {
match (term, encoded) {
(TermRef::NamedNode(node), EncodedTerm::NamedNode { iri_id }) => {
self.insert_str(iri_id, node.as_str());
}
(TermRef::BlankNode(node), EncodedTerm::BigBlankNode { id_id }) => {
self.insert_str(id_id, node.as_str());
}
(TermRef::Literal(literal), EncodedTerm::BigStringLiteral { value_id }) => {
self.insert_str(value_id, literal.value());
}
(
TermRef::Literal(literal),
EncodedTerm::SmallBigLangStringLiteral { language_id, .. },
) => {
if let Some(language) = literal.language() {
self.insert_str(language_id, language)
}
}
(
TermRef::Literal(literal),
EncodedTerm::BigSmallLangStringLiteral { value_id, .. },
) => {
self.insert_str(value_id, literal.value());
}
(
TermRef::Literal(literal),
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
},
) => {
self.insert_str(value_id, literal.value());
if let Some(language) = literal.language() {
self.insert_str(language_id, language)
}
}
(TermRef::Literal(literal), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => {
self.insert_str(datatype_id, literal.datatype().as_str());
}
(
TermRef::Literal(literal),
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
},
) => {
self.insert_str(value_id, literal.value());
self.insert_str(datatype_id, literal.datatype().as_str());
}
(TermRef::Triple(triple), EncodedTerm::Triple(encoded)) => {
self.insert_term_values(triple.subject.as_ref().into(), &encoded.subject);
self.insert_term_values(triple.predicate.as_ref().into(), &encoded.predicate);
self.insert_term_values(triple.object.as_ref(), &encoded.object);
}
_ => (),
}
}
pub fn insert_str(&self, key: &StrHash, value: &str) { pub fn insert_str(&self, key: &StrHash, value: &str) {
if matches!(self.storage.contains_str(key), Ok(true)) { if matches!(self.storage.contains_str(key), Ok(true)) {
return; return;

@ -643,7 +643,6 @@ mod tests {
use super::*; use super::*;
use crate::storage::numeric_encoder::*; use crate::storage::numeric_encoder::*;
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap; use std::collections::HashMap;
use std::convert::Infallible; use std::convert::Infallible;
@ -664,15 +663,15 @@ mod tests {
} }
} }
impl StrContainer for MemoryStrStore { impl TermEncoder for MemoryStrStore {
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, Infallible> { type Error = Infallible;
match self.id2str.borrow_mut().entry(*key) {
Entry::Occupied(_) => Ok(false), fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), Infallible> {
Entry::Vacant(entry) => { self.id2str
entry.insert(value.to_owned()); .borrow_mut()
Ok(true) .entry(*key)
} .or_insert_with(|| value.to_owned());
} Ok(())
} }
} }

@ -19,9 +19,7 @@ use crate::storage::binary_encoder::{
LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE, LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE,
}; };
use crate::storage::io::StoreOrParseError; use crate::storage::io::StoreOrParseError;
use crate::storage::numeric_encoder::{ use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup, TermEncoder};
EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup, WriteEncoder,
};
mod binary_encoder; mod binary_encoder;
pub(crate) mod io; pub(crate) mod io;
@ -671,10 +669,6 @@ impl Storage {
pub fn contains_str(&self, key: &StrHash) -> std::io::Result<bool> { pub fn contains_str(&self, key: &StrHash) -> std::io::Result<bool> {
Ok(self.id2str.contains_key(key.to_be_bytes())?) Ok(self.id2str.contains_key(key.to_be_bytes())?)
} }
pub fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<bool> {
Ok(self.id2str.insert(key.to_be_bytes(), value)?.is_none())
}
} }
pub struct ChainedDecodingQuadIterator { pub struct ChainedDecodingQuadIterator {
@ -872,7 +866,7 @@ impl<'a> StorageTransaction<'a> {
&self, &self,
graph_name: NamedOrBlankNodeRef<'_>, graph_name: NamedOrBlankNodeRef<'_>,
) -> Result<bool, UnabortableTransactionError> { ) -> Result<bool, UnabortableTransactionError> {
let graph_name = self.encode_named_or_blank_node(graph_name)?; let graph_name = self.encode_term(graph_name)?;
Ok(self.graphs.insert(encode_term(&graph_name), &[])?.is_none()) Ok(self.graphs.insert(encode_term(&graph_name), &[])?.is_none())
} }
@ -887,14 +881,6 @@ impl<'a> StorageTransaction<'a> {
pub fn contains_str(&self, key: &StrHash) -> Result<bool, UnabortableTransactionError> { pub fn contains_str(&self, key: &StrHash) -> Result<bool, UnabortableTransactionError> {
Ok(self.id2str.get(key.to_be_bytes())?.is_some()) Ok(self.id2str.get(key.to_be_bytes())?.is_some())
} }
pub fn insert_str(
&self,
key: &StrHash,
value: &str,
) -> Result<bool, UnabortableTransactionError> {
Ok(self.id2str.insert(&key.to_be_bytes(), value)?.is_none())
}
} }
/// Error returned by a Sled transaction /// Error returned by a Sled transaction
@ -1061,12 +1047,6 @@ impl StrLookup for Storage {
} }
} }
impl StrContainer for Storage {
fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<bool> {
self.insert_str(key, value)
}
}
impl<'a> StrLookup for StorageTransaction<'a> { impl<'a> StrLookup for StorageTransaction<'a> {
type Error = UnabortableTransactionError; type Error = UnabortableTransactionError;
@ -1079,34 +1059,46 @@ impl<'a> StrLookup for StorageTransaction<'a> {
} }
} }
impl<'a> StrContainer for StorageTransaction<'a> { impl TermEncoder for Storage {
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, UnabortableTransactionError> { type Error = std::io::Error;
self.insert_str(key, value)
fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<()> {
self.id2str.insert(key.to_be_bytes(), value)?;
Ok(())
}
}
impl<'a> TermEncoder for StorageTransaction<'a> {
type Error = UnabortableTransactionError;
fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), UnabortableTransactionError> {
self.id2str.insert(&key.to_be_bytes(), value)?;
Ok(())
} }
} }
pub(crate) trait StorageLike: StrLookup + StrContainer { pub(crate) trait StorageLike: StrLookup {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error>; fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error>;
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error>; fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error>;
} }
impl StorageLike for Storage { impl StorageLike for Storage {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> { fn insert(&self, quad: QuadRef<'_>) -> std::io::Result<bool> {
self.insert(quad) self.insert(quad)
} }
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> { fn remove(&self, quad: QuadRef<'_>) -> std::io::Result<bool> {
self.remove(quad) self.remove(quad)
} }
} }
impl<'a> StorageLike for StorageTransaction<'a> { impl<'a> StorageLike for StorageTransaction<'a> {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> { fn insert(&self, quad: QuadRef<'_>) -> Result<bool, UnabortableTransactionError> {
self.insert(quad) self.insert(quad)
} }
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> { fn remove(&self, quad: QuadRef<'_>) -> Result<bool, UnabortableTransactionError> {
self.remove(quad) self.remove(quad)
} }
} }

@ -657,7 +657,7 @@ impl From<QuadRef<'_>> for EncodedQuad {
} }
} }
pub(crate) trait StrLookup { pub trait StrLookup {
type Error: Error + Into<EvaluationError> + 'static; type Error: Error + Into<EvaluationError> + 'static;
fn get_str(&self, key: &StrHash) -> Result<Option<String>, Self::Error>; fn get_str(&self, key: &StrHash) -> Result<Option<String>, Self::Error>;
@ -665,200 +665,92 @@ pub(crate) trait StrLookup {
fn contains_str(&self, key: &StrHash) -> Result<bool, Self::Error>; fn contains_str(&self, key: &StrHash) -> Result<bool, Self::Error>;
} }
pub(crate) trait StrContainer: StrLookup { pub(super) trait TermEncoder {
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, Self::Error>; type Error;
}
/// Encodes a term and insert strings if needed fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), Self::Error>;
pub(crate) trait WriteEncoder: StrContainer {
fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result<EncodedTerm, Self::Error> {
Ok(EncodedTerm::NamedNode {
iri_id: self.encode_str(named_node.as_str())?,
})
}
fn encode_blank_node(&self, blank_node: BlankNodeRef<'_>) -> Result<EncodedTerm, Self::Error> { fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> Result<EncodedTerm, Self::Error> {
Ok(if let Some(id) = blank_node.id() { let term = term.into();
EncodedTerm::NumericalBlankNode { id } let encoded = term.into();
} else { insert_term_values(term, &encoded, |key, value| self.insert_str(key, value))?;
let id = blank_node.as_str(); Ok(encoded)
if let Ok(id) = id.try_into() {
EncodedTerm::SmallBlankNode(id)
} else {
EncodedTerm::BigBlankNode {
id_id: self.encode_str(id)?,
}
}
})
} }
fn encode_literal(&self, literal: LiteralRef<'_>) -> Result<EncodedTerm, Self::Error> { fn encode_quad(&self, quad: QuadRef<'_>) -> Result<EncodedQuad, Self::Error> {
Ok(if literal.is_plain() { Ok(EncodedQuad {
if let Some(language) = literal.language() { subject: self.encode_term(quad.subject)?,
if let Ok(value) = SmallString::try_from(literal.value()) { predicate: self.encode_term(quad.predicate)?,
if let Ok(language) = SmallString::try_from(language) { object: self.encode_term(quad.object)?,
EncodedTerm::SmallSmallLangStringLiteral { value, language } graph_name: match quad.graph_name {
} else { GraphNameRef::NamedNode(graph_name) => self.encode_term(graph_name)?,
EncodedTerm::SmallBigLangStringLiteral { GraphNameRef::BlankNode(graph_name) => self.encode_term(graph_name)?,
value, GraphNameRef::DefaultGraph => EncodedTerm::DefaultGraph,
language_id: self.encode_str(language)?, },
}
}
} else if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::BigSmallLangStringLiteral {
value_id: self.encode_str(literal.value())?,
language,
}
} else {
EncodedTerm::BigBigLangStringLiteral {
value_id: self.encode_str(literal.value())?,
language_id: self.encode_str(language)?,
}
}
} else if let Ok(value) = SmallString::try_from(literal.value()) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(literal.value())?,
}
}
} else {
match match literal.datatype().as_str() {
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#string" => {
Some(if let Ok(value) = SmallString::try_from(literal.value()) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(literal.value())?,
}
}) })
} }
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short"
| "http://www.w3.org/2001/XMLSchema#int"
| "http://www.w3.org/2001/XMLSchema#long"
| "http://www.w3.org/2001/XMLSchema#unsignedByte"
| "http://www.w3.org/2001/XMLSchema#unsignedShort"
| "http://www.w3.org/2001/XMLSchema#unsignedInt"
| "http://www.w3.org/2001/XMLSchema#unsignedLong"
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
parse_integer_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
parse_date_time_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#time" => parse_time_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#date" => parse_date_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#gYearMonth" => {
parse_g_year_month_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#gMonthDay" => {
parse_g_month_day_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
parse_year_month_duration_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
parse_day_time_duration_str(literal.value())
}
_ => None,
} {
Some(v) => v,
None => {
if let Ok(value) = SmallString::try_from(literal.value()) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: self.encode_str(literal.datatype().as_str())?,
}
} else {
EncodedTerm::BigTypedLiteral {
value_id: self.encode_str(literal.value())?,
datatype_id: self.encode_str(literal.datatype().as_str())?,
}
}
}
}
})
} }
fn encode_named_or_blank_node( pub fn insert_term_values<E, F: Fn(&StrHash, &str) -> Result<(), E> + Copy>(
&self, term: TermRef<'_>,
term: NamedOrBlankNodeRef<'_>, encoded: &EncodedTerm,
) -> Result<EncodedTerm, Self::Error> { insert_str: F,
match term { ) -> Result<(), E> {
NamedOrBlankNodeRef::NamedNode(named_node) => self.encode_named_node(named_node), match (term, encoded) {
NamedOrBlankNodeRef::BlankNode(blank_node) => self.encode_blank_node(blank_node), (TermRef::NamedNode(node), EncodedTerm::NamedNode { iri_id }) => {
insert_str(iri_id, node.as_str())?;
} }
(TermRef::BlankNode(node), EncodedTerm::BigBlankNode { id_id }) => {
insert_str(id_id, node.as_str())?;
} }
(TermRef::Literal(literal), EncodedTerm::BigStringLiteral { value_id }) => {
fn encode_subject(&self, term: SubjectRef<'_>) -> Result<EncodedTerm, Self::Error> { insert_str(value_id, literal.value())?;
match term {
SubjectRef::NamedNode(named_node) => self.encode_named_node(named_node),
SubjectRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
SubjectRef::Triple(triple) => Ok(EncodedTerm::Triple(Rc::new(
self.encode_triple(triple.as_ref())?,
))),
} }
(TermRef::Literal(literal), EncodedTerm::SmallBigLangStringLiteral { language_id, .. }) => {
if let Some(language) = literal.language() {
insert_str(language_id, language)?;
} }
fn encode_term(&self, term: TermRef<'_>) -> Result<EncodedTerm, Self::Error> {
match term {
TermRef::NamedNode(named_node) => self.encode_named_node(named_node),
TermRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
TermRef::Literal(literal) => self.encode_literal(literal),
TermRef::Triple(triple) => Ok(EncodedTerm::Triple(Rc::new(
self.encode_triple(triple.as_ref())?,
))),
} }
(TermRef::Literal(literal), EncodedTerm::BigSmallLangStringLiteral { value_id, .. }) => {
insert_str(value_id, literal.value())?;
} }
(
fn encode_graph_name(&self, name: GraphNameRef<'_>) -> Result<EncodedTerm, Self::Error> { TermRef::Literal(literal),
match name { EncodedTerm::BigBigLangStringLiteral {
GraphNameRef::NamedNode(named_node) => self.encode_named_node(named_node), value_id,
GraphNameRef::BlankNode(blank_node) => self.encode_blank_node(blank_node), language_id,
GraphNameRef::DefaultGraph => Ok(EncodedTerm::DefaultGraph), },
) => {
insert_str(value_id, literal.value())?;
if let Some(language) = literal.language() {
insert_str(language_id, language)?
} }
} }
(TermRef::Literal(literal), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => {
fn encode_triple(&self, quad: TripleRef<'_>) -> Result<EncodedTriple, Self::Error> { insert_str(datatype_id, literal.datatype().as_str())?;
Ok(EncodedTriple {
subject: self.encode_subject(quad.subject)?,
predicate: self.encode_named_node(quad.predicate)?,
object: self.encode_term(quad.object)?,
})
} }
(
fn encode_quad(&self, quad: QuadRef<'_>) -> Result<EncodedQuad, Self::Error> { TermRef::Literal(literal),
Ok(EncodedQuad { EncodedTerm::BigTypedLiteral {
subject: self.encode_subject(quad.subject)?, value_id,
predicate: self.encode_named_node(quad.predicate)?, datatype_id,
object: self.encode_term(quad.object)?, },
graph_name: self.encode_graph_name(quad.graph_name)?, ) => {
}) insert_str(value_id, literal.value())?;
insert_str(datatype_id, literal.datatype().as_str())?;
} }
(TermRef::Triple(triple), EncodedTerm::Triple(encoded)) => {
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error>; insert_term_values(triple.subject.as_ref().into(), &encoded.subject, insert_str)?;
insert_term_values(
triple.predicate.as_ref().into(),
&encoded.predicate,
insert_str,
)?;
insert_term_values(triple.object.as_ref(), &encoded.object, insert_str)?;
} }
_ => (),
impl<S: StrContainer> WriteEncoder for S {
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error> {
let key = StrHash::new(value);
self.insert_str(&key, value)?;
Ok(key)
} }
Ok(())
} }
pub fn parse_boolean_str(value: &str) -> Option<EncodedTerm> { pub fn parse_boolean_str(value: &str) -> Option<EncodedTerm> {
@ -932,7 +824,7 @@ pub fn parse_day_time_duration_str(value: &str) -> Option<EncodedTerm> {
value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok() value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok()
} }
pub(crate) trait Decoder: StrLookup { pub trait Decoder: StrLookup {
fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, DecoderError<Self::Error>>; fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, DecoderError<Self::Error>>;
fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, DecoderError<Self::Error>> { fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, DecoderError<Self::Error>> {
@ -1112,7 +1004,7 @@ fn get_required_str<L: StrLookup>(
} }
#[derive(Debug)] #[derive(Debug)]
pub(crate) enum DecoderError<E> { pub enum DecoderError<E> {
Store(E), Store(E),
Decoder { msg: String }, Decoder { msg: String },
} }

Loading…
Cancel
Save