Simplifies term encoding code

pull/171/head
Tpt 3 years ago
parent f9d9530a1b
commit 8606877e33
  1. 70
      lib/src/sparql/dataset.rs
  2. 19
      lib/src/storage/binary_encoder.rs
  3. 52
      lib/src/storage/mod.rs
  4. 258
      lib/src/storage/numeric_encoder.rs

@ -1,10 +1,13 @@
use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::storage::numeric_encoder::{
insert_term_values, EncodedQuad, EncodedTerm, StrHash, StrLookup,
};
use crate::storage::Storage;
use std::cell::RefCell;
use std::collections::HashMap;
use std::convert::Infallible;
use std::iter::empty;
pub(crate) struct DatasetView {
@ -139,69 +142,14 @@ impl DatasetView {
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
let term = term.into();
let encoded = term.into();
self.insert_term_values(term, &encoded);
insert_term_values::<Infallible, _>(term, &encoded, |key, value| {
self.insert_str(key, value);
Ok(())
})
.unwrap(); // Can not fail
encoded
}
/// Registers every string referenced by hash inside `encoded`, using the matching text from `term`.
///
/// `term` and `encoded` are matched as a pair: for each encoded variant that carries a
/// string id (`iri_id`, `id_id`, `value_id`, `language_id`, `datatype_id`), the corresponding
/// string of `term` is passed to `self.insert_str` under that id.
/// Pairs not listed below fall through to the final `_` arm and insert nothing.
fn insert_term_values(&self, term: TermRef<'_>, encoded: &EncodedTerm) {
match (term, encoded) {
(TermRef::NamedNode(node), EncodedTerm::NamedNode { iri_id }) => {
self.insert_str(iri_id, node.as_str());
}
(TermRef::BlankNode(node), EncodedTerm::BigBlankNode { id_id }) => {
self.insert_str(id_id, node.as_str());
}
(TermRef::Literal(literal), EncodedTerm::BigStringLiteral { value_id }) => {
self.insert_str(value_id, literal.value());
}
(
TermRef::Literal(literal),
EncodedTerm::SmallBigLangStringLiteral { language_id, .. },
) => {
// Only the language tag is referenced by id in this variant.
if let Some(language) = literal.language() {
self.insert_str(language_id, language)
}
}
(
TermRef::Literal(literal),
EncodedTerm::BigSmallLangStringLiteral { value_id, .. },
) => {
// Only the lexical value is referenced by id in this variant.
self.insert_str(value_id, literal.value());
}
(
TermRef::Literal(literal),
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
},
) => {
// Both the value and the language tag are referenced by id.
self.insert_str(value_id, literal.value());
if let Some(language) = literal.language() {
self.insert_str(language_id, language)
}
}
(TermRef::Literal(literal), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => {
self.insert_str(datatype_id, literal.datatype().as_str());
}
(
TermRef::Literal(literal),
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
},
) => {
self.insert_str(value_id, literal.value());
self.insert_str(datatype_id, literal.datatype().as_str());
}
(TermRef::Triple(triple), EncodedTerm::Triple(encoded)) => {
// Quoted triple: recurse into subject, predicate and object.
self.insert_term_values(triple.subject.as_ref().into(), &encoded.subject);
self.insert_term_values(triple.predicate.as_ref().into(), &encoded.predicate);
self.insert_term_values(triple.object.as_ref(), &encoded.object);
}
// Every other (term, encoded) combination inserts nothing.
_ => (),
}
}
pub fn insert_str(&self, key: &StrHash, value: &str) {
if matches!(self.storage.contains_str(key), Ok(true)) {
return;

@ -643,7 +643,6 @@ mod tests {
use super::*;
use crate::storage::numeric_encoder::*;
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::convert::Infallible;
@ -664,15 +663,15 @@ mod tests {
}
}
impl StrContainer for MemoryStrStore {
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, Infallible> {
match self.id2str.borrow_mut().entry(*key) {
Entry::Occupied(_) => Ok(false),
Entry::Vacant(entry) => {
entry.insert(value.to_owned());
Ok(true)
}
}
/// `TermEncoder` implementation for the test-only `MemoryStrStore`; it never fails (`Infallible`).
impl TermEncoder for MemoryStrStore {
type Error = Infallible;
/// Stores `value` under `key` in `id2str` unless the key is already present, then returns `Ok(())`.
fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), Infallible> {
self.id2str
.borrow_mut()
.entry(*key)
// An existing entry is kept as-is; the closure only runs for a vacant key.
.or_insert_with(|| value.to_owned());
Ok(())
}
}

@ -19,9 +19,7 @@ use crate::storage::binary_encoder::{
LATEST_STORAGE_VERSION, WRITTEN_TERM_MAX_SIZE,
};
use crate::storage::io::StoreOrParseError;
use crate::storage::numeric_encoder::{
EncodedQuad, EncodedTerm, StrContainer, StrHash, StrLookup, WriteEncoder,
};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, StrHash, StrLookup, TermEncoder};
mod binary_encoder;
pub(crate) mod io;
@ -671,10 +669,6 @@ impl Storage {
/// Returns whether `id2str` holds an entry whose key is `key.to_be_bytes()`.
///
/// An error from the `contains_key` call is propagated with `?`.
pub fn contains_str(&self, key: &StrHash) -> std::io::Result<bool> {
Ok(self.id2str.contains_key(key.to_be_bytes())?)
}
/// Inserts `value` into `id2str` under `key.to_be_bytes()`.
///
/// Returns `true` when the underlying `insert` returned `None`
/// (presumably meaning no value was stored under this key before; confirm against the store API).
pub fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<bool> {
Ok(self.id2str.insert(key.to_be_bytes(), value)?.is_none())
}
}
pub struct ChainedDecodingQuadIterator {
@ -872,7 +866,7 @@ impl<'a> StorageTransaction<'a> {
&self,
graph_name: NamedOrBlankNodeRef<'_>,
) -> Result<bool, UnabortableTransactionError> {
let graph_name = self.encode_named_or_blank_node(graph_name)?;
let graph_name = self.encode_term(graph_name)?;
Ok(self.graphs.insert(encode_term(&graph_name), &[])?.is_none())
}
@ -887,14 +881,6 @@ impl<'a> StorageTransaction<'a> {
/// Returns whether a `get` on `id2str` with `key.to_be_bytes()` yields a value.
///
/// An error from the `get` call is propagated with `?`.
pub fn contains_str(&self, key: &StrHash) -> Result<bool, UnabortableTransactionError> {
Ok(self.id2str.get(key.to_be_bytes())?.is_some())
}
/// Inserts `value` into `id2str` under `key.to_be_bytes()`.
///
/// Returns `true` when the underlying `insert` returned `None`
/// (presumably meaning the key was not present before; confirm against the store API).
pub fn insert_str(
&self,
key: &StrHash,
value: &str,
) -> Result<bool, UnabortableTransactionError> {
Ok(self.id2str.insert(&key.to_be_bytes(), value)?.is_none())
}
}
/// Error returned by a Sled transaction
@ -1061,12 +1047,6 @@ impl StrLookup for Storage {
}
}
/// Exposes `Storage` string insertion through the `StrContainer` trait.
impl StrContainer for Storage {
/// Forwards to `self.insert_str` with the same arguments.
// NOTE(review): the call is expected to resolve to the inherent `Storage::insert_str`
// (inherent methods take precedence over trait methods) and not to recurse; confirm.
fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<bool> {
self.insert_str(key, value)
}
}
impl<'a> StrLookup for StorageTransaction<'a> {
type Error = UnabortableTransactionError;
@ -1079,34 +1059,46 @@ impl<'a> StrLookup for StorageTransaction<'a> {
}
}
impl<'a> StrContainer for StorageTransaction<'a> {
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, UnabortableTransactionError> {
self.insert_str(key, value)
/// `TermEncoder` implementation for `Storage`, reporting failures as `std::io::Error`.
impl TermEncoder for Storage {
type Error = std::io::Error;
/// Inserts `value` into `id2str` under `key.to_be_bytes()`.
///
/// The value returned by `insert` is discarded; only errors are propagated.
fn insert_str(&self, key: &StrHash, value: &str) -> std::io::Result<()> {
self.id2str.insert(key.to_be_bytes(), value)?;
Ok(())
}
}
/// `TermEncoder` implementation for `StorageTransaction`, reporting failures as
/// `UnabortableTransactionError`.
impl<'a> TermEncoder for StorageTransaction<'a> {
type Error = UnabortableTransactionError;
/// Inserts `value` into `id2str` under `key.to_be_bytes()`.
///
/// The value returned by `insert` is discarded; only errors are propagated.
fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), UnabortableTransactionError> {
self.id2str.insert(&key.to_be_bytes(), value)?;
Ok(())
}
}
pub(crate) trait StorageLike: StrLookup + StrContainer {
pub(crate) trait StorageLike: StrLookup {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error>;
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error>;
}
impl StorageLike for Storage {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> {
fn insert(&self, quad: QuadRef<'_>) -> std::io::Result<bool> {
self.insert(quad)
}
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> {
fn remove(&self, quad: QuadRef<'_>) -> std::io::Result<bool> {
self.remove(quad)
}
}
impl<'a> StorageLike for StorageTransaction<'a> {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> {
fn insert(&self, quad: QuadRef<'_>) -> Result<bool, UnabortableTransactionError> {
self.insert(quad)
}
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, Self::Error> {
fn remove(&self, quad: QuadRef<'_>) -> Result<bool, UnabortableTransactionError> {
self.remove(quad)
}
}

@ -657,7 +657,7 @@ impl From<QuadRef<'_>> for EncodedQuad {
}
}
pub(crate) trait StrLookup {
pub trait StrLookup {
type Error: Error + Into<EvaluationError> + 'static;
fn get_str(&self, key: &StrHash) -> Result<Option<String>, Self::Error>;
@ -665,200 +665,92 @@ pub(crate) trait StrLookup {
fn contains_str(&self, key: &StrHash) -> Result<bool, Self::Error>;
}
/// A `StrLookup` that can additionally store strings under their `StrHash`.
pub(crate) trait StrContainer: StrLookup {
/// Stores `value` under `key`, failing with the `StrLookup` error type.
///
/// NOTE(review): the returned `bool` appears to mean "the key was newly inserted",
/// judging by the implementations shown in this diff; confirm before relying on it.
fn insert_str(&self, key: &StrHash, value: &str) -> Result<bool, Self::Error>;
}
pub(super) trait TermEncoder {
type Error;
/// Encodes a term and inserts its strings if needed
pub(crate) trait WriteEncoder: StrContainer {
fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> Result<EncodedTerm, Self::Error> {
Ok(EncodedTerm::NamedNode {
iri_id: self.encode_str(named_node.as_str())?,
})
fn insert_str(&self, key: &StrHash, value: &str) -> Result<(), Self::Error>;
fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> Result<EncodedTerm, Self::Error> {
let term = term.into();
let encoded = term.into();
insert_term_values(term, &encoded, |key, value| self.insert_str(key, value))?;
Ok(encoded)
}
fn encode_blank_node(&self, blank_node: BlankNodeRef<'_>) -> Result<EncodedTerm, Self::Error> {
Ok(if let Some(id) = blank_node.id() {
EncodedTerm::NumericalBlankNode { id }
} else {
let id = blank_node.as_str();
if let Ok(id) = id.try_into() {
EncodedTerm::SmallBlankNode(id)
} else {
EncodedTerm::BigBlankNode {
id_id: self.encode_str(id)?,
}
}
fn encode_quad(&self, quad: QuadRef<'_>) -> Result<EncodedQuad, Self::Error> {
Ok(EncodedQuad {
subject: self.encode_term(quad.subject)?,
predicate: self.encode_term(quad.predicate)?,
object: self.encode_term(quad.object)?,
graph_name: match quad.graph_name {
GraphNameRef::NamedNode(graph_name) => self.encode_term(graph_name)?,
GraphNameRef::BlankNode(graph_name) => self.encode_term(graph_name)?,
GraphNameRef::DefaultGraph => EncodedTerm::DefaultGraph,
},
})
}
}
fn encode_literal(&self, literal: LiteralRef<'_>) -> Result<EncodedTerm, Self::Error> {
Ok(if literal.is_plain() {
pub fn insert_term_values<E, F: Fn(&StrHash, &str) -> Result<(), E> + Copy>(
term: TermRef<'_>,
encoded: &EncodedTerm,
insert_str: F,
) -> Result<(), E> {
match (term, encoded) {
(TermRef::NamedNode(node), EncodedTerm::NamedNode { iri_id }) => {
insert_str(iri_id, node.as_str())?;
}
(TermRef::BlankNode(node), EncodedTerm::BigBlankNode { id_id }) => {
insert_str(id_id, node.as_str())?;
}
(TermRef::Literal(literal), EncodedTerm::BigStringLiteral { value_id }) => {
insert_str(value_id, literal.value())?;
}
(TermRef::Literal(literal), EncodedTerm::SmallBigLangStringLiteral { language_id, .. }) => {
if let Some(language) = literal.language() {
if let Ok(value) = SmallString::try_from(literal.value()) {
if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::SmallSmallLangStringLiteral { value, language }
} else {
EncodedTerm::SmallBigLangStringLiteral {
value,
language_id: self.encode_str(language)?,
}
}
} else if let Ok(language) = SmallString::try_from(language) {
EncodedTerm::BigSmallLangStringLiteral {
value_id: self.encode_str(literal.value())?,
language,
}
} else {
EncodedTerm::BigBigLangStringLiteral {
value_id: self.encode_str(literal.value())?,
language_id: self.encode_str(language)?,
}
}
} else if let Ok(value) = SmallString::try_from(literal.value()) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(literal.value())?,
}
insert_str(language_id, language)?;
}
} else {
match match literal.datatype().as_str() {
"http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#string" => {
Some(if let Ok(value) = SmallString::try_from(literal.value()) {
EncodedTerm::SmallStringLiteral(value)
} else {
EncodedTerm::BigStringLiteral {
value_id: self.encode_str(literal.value())?,
}
})
}
"http://www.w3.org/2001/XMLSchema#float" => parse_float_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#double" => parse_double_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short"
| "http://www.w3.org/2001/XMLSchema#int"
| "http://www.w3.org/2001/XMLSchema#long"
| "http://www.w3.org/2001/XMLSchema#unsignedByte"
| "http://www.w3.org/2001/XMLSchema#unsignedShort"
| "http://www.w3.org/2001/XMLSchema#unsignedInt"
| "http://www.w3.org/2001/XMLSchema#unsignedLong"
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
parse_integer_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
parse_date_time_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#time" => parse_time_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#date" => parse_date_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#gYearMonth" => {
parse_g_year_month_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#gMonthDay" => {
parse_g_month_day_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(literal.value()),
"http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
parse_year_month_duration_str(literal.value())
}
"http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
parse_day_time_duration_str(literal.value())
}
_ => None,
} {
Some(v) => v,
None => {
if let Ok(value) = SmallString::try_from(literal.value()) {
EncodedTerm::SmallTypedLiteral {
value,
datatype_id: self.encode_str(literal.datatype().as_str())?,
}
} else {
EncodedTerm::BigTypedLiteral {
value_id: self.encode_str(literal.value())?,
datatype_id: self.encode_str(literal.datatype().as_str())?,
}
}
}
}
(TermRef::Literal(literal), EncodedTerm::BigSmallLangStringLiteral { value_id, .. }) => {
insert_str(value_id, literal.value())?;
}
(
TermRef::Literal(literal),
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
},
) => {
insert_str(value_id, literal.value())?;
if let Some(language) = literal.language() {
insert_str(language_id, language)?
}
})
}
fn encode_named_or_blank_node(
&self,
term: NamedOrBlankNodeRef<'_>,
) -> Result<EncodedTerm, Self::Error> {
match term {
NamedOrBlankNodeRef::NamedNode(named_node) => self.encode_named_node(named_node),
NamedOrBlankNodeRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
}
}
fn encode_subject(&self, term: SubjectRef<'_>) -> Result<EncodedTerm, Self::Error> {
match term {
SubjectRef::NamedNode(named_node) => self.encode_named_node(named_node),
SubjectRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
SubjectRef::Triple(triple) => Ok(EncodedTerm::Triple(Rc::new(
self.encode_triple(triple.as_ref())?,
))),
(TermRef::Literal(literal), EncodedTerm::SmallTypedLiteral { datatype_id, .. }) => {
insert_str(datatype_id, literal.datatype().as_str())?;
}
}
fn encode_term(&self, term: TermRef<'_>) -> Result<EncodedTerm, Self::Error> {
match term {
TermRef::NamedNode(named_node) => self.encode_named_node(named_node),
TermRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
TermRef::Literal(literal) => self.encode_literal(literal),
TermRef::Triple(triple) => Ok(EncodedTerm::Triple(Rc::new(
self.encode_triple(triple.as_ref())?,
))),
(
TermRef::Literal(literal),
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
},
) => {
insert_str(value_id, literal.value())?;
insert_str(datatype_id, literal.datatype().as_str())?;
}
}
fn encode_graph_name(&self, name: GraphNameRef<'_>) -> Result<EncodedTerm, Self::Error> {
match name {
GraphNameRef::NamedNode(named_node) => self.encode_named_node(named_node),
GraphNameRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
GraphNameRef::DefaultGraph => Ok(EncodedTerm::DefaultGraph),
(TermRef::Triple(triple), EncodedTerm::Triple(encoded)) => {
insert_term_values(triple.subject.as_ref().into(), &encoded.subject, insert_str)?;
insert_term_values(
triple.predicate.as_ref().into(),
&encoded.predicate,
insert_str,
)?;
insert_term_values(triple.object.as_ref(), &encoded.object, insert_str)?;
}
_ => (),
}
fn encode_triple(&self, quad: TripleRef<'_>) -> Result<EncodedTriple, Self::Error> {
Ok(EncodedTriple {
subject: self.encode_subject(quad.subject)?,
predicate: self.encode_named_node(quad.predicate)?,
object: self.encode_term(quad.object)?,
})
}
fn encode_quad(&self, quad: QuadRef<'_>) -> Result<EncodedQuad, Self::Error> {
Ok(EncodedQuad {
subject: self.encode_subject(quad.subject)?,
predicate: self.encode_named_node(quad.predicate)?,
object: self.encode_term(quad.object)?,
graph_name: self.encode_graph_name(quad.graph_name)?,
})
}
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error>;
}
impl<S: StrContainer> WriteEncoder for S {
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error> {
let key = StrHash::new(value);
self.insert_str(&key, value)?;
Ok(key)
}
Ok(())
}
pub fn parse_boolean_str(value: &str) -> Option<EncodedTerm> {
@ -932,7 +824,7 @@ pub fn parse_day_time_duration_str(value: &str) -> Option<EncodedTerm> {
value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok()
}
pub(crate) trait Decoder: StrLookup {
pub trait Decoder: StrLookup {
fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, DecoderError<Self::Error>>;
fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, DecoderError<Self::Error>> {
@ -1112,7 +1004,7 @@ fn get_required_str<L: StrLookup>(
}
#[derive(Debug)]
pub(crate) enum DecoderError<E> {
pub enum DecoderError<E> {
Store(E),
Decoder { msg: String },
}

Loading…
Cancel
Save