Replaces the Decoder trait with a TermDecoder struct

encoder
Tpt 3 years ago
parent e75de43042
commit 722836c1ec
  1. 8
      lib/src/sparql/dataset.rs
  2. 23
      lib/src/sparql/eval.rs
  3. 10
      lib/src/sparql/update.rs
  4. 3
      lib/src/storage/binary_encoder.rs
  5. 10
      lib/src/storage/mod.rs
  6. 180
      lib/src/storage/numeric_encoder.rs
  7. 14
      lib/src/store.rs

@ -1,7 +1,9 @@
use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::storage::numeric_encoder::{
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder,
};
use crate::storage::{StorageError, StorageReader};
use std::cell::RefCell;
use std::collections::hash_map::Entry;
@ -159,6 +161,10 @@ impl DatasetView {
encoded
}
/// Returns a [`TermDecoder`] that borrows this dataset view as its string lookup.
pub fn term_decoder(&self) -> TermDecoder<Self> {
TermDecoder::new(self)
}
pub fn insert_str(&self, key: &StrHash, value: &str) {
if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) {
if !matches!(self.reader.contains_str(key), Ok(true)) {

@ -659,7 +659,9 @@ impl SimpleEvaluator {
let service_name = get_pattern_value(service_name, from)
.ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?;
if let QueryResults::Solutions(iter) = self.service_handler.handle(
self.dataset.decode_named_node(&service_name)?,
self.dataset
.term_decoder()
.decode_named_node(&service_name)?,
Query {
inner: spargebra::Query::Select {
dataset: None,
@ -2029,7 +2031,7 @@ impl SimpleEvaluator {
Rc::new(move |tuple| {
let args = args
.iter()
.map(|f| dataset.decode_term(&f(tuple)?).ok())
.map(|f| dataset.term_decoder().decode_term(&f(tuple)?).ok())
.collect::<Option<Vec<_>>>()?;
Some(dataset.encode_term(&function(&args)?))
})
@ -2307,9 +2309,10 @@ fn decode_bindings(
variables,
Box::new(iter.map(move |values| {
let mut result = vec![None; tuple_size];
let decoder = dataset.term_decoder();
for (i, value) in values?.iter().enumerate() {
if let Some(term) = value {
result[i] = Some(dataset.decode_term(&term)?)
result[i] = Some(decoder.decode_term(&term)?)
}
}
Ok(result)
@ -2569,9 +2572,10 @@ fn cmp_terms(dataset: &DatasetView, a: Option<&EncodedTerm>, b: Option<&EncodedT
b => {
if let Some(ord) = partial_cmp_literals(dataset, a, b) {
ord
} else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) =
(dataset.decode_term(a), dataset.decode_term(b))
{
} else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) = (
dataset.term_decoder().decode_term(a),
dataset.term_decoder().decode_term(b),
) {
(a.value(), a.datatype(), a.language()).cmp(&(
b.value(),
b.datatype(),
@ -3323,7 +3327,7 @@ impl Iterator for ConstructIterator {
get_triple_template_value(&template.object, &tuple, &mut self.bnodes),
) {
self.buffered_results.push(decode_triple(
&*self.eval.dataset,
&self.eval.dataset.term_decoder(),
&subject,
&predicate,
&object,
@ -3373,8 +3377,8 @@ fn new_bnode() -> EncodedTerm {
EncodedTerm::NumericalBlankNode { id: random() }
}
fn decode_triple<D: Decoder>(
decoder: &D,
fn decode_triple<S: StrLookup>(
decoder: &TermDecoder<S>,
subject: &EncodedTerm,
predicate: &EncodedTerm,
object: &EncodedTerm,
@ -3402,6 +3406,7 @@ impl Iterator for DescribeIterator {
Ok(quad) => self
.eval
.dataset
.term_decoder()
.decode_quad(&quad)
.map(|q| q.into())
.map_err(|e| e.into()),

@ -8,7 +8,7 @@ use crate::sparql::http::Client;
use crate::sparql::plan::EncodedTuple;
use crate::sparql::plan_builder::PlanBuilder;
use crate::sparql::{EvaluationError, Update, UpdateOptions};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm};
use crate::storage::numeric_encoder::EncodedTerm;
use crate::storage::StorageWriter;
use oxiri::Iri;
use spargebra::algebra::{GraphPattern, GraphTarget};
@ -374,7 +374,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
.map(|t| t.into())
}
TermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_term(&node))
.map(|node| dataset.term_decoder().decode_term(&node))
.transpose()?,
})
}
@ -388,7 +388,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
Ok(match term {
NamedNodePattern::NamedNode(term) => Some(term.clone()),
NamedNodePattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_named_node(&node))
.map(|node| dataset.term_decoder().decode_named_node(&node))
.transpose()?,
})
}
@ -407,7 +407,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
Ok(if node == EncodedTerm::DefaultGraph {
OxGraphName::DefaultGraph
} else {
dataset.decode_named_node(&node)?.into()
dataset.term_decoder().decode_named_node(&node)?.into()
})
})
.transpose(),
@ -507,7 +507,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
.map(|t| t.into())
}
GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_term(&node))
.map(|node| dataset.term_decoder().decode_term(&node))
.transpose()?,
})
}

@ -742,11 +742,12 @@ mod tests {
)
.into(),
];
let decoder = TermDecoder::new(&store);
for term in terms {
let encoded = term.as_ref().into();
store.insert_term(term.as_ref(), &encoded);
assert_eq!(encoded, term.as_ref().into());
assert_eq!(term, store.decode_term(&encoded).unwrap());
assert_eq!(term, decoder.decode_term(&encoded).unwrap());
let mut buffer = Vec::new();
write_term(&mut buffer, &encoded);

@ -10,7 +10,7 @@ use crate::storage::binary_encoder::{
};
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError};
use crate::storage::numeric_encoder::{
insert_term, Decoder, EncodedQuad, EncodedTerm, StrHash, StrLookup,
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder,
};
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter};
use std::cmp::{max, min};
@ -294,6 +294,10 @@ pub struct StorageReader {
}
impl StorageReader {
/// Returns a [`TermDecoder`] that borrows this reader as its string lookup.
pub fn term_decoder(&self) -> TermDecoder<Self> {
TermDecoder::new(self)
}
pub fn len(&self) -> Result<usize, StorageError> {
Ok(self.reader.len(&self.storage.gspo_cf)? + self.reader.len(&self.storage.dspo_cf)?)
}
@ -654,7 +658,7 @@ impl StorageReader {
}
for spo in self.dspo_quads(&[]) {
let spo = spo?;
self.decode_quad(&spo)?; // We ensure that the quad is readable
self.term_decoder().decode_quad(&spo)?; // We ensure that the quad is readable
if !self.storage.db.contains_key(
&self.storage.dpos_cf,
&encode_term_triple(&spo.predicate, &spo.object, &spo.subject),
@ -684,7 +688,7 @@ impl StorageReader {
}
for gspo in self.gspo_quads(&[]) {
let gspo = gspo?;
self.decode_quad(&gspo)?; // We ensure that the quad is readable
self.term_decoder().decode_quad(&gspo)?; // We ensure that the quad is readable
if !self.storage.db.contains_key(
&self.storage.gpos_cf,
&encode_term_quad(

@ -819,10 +819,95 @@ pub fn parse_day_time_duration_str(value: &str) -> Option<EncodedTerm> {
value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok()
}
pub trait Decoder: StrLookup {
fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError>;
/// Decodes [`EncodedTerm`] values back into model terms, resolving string
/// identifiers through a borrowed [`StrLookup`] implementation.
pub struct TermDecoder<'a, S: StrLookup> {
// Borrowed source used to resolve `StrHash` identifiers into their strings.
lookup: &'a S,
}
impl<'a, S: StrLookup> TermDecoder<'a, S> {
/// Creates a decoder that resolves string identifiers through `lookup`.
pub fn new(lookup: &'a S) -> Self {
Self { lookup }
}
/// Converts an encoded term back into a [`Term`].
///
/// Variants that carry their value inline are converted directly. Variants that
/// hold a string identifier (`iri_id`, `id_id`, `value_id`, `language_id`,
/// `datatype_id`) resolve it with `get_required_str` on the wrapped lookup.
/// [`EncodedTerm::Triple`] is decoded through [`Self::decode_triple`].
///
/// # Errors
///
/// Returns a corruption error for [`EncodedTerm::DefaultGraph`], which is not a
/// valid term, and propagates any error raised while resolving a string
/// identifier or while decoding a nested triple.
pub fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError> {
match encoded {
// The default graph marker has no term representation.
EncodedTerm::DefaultGraph => {
Err(CorruptionError::msg("The default graph tag is not a valid term").into())
}
EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::new_unchecked(get_required_str(self.lookup, iri_id)?).into())
}
EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()),
EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
EncodedTerm::BigBlankNode { id_id } => {
Ok(BlankNode::new_unchecked(get_required_str(self.lookup, id_id)?).into())
}
EncodedTerm::SmallStringLiteral(value) => {
Ok(Literal::new_simple_literal(*value).into())
}
EncodedTerm::BigStringLiteral { value_id } => {
Ok(Literal::new_simple_literal(get_required_str(self.lookup, value_id)?).into())
}
// Language-tagged literals: each of the value and the language tag is either
// stored inline or referenced by identifier, giving four combinations.
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into())
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Ok(Literal::new_language_tagged_literal_unchecked(
*value,
get_required_str(self.lookup, language_id)?,
)
.into())
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self.lookup, value_id)?,
*language,
)
.into())
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self.lookup, value_id)?,
get_required_str(self.lookup, language_id)?,
)
.into()),
// Typed literals: the datatype IRI is always resolved through the lookup.
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
Ok(Literal::new_typed_literal(
*value,
NamedNode::new_unchecked(get_required_str(self.lookup, datatype_id)?),
)
.into())
}
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
} => Ok(Literal::new_typed_literal(
get_required_str(self.lookup, value_id)?,
NamedNode::new_unchecked(get_required_str(self.lookup, datatype_id)?),
)
.into()),
// The remaining literal variants hold their value inline and convert via `Literal::from`.
EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()),
// Nested triple: decode its components through `decode_triple`.
EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()),
}
}
fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, StorageError> {
pub fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, StorageError> {
match self.decode_term(encoded)? {
Term::NamedNode(named_node) => Ok(named_node.into()),
Term::BlankNode(blank_node) => Ok(blank_node.into()),
@ -834,7 +919,7 @@ pub trait Decoder: StrLookup {
}
}
fn decode_named_or_blank_node(
pub fn decode_named_or_blank_node(
&self,
encoded: &EncodedTerm,
) -> Result<NamedOrBlankNode, StorageError> {
@ -852,7 +937,7 @@ pub trait Decoder: StrLookup {
}
}
fn decode_named_node(&self, encoded: &EncodedTerm) -> Result<NamedNode, StorageError> {
pub fn decode_named_node(&self, encoded: &EncodedTerm) -> Result<NamedNode, StorageError> {
match self.decode_term(encoded)? {
Term::NamedNode(named_node) => Ok(named_node),
Term::BlankNode(_) => Err(CorruptionError::msg(
@ -868,7 +953,7 @@ pub trait Decoder: StrLookup {
}
}
fn decode_triple(&self, encoded: &EncodedTriple) -> Result<Triple, StorageError> {
pub fn decode_triple(&self, encoded: &EncodedTriple) -> Result<Triple, StorageError> {
Ok(Triple::new(
self.decode_subject(&encoded.subject)?,
self.decode_named_node(&encoded.predicate)?,
@ -876,7 +961,7 @@ pub trait Decoder: StrLookup {
))
}
fn decode_quad(&self, encoded: &EncodedQuad) -> Result<Quad, StorageError> {
pub fn decode_quad(&self, encoded: &EncodedQuad) -> Result<Quad, StorageError> {
Ok(Quad::new(
self.decode_subject(&encoded.subject)?,
self.decode_named_node(&encoded.predicate)?,
@ -903,87 +988,6 @@ pub trait Decoder: StrLookup {
}
}
impl<S: StrLookup> Decoder for S {
fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError> {
match encoded {
EncodedTerm::DefaultGraph => {
Err(CorruptionError::msg("The default graph tag is not a valid term").into())
}
EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into())
}
EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()),
EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
EncodedTerm::BigBlankNode { id_id } => {
Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into())
}
EncodedTerm::SmallStringLiteral(value) => {
Ok(Literal::new_simple_literal(*value).into())
}
EncodedTerm::BigStringLiteral { value_id } => {
Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into())
}
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into())
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Ok(Literal::new_language_tagged_literal_unchecked(
*value,
get_required_str(self, language_id)?,
)
.into())
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self, value_id)?,
*language,
)
.into())
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self, value_id)?,
get_required_str(self, language_id)?,
)
.into()),
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
Ok(Literal::new_typed_literal(
*value,
NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
)
.into())
}
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
} => Ok(Literal::new_typed_literal(
get_required_str(self, value_id)?,
NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
)
.into()),
EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()),
}
}
}
fn get_required_str<L: StrLookup>(lookup: &L, id: &StrHash) -> Result<String, StorageError> {
Ok(lookup.get_str(id)?.ok_or_else(|| {
CorruptionError::new(format!(

@ -32,7 +32,7 @@ use crate::sparql::{
evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update,
UpdateOptions,
};
use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm};
#[cfg(not(target_arch = "wasm32"))]
use crate::storage::StorageBulkLoader;
use crate::storage::{
@ -1238,7 +1238,7 @@ impl Iterator for QuadIter {
fn next(&mut self) -> Option<Result<Quad, StorageError>> {
Some(match self.iter.next()? {
Ok(quad) => self.reader.decode_quad(&quad),
Ok(quad) => self.reader.term_decoder().decode_quad(&quad),
Err(error) => Err(error),
})
}
@ -1254,11 +1254,11 @@ impl Iterator for GraphNameIter {
type Item = Result<NamedOrBlankNode, StorageError>;
fn next(&mut self) -> Option<Result<NamedOrBlankNode, StorageError>> {
Some(
self.iter
.next()?
.and_then(|graph_name| self.reader.decode_named_or_blank_node(&graph_name)),
)
Some(self.iter.next()?.and_then(|graph_name| {
self.reader
.term_decoder()
.decode_named_or_blank_node(&graph_name)
}))
}
fn size_hint(&self) -> (usize, Option<usize>) {

Loading…
Cancel
Save