Compare commits

...

2 Commits

Author SHA1 Message Date
Tpt f7cc600054 Introduces a TermEncoder struct 3 years ago
Tpt 722836c1ec Replaces Decoder trait by a TermDecoder struct 3 years ago
  1. 34
      lib/oxrdf/src/triple.rs
  2. 29
      lib/src/sparql/dataset.rs
  3. 23
      lib/src/sparql/eval.rs
  4. 19
      lib/src/sparql/update.rs
  5. 8
      lib/src/storage/binary_encoder.rs
  6. 36
      lib/src/storage/mod.rs
  7. 391
      lib/src/storage/numeric_encoder.rs
  8. 52
      lib/src/store.rs

@ -921,6 +921,23 @@ impl From<BlankNodeRef<'_>> for GraphName {
} }
} }
/// Turns an owned [`NamedOrBlankNode`] into the [`GraphName`] wrapping the same node.
impl From<NamedOrBlankNode> for GraphName {
    #[inline]
    fn from(node: NamedOrBlankNode) -> Self {
        // Each variant is forwarded to the `From` conversion of its inner node type.
        match node {
            NamedOrBlankNode::NamedNode(named) => Self::from(named),
            NamedOrBlankNode::BlankNode(blank) => Self::from(blank),
        }
    }
}
/// Turns a borrowed [`NamedOrBlankNodeRef`] into an owned [`GraphName`].
impl From<NamedOrBlankNodeRef<'_>> for GraphName {
    #[inline]
    fn from(node: NamedOrBlankNodeRef<'_>) -> Self {
        // First take ownership of the borrowed node, then convert the owned value.
        let owned = node.into_owned();
        Self::from(owned)
    }
}
/// A possible borrowed graph name. /// A possible borrowed graph name.
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node), and the [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph).
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
@ -995,6 +1012,23 @@ impl<'a> From<&'a BlankNode> for GraphNameRef<'a> {
} }
} }
/// Turns a [`NamedOrBlankNodeRef`] into the [`GraphNameRef`] borrowing the same node.
impl<'a> From<NamedOrBlankNodeRef<'a>> for GraphNameRef<'a> {
    #[inline]
    fn from(node: NamedOrBlankNodeRef<'a>) -> Self {
        // Each variant is forwarded to the `From` conversion of its inner node reference.
        match node {
            NamedOrBlankNodeRef::NamedNode(named) => Self::from(named),
            NamedOrBlankNodeRef::BlankNode(blank) => Self::from(blank),
        }
    }
}
/// Borrows an owned [`NamedOrBlankNode`] as a [`GraphNameRef`].
impl<'a> From<&'a NamedOrBlankNode> for GraphNameRef<'a> {
    #[inline]
    fn from(node: &'a NamedOrBlankNode) -> Self {
        // Obtain the borrowed view of the node, then convert that view.
        let borrowed = node.as_ref();
        Self::from(borrowed)
    }
}
impl<'a> From<&'a GraphName> for GraphNameRef<'a> { impl<'a> From<&'a GraphName> for GraphNameRef<'a> {
#[inline] #[inline]
fn from(node: &'a GraphName) -> Self { fn from(node: &'a GraphName) -> Self {

@ -1,7 +1,9 @@
use crate::model::TermRef; use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset; use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError; use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; use crate::storage::numeric_encoder::{
insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder,
};
use crate::storage::{StorageError, StorageReader}; use crate::storage::{StorageError, StorageReader};
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
@ -16,13 +18,20 @@ pub struct DatasetView {
impl DatasetView { impl DatasetView {
pub fn new(reader: StorageReader, dataset: &QueryDataset) -> Self { pub fn new(reader: StorageReader, dataset: &QueryDataset) -> Self {
let encoder = reader.term_encoder();
let dataset = EncodedDatasetSpec { let dataset = EncodedDatasetSpec {
default: dataset default: dataset.default_graph_graphs().map(|graphs| {
.default_graph_graphs() graphs
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()), .iter()
named: dataset .map(|g| encoder.encode_graph_name(g))
.available_named_graphs() .collect::<Vec<_>>()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()), }),
named: dataset.available_named_graphs().map(|graphs| {
graphs
.iter()
.map(|g| encoder.encode_term(g))
.collect::<Vec<_>>()
}),
}; };
Self { Self {
reader, reader,
@ -150,7 +159,7 @@ impl DatasetView {
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm { pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
let term = term.into(); let term = term.into();
let encoded = term.into(); let encoded = self.reader.term_encoder().encode_term(term);
insert_term(term, &encoded, &mut |key, value| { insert_term(term, &encoded, &mut |key, value| {
self.insert_str(key, value); self.insert_str(key, value);
Ok(()) Ok(())
@ -159,6 +168,10 @@ impl DatasetView {
encoded encoded
} }
/// Builds a [`TermDecoder`] that borrows this view as its string lookup.
///
/// A new decoder is constructed on every call; nothing is cached here.
pub fn term_decoder(&self) -> TermDecoder<Self> {
TermDecoder::new(self)
}
pub fn insert_str(&self, key: &StrHash, value: &str) { pub fn insert_str(&self, key: &StrHash, value: &str) {
if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) { if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) {
if !matches!(self.reader.contains_str(key), Ok(true)) { if !matches!(self.reader.contains_str(key), Ok(true)) {

@ -659,7 +659,9 @@ impl SimpleEvaluator {
let service_name = get_pattern_value(service_name, from) let service_name = get_pattern_value(service_name, from)
.ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?; .ok_or_else(|| EvaluationError::msg("The SERVICE name is not bound"))?;
if let QueryResults::Solutions(iter) = self.service_handler.handle( if let QueryResults::Solutions(iter) = self.service_handler.handle(
self.dataset.decode_named_node(&service_name)?, self.dataset
.term_decoder()
.decode_named_node(&service_name)?,
Query { Query {
inner: spargebra::Query::Select { inner: spargebra::Query::Select {
dataset: None, dataset: None,
@ -2029,7 +2031,7 @@ impl SimpleEvaluator {
Rc::new(move |tuple| { Rc::new(move |tuple| {
let args = args let args = args
.iter() .iter()
.map(|f| dataset.decode_term(&f(tuple)?).ok()) .map(|f| dataset.term_decoder().decode_term(&f(tuple)?).ok())
.collect::<Option<Vec<_>>>()?; .collect::<Option<Vec<_>>>()?;
Some(dataset.encode_term(&function(&args)?)) Some(dataset.encode_term(&function(&args)?))
}) })
@ -2307,9 +2309,10 @@ fn decode_bindings(
variables, variables,
Box::new(iter.map(move |values| { Box::new(iter.map(move |values| {
let mut result = vec![None; tuple_size]; let mut result = vec![None; tuple_size];
let decoder = dataset.term_decoder();
for (i, value) in values?.iter().enumerate() { for (i, value) in values?.iter().enumerate() {
if let Some(term) = value { if let Some(term) = value {
result[i] = Some(dataset.decode_term(&term)?) result[i] = Some(decoder.decode_term(&term)?)
} }
} }
Ok(result) Ok(result)
@ -2569,9 +2572,10 @@ fn cmp_terms(dataset: &DatasetView, a: Option<&EncodedTerm>, b: Option<&EncodedT
b => { b => {
if let Some(ord) = partial_cmp_literals(dataset, a, b) { if let Some(ord) = partial_cmp_literals(dataset, a, b) {
ord ord
} else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) = } else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) = (
(dataset.decode_term(a), dataset.decode_term(b)) dataset.term_decoder().decode_term(a),
{ dataset.term_decoder().decode_term(b),
) {
(a.value(), a.datatype(), a.language()).cmp(&( (a.value(), a.datatype(), a.language()).cmp(&(
b.value(), b.value(),
b.datatype(), b.datatype(),
@ -3323,7 +3327,7 @@ impl Iterator for ConstructIterator {
get_triple_template_value(&template.object, &tuple, &mut self.bnodes), get_triple_template_value(&template.object, &tuple, &mut self.bnodes),
) { ) {
self.buffered_results.push(decode_triple( self.buffered_results.push(decode_triple(
&*self.eval.dataset, &self.eval.dataset.term_decoder(),
&subject, &subject,
&predicate, &predicate,
&object, &object,
@ -3373,8 +3377,8 @@ fn new_bnode() -> EncodedTerm {
EncodedTerm::NumericalBlankNode { id: random() } EncodedTerm::NumericalBlankNode { id: random() }
} }
fn decode_triple<D: Decoder>( fn decode_triple<S: StrLookup>(
decoder: &D, decoder: &TermDecoder<S>,
subject: &EncodedTerm, subject: &EncodedTerm,
predicate: &EncodedTerm, predicate: &EncodedTerm,
object: &EncodedTerm, object: &EncodedTerm,
@ -3402,6 +3406,7 @@ impl Iterator for DescribeIterator {
Ok(quad) => self Ok(quad) => self
.eval .eval
.dataset .dataset
.term_decoder()
.decode_quad(&quad) .decode_quad(&quad)
.map(|q| q.into()) .map(|q| q.into())
.map_err(|e| e.into()), .map_err(|e| e.into()),

@ -8,7 +8,7 @@ use crate::sparql::http::Client;
use crate::sparql::plan::EncodedTuple; use crate::sparql::plan::EncodedTuple;
use crate::sparql::plan_builder::PlanBuilder; use crate::sparql::plan_builder::PlanBuilder;
use crate::sparql::{EvaluationError, Update, UpdateOptions}; use crate::sparql::{EvaluationError, Update, UpdateOptions};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm}; use crate::storage::numeric_encoder::EncodedTerm;
use crate::storage::StorageWriter; use crate::storage::StorageWriter;
use oxiri::Iri; use oxiri::Iri;
use spargebra::algebra::{GraphPattern, GraphTarget}; use spargebra::algebra::{GraphPattern, GraphTarget};
@ -191,11 +191,12 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
fn eval_clear(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> { fn eval_clear(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> {
match graph { match graph {
GraphTarget::NamedNode(graph_name) => { GraphTarget::NamedNode(graph_name) => {
if self if self.transaction.reader().contains_named_graph(
&self
.transaction .transaction
.reader() .term_encoder()
.contains_named_graph(&graph_name.as_ref().into())? .encode_graph_name(graph_name),
{ )? {
Ok(self.transaction.clear_graph(graph_name.into())?) Ok(self.transaction.clear_graph(graph_name.into())?)
} else if silent { } else if silent {
Ok(()) Ok(())
@ -374,7 +375,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
.map(|t| t.into()) .map(|t| t.into())
} }
TermPattern::Variable(v) => Self::lookup_variable(v, variables, values) TermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_term(&node)) .map(|node| dataset.term_decoder().decode_term(&node))
.transpose()?, .transpose()?,
}) })
} }
@ -388,7 +389,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
Ok(match term { Ok(match term {
NamedNodePattern::NamedNode(term) => Some(term.clone()), NamedNodePattern::NamedNode(term) => Some(term.clone()),
NamedNodePattern::Variable(v) => Self::lookup_variable(v, variables, values) NamedNodePattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_named_node(&node)) .map(|node| dataset.term_decoder().decode_named_node(&node))
.transpose()?, .transpose()?,
}) })
} }
@ -407,7 +408,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
Ok(if node == EncodedTerm::DefaultGraph { Ok(if node == EncodedTerm::DefaultGraph {
OxGraphName::DefaultGraph OxGraphName::DefaultGraph
} else { } else {
dataset.decode_named_node(&node)?.into() dataset.term_decoder().decode_named_node(&node)?.into()
}) })
}) })
.transpose(), .transpose(),
@ -507,7 +508,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
.map(|t| t.into()) .map(|t| t.into())
} }
GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values) GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_term(&node)) .map(|node| dataset.term_decoder().decode_term(&node))
.transpose()?, .transpose()?,
}) })
} }

@ -742,11 +742,13 @@ mod tests {
) )
.into(), .into(),
]; ];
let decoder = TermDecoder::new(&store);
for term in terms { for term in terms {
let encoded = term.as_ref().into(); let encoder = TermEncoder::new();
let encoded = encoder.encode_term(&term);
assert_eq!(encoded, encoder.encode_term(&term));
store.insert_term(term.as_ref(), &encoded); store.insert_term(term.as_ref(), &encoded);
assert_eq!(encoded, term.as_ref().into()); assert_eq!(term, decoder.decode_term(&encoded).unwrap());
assert_eq!(term, store.decode_term(&encoded).unwrap());
let mut buffer = Vec::new(); let mut buffer = Vec::new();
write_term(&mut buffer, &encoded); write_term(&mut buffer, &encoded);

@ -10,7 +10,7 @@ use crate::storage::binary_encoder::{
}; };
pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError};
use crate::storage::numeric_encoder::{ use crate::storage::numeric_encoder::{
insert_term, Decoder, EncodedQuad, EncodedTerm, StrHash, StrLookup, insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, TermDecoder, TermEncoder,
}; };
use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter}; use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter};
use std::cmp::{max, min}; use std::cmp::{max, min};
@ -294,6 +294,15 @@ pub struct StorageReader {
} }
impl StorageReader { impl StorageReader {
/// Returns a new [`TermEncoder`].
#[allow(clippy::unused_self)]
pub fn term_encoder(&self) -> TermEncoder {
// `self` is never read because `TermEncoder::new()` takes no arguments,
// which is why the `unused_self` lint is allowed on this method.
TermEncoder::new()
}
/// Builds a [`TermDecoder`] that borrows this reader as its string lookup.
///
/// A new decoder is constructed on every call; nothing is cached here.
pub fn term_decoder(&self) -> TermDecoder<Self> {
TermDecoder::new(self)
}
pub fn len(&self) -> Result<usize, StorageError> { pub fn len(&self) -> Result<usize, StorageError> {
Ok(self.reader.len(&self.storage.gspo_cf)? + self.reader.len(&self.storage.dspo_cf)?) Ok(self.reader.len(&self.storage.gspo_cf)? + self.reader.len(&self.storage.dspo_cf)?)
} }
@ -654,7 +663,7 @@ impl StorageReader {
} }
for spo in self.dspo_quads(&[]) { for spo in self.dspo_quads(&[]) {
let spo = spo?; let spo = spo?;
self.decode_quad(&spo)?; // We ensure that the quad is readable self.term_decoder().decode_quad(&spo)?; // We ensure that the quad is readable
if !self.storage.db.contains_key( if !self.storage.db.contains_key(
&self.storage.dpos_cf, &self.storage.dpos_cf,
&encode_term_triple(&spo.predicate, &spo.object, &spo.subject), &encode_term_triple(&spo.predicate, &spo.object, &spo.subject),
@ -684,7 +693,7 @@ impl StorageReader {
} }
for gspo in self.gspo_quads(&[]) { for gspo in self.gspo_quads(&[]) {
let gspo = gspo?; let gspo = gspo?;
self.decode_quad(&gspo)?; // We ensure that the quad is readable self.term_decoder().decode_quad(&gspo)?; // We ensure that the quad is readable
if !self.storage.db.contains_key( if !self.storage.db.contains_key(
&self.storage.gpos_cf, &self.storage.gpos_cf,
&encode_term_quad( &encode_term_quad(
@ -848,8 +857,13 @@ impl<'a> StorageWriter<'a> {
} }
} }
/// Returns a new [`TermEncoder`].
#[allow(clippy::unused_self)]
pub fn term_encoder(&self) -> TermEncoder {
// `self` is never read because `TermEncoder::new()` takes no arguments,
// which is why the `unused_self` lint is allowed on this method.
TermEncoder::new()
}
pub fn insert(&mut self, quad: QuadRef<'_>) -> Result<bool, StorageError> { pub fn insert(&mut self, quad: QuadRef<'_>) -> Result<bool, StorageError> {
let encoded = quad.into(); let encoded = self.term_encoder().encode_quad(quad);
self.buffer.clear(); self.buffer.clear();
let result = if quad.graph_name.is_default_graph() { let result = if quad.graph_name.is_default_graph() {
write_spo_quad(&mut self.buffer, &encoded); write_spo_quad(&mut self.buffer, &encoded);
@ -937,7 +951,7 @@ impl<'a> StorageWriter<'a> {
&mut self, &mut self,
graph_name: NamedOrBlankNodeRef<'_>, graph_name: NamedOrBlankNodeRef<'_>,
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
let encoded_graph_name = graph_name.into(); let encoded_graph_name = self.term_encoder().encode_graph_name(graph_name);
self.buffer.clear(); self.buffer.clear();
write_term(&mut self.buffer, &encoded_graph_name); write_term(&mut self.buffer, &encoded_graph_name);
@ -1001,7 +1015,7 @@ impl<'a> StorageWriter<'a> {
} }
pub fn remove(&mut self, quad: QuadRef<'_>) -> Result<bool, StorageError> { pub fn remove(&mut self, quad: QuadRef<'_>) -> Result<bool, StorageError> {
self.remove_encoded(&quad.into()) self.remove_encoded(&self.term_encoder().encode_quad(quad))
} }
fn remove_encoded(&mut self, quad: &EncodedQuad) -> Result<bool, StorageError> { fn remove_encoded(&mut self, quad: &EncodedQuad) -> Result<bool, StorageError> {
@ -1077,14 +1091,15 @@ impl<'a> StorageWriter<'a> {
self.remove_encoded(&quad?)?; self.remove_encoded(&quad?)?;
} }
} else { } else {
let graph_name = self.term_encoder().encode_graph_name(graph_name);
self.buffer.clear(); self.buffer.clear();
write_term(&mut self.buffer, &graph_name.into()); write_term(&mut self.buffer, &graph_name);
if self if self
.transaction .transaction
.contains_key_for_update(&self.storage.graphs_cf, &self.buffer)? .contains_key_for_update(&self.storage.graphs_cf, &self.buffer)?
{ {
// The condition is useful to lock the graph itself and ensure no quad is inserted at the same time // The condition is useful to lock the graph itself and ensure no quad is inserted at the same time
for quad in self.reader().quads_for_graph(&graph_name.into()) { for quad in self.reader().quads_for_graph(&graph_name) {
self.remove_encoded(&quad?)?; self.remove_encoded(&quad?)?;
} }
} }
@ -1110,7 +1125,7 @@ impl<'a> StorageWriter<'a> {
&mut self, &mut self,
graph_name: NamedOrBlankNodeRef<'_>, graph_name: NamedOrBlankNodeRef<'_>,
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
self.remove_encoded_named_graph(&graph_name.into()) self.remove_encoded_named_graph(&self.term_encoder().encode_graph_name(graph_name))
} }
fn remove_encoded_named_graph( fn remove_encoded_named_graph(
@ -1323,8 +1338,9 @@ impl FileBulkLoader {
} }
fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> { fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> {
let encoder = TermEncoder::new();
for quad in quads { for quad in quads {
let encoded = EncodedQuad::from(quad.as_ref()); let encoded = encoder.encode_quad(&quad);
if quad.graph_name.is_default_graph() { if quad.graph_name.is_default_graph() {
if self.triples.insert(encoded.clone()) { if self.triples.insert(encoded.clone()) {
self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?; self.insert_term(quad.subject.as_ref().into(), &encoded.subject)?;

@ -429,33 +429,88 @@ impl From<EncodedTriple> for EncodedTerm {
} }
} }
impl From<NamedNodeRef<'_>> for EncodedTerm { #[derive(Eq, PartialEq, Debug, Clone, Hash)]
fn from(named_node: NamedNodeRef<'_>) -> Self { pub struct EncodedTriple {
Self::NamedNode { pub subject: EncodedTerm,
iri_id: StrHash::new(named_node.as_str()), pub predicate: EncodedTerm,
pub object: EncodedTerm,
}
impl EncodedTriple {
/// Assembles an [`EncodedTriple`] from three already-encoded terms.
///
/// The arguments are stored as given; no validation is performed here.
pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
Self {
subject,
predicate,
object,
}
}
}
/// A quad whose four positions are each stored as an [`EncodedTerm`].
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedQuad {
/// Encoded subject of the quad.
pub subject: EncodedTerm,
/// Encoded predicate of the quad.
pub predicate: EncodedTerm,
/// Encoded object of the quad.
pub object: EncodedTerm,
/// Encoded graph name of the quad.
pub graph_name: EncodedTerm,
}
impl EncodedQuad {
pub fn new(
subject: EncodedTerm,
predicate: EncodedTerm,
object: EncodedTerm,
graph_name: EncodedTerm,
) -> Self {
Self {
subject,
predicate,
object,
graph_name,
} }
} }
} }
impl From<BlankNodeRef<'_>> for EncodedTerm { pub struct TermEncoder {}
fn from(blank_node: BlankNodeRef<'_>) -> Self {
impl TermEncoder {
/// Creates a new encoder.
///
/// The value is built as an empty struct literal, so there is nothing to configure.
pub fn new() -> Self {
Self {}
}
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
match term.into() {
TermRef::NamedNode(named_node) => self.encode_named_node(named_node),
TermRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
TermRef::Literal(literal) => self.encode_literal(literal),
TermRef::Triple(triple) => self.encode_triple(triple.as_ref()).into(),
}
}
/// Encodes a named node as `EncodedTerm::NamedNode`, whose `iri_id` is the
/// [`StrHash`] computed from `named_node.as_str()`.
// `self` is not read by this method, hence the `unused_self` lint allowance.
#[allow(clippy::unused_self)]
fn encode_named_node(&self, named_node: NamedNodeRef<'_>) -> EncodedTerm {
EncodedTerm::NamedNode {
iri_id: StrHash::new(named_node.as_str()),
}
}
#[allow(clippy::unused_self)]
fn encode_blank_node(&self, blank_node: BlankNodeRef<'_>) -> EncodedTerm {
if let Some(id) = blank_node.unique_id() { if let Some(id) = blank_node.unique_id() {
Self::NumericalBlankNode { id } EncodedTerm::NumericalBlankNode { id }
} else { } else {
let id = blank_node.as_str(); let id = blank_node.as_str();
if let Ok(id) = id.try_into() { if let Ok(id) = id.try_into() {
Self::SmallBlankNode(id) EncodedTerm::SmallBlankNode(id)
} else { } else {
Self::BigBlankNode { EncodedTerm::BigBlankNode {
id_id: StrHash::new(id), id_id: StrHash::new(id),
} }
} }
} }
} }
}
impl From<LiteralRef<'_>> for EncodedTerm { #[allow(clippy::unused_self)]
fn from(literal: LiteralRef<'_>) -> Self { fn encode_literal(&self, literal: LiteralRef<'_>) -> EncodedTerm {
let value = literal.value(); let value = literal.value();
let datatype = literal.datatype().as_str(); let datatype = literal.datatype().as_str();
let native_encoding = match datatype { let native_encoding = match datatype {
@ -463,20 +518,20 @@ impl From<LiteralRef<'_>> for EncodedTerm {
literal.language().map(|language| { literal.language().map(|language| {
if let Ok(value) = SmallString::try_from(value) { if let Ok(value) = SmallString::try_from(value) {
if let Ok(language) = SmallString::try_from(language) { if let Ok(language) = SmallString::try_from(language) {
Self::SmallSmallLangStringLiteral { value, language } EncodedTerm::SmallSmallLangStringLiteral { value, language }
} else { } else {
Self::SmallBigLangStringLiteral { EncodedTerm::SmallBigLangStringLiteral {
value, value,
language_id: StrHash::new(language), language_id: StrHash::new(language),
} }
} }
} else if let Ok(language) = SmallString::try_from(language) { } else if let Ok(language) = SmallString::try_from(language) {
Self::BigSmallLangStringLiteral { EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::new(value), value_id: StrHash::new(value),
language, language,
} }
} else { } else {
Self::BigBigLangStringLiteral { EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::new(value), value_id: StrHash::new(value),
language_id: StrHash::new(language), language_id: StrHash::new(language),
} }
@ -487,9 +542,9 @@ impl From<LiteralRef<'_>> for EncodedTerm {
"http://www.w3.org/2001/XMLSchema#string" => { "http://www.w3.org/2001/XMLSchema#string" => {
let value = value; let value = value;
Some(if let Ok(value) = SmallString::try_from(value) { Some(if let Ok(value) = SmallString::try_from(value) {
Self::SmallStringLiteral(value) EncodedTerm::SmallStringLiteral(value)
} else { } else {
Self::BigStringLiteral { EncodedTerm::BigStringLiteral {
value_id: StrHash::new(value), value_id: StrHash::new(value),
} }
}) })
@ -532,12 +587,12 @@ impl From<LiteralRef<'_>> for EncodedTerm {
Some(term) => term, Some(term) => term,
None => { None => {
if let Ok(value) = SmallString::try_from(value) { if let Ok(value) = SmallString::try_from(value) {
Self::SmallTypedLiteral { EncodedTerm::SmallTypedLiteral {
value, value,
datatype_id: StrHash::new(datatype), datatype_id: StrHash::new(datatype),
} }
} else { } else {
Self::BigTypedLiteral { EncodedTerm::BigTypedLiteral {
value_id: StrHash::new(value), value_id: StrHash::new(value),
datatype_id: StrHash::new(datatype), datatype_id: StrHash::new(datatype),
} }
@ -545,112 +600,30 @@ impl From<LiteralRef<'_>> for EncodedTerm {
} }
} }
} }
}
impl From<NamedOrBlankNodeRef<'_>> for EncodedTerm { pub fn encode_graph_name<'a>(&self, name: impl Into<GraphNameRef<'a>>) -> EncodedTerm {
fn from(term: NamedOrBlankNodeRef<'_>) -> Self { match name.into() {
match term { GraphNameRef::NamedNode(named_node) => self.encode_named_node(named_node),
NamedOrBlankNodeRef::NamedNode(named_node) => named_node.into(), GraphNameRef::BlankNode(blank_node) => self.encode_blank_node(blank_node),
NamedOrBlankNodeRef::BlankNode(blank_node) => blank_node.into(), GraphNameRef::DefaultGraph => EncodedTerm::DefaultGraph,
} }
} }
}
impl From<SubjectRef<'_>> for EncodedTerm { fn encode_triple(&self, triple: TripleRef<'_>) -> EncodedTriple {
fn from(term: SubjectRef<'_>) -> Self { EncodedTriple {
match term { subject: self.encode_term(triple.subject),
SubjectRef::NamedNode(named_node) => named_node.into(), predicate: self.encode_term(triple.predicate),
SubjectRef::BlankNode(blank_node) => blank_node.into(), object: self.encode_term(triple.object),
SubjectRef::Triple(triple) => triple.as_ref().into(),
} }
} }
}
impl From<TermRef<'_>> for EncodedTerm { pub fn encode_quad<'a>(&self, quad: impl Into<QuadRef<'a>>) -> EncodedQuad {
fn from(term: TermRef<'_>) -> Self { let quad = quad.into();
match term { EncodedQuad {
TermRef::NamedNode(named_node) => named_node.into(), subject: self.encode_term(quad.subject),
TermRef::BlankNode(blank_node) => blank_node.into(), predicate: self.encode_term(quad.predicate),
TermRef::Literal(literal) => literal.into(), object: self.encode_term(quad.object),
TermRef::Triple(triple) => triple.as_ref().into(), graph_name: self.encode_graph_name(quad.graph_name),
}
}
}
impl From<GraphNameRef<'_>> for EncodedTerm {
fn from(name: GraphNameRef<'_>) -> Self {
match name {
GraphNameRef::NamedNode(named_node) => named_node.into(),
GraphNameRef::BlankNode(blank_node) => blank_node.into(),
GraphNameRef::DefaultGraph => Self::DefaultGraph,
}
}
}
impl From<TripleRef<'_>> for EncodedTerm {
fn from(triple: TripleRef<'_>) -> Self {
Self::Triple(Rc::new(triple.into()))
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTriple {
pub subject: EncodedTerm,
pub predicate: EncodedTerm,
pub object: EncodedTerm,
}
impl EncodedTriple {
pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
Self {
subject,
predicate,
object,
}
}
}
impl From<TripleRef<'_>> for EncodedTriple {
fn from(triple: TripleRef<'_>) -> Self {
Self {
subject: triple.subject.into(),
predicate: triple.predicate.into(),
object: triple.object.into(),
}
}
}
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedQuad {
pub subject: EncodedTerm,
pub predicate: EncodedTerm,
pub object: EncodedTerm,
pub graph_name: EncodedTerm,
}
impl EncodedQuad {
pub fn new(
subject: EncodedTerm,
predicate: EncodedTerm,
object: EncodedTerm,
graph_name: EncodedTerm,
) -> Self {
Self {
subject,
predicate,
object,
graph_name,
}
}
}
impl From<QuadRef<'_>> for EncodedQuad {
fn from(quad: QuadRef<'_>) -> Self {
Self {
subject: quad.subject.into(),
predicate: quad.predicate.into(),
object: quad.object.into(),
graph_name: quad.graph_name.into(),
} }
} }
} }
@ -819,10 +792,95 @@ pub fn parse_day_time_duration_str(value: &str) -> Option<EncodedTerm> {
value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok() value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok()
} }
pub trait Decoder: StrLookup { pub struct TermDecoder<'a, S: StrLookup> {
fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError>; lookup: &'a S,
}
impl<'a, S: StrLookup> TermDecoder<'a, S> {
/// Creates a decoder that borrows `lookup` for the lifetime `'a`.
///
/// The reference is stored as-is in the `lookup` field; no work is done at construction time.
pub fn new(lookup: &'a S) -> Self {
Self { lookup }
}
fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, StorageError> { pub fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError> {
match encoded {
EncodedTerm::DefaultGraph => {
Err(CorruptionError::msg("The default graph tag is not a valid term").into())
}
EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::new_unchecked(get_required_str(self.lookup, iri_id)?).into())
}
EncodedTerm::NumericalBlankNode { id } => Ok(BlankNode::new_from_unique_id(*id).into()),
EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
EncodedTerm::BigBlankNode { id_id } => {
Ok(BlankNode::new_unchecked(get_required_str(self.lookup, id_id)?).into())
}
EncodedTerm::SmallStringLiteral(value) => {
Ok(Literal::new_simple_literal(*value).into())
}
EncodedTerm::BigStringLiteral { value_id } => {
Ok(Literal::new_simple_literal(get_required_str(self.lookup, value_id)?).into())
}
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into())
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
Ok(Literal::new_language_tagged_literal_unchecked(
*value,
get_required_str(self.lookup, language_id)?,
)
.into())
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self.lookup, value_id)?,
*language,
)
.into())
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self.lookup, value_id)?,
get_required_str(self.lookup, language_id)?,
)
.into()),
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
Ok(Literal::new_typed_literal(
*value,
NamedNode::new_unchecked(get_required_str(self.lookup, datatype_id)?),
)
.into())
}
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
} => Ok(Literal::new_typed_literal(
get_required_str(self.lookup, value_id)?,
NamedNode::new_unchecked(get_required_str(self.lookup, datatype_id)?),
)
.into()),
EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()),
EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()),
}
}
pub fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, StorageError> {
match self.decode_term(encoded)? { match self.decode_term(encoded)? {
Term::NamedNode(named_node) => Ok(named_node.into()), Term::NamedNode(named_node) => Ok(named_node.into()),
Term::BlankNode(blank_node) => Ok(blank_node.into()), Term::BlankNode(blank_node) => Ok(blank_node.into()),
@ -834,7 +892,7 @@ pub trait Decoder: StrLookup {
} }
} }
fn decode_named_or_blank_node( pub fn decode_named_or_blank_node(
&self, &self,
encoded: &EncodedTerm, encoded: &EncodedTerm,
) -> Result<NamedOrBlankNode, StorageError> { ) -> Result<NamedOrBlankNode, StorageError> {
@ -852,7 +910,7 @@ pub trait Decoder: StrLookup {
} }
} }
fn decode_named_node(&self, encoded: &EncodedTerm) -> Result<NamedNode, StorageError> { pub fn decode_named_node(&self, encoded: &EncodedTerm) -> Result<NamedNode, StorageError> {
match self.decode_term(encoded)? { match self.decode_term(encoded)? {
Term::NamedNode(named_node) => Ok(named_node), Term::NamedNode(named_node) => Ok(named_node),
Term::BlankNode(_) => Err(CorruptionError::msg( Term::BlankNode(_) => Err(CorruptionError::msg(
@ -868,7 +926,7 @@ pub trait Decoder: StrLookup {
} }
} }
fn decode_triple(&self, encoded: &EncodedTriple) -> Result<Triple, StorageError> { pub fn decode_triple(&self, encoded: &EncodedTriple) -> Result<Triple, StorageError> {
Ok(Triple::new( Ok(Triple::new(
self.decode_subject(&encoded.subject)?, self.decode_subject(&encoded.subject)?,
self.decode_named_node(&encoded.predicate)?, self.decode_named_node(&encoded.predicate)?,
@ -876,7 +934,7 @@ pub trait Decoder: StrLookup {
)) ))
} }
fn decode_quad(&self, encoded: &EncodedQuad) -> Result<Quad, StorageError> { pub fn decode_quad(&self, encoded: &EncodedQuad) -> Result<Quad, StorageError> {
Ok(Quad::new( Ok(Quad::new(
self.decode_subject(&encoded.subject)?, self.decode_subject(&encoded.subject)?,
self.decode_named_node(&encoded.predicate)?, self.decode_named_node(&encoded.predicate)?,
@ -903,87 +961,6 @@ pub trait Decoder: StrLookup {
} }
} }
impl<S: StrLookup> Decoder for S {
    /// Rebuilds the [`Term`] that `encoded` stands for.
    ///
    /// Variants that only carry a string id have that id resolved through
    /// `get_required_str` against `self`; variants that carry their value inline
    /// are converted directly.
    ///
    /// # Errors
    ///
    /// Returns a corruption error for `EncodedTerm::DefaultGraph`, which is not a
    /// term, and propagates any error raised by `get_required_str` or by
    /// `decode_triple` for quoted triples.
    fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError> {
        Ok(match encoded {
            // The only variant that can never be turned into a term.
            EncodedTerm::DefaultGraph => {
                return Err(
                    CorruptionError::msg("The default graph tag is not a valid term").into(),
                );
            }

            // IRIs.
            EncodedTerm::NamedNode { iri_id: iri } => {
                NamedNode::new_unchecked(get_required_str(self, iri)?).into()
            }

            // Blank nodes.
            EncodedTerm::NumericalBlankNode { id: number } => {
                BlankNode::new_from_unique_id(*number).into()
            }
            EncodedTerm::SmallBlankNode(label) => BlankNode::new_unchecked(label.as_str()).into(),
            EncodedTerm::BigBlankNode { id_id: label } => {
                BlankNode::new_unchecked(get_required_str(self, label)?).into()
            }

            // Simple string literals.
            EncodedTerm::SmallStringLiteral(text) => Literal::new_simple_literal(*text).into(),
            EncodedTerm::BigStringLiteral { value_id: text } => {
                Literal::new_simple_literal(get_required_str(self, text)?).into()
            }

            // Language-tagged string literals (value is looked up before the tag).
            EncodedTerm::SmallSmallLangStringLiteral {
                value: text,
                language: tag,
            } => Literal::new_language_tagged_literal_unchecked(*text, *tag).into(),
            EncodedTerm::SmallBigLangStringLiteral {
                value: text,
                language_id: tag,
            } => Literal::new_language_tagged_literal_unchecked(
                *text,
                get_required_str(self, tag)?,
            )
            .into(),
            EncodedTerm::BigSmallLangStringLiteral {
                value_id: text,
                language: tag,
            } => Literal::new_language_tagged_literal_unchecked(
                get_required_str(self, text)?,
                *tag,
            )
            .into(),
            EncodedTerm::BigBigLangStringLiteral {
                value_id: text,
                language_id: tag,
            } => Literal::new_language_tagged_literal_unchecked(
                get_required_str(self, text)?,
                get_required_str(self, tag)?,
            )
            .into(),

            // Literals with an explicit datatype IRI.
            EncodedTerm::SmallTypedLiteral {
                value: text,
                datatype_id: datatype,
            } => Literal::new_typed_literal(
                *text,
                NamedNode::new_unchecked(get_required_str(self, datatype)?),
            )
            .into(),
            EncodedTerm::BigTypedLiteral {
                value_id: text,
                datatype_id: datatype,
            } => Literal::new_typed_literal(
                get_required_str(self, text)?,
                NamedNode::new_unchecked(get_required_str(self, datatype)?),
            )
            .into(),

            // Literals whose value is stored inline in the encoded term.
            EncodedTerm::BooleanLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::FloatLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::DoubleLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::IntegerLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::DecimalLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::DateTimeLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::DateLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::TimeLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::GYearMonthLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::GYearLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::GMonthDayLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::GDayLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::GMonthLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::DurationLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::YearMonthDurationLiteral(inner) => Literal::from(*inner).into(),
            EncodedTerm::DayTimeDurationLiteral(inner) => Literal::from(*inner).into(),

            // Quoted triples are decoded recursively.
            EncodedTerm::Triple(quoted) => self.decode_triple(quoted)?.into(),
        })
    }
}
fn get_required_str<L: StrLookup>(lookup: &L, id: &StrHash) -> Result<String, StorageError> { fn get_required_str<L: StrLookup>(lookup: &L, id: &StrHash) -> Result<String, StorageError> {
Ok(lookup.get_str(id)?.ok_or_else(|| { Ok(lookup.get_str(id)?.ok_or_else(|| {
CorruptionError::new(format!( CorruptionError::new(format!(

@ -32,7 +32,6 @@ use crate::sparql::{
evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update, evaluate_query, evaluate_update, EvaluationError, Query, QueryOptions, QueryResults, Update,
UpdateOptions, UpdateOptions,
}; };
use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm};
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
use crate::storage::StorageBulkLoader; use crate::storage::StorageBulkLoader;
use crate::storage::{ use crate::storage::{
@ -184,12 +183,13 @@ impl Store {
graph_name: Option<GraphNameRef<'_>>, graph_name: Option<GraphNameRef<'_>>,
) -> QuadIter { ) -> QuadIter {
let reader = self.storage.snapshot(); let reader = self.storage.snapshot();
let encoder = reader.term_encoder();
QuadIter { QuadIter {
iter: reader.quads_for_pattern( iter: reader.quads_for_pattern(
subject.map(EncodedTerm::from).as_ref(), subject.map(|t| encoder.encode_term(t)).as_ref(),
predicate.map(EncodedTerm::from).as_ref(), predicate.map(|t| encoder.encode_term(t)).as_ref(),
object.map(EncodedTerm::from).as_ref(), object.map(|t| encoder.encode_term(t)).as_ref(),
graph_name.map(EncodedTerm::from).as_ref(), graph_name.map(|t| encoder.encode_graph_name(t)).as_ref(),
), ),
reader, reader,
} }
@ -236,8 +236,9 @@ impl Store {
/// # Result::<_, Box<dyn std::error::Error>>::Ok(()) /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ``` /// ```
pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, StorageError> { pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> Result<bool, StorageError> {
let quad = EncodedQuad::from(quad.into()); let reader = self.storage.snapshot();
self.storage.snapshot().contains(&quad) let quad = reader.term_encoder().encode_quad(quad);
reader.contains(&quad)
} }
/// Returns the number of quads in the store. /// Returns the number of quads in the store.
@ -611,8 +612,9 @@ impl Store {
&self, &self,
graph_name: impl Into<NamedOrBlankNodeRef<'a>>, graph_name: impl Into<NamedOrBlankNodeRef<'a>>,
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
let graph_name = EncodedTerm::from(graph_name.into()); let reader = self.storage.snapshot();
self.storage.snapshot().contains_named_graph(&graph_name) let graph_name = reader.term_encoder().encode_graph_name(graph_name.into());
reader.contains_named_graph(&graph_name)
} }
/// Inserts a graph into this store. /// Inserts a graph into this store.
@ -899,12 +901,13 @@ impl<'a> Transaction<'a> {
graph_name: Option<GraphNameRef<'_>>, graph_name: Option<GraphNameRef<'_>>,
) -> QuadIter { ) -> QuadIter {
let reader = self.writer.reader(); let reader = self.writer.reader();
let encoder = reader.term_encoder();
QuadIter { QuadIter {
iter: reader.quads_for_pattern( iter: reader.quads_for_pattern(
subject.map(EncodedTerm::from).as_ref(), subject.map(|t| encoder.encode_term(t)).as_ref(),
predicate.map(EncodedTerm::from).as_ref(), predicate.map(|t| encoder.encode_term(t)).as_ref(),
object.map(EncodedTerm::from).as_ref(), object.map(|t| encoder.encode_term(t)).as_ref(),
graph_name.map(EncodedTerm::from).as_ref(), graph_name.map(|t| encoder.encode_graph_name(t)).as_ref(),
), ),
reader, reader,
} }
@ -917,8 +920,9 @@ impl<'a> Transaction<'a> {
/// Checks if this store contains a given quad. /// Checks if this store contains a given quad.
pub fn contains<'b>(&self, quad: impl Into<QuadRef<'b>>) -> Result<bool, StorageError> { pub fn contains<'b>(&self, quad: impl Into<QuadRef<'b>>) -> Result<bool, StorageError> {
let quad = EncodedQuad::from(quad.into()); let reader = self.writer.reader();
self.writer.reader().contains(&quad) let quad = reader.term_encoder().encode_quad(quad);
reader.contains(&quad)
} }
/// Returns the number of quads in the store. /// Returns the number of quads in the store.
@ -1125,9 +1129,9 @@ impl<'a> Transaction<'a> {
&self, &self,
graph_name: impl Into<NamedOrBlankNodeRef<'b>>, graph_name: impl Into<NamedOrBlankNodeRef<'b>>,
) -> Result<bool, StorageError> { ) -> Result<bool, StorageError> {
self.writer let reader = self.writer.reader();
.reader() let graph_name = reader.term_encoder().encode_graph_name(graph_name.into());
.contains_named_graph(&EncodedTerm::from(graph_name.into())) reader.contains_named_graph(&graph_name)
} }
/// Inserts a graph into this store. /// Inserts a graph into this store.
@ -1238,7 +1242,7 @@ impl Iterator for QuadIter {
fn next(&mut self) -> Option<Result<Quad, StorageError>> { fn next(&mut self) -> Option<Result<Quad, StorageError>> {
Some(match self.iter.next()? { Some(match self.iter.next()? {
Ok(quad) => self.reader.decode_quad(&quad), Ok(quad) => self.reader.term_decoder().decode_quad(&quad),
Err(error) => Err(error), Err(error) => Err(error),
}) })
} }
@ -1254,11 +1258,11 @@ impl Iterator for GraphNameIter {
type Item = Result<NamedOrBlankNode, StorageError>; type Item = Result<NamedOrBlankNode, StorageError>;
fn next(&mut self) -> Option<Result<NamedOrBlankNode, StorageError>> { fn next(&mut self) -> Option<Result<NamedOrBlankNode, StorageError>> {
Some( Some(self.iter.next()?.and_then(|graph_name| {
self.iter self.reader
.next()? .term_decoder()
.and_then(|graph_name| self.reader.decode_named_or_blank_node(&graph_name)), .decode_named_or_blank_node(&graph_name)
) }))
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {

Loading…
Cancel
Save