Drops rio specific numerical encoder

pull/171/head
Tpt 3 years ago
parent f15430804a
commit 98f5f35dbb
  1. 90
      lib/src/storage/io.rs
  2. 103
      lib/src/storage/numeric_encoder.rs

@ -2,10 +2,11 @@
use crate::error::invalid_input_error;
use crate::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerializer};
use crate::model::{GraphNameRef, Quad, Triple};
use crate::model::{BlankNode, GraphNameRef, LiteralRef, NamedNodeRef, Quad, QuadRef, Triple};
use crate::storage::numeric_encoder::WriteEncoder;
use crate::storage::StorageLike;
use oxiri::Iri;
use rio_api::model as rio;
use rio_api::parser::{QuadsParser, TriplesParser};
use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleError, TurtleParser};
use rio_xml::{RdfXmlError, RdfXmlParser};
@ -47,18 +48,51 @@ where
StoreOrParseError<S::Error>: From<P::Error>,
{
let mut bnode_map = HashMap::default();
let to_graph_name = storage
.encode_graph_name(to_graph_name)
.map_err(StoreOrParseError::Store)?;
parser.parse_all(&mut move |t| {
let quad = storage
.encode_rio_triple_in_graph(t, to_graph_name.clone(), &mut bnode_map)
.encode_quad(quad_from_rio_triple(&t, to_graph_name, &mut bnode_map))
.map_err(StoreOrParseError::Store)?;
storage.insert(&quad).map_err(StoreOrParseError::Store)?;
Ok(())
})
}
/// Builds the [`QuadRef`] corresponding to a rio `triple`, placed in `graph_name`.
///
/// Blank node labels found in the subject or object position are first registered in
/// `bnode_map` (a label that is not yet present gets a default-constructed [`BlankNode`]).
/// The returned quad then borrows the mapped blank nodes from the map, so a given label
/// always resolves to the same map entry. IRIs and language tags are wrapped with the
/// `*_unchecked` constructors.
fn quad_from_rio_triple<'a>(
    triple: &rio::Triple<'a>,
    graph_name: GraphNameRef<'a>,
    bnode_map: &'a mut HashMap<String, BlankNode>,
) -> QuadRef<'a> {
    // Registration phase: the map is still mutably borrowed here.
    if let rio::NamedOrBlankNode::BlankNode(bnode) = triple.subject {
        bnode_map.entry(bnode.id.to_owned()).or_default();
    }
    if let rio::Term::BlankNode(bnode) = triple.object {
        bnode_map.entry(bnode.id.to_owned()).or_default();
    }

    // Read phase: from here on the map is only read, so hand out shared borrows for 'a.
    let bnode_map: &'a HashMap<String, BlankNode> = bnode_map;

    // The index lookups below cannot miss: the labels were inserted just above.
    let subject = match triple.subject {
        rio::NamedOrBlankNode::NamedNode(n) => NamedNodeRef::new_unchecked(n.iri).into(),
        rio::NamedOrBlankNode::BlankNode(n) => bnode_map[n.id].as_ref().into(),
    };
    let predicate = NamedNodeRef::new_unchecked(triple.predicate.iri);
    let object = match triple.object {
        rio::Term::NamedNode(n) => NamedNodeRef::new_unchecked(n.iri).into(),
        rio::Term::BlankNode(n) => bnode_map[n.id].as_ref().into(),
        rio::Term::Literal(rio_literal) => {
            let literal = match rio_literal {
                rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value),
                rio::Literal::LanguageTaggedString { value, language } => {
                    LiteralRef::new_language_tagged_literal_unchecked(value, language)
                }
                rio::Literal::Typed { value, datatype } => {
                    LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri))
                }
            };
            literal.into()
        }
    };

    QuadRef {
        subject,
        predicate,
        object,
        graph_name,
    }
}
pub fn dump_graph(
triples: impl Iterator<Item = io::Result<Triple>>,
writer: impl Write,
@ -98,13 +132,57 @@ where
let mut bnode_map = HashMap::default();
parser.parse_all(&mut move |q| {
let quad = store
.encode_rio_quad(q, &mut bnode_map)
.encode_quad(quad_from_rio(&q, &mut bnode_map))
.map_err(StoreOrParseError::Store)?;
store.insert(&quad).map_err(StoreOrParseError::Store)?;
Ok(())
})
}
fn quad_from_rio<'a>(
quad: &rio::Quad<'a>,
bnode_map: &'a mut HashMap<String, BlankNode>,
) -> QuadRef<'a> {
// we insert the blank nodes
if let rio::NamedOrBlankNode::BlankNode(node) = quad.subject {
bnode_map.entry(node.id.to_owned()).or_default();
}
if let rio::Term::BlankNode(node) = quad.object {
bnode_map.entry(node.id.to_owned()).or_default();
}
if let Some(rio::NamedOrBlankNode::BlankNode(node)) = quad.graph_name {
bnode_map.entry(node.id.to_owned()).or_default();
}
QuadRef {
subject: match quad.subject {
rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(),
rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(),
},
predicate: NamedNodeRef::new_unchecked(quad.predicate.iri),
object: match quad.object {
rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(),
rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(),
rio::Term::Literal(literal) => match literal {
rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value),
rio::Literal::LanguageTaggedString { value, language } => {
LiteralRef::new_language_tagged_literal_unchecked(value, language)
}
rio::Literal::Typed { value, datatype } => {
LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri))
}
}
.into(),
},
graph_name: match quad.graph_name {
Some(rio::NamedOrBlankNode::NamedNode(node)) => {
NamedNodeRef::new_unchecked(node.iri).into()
}
Some(rio::NamedOrBlankNode::BlankNode(node)) => bnode_map[node.id].as_ref().into(),
None => GraphNameRef::DefaultGraph,
},
}
}
pub fn dump_dataset(
quads: impl Iterator<Item = io::Result<Quad>>,
writer: impl Write,

@ -5,10 +5,7 @@ use crate::model::xsd::*;
use crate::model::*;
use crate::sparql::EvaluationError;
use crate::storage::small_string::SmallString;
use rand::random;
use rio_api::model as rio;
use siphasher::sip128::{Hasher128, SipHasher24};
use std::collections::HashMap;
use std::convert::{TryFrom, TryInto};
use std::error::Error;
use std::fmt::Debug;
@ -900,106 +897,6 @@ pub(crate) trait WriteEncoder: StrContainer {
})
}
/// Encodes `triple` into an [`EncodedQuad`] that uses the already encoded `graph_name`.
///
/// The subject, predicate and object are encoded in that order; the first encoding error
/// is returned as is.
fn encode_triple_in_graph(
    &self,
    triple: TripleRef<'_>,
    graph_name: EncodedTerm,
) -> Result<EncodedQuad, Self::Error> {
    let subject = self.encode_subject(triple.subject)?;
    let predicate = self.encode_named_node(triple.predicate)?;
    let object = self.encode_term(triple.object)?;
    Ok(EncodedQuad {
        subject,
        predicate,
        object,
        graph_name,
    })
}
/// Encodes a rio named node by wrapping its IRI in a [`NamedNodeRef`] (through the
/// `new_unchecked` constructor) and delegating to `encode_named_node`.
fn encode_rio_named_node(
    &self,
    named_node: rio::NamedNode<'_>,
) -> Result<EncodedTerm, Self::Error> {
    let node_ref = NamedNodeRef::new_unchecked(named_node.iri);
    self.encode_named_node(node_ref)
}
/// Encodes a rio blank node as an [`EncodedTerm::NumericalBlankNode`].
///
/// `bnodes_map` associates blank node labels with numerical ids: a label that is already
/// present reuses its id, any other label gets a fresh random `u128` that is recorded in
/// the map before being returned.
fn encode_rio_blank_node(
    &self,
    blank_node: rio::BlankNode<'_>,
    bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm, Self::Error> {
    // Known label: reuse the id it was given earlier.
    if let Some(&id) = bnodes_map.get(blank_node.id) {
        return Ok(EncodedTerm::NumericalBlankNode { id });
    }
    // Unknown label: draw a new id and remember it for the next occurrences.
    let id = random::<u128>();
    bnodes_map.insert(blank_node.id.to_owned(), id);
    Ok(EncodedTerm::NumericalBlankNode { id })
}
/// Encodes a rio literal by converting it to the matching [`LiteralRef`] (simple, language
/// tagged or typed) and delegating to `encode_literal`.
fn encode_rio_literal(&self, literal: rio::Literal<'_>) -> Result<EncodedTerm, Self::Error> {
    let literal_ref = match literal {
        rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value),
        rio::Literal::LanguageTaggedString { value, language } => {
            LiteralRef::new_language_tagged_literal_unchecked(value, language)
        }
        rio::Literal::Typed { value, datatype } => {
            LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri))
        }
    };
    self.encode_literal(literal_ref)
}
/// Encodes a rio named-or-blank node.
///
/// Named nodes go through `encode_rio_named_node`; blank nodes go through
/// `encode_rio_blank_node`, which reads and updates `bnodes_map`.
fn encode_rio_subject(
    &self,
    term: rio::NamedOrBlankNode<'_>,
    bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm, Self::Error> {
    match term {
        rio::NamedOrBlankNode::BlankNode(bnode) => self.encode_rio_blank_node(bnode, bnodes_map),
        rio::NamedOrBlankNode::NamedNode(iri_node) => self.encode_rio_named_node(iri_node),
    }
}
/// Encodes a rio term by dispatching on its kind.
///
/// Named nodes go through `encode_rio_named_node`, literals through `encode_rio_literal`
/// and blank nodes through `encode_rio_blank_node`, which reads and updates `bnodes_map`.
fn encode_rio_term(
    &self,
    term: rio::Term<'_>,
    bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm, Self::Error> {
    match term {
        rio::Term::Literal(lit) => self.encode_rio_literal(lit),
        rio::Term::BlankNode(bnode) => self.encode_rio_blank_node(bnode, bnodes_map),
        rio::Term::NamedNode(iri_node) => self.encode_rio_named_node(iri_node),
    }
}
/// Encodes a rio quad into an [`EncodedQuad`].
///
/// The subject, predicate, object and graph name are encoded in that order, sharing
/// `bnodes_map` so that a blank node label is given the same id in every position.
/// A quad without graph name is encoded with [`EncodedTerm::DefaultGraph`].
fn encode_rio_quad(
    &self,
    quad: rio::Quad<'_>,
    bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedQuad, Self::Error> {
    let subject = self.encode_rio_subject(quad.subject, bnodes_map)?;
    let predicate = self.encode_rio_named_node(quad.predicate)?;
    let object = self.encode_rio_term(quad.object, bnodes_map)?;
    let graph_name = if let Some(name) = quad.graph_name {
        self.encode_rio_subject(name, bnodes_map)?
    } else {
        EncodedTerm::DefaultGraph
    };
    Ok(EncodedQuad {
        subject,
        predicate,
        object,
        graph_name,
    })
}
/// Encodes a rio triple into an [`EncodedQuad`] that uses the already encoded `graph_name`.
///
/// The subject, predicate and object are encoded in that order, sharing `bnodes_map` so
/// that a blank node label is given the same id in every position.
fn encode_rio_triple_in_graph(
    &self,
    triple: rio::Triple<'_>,
    graph_name: EncodedTerm,
    bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedQuad, Self::Error> {
    let subject = self.encode_rio_subject(triple.subject, bnodes_map)?;
    let predicate = self.encode_rio_named_node(triple.predicate)?;
    let object = self.encode_rio_term(triple.object, bnodes_map)?;
    Ok(EncodedQuad {
        subject,
        predicate,
        object,
        graph_name,
    })
}
/// Encodes the string `value` and returns its [`StrHash`], or `Self::Error` on failure.
///
/// This method has no default body here: each implementor provides it.
/// NOTE(review): presumably the implementor also records the string so the hash can be
/// resolved back later — confirm against the implementations.
fn encode_str(&self, value: &str) -> Result<StrHash, Self::Error>;
}

Loading…
Cancel
Save