Makes store file load use the public API parser

Small slow done and simplifies code
pull/171/head
Tpt 3 years ago
parent 8606877e33
commit 704f60e094
  1. 192
      lib/src/storage/io.rs

@ -1,94 +1,40 @@
//! Utilities for I/O from the store //! Utilities for I/O from the store
use crate::error::invalid_input_error; use crate::error::invalid_input_error;
use crate::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerializer}; use crate::io::{
use crate::model::{BlankNode, GraphNameRef, LiteralRef, NamedNodeRef, Quad, QuadRef, Triple}; DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer,
};
use crate::model::{GraphNameRef, Quad, Triple};
use crate::storage::StorageLike; use crate::storage::StorageLike;
use oxiri::Iri;
use rio_api::model as rio;
use rio_api::parser::{QuadsParser, TriplesParser};
use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleError, TurtleParser};
use rio_xml::{RdfXmlError, RdfXmlParser};
use std::collections::HashMap;
use std::io; use std::io;
use std::io::{BufRead, Write}; use std::io::{BufRead, Write};
pub(crate) fn load_graph<S: StorageLike>( pub(crate) fn load_graph<S: StorageLike>(
storage: &S, store: &S,
reader: impl BufRead, reader: impl BufRead,
format: GraphFormat, format: GraphFormat,
to_graph_name: GraphNameRef<'_>, to_graph_name: GraphNameRef<'_>,
base_iri: Option<&str>, base_iri: Option<&str>,
) -> Result<(), StoreOrParseError<S::Error>> { ) -> Result<(), StoreOrParseError<S::Error>> {
let base_iri = if let Some(base_iri) = base_iri { let mut parser = GraphParser::from_format(format);
Some(Iri::parse(base_iri.into()).map_err(invalid_input_error)?) if let Some(base_iri) = base_iri {
} else { parser = parser
None .with_base_iri(base_iri)
}; .map_err(|e| StoreOrParseError::Parse(invalid_input_error(e)))?;
match format {
GraphFormat::NTriples => {
load_from_triple_parser(storage, NTriplesParser::new(reader), to_graph_name)
}
GraphFormat::Turtle => {
load_from_triple_parser(storage, TurtleParser::new(reader, base_iri), to_graph_name)
}
GraphFormat::RdfXml => {
load_from_triple_parser(storage, RdfXmlParser::new(reader, base_iri), to_graph_name)
}
} }
} for t in parser
.read_triples(reader)
fn load_from_triple_parser<S: StorageLike, P: TriplesParser>( .map_err(StoreOrParseError::Parse)?
storage: &S, {
mut parser: P, store
to_graph_name: GraphNameRef<'_>, .insert(
) -> Result<(), StoreOrParseError<S::Error>> t.map_err(StoreOrParseError::Parse)?
where .as_ref()
StoreOrParseError<S::Error>: From<P::Error>, .in_graph(to_graph_name),
{ )
let mut bnode_map = HashMap::default();
parser.parse_all(&mut move |t| {
storage
.insert(quad_from_rio_triple(&t, to_graph_name, &mut bnode_map))
.map_err(StoreOrParseError::Store)?; .map_err(StoreOrParseError::Store)?;
Ok(())
})
}
fn quad_from_rio_triple<'a>(
triple: &rio::Triple<'a>,
graph_name: GraphNameRef<'a>,
bnode_map: &'a mut HashMap<String, BlankNode>,
) -> QuadRef<'a> {
// we insert the blank nodes
if let rio::NamedOrBlankNode::BlankNode(node) = triple.subject {
bnode_map.entry(node.id.to_owned()).or_default();
}
if let rio::Term::BlankNode(node) = triple.object {
bnode_map.entry(node.id.to_owned()).or_default();
}
QuadRef {
subject: match triple.subject {
rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(),
rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(),
},
predicate: NamedNodeRef::new_unchecked(triple.predicate.iri),
object: match triple.object {
rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(),
rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(),
rio::Term::Literal(literal) => match literal {
rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value),
rio::Literal::LanguageTaggedString { value, language } => {
LiteralRef::new_language_tagged_literal_unchecked(value, language)
}
rio::Literal::Typed { value, datatype } => {
LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri))
}
}
.into(),
},
graph_name,
} }
Ok(())
} }
pub fn dump_graph( pub fn dump_graph(
@ -109,75 +55,21 @@ pub(crate) fn load_dataset<S: StorageLike>(
format: DatasetFormat, format: DatasetFormat,
base_iri: Option<&str>, base_iri: Option<&str>,
) -> Result<(), StoreOrParseError<S::Error>> { ) -> Result<(), StoreOrParseError<S::Error>> {
let base_iri = if let Some(base_iri) = base_iri { let mut parser = DatasetParser::from_format(format);
Some(Iri::parse(base_iri.into()).map_err(invalid_input_error)?) if let Some(base_iri) = base_iri {
} else { parser = parser
None .with_base_iri(base_iri)
}; .map_err(|e| StoreOrParseError::Parse(invalid_input_error(e)))?;
match format {
DatasetFormat::NQuads => load_from_quad_parser(store, NQuadsParser::new(reader)),
DatasetFormat::TriG => load_from_quad_parser(store, TriGParser::new(reader, base_iri)),
} }
} for t in parser
.read_quads(reader)
fn load_from_quad_parser<S: StorageLike, P: QuadsParser>( .map_err(StoreOrParseError::Parse)?
store: &S, {
mut parser: P,
) -> Result<(), StoreOrParseError<S::Error>>
where
StoreOrParseError<S::Error>: From<P::Error>,
{
let mut bnode_map = HashMap::default();
parser.parse_all(&mut move |q| {
store store
.insert(quad_from_rio(&q, &mut bnode_map)) .insert(t.map_err(StoreOrParseError::Parse)?.as_ref())
.map_err(StoreOrParseError::Store)?; .map_err(StoreOrParseError::Store)?;
Ok(())
})
}
fn quad_from_rio<'a>(
quad: &rio::Quad<'a>,
bnode_map: &'a mut HashMap<String, BlankNode>,
) -> QuadRef<'a> {
// we insert the blank nodes
if let rio::NamedOrBlankNode::BlankNode(node) = quad.subject {
bnode_map.entry(node.id.to_owned()).or_default();
}
if let rio::Term::BlankNode(node) = quad.object {
bnode_map.entry(node.id.to_owned()).or_default();
}
if let Some(rio::NamedOrBlankNode::BlankNode(node)) = quad.graph_name {
bnode_map.entry(node.id.to_owned()).or_default();
}
QuadRef {
subject: match quad.subject {
rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(),
rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(),
},
predicate: NamedNodeRef::new_unchecked(quad.predicate.iri),
object: match quad.object {
rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(),
rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(),
rio::Term::Literal(literal) => match literal {
rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value),
rio::Literal::LanguageTaggedString { value, language } => {
LiteralRef::new_language_tagged_literal_unchecked(value, language)
}
rio::Literal::Typed { value, datatype } => {
LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri))
}
}
.into(),
},
graph_name: match quad.graph_name {
Some(rio::NamedOrBlankNode::NamedNode(node)) => {
NamedNodeRef::new_unchecked(node.iri).into()
}
Some(rio::NamedOrBlankNode::BlankNode(node)) => bnode_map[node.id].as_ref().into(),
None => GraphNameRef::DefaultGraph,
},
} }
Ok(())
} }
pub fn dump_dataset( pub fn dump_dataset(
@ -192,29 +84,11 @@ pub fn dump_dataset(
writer.finish() writer.finish()
} }
pub(crate) enum StoreOrParseError<S> { pub enum StoreOrParseError<S> {
Store(S), Store(S),
Parse(io::Error), Parse(io::Error),
} }
impl<S> From<TurtleError> for StoreOrParseError<S> {
fn from(error: TurtleError) -> Self {
Self::Parse(error.into())
}
}
impl<S> From<RdfXmlError> for StoreOrParseError<S> {
fn from(error: RdfXmlError) -> Self {
Self::Parse(error.into())
}
}
impl<S> From<io::Error> for StoreOrParseError<S> {
fn from(error: io::Error) -> Self {
Self::Parse(error)
}
}
impl From<StoreOrParseError<io::Error>> for io::Error { impl From<StoreOrParseError<io::Error>> for io::Error {
fn from(error: StoreOrParseError<io::Error>) -> Self { fn from(error: StoreOrParseError<io::Error>) -> Self {
match error { match error {

Loading…
Cancel
Save