From 704f60e0945be68ada51d0d7613294ce04d7c711 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 12 Jun 2021 08:59:41 +0200 Subject: [PATCH] Makes store file load use the public API parser Small slow down and simplifies code --- lib/src/storage/io.rs | 192 ++++++++---------------------------------- 1 file changed, 33 insertions(+), 159 deletions(-) diff --git a/lib/src/storage/io.rs b/lib/src/storage/io.rs index 29457509..c7eaa8d4 100644 --- a/lib/src/storage/io.rs +++ b/lib/src/storage/io.rs @@ -1,94 +1,40 @@ //! Utilities for I/O from the store use crate::error::invalid_input_error; -use crate::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerializer}; -use crate::model::{BlankNode, GraphNameRef, LiteralRef, NamedNodeRef, Quad, QuadRef, Triple}; +use crate::io::{ + DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer, +}; +use crate::model::{GraphNameRef, Quad, Triple}; use crate::storage::StorageLike; -use oxiri::Iri; -use rio_api::model as rio; -use rio_api::parser::{QuadsParser, TriplesParser}; -use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleError, TurtleParser}; -use rio_xml::{RdfXmlError, RdfXmlParser}; -use std::collections::HashMap; use std::io; use std::io::{BufRead, Write}; pub(crate) fn load_graph( - storage: &S, + store: &S, reader: impl BufRead, format: GraphFormat, to_graph_name: GraphNameRef<'_>, base_iri: Option<&str>, ) -> Result<(), StoreOrParseError> { - let base_iri = if let Some(base_iri) = base_iri { - Some(Iri::parse(base_iri.into()).map_err(invalid_input_error)?) 
- } else { - None - }; - match format { - GraphFormat::NTriples => { - load_from_triple_parser(storage, NTriplesParser::new(reader), to_graph_name) - } - GraphFormat::Turtle => { - load_from_triple_parser(storage, TurtleParser::new(reader, base_iri), to_graph_name) - } - GraphFormat::RdfXml => { - load_from_triple_parser(storage, RdfXmlParser::new(reader, base_iri), to_graph_name) - } + let mut parser = GraphParser::from_format(format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| StoreOrParseError::Parse(invalid_input_error(e)))?; } -} - -fn load_from_triple_parser( - storage: &S, - mut parser: P, - to_graph_name: GraphNameRef<'_>, -) -> Result<(), StoreOrParseError> -where - StoreOrParseError: From, -{ - let mut bnode_map = HashMap::default(); - parser.parse_all(&mut move |t| { - storage - .insert(quad_from_rio_triple(&t, to_graph_name, &mut bnode_map)) + for t in parser + .read_triples(reader) + .map_err(StoreOrParseError::Parse)? + { + store + .insert( + t.map_err(StoreOrParseError::Parse)? 
+ .as_ref() + .in_graph(to_graph_name), + ) .map_err(StoreOrParseError::Store)?; - Ok(()) - }) -} - -fn quad_from_rio_triple<'a>( - triple: &rio::Triple<'a>, - graph_name: GraphNameRef<'a>, - bnode_map: &'a mut HashMap, -) -> QuadRef<'a> { - // we insert the blank nodes - if let rio::NamedOrBlankNode::BlankNode(node) = triple.subject { - bnode_map.entry(node.id.to_owned()).or_default(); - } - if let rio::Term::BlankNode(node) = triple.object { - bnode_map.entry(node.id.to_owned()).or_default(); - } - QuadRef { - subject: match triple.subject { - rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), - rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(), - }, - predicate: NamedNodeRef::new_unchecked(triple.predicate.iri), - object: match triple.object { - rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), - rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(), - rio::Term::Literal(literal) => match literal { - rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value), - rio::Literal::LanguageTaggedString { value, language } => { - LiteralRef::new_language_tagged_literal_unchecked(value, language) - } - rio::Literal::Typed { value, datatype } => { - LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri)) - } - } - .into(), - }, - graph_name, } + Ok(()) } pub fn dump_graph( @@ -109,75 +55,21 @@ pub(crate) fn load_dataset( format: DatasetFormat, base_iri: Option<&str>, ) -> Result<(), StoreOrParseError> { - let base_iri = if let Some(base_iri) = base_iri { - Some(Iri::parse(base_iri.into()).map_err(invalid_input_error)?) 
- } else { - None - }; - match format { - DatasetFormat::NQuads => load_from_quad_parser(store, NQuadsParser::new(reader)), - DatasetFormat::TriG => load_from_quad_parser(store, TriGParser::new(reader, base_iri)), + let mut parser = DatasetParser::from_format(format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| StoreOrParseError::Parse(invalid_input_error(e)))?; } -} - -fn load_from_quad_parser( - store: &S, - mut parser: P, -) -> Result<(), StoreOrParseError> -where - StoreOrParseError: From, -{ - let mut bnode_map = HashMap::default(); - parser.parse_all(&mut move |q| { + for t in parser + .read_quads(reader) + .map_err(StoreOrParseError::Parse)? + { store - .insert(quad_from_rio(&q, &mut bnode_map)) + .insert(t.map_err(StoreOrParseError::Parse)?.as_ref()) .map_err(StoreOrParseError::Store)?; - Ok(()) - }) -} - -fn quad_from_rio<'a>( - quad: &rio::Quad<'a>, - bnode_map: &'a mut HashMap, -) -> QuadRef<'a> { - // we insert the blank nodes - if let rio::NamedOrBlankNode::BlankNode(node) = quad.subject { - bnode_map.entry(node.id.to_owned()).or_default(); - } - if let rio::Term::BlankNode(node) = quad.object { - bnode_map.entry(node.id.to_owned()).or_default(); - } - if let Some(rio::NamedOrBlankNode::BlankNode(node)) = quad.graph_name { - bnode_map.entry(node.id.to_owned()).or_default(); - } - QuadRef { - subject: match quad.subject { - rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), - rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(), - }, - predicate: NamedNodeRef::new_unchecked(quad.predicate.iri), - object: match quad.object { - rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), - rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(), - rio::Term::Literal(literal) => match literal { - rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value), - rio::Literal::LanguageTaggedString { value, 
language } => { - LiteralRef::new_language_tagged_literal_unchecked(value, language) - } - rio::Literal::Typed { value, datatype } => { - LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri)) - } - } - .into(), - }, - graph_name: match quad.graph_name { - Some(rio::NamedOrBlankNode::NamedNode(node)) => { - NamedNodeRef::new_unchecked(node.iri).into() - } - Some(rio::NamedOrBlankNode::BlankNode(node)) => bnode_map[node.id].as_ref().into(), - None => GraphNameRef::DefaultGraph, - }, } + Ok(()) } pub fn dump_dataset( @@ -192,29 +84,11 @@ pub fn dump_dataset( writer.finish() } -pub(crate) enum StoreOrParseError { +pub enum StoreOrParseError { Store(S), Parse(io::Error), } -impl From for StoreOrParseError { - fn from(error: TurtleError) -> Self { - Self::Parse(error.into()) - } -} - -impl From for StoreOrParseError { - fn from(error: RdfXmlError) -> Self { - Self::Parse(error.into()) - } -} - -impl From for StoreOrParseError { - fn from(error: io::Error) -> Self { - Self::Parse(error) - } -} - impl From> for io::Error { fn from(error: StoreOrParseError) -> Self { match error {