From 98f5f35dbb53fa5089c0cf78749271593dfa220b Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 5 Jun 2021 12:51:13 +0200 Subject: [PATCH] Drops rio specific numerical encoder --- lib/src/storage/io.rs | 90 +++++++++++++++++++++++-- lib/src/storage/numeric_encoder.rs | 103 ----------------------------- 2 files changed, 84 insertions(+), 109 deletions(-) diff --git a/lib/src/storage/io.rs b/lib/src/storage/io.rs index 8e331945..9b8cacc1 100644 --- a/lib/src/storage/io.rs +++ b/lib/src/storage/io.rs @@ -2,10 +2,11 @@ use crate::error::invalid_input_error; use crate::io::{DatasetFormat, DatasetSerializer, GraphFormat, GraphSerializer}; -use crate::model::{GraphNameRef, Quad, Triple}; +use crate::model::{BlankNode, GraphNameRef, LiteralRef, NamedNodeRef, Quad, QuadRef, Triple}; use crate::storage::numeric_encoder::WriteEncoder; use crate::storage::StorageLike; use oxiri::Iri; +use rio_api::model as rio; use rio_api::parser::{QuadsParser, TriplesParser}; use rio_turtle::{NQuadsParser, NTriplesParser, TriGParser, TurtleError, TurtleParser}; use rio_xml::{RdfXmlError, RdfXmlParser}; @@ -47,18 +48,51 @@ where StoreOrParseError: From, { let mut bnode_map = HashMap::default(); - let to_graph_name = storage - .encode_graph_name(to_graph_name) - .map_err(StoreOrParseError::Store)?; parser.parse_all(&mut move |t| { let quad = storage - .encode_rio_triple_in_graph(t, to_graph_name.clone(), &mut bnode_map) + .encode_quad(quad_from_rio_triple(&t, to_graph_name, &mut bnode_map)) .map_err(StoreOrParseError::Store)?; storage.insert(&quad).map_err(StoreOrParseError::Store)?; Ok(()) }) } +fn quad_from_rio_triple<'a>( + triple: &rio::Triple<'a>, + graph_name: GraphNameRef<'a>, + bnode_map: &'a mut HashMap, +) -> QuadRef<'a> { + // we insert the blank nodes + if let rio::NamedOrBlankNode::BlankNode(node) = triple.subject { + bnode_map.entry(node.id.to_owned()).or_default(); + } + if let rio::Term::BlankNode(node) = triple.object { + bnode_map.entry(node.id.to_owned()).or_default(); + } + QuadRef { + subject: match triple.subject { + rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), + rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(), + }, + predicate: NamedNodeRef::new_unchecked(triple.predicate.iri), + object: match triple.object { + rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), + rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(), + rio::Term::Literal(literal) => match literal { + rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value), + rio::Literal::LanguageTaggedString { value, language } => { + LiteralRef::new_language_tagged_literal_unchecked(value, language) + } + rio::Literal::Typed { value, datatype } => { + LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri)) + } + } + .into(), + }, + graph_name, + } +} + pub fn dump_graph( triples: impl Iterator>, writer: impl Write, @@ -98,13 +132,57 @@ where let mut bnode_map = HashMap::default(); parser.parse_all(&mut move |q| { let quad = store - .encode_rio_quad(q, &mut bnode_map) + .encode_quad(quad_from_rio(&q, &mut bnode_map)) .map_err(StoreOrParseError::Store)?; store.insert(&quad).map_err(StoreOrParseError::Store)?; Ok(()) }) } +fn quad_from_rio<'a>( + quad: &rio::Quad<'a>, + bnode_map: &'a mut HashMap, +) -> QuadRef<'a> { + // we insert the blank nodes + if let rio::NamedOrBlankNode::BlankNode(node) = quad.subject { + bnode_map.entry(node.id.to_owned()).or_default(); + } + if let rio::Term::BlankNode(node) = quad.object { + bnode_map.entry(node.id.to_owned()).or_default(); + } + if let Some(rio::NamedOrBlankNode::BlankNode(node)) = quad.graph_name { + bnode_map.entry(node.id.to_owned()).or_default(); + } + QuadRef { + subject: match quad.subject { + rio::NamedOrBlankNode::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), + rio::NamedOrBlankNode::BlankNode(node) => bnode_map[node.id].as_ref().into(), + }, + predicate: NamedNodeRef::new_unchecked(quad.predicate.iri), + object: match quad.object { + rio::Term::NamedNode(node) => NamedNodeRef::new_unchecked(node.iri).into(), + rio::Term::BlankNode(node) => bnode_map[node.id].as_ref().into(), + rio::Term::Literal(literal) => match literal { + rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value), + rio::Literal::LanguageTaggedString { value, language } => { + LiteralRef::new_language_tagged_literal_unchecked(value, language) + } + rio::Literal::Typed { value, datatype } => { + LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri)) + } + } + .into(), + }, + graph_name: match quad.graph_name { + Some(rio::NamedOrBlankNode::NamedNode(node)) => { + NamedNodeRef::new_unchecked(node.iri).into() + } + Some(rio::NamedOrBlankNode::BlankNode(node)) => bnode_map[node.id].as_ref().into(), + None => GraphNameRef::DefaultGraph, + }, + } +} + pub fn dump_dataset( quads: impl Iterator>, writer: impl Write, diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index 36424c52..df3013ef 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -5,10 +5,7 @@ use crate::model::xsd::*; use crate::model::*; use crate::sparql::EvaluationError; use crate::storage::small_string::SmallString; -use rand::random; -use rio_api::model as rio; use siphasher::sip128::{Hasher128, SipHasher24}; -use std::collections::HashMap; use std::convert::{TryFrom, TryInto}; use std::error::Error; use std::fmt::Debug; @@ -900,106 +897,6 @@ pub(crate) trait WriteEncoder: StrContainer { }) } - fn encode_triple_in_graph( - &self, - triple: TripleRef<'_>, - graph_name: EncodedTerm, - ) -> Result { - Ok(EncodedQuad { - subject: self.encode_subject(triple.subject)?, - predicate: self.encode_named_node(triple.predicate)?, - object: self.encode_term(triple.object)?, - graph_name, - }) - } - - fn encode_rio_named_node( - &self, - named_node: rio::NamedNode<'_>, - ) -> Result { - self.encode_named_node(NamedNodeRef::new_unchecked(named_node.iri)) - } - - fn encode_rio_blank_node( - &self, - blank_node: rio::BlankNode<'_>, - bnodes_map: &mut HashMap, - ) -> Result { - Ok(if let Some(id) = bnodes_map.get(blank_node.id) { - EncodedTerm::NumericalBlankNode { id: *id } - } else { - let id = random::(); - bnodes_map.insert(blank_node.id.to_owned(), id); - EncodedTerm::NumericalBlankNode { id } - }) - } - fn encode_rio_literal(&self, literal: rio::Literal<'_>) -> Result { - self.encode_literal(match literal { - rio::Literal::Simple { value } => LiteralRef::new_simple_literal(value), - rio::Literal::LanguageTaggedString { value, language } => { - LiteralRef::new_language_tagged_literal_unchecked(value, language) - } - rio::Literal::Typed { value, datatype } => { - LiteralRef::new_typed_literal(value, NamedNodeRef::new_unchecked(datatype.iri)) - } - }) - } - - fn encode_rio_subject( - &self, - term: rio::NamedOrBlankNode<'_>, - bnodes_map: &mut HashMap, - ) -> Result { - match term { - rio::NamedOrBlankNode::NamedNode(named_node) => self.encode_rio_named_node(named_node), - rio::NamedOrBlankNode::BlankNode(blank_node) => { - self.encode_rio_blank_node(blank_node, bnodes_map) - } - } - } - - fn encode_rio_term( - &self, - term: rio::Term<'_>, - bnodes_map: &mut HashMap, - ) -> Result { - match term { - rio::Term::NamedNode(named_node) => self.encode_rio_named_node(named_node), - rio::Term::BlankNode(blank_node) => self.encode_rio_blank_node(blank_node, bnodes_map), - rio::Term::Literal(literal) => self.encode_rio_literal(literal), - } - } - - fn encode_rio_quad( - &self, - quad: rio::Quad<'_>, - bnodes_map: &mut HashMap, - ) -> Result { - Ok(EncodedQuad { - subject: self.encode_rio_subject(quad.subject, bnodes_map)?, - predicate: self.encode_rio_named_node(quad.predicate)?, - object: self.encode_rio_term(quad.object, bnodes_map)?, - graph_name: match quad.graph_name { - Some(graph_name) => self.encode_rio_subject(graph_name, bnodes_map)?, - None => EncodedTerm::DefaultGraph, - }, - }) - } - - fn encode_rio_triple_in_graph( - &self, - triple: rio::Triple<'_>, - graph_name: EncodedTerm, - bnodes_map: &mut HashMap, - ) -> Result { - Ok(EncodedQuad { - subject: self.encode_rio_subject(triple.subject, bnodes_map)?, - predicate: self.encode_rio_named_node(triple.predicate)?, - object: self.encode_rio_term(triple.object, bnodes_map)?, - graph_name, - }) - } - fn encode_str(&self, value: &str) -> Result; }