From 6a2c59ab9fb5214427cfe07f24d392d454879389 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 26 May 2018 21:29:55 +0200 Subject: [PATCH] Uses uuid for encoding blank node ids --- Cargo.toml | 1 + src/lib.rs | 1 + src/model/data.rs | 69 +++++-------------- src/rio/ntriples/mod.rs | 9 +-- src/rio/ntriples/ntriples_grammar.rustpeg | 5 +- src/rio/turtle/mod.rs | 80 ++++++++++++----------- src/rio/turtle/turtle_grammar.rustpeg | 4 +- 7 files changed, 72 insertions(+), 97 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index e8a20744..4d91f657 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ travis-ci = { repository = "Tpt/rudf" } [dependencies] lazy_static = "1.0" url = "1.7" +uuid = { version = "0.6", features = ["v4"] } [build-dependencies] peg = "0.5" diff --git a/src/lib.rs b/src/lib.rs index 398e2f29..1777b8b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,7 @@ #[macro_use] extern crate lazy_static; extern crate url; +extern crate uuid; pub mod model; pub mod rio; diff --git a/src/model/data.rs b/src/model/data.rs index a08c0876..f8774a4e 100644 --- a/src/model/data.rs +++ b/src/model/data.rs @@ -4,9 +4,10 @@ use std::fmt; use std::option::Option; use std::str::FromStr; use std::sync::Arc; -use std::sync::Mutex; use url::ParseError; use url::Url; +use uuid::Uuid; +use std::ops::Deref; /// A RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] @@ -31,6 +32,14 @@ impl NamedNode { } } +impl Deref for NamedNode { + type Target = Url; + + fn deref(&self) -> &Url { + &self.iri + } +} + impl fmt::Display for NamedNode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "<{}>", self.iri) @@ -48,56 +57,29 @@ impl FromStr for NamedNode { /// A RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct BlankNode { - id: String, + id: Uuid, } -impl BlankNode { - /// Builds a RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id - pub fn new(id: impl Into) -> Self { - Self { id: id.into() } - } +impl Deref for BlankNode { + type Target = Uuid; - pub fn value(&self) -> &str { + fn deref(&self) -> &Uuid { &self.id } } impl fmt::Display for BlankNode { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "_:{}", self.value()) - } -} - -/// An utility structure to generate bank node ids in a thread safe way -#[derive(Debug)] -struct U64IDProvider { - counter: Mutex, -} - -impl U64IDProvider { - pub fn next(&self) -> u64 { - let mut id = self.counter.lock().unwrap(); - *id += 1; - *id + write!(f, "_:{}", self.id) } } -impl Default for U64IDProvider { - fn default() -> Self { - U64IDProvider { - counter: Mutex::new(0), - } - } -} - -lazy_static! { - static ref U64_ID_PROVIDER: U64IDProvider = U64IDProvider::default(); -} - impl Default for BlankNode { /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id fn default() -> Self { - BlankNode::new(U64_ID_PROVIDER.next().to_string()) + BlankNode { + id: Uuid::new_v4() + } } } @@ -228,13 +210,6 @@ pub enum NamedOrBlankNode { } impl NamedOrBlankNode { - pub fn value(&self) -> &str { - match self { - NamedOrBlankNode::NamedNode(node) => node.value(), - NamedOrBlankNode::BlankNode(node) => node.value(), - } - } - pub fn is_named_node(&self) -> bool { match self { NamedOrBlankNode::NamedNode(_) => true, @@ -281,14 +256,6 @@ pub enum Term { } impl Term { - pub fn value(&self) -> &str { - match self { - Term::NamedNode(node) => node.value(), - Term::BlankNode(node) => node.value(), - Term::Literal(literal) => literal.value(), - } - } - pub fn is_named_node(&self) -> bool { match self { Term::NamedNode(_) => true, diff --git a/src/rio/ntriples/mod.rs b/src/rio/ntriples/mod.rs index 74979a1a..bdc8e26e 100644 --- a/src/rio/ntriples/mod.rs +++ b/src/rio/ntriples/mod.rs @@ -9,13 +9,14 @@ use rio::*; use std::io::BufRead; use std::io::BufReader; use std::io::Read; +use std::collections::BTreeMap; pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator> { //TODO: use read_lines to avoid allocations - BufReader::new(source) - .lines() - .flat_map(move |line| match line { - Ok(line) => match grammar::triple(line.as_str()) { + let lines = BufReader::new(source).lines(); + let mut bnodes_map: BTreeMap = BTreeMap::default(); + lines.flat_map(move |line| match line { + Ok(line) => match grammar::triple(line.as_str(), &mut bnodes_map) { Ok(triple) => Some(Ok(triple?)), Err(error) => Some(Err(RioError::new(error))), }, diff --git a/src/rio/ntriples/ntriples_grammar.rustpeg b/src/rio/ntriples/ntriples_grammar.rustpeg index 52cbf05b..05c6596a 100644 --- a/src/rio/ntriples/ntriples_grammar.rustpeg +++ b/src/rio/ntriples/ntriples_grammar.rustpeg @@ -4,6 +4,9 @@ use std::iter::FromIterator; use std::char; use std::str::FromStr; use model::data::*; +use std::collections::BTreeMap; + +#![arguments(bnodes_map: &mut BTreeMap)] //[2] #[pub] @@ -60,7 +63,7 @@ STRING_LITERAL_QUOTE_simple_char -> char = c: $([^\u{0022}\u{005c}\u{000a}\u{000 //[141s] BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { - BlankNode::new(b) + bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } //[10] diff --git a/src/rio/turtle/mod.rs b/src/rio/turtle/mod.rs index 4e1809ba..45e23ca4 100644 --- a/src/rio/turtle/mod.rs +++ b/src/rio/turtle/mod.rs @@ -2,47 +2,51 @@ mod grammar { include!(concat!(env!("OUT_DIR"), "/turtle_grammar.rs")); -} -use model::data::*; -use rio::*; -use std::collections::HashMap; -use std::io::BufReader; -use std::io::Read; -use url::ParseOptions; -use url::Url; - -//TODO: make private -pub struct ParserState { - pub base_uri: Option, - pub namespaces: HashMap, - pub cur_subject: Vec, - pub cur_predicate: Vec, -} + use model::data::*; + use rio::*; + use std::collections::HashMap; + use std::io::BufReader; + use std::io::Read; + use url::ParseOptions; + use url::Url; + use std::collections::BTreeMap; -impl ParserState { - fn url_parser<'a>(&'a self) -> ParseOptions<'a> { - Url::options().base_url(self.base_uri.as_ref()) + pub struct ParserState { + base_uri: Option, + namespaces: HashMap, + cur_subject: Vec, + cur_predicate: Vec, + bnodes_map: BTreeMap } -} -pub fn read_turtle<'a, R: Read + 'a>( - source: R, - base_uri: impl Into>, -) -> RioResult> { - let mut state = ParserState { - base_uri: base_uri.into(), - namespaces: HashMap::default(), - cur_subject: Vec::default(), - cur_predicate: Vec::default(), - }; - let mut triple_buffer = Vec::default(); - - let mut string_buffer = String::default(); - BufReader::new(source).read_to_string(&mut string_buffer)?; - - match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer) { - Ok(_) => Ok(triple_buffer.into_iter()), - Err(error) => Err(RioError::new(error)), + impl ParserState { + fn url_parser<'a>(&'a self) -> ParseOptions<'a> { + Url::options().base_url(self.base_uri.as_ref()) + } + } + + pub fn read_turtle<'a, R: Read + 'a>( + source: R, + base_uri: impl Into>, + ) -> RioResult> { + let mut state = ParserState { + base_uri: base_uri.into(), + namespaces: HashMap::default(), + cur_subject: Vec::default(), + cur_predicate: Vec::default(), + bnodes_map: BTreeMap::default() + }; + let mut triple_buffer = Vec::default(); + + let mut string_buffer = String::default(); + BufReader::new(source).read_to_string(&mut string_buffer)?; + + match turtleDoc(&string_buffer, &mut state, &mut triple_buffer) { + Ok(_) => Ok(triple_buffer.into_iter()), + Err(error) => Err(RioError::new(error)), + } } } + +pub use self::grammar::read_turtle; \ No newline at end of file diff --git a/src/rio/turtle/turtle_grammar.rustpeg b/src/rio/turtle/turtle_grammar.rustpeg index 966f8c0a..b4ffabb4 100644 --- a/src/rio/turtle/turtle_grammar.rustpeg +++ b/src/rio/turtle/turtle_grammar.rustpeg @@ -1,11 +1,9 @@ //See https://www.w3.org/TR/turtle/#sec-grammar use std::char; -use model::data::*; use model::vocab::rdf; use model::vocab::xsd; use std::iter; -use rio::turtle::ParserState; #![arguments(state: &mut ParserState, buffer: &mut Vec)] @@ -165,7 +163,7 @@ PrefixedName -> String = PNAME_LN / //[137s] BlankNode -> BlankNode = - b:BLANK_NODE_LABEL { BlankNode::new(b) } / + b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } / ANON { BlankNode::default() } //[18]