From 5c90fab6cc83f597be2f5d9c1e443beb632b03a9 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 25 May 2018 22:04:15 +0200 Subject: [PATCH] Simplifies data objects factory --- src/model/data.rs | 140 +++++++++------------- src/rio/ntriples/mod.rs | 10 +- src/rio/ntriples/ntriples_grammar.rustpeg | 8 +- src/rio/turtle/mod.rs | 5 +- src/rio/turtle/turtle_grammar.rustpeg | 22 ++-- tests/rdf_test_cases.rs | 68 ++++------- 6 files changed, 101 insertions(+), 152 deletions(-) diff --git a/src/model/data.rs b/src/model/data.rs index af2ee237..90073dda 100644 --- a/src/model/data.rs +++ b/src/model/data.rs @@ -68,6 +68,39 @@ impl fmt::Display for BlankNode { } } +/// A utility structure to generate blank node ids in a thread-safe way +#[derive(Debug)] +struct U64IDProvider { + counter: Mutex<u64>, +} + +impl U64IDProvider { + pub fn next(&self) -> u64 { + let mut id = self.counter.lock().unwrap(); + *id += 1; + *id + } +} + +impl Default for U64IDProvider { + fn default() -> Self { + U64IDProvider { + counter: Mutex::new(0), + } + } +} + +lazy_static! { + static ref U64_ID_PROVIDER: U64IDProvider = U64IDProvider::default(); +} + +impl Default for BlankNode { + /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id + fn default() -> Self { + BlankNode::new(U64_ID_PROVIDER.next().to_string()) + } +} + /// A RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub enum Literal { @@ -86,6 +119,30 @@ lazy_static! 
{ } impl Literal { + /// Builds a RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) + pub fn new_simple_literal(value: impl Into) -> Self { + Literal::SimpleLiteral(value.into()) + } + + /// Builds a RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri) + pub fn new_typed_literal(value: impl Into, datatype: impl Into) -> Self { + Literal::TypedLiteral { + value: value.into(), + datatype: datatype.into(), + } + } + + /// Builds a RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + pub fn new_language_tagged_literal( + value: impl Into, + language: impl Into, + ) -> Self { + Literal::LanguageTaggedString { + value: value.into(), + language: language.into(), + } + } + /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) pub fn value(&self) -> &str { match self { @@ -453,86 +510,3 @@ impl QuadLike for Quad { return self.graph_name; } } - -/// An utility structure to generate bank node ids in a thread safe way -#[derive(Debug, Clone)] -struct U64IDProvider { - counter: Arc>, -} - -impl U64IDProvider { - pub fn next(&self) -> u64 { - let mut id = self.counter.lock().unwrap(); - *id += 1; - *id - } -} - -impl Default for U64IDProvider { - fn default() -> Self { - U64IDProvider { - counter: Arc::new(Mutex::new(0)), - } - } -} - -/// A structure creating RDF elements -#[derive(Debug, Clone)] -pub struct DataFactory { - blank_node_id_provider: U64IDProvider, -} - -impl Default for DataFactory { - fn default() -> Self { - DataFactory { - blank_node_id_provider: U64IDProvider::default(), - } - } -} - -impl DataFactory { - /// Builds a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) - pub fn named_node(&self, iri: impl Into) -> NamedNode { - NamedNode::new(iri) - } - - /// Builds a RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id - pub fn 
blank_node(&self, id: impl Into) -> BlankNode { - BlankNode::new(id) - } - - /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id - pub fn new_blank_node(&self) -> BlankNode { - BlankNode::new(self.blank_node_id_provider.next().to_string()) - } - - /// Builds a RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) - pub fn simple_literal(&self, value: impl Into) -> Literal { - Literal::SimpleLiteral(value.into()) - } - - /// Builds a RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri) - pub fn typed_literal( - &self, - value: impl Into, - datatype: impl Into, - ) -> Literal { - //TODO: find the best representation - Literal::TypedLiteral { - value: value.into(), - datatype: datatype.into(), - } - } - - /// Builds a RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) - pub fn language_tagged_literal( - &self, - value: impl Into, - language: impl Into, - ) -> Literal { - Literal::LanguageTaggedString { - value: value.into(), - language: language.into(), - } - } -} diff --git a/src/rio/ntriples/mod.rs b/src/rio/ntriples/mod.rs index d1c2a6a6..3dd36233 100644 --- a/src/rio/ntriples/mod.rs +++ b/src/rio/ntriples/mod.rs @@ -10,16 +10,12 @@ use std::io::BufRead; use std::io::BufReader; use std::io::Read; -pub fn read_ntriples<'a, R: Read + 'a>( - source: R, - data_factory: &'a DataFactory, -) -> impl Iterator> { - let factory = data_factory.clone(); //TODO: try to avoid clone here - //TODO: use read_lines to avoid allocations +pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator> { + //TODO: use read_lines to avoid allocations BufReader::new(source) .lines() .flat_map(move |line| match line { - Ok(line) => match grammar::triple(line.as_str(), &factory) { + Ok(line) => match grammar::triple(line.as_str()) { Ok(triple) => Some(Ok(triple?)), Err(error) => 
Some(Err(RioError::new(error))), }, diff --git a/src/rio/ntriples/ntriples_grammar.rustpeg b/src/rio/ntriples/ntriples_grammar.rustpeg index 931ea9f0..52cbf05b 100644 --- a/src/rio/ntriples/ntriples_grammar.rustpeg +++ b/src/rio/ntriples/ntriples_grammar.rustpeg @@ -5,8 +5,6 @@ use std::char; use std::str::FromStr; use model::data::*; -#![arguments(data_factory: &DataFactory)] - //[2] #[pub] triple -> Option = @@ -31,8 +29,8 @@ object -> Term = //[6] literal -> Literal = - v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { data_factory.typed_literal(v, t) } / - v: STRING_LITERAL_QUOTE _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / + v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } / + v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / v: STRING_LITERAL_QUOTE { v.into() } @@ -62,7 +60,7 @@ STRING_LITERAL_QUOTE_simple_char -> char = c: $([^\u{0022}\u{005c}\u{000a}\u{000 //[141s] BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { - data_factory.blank_node(b) + BlankNode::new(b) } //[10] diff --git a/src/rio/turtle/mod.rs b/src/rio/turtle/mod.rs index 912c33ee..64dfbff1 100644 --- a/src/rio/turtle/mod.rs +++ b/src/rio/turtle/mod.rs @@ -28,10 +28,8 @@ impl ParserState { pub fn read_turtle<'a, R: Read + 'a>( source: R, - data_factory: &'a DataFactory, base_uri: impl Into>, ) -> RioResult> { - let factory = data_factory.clone(); //TODO: try to avoid clone here let mut state = ParserState { base_uri: base_uri.into(), namespaces: HashMap::default(), @@ -41,8 +39,7 @@ pub fn read_turtle<'a, R: Read + 'a>( let mut string_buffer = String::default(); let mut triple_buffer = Vec::default(); match BufReader::new(source).read_to_string(&mut string_buffer) { - Ok(_) => match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer, &factory) - { + Ok(_) => match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer) { Ok(_) => 
Ok(triple_buffer.into_iter()), Err(error) => Err(RioError::new(error)), }, diff --git a/src/rio/turtle/turtle_grammar.rustpeg b/src/rio/turtle/turtle_grammar.rustpeg index 915b31f0..3964cfb3 100644 --- a/src/rio/turtle/turtle_grammar.rustpeg +++ b/src/rio/turtle/turtle_grammar.rustpeg @@ -8,7 +8,7 @@ use model::vocab::xsd; use std::iter; use rio::turtle::ParserState; -#![arguments(state: &mut ParserState, buffer: &mut Vec, data_factory: &DataFactory)] +#![arguments(state: &mut ParserState, buffer: &mut Vec)] //[1] #[pub] @@ -112,14 +112,14 @@ blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predica state.cur_subject.pop().ok_or("No subject found in the stack") } blankNodePropertyList_open -> () = "[" { - state.cur_subject.push(data_factory.new_blank_node().into()) + state.cur_subject.push(BlankNode::default().into()) } //[15] collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { let mut current_list_node = NamedOrBlankNode::from(rdf::NIL.clone()); for obj in o.into_iter().rev() { - let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node()); + let new_blank_node = NamedOrBlankNode::from(BlankNode::default()); buffer.push(Triple::new(new_blank_node.clone(), rdf::FIRST.clone(), obj)); buffer.push(Triple::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node)); current_list_node = new_blank_node; @@ -130,14 +130,14 @@ collection_value -> Term = o:object_value _ { o } //[16] NumericLiteral -> Literal = - d:$(DOUBLE) { data_factory.typed_literal(d, xsd::DOUBLE.clone()) } / - d:$(DECIMAL) { data_factory.typed_literal(d, xsd::DECIMAL.clone()) } / - i:$(INTEGER) { data_factory.typed_literal(i, xsd::INTEGER.clone()) } + d:$(DOUBLE) { Literal::new_typed_literal(d, xsd::DOUBLE.clone()) } / + d:$(DECIMAL) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } / + i:$(INTEGER) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) } //[128s] RDFLiteral -> Literal = - v:String _ "^^" _ t:iri { 
data_factory.typed_literal(v, t) } / - v:String _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / + v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / + v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / v:String { v.into() } //[133s] @@ -151,7 +151,7 @@ String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE //[135s] iri -> NamedNode = i:(IRIREF / PrefixedName) {? match state.url_parser().parse(&i) { - Ok(url) => Ok(data_factory.named_node(url)), + Ok(url) => Ok(NamedNode::new(url)), Err(error) => Err("IRI parsing failed") } } @@ -162,8 +162,8 @@ PrefixedName -> String = PNAME_LN / //[137s] BlankNode -> BlankNode = - b:BLANK_NODE_LABEL { data_factory.blank_node(b) } / - ANON { data_factory.new_blank_node() } + b:BLANK_NODE_LABEL { BlankNode::new(b) } / + ANON { BlankNode::default() } //[18] IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" { diff --git a/tests/rdf_test_cases.rs b/tests/rdf_test_cases.rs index 44434d5b..311bd950 100644 --- a/tests/rdf_test_cases.rs +++ b/tests/rdf_test_cases.rs @@ -1,29 +1,27 @@ -#[macro_use] -extern crate lazy_static; extern crate reqwest; extern crate rudf; extern crate url; use reqwest::Client; use rudf::model::data::*; +use rudf::model::vocab::rdf; use rudf::rio::RioError; use rudf::rio::RioResult; use rudf::rio::ntriples::read_ntriples; use rudf::rio::turtle::read_turtle; use std::collections::HashSet; use std::iter::FromIterator; +use std::str::FromStr; use url::Url; struct RDFClient { client: Client, - data_factory: DataFactory, } impl Default for RDFClient { fn default() -> Self { Self { client: Client::new(), - data_factory: DataFactory::default(), } } } @@ -31,18 +29,14 @@ impl Default for RDFClient { impl RDFClient { fn load_turtle(&self, uri: Url) -> RioResult> { match self.client.get(uri.clone()).send() { - Ok(response) => Ok(HashSet::from_iter(read_turtle( - response, - &self.data_factory, - Some(uri), - )?)), + Ok(response) => 
Ok(HashSet::from_iter(read_turtle(response, Some(uri))?)), Err(error) => Err(RioError::new(error)), } } fn load_ntriples(&self, uri: Url) -> RioResult> { match self.client.get(uri).send() { - Ok(response) => read_ntriples(response, &self.data_factory).collect(), + Ok(response) => read_ntriples(response).collect(), Err(error) => Err(RioError::new(error)), } } @@ -89,27 +83,21 @@ fn subject_for_predicate_object<'a>( #[test] fn turtle_w3c_testsuite() { let client = RDFClient::default(); - let data_factory = &client.data_factory; let manifest = client .load_turtle(Url::parse("https://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap()) .unwrap(); - let rdf_type = data_factory - .named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()); - let mf_action = data_factory.named_node( - Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action").unwrap(), + let mf_action = NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action", + ).unwrap(); + let rdfs_comment = NamedNode::from_str("http://www.w3.org/2000/01/rdf-schema#comment").unwrap(); + let rdft_test_turtle_positive_syntax = Term::from( + NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(), + ); + let rdft_test_turtle_negative_syntax = Term::from( + NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(), ); - let rdfs_comment = data_factory - .named_node(Url::parse("http://www.w3.org/2000/01/rdf-schema#comment").unwrap()); - let rdft_test_turtle_positive_syntax = - Term::from(data_factory.named_node( - Url::parse("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(), - )); - let rdft_test_turtle_negative_syntax = - Term::from(data_factory.named_node( - Url::parse("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(), - )); - subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_positive_syntax) + subjects_for_predicate_object(&manifest, 
&rdf::TYPE, &rdft_test_turtle_positive_syntax) .for_each(|test| { let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); if let Some(Term::NamedNode(file)) = @@ -124,7 +112,7 @@ fn turtle_w3c_testsuite() { } } }); - subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_negative_syntax) + subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_syntax) .for_each(|test| { let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); if let Some(Term::NamedNode(file)) = @@ -143,25 +131,21 @@ fn turtle_w3c_testsuite() { #[test] fn ntriples_w3c_testsuite() { let client = RDFClient::default(); - let data_factory = &client.data_factory; let manifest = client .load_turtle(Url::parse("https://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap()) .unwrap(); - let rdf_type = data_factory - .named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()); - let mf_action = data_factory.named_node( - Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action").unwrap(), + let mf_action = NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action", + ).unwrap(); + let rdfs_comment = NamedNode::from_str("http://www.w3.org/2000/01/rdf-schema#comment").unwrap(); + let rdft_test_ntriples_positive_syntax = Term::from( + NamedNode::from_str("http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax").unwrap(), + ); + let rdft_test_ntriples_negative_syntax = Term::from( + NamedNode::from_str("http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax").unwrap(), ); - let rdfs_comment = data_factory - .named_node(Url::parse("http://www.w3.org/2000/01/rdf-schema#comment").unwrap()); - let rdft_test_turtle_positive_syntax = Term::from(data_factory.named_node( - Url::parse("http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax").unwrap(), - )); - let rdft_test_turtle_negative_syntax = Term::from(data_factory.named_node( - 
Url::parse("http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax").unwrap(), - )); - subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_positive_syntax) + subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_positive_syntax) .for_each(|test| { let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); if let Some(Term::NamedNode(file)) = @@ -176,7 +160,7 @@ fn ntriples_w3c_testsuite() { } } }); - subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_negative_syntax) + subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_negative_syntax) .for_each(|test| { let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); if let Some(Term::NamedNode(file)) =