diff --git a/Cargo.toml b/Cargo.toml index 0be8054e..e8a20744 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,3 +20,6 @@ url = "1.7" [build-dependencies] peg = "0.5" + +[dev-dependencies] +reqwest = "0.8" \ No newline at end of file diff --git a/src/model/data.rs b/src/model/data.rs index bbd4cc21..b9dbc750 100644 --- a/src/model/data.rs +++ b/src/model/data.rs @@ -372,7 +372,9 @@ impl Default for DataFactory { impl DataFactory { /// Builds a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) pub fn named_node(&self, iri: impl Into) -> NamedNode { - NamedNode { iri: Arc::new(iri.into()) } + NamedNode { + iri: Arc::new(iri.into()), + } } /// Builds a RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id diff --git a/src/rio/turtle/mod.rs b/src/rio/turtle/mod.rs index b82d1ec1..912c33ee 100644 --- a/src/rio/turtle/mod.rs +++ b/src/rio/turtle/mod.rs @@ -9,8 +9,8 @@ use rio::*; use std::collections::HashMap; use std::io::BufReader; use std::io::Read; -use url::Url; use url::ParseOptions; +use url::Url; //TODO: make private pub struct ParserState { @@ -29,7 +29,7 @@ impl ParserState { pub fn read_turtle<'a, R: Read + 'a>( source: R, data_factory: &'a DataFactory, - base_uri: impl Into> + base_uri: impl Into>, ) -> RioResult> { let factory = data_factory.clone(); //TODO: try to avoid clone here let mut state = ParserState { diff --git a/src/rio/turtle/turtle_grammar.rustpeg b/src/rio/turtle/turtle_grammar.rustpeg index ad435ece..3273e3ac 100644 --- a/src/rio/turtle/turtle_grammar.rustpeg +++ b/src/rio/turtle/turtle_grammar.rustpeg @@ -182,7 +182,7 @@ PNAME_LN -> String = ns:$(PNAME_NS) local:$(PN_LOCAL) {? } //[141s] -BLANK_NODE_LABEL -> &'input str = "_:" b:$((PN_CHARS_U / [0-9]) ((PN_CHARS / ".")* PN_CHARS)?) { +BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { //TODO unescape b } @@ -199,7 +199,7 @@ INTEGER -> () = [+-]? [0-9]+ DECIMAL -> () = [+-]? [0-9]* "." [0-9]+ //[21] -DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* EXPONENT / "." [0-9]+ EXPONENT / [0-9]+ EXPONENT) +DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT //[154s] EXPONENT -> () = [eE] [+-]? [0-9]+ @@ -208,25 +208,35 @@ EXPONENT -> () = [eE] [+-]? [0-9]+ STRING_LITERAL_QUOTE -> String = "\"" l:((STRING_LITERAL_QUOTE_simple_char / ECHAR / UCHAR)*) "\"" { l.into_iter().collect() } -STRING_LITERAL_QUOTE_simple_char -> char = c:$([^\u{0022}\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } +STRING_LITERAL_QUOTE_simple_char -> char = c:$([^"\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } //[23] STRING_LITERAL_SINGLE_QUOTE -> String = "'" l:((STRING_LITERAL_SINGLE_QUOTE_simple_char / ECHAR / UCHAR)*) "'" { l.into_iter().collect() } -STRING_LITERAL_SINGLE_QUOTE_simple_char -> char = c:$([^\u{0027}\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } +STRING_LITERAL_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } //[24] -STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" ("'" / "''")? l:((STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char / ECHAR / UCHAR)*) "'''" { +STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" l:(STRING_LITERAL_LONG_SINGLE_QUOTE_inner*) "'''" { l.into_iter().collect() } -STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char -> char = c:$([^\u{0027}\u{005c}]) { c.chars().next().unwrap() } +STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> String = a:$(("''" / "'")?) b:(STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char / ECHAR / UCHAR) { + let mut s = a.to_string(); + s.push(b); + s +} +STRING_LITERAL_LONG_SINGLE_QUOTE_simple_char -> char = c:$([^'\u{005c}]) { c.chars().next().unwrap() } -//[25] -STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" ("\"" / "\"\"")? l:((STRING_LITERAL_LONG_QUOTE_simple_char / ECHAR / UCHAR)*) "\"\"\"" { -l.into_iter().collect() +//[25]abc""def''ghi" +STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" l:(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" { + l.into_iter().collect() +} +STRING_LITERAL_LONG_QUOTE_inner -> String = a:$(("\"\"" / "\"")?) b:(STRING_LITERAL_LONG_QUOTE_simple_char / ECHAR / UCHAR) { + let mut s = a.to_string(); + s.push(b); + s } -STRING_LITERAL_LONG_QUOTE_simple_char -> char = c:$([^\u{0022}\u{005c}]) { c.chars().next().unwrap() } +STRING_LITERAL_LONG_QUOTE_simple_char -> char = c:$([^"\u{005c}]) { c.chars().next().unwrap() } //[26] UCHAR -> char = "\\u" h:$(HEX HEX HEX HEX) { @@ -266,10 +276,10 @@ PN_CHARS_U -> () = "_" / PN_CHARS_BASE PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U //[167s] -PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("." PN_CHARS+)* +PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)* //[168s] -PN_LOCAL -> () = (":" / [0-9] / PN_CHARS_U / PLX) (":" / PN_CHARS / PLX)* ("." (":" / PN_CHARS / PLX)+)* +PN_LOCAL -> () = (":" / [0-9] / PN_CHARS_U / PLX) (":" / PN_CHARS / PLX)* ("."+ (":" / PN_CHARS / PLX)+)* //[169s] PLX -> String = diff --git a/tests/rdf_test_cases.rs b/tests/rdf_test_cases.rs new file mode 100644 index 00000000..e6417169 --- /dev/null +++ b/tests/rdf_test_cases.rs @@ -0,0 +1,133 @@ +#[macro_use] +extern crate lazy_static; +extern crate reqwest; +extern crate rudf; +extern crate url; + +use reqwest::Client; +use rudf::model::data::*; +use rudf::rio::RioError; +use rudf::rio::RioResult; +use rudf::rio::turtle::read_turtle; +use std::collections::HashSet; +use std::iter::FromIterator; +use url::Url; + +struct RDFClient { + client: Client, + data_factory: DataFactory, +} + +impl Default for RDFClient { + fn default() -> Self { + Self { + client: Client::new(), + data_factory: DataFactory::default(), + } + } +} + +impl RDFClient { + fn load_turtle(&self, uri: Url) -> RioResult> { + match self.client.get(uri.clone()).send() { + Ok(response) => Ok(HashSet::from_iter(read_turtle( + response, + &self.data_factory, + Some(uri), + )?)), + Err(error) => Err(RioError::new(error)), + } + } +} + +fn objects_for_subject_predicate<'a>( + graph: &'a HashSet, + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, +) -> impl Iterator { + graph + .iter() + .filter(move |t| t.subject() == subject && t.predicate() == predicate) + .map(|t| t.object()) +} + +fn object_for_subject_predicate<'a>( + graph: &'a HashSet, + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, +) -> Option<&'a Term> { + objects_for_subject_predicate(graph, subject, predicate).nth(0) +} + +fn subjects_for_predicate_object<'a>( + graph: &'a HashSet, + predicate: &'a NamedNode, + object: &'a Term, +) -> impl Iterator { + graph + .iter() + .filter(move |t| t.predicate() == predicate && t.object() == object) + .map(|t| t.subject()) +} + +fn subject_for_predicate_object<'a>( + graph: &'a HashSet, + predicate: &'a NamedNode, + object: &'a Term, +) -> Option<&'a NamedOrBlankNode> { + subjects_for_predicate_object(graph, predicate, object).nth(0) +} + +#[test] +fn turtle_w3c_testsuite() { + let client = RDFClient::default(); + let data_factory = &client.data_factory; + let manifest = client + .load_turtle(Url::parse("https://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap()) + .unwrap(); + let rdf_type = data_factory + .named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()); + let mf_action = data_factory.named_node( + Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action").unwrap(), + ); + let rdfs_comment = data_factory + .named_node(Url::parse("http://www.w3.org/2000/01/rdf-schema#comment").unwrap()); + let rdft_test_turtle_positive_syntax = + Term::from(data_factory.named_node( + Url::parse("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(), + )); + let rdft_test_turtle_negative_syntax = + Term::from(data_factory.named_node( + Url::parse("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(), + )); + + subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_positive_syntax) + .for_each(|test| { + let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + if let Some(Term::NamedNode(file)) = + object_for_subject_predicate(&manifest, test, &mf_action) + { + if let Err(error) = client.load_turtle(file.url().clone()) { + assert!( + false, + "Failure on positive syntax file {} about {} with error: {}", + file, comment, error + ) + } + } + }); + subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_negative_syntax) + .for_each(|test| { + let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + if let Some(Term::NamedNode(file)) = + object_for_subject_predicate(&manifest, test, &mf_action) + { + assert!( + client.load_turtle(file.url().clone()).is_err(), + "Failure on negative syntax test file {} about {}", + file, + comment + ); + } + }); +}