From f90cfbc39a42db7b5076a0f247668cf808336f3c Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 20 May 2018 19:34:41 +0200 Subject: [PATCH] Adds basic evaluation tests for Turtle --- src/model/data.rs | 14 +- src/rio/turtle/turtle_grammar.rustpeg | 9 +- tests/rdf_test_cases.rs | 258 +++++++++++++++++++++++++- 3 files changed, 267 insertions(+), 14 deletions(-) diff --git a/src/model/data.rs b/src/model/data.rs index 90073dda..a08c0876 100644 --- a/src/model/data.rs +++ b/src/model/data.rs @@ -9,7 +9,7 @@ use url::ParseError; use url::Url; /// A RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct NamedNode { iri: Arc, } @@ -46,7 +46,7 @@ impl FromStr for NamedNode { } /// A RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct BlankNode { id: String, } @@ -102,7 +102,7 @@ impl Default for BlankNode { } /// A RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub enum Literal { SimpleLiteral(String), LanguageTaggedString { value: String, language: String }, @@ -221,7 +221,7 @@ impl FromStr for Literal { } /// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub enum NamedOrBlankNode { NamedNode(NamedNode), BlankNode(BlankNode), @@ -273,7 +273,7 @@ impl From for NamedOrBlankNode { /// A RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) /// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub enum Term { NamedNode(NamedNode), BlankNode(BlankNode), @@ -373,7 +373,7 @@ pub trait TripleLike { } /// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct Triple { subject: NamedOrBlankNode, predicate: NamedNode, @@ -437,7 +437,7 @@ pub trait QuadLike: TripleLike { } /// A [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct Quad { subject: NamedOrBlankNode, predicate: NamedNode, diff --git a/src/rio/turtle/turtle_grammar.rustpeg b/src/rio/turtle/turtle_grammar.rustpeg index e8454dff..966f8c0a 100644 --- a/src/rio/turtle/turtle_grammar.rustpeg +++ b/src/rio/turtle/turtle_grammar.rustpeg @@ -52,7 +52,9 @@ sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { } //[6] -triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? +triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? { + state.cur_subject.pop(); +} subject_push -> () = s:subject { state.cur_subject.push(s) } @@ -61,7 +63,9 @@ triples_blankNodePropertyList_push -> () = s: blankNodePropertyList { } //[7] -predicateObjectList -> () = predicate_push _ objectList _ (";" _ (predicate_push _ objectList _)?)* +predicateObjectList -> () = predicate_push _ objectList _ (";" _ (predicate_push _ objectList _)?)* { + state.cur_predicate.pop(); +} predicate_push -> () = v:verb { state.cur_predicate.push(v) } @@ -182,7 +186,6 @@ PNAME_LN -> String = ns:$(PNAME_NS) local:PN_LOCAL {? //[141s] BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { - //TODO unescape b } diff --git a/tests/rdf_test_cases.rs b/tests/rdf_test_cases.rs index 311bd950..4e7a8336 100644 --- a/tests/rdf_test_cases.rs +++ b/tests/rdf_test_cases.rs @@ -9,7 +9,13 @@ use rudf::rio::RioError; use rudf::rio::RioResult; use rudf::rio::ntriples::read_ntriples; use rudf::rio::turtle::read_turtle; +use std::collections::BTreeSet; +use std::collections::HashMap; use std::collections::HashSet; +use std::collections::hash_map::DefaultHasher; +use std::fmt; +use std::hash::Hash; +use std::hash::Hasher; use std::iter::FromIterator; use std::str::FromStr; use url::Url; @@ -42,6 +48,42 @@ impl RDFClient { } } +#[derive(Eq, PartialEq, Clone)] +struct Graph(HashSet); + +impl fmt::Display for Graph { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + for triple in &self.0 { + write!(fmt, "{}\n", triple)?; + } + Ok(()) + } +} + +#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] +struct SubjectPredicate<'a> { + subject: &'a NamedOrBlankNode, + predicate: &'a NamedNode, +} + +impl<'a> SubjectPredicate<'a> { + fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self { + Self { subject, predicate } + } +} + +#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)] +struct PredicateObject<'a> { + predicate: &'a NamedNode, + object: &'a Term, +} + +impl<'a> PredicateObject<'a> { + fn new(predicate: &'a NamedNode, object: &'a Term) -> Self { + Self { predicate, object } + } +} + fn objects_for_subject_predicate<'a>( graph: &'a HashSet, subject: &'a NamedOrBlankNode, @@ -80,15 +122,136 @@ fn subject_for_predicate_object<'a>( subjects_for_predicate_object(graph, predicate, object).nth(0) } +fn subject_predicates_for_object<'a>( + graph: &'a HashSet, + object: &'a Term, +) -> impl Iterator> { + graph + .iter() + .filter(move |t| t.object() == object) + .map(|t| SubjectPredicate::new(t.subject(), t.predicate())) +} + +fn predicate_objects_for_subject<'a>( + graph: &'a HashSet, + subject: &'a NamedOrBlankNode, +) -> impl Iterator> { + graph + .iter() + .filter(move |t| t.subject() == subject) + .map(|t| PredicateObject::new(t.predicate(), t.object())) +} + +fn hash_blank_nodes<'a>( + bnodes: HashSet<&'a BlankNode>, + graph: &'a HashSet, +) -> HashMap> { + let mut bnodes_by_hash: HashMap> = HashMap::default(); + + // NB: we need to sort the triples to have the same hash + for bnode in bnodes.into_iter() { + let mut hasher = DefaultHasher::new(); + + { + let subject = NamedOrBlankNode::from(bnode.clone()); + let mut po_set: BTreeSet = BTreeSet::default(); + for po in predicate_objects_for_subject(&graph, &subject) { + if !po.object.is_blank_node() { + po_set.insert(po); + } + } + for po in po_set { + po.hash(&mut hasher); + } + } + + { + let object = Term::from(bnode.clone()); + let mut sp_set: BTreeSet = BTreeSet::default(); + for sp in subject_predicates_for_object(&graph, &object) { + if !sp.subject.is_blank_node() { + sp_set.insert(sp); + } + } + for sp in sp_set { + sp.hash(&mut hasher); + } + } + + bnodes_by_hash + .entry(hasher.finish()) + .or_insert_with(Vec::default) + .push(bnode); + } + + bnodes_by_hash +} + +//TODO: use a better datastructure +fn is_isomorphic(a: &HashSet, b: &HashSet) -> bool { + if a.len() != b.len() { + return false; + } + + let mut a_bnodes: HashSet<&BlankNode> = HashSet::default(); + let mut b_bnodes: HashSet<&BlankNode> = HashSet::default(); + + for t in a { + if let NamedOrBlankNode::BlankNode(subject) = t.subject() { + a_bnodes.insert(subject); + if let Term::BlankNode(object) = t.object() { + a_bnodes.insert(object); + } + } else if let Term::BlankNode(object) = t.object() { + a_bnodes.insert(object); + } else if !b.contains(t) { + return false; + } + } + for t in b { + if let NamedOrBlankNode::BlankNode(subject) = t.subject() { + b_bnodes.insert(subject); + if let Term::BlankNode(object) = t.object() { + b_bnodes.insert(object); + } + } else if let Term::BlankNode(object) = t.object() { + b_bnodes.insert(object); + } else if !a.contains(t) { + return false; + } + } + + let a_bnodes_by_hash = hash_blank_nodes(a_bnodes, &a); + let b_bnodes_by_hash = hash_blank_nodes(b_bnodes, &b); + + if a_bnodes_by_hash.len() != b_bnodes_by_hash.len() { + return false; + } + + for hash in a_bnodes_by_hash.keys() { + if a_bnodes_by_hash.get(hash).map(|l| l.len()) + != b_bnodes_by_hash.get(hash).map(|l| l.len()) + { + return false; + } + } + + //TODO: proper isomorphism building + + true +} + #[test] fn turtle_w3c_testsuite() { + let manifest_url = Url::parse("http://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap(); let client = RDFClient::default(); - let manifest = client - .load_turtle(Url::parse("https://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap()) - .unwrap(); + let manifest = client.load_turtle(manifest_url.clone()).unwrap(); let mf_action = NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action", ).unwrap(); + let mf_result = NamedNode::from_str( + "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#result", + ).unwrap(); let rdfs_comment = NamedNode::from_str("http://www.w3.org/2000/01/rdf-schema#comment").unwrap(); let rdft_test_turtle_positive_syntax = Term::from( NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(), @@ -96,6 +259,35 @@ fn turtle_w3c_testsuite() { let rdft_test_turtle_negative_syntax = Term::from( NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(), ); + let rdft_test_turtle_eval = + Term::from(NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleEval").unwrap()); + let rdft_test_turtle_negative_eval = Term::from( + NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleNegativeEval").unwrap(), + ); + //TODO: make blacklist pass + let test_blacklist: Vec = vec![ + //UTF-8 broken surrogates in BNode ids + NamedNode::new( + manifest_url + .join("#prefix_with_PN_CHARS_BASE_character_boundaries") + .unwrap(), + ).into(), + NamedNode::new( + manifest_url + .join("#labeled_blank_node_with_PN_CHARS_BASE_character_boundaries") + .unwrap(), + ).into(), + NamedNode::new( + manifest_url + .join("#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries") + .unwrap(), + ).into(), + NamedNode::new( + manifest_url + .join("#localName_with_nfc_PN_CHARS_BASE_character_boundaries") + .unwrap(), + ).into(), + ]; subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_positive_syntax) .for_each(|test| { @@ -126,13 +318,71 @@ fn turtle_w3c_testsuite() { ); } }); + subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_eval).for_each(|test| { + if test_blacklist.contains(test) { + return; + } + let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + if let Some(Term::NamedNode(input)) = + object_for_subject_predicate(&manifest, test, &mf_action) + { + if let Some(Term::NamedNode(result)) = + object_for_subject_predicate(&manifest, test, &mf_result) + { + match client.load_turtle(input.url().clone()) { + Ok(action_graph) => match client.load_turtle(result.url().clone()) { + Ok(result_graph) => assert!( + is_isomorphic(&action_graph, &result_graph), + "Failure on positive evaluation test file {} against {} about {}. Expected file:\n{}\nParsed file:\n{}\n", + input, + result, + comment, + Graph(action_graph), + Graph(result_graph) + ), + Err(error) => assert!( + false, + "Failure to parse the Turtle result file {} about {} with error: {}", + result, comment, error + ) + }, + Err(error) => assert!( + false, + "Failure to parse the Turtle input file {} about {} with error: {}", + input, comment, error + ) + } + } + } + }); + subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_eval).for_each( + |test| { + let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); + if let Some(Term::NamedNode(file)) = + object_for_subject_predicate(&manifest, test, &mf_action) + { + if let Some(Term::NamedNode(result)) = + object_for_subject_predicate(&manifest, test, &mf_result) + { + let action_graph = client.load_turtle(file.url().clone()); + let result_graph = client.load_turtle(result.url().clone()); + assert!( + !is_isomorphic(&action_graph.unwrap(), &result_graph.unwrap()), + "Failure on positive evaluation test file {} about {}", + file, + comment + ); + } + } + }, + ); } #[test] fn ntriples_w3c_testsuite() { let client = RDFClient::default(); let manifest = client - .load_turtle(Url::parse("https://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap()) + .load_turtle(Url::parse("http://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap()) .unwrap(); let mf_action = NamedNode::from_str( "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action",