Adds basic evaluation tests for Turtle

pull/10/head
Tpt 6 years ago
parent 196864d250
commit f90cfbc39a
  1. 14
      src/model/data.rs
  2. 9
      src/rio/turtle/turtle_grammar.rustpeg
  3. 258
      tests/rdf_test_cases.rs

@ -9,7 +9,7 @@ use url::ParseError;
use url::Url;
/// A RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct NamedNode {
iri: Arc<Url>,
}
@ -46,7 +46,7 @@ impl FromStr for NamedNode {
}
/// A RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct BlankNode {
id: String,
}
@ -102,7 +102,7 @@ impl Default for BlankNode {
}
/// A RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum Literal {
SimpleLiteral(String),
LanguageTaggedString { value: String, language: String },
@ -221,7 +221,7 @@ impl FromStr for Literal {
}
/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum NamedOrBlankNode {
NamedNode(NamedNode),
BlankNode(BlankNode),
@ -273,7 +273,7 @@ impl From<BlankNode> for NamedOrBlankNode {
/// A RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term)
/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum Term {
NamedNode(NamedNode),
BlankNode(BlankNode),
@ -373,7 +373,7 @@ pub trait TripleLike {
}
/// A [RDF triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct Triple {
subject: NamedOrBlankNode,
predicate: NamedNode,
@ -437,7 +437,7 @@ pub trait QuadLike: TripleLike {
}
/// A [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in a [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct Quad {
subject: NamedOrBlankNode,
predicate: NamedNode,

@ -52,7 +52,9 @@ sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF {
}
//[6]
triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList?
triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? {
state.cur_subject.pop();
}
subject_push -> () = s:subject {
state.cur_subject.push(s)
}
@ -61,7 +63,9 @@ triples_blankNodePropertyList_push -> () = s: blankNodePropertyList {
}
//[7]
predicateObjectList -> () = predicate_push _ objectList _ (";" _ (predicate_push _ objectList _)?)*
predicateObjectList -> () = predicate_push _ objectList _ (";" _ (predicate_push _ objectList _)?)* {
state.cur_predicate.pop();
}
predicate_push -> () = v:verb {
state.cur_predicate.push(v)
}
@ -182,7 +186,6 @@ PNAME_LN -> String = ns:$(PNAME_NS) local:PN_LOCAL {?
//[141s]
BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
//TODO unescape
b
}

@ -9,7 +9,13 @@ use rudf::rio::RioError;
use rudf::rio::RioResult;
use rudf::rio::ntriples::read_ntriples;
use rudf::rio::turtle::read_turtle;
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
use std::iter::FromIterator;
use std::str::FromStr;
use url::Url;
@ -42,6 +48,42 @@ impl RDFClient {
}
}
#[derive(Eq, PartialEq, Clone)]
struct Graph(HashSet<Triple>);
impl fmt::Display for Graph {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
for triple in &self.0 {
write!(fmt, "{}\n", triple)?;
}
Ok(())
}
}
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)]
struct SubjectPredicate<'a> {
subject: &'a NamedOrBlankNode,
predicate: &'a NamedNode,
}
impl<'a> SubjectPredicate<'a> {
fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self {
Self { subject, predicate }
}
}
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)]
struct PredicateObject<'a> {
predicate: &'a NamedNode,
object: &'a Term,
}
impl<'a> PredicateObject<'a> {
fn new(predicate: &'a NamedNode, object: &'a Term) -> Self {
Self { predicate, object }
}
}
fn objects_for_subject_predicate<'a>(
graph: &'a HashSet<Triple>,
subject: &'a NamedOrBlankNode,
@ -80,15 +122,136 @@ fn subject_for_predicate_object<'a>(
subjects_for_predicate_object(graph, predicate, object).nth(0)
}
fn subject_predicates_for_object<'a>(
graph: &'a HashSet<Triple>,
object: &'a Term,
) -> impl Iterator<Item = SubjectPredicate<'a>> {
graph
.iter()
.filter(move |t| t.object() == object)
.map(|t| SubjectPredicate::new(t.subject(), t.predicate()))
}
fn predicate_objects_for_subject<'a>(
graph: &'a HashSet<Triple>,
subject: &'a NamedOrBlankNode,
) -> impl Iterator<Item = PredicateObject<'a>> {
graph
.iter()
.filter(move |t| t.subject() == subject)
.map(|t| PredicateObject::new(t.predicate(), t.object()))
}
fn hash_blank_nodes<'a>(
bnodes: HashSet<&'a BlankNode>,
graph: &'a HashSet<Triple>,
) -> HashMap<u64, Vec<&'a BlankNode>> {
let mut bnodes_by_hash: HashMap<u64, Vec<&BlankNode>> = HashMap::default();
// NB: we need to sort the triples to have the same hash
for bnode in bnodes.into_iter() {
let mut hasher = DefaultHasher::new();
{
let subject = NamedOrBlankNode::from(bnode.clone());
let mut po_set: BTreeSet<PredicateObject> = BTreeSet::default();
for po in predicate_objects_for_subject(&graph, &subject) {
if !po.object.is_blank_node() {
po_set.insert(po);
}
}
for po in po_set {
po.hash(&mut hasher);
}
}
{
let object = Term::from(bnode.clone());
let mut sp_set: BTreeSet<SubjectPredicate> = BTreeSet::default();
for sp in subject_predicates_for_object(&graph, &object) {
if !sp.subject.is_blank_node() {
sp_set.insert(sp);
}
}
for sp in sp_set {
sp.hash(&mut hasher);
}
}
bnodes_by_hash
.entry(hasher.finish())
.or_insert_with(Vec::default)
.push(bnode);
}
bnodes_by_hash
}
//TODO: use a better datastructure
fn is_isomorphic(a: &HashSet<Triple>, b: &HashSet<Triple>) -> bool {
if a.len() != b.len() {
return false;
}
let mut a_bnodes: HashSet<&BlankNode> = HashSet::default();
let mut b_bnodes: HashSet<&BlankNode> = HashSet::default();
for t in a {
if let NamedOrBlankNode::BlankNode(subject) = t.subject() {
a_bnodes.insert(subject);
if let Term::BlankNode(object) = t.object() {
a_bnodes.insert(object);
}
} else if let Term::BlankNode(object) = t.object() {
a_bnodes.insert(object);
} else if !b.contains(t) {
return false;
}
}
for t in b {
if let NamedOrBlankNode::BlankNode(subject) = t.subject() {
b_bnodes.insert(subject);
if let Term::BlankNode(object) = t.object() {
b_bnodes.insert(object);
}
} else if let Term::BlankNode(object) = t.object() {
b_bnodes.insert(object);
} else if !a.contains(t) {
return false;
}
}
let a_bnodes_by_hash = hash_blank_nodes(a_bnodes, &a);
let b_bnodes_by_hash = hash_blank_nodes(b_bnodes, &b);
if a_bnodes_by_hash.len() != b_bnodes_by_hash.len() {
return false;
}
for hash in a_bnodes_by_hash.keys() {
if a_bnodes_by_hash.get(hash).map(|l| l.len())
!= b_bnodes_by_hash.get(hash).map(|l| l.len())
{
return false;
}
}
//TODO: proper isomorphism building
true
}
#[test]
fn turtle_w3c_testsuite() {
let manifest_url = Url::parse("http://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap();
let client = RDFClient::default();
let manifest = client
.load_turtle(Url::parse("https://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap())
.unwrap();
let manifest = client.load_turtle(manifest_url.clone()).unwrap();
let mf_action = NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action",
).unwrap();
let mf_result = NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#result",
).unwrap();
let rdfs_comment = NamedNode::from_str("http://www.w3.org/2000/01/rdf-schema#comment").unwrap();
let rdft_test_turtle_positive_syntax = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(),
@ -96,6 +259,35 @@ fn turtle_w3c_testsuite() {
let rdft_test_turtle_negative_syntax = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(),
);
let rdft_test_turtle_eval =
Term::from(NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleEval").unwrap());
let rdft_test_turtle_negative_eval = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleNegativeEval").unwrap(),
);
//TODO: make blacklist pass
let test_blacklist: Vec<NamedOrBlankNode> = vec![
//UTF-8 broken surrogates in BNode ids
NamedNode::new(
manifest_url
.join("#prefix_with_PN_CHARS_BASE_character_boundaries")
.unwrap(),
).into(),
NamedNode::new(
manifest_url
.join("#labeled_blank_node_with_PN_CHARS_BASE_character_boundaries")
.unwrap(),
).into(),
NamedNode::new(
manifest_url
.join("#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries")
.unwrap(),
).into(),
NamedNode::new(
manifest_url
.join("#localName_with_nfc_PN_CHARS_BASE_character_boundaries")
.unwrap(),
).into(),
];
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_positive_syntax)
.for_each(|test| {
@ -126,13 +318,71 @@ fn turtle_w3c_testsuite() {
);
}
});
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_eval).for_each(|test| {
if test_blacklist.contains(test) {
return;
}
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
if let Some(Term::NamedNode(input)) =
object_for_subject_predicate(&manifest, test, &mf_action)
{
if let Some(Term::NamedNode(result)) =
object_for_subject_predicate(&manifest, test, &mf_result)
{
match client.load_turtle(input.url().clone()) {
Ok(action_graph) => match client.load_turtle(result.url().clone()) {
Ok(result_graph) => assert!(
is_isomorphic(&action_graph, &result_graph),
"Failure on positive evaluation test file {} against {} about {}. Expected file:\n{}\nParsed file:\n{}\n",
input,
result,
comment,
Graph(action_graph),
Graph(result_graph)
),
Err(error) => assert!(
false,
"Failure to parse the Turtle result file {} about {} with error: {}",
result, comment, error
)
},
Err(error) => assert!(
false,
"Failure to parse the Turtle input file {} about {} with error: {}",
input, comment, error
)
}
}
}
});
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_eval).for_each(
|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
if let Some(Term::NamedNode(file)) =
object_for_subject_predicate(&manifest, test, &mf_action)
{
if let Some(Term::NamedNode(result)) =
object_for_subject_predicate(&manifest, test, &mf_result)
{
let action_graph = client.load_turtle(file.url().clone());
let result_graph = client.load_turtle(result.url().clone());
assert!(
!is_isomorphic(&action_graph.unwrap(), &result_graph.unwrap()),
"Failure on positive evaluation test file {} about {}",
file,
comment
);
}
}
},
);
}
#[test]
fn ntriples_w3c_testsuite() {
let client = RDFClient::default();
let manifest = client
.load_turtle(Url::parse("https://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap())
.load_turtle(Url::parse("http://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap())
.unwrap();
let mf_action = NamedNode::from_str(
"http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action",

Loading…
Cancel
Save