Creates a simplistic implementation of a MemoryStore

Moves the graph isomorphism implementation out of rdf_test_cases
pull/10/head
Tpt 7 years ago
parent f90cfbc39a
commit 723bb22b18
  1. 1
      src/lib.rs
  2. 154
      src/store/isomorphism.rs
  3. 148
      src/store/memory.rs
  4. 2
      src/store/mod.rs
  5. 280
      tests/rdf_test_cases.rs

@ -4,3 +4,4 @@ extern crate url;
pub mod model;
pub mod rio;
pub mod store;

@ -0,0 +1,154 @@
use model::data::*;
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::hash::Hash;
use std::hash::Hasher;
use store::memory::MemoryGraph;
/// A borrowed `(subject, predicate)` pair taken from a triple.
///
/// The derived ordering compares the subject first and the predicate second,
/// so a `BTreeSet` of these pairs iterates in a reproducible order.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
struct SubjectPredicate<'a> {
    subject: &'a NamedOrBlankNode,
    predicate: &'a NamedNode,
}

impl<'a> SubjectPredicate<'a> {
    /// Pairs the given subject and predicate without copying either of them.
    fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self {
        SubjectPredicate { subject, predicate }
    }
}
/// A borrowed `(predicate, object)` pair taken from a triple.
///
/// The derived ordering compares the predicate first and the object second,
/// so a `BTreeSet` of these pairs iterates in a reproducible order.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
struct PredicateObject<'a> {
    predicate: &'a NamedNode,
    object: &'a Term,
}

impl<'a> PredicateObject<'a> {
    /// Pairs the given predicate and object without copying either of them.
    fn new(predicate: &'a NamedNode, object: &'a Term) -> Self {
        PredicateObject { predicate, object }
    }
}
/// Yields the `(subject, predicate)` pair of every triple of `graph` whose
/// object is `object`.
fn subject_predicates_for_object<'a>(
    graph: &'a MemoryGraph,
    object: &'a Term,
) -> impl Iterator<Item = SubjectPredicate<'a>> {
    let incoming = graph.triples_for_object(object);
    incoming.map(|triple| {
        let subject = triple.subject();
        let predicate = triple.predicate();
        SubjectPredicate::new(subject, predicate)
    })
}
/// Yields the `(predicate, object)` pair of every triple of `graph` whose
/// subject is `subject`.
fn predicate_objects_for_subject<'a>(
    graph: &'a MemoryGraph,
    subject: &'a NamedOrBlankNode,
) -> impl Iterator<Item = PredicateObject<'a>> {
    let outgoing = graph.triples_for_subject(subject);
    outgoing.map(|triple| {
        let predicate = triple.predicate();
        let object = triple.object();
        PredicateObject::new(predicate, object)
    })
}
/// Buckets blank nodes by a hash of the triples that link them to
/// non-blank terms.
///
/// For each blank node the hash is fed, in this order, with:
/// 1. the `(predicate, object)` pairs of the triples it is the subject of,
///    ignoring pairs whose object is a blank node;
/// 2. the `(subject, predicate)` pairs of the triples it is the object of,
///    ignoring pairs whose subject is a blank node.
///
/// The returned map associates each hash with the blank nodes that produced it.
fn hash_blank_nodes<'a>(
    bnodes: HashSet<&'a BlankNode>,
    graph: &'a MemoryGraph,
) -> HashMap<u64, Vec<&'a BlankNode>> {
    let mut buckets: HashMap<u64, Vec<&'a BlankNode>> = HashMap::default();
    for node in bnodes {
        let mut state = DefaultHasher::new();

        // The graph iterates in an arbitrary order, so the pairs go through a
        // `BTreeSet` first: hashing them sorted makes the result independent
        // of that order.
        {
            let as_subject = NamedOrBlankNode::from(node.clone());
            let outgoing: BTreeSet<PredicateObject> =
                predicate_objects_for_subject(graph, &as_subject)
                    .filter(|po| !po.object.is_blank_node())
                    .collect();
            outgoing.iter().for_each(|po| po.hash(&mut state));
        }
        {
            let as_object = Term::from(node.clone());
            let incoming: BTreeSet<SubjectPredicate> =
                subject_predicates_for_object(graph, &as_object)
                    .filter(|sp| !sp.subject.is_blank_node())
                    .collect();
            incoming.iter().for_each(|sp| sp.hash(&mut state));
        }

        let key = state.finish();
        buckets.entry(key).or_insert_with(Vec::new).push(node);
    }
    buckets
}
/// Comparison of two graphs of the same type up to
/// [isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism).
pub trait GraphIsomorphism {
/// Checks if two graphs are [isomorphic](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)
///
/// Returns `true` when `self` and `other` are considered isomorphic by the
/// implementation and `false` otherwise.
fn is_isomorphic(&self, other: &Self) -> bool;
}
/// Collects every blank node used as subject or object in `graph`.
///
/// Triples that contain no blank node at all must also be present in
/// `counterpart`; `None` is returned as soon as one of them is missing.
fn blank_nodes_with_shared_ground_triples<'a>(
    graph: &'a MemoryGraph,
    counterpart: &MemoryGraph,
) -> Option<HashSet<&'a BlankNode>> {
    let mut found: HashSet<&'a BlankNode> = HashSet::default();
    for triple in graph {
        let mut is_ground = true;
        if let NamedOrBlankNode::BlankNode(subject) = triple.subject() {
            found.insert(subject);
            is_ground = false;
        }
        if let Term::BlankNode(object) = triple.object() {
            found.insert(object);
            is_ground = false;
        }
        if is_ground && !counterpart.contains(triple) {
            return None;
        }
    }
    Some(found)
}

impl GraphIsomorphism for MemoryGraph {
    //TODO: proper isomorphism building
    /// Compares the two graphs without building an actual blank node mapping.
    ///
    /// The answer is `false` when the graphs differ in size, when a triple
    /// without blank nodes exists in only one of them, or when the blank
    /// nodes of both graphs do not fall into hash buckets of identical keys
    /// and sizes (see `hash_blank_nodes`). Otherwise `true` is returned.
    fn is_isomorphic(&self, other: &Self) -> bool {
        if self.len() != other.len() {
            return false;
        }

        let self_bnodes = match blank_nodes_with_shared_ground_triples(self, other) {
            Some(nodes) => nodes,
            None => return false,
        };
        let other_bnodes = match blank_nodes_with_shared_ground_triples(other, self) {
            Some(nodes) => nodes,
            None => return false,
        };

        let self_buckets = hash_blank_nodes(self_bnodes, self);
        let other_buckets = hash_blank_nodes(other_bnodes, other);

        // Same number of buckets, and every bucket of `self` has a bucket of
        // the same size under the same hash in `other`.
        self_buckets.len() == other_buckets.len()
            && self_buckets.iter().all(|(hash, nodes)| {
                other_buckets.get(hash).map(|bucket| bucket.len()) == Some(nodes.len())
            })
    }
}

@ -0,0 +1,148 @@
use model::data::*;
use std::collections::HashSet;
use std::fmt;
use std::iter::FromIterator;
/// An in-memory graph stored as a plain hash set of triples.
///
/// No index is kept: every `*_for_*` lookup below walks the whole set and
/// filters it.
#[derive(Debug, Clone, Default)]
pub struct MemoryGraph {
    triples: HashSet<Triple>,
}

impl MemoryGraph {
    /// Iterates over all the triples of the graph, in the order of the
    /// underlying hash set.
    pub fn iter(&self) -> impl Iterator<Item = &Triple> {
        self.triples.iter()
    }

    /// Iterates over the triples whose subject is `subject`.
    pub fn triples_for_subject<'a>(
        &'a self,
        subject: &'a NamedOrBlankNode,
    ) -> impl Iterator<Item = &'a Triple> {
        self.iter().filter(move |t| t.subject() == subject)
    }

    /// Iterates over the triples whose predicate is `predicate`.
    pub fn triples_for_predicate<'a>(
        &'a self,
        predicate: &'a NamedNode,
    ) -> impl Iterator<Item = &'a Triple> {
        self.iter().filter(move |t| t.predicate() == predicate)
    }

    /// Iterates over the triples whose object is `object`.
    pub fn triples_for_object<'a>(&'a self, object: &'a Term) -> impl Iterator<Item = &'a Triple> {
        self.iter().filter(move |t| t.object() == object)
    }

    /// Iterates over the triples matching both `subject` and `predicate`.
    pub fn triples_for_subject_predicate<'a>(
        &'a self,
        subject: &'a NamedOrBlankNode,
        predicate: &'a NamedNode,
    ) -> impl Iterator<Item = &'a Triple> {
        self.iter()
            .filter(move |t| t.subject() == subject && t.predicate() == predicate)
    }

    /// Iterates over the objects of the triples matching both `subject` and
    /// `predicate`.
    pub fn objects_for_subject_predicate<'a>(
        &'a self,
        subject: &'a NamedOrBlankNode,
        predicate: &'a NamedNode,
    ) -> impl Iterator<Item = &'a Term> {
        self.triples_for_subject_predicate(subject, predicate)
            .map(|t| t.object())
    }

    /// Returns the first object found for `subject` and `predicate`, or
    /// `None` if no triple matches.
    ///
    /// When several triples match, the one returned is whichever the
    /// underlying hash set yields first.
    pub fn object_for_subject_predicate<'a>(
        &'a self,
        subject: &'a NamedOrBlankNode,
        predicate: &'a NamedNode,
    ) -> Option<&'a Term> {
        self.objects_for_subject_predicate(subject, predicate)
            .next()
    }

    /// Iterates over the triples matching both `predicate` and `object`.
    pub fn triples_for_predicate_object<'a>(
        &'a self,
        predicate: &'a NamedNode,
        object: &'a Term,
    ) -> impl Iterator<Item = &'a Triple> {
        self.iter()
            .filter(move |t| t.predicate() == predicate && t.object() == object)
    }

    /// Iterates over the subjects of the triples matching both `predicate`
    /// and `object`.
    pub fn subjects_for_predicate_object<'a>(
        &'a self,
        predicate: &'a NamedNode,
        object: &'a Term,
    ) -> impl Iterator<Item = &'a NamedOrBlankNode> {
        self.triples_for_predicate_object(predicate, object)
            .map(|t| t.subject())
    }

    /// Returns the first subject found for `predicate` and `object`, or
    /// `None` if no triple matches.
    ///
    /// When several triples match, the one returned is whichever the
    /// underlying hash set yields first.
    pub fn subject_for_predicate_object<'a>(
        &'a self,
        predicate: &'a NamedNode,
        object: &'a Term,
    ) -> Option<&'a NamedOrBlankNode> {
        self.subjects_for_predicate_object(predicate, object).next()
    }

    /// Number of triples in the graph.
    pub fn len(&self) -> usize {
        self.triples.len()
    }

    /// Returns `true` if the graph holds no triple.
    pub fn is_empty(&self) -> bool {
        self.triples.is_empty()
    }

    /// Returns `true` if the graph holds a triple equal to `value`.
    pub fn contains(&self, value: &Triple) -> bool {
        self.triples.contains(value)
    }

    /// Adds `value` to the graph.
    ///
    /// Returns `true` if the triple was not already present.
    pub fn insert(&mut self, value: Triple) -> bool {
        self.triples.insert(value)
    }
}
/// Writes the graph one triple per line, each line being the triple's own
/// `Display` output followed by `\n`.
impl fmt::Display for MemoryGraph {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        for triple in &self.triples {
            // `writeln!` emits the same trailing "\n" the format string used to carry.
            writeln!(fmt, "{}", triple)?;
        }
        Ok(())
    }
}
impl IntoIterator for MemoryGraph {
type Item = Triple;
type IntoIter = <HashSet<Triple> as IntoIterator>::IntoIter;
fn into_iter(self) -> <Self as IntoIterator>::IntoIter {
self.triples.into_iter()
}
}
/// Lets `for triple in &graph` borrow the triples instead of consuming the graph.
impl<'a> IntoIterator for &'a MemoryGraph {
    type Item = &'a Triple;
    type IntoIter = <&'a HashSet<Triple> as IntoIterator>::IntoIter;

    fn into_iter(self) -> Self::IntoIter {
        (&self.triples).into_iter()
    }
}
impl FromIterator<Triple> for MemoryGraph {
fn from_iter<I: IntoIterator<Item = Triple>>(iter: I) -> Self {
let triples = HashSet::from_iter(iter);
Self { triples }
}
}
/// Adds every triple produced by the iterator to the graph.
impl Extend<Triple> for MemoryGraph {
    fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) {
        let storage = &mut self.triples;
        storage.extend(iter);
    }
}
/// Adds a clone of every borrowed triple produced by the iterator to the graph.
impl<'a> Extend<&'a Triple> for MemoryGraph {
    fn extend<I: IntoIterator<Item = &'a Triple>>(&mut self, iter: I) {
        let owned = iter.into_iter().cloned();
        self.triples.extend(owned);
    }
}

@ -0,0 +1,2 @@
pub mod isomorphism;
pub mod memory;

@ -9,13 +9,8 @@ use rudf::rio::RioError;
use rudf::rio::RioResult;
use rudf::rio::ntriples::read_ntriples;
use rudf::rio::turtle::read_turtle;
use std::collections::BTreeSet;
use std::collections::HashMap;
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
use rudf::store::isomorphism::GraphIsomorphism;
use rudf::store::memory::MemoryGraph;
use std::iter::FromIterator;
use std::str::FromStr;
use url::Url;
@ -33,14 +28,14 @@ impl Default for RDFClient {
}
impl RDFClient {
fn load_turtle(&self, uri: Url) -> RioResult<HashSet<Triple>> {
fn load_turtle(&self, uri: Url) -> RioResult<MemoryGraph> {
match self.client.get(uri.clone()).send() {
Ok(response) => Ok(HashSet::from_iter(read_turtle(response, Some(uri))?)),
Ok(response) => Ok(MemoryGraph::from_iter(read_turtle(response, Some(uri))?)),
Err(error) => Err(RioError::new(error)),
}
}
fn load_ntriples(&self, uri: Url) -> RioResult<HashSet<Triple>> {
fn load_ntriples(&self, uri: Url) -> RioResult<MemoryGraph> {
match self.client.get(uri).send() {
Ok(response) => read_ntriples(response).collect(),
Err(error) => Err(RioError::new(error)),
@ -48,199 +43,6 @@ impl RDFClient {
}
}
/// Thin wrapper around a set of triples, used to print a graph in assertion
/// messages.
#[derive(Eq, PartialEq, Clone)]
struct Graph(HashSet<Triple>);

/// Writes one triple per line, each followed by `\n`.
impl fmt::Display for Graph {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        for triple in &self.0 {
            // `writeln!` emits the same trailing "\n" the format string used to carry.
            writeln!(fmt, "{}", triple)?;
        }
        Ok(())
    }
}
/// A borrowed `(subject, predicate)` pair taken from a triple.
///
/// The derived ordering compares the subject first and the predicate second,
/// so a `BTreeSet` of these pairs iterates in a reproducible order.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
struct SubjectPredicate<'a> {
    subject: &'a NamedOrBlankNode,
    predicate: &'a NamedNode,
}

impl<'a> SubjectPredicate<'a> {
    /// Pairs the given subject and predicate without copying either of them.
    fn new(subject: &'a NamedOrBlankNode, predicate: &'a NamedNode) -> Self {
        SubjectPredicate { subject, predicate }
    }
}
/// A borrowed `(predicate, object)` pair taken from a triple.
///
/// The derived ordering compares the predicate first and the object second,
/// so a `BTreeSet` of these pairs iterates in a reproducible order.
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
struct PredicateObject<'a> {
    predicate: &'a NamedNode,
    object: &'a Term,
}

impl<'a> PredicateObject<'a> {
    /// Pairs the given predicate and object without copying either of them.
    fn new(predicate: &'a NamedNode, object: &'a Term) -> Self {
        PredicateObject { predicate, object }
    }
}
/// Iterates over the objects of the triples of `graph` that have the given
/// `subject` and `predicate`.
fn objects_for_subject_predicate<'a>(
    graph: &'a HashSet<Triple>,
    subject: &'a NamedOrBlankNode,
    predicate: &'a NamedNode,
) -> impl Iterator<Item = &'a Term> {
    let matching = graph
        .iter()
        .filter(move |triple| triple.subject() == subject && triple.predicate() == predicate);
    matching.map(|triple| triple.object())
}
/// Returns the first object found in `graph` for the given `subject` and
/// `predicate`, or `None` if no triple matches.
///
/// When several triples match, the one returned is whichever the hash set
/// yields first.
fn object_for_subject_predicate<'a>(
    graph: &'a HashSet<Triple>,
    subject: &'a NamedOrBlankNode,
    predicate: &'a NamedNode,
) -> Option<&'a Term> {
    objects_for_subject_predicate(graph, subject, predicate).next()
}
/// Iterates over the subjects of the triples of `graph` that have the given
/// `predicate` and `object`.
fn subjects_for_predicate_object<'a>(
    graph: &'a HashSet<Triple>,
    predicate: &'a NamedNode,
    object: &'a Term,
) -> impl Iterator<Item = &'a NamedOrBlankNode> {
    let matching = graph
        .iter()
        .filter(move |triple| triple.predicate() == predicate && triple.object() == object);
    matching.map(|triple| triple.subject())
}
/// Returns the first subject found in `graph` for the given `predicate` and
/// `object`, or `None` if no triple matches.
///
/// When several triples match, the one returned is whichever the hash set
/// yields first.
fn subject_for_predicate_object<'a>(
    graph: &'a HashSet<Triple>,
    predicate: &'a NamedNode,
    object: &'a Term,
) -> Option<&'a NamedOrBlankNode> {
    subjects_for_predicate_object(graph, predicate, object).next()
}
/// Yields the `(subject, predicate)` pair of every triple of `graph` whose
/// object is `object`.
fn subject_predicates_for_object<'a>(
    graph: &'a HashSet<Triple>,
    object: &'a Term,
) -> impl Iterator<Item = SubjectPredicate<'a>> {
    let incoming = graph.iter().filter(move |triple| triple.object() == object);
    incoming.map(|triple| SubjectPredicate::new(triple.subject(), triple.predicate()))
}
/// Yields the `(predicate, object)` pair of every triple of `graph` whose
/// subject is `subject`.
fn predicate_objects_for_subject<'a>(
    graph: &'a HashSet<Triple>,
    subject: &'a NamedOrBlankNode,
) -> impl Iterator<Item = PredicateObject<'a>> {
    let outgoing = graph.iter().filter(move |triple| triple.subject() == subject);
    outgoing.map(|triple| PredicateObject::new(triple.predicate(), triple.object()))
}
/// Buckets blank nodes by a hash of the triples that link them to
/// non-blank terms.
///
/// For each blank node the hash is fed first with the `(predicate, object)`
/// pairs of the triples it is the subject of, then with the
/// `(subject, predicate)` pairs of the triples it is the object of. Pairs
/// whose other end is a blank node are ignored.
fn hash_blank_nodes<'a>(
    bnodes: HashSet<&'a BlankNode>,
    graph: &'a HashSet<Triple>,
) -> HashMap<u64, Vec<&'a BlankNode>> {
    let mut buckets: HashMap<u64, Vec<&'a BlankNode>> = HashMap::default();
    for node in bnodes {
        let mut state = DefaultHasher::new();

        // The set iterates in an arbitrary order, so the pairs go through a
        // `BTreeSet` first: hashing them sorted makes the result independent
        // of that order.
        {
            let as_subject = NamedOrBlankNode::from(node.clone());
            let outgoing: BTreeSet<PredicateObject> =
                predicate_objects_for_subject(graph, &as_subject)
                    .filter(|po| !po.object.is_blank_node())
                    .collect();
            outgoing.iter().for_each(|po| po.hash(&mut state));
        }
        {
            let as_object = Term::from(node.clone());
            let incoming: BTreeSet<SubjectPredicate> =
                subject_predicates_for_object(graph, &as_object)
                    .filter(|sp| !sp.subject.is_blank_node())
                    .collect();
            incoming.iter().for_each(|sp| sp.hash(&mut state));
        }

        let key = state.finish();
        buckets.entry(key).or_insert_with(Vec::new).push(node);
    }
    buckets
}
//TODO: use a better datastructure
/// Compares two sets of triples without building an actual blank node mapping.
///
/// The answer is `false` when the sets differ in size, when a triple without
/// blank nodes exists in only one of them, or when the blank nodes of both
/// sets do not fall into hash buckets of identical keys and sizes (see
/// `hash_blank_nodes`). Otherwise `true` is returned.
fn is_isomorphic(a: &HashSet<Triple>, b: &HashSet<Triple>) -> bool {
    if a.len() != b.len() {
        return false;
    }

    // Gather the blank nodes of `a`; triples without any must exist in `b` too.
    let mut a_bnodes: HashSet<&BlankNode> = HashSet::default();
    for triple in a {
        let mut is_ground = true;
        if let NamedOrBlankNode::BlankNode(subject) = triple.subject() {
            a_bnodes.insert(subject);
            is_ground = false;
        }
        if let Term::BlankNode(object) = triple.object() {
            a_bnodes.insert(object);
            is_ground = false;
        }
        if is_ground && !b.contains(triple) {
            return false;
        }
    }

    // Same pass in the other direction.
    let mut b_bnodes: HashSet<&BlankNode> = HashSet::default();
    for triple in b {
        let mut is_ground = true;
        if let NamedOrBlankNode::BlankNode(subject) = triple.subject() {
            b_bnodes.insert(subject);
            is_ground = false;
        }
        if let Term::BlankNode(object) = triple.object() {
            b_bnodes.insert(object);
            is_ground = false;
        }
        if is_ground && !a.contains(triple) {
            return false;
        }
    }

    let a_buckets = hash_blank_nodes(a_bnodes, a);
    let b_buckets = hash_blank_nodes(b_bnodes, b);

    //TODO: proper isomorphism building
    a_buckets.len() == b_buckets.len()
        && a_buckets.iter().all(|(hash, nodes)| {
            b_buckets.get(hash).map(|bucket| bucket.len()) == Some(nodes.len())
        })
}
#[test]
fn turtle_w3c_testsuite() {
let manifest_url = Url::parse("http://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap();
@ -289,11 +91,14 @@ fn turtle_w3c_testsuite() {
).into(),
];
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_positive_syntax)
manifest
.subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_positive_syntax)
.for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
let comment = manifest
.object_for_subject_predicate(test, &rdfs_comment)
.unwrap();
if let Some(Term::NamedNode(file)) =
object_for_subject_predicate(&manifest, test, &mf_action)
manifest.object_for_subject_predicate(test, &mf_action)
{
if let Err(error) = client.load_turtle(file.url().clone()) {
assert!(
@ -304,11 +109,14 @@ fn turtle_w3c_testsuite() {
}
}
});
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_syntax)
manifest
.subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_negative_syntax)
.for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
let comment = manifest
.object_for_subject_predicate(test, &rdfs_comment)
.unwrap();
if let Some(Term::NamedNode(file)) =
object_for_subject_predicate(&manifest, test, &mf_action)
manifest.object_for_subject_predicate(test, &mf_action)
{
assert!(
client.load_turtle(file.url().clone()).is_err(),
@ -318,27 +126,31 @@ fn turtle_w3c_testsuite() {
);
}
});
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_eval).for_each(|test| {
manifest
.subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_eval)
.for_each(|test| {
if test_blacklist.contains(test) {
return;
}
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
let comment = manifest
.object_for_subject_predicate(test, &rdfs_comment)
.unwrap();
if let Some(Term::NamedNode(input)) =
object_for_subject_predicate(&manifest, test, &mf_action)
manifest.object_for_subject_predicate(test, &mf_action)
{
if let Some(Term::NamedNode(result)) =
object_for_subject_predicate(&manifest, test, &mf_result)
manifest.object_for_subject_predicate(test, &mf_result)
{
match client.load_turtle(input.url().clone()) {
Ok(action_graph) => match client.load_turtle(result.url().clone()) {
Ok(result_graph) => assert!(
is_isomorphic(&action_graph, &result_graph),
action_graph.is_isomorphic(&result_graph),
"Failure on positive evaluation test file {} against {} about {}. Expected file:\n{}\nParsed file:\n{}\n",
input,
result,
comment,
Graph(action_graph),
Graph(result_graph)
action_graph,
result_graph
),
Err(error) => assert!(
false,
@ -355,27 +167,29 @@ fn turtle_w3c_testsuite() {
}
}
});
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_eval).for_each(
|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
manifest
.subjects_for_predicate_object(&rdf::TYPE, &rdft_test_turtle_negative_eval)
.for_each(|test| {
let comment = manifest
.object_for_subject_predicate(test, &rdfs_comment)
.unwrap();
if let Some(Term::NamedNode(file)) =
object_for_subject_predicate(&manifest, test, &mf_action)
manifest.object_for_subject_predicate(test, &mf_action)
{
if let Some(Term::NamedNode(result)) =
object_for_subject_predicate(&manifest, test, &mf_result)
manifest.object_for_subject_predicate(test, &mf_result)
{
let action_graph = client.load_turtle(file.url().clone());
let result_graph = client.load_turtle(result.url().clone());
assert!(
!is_isomorphic(&action_graph.unwrap(), &result_graph.unwrap()),
!action_graph.unwrap().is_isomorphic(&result_graph.unwrap()),
"Failure on positive evaluation test file {} about {}",
file,
comment
);
}
}
},
);
});
}
#[test]
@ -395,11 +209,14 @@ fn ntriples_w3c_testsuite() {
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax").unwrap(),
);
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_positive_syntax)
manifest
.subjects_for_predicate_object(&rdf::TYPE, &rdft_test_ntriples_positive_syntax)
.for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
let comment = manifest
.object_for_subject_predicate(test, &rdfs_comment)
.unwrap();
if let Some(Term::NamedNode(file)) =
object_for_subject_predicate(&manifest, test, &mf_action)
manifest.object_for_subject_predicate(test, &mf_action)
{
if let Err(error) = client.load_ntriples(file.url().clone()) {
assert!(
@ -410,11 +227,14 @@ fn ntriples_w3c_testsuite() {
}
}
});
subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_negative_syntax)
manifest
.subjects_for_predicate_object(&rdf::TYPE, &rdft_test_ntriples_negative_syntax)
.for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
let comment = manifest
.object_for_subject_predicate(test, &rdfs_comment)
.unwrap();
if let Some(Term::NamedNode(file)) =
object_for_subject_predicate(&manifest, test, &mf_action)
manifest.object_for_subject_predicate(test, &mf_action)
{
assert!(
client.load_ntriples(file.url().clone()).is_err(),

Loading…
Cancel
Save