Simplifies data objects factory

pull/10/head
Tpt 7 years ago
parent c99c571995
commit 5c90fab6cc
  1. 140
      src/model/data.rs
  2. 10
      src/rio/ntriples/mod.rs
  3. 8
      src/rio/ntriples/ntriples_grammar.rustpeg
  4. 5
      src/rio/turtle/mod.rs
  5. 22
      src/rio/turtle/turtle_grammar.rustpeg
  6. 68
      tests/rdf_test_cases.rs

@ -68,6 +68,39 @@ impl fmt::Display for BlankNode {
} }
} }
/// A utility structure that generates blank node ids in a thread-safe way.
///
/// Ids are drawn from a single counter guarded by a mutex. The counter starts
/// at `0` and is incremented before being returned, so the first id is `1`.
#[derive(Debug, Default)]
struct U64IDProvider {
    /// Last id handed out (`0` until `next` has been called once).
    counter: Mutex<u64>,
}

impl U64IDProvider {
    /// Returns the next id, which is one greater than the previously returned id.
    ///
    /// A poisoned mutex is recovered instead of propagating the panic: the
    /// guarded value is a plain integer, so it is always in a usable state even
    /// if another thread panicked while holding the lock.
    pub fn next(&self) -> u64 {
        let mut id = self
            .counter
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        *id += 1;
        *id
    }
}
// Single shared id provider: `BlankNode::default()` draws every generated id
// from this one counter instead of from a per-factory counter.
lazy_static! {
static ref U64_ID_PROVIDER: U64IDProvider = U64IDProvider::default();
}
impl Default for BlankNode {
    /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node)
    /// whose id is the next value drawn from the shared `U64_ID_PROVIDER` counter,
    /// rendered as a decimal string.
    fn default() -> Self {
        let fresh_id = U64_ID_PROVIDER.next();
        BlankNode::new(fresh_id.to_string())
    }
}
/// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) /// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal)
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum Literal { pub enum Literal {
@ -86,6 +119,30 @@ lazy_static! {
} }
impl Literal { impl Literal {
/// Creates an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal)
/// from anything convertible into a `String`.
pub fn new_simple_literal(value: impl Into<String>) -> Self {
    let lexical_form: String = value.into();
    Literal::SimpleLiteral(lexical_form)
}
/// Creates an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal)
/// carrying an explicit [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
///
/// `value` becomes the lexical form and `datatype` the datatype IRI; both are
/// converted with `Into` and stored as given.
pub fn new_typed_literal(value: impl Into<String>, datatype: impl Into<NamedNode>) -> Self {
    let value: String = value.into();
    let datatype: NamedNode = datatype.into();
    Literal::TypedLiteral { value, datatype }
}
/// Creates an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// `value` becomes the lexical form and `language` the language tag; both are
/// converted with `Into` and stored as given.
pub fn new_language_tagged_literal(
    value: impl Into<String>,
    language: impl Into<String>,
) -> Self {
    let value: String = value.into();
    let language: String = language.into();
    Literal::LanguageTaggedString { value, language }
}
/// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form)
pub fn value(&self) -> &str { pub fn value(&self) -> &str {
match self { match self {
@ -453,86 +510,3 @@ impl QuadLike for Quad {
return self.graph_name; return self.graph_name;
} }
} }
/// A utility structure that generates blank node ids in a thread-safe way.
///
/// The counter lives behind an `Arc`, so clones of a provider share the same
/// sequence. The counter starts at `0` and is incremented before being
/// returned, so the first id is `1`.
#[derive(Debug, Clone, Default)]
struct U64IDProvider {
    /// Last id handed out (`0` until `next` has been called once), shared between clones.
    counter: Arc<Mutex<u64>>,
}

impl U64IDProvider {
    /// Returns the next id, which is one greater than the previously returned id.
    ///
    /// A poisoned mutex is recovered instead of propagating the panic: the
    /// guarded value is a plain integer, so it is always in a usable state even
    /// if another thread panicked while holding the lock.
    pub fn next(&self) -> u64 {
        let mut id = self
            .counter
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        *id += 1;
        *id
    }
}
/// A structure creating RDF elements
#[derive(Debug, Clone)]
pub struct DataFactory {
blank_node_id_provider: U64IDProvider,
}
impl Default for DataFactory {
fn default() -> Self {
DataFactory {
blank_node_id_provider: U64IDProvider::default(),
}
}
}
impl DataFactory {
    /// Creates an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) node
    /// from anything convertible into a `Url`.
    pub fn named_node(&self, iri: impl Into<Url>) -> NamedNode {
        NamedNode::new(iri)
    }

    /// Creates an RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node)
    /// carrying the id supplied by the caller.
    pub fn blank_node(&self, id: impl Into<String>) -> BlankNode {
        BlankNode::new(id)
    }

    /// Creates an RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node)
    /// whose id is the next value of this factory's id provider, rendered as a
    /// decimal string.
    pub fn new_blank_node(&self) -> BlankNode {
        let fresh_id = self.blank_node_id_provider.next();
        BlankNode::new(fresh_id.to_string())
    }

    /// Creates an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal).
    pub fn simple_literal(&self, value: impl Into<String>) -> Literal {
        let lexical_form: String = value.into();
        Literal::SimpleLiteral(lexical_form)
    }

    /// Creates an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal)
    /// carrying an explicit [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri).
    pub fn typed_literal(
        &self,
        value: impl Into<String>,
        datatype: impl Into<NamedNode>,
    ) -> Literal {
        //TODO: find the best representation
        let value: String = value.into();
        let datatype: NamedNode = datatype.into();
        Literal::TypedLiteral { value, datatype }
    }

    /// Creates an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
    pub fn language_tagged_literal(
        &self,
        value: impl Into<String>,
        language: impl Into<String>,
    ) -> Literal {
        let value: String = value.into();
        let language: String = language.into();
        Literal::LanguageTaggedString { value, language }
    }
}

@ -10,16 +10,12 @@ use std::io::BufRead;
use std::io::BufReader; use std::io::BufReader;
use std::io::Read; use std::io::Read;
pub fn read_ntriples<'a, R: Read + 'a>( pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator<Item = RioResult<Triple>> {
source: R, //TODO: use read_lines to avoid allocations
data_factory: &'a DataFactory,
) -> impl Iterator<Item = RioResult<Triple>> {
let factory = data_factory.clone(); //TODO: try to avoid clone here
//TODO: use read_lines to avoid allocations
BufReader::new(source) BufReader::new(source)
.lines() .lines()
.flat_map(move |line| match line { .flat_map(move |line| match line {
Ok(line) => match grammar::triple(line.as_str(), &factory) { Ok(line) => match grammar::triple(line.as_str()) {
Ok(triple) => Some(Ok(triple?)), Ok(triple) => Some(Ok(triple?)),
Err(error) => Some(Err(RioError::new(error))), Err(error) => Some(Err(RioError::new(error))),
}, },

@ -5,8 +5,6 @@ use std::char;
use std::str::FromStr; use std::str::FromStr;
use model::data::*; use model::data::*;
#![arguments(data_factory: &DataFactory)]
//[2] //[2]
#[pub] #[pub]
triple -> Option<Triple> = triple -> Option<Triple> =
@ -31,8 +29,8 @@ object -> Term =
//[6] //[6]
literal -> Literal = literal -> Literal =
v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { data_factory.typed_literal(v, t) } / v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } /
v: STRING_LITERAL_QUOTE _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v: STRING_LITERAL_QUOTE { v.into() } v: STRING_LITERAL_QUOTE { v.into() }
@ -62,7 +60,7 @@ STRING_LITERAL_QUOTE_simple_char -> char = c: $([^\u{0022}\u{005c}\u{000a}\u{000
//[141s] //[141s]
BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
data_factory.blank_node(b) BlankNode::new(b)
} }
//[10] //[10]

@ -28,10 +28,8 @@ impl ParserState {
pub fn read_turtle<'a, R: Read + 'a>( pub fn read_turtle<'a, R: Read + 'a>(
source: R, source: R,
data_factory: &'a DataFactory,
base_uri: impl Into<Option<Url>>, base_uri: impl Into<Option<Url>>,
) -> RioResult<impl Iterator<Item = Triple>> { ) -> RioResult<impl Iterator<Item = Triple>> {
let factory = data_factory.clone(); //TODO: try to avoid clone here
let mut state = ParserState { let mut state = ParserState {
base_uri: base_uri.into(), base_uri: base_uri.into(),
namespaces: HashMap::default(), namespaces: HashMap::default(),
@ -41,8 +39,7 @@ pub fn read_turtle<'a, R: Read + 'a>(
let mut string_buffer = String::default(); let mut string_buffer = String::default();
let mut triple_buffer = Vec::default(); let mut triple_buffer = Vec::default();
match BufReader::new(source).read_to_string(&mut string_buffer) { match BufReader::new(source).read_to_string(&mut string_buffer) {
Ok(_) => match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer, &factory) Ok(_) => match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer) {
{
Ok(_) => Ok(triple_buffer.into_iter()), Ok(_) => Ok(triple_buffer.into_iter()),
Err(error) => Err(RioError::new(error)), Err(error) => Err(RioError::new(error)),
}, },

@ -8,7 +8,7 @@ use model::vocab::xsd;
use std::iter; use std::iter;
use rio::turtle::ParserState; use rio::turtle::ParserState;
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>, data_factory: &DataFactory)] #![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>)]
//[1] //[1]
#[pub] #[pub]
@ -112,14 +112,14 @@ blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predica
state.cur_subject.pop().ok_or("No subject found in the stack") state.cur_subject.pop().ok_or("No subject found in the stack")
} }
blankNodePropertyList_open -> () = "[" { blankNodePropertyList_open -> () = "[" {
state.cur_subject.push(data_factory.new_blank_node().into()) state.cur_subject.push(BlankNode::default().into())
} }
//[15] //[15]
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' {
let mut current_list_node = NamedOrBlankNode::from(rdf::NIL.clone()); let mut current_list_node = NamedOrBlankNode::from(rdf::NIL.clone());
for obj in o.into_iter().rev() { for obj in o.into_iter().rev() {
let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node()); let new_blank_node = NamedOrBlankNode::from(BlankNode::default());
buffer.push(Triple::new(new_blank_node.clone(), rdf::FIRST.clone(), obj)); buffer.push(Triple::new(new_blank_node.clone(), rdf::FIRST.clone(), obj));
buffer.push(Triple::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node)); buffer.push(Triple::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node));
current_list_node = new_blank_node; current_list_node = new_blank_node;
@ -130,14 +130,14 @@ collection_value -> Term = o:object_value _ { o }
//[16] //[16]
NumericLiteral -> Literal = NumericLiteral -> Literal =
d:$(DOUBLE) { data_factory.typed_literal(d, xsd::DOUBLE.clone()) } / d:$(DOUBLE) { Literal::new_typed_literal(d, xsd::DOUBLE.clone()) } /
d:$(DECIMAL) { data_factory.typed_literal(d, xsd::DECIMAL.clone()) } / d:$(DECIMAL) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } /
i:$(INTEGER) { data_factory.typed_literal(i, xsd::INTEGER.clone()) } i:$(INTEGER) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) }
//[128s] //[128s]
RDFLiteral -> Literal = RDFLiteral -> Literal =
v:String _ "^^" _ t:iri { data_factory.typed_literal(v, t) } / v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } /
v:String _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v:String { v.into() } v:String { v.into() }
//[133s] //[133s]
@ -151,7 +151,7 @@ String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE
//[135s] //[135s]
iri -> NamedNode = i:(IRIREF / PrefixedName) {? iri -> NamedNode = i:(IRIREF / PrefixedName) {?
match state.url_parser().parse(&i) { match state.url_parser().parse(&i) {
Ok(url) => Ok(data_factory.named_node(url)), Ok(url) => Ok(NamedNode::new(url)),
Err(error) => Err("IRI parsing failed") Err(error) => Err("IRI parsing failed")
} }
} }
@ -162,8 +162,8 @@ PrefixedName -> String = PNAME_LN /
//[137s] //[137s]
BlankNode -> BlankNode = BlankNode -> BlankNode =
b:BLANK_NODE_LABEL { data_factory.blank_node(b) } / b:BLANK_NODE_LABEL { BlankNode::new(b) } /
ANON { data_factory.new_blank_node() } ANON { BlankNode::default() }
//[18] //[18]
IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" { IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" {

@ -1,29 +1,27 @@
#[macro_use]
extern crate lazy_static;
extern crate reqwest; extern crate reqwest;
extern crate rudf; extern crate rudf;
extern crate url; extern crate url;
use reqwest::Client; use reqwest::Client;
use rudf::model::data::*; use rudf::model::data::*;
use rudf::model::vocab::rdf;
use rudf::rio::RioError; use rudf::rio::RioError;
use rudf::rio::RioResult; use rudf::rio::RioResult;
use rudf::rio::ntriples::read_ntriples; use rudf::rio::ntriples::read_ntriples;
use rudf::rio::turtle::read_turtle; use rudf::rio::turtle::read_turtle;
use std::collections::HashSet; use std::collections::HashSet;
use std::iter::FromIterator; use std::iter::FromIterator;
use std::str::FromStr;
use url::Url; use url::Url;
struct RDFClient { struct RDFClient {
client: Client, client: Client,
data_factory: DataFactory,
} }
impl Default for RDFClient { impl Default for RDFClient {
fn default() -> Self { fn default() -> Self {
Self { Self {
client: Client::new(), client: Client::new(),
data_factory: DataFactory::default(),
} }
} }
} }
@ -31,18 +29,14 @@ impl Default for RDFClient {
impl RDFClient { impl RDFClient {
fn load_turtle(&self, uri: Url) -> RioResult<HashSet<Triple>> { fn load_turtle(&self, uri: Url) -> RioResult<HashSet<Triple>> {
match self.client.get(uri.clone()).send() { match self.client.get(uri.clone()).send() {
Ok(response) => Ok(HashSet::from_iter(read_turtle( Ok(response) => Ok(HashSet::from_iter(read_turtle(response, Some(uri))?)),
response,
&self.data_factory,
Some(uri),
)?)),
Err(error) => Err(RioError::new(error)), Err(error) => Err(RioError::new(error)),
} }
} }
fn load_ntriples(&self, uri: Url) -> RioResult<HashSet<Triple>> { fn load_ntriples(&self, uri: Url) -> RioResult<HashSet<Triple>> {
match self.client.get(uri).send() { match self.client.get(uri).send() {
Ok(response) => read_ntriples(response, &self.data_factory).collect(), Ok(response) => read_ntriples(response).collect(),
Err(error) => Err(RioError::new(error)), Err(error) => Err(RioError::new(error)),
} }
} }
@ -89,27 +83,21 @@ fn subject_for_predicate_object<'a>(
#[test] #[test]
fn turtle_w3c_testsuite() { fn turtle_w3c_testsuite() {
let client = RDFClient::default(); let client = RDFClient::default();
let data_factory = &client.data_factory;
let manifest = client let manifest = client
.load_turtle(Url::parse("https://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap()) .load_turtle(Url::parse("https://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap())
.unwrap(); .unwrap();
let rdf_type = data_factory let mf_action = NamedNode::from_str(
.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()); "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action",
let mf_action = data_factory.named_node( ).unwrap();
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action").unwrap(), let rdfs_comment = NamedNode::from_str("http://www.w3.org/2000/01/rdf-schema#comment").unwrap();
let rdft_test_turtle_positive_syntax = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(),
);
let rdft_test_turtle_negative_syntax = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(),
); );
let rdfs_comment = data_factory
.named_node(Url::parse("http://www.w3.org/2000/01/rdf-schema#comment").unwrap());
let rdft_test_turtle_positive_syntax =
Term::from(data_factory.named_node(
Url::parse("http://www.w3.org/ns/rdftest#TestTurtlePositiveSyntax").unwrap(),
));
let rdft_test_turtle_negative_syntax =
Term::from(data_factory.named_node(
Url::parse("http://www.w3.org/ns/rdftest#TestTurtleNegativeSyntax").unwrap(),
));
subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_positive_syntax) subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_positive_syntax)
.for_each(|test| { .for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
if let Some(Term::NamedNode(file)) = if let Some(Term::NamedNode(file)) =
@ -124,7 +112,7 @@ fn turtle_w3c_testsuite() {
} }
} }
}); });
subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_negative_syntax) subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_turtle_negative_syntax)
.for_each(|test| { .for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
if let Some(Term::NamedNode(file)) = if let Some(Term::NamedNode(file)) =
@ -143,25 +131,21 @@ fn turtle_w3c_testsuite() {
#[test] #[test]
fn ntriples_w3c_testsuite() { fn ntriples_w3c_testsuite() {
let client = RDFClient::default(); let client = RDFClient::default();
let data_factory = &client.data_factory;
let manifest = client let manifest = client
.load_turtle(Url::parse("https://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap()) .load_turtle(Url::parse("https://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap())
.unwrap(); .unwrap();
let rdf_type = data_factory let mf_action = NamedNode::from_str(
.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()); "http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action",
let mf_action = data_factory.named_node( ).unwrap();
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/test-manifest#action").unwrap(), let rdfs_comment = NamedNode::from_str("http://www.w3.org/2000/01/rdf-schema#comment").unwrap();
let rdft_test_ntriples_positive_syntax = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax").unwrap(),
);
let rdft_test_ntriples_negative_syntax = Term::from(
NamedNode::from_str("http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax").unwrap(),
); );
let rdfs_comment = data_factory
.named_node(Url::parse("http://www.w3.org/2000/01/rdf-schema#comment").unwrap());
let rdft_test_turtle_positive_syntax = Term::from(data_factory.named_node(
Url::parse("http://www.w3.org/ns/rdftest#TestNTriplesPositiveSyntax").unwrap(),
));
let rdft_test_turtle_negative_syntax = Term::from(data_factory.named_node(
Url::parse("http://www.w3.org/ns/rdftest#TestNTriplesNegativeSyntax").unwrap(),
));
subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_positive_syntax) subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_positive_syntax)
.for_each(|test| { .for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
if let Some(Term::NamedNode(file)) = if let Some(Term::NamedNode(file)) =
@ -176,7 +160,7 @@ fn ntriples_w3c_testsuite() {
} }
} }
}); });
subjects_for_predicate_object(&manifest, &rdf_type, &rdft_test_turtle_negative_syntax) subjects_for_predicate_object(&manifest, &rdf::TYPE, &rdft_test_ntriples_negative_syntax)
.for_each(|test| { .for_each(|test| {
let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap(); let comment = object_for_subject_predicate(&manifest, test, &rdfs_comment).unwrap();
if let Some(Term::NamedNode(file)) = if let Some(Term::NamedNode(file)) =

Loading…
Cancel
Save