Uses Rio N-Triples and Turtle parsers

pull/10/head
Tpt 6 years ago
parent 10ce4aaeca
commit 74dadf5f21
  1. 2
      lib/Cargo.toml
  2. 2
      lib/build.rs
  3. 1
      lib/src/rio/mod.rs
  4. 15
      lib/src/rio/ntriples.rs
  5. 75
      lib/src/rio/ntriples/mod.rs
  6. 92
      lib/src/rio/ntriples/ntriples_grammar.rustpeg
  7. 64
      lib/src/rio/rio.rs
  8. 22
      lib/src/rio/turtle.rs
  9. 101
      lib/src/rio/turtle/mod.rs
  10. 272
      lib/src/rio/turtle/turtle_grammar.rustpeg
  11. 58
      lib/tests/rdf_test_cases.rs
  12. 2
      lib/tests/sparql_test_cases.rs
  13. 3
      server/src/main.rs

@ -29,6 +29,8 @@ rust_decimal = "1"
chrono = "0.4" chrono = "0.4"
failure = "0.1" failure = "0.1"
regex = "1" regex = "1"
rio_api = "0.1"
rio_turtle = "0.1"
[build-dependencies] [build-dependencies]
peg = "0.5" peg = "0.5"

@ -1,7 +1,5 @@
use peg; use peg;
fn main() { fn main() {
peg::cargo_build("src/rio/ntriples/ntriples_grammar.rustpeg");
peg::cargo_build("src/rio/turtle/turtle_grammar.rustpeg");
peg::cargo_build("src/sparql/sparql_grammar.rustpeg"); peg::cargo_build("src/sparql/sparql_grammar.rustpeg");
} }

@ -1,6 +1,7 @@
//! Implementations of serializers and deserializers for usual RDF syntaxes //! Implementations of serializers and deserializers for usual RDF syntaxes
pub mod ntriples; pub mod ntriples;
mod rio;
pub mod turtle; pub mod turtle;
pub(crate) mod utils; pub(crate) mod utils;
pub mod xml; pub mod xml;

@ -0,0 +1,15 @@
//! Implementation of [N-Triples](https://www.w3.org/TR/n-triples/) RDF syntax
use crate::model::Triple;
use crate::rio::rio::convert_triple;
use crate::Result;
use rio_api::parser::TripleParser;
use rio_turtle::NTriplesParser;
use std::collections::BTreeMap;
use std::io::BufRead;
/// Parses an [N-Triples](https://www.w3.org/TR/n-triples/) document supplied as a Rust `BufRead`.
///
/// Returns an error straight away if the underlying Rio parser cannot be created.
/// Otherwise returns an iterator of `Result<Triple>`: each triple produced by the parser is
/// converted with `convert_triple`, and all conversions share a single blank node map.
pub fn read_ntriples<R: BufRead>(reader: R) -> Result<impl Iterator<Item = Result<Triple>>> {
    let parser = NTriplesParser::new(reader)?;
    // Owned by the closure below so the same map is used for the whole iteration.
    let mut blank_nodes = BTreeMap::default();
    let triples = parser.into_iter(move |rio_triple| convert_triple(rio_triple, &mut blank_nodes));
    Ok(triples)
}

@ -1,75 +0,0 @@
//! Implementation of [N-Triples](https://www.w3.org/TR/n-triples/) RDF syntax
/// Private module wrapping the generated N-Triples parser together with the helpers it calls.
mod grammar {
// Lints silenced for the whole module; presumably they are triggered by the generated
// parser code included at the bottom — TODO confirm.
#![allow(
clippy::suspicious_else_formatting,
clippy::len_zero,
clippy::single_match,
clippy::unit_arg,
clippy::naive_bytecount
)]
use crate::rio::utils::unescape_characters;
use crate::utils::StaticSliceMap;
use lazy_static::lazy_static;
use std::borrow::Cow;
/// Bytes accepted after a backslash in an `ECHAR` escape: t, b, n, r, f, `"`, `'` and `\`.
const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\'];
lazy_static! {
// Pairs each escape letter (first slice) with the character it stands for (second slice),
// position by position: tab, backspace, line feed, carriage return, form feed,
// double quote, single quote and backslash.
static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'],
&[
'\u{0009}', '\u{0008}', '\u{000A}', '\u{000D}', '\u{000C}', '\u{0022}', '\u{0027}',
'\u{005C}'
]
);
}
/// Passes `input` to `unescape_characters` together with the `ECHAR` tables above.
///
/// Returns a `Cow`; presumably the input is borrowed when it contains nothing to
/// unescape — verify against `unescape_characters`.
pub fn unescape_echars(input: &str) -> Cow<'_, str> {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
}
// Pulls in `ntriples_grammar.rs` from Cargo's `OUT_DIR`; presumably produced by the build
// script from the `.rustpeg` grammar — TODO confirm.
include!(concat!(env!("OUT_DIR"), "/ntriples_grammar.rs"));
}
use crate::model::*;
use crate::Result;
use std::collections::BTreeMap;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
/// Line-by-line N-Triples reader: each call to `next` parses lines until it finds a triple.
struct NTriplesIterator<R: Read> {
    /// Scratch buffer holding the line currently being parsed; cleared after every parse.
    buffer: String,
    /// Buffered view of the underlying source.
    reader: BufReader<R>,
    /// Blank node labels already seen, mapped to the `BlankNode` created for them.
    bnodes_map: BTreeMap<String, BlankNode>,
}

impl<R: Read> Iterator for NTriplesIterator<R> {
    type Item = Result<Triple>;

    /// Returns the next triple, a read or parse error, or `None` at end of input.
    ///
    /// Lines for which the grammar yields no triple are skipped. This is done with a loop
    /// rather than by calling `next` again, so a long run of such lines cannot grow the
    /// call stack.
    fn next(&mut self) -> Option<Result<Triple>> {
        loop {
            if let Err(error) = self.reader.read_line(&mut self.buffer) {
                return Some(Err(error.into()));
            }
            if self.buffer.is_empty() {
                return None; // End of file
            }
            let result = grammar::triple(&self.buffer, &mut self.bnodes_map);
            // Reset the buffer before looking at the result so every exit path leaves it empty.
            self.buffer.clear();
            match result {
                Ok(Some(triple)) => return Some(Ok(triple)),
                Ok(None) => continue,
                Err(error) => return Some(Err(error.into())),
            }
        }
    }
}
/// Builds an iterator over the `Triple`s of a [N-Triples](https://www.w3.org/TR/n-triples/) file read from a Rust `Read`.
///
/// The source is wrapped in a `BufReader`, and the iterator starts with an empty line buffer
/// and an empty blank node map.
pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator<Item = Result<Triple>> {
    let reader = BufReader::new(source);
    NTriplesIterator {
        reader,
        bnodes_map: BTreeMap::new(),
        buffer: String::new(),
    }
}

@ -1,92 +0,0 @@
//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar
use std::char;
use std::str::FromStr;
use crate::model::*;
use std::collections::BTreeMap;
use crate::rio::utils::unescape_unicode_codepoints;
#![arguments(bnodes_map: &mut BTreeMap<String, BlankNode>)]
//[2]
pub triple -> Option<Triple> =
_ s:subject _ p:predicate _ o:object _ "." _ comment? EOL? { Some(Triple::new(s, p, o)) } /
_ comment? EOL? { None }
//[3]
subject -> NamedOrBlankNode =
i: IRIREF { i.into() } /
b: BLANK_NODE_LABEL { b.into() }
//[4]
predicate -> NamedNode = i:IRIREF {
i
}
//[5]
object -> Term =
i: IRIREF { i.into() } /
b: BLANK_NODE_LABEL { b.into() } /
l: literal { l.into() }
//[6]
literal -> Literal =
v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } /
v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v: STRING_LITERAL_QUOTE { Literal::new_simple_literal(v) }
//[144s]
LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {?
match LanguageTag::parse(&l) {
Ok(langtag) => Ok(langtag),
Err(error) => Err("language tag parsing failed")
}
}
//[7]
EOL = [\r\n]+
//[8]
IRIREF -> NamedNode = "<" _ i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) _ ">" {?
let s = unescape_unicode_codepoints(i);
match NamedNode::from_str(&s) {
Ok(named_node) => Ok(named_node),
Err(error) => Err("IRI parsing failed")
}
}
// NOTE(review): no other rule visible in this grammar references `_IRIREF_simple_char`, so it
// looks like dead code — confirm before relying on it.
// `$()` captures the text of an empty match, so `c.chars().next()` would presumably be `None`
// and the `unwrap()` would panic if this rule were ever invoked.
_IRIREF_simple_char -> char = c:$() { c.chars().next().unwrap() }
//[9]
STRING_LITERAL_QUOTE -> String = "\"" l:$(([^\u{0022}\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" {
unescape_unicode_codepoints(&unescape_echars(l)).into_owned()
}
//[141s]
BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone()
}
//[10]
UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX
//[153s]
ECHAR -> () = '\\' [tbnrf"'\\]
//[157s]
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}]
//[158s]
PN_CHARS_U -> () = '_' / ':' / PN_CHARS_BASE
//[160s]
PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
//[162s]
HEX -> () = ([0-9A-Fa-f])
//space
_ = #quiet<[ \t]*>
//comment
comment = #quiet<"#" [^\r\n]*>

@ -0,0 +1,64 @@
//! Wrapper for RIO parsers
use crate::model::*;
use crate::Result;
use rio_api::model as rio;
use std::collections::BTreeMap;
use std::str::FromStr;
/// Converts a Rio triple into this crate's `Triple`.
///
/// Subject, predicate and object are converted in that order, and the first conversion error
/// is returned. `bnodes_map` is passed to the subject and object conversions, which use it to
/// resolve blank node identifiers.
pub fn convert_triple(
    value: rio::Triple,
    bnodes_map: &mut BTreeMap<String, BlankNode>,
) -> Result<Triple> {
    let subject = convert_named_or_blank_node(value.subject, bnodes_map)?;
    let predicate = convert_named_node(value.predicate)?;
    let object = convert_term(value.object, bnodes_map)?;
    Ok(Triple::new(subject, predicate, object))
}
/// Converts a Rio term (named node, blank node or literal) into this crate's `Term`.
///
/// Named node and literal conversions can fail and their error is returned unchanged;
/// the blank node conversion cannot fail.
fn convert_term(value: rio::Term, bnodes_map: &mut BTreeMap<String, BlankNode>) -> Result<Term> {
    match value {
        rio::Term::NamedNode(node) => convert_named_node(node).map(Into::into),
        rio::Term::BlankNode(node) => Ok(convert_blank_node(node, bnodes_map).into()),
        rio::Term::Literal(literal) => convert_literal(literal).map(Into::into),
    }
}
fn convert_named_or_blank_node(
value: rio::NamedOrBlankNode,
bnodes_map: &mut BTreeMap<String, BlankNode>,
) -> Result<NamedOrBlankNode> {
Ok(match value {
rio::NamedOrBlankNode::NamedNode(v) => convert_named_node(v)?.into(),
rio::NamedOrBlankNode::BlankNode(v) => convert_blank_node(v, bnodes_map).into(),
})
}
/// Parses the IRI text carried by a Rio named node into this crate's `NamedNode`,
/// using `NamedNode`'s `FromStr` implementation.
fn convert_named_node(value: rio::NamedNode) -> Result<NamedNode> {
    value.iri.parse()
}
/// Returns the `BlankNode` associated with a Rio blank node identifier.
///
/// The first time an identifier is seen, a fresh `BlankNode::default()` is stored for it in
/// `bnodes_map`; later lookups of the same identifier return a clone of that stored node.
fn convert_blank_node(
    value: rio::BlankNode,
    bnodes_map: &mut BTreeMap<String, BlankNode>,
) -> BlankNode {
    let label = value.id.to_string();
    let node = bnodes_map.entry(label).or_insert_with(BlankNode::default);
    node.clone()
}
/// Converts a Rio literal into this crate's `Literal`.
///
/// Simple literals always convert. Language-tagged strings fail if the tag is rejected by
/// `LanguageTag::parse`, and typed literals fail if the datatype IRI cannot be converted.
fn convert_literal(value: rio::Literal) -> Result<Literal> {
    match value {
        rio::Literal::Simple { value: lexical } => Ok(Literal::new_simple_literal(lexical)),
        rio::Literal::LanguageTaggedString {
            value: lexical,
            language,
        } => {
            let tag = LanguageTag::parse(language)?;
            Ok(Literal::new_language_tagged_literal(lexical, tag))
        }
        rio::Literal::Typed {
            value: lexical,
            datatype,
        } => {
            let datatype = convert_named_node(datatype)?;
            Ok(Literal::new_typed_literal(lexical, datatype))
        }
    }
}

@ -0,0 +1,22 @@
//! Implementation of [Turtle](https://www.w3.org/TR/turtle/) RDF syntax
use crate::model::Triple;
use crate::rio::rio::convert_triple;
use crate::Result;
use rio_api::parser::TripleParser;
use rio_turtle::TurtleParser;
use std::collections::BTreeMap;
use std::io::BufRead;
use url::Url;
/// Parses a [Turtle](https://www.w3.org/TR/turtle/) document supplied as a Rust `BufRead`.
///
/// `base_url` is handed to the Rio parser as its base IRI; when it is `None` an empty string
/// is passed instead. Returns an error straight away if the parser cannot be created.
/// Otherwise returns an iterator of `Result<Triple>`: each triple is converted with
/// `convert_triple`, and all conversions share a single blank node map.
pub fn read_turtle<R: BufRead>(
    reader: R,
    base_url: Option<Url>,
) -> Result<impl Iterator<Item = Result<Triple>>> {
    let base_iri = match &base_url {
        Some(url) => url.as_str(),
        None => "",
    };
    let parser = TurtleParser::new(reader, base_iri)?;
    // Owned by the closure below so the same map is used for the whole iteration.
    let mut blank_nodes = BTreeMap::default();
    Ok(parser.into_iter(move |rio_triple| convert_triple(rio_triple, &mut blank_nodes)))
}

@ -1,101 +0,0 @@
//! Implementation of [Turtle](https://www.w3.org/TR/turtle/) RDF syntax
mod grammar {
#![allow(
clippy::suspicious_else_formatting,
clippy::len_zero,
clippy::single_match,
clippy::unit_arg,
clippy::naive_bytecount
)]
use crate::model::*;
use crate::rio::utils::unescape_characters;
use crate::utils::StaticSliceMap;
use lazy_static::lazy_static;
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::Read;
use url::ParseOptions;
use url::Url;
include!(concat!(env!("OUT_DIR"), "/turtle_grammar.rs"));
/// Mutable state handed to the generated Turtle grammar while a document is parsed.
pub struct ParserState {
    /// Base used to resolve IRI references; replaced by `@base` / `BASE` directives.
    base_uri: Option<Url>,
    /// Prefix declarations collected from `@prefix` / `PREFIX` directives.
    namespaces: HashMap<String, Url>,
    /// Stack of subjects; the grammar reads the last entry when emitting a triple.
    cur_subject: Vec<NamedOrBlankNode>,
    /// Stack of predicates; the grammar reads the last entry when emitting a triple.
    cur_predicate: Vec<NamedNode>,
    /// Blank node labels already seen, mapped to the `BlankNode` created for them.
    bnodes_map: BTreeMap<String, BlankNode>,
}

impl ParserState {
    /// Returns URL parse options configured with the current base URI, if any.
    fn url_parser(&self) -> ParseOptions<'_> {
        let base = self.base_uri.as_ref();
        Url::options().base_url(base)
    }
}
/// Parses a [Turtle](https://www.w3.org/TR/turtle/) file from a Rust `Read` and returns an iterator on the parsed `Triple`s.
///
/// Warning: this implementation has not been optimized yet. The whole input is first read
/// into a `String` and every triple found is stored in memory before the iterator is returned.
/// This implementation also requires that blank node ids are valid UTF-8.
///
/// Fails if the source cannot be read into a string or if the grammar rejects the document.
pub fn read_turtle<'a, R: Read + 'a>(
    source: R,
    base_uri: impl Into<Option<Url>>,
) -> super::super::super::Result<impl Iterator<Item = Triple>> {
    let mut state = ParserState {
        base_uri: base_uri.into(),
        namespaces: HashMap::new(),
        cur_subject: Vec::new(),
        cur_predicate: Vec::new(),
        bnodes_map: BTreeMap::new(),
    };
    let mut triples = Vec::new();

    // The generated parser works on a `&str`, so the full document is loaded first.
    let mut document = String::new();
    BufReader::new(source).read_to_string(&mut document)?;

    turtleDoc(&document, &mut state, &mut triples)?;
    Ok(triples.into_iter())
}
/// Bytes accepted after a backslash in an `ECHAR` escape: t, b, n, r, f, `"`, `'` and `\`.
const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\'];
lazy_static! {
// Pairs each escape letter (first slice) with the character it stands for (second slice),
// position by position: tab, backspace, line feed, carriage return, form feed,
// double quote, single quote and backslash.
static ref UNESCAPE_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'],
&[
'\u{0009}', '\u{0008}', '\u{000A}', '\u{000D}', '\u{000C}', '\u{0022}', '\u{0027}',
'\u{005C}'
]
);
}
/// Passes `input` to `unescape_characters` together with the `ECHAR` tables above.
fn unescape_echars(input: &str) -> Cow<'_, str> {
unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT)
}
/// Bytes that may follow a backslash in a prefixed-name local part; this is the same
/// character set as the grammar's `PN_LOCAL_ESC` rule.
const UNESCAPE_PN_CHARACTERS: [u8; 20] = [
b'_', b'~', b'.', b'-', b'!', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b';', b'=',
b'/', b'?', b'#', b'@', b'%',
];
lazy_static! {
// Keys and values are the same list: each escaped character maps to itself.
static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap<char, char> = StaticSliceMap::new(
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
],
&[
'_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/',
'?', '#', '@', '%'
]
);
}
/// Passes `input` to `unescape_characters` together with the local-name escape tables above.
pub fn unescape_pn_local(input: &str) -> Cow<'_, str> {
unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT)
}
}
pub use self::grammar::read_turtle;

@ -1,272 +0,0 @@
//See https://www.w3.org/TR/turtle/#sec-grammar
use std::char;
use crate::model::vocab::rdf;
use crate::model::vocab::xsd;
use std::str::FromStr;
use crate::rio::utils::unescape_unicode_codepoints;
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>)]
//[1]
pub turtleDoc -> () = _ (statement _)*
//[2]
statement -> () = directive / triples "."
//[3]
directive -> () = prefixID / base / sparqlPrefix / sparqlBase
//[4]
prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." {
state.namespaces.insert(ns.into(), i);
}
//[5]
base -> () = "@base" _ url:IRIREF _ "." {
state.base_uri = Some(url);
}
//[5s]
sparqlBase -> () = "BASE"i _ url:IRIREF {
state.base_uri = Some(url);
}
//[6s]
sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF {
state.namespaces.insert(ns.into(), i);
}
//[6]
// Matches either a pushed subject followed by a predicate-object list, or a blank node
// property list used as subject with an optional predicate-object list.
// NOTE(review): the `{ state.cur_subject.pop(); }` action appears to be attached to the second
// alternative only (assuming rust-peg binds an action to one sequence, not the whole choice —
// confirm). If so, the subject pushed by `subject_push` in the first alternative is never
// popped from `state.cur_subject`.
triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? {
state.cur_subject.pop();
}
subject_push -> () = s:subject {
state.cur_subject.push(s)
}
triples_blankNodePropertyList_push -> () = s: blankNodePropertyList {
state.cur_subject.push(s)
}
//[7]
predicateObjectList -> () = predicateObject (";" _ predicateObject?)*
predicateObject -> () = predicate_push _ objectList _ {
state.cur_predicate.pop();
}
predicate_push -> () = v:verb {
state.cur_predicate.push(v)
}
//[8]
objectList -> () = object _ ("," _ object _)*
//[9]
verb -> NamedNode = predicate /
"a" { rdf::TYPE.clone() }
// [10]
subject -> NamedOrBlankNode =
i:iri { i.into() } /
b:BlankNode { b.into() } /
c:collection { c }
//[11]
predicate -> NamedNode = iri
// [12]
object -> () = o:object_value {?
match state.cur_subject.last() {
Some(s) => match state.cur_predicate.last() {
Some(p) => {
buffer.push(Triple::new(s.clone(), p.clone(), o));
Ok(())
}
None => Err("Predicate not found")
},
None => Err("Subject not found")
}
}
object_value -> Term =
i:iri { i.into() } /
b:BlankNode { b.into() } /
c:collection { c.into() } /
b:blankNodePropertyList { b.into() } /
l:literal { l.into() }
//[13]
literal -> Literal = RDFLiteral / NumericLiteral / BooleanLiteral
//[14]
blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predicateObjectList _ "]" {?
state.cur_subject.pop().ok_or("No subject found in the stack")
}
blankNodePropertyList_open -> () = "[" {
state.cur_subject.push(BlankNode::default().into())
}
//[15]
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' {
let mut current_list_node = NamedOrBlankNode::from(rdf::NIL.clone());
for obj in o.into_iter().rev() {
let new_blank_node = NamedOrBlankNode::from(BlankNode::default());
buffer.push(Triple::new(new_blank_node.clone(), rdf::FIRST.clone(), obj));
buffer.push(Triple::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node));
current_list_node = new_blank_node;
}
current_list_node
}
collection_value -> Term = o:object_value _ { o }
//[16]
NumericLiteral -> Literal =
d:$(DOUBLE) {? match f64::from_str(d) {
Ok(value) => Ok(value.into()),
Err(_) => Err("Invalid xsd:double")
} } /
d:$(DECIMAL) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } /
i:$(INTEGER) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) }
//[128s]
RDFLiteral -> Literal =
v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } /
v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } /
v:String { Literal::new_simple_literal(v) }
//[133s]
BooleanLiteral -> Literal =
"true" { true.into() } /
"false" { false.into() }
//[17]
String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE
//[135s]
iri -> NamedNode = i:(IRIREF / PrefixedName) {
i.into()
}
//[136s]
PrefixedName -> Url = PNAME_LN /
ns:PNAME_NS {? state.namespaces.get(ns).cloned().ok_or("Prefix not found") }
//[137s]
BlankNode -> BlankNode =
b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } /
ANON { BlankNode::default() }
//[18]
IRIREF -> Url = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) ">" {?
match state.url_parser().parse(&unescape_unicode_codepoints(i)) {
Ok(url) => Ok(url),
Err(error) => Err("IRI parsing failed")
}
}
//[139s]
PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") {
ns
}
//[140s]
PNAME_LN -> Url = ns:$(PNAME_NS) local:$(PN_LOCAL) {?
match state.namespaces.get(ns) {
Some(ns) => match Url::parse(&(ns.to_string() + &unescape_pn_local(local))) {
Ok(url) => Ok(url),
Err(error) => Err("IRI parsing failed")
},
None => Err("Prefix not found")
}
}
//[141s]
BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
b
}
//[144s]
LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {?
match LanguageTag::parse(&l) {
Ok(langtag) => Ok(langtag),
Err(error) => Err("language tag parsing failed")
}
}
//[19]
INTEGER -> () = [+-]? [0-9]+
//[20]
DECIMAL -> () = [+-]? [0-9]* "." [0-9]+
//[21]
DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT
//[154s]
EXPONENT -> () = [eE] [+-]? [0-9]+
//[22]
STRING_LITERAL_QUOTE -> String = "\"" l: $(([^"\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" {
unescape_unicode_codepoints(&unescape_echars(l)).into_owned()
}
//[23]
STRING_LITERAL_SINGLE_QUOTE -> String = "'" l:$(([^'\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "'" {
unescape_unicode_codepoints(&unescape_echars(l)).into_owned()
}
//[24]
STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" l:$(STRING_LITERAL_LONG_SINGLE_QUOTE_inner*) "'''" {
unescape_unicode_codepoints(&unescape_echars(l)).into_owned()
}
STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> () = ("''" / "'")? ([^'\u{005c}] / ECHAR / UCHAR)
//[25]
STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" l:$(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" {
unescape_unicode_codepoints(&unescape_echars(l)).into_owned()
}
STRING_LITERAL_LONG_QUOTE_inner -> () = ("\"\"" / "\"")? ([^"\u{005c}] / ECHAR / UCHAR)
//[26]
UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX
//[159s]
ECHAR -> () = "\\" [tbnrf"'\\]
//[161s]
WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]>
//[162s]
ANON -> () = "[" WS* "]"
//[163s]
PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}]
//[164s]
PN_CHARS_U -> () = "_" / PN_CHARS_BASE
//[166s]
PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U
//[167s]
PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)*
//[168s]
PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)?
//[169s]
PLX -> () = PERCENT / PN_LOCAL_ESC
//[170s]
PERCENT -> () = "%" HEX HEX
//[171s]
HEX -> () = ([0-9A-Fa-f])
//[172s]
PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%]
//space
_ = #quiet<([ \t\n\r] / comment)*>
//comment
comment = #quiet<"#" [^\r\n]*>

@ -17,42 +17,9 @@ use url::Url;
#[test] #[test]
fn turtle_w3c_testsuite() { fn turtle_w3c_testsuite() {
let manifest_url = Url::parse("http://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap(); let manifest_url = Url::parse("http://w3c.github.io/rdf-tests/turtle/manifest.ttl").unwrap();
//TODO: make blacklist pass
let test_blacklist = vec![
//UTF-8 broken surrogates in BNode ids
NamedNode::new(
manifest_url
.join("#prefix_with_PN_CHARS_BASE_character_boundaries")
.unwrap(),
),
NamedNode::new(
manifest_url
.join("#labeled_blank_node_with_PN_CHARS_BASE_character_boundaries")
.unwrap(),
),
NamedNode::new(
manifest_url
.join("#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries")
.unwrap(),
),
NamedNode::new(
manifest_url
.join("#localName_with_nfc_PN_CHARS_BASE_character_boundaries")
.unwrap(),
),
NamedNode::new(manifest_url.join("#IRI-resolution-01").unwrap()),
NamedNode::new(manifest_url.join("#IRI-resolution-02").unwrap()),
NamedNode::new(manifest_url.join("#IRI-resolution-07").unwrap()),
NamedNode::new(manifest_url.join("#turtle-subm-01").unwrap()),
NamedNode::new(manifest_url.join("#turtle-subm-27").unwrap()),
];
for test_result in TestManifest::new(manifest_url) { for test_result in TestManifest::new(manifest_url) {
let test = test_result.unwrap(); let test = test_result.unwrap();
if test_blacklist.contains(&test.id) {
continue;
}
if test.kind == "TestTurtlePositiveSyntax" { if test.kind == "TestTurtlePositiveSyntax" {
if let Err(error) = load_turtle(test.action.clone()) { if let Err(error) = load_turtle(test.action.clone()) {
assert!(false, "Failure on {} with error: {}", test, error) assert!(false, "Failure on {} with error: {}", test, error)
@ -107,7 +74,7 @@ fn turtle_w3c_testsuite() {
#[test] #[test]
fn ntriples_w3c_testsuite() { fn ntriples_w3c_testsuite() {
let manifest_url = Url::parse("http://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap(); let manifest_url = Url::parse("http://w3c.github.io/rdf-tests/ntriples/manifest.ttl").unwrap();
for test_result in TestManifest::new(manifest_url) { for test_result in TestManifest::new(manifest_url) {
let test = test_result.unwrap(); let test = test_result.unwrap();
@ -116,11 +83,9 @@ fn ntriples_w3c_testsuite() {
assert!(false, "Failure on {} with error: {}", test, error) assert!(false, "Failure on {} with error: {}", test, error)
} }
} else if test.kind == "TestNTriplesNegativeSyntax" { } else if test.kind == "TestNTriplesNegativeSyntax" {
assert!( if let Ok(graph) = load_ntriples(test.action.clone()) {
load_ntriples(test.action.clone()).is_err(), assert!(false, "Failure on {}, found:\n{}", test, graph);
"Failure on {}", }
test
);
} else { } else {
assert!(false, "Not supported test: {}", test); assert!(false, "Not supported test: {}", test);
} }
@ -177,11 +142,11 @@ fn rdf_xml_w3c_testsuite() -> Result<()> {
} }
fn load_turtle(url: Url) -> Result<MemoryGraph> { fn load_turtle(url: Url) -> Result<MemoryGraph> {
Ok(read_turtle(read_file(&url)?, Some(url))?.collect()) read_turtle(read_file(&url)?, Some(url))?.collect()
} }
fn load_ntriples(url: Url) -> Result<MemoryGraph> { fn load_ntriples(url: Url) -> Result<MemoryGraph> {
read_ntriples(read_file(&url)?).collect() read_ntriples(read_file(&url)?)?.collect()
} }
fn load_rdf_xml(url: Url) -> Result<MemoryGraph> { fn load_rdf_xml(url: Url) -> Result<MemoryGraph> {
@ -190,13 +155,8 @@ fn load_rdf_xml(url: Url) -> Result<MemoryGraph> {
fn to_relative_path(url: &Url) -> Result<String> { fn to_relative_path(url: &Url) -> Result<String> {
let url = url.as_str(); let url = url.as_str();
if url.starts_with("http://www.w3.org/2013/N-TriplesTests") { if url.starts_with("http://w3c.github.io/rdf-tests/") {
Ok(url.replace( Ok(url.replace("http://w3c.github.io/", ""))
"http://www.w3.org/2013/N-TriplesTests",
"rdf-tests/ntriples/",
))
} else if url.starts_with("http://www.w3.org/2013/TurtleTests/") {
Ok(url.replace("http://www.w3.org/2013/TurtleTests/", "rdf-tests/turtle/"))
} else if url.starts_with("http://www.w3.org/2013/RDFXMLTests/") { } else if url.starts_with("http://www.w3.org/2013/RDFXMLTests/") {
Ok(url.replace("http://www.w3.org/2013/RDFXMLTests/", "rdf-tests/rdf-xml/")) Ok(url.replace("http://www.w3.org/2013/RDFXMLTests/", "rdf-tests/rdf-xml/"))
} else { } else {

@ -241,7 +241,7 @@ fn sparql_w3c_query_evaluation_testsuite() {
fn load_graph(url: Url) -> Result<MemoryGraph> { fn load_graph(url: Url) -> Result<MemoryGraph> {
if url.as_str().ends_with(".ttl") { if url.as_str().ends_with(".ttl") {
Ok(read_turtle(read_file(&url)?, Some(url))?.collect()) read_turtle(read_file(&url)?, Some(url))?.collect()
} else if url.as_str().ends_with(".rdf") { } else if url.as_str().ends_with(".rdf") {
read_rdf_xml(read_file(&url)?, Some(url)).collect() read_rdf_xml(read_file(&url)?, Some(url)).collect()
} else { } else {

@ -38,6 +38,7 @@ use rudf::store::MemoryGraph;
use rudf::store::RocksDbDataset; use rudf::store::RocksDbDataset;
use serde_derive::Deserialize; use serde_derive::Deserialize;
use std::fs::File; use std::fs::File;
use std::io::BufReader;
use std::panic::RefUnwindSafe; use std::panic::RefUnwindSafe;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
@ -99,7 +100,7 @@ fn main_with_dataset<D: SparqlDataset + Send + Sync + RefUnwindSafe + 'static>(
if let Some(nt_file) = matches.value_of("ntriples") { if let Some(nt_file) = matches.value_of("ntriples") {
println!("Loading NTriples file {}", nt_file); println!("Loading NTriples file {}", nt_file);
let default_graph = dataset.default_graph(); let default_graph = dataset.default_graph();
for quad in read_ntriples(File::open(nt_file)?) { for quad in read_ntriples(BufReader::new(File::open(nt_file)?))? {
default_graph.insert(&quad?)? default_graph.insert(&quad?)?
} }
} }

Loading…
Cancel
Save