From 74dadf5f2176458ede820cee0e173bbc00dd5165 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 28 Jul 2019 23:10:52 +0200 Subject: [PATCH] Uses Rio N-Triples and Turtle parsers --- lib/Cargo.toml | 2 + lib/build.rs | 2 - lib/src/rio/mod.rs | 1 + lib/src/rio/ntriples.rs | 15 + lib/src/rio/ntriples/mod.rs | 75 ----- lib/src/rio/ntriples/ntriples_grammar.rustpeg | 92 ------ lib/src/rio/rio.rs | 64 +++++ lib/src/rio/turtle.rs | 22 ++ lib/src/rio/turtle/mod.rs | 101 ------- lib/src/rio/turtle/turtle_grammar.rustpeg | 272 ------------------ lib/tests/rdf_test_cases.rs | 58 +--- lib/tests/sparql_test_cases.rs | 2 +- server/src/main.rs | 3 +- 13 files changed, 116 insertions(+), 593 deletions(-) create mode 100644 lib/src/rio/ntriples.rs delete mode 100644 lib/src/rio/ntriples/mod.rs delete mode 100644 lib/src/rio/ntriples/ntriples_grammar.rustpeg create mode 100644 lib/src/rio/rio.rs create mode 100644 lib/src/rio/turtle.rs delete mode 100644 lib/src/rio/turtle/mod.rs delete mode 100644 lib/src/rio/turtle/turtle_grammar.rustpeg diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 485e9ff1..71c1c5f7 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -29,6 +29,8 @@ rust_decimal = "1" chrono = "0.4" failure = "0.1" regex = "1" +rio_api = "0.1" +rio_turtle = "0.1" [build-dependencies] peg = "0.5" diff --git a/lib/build.rs b/lib/build.rs index 060ca5db..dd649463 100644 --- a/lib/build.rs +++ b/lib/build.rs @@ -1,7 +1,5 @@ use peg; fn main() { - peg::cargo_build("src/rio/ntriples/ntriples_grammar.rustpeg"); - peg::cargo_build("src/rio/turtle/turtle_grammar.rustpeg"); peg::cargo_build("src/sparql/sparql_grammar.rustpeg"); } diff --git a/lib/src/rio/mod.rs b/lib/src/rio/mod.rs index f06a626e..192af82e 100644 --- a/lib/src/rio/mod.rs +++ b/lib/src/rio/mod.rs @@ -1,6 +1,7 @@ //! 
Implementations of serializers and deserializers for usual RDF syntaxes pub mod ntriples; +mod rio; pub mod turtle; pub(crate) mod utils; pub mod xml; diff --git a/lib/src/rio/ntriples.rs b/lib/src/rio/ntriples.rs new file mode 100644 index 00000000..7c8ece60 --- /dev/null +++ b/lib/src/rio/ntriples.rs @@ -0,0 +1,15 @@
+//! Implementation of [N-Triples](https://www.w3.org/TR/n-triples/) RDF syntax
+
+use crate::model::Triple;
+use crate::rio::rio::convert_triple;
+use crate::Result;
+use rio_api::parser::TripleParser;
+use rio_turtle::NTriplesParser;
+use std::collections::BTreeMap;
+use std::io::BufRead;
+
+/// Reads an [N-Triples](https://www.w3.org/TR/n-triples/) file from a Rust `BufRead` and returns an iterator of the read `Triple`s
+pub fn read_ntriples<R: BufRead>(reader: R) -> Result<impl Iterator<Item = Result<Triple>>> {
+    let mut bnode_map = BTreeMap::default();
+    Ok(NTriplesParser::new(reader)?.into_iter(move |t| convert_triple(t, &mut bnode_map)))
+}
diff --git a/lib/src/rio/ntriples/mod.rs b/lib/src/rio/ntriples/mod.rs deleted file mode 100644 index 1f3ac5ac..00000000 --- a/lib/src/rio/ntriples/mod.rs +++ /dev/null @@ -1,75 +0,0 @@ -//! Implementation of [N-Triples](https://www.w3.org/TR/n-triples/) RDF syntax - -mod grammar { - #![allow( - clippy::suspicious_else_formatting, - clippy::len_zero, - clippy::single_match, - clippy::unit_arg, - clippy::naive_bytecount - )] - - use crate::rio::utils::unescape_characters; - use crate::utils::StaticSliceMap; - use lazy_static::lazy_static; - use std::borrow::Cow; - - const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\']; - lazy_static! 
{ - static ref UNESCAPE_REPLACEMENT: StaticSliceMap = StaticSliceMap::new( - &['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'], - &[ - '\u{0009}', '\u{0008}', '\u{000A}', '\u{000D}', '\u{000C}', '\u{0022}', '\u{0027}', - '\u{005C}' - ] - ); - } - - pub fn unescape_echars(input: &str) -> Cow<'_, str> { - unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT) - } - - include!(concat!(env!("OUT_DIR"), "/ntriples_grammar.rs")); -} - -use crate::model::*; -use crate::Result; -use std::collections::BTreeMap; -use std::io::BufRead; -use std::io::BufReader; -use std::io::Read; - -struct NTriplesIterator { - buffer: String, - reader: BufReader, - bnodes_map: BTreeMap, -} - -impl Iterator for NTriplesIterator { - type Item = Result; - - fn next(&mut self) -> Option> { - if let Err(error) = self.reader.read_line(&mut self.buffer) { - return Some(Err(error.into())); - } - if self.buffer.is_empty() { - return None; //End of file - } - let result = grammar::triple(&self.buffer, &mut self.bnodes_map); - self.buffer.clear(); - match result { - Ok(Some(triple)) => Some(Ok(triple)), - Ok(None) => self.next(), - Err(error) => Some(Err(error.into())), - } - } -} - -/// Reads a [N-Triples](https://www.w3.org/TR/n-triples/) file from a Rust `Read` and returns an iterator of the read `Triple`s -pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator> { - NTriplesIterator { - buffer: String::default(), - reader: BufReader::new(source), - bnodes_map: BTreeMap::default(), - } -} diff --git a/lib/src/rio/ntriples/ntriples_grammar.rustpeg b/lib/src/rio/ntriples/ntriples_grammar.rustpeg deleted file mode 100644 index c1486fb5..00000000 --- a/lib/src/rio/ntriples/ntriples_grammar.rustpeg +++ /dev/null @@ -1,92 +0,0 @@ -//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar - -use std::char; -use std::str::FromStr; -use crate::model::*; -use std::collections::BTreeMap; -use crate::rio::utils::unescape_unicode_codepoints; - -#![arguments(bnodes_map: &mut 
BTreeMap)] - -//[2] -pub triple -> Option = - _ s:subject _ p:predicate _ o:object _ "." _ comment? EOL? { Some(Triple::new(s, p, o)) } / - _ comment? EOL? { None } - -//[3] -subject -> NamedOrBlankNode = - i: IRIREF { i.into() } / - b: BLANK_NODE_LABEL { b.into() } - -//[4] -predicate -> NamedNode = i:IRIREF { - i -} - -//[5] -object -> Term = - i: IRIREF { i.into() } / - b: BLANK_NODE_LABEL { b.into() } / - l: literal { l.into() } - -//[6] -literal -> Literal = - v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { Literal::new_typed_literal(v, t) } / - v: STRING_LITERAL_QUOTE _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / - v: STRING_LITERAL_QUOTE { Literal::new_simple_literal(v) } - - -//[144s] -LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {? - match LanguageTag::parse(&l) { - Ok(langtag) => Ok(langtag), - Err(error) => Err("language tag parsing failed") - } -} - -//[7] -EOL = [\r\n]+ - -//[8] -IRIREF -> NamedNode = "<" _ i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) _ ">" {? 
- let s = unescape_unicode_codepoints(i); - match NamedNode::from_str(&s) { - Ok(named_node) => Ok(named_node), - Err(error) => Err("IRI parsing failed") - } -} -_IRIREF_simple_char -> char = c:$() { c.chars().next().unwrap() } - -//[9] -STRING_LITERAL_QUOTE -> String = "\"" l:$(([^\u{0022}\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" { - unescape_unicode_codepoints(&unescape_echars(l)).into_owned() -} - -//[141s] -BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { - bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() -} - -//[10] -UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX - -//[153s] -ECHAR -> () = '\\' [tbnrf"'\\] - -//[157s] -PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}] - -//[158s] -PN_CHARS_U -> () = '_' / ':' / PN_CHARS_BASE - -//[160s] -PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U - -//[162s] -HEX -> () = ([0-9A-Fa-f]) - - -//space -_ = #quiet<[ \t]*> -//comment -comment = #quiet<"#" [^\r\n]*> diff --git a/lib/src/rio/rio.rs b/lib/src/rio/rio.rs new file mode 100644 index 00000000..15bbbb79 --- /dev/null +++ b/lib/src/rio/rio.rs @@ -0,0 +1,64 @@ +//! 
Wrapper for RIO parsers
+
+use crate::model::*;
+use crate::Result;
+use rio_api::model as rio;
+use std::collections::BTreeMap;
+use std::str::FromStr;
+
+/// Converts a triple produced by a Rio parser into a `Triple` of this crate's model.
+///
+/// `bnodes_map` associates the blank node identifiers met in the parsed input with the
+/// `BlankNode`s created for them. Pass the same map for every triple of one document so that
+/// an identifier always resolves to the same `BlankNode`.
+///
+/// # Errors
+///
+/// Fails when the subject, predicate or object cannot be converted, i.e. when
+/// `NamedNode::from_str` rejects an IRI or `LanguageTag::parse` rejects a language tag.
+pub fn convert_triple(
+    value: rio::Triple,
+    bnodes_map: &mut BTreeMap<String, BlankNode>,
+) -> Result<Triple> {
+    Ok(Triple::new(
+        convert_named_or_blank_node(value.subject, bnodes_map)?,
+        convert_named_node(value.predicate)?,
+        convert_term(value.object, bnodes_map)?,
+    ))
+}
+
+/// Converts a Rio term (named node, blank node or literal) into a `Term`.
+///
+/// Blank nodes are resolved through `bnodes_map`; named nodes and literals may fail to convert.
+fn convert_term(value: rio::Term, bnodes_map: &mut BTreeMap<String, BlankNode>) -> Result<Term> {
+    Ok(match value {
+        rio::Term::NamedNode(v) => convert_named_node(v)?.into(),
+        rio::Term::BlankNode(v) => convert_blank_node(v, bnodes_map).into(),
+        rio::Term::Literal(v) => convert_literal(v)?.into(),
+    })
+}
+
+/// Converts a Rio named or blank node into a `NamedOrBlankNode`.
+///
+/// Blank nodes are resolved through `bnodes_map`; only the named node case can fail.
+fn convert_named_or_blank_node(
+    value: rio::NamedOrBlankNode,
+    bnodes_map: &mut BTreeMap<String, BlankNode>,
+) -> Result<NamedOrBlankNode> {
+    Ok(match value {
+        rio::NamedOrBlankNode::NamedNode(v) => convert_named_node(v)?.into(),
+        rio::NamedOrBlankNode::BlankNode(v) => convert_blank_node(v, bnodes_map).into(),
+    })
+}
+
+/// Builds a `NamedNode` by parsing the IRI of a Rio named node with `NamedNode::from_str`.
+fn convert_named_node(value: rio::NamedNode) -> Result<NamedNode> {
+    NamedNode::from_str(value.iri)
+}
+
+/// Returns the `BlankNode` registered in `bnodes_map` for the identifier of `value`.
+///
+/// The first time an identifier is seen, a `BlankNode::default()` is inserted for it; later
+/// lookups of the same identifier return a clone of that stored node.
+fn convert_blank_node(
+    value: rio::BlankNode,
+    bnodes_map: &mut BTreeMap<String, BlankNode>,
+) -> BlankNode {
+    bnodes_map
+        .entry(value.id.to_string())
+        .or_insert_with(BlankNode::default)
+        .clone()
+}
+
+/// Converts a Rio literal (simple, language-tagged or typed) into a `Literal`.
+///
+/// Fails when the language tag is rejected by `LanguageTag::parse` or when the datatype IRI
+/// is rejected by `convert_named_node`.
+fn convert_literal(value: rio::Literal) -> Result<Literal> {
+    Ok(match value {
+        rio::Literal::Simple { value } => Literal::new_simple_literal(value),
+        rio::Literal::LanguageTaggedString { value, language } => {
+            Literal::new_language_tagged_literal(value, LanguageTag::parse(language)?)
+        }
+        rio::Literal::Typed { value, datatype } => {
+            Literal::new_typed_literal(value, convert_named_node(datatype)?)
+        }
+    })
+}
diff --git a/lib/src/rio/turtle.rs b/lib/src/rio/turtle.rs new file mode 100644 index 00000000..aecec384 --- /dev/null +++ b/lib/src/rio/turtle.rs @@ -0,0 +1,22 @@ +//! 
Implementation of [Turtle](https://www.w3.org/TR/turtle/) RDF syntax
+
+use crate::model::Triple;
+use crate::rio::rio::convert_triple;
+use crate::Result;
+use rio_api::parser::TripleParser;
+use rio_turtle::TurtleParser;
+use std::collections::BTreeMap;
+use std::io::BufRead;
+use url::Url;
+
+/// Reads a [Turtle](https://www.w3.org/TR/turtle/) file from a Rust `BufRead` and returns an iterator of the read `Triple`s
+pub fn read_turtle<R: BufRead>(
+    reader: R,
+    base_url: Option<Url>,
+) -> Result<impl Iterator<Item = Result<Triple>>> {
+    let mut bnode_map = BTreeMap::default();
+    Ok(
+        TurtleParser::new(reader, base_url.as_ref().map_or("", |url| url.as_str()))?
+            .into_iter(move |t| convert_triple(t, &mut bnode_map)),
+    )
+}
diff --git a/lib/src/rio/turtle/mod.rs b/lib/src/rio/turtle/mod.rs deleted file mode 100644 index 81d0469d..00000000 --- a/lib/src/rio/turtle/mod.rs +++ /dev/null @@ -1,101 +0,0 @@ -//! Implementation of [Turtle](https://www.w3.org/TR/turtle/) RDF syntax - -mod grammar { - #![allow( - clippy::suspicious_else_formatting, - clippy::len_zero, - clippy::single_match, - clippy::unit_arg, - clippy::naive_bytecount - )] - - use crate::model::*; - use crate::rio::utils::unescape_characters; - use crate::utils::StaticSliceMap; - use lazy_static::lazy_static; - use std::borrow::Cow; - use std::collections::BTreeMap; - use std::collections::HashMap; - use std::io::BufReader; - use std::io::Read; - use url::ParseOptions; - use url::Url; - - include!(concat!(env!("OUT_DIR"), "/turtle_grammar.rs")); - - pub struct ParserState { - base_uri: Option, - namespaces: HashMap, - cur_subject: Vec, - cur_predicate: Vec, - bnodes_map: BTreeMap, - } - - impl ParserState { - fn url_parser(&self) -> ParseOptions<'_> { - Url::options().base_url(self.base_uri.as_ref()) - } - } - - /// Reads a [Turtle](https://www.w3.org/TR/turtle/) file from a Rust `Read` and returns an iterator on the read `Triple`s - /// - /// Warning: this implementation has not been optimized yet and stores all the found triples in memory. 
- /// This implementation also requires that blank node ids are valid UTF-8 - pub fn read_turtle<'a, R: Read + 'a>( - source: R, - base_uri: impl Into>, - ) -> super::super::super::Result> { - let mut state = ParserState { - base_uri: base_uri.into(), - namespaces: HashMap::default(), - cur_subject: Vec::default(), - cur_predicate: Vec::default(), - bnodes_map: BTreeMap::default(), - }; - let mut triple_buffer = Vec::default(); - - let mut string_buffer = String::default(); - BufReader::new(source).read_to_string(&mut string_buffer)?; - - turtleDoc(&string_buffer, &mut state, &mut triple_buffer)?; - Ok(triple_buffer.into_iter()) - } - - const UNESCAPE_CHARACTERS: [u8; 8] = [b't', b'b', b'n', b'r', b'f', b'"', b'\'', b'\\']; - lazy_static! { - static ref UNESCAPE_REPLACEMENT: StaticSliceMap = StaticSliceMap::new( - &['t', 'b', 'n', 'r', 'f', '"', '\'', '\\'], - &[ - '\u{0009}', '\u{0008}', '\u{000A}', '\u{000D}', '\u{000C}', '\u{0022}', '\u{0027}', - '\u{005C}' - ] - ); - } - - fn unescape_echars(input: &str) -> Cow<'_, str> { - unescape_characters(input, &UNESCAPE_CHARACTERS, &UNESCAPE_REPLACEMENT) - } - - const UNESCAPE_PN_CHARACTERS: [u8; 20] = [ - b'_', b'~', b'.', b'-', b'!', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b';', b'=', - b'/', b'?', b'#', b'@', b'%', - ]; - lazy_static! 
{ - static ref UNESCAPE_PN_REPLACEMENT: StaticSliceMap = StaticSliceMap::new( - &[ - '_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/', - '?', '#', '@', '%' - ], - &[ - '_', '~', '.', '-', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', '/', - '?', '#', '@', '%' - ] - ); - } - - pub fn unescape_pn_local(input: &str) -> Cow<'_, str> { - unescape_characters(input, &UNESCAPE_PN_CHARACTERS, &UNESCAPE_PN_REPLACEMENT) - } -} - -pub use self::grammar::read_turtle; diff --git a/lib/src/rio/turtle/turtle_grammar.rustpeg b/lib/src/rio/turtle/turtle_grammar.rustpeg deleted file mode 100644 index 9b4dfe90..00000000 --- a/lib/src/rio/turtle/turtle_grammar.rustpeg +++ /dev/null @@ -1,272 +0,0 @@ -//See https://www.w3.org/TR/turtle/#sec-grammar - -use std::char; -use crate::model::vocab::rdf; -use crate::model::vocab::xsd; -use std::str::FromStr; -use crate::rio::utils::unescape_unicode_codepoints; - -#![arguments(state: &mut ParserState, buffer: &mut Vec)] - -//[1] -pub turtleDoc -> () = _ (statement _)* - -//[2] -statement -> () = directive / triples "." - -//[3] -directive -> () = prefixID / base / sparqlPrefix / sparqlBase - -//[4] -prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." { - state.namespaces.insert(ns.into(), i); -} - -//[5] -base -> () = "@base" _ url:IRIREF _ "." { - state.base_uri = Some(url); -} - -//[5s] -sparqlBase -> () = "BASE"i _ url:IRIREF { - state.base_uri = Some(url); -} - -//[6s] -sparqlPrefix -> () = "PREFIX"i _ ns:PNAME_NS _ i:IRIREF { - state.namespaces.insert(ns.into(), i); -} - -//[6] -triples -> () = subject_push _ predicateObjectList / triples_blankNodePropertyList_push _ predicateObjectList? 
{ - state.cur_subject.pop(); -} -subject_push -> () = s:subject { - state.cur_subject.push(s) -} -triples_blankNodePropertyList_push -> () = s: blankNodePropertyList { - state.cur_subject.push(s) -} - -//[7] -predicateObjectList -> () = predicateObject (";" _ predicateObject?)* -predicateObject -> () = predicate_push _ objectList _ { - state.cur_predicate.pop(); -} -predicate_push -> () = v:verb { - state.cur_predicate.push(v) -} - -//[8] -objectList -> () = object _ ("," _ object _)* - -//[9] -verb -> NamedNode = predicate / - "a" { rdf::TYPE.clone() } - -// [10] -subject -> NamedOrBlankNode = - i:iri { i.into() } / - b:BlankNode { b.into() } / - c:collection { c } - -//[11] -predicate -> NamedNode = iri - -// [12] -object -> () = o:object_value {? - match state.cur_subject.last() { - Some(s) => match state.cur_predicate.last() { - Some(p) => { - buffer.push(Triple::new(s.clone(), p.clone(), o)); - Ok(()) - } - None => Err("Predicate not found") - }, - None => Err("Subject not found") - } -} - -object_value -> Term = - i:iri { i.into() } / - b:BlankNode { b.into() } / - c:collection { c.into() } / - b:blankNodePropertyList { b.into() } / - l:literal { l.into() } - -//[13] -literal -> Literal = RDFLiteral / NumericLiteral / BooleanLiteral - -//[14] -blankNodePropertyList -> NamedOrBlankNode = blankNodePropertyList_open _ predicateObjectList _ "]" {? 
- state.cur_subject.pop().ok_or("No subject found in the stack") -} -blankNodePropertyList_open -> () = "[" { - state.cur_subject.push(BlankNode::default().into()) -} - -//[15] -collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { - let mut current_list_node = NamedOrBlankNode::from(rdf::NIL.clone()); - for obj in o.into_iter().rev() { - let new_blank_node = NamedOrBlankNode::from(BlankNode::default()); - buffer.push(Triple::new(new_blank_node.clone(), rdf::FIRST.clone(), obj)); - buffer.push(Triple::new(new_blank_node.clone(), rdf::REST.clone(), current_list_node)); - current_list_node = new_blank_node; - } - current_list_node -} -collection_value -> Term = o:object_value _ { o } - -//[16] -NumericLiteral -> Literal = - d:$(DOUBLE) {? match f64::from_str(d) { - Ok(value) => Ok(value.into()), - Err(_) => Err("Invalid xsd:double") - } } / - d:$(DECIMAL) { Literal::new_typed_literal(d, xsd::DECIMAL.clone()) } / - i:$(INTEGER) { Literal::new_typed_literal(i, xsd::INTEGER.clone()) } - -//[128s] -RDFLiteral -> Literal = - v:String _ "^^" _ t:iri { Literal::new_typed_literal(v, t) } / - v:String _ l:LANGTAG { Literal::new_language_tagged_literal(v, l) } / - v:String { Literal::new_simple_literal(v) } - -//[133s] -BooleanLiteral -> Literal = - "true" { true.into() } / - "false" { false.into() } - -//[17] -String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE - -//[135s] -iri -> NamedNode = i:(IRIREF / PrefixedName) { - i.into() -} - -//[136s] -PrefixedName -> Url = PNAME_LN / - ns:PNAME_NS {? state.namespaces.get(ns).cloned().ok_or("Prefix not found") } - -//[137s] -BlankNode -> BlankNode = - b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } / - ANON { BlankNode::default() } - -//[18] -IRIREF -> Url = "<" i:$(([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}] / UCHAR)*) ">" {? 
- match state.url_parser().parse(&unescape_unicode_codepoints(i)) { - Ok(url) => Ok(url), - Err(error) => Err("IRI parsing failed") - } -} - -//[139s] -PNAME_NS -> &'input str = ns:$(PN_PREFIX? ":") { - ns -} - -//[140s] -PNAME_LN -> Url = ns:$(PNAME_NS) local:$(PN_LOCAL) {? - match state.namespaces.get(ns) { - Some(ns) => match Url::parse(&(ns.to_string() + &unescape_pn_local(local))) { - Ok(url) => Ok(url), - Err(error) => Err("IRI parsing failed") - }, - None => Err("Prefix not found") - } -} - -//[141s] -BLANK_NODE_LABEL -> &'input str = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) { - b -} - -//[144s] -LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {? - match LanguageTag::parse(&l) { - Ok(langtag) => Ok(langtag), - Err(error) => Err("language tag parsing failed") - } -} - -//[19] -INTEGER -> () = [+-]? [0-9]+ - -//[20] -DECIMAL -> () = [+-]? [0-9]* "." [0-9]+ - -//[21] -DOUBLE -> () = [+-]? ([0-9]+ "." [0-9]* / "."? [0-9]+) EXPONENT - -//[154s] -EXPONENT -> () = [eE] [+-]? [0-9]+ - -//[22] -STRING_LITERAL_QUOTE -> String = "\"" l: $(([^"\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "\"" { - unescape_unicode_codepoints(&unescape_echars(l)).into_owned() -} - -//[23] -STRING_LITERAL_SINGLE_QUOTE -> String = "'" l:$(([^'\u{005c}\u{000a}\u{000d}] / ECHAR / UCHAR)*) "'" { - unescape_unicode_codepoints(&unescape_echars(l)).into_owned() -} - -//[24] -STRING_LITERAL_LONG_SINGLE_QUOTE -> String = "'''" l:$(STRING_LITERAL_LONG_SINGLE_QUOTE_inner*) "'''" { - unescape_unicode_codepoints(&unescape_echars(l)).into_owned() -} -STRING_LITERAL_LONG_SINGLE_QUOTE_inner -> () = ("''" / "'")? ([^'\u{005c}] / ECHAR / UCHAR) - -//[25] -STRING_LITERAL_LONG_QUOTE -> String = "\"\"\"" l:$(STRING_LITERAL_LONG_QUOTE_inner*) "\"\"\"" { - unescape_unicode_codepoints(&unescape_echars(l)).into_owned() -} -STRING_LITERAL_LONG_QUOTE_inner -> () = ("\"\"" / "\"")? 
([^"\u{005c}] / ECHAR / UCHAR) - -//[26] -UCHAR -> () = "\\u" HEX HEX HEX HEX / "\\U" HEX HEX HEX HEX HEX HEX HEX HEX - -//[159s] -ECHAR -> () = "\\" [tbnrf"'\\] - -//[161s] -WS -> () = #quiet<[\u{20}\u{9}\u{D}\u{A}]> - -//[162s] -ANON -> () = "[" WS* "]" - -//[163s] -PN_CHARS_BASE -> () = [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}] - -//[164s] -PN_CHARS_U -> () = "_" / PN_CHARS_BASE - -//[166s] -PN_CHARS -> () = [\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}] / PN_CHARS_U - -//[167s] -PN_PREFIX -> () = PN_CHARS_BASE PN_CHARS* ("."+ PN_CHARS+)* - -//[168s] -PN_LOCAL -> () = (PN_CHARS_U / ':' / [0-9] / PLX) (PN_CHARS / ':' / PLX)* ('.'+ (PN_CHARS / ':' / PLX)+)? - -//[169s] -PLX -> () = PERCENT / PN_LOCAL_ESC - -//[170s] -PERCENT -> () = "%" HEX HEX - -//[171s] -HEX -> () = ([0-9A-Fa-f]) - -//[172s] -PN_LOCAL_ESC -> () = "\\" [_~\.\-!$&'()*+,;=/?#@%] - - -//space -_ = #quiet<([ \t\n\r] / comment)*> -//comment -comment = #quiet<"#" [^\r\n]*> diff --git a/lib/tests/rdf_test_cases.rs b/lib/tests/rdf_test_cases.rs index 35e04e8c..efa0b0bf 100644 --- a/lib/tests/rdf_test_cases.rs +++ b/lib/tests/rdf_test_cases.rs @@ -17,42 +17,9 @@ use url::Url; #[test] fn turtle_w3c_testsuite() { - let manifest_url = Url::parse("http://www.w3.org/2013/TurtleTests/manifest.ttl").unwrap(); - //TODO: make blacklist pass - let test_blacklist = vec![ - //UTF-8 broken surrogates in BNode ids - NamedNode::new( - manifest_url - .join("#prefix_with_PN_CHARS_BASE_character_boundaries") - .unwrap(), - ), - NamedNode::new( - manifest_url - .join("#labeled_blank_node_with_PN_CHARS_BASE_character_boundaries") - .unwrap(), - ), - NamedNode::new( - manifest_url - .join("#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries") - .unwrap(), - ), - NamedNode::new( - manifest_url - .join("#localName_with_nfc_PN_CHARS_BASE_character_boundaries") - 
.unwrap(), - ), - NamedNode::new(manifest_url.join("#IRI-resolution-01").unwrap()), - NamedNode::new(manifest_url.join("#IRI-resolution-02").unwrap()), - NamedNode::new(manifest_url.join("#IRI-resolution-07").unwrap()), - NamedNode::new(manifest_url.join("#turtle-subm-01").unwrap()), - NamedNode::new(manifest_url.join("#turtle-subm-27").unwrap()), - ]; - + let manifest_url = Url::parse("http://w3c.github.io/rdf-tests/turtle/manifest.ttl").unwrap(); for test_result in TestManifest::new(manifest_url) { let test = test_result.unwrap(); - if test_blacklist.contains(&test.id) { - continue; - } if test.kind == "TestTurtlePositiveSyntax" { if let Err(error) = load_turtle(test.action.clone()) { assert!(false, "Failure on {} with error: {}", test, error) @@ -107,7 +74,7 @@ fn turtle_w3c_testsuite() { #[test] fn ntriples_w3c_testsuite() { - let manifest_url = Url::parse("http://www.w3.org/2013/N-TriplesTests/manifest.ttl").unwrap(); + let manifest_url = Url::parse("http://w3c.github.io/rdf-tests/ntriples/manifest.ttl").unwrap(); for test_result in TestManifest::new(manifest_url) { let test = test_result.unwrap(); @@ -116,11 +83,9 @@ fn ntriples_w3c_testsuite() { assert!(false, "Failure on {} with error: {}", test, error) } } else if test.kind == "TestNTriplesNegativeSyntax" { - assert!( - load_ntriples(test.action.clone()).is_err(), - "Failure on {}", - test - ); + if let Ok(graph) = load_ntriples(test.action.clone()) { + assert!(false, "Failure on {}, found:\n{}", test, graph); + } } else { assert!(false, "Not supported test: {}", test); } @@ -177,11 +142,11 @@ fn rdf_xml_w3c_testsuite() -> Result<()> { } fn load_turtle(url: Url) -> Result { - Ok(read_turtle(read_file(&url)?, Some(url))?.collect()) + read_turtle(read_file(&url)?, Some(url))?.collect() } fn load_ntriples(url: Url) -> Result { - read_ntriples(read_file(&url)?).collect() + read_ntriples(read_file(&url)?)?.collect() } fn load_rdf_xml(url: Url) -> Result { @@ -190,13 +155,8 @@ fn load_rdf_xml(url: Url) -> Result 
{ fn to_relative_path(url: &Url) -> Result { let url = url.as_str(); - if url.starts_with("http://www.w3.org/2013/N-TriplesTests") { - Ok(url.replace( - "http://www.w3.org/2013/N-TriplesTests", - "rdf-tests/ntriples/", - )) - } else if url.starts_with("http://www.w3.org/2013/TurtleTests/") { - Ok(url.replace("http://www.w3.org/2013/TurtleTests/", "rdf-tests/turtle/")) + if url.starts_with("http://w3c.github.io/rdf-tests/") { + Ok(url.replace("http://w3c.github.io/", "")) } else if url.starts_with("http://www.w3.org/2013/RDFXMLTests/") { Ok(url.replace("http://www.w3.org/2013/RDFXMLTests/", "rdf-tests/rdf-xml/")) } else { diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 64125be1..60227e13 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -241,7 +241,7 @@ fn sparql_w3c_query_evaluation_testsuite() { fn load_graph(url: Url) -> Result { if url.as_str().ends_with(".ttl") { - Ok(read_turtle(read_file(&url)?, Some(url))?.collect()) + read_turtle(read_file(&url)?, Some(url))?.collect() } else if url.as_str().ends_with(".rdf") { read_rdf_xml(read_file(&url)?, Some(url)).collect() } else { diff --git a/server/src/main.rs b/server/src/main.rs index 97348077..a3d6cf75 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -38,6 +38,7 @@ use rudf::store::MemoryGraph; use rudf::store::RocksDbDataset; use serde_derive::Deserialize; use std::fs::File; +use std::io::BufReader; use std::panic::RefUnwindSafe; use std::str::FromStr; use std::sync::Arc; @@ -99,7 +100,7 @@ fn main_with_dataset( if let Some(nt_file) = matches.value_of("ntriples") { println!("Loading NTriples file {}", nt_file); let default_graph = dataset.default_graph(); - for quad in read_ntriples(File::open(nt_file)?) { + for quad in read_ntriples(BufReader::new(File::open(nt_file)?))? { default_graph.insert(&quad?)? } }