Adds proper relative IRI resolution

pull/10/head
Tpt 6 years ago
parent 94ed8c3873
commit 6906bcd351
  1. 3
      Cargo.toml
  2. 1
      src/lib.rs
  3. 17
      src/model/data.rs
  4. 10
      src/rio/ntriples/ntriples_grammar.rustpeg
  5. 13
      src/rio/turtle/mod.rs
  6. 51
      src/rio/turtle/turtle_grammar.rustpeg

@ -15,7 +15,8 @@ build = "build.rs"
travis-ci = { repository = "Tpt/rudf" } travis-ci = { repository = "Tpt/rudf" }
[dependencies] [dependencies]
lazy_static = "^1.0" lazy_static = "1.0"
url = "1.7"
[build-dependencies] [build-dependencies]
peg = "0.5" peg = "0.5"

@ -1,5 +1,6 @@
#[macro_use] #[macro_use]
extern crate lazy_static; extern crate lazy_static;
extern crate url;
pub mod model; pub mod model;
pub mod rio; pub mod rio;

@ -4,22 +4,27 @@ use std::fmt;
use std::option::Option; use std::option::Option;
use std::sync::Arc; use std::sync::Arc;
use std::sync::Mutex; use std::sync::Mutex;
use url::Url;
/// A RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) /// A RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct NamedNode { pub struct NamedNode {
iri: String, iri: Arc<Url>,
} }
impl NamedNode { impl NamedNode {
pub fn value(&self) -> &str { pub fn value(&self) -> &str {
self.iri.as_str()
}
pub fn url(&self) -> &Url {
&self.iri &self.iri
} }
} }
impl fmt::Display for NamedNode { impl fmt::Display for NamedNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<{}>", self.value()) write!(f, "<{}>", self.iri)
} }
} }
@ -51,10 +56,10 @@ pub enum Literal {
lazy_static! { lazy_static! {
static ref XSD_STRING: NamedNode = NamedNode { static ref XSD_STRING: NamedNode = NamedNode {
iri: "http://www.w3.org/2001/XMLSchema#string".to_owned() iri: Arc::new(Url::parse("http://www.w3.org/2001/XMLSchema#string").unwrap())
}; };
static ref RDF_LANG_STRING: NamedNode = NamedNode { static ref RDF_LANG_STRING: NamedNode = NamedNode {
iri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString".to_owned() iri: Arc::new(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString").unwrap())
}; };
} }
@ -366,8 +371,8 @@ impl Default for DataFactory {
impl DataFactory { impl DataFactory {
/// Builds a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) /// Builds a RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
pub fn named_node(&self, iri: impl Into<String>) -> NamedNode { pub fn named_node(&self, iri: impl Into<Url>) -> NamedNode {
NamedNode { iri: iri.into() } NamedNode { iri: Arc::new(iri.into()) }
} }
/// Builds a RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id /// Builds a RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id

@ -1,6 +1,8 @@
//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar //See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar
use std::iter::FromIterator;
use std::char; use std::char;
use url::Url;
use model::data::*; use model::data::*;
#![arguments(data_factory: &DataFactory)] #![arguments(data_factory: &DataFactory)]
@ -43,8 +45,12 @@ LANGTAG -> &'input str = "@" l: $([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
EOL = [\r\n]+ EOL = [\r\n]+
//[8] //[8]
IRIREF -> String = "<" _ i: ((_IRIREF_simple_char / UCHAR)*) _ ">" { IRIREF -> Url = "<" _ i: ((_IRIREF_simple_char / UCHAR)*) _ ">" {?
i.into_iter().collect() let s = String::from_iter(i.into_iter());
match Url::parse(&s) {
Ok(url) => Ok(url),
Err(error) => Err("IRI parsing failed")
}
} }
_IRIREF_simple_char -> char = c: $([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() } _IRIREF_simple_char -> char = c: $([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() }

@ -9,22 +9,31 @@ use rio::*;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::BufReader; use std::io::BufReader;
use std::io::Read; use std::io::Read;
use url::Url;
use url::ParseOptions;
//TODO: make private //TODO: make private
pub struct ParserState { pub struct ParserState {
pub base_uri: String, pub base_uri: Option<Url>,
pub namespaces: HashMap<String, String>, pub namespaces: HashMap<String, String>,
pub cur_subject: Vec<NamedOrBlankNode>, pub cur_subject: Vec<NamedOrBlankNode>,
pub cur_predicate: Vec<NamedNode>, pub cur_predicate: Vec<NamedNode>,
} }
impl ParserState {
fn url_parser<'a>(&'a self) -> ParseOptions<'a> {
Url::options().base_url(self.base_uri.as_ref())
}
}
pub fn read_turtle<'a, R: Read + 'a>( pub fn read_turtle<'a, R: Read + 'a>(
source: R, source: R,
data_factory: &'a DataFactory, data_factory: &'a DataFactory,
base_uri: impl Into<Option<Url>>
) -> RioResult<impl Iterator<Item = Triple>> { ) -> RioResult<impl Iterator<Item = Triple>> {
let factory = data_factory.clone(); //TODO: try to avoid clone here let factory = data_factory.clone(); //TODO: try to avoid clone here
let mut state = ParserState { let mut state = ParserState {
base_uri: String::default(), base_uri: base_uri.into(),
namespaces: HashMap::default(), namespaces: HashMap::default(),
cur_subject: Vec::default(), cur_subject: Vec::default(),
cur_predicate: Vec::default(), cur_predicate: Vec::default(),

@ -1,8 +1,9 @@
//See https://www.w3.org/TR/turtle/#sec-grammar //See https://www.w3.org/TR/turtle/#sec-grammar
use std::char; use std::char;
use std::iter; use url::Url;
use model::data::*; use model::data::*;
use std::iter;
use rio::turtle::ParserState; use rio::turtle::ParserState;
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>, data_factory: &DataFactory)] #![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>, data_factory: &DataFactory)]
@ -23,13 +24,25 @@ prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." {
} }
//[5] //[5]
base -> () = "@base" _ i:IRIREF _ "." { base -> () = "@base" _ i:IRIREF _ "." {?
state.base_uri = i.into(); match Url::parse(&i) {
Ok(url) => {
state.base_uri = Some(url);
Ok(())
},
Err(error) => Err("IRI parsing failed")
}
} }
//[5s] //[5s]
sparqlBase -> () = "BASE"i _ i:IRIREF { sparqlBase -> () = "BASE"i _ i:IRIREF {?
state.base_uri = i.into(); match Url::parse(&i) {
Ok(url) => {
state.base_uri = Some(url);
Ok(())
},
Err(error) => Err("IRI parsing failed")
}
} }
//[6s] //[6s]
@ -57,7 +70,7 @@ objectList -> () = object _ ("," _ object _)*
//[9] //[9]
verb -> NamedNode = predicate / verb -> NamedNode = predicate /
"a" { data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") } "a" { data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()) }
// [10] // [10]
subject -> NamedOrBlankNode = subject -> NamedOrBlankNode =
@ -102,11 +115,13 @@ blankNodePropertyList_open -> () = "[" {
//[15] //[15]
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' { collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' {
let mut current_list_node = NamedOrBlankNode::from(data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")); let first = data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#first").unwrap());
let rest = data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest").unwrap());
let mut current_list_node = NamedOrBlankNode::from(data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil").unwrap()));
for obj in o.into_iter().rev() { for obj in o.into_iter().rev() {
let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node()); let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node());
buffer.push(data_factory.triple(new_blank_node.clone(), data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"), obj)); buffer.push(data_factory.triple(new_blank_node.clone(), first.clone(), obj));
buffer.push(data_factory.triple(new_blank_node.clone(), data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"), current_list_node)); buffer.push(data_factory.triple(new_blank_node.clone(), rest.clone(), current_list_node));
current_list_node = new_blank_node; current_list_node = new_blank_node;
} }
current_list_node current_list_node
@ -115,9 +130,9 @@ collection_value -> Term = o:object_value _ { o }
//[16] //[16]
NumericLiteral -> Literal = NumericLiteral -> Literal =
d:$(DOUBLE) { data_factory.typed_literal(d, data_factory.named_node("http://www.w3.org/2001/XMLSchema#double")) } / d:$(DOUBLE) { data_factory.typed_literal(d, data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#double").unwrap())) } /
d:$(DECIMAL) { data_factory.typed_literal(d, data_factory.named_node("http://www.w3.org/2001/XMLSchema#decimal")) } / d:$(DECIMAL) { data_factory.typed_literal(d, data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#decimal").unwrap())) } /
i:$(INTEGER) { data_factory.typed_literal(i, data_factory.named_node("http://www.w3.org/2001/XMLSchema#integer")) } i:$(INTEGER) { data_factory.typed_literal(i, data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#integer").unwrap())) }
//[128s] //[128s]
RDFLiteral -> Literal = RDFLiteral -> Literal =
@ -127,15 +142,18 @@ RDFLiteral -> Literal =
//[133s] //[133s]
BooleanLiteral -> Literal = BooleanLiteral -> Literal =
"true" { data_factory.typed_literal("true", data_factory.named_node("http://www.w3.org/2001/XMLSchema#boolean")) } / "true" { data_factory.typed_literal("true", data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#boolean").unwrap())) } /
"false" { data_factory.typed_literal("false", data_factory.named_node("http://www.w3.org/2001/XMLSchema#boolean")) } "false" { data_factory.typed_literal("false", data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#boolean").unwrap())) }
//[17] //[17]
String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE
//[135s] //[135s]
iri -> NamedNode = i:(IRIREF / PrefixedName) { iri -> NamedNode = i:(IRIREF / PrefixedName) {?
data_factory.named_node(i) match state.url_parser().parse(&i) {
Ok(url) => Ok(data_factory.named_node(url)),
Err(error) => Err("IRI parsing failed")
}
} }
//[136s] //[136s]
@ -149,7 +167,6 @@ BlankNode -> BlankNode =
//[18] //[18]
IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" { IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" {
//TODO: relative URIs resolution
i.into_iter().collect() i.into_iter().collect()
} }
_IRIREF_simple_char -> char = c:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() } _IRIREF_simple_char -> char = c:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) { c.chars().next().unwrap() }

Loading…
Cancel
Save