Adds proper relative IRI resolution

pull/10/head
Tpt 6 years ago
parent 94ed8c3873
commit 6906bcd351
  1. 3
      Cargo.toml
  2. 1
      src/lib.rs
  3. 17
      src/model/data.rs
  4. 10
      src/rio/ntriples/ntriples_grammar.rustpeg
  5. 13
      src/rio/turtle/mod.rs
  6. 51
      src/rio/turtle/turtle_grammar.rustpeg

@ -15,7 +15,8 @@ build = "build.rs"
travis-ci = { repository = "Tpt/rudf" }
[dependencies]
lazy_static = "^1.0"
lazy_static = "1.0"
url = "1.7"
[build-dependencies]
peg = "0.5"

@ -1,5 +1,6 @@
#[macro_use]
extern crate lazy_static;
extern crate url;
pub mod model;
pub mod rio;

@ -4,22 +4,27 @@ use std::fmt;
use std::option::Option;
use std::sync::Arc;
use std::sync::Mutex;
use url::Url;
/// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
///
/// The IRI itself lives in the private `iri` field.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct NamedNode {
// NOTE(review): two `iri` declarations are shown back to back; this looks like
// the before (`String`) and after (`Arc<Url>`) of one change — confirm which
// one the real file keeps.
iri: String,
iri: Arc<Url>,
}
impl NamedNode {
    /// Returns the IRI of this node as a string slice.
    pub fn value(&self) -> &str {
        let iri = &self.iri;
        iri.as_str()
    }

    /// Returns a reference to the `Url` held in the `iri` field.
    pub fn url(&self) -> &Url {
        &*self.iri
    }
}
// Displays a `NamedNode` as its IRI wrapped in `<` and `>`.
impl fmt::Display for NamedNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// NOTE(review): two `write!` calls are shown; they look like the before
// (`self.value()`) and after (`self.iri`) of one change — confirm.
write!(f, "<{}>", self.value())
write!(f, "<{}>", self.iri)
}
}
@ -51,10 +56,10 @@ pub enum Literal {
// `NamedNode` statics for two fixed datatype IRIs, declared through `lazy_static!`.
// NOTE(review): each static lists two `iri:` initialisers (`.to_owned()` and
// `Arc::new(Url::parse(..).unwrap())`); presumably the before/after of one
// change — confirm.
// In the `Url::parse(..).unwrap()` form, initialisation panics if the literal
// is rejected by `Url::parse`.
lazy_static! {
// Named node for `http://www.w3.org/2001/XMLSchema#string`.
static ref XSD_STRING: NamedNode = NamedNode {
iri: "http://www.w3.org/2001/XMLSchema#string".to_owned()
iri: Arc::new(Url::parse("http://www.w3.org/2001/XMLSchema#string").unwrap())
};
// Named node for `http://www.w3.org/1999/02/22-rdf-syntax-ns#langString`.
static ref RDF_LANG_STRING: NamedNode = NamedNode {
iri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString".to_owned()
iri: Arc::new(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString").unwrap())
};
}
@ -366,8 +371,8 @@ impl Default for DataFactory {
impl DataFactory {
/// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
///
/// The argument is converted with `.into()` and stored in the `iri` field of
/// the returned `NamedNode`.
// NOTE(review): two signatures are shown (`impl Into<String>` and
// `impl Into<Url>`); presumably the before/after of one change — confirm.
pub fn named_node(&self, iri: impl Into<String>) -> NamedNode {
NamedNode { iri: iri.into() }
pub fn named_node(&self, iri: impl Into<Url>) -> NamedNode {
NamedNode { iri: Arc::new(iri.into()) }
}
/// Builds an RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id

@ -1,6 +1,8 @@
//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar
use std::iter::FromIterator;
use std::char;
use url::Url;
use model::data::*;
#![arguments(data_factory: &DataFactory)]
@ -43,8 +45,12 @@ LANGTAG -> &'input str = "@" l: $([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
EOL = [\r\n]+
//[8]
// Matches an IRI written between `<` and `>`; the matched items
// (`_IRIREF_simple_char` or `UCHAR` results) are gathered into a string.
// In the `-> Url` form that string is parsed with `Url::parse`, and a parse
// failure makes the rule fail with "IRI parsing failed".
// NOTE(review): two rule headers are shown (`-> String` and `-> Url`);
// presumably the before/after of one change — confirm.
// NOTE(review): the `error` binding in the `Err` arm is never used, so the
// detail of the parse error is dropped.
IRIREF -> String = "<" _ i: ((_IRIREF_simple_char / UCHAR)*) _ ">" {
i.into_iter().collect()
IRIREF -> Url = "<" _ i: ((_IRIREF_simple_char / UCHAR)*) _ ">" {?
let s = String::from_iter(i.into_iter());
match Url::parse(&s) {
Ok(url) => Ok(url),
Err(error) => Err("IRI parsing failed")
}
}
// Matches one character outside the excluded set and returns it as a `char`.
// The `unwrap` relies on the character class having matched something.
_IRIREF_simple_char -> char = matched: $([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) {
    let mut chars = matched.chars();
    chars.next().unwrap()
}

@ -9,22 +9,31 @@ use rio::*;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::Read;
use url::Url;
use url::ParseOptions;
//TODO: make private
// State passed to the Turtle grammar as its `state` argument.
pub struct ParserState {
// NOTE(review): two `base_uri` declarations are shown (`String` and
// `Option<Url>`); presumably the before/after of one change — confirm.
// In the `Option<Url>` form this is the base that `url_parser` hands to
// `Url::options().base_url(..)`.
pub base_uri: String,
pub base_uri: Option<Url>,
// presumably prefix label -> namespace IRI; verify against the prefix rules
pub namespaces: HashMap<String, String>,
// NOTE(review): looks like stacks of the subject / predicate currently being
// parsed — confirm against the grammar actions that push and pop them.
pub cur_subject: Vec<NamedOrBlankNode>,
pub cur_predicate: Vec<NamedNode>,
}
impl ParserState {
    /// Returns URL parse options whose base is the current `base_uri`, if any.
    fn url_parser<'a>(&'a self) -> ParseOptions<'a> {
        let current_base = self.base_uri.as_ref();
        let options = Url::options();
        options.base_url(current_base)
    }
}
pub fn read_turtle<'a, R: Read + 'a>(
source: R,
data_factory: &'a DataFactory,
base_uri: impl Into<Option<Url>>
) -> RioResult<impl Iterator<Item = Triple>> {
let factory = data_factory.clone(); //TODO: try to avoid clone here
let mut state = ParserState {
base_uri: String::default(),
base_uri: base_uri.into(),
namespaces: HashMap::default(),
cur_subject: Vec::default(),
cur_predicate: Vec::default(),

@ -1,8 +1,9 @@
//See https://www.w3.org/TR/turtle/#sec-grammar
use std::char;
use std::iter;
use url::Url;
use model::data::*;
use std::iter;
use rio::turtle::ParserState;
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>, data_factory: &DataFactory)]
@ -23,13 +24,25 @@ prefixID -> () = "@prefix" _ ns:PNAME_NS _ i:IRIREF _ "." {
}
//[5]
// Handles an `@base <iri> .` directive by replacing `state.base_uri`.
// The IRI is parsed through `state.url_parser()`, the same path the `iri`
// rule uses, so a relative IRI is resolved against the base that is currently
// in scope instead of being rejected. The rule fails with
// "IRI parsing failed" when the IRI cannot be parsed.
base -> () = "@base" _ i:IRIREF _ "." {?
    // Finish the parse (and the borrow of `state`) before assigning the new base.
    let resolved = state.url_parser().parse(&i);
    match resolved {
        Ok(url) => {
            state.base_uri = Some(url);
            Ok(())
        },
        Err(_) => Err("IRI parsing failed")
    }
}
//[5s]
// Handles a SPARQL-style `BASE <iri>` directive (keyword matched with the
// `i` case-insensitive flag) by replacing `state.base_uri`.
// The IRI is parsed through `state.url_parser()`, the same path the `iri`
// rule uses, so a relative IRI is resolved against the base that is currently
// in scope instead of being rejected. The rule fails with
// "IRI parsing failed" when the IRI cannot be parsed.
sparqlBase -> () = "BASE"i _ i:IRIREF {?
    // Finish the parse (and the borrow of `state`) before assigning the new base.
    let resolved = state.url_parser().parse(&i);
    match resolved {
        Ok(url) => {
            state.base_uri = Some(url);
            Ok(())
        },
        Err(_) => Err("IRI parsing failed")
    }
}
//[6s]
@ -57,7 +70,7 @@ objectList -> () = object _ ("," _ object _)*
//[9]
// A verb is either a `predicate` or the keyword `a`, which yields the named
// node for `http://www.w3.org/1999/02/22-rdf-syntax-ns#type`.
// NOTE(review): two `"a"` alternatives are shown (plain string argument and
// `Url::parse(..).unwrap()`); presumably the before/after of one change — confirm.
verb -> NamedNode = predicate /
"a" { data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#type") }
"a" { data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").unwrap()) }
// [10]
subject -> NamedOrBlankNode =
@ -102,11 +115,13 @@ blankNodePropertyList_open -> () = "[" {
//[15]
collection -> NamedOrBlankNode = '(' _ o:(collection_value*) ')' {
let mut current_list_node = NamedOrBlankNode::from(data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"));
let first = data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#first").unwrap());
let rest = data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest").unwrap());
let mut current_list_node = NamedOrBlankNode::from(data_factory.named_node(Url::parse("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil").unwrap()));
for obj in o.into_iter().rev() {
let new_blank_node = NamedOrBlankNode::from(data_factory.new_blank_node());
buffer.push(data_factory.triple(new_blank_node.clone(), data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#first"), obj));
buffer.push(data_factory.triple(new_blank_node.clone(), data_factory.named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"), current_list_node));
buffer.push(data_factory.triple(new_blank_node.clone(), first.clone(), obj));
buffer.push(data_factory.triple(new_blank_node.clone(), rest.clone(), current_list_node));
current_list_node = new_blank_node;
}
current_list_node
@ -115,9 +130,9 @@ collection_value -> Term = o:object_value _ { o }
//[16]
// Turns the text matched by DOUBLE, DECIMAL or INTEGER (listed in that order)
// into a typed literal whose datatype is the XSD `double`, `decimal` or
// `integer` IRI respectively.
// NOTE(review): each alternative is shown twice (plain string argument and
// `Url::parse(..).unwrap()`); presumably the before/after of one change — confirm.
NumericLiteral -> Literal =
d:$(DOUBLE) { data_factory.typed_literal(d, data_factory.named_node("http://www.w3.org/2001/XMLSchema#double")) } /
d:$(DECIMAL) { data_factory.typed_literal(d, data_factory.named_node("http://www.w3.org/2001/XMLSchema#decimal")) } /
i:$(INTEGER) { data_factory.typed_literal(i, data_factory.named_node("http://www.w3.org/2001/XMLSchema#integer")) }
d:$(DOUBLE) { data_factory.typed_literal(d, data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#double").unwrap())) } /
d:$(DECIMAL) { data_factory.typed_literal(d, data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#decimal").unwrap())) } /
i:$(INTEGER) { data_factory.typed_literal(i, data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#integer").unwrap())) }
//[128s]
RDFLiteral -> Literal =
@ -127,15 +142,18 @@ RDFLiteral -> Literal =
//[133s]
// Maps the keywords `true` and `false` to typed literals whose datatype is
// `http://www.w3.org/2001/XMLSchema#boolean`.
// NOTE(review): each alternative is shown twice (plain string argument and
// `Url::parse(..).unwrap()`); presumably the before/after of one change — confirm.
BooleanLiteral -> Literal =
"true" { data_factory.typed_literal("true", data_factory.named_node("http://www.w3.org/2001/XMLSchema#boolean")) } /
"false" { data_factory.typed_literal("false", data_factory.named_node("http://www.w3.org/2001/XMLSchema#boolean")) }
"true" { data_factory.typed_literal("true", data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#boolean").unwrap())) } /
"false" { data_factory.typed_literal("false", data_factory.named_node(Url::parse("http://www.w3.org/2001/XMLSchema#boolean").unwrap())) }
//[17]
// A string literal in any of the four quoting forms; the two long forms are
// listed before the two short forms.
String -> String = STRING_LITERAL_LONG_SINGLE_QUOTE / STRING_LITERAL_LONG_QUOTE / STRING_LITERAL_QUOTE / STRING_LITERAL_SINGLE_QUOTE
//[135s]
// Builds a `NamedNode` from the text produced by `IRIREF` or `PrefixedName`.
// In the `{?` form the text is parsed through `state.url_parser()`, which uses
// `state.base_uri` as the base; a parse failure makes the rule fail with
// "IRI parsing failed".
// NOTE(review): two rule headers are shown (`{` and `{?`); presumably the
// before/after of one change — confirm.
// NOTE(review): the `error` binding in the `Err` arm is never used, so the
// detail of the parse error is dropped.
iri -> NamedNode = i:(IRIREF / PrefixedName) {
data_factory.named_node(i)
iri -> NamedNode = i:(IRIREF / PrefixedName) {?
match state.url_parser().parse(&i) {
Ok(url) => Ok(data_factory.named_node(url)),
Err(error) => Err("IRI parsing failed")
}
}
//[136s]
@ -149,7 +167,6 @@ BlankNode -> BlankNode =
//[18]
// Matches `<`...`>` and returns the enclosed items joined into a `String`.
// The text is returned as written; no URL parsing or relative-IRI resolution
// happens in this rule.
IRIREF -> String = "<" i:((_IRIREF_simple_char / UCHAR)*) ">" {
    let mut text = String::new();
    text.extend(i);
    text
}
// Matches one character outside the excluded set and returns it as a `char`.
// The `unwrap` relies on the character class having matched something.
_IRIREF_simple_char -> char = matched:$([^\u{00}-\u{20}<>"{}|^\u{60}\u{5c}]) {
    let mut chars = matched.chars();
    chars.next().unwrap()
}

Loading…
Cancel
Save