Uses uuid for encoding blank node ids

pull/10/head
Tpt 6 years ago
parent 723bb22b18
commit 6a2c59ab9f
  1. 1
      Cargo.toml
  2. 1
      src/lib.rs
  3. 69
      src/model/data.rs
  4. 9
      src/rio/ntriples/mod.rs
  5. 5
      src/rio/ntriples/ntriples_grammar.rustpeg
  6. 80
      src/rio/turtle/mod.rs
  7. 4
      src/rio/turtle/turtle_grammar.rustpeg

@ -17,6 +17,7 @@ travis-ci = { repository = "Tpt/rudf" }
[dependencies]
lazy_static = "1.0"
url = "1.7"
uuid = { version = "0.6", features = ["v4"] }
[build-dependencies]
peg = "0.5"

@ -1,6 +1,7 @@
#[macro_use]
extern crate lazy_static;
extern crate url;
extern crate uuid;
pub mod model;
pub mod rio;

@ -4,9 +4,10 @@ use std::fmt;
use std::option::Option;
use std::str::FromStr;
use std::sync::Arc;
use std::sync::Mutex;
use url::ParseError;
use url::Url;
use uuid::Uuid;
use std::ops::Deref;
/// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri)
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@ -31,6 +32,14 @@ impl NamedNode {
}
}
impl Deref for NamedNode {
    type Target = Url;

    /// Borrows the IRI stored in this node, so `Url` methods can be called
    /// directly on a `NamedNode` through auto-deref.
    fn deref(&self) -> &Self::Target {
        &self.iri
    }
}
impl fmt::Display for NamedNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "<{}>", self.iri)
@ -48,56 +57,29 @@ impl FromStr for NamedNode {
/// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node)
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct BlankNode {
id: String,
id: Uuid,
}
impl BlankNode {
/// Builds an RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id
pub fn new(id: impl Into<String>) -> Self {
Self { id: id.into() }
}
impl Deref for BlankNode {
type Target = Uuid;
pub fn value(&self) -> &str {
fn deref(&self) -> &Uuid {
&self.id
}
}
impl fmt::Display for BlankNode {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "_:{}", self.value())
}
}
/// A utility structure that generates blank node ids in a thread-safe way
#[derive(Debug)]
struct U64IDProvider {
// Most recently issued id; the `Mutex` serialises concurrent increments.
counter: Mutex<u64>,
}
impl U64IDProvider {
pub fn next(&self) -> u64 {
let mut id = self.counter.lock().unwrap();
*id += 1;
*id
write!(f, "_:{}", self.id)
}
}
impl Default for U64IDProvider {
fn default() -> Self {
U64IDProvider {
counter: Mutex::new(0),
}
}
}
// Single id provider shared by the whole process; `lazy_static!` builds it
// on first access, and `BlankNode::default` draws its ids from it.
lazy_static! {
static ref U64_ID_PROVIDER: U64IDProvider = U64IDProvider::default();
}
impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id
fn default() -> Self {
BlankNode::new(U64_ID_PROVIDER.next().to_string())
BlankNode {
id: Uuid::new_v4()
}
}
}
@ -228,13 +210,6 @@ pub enum NamedOrBlankNode {
}
impl NamedOrBlankNode {
/// Returns the string value of the wrapped node by delegating to the
/// `value()` method of whichever variant is held.
pub fn value(&self) -> &str {
    match *self {
        NamedOrBlankNode::NamedNode(ref named) => named.value(),
        NamedOrBlankNode::BlankNode(ref blank) => blank.value(),
    }
}
pub fn is_named_node(&self) -> bool {
match self {
NamedOrBlankNode::NamedNode(_) => true,
@ -281,14 +256,6 @@ pub enum Term {
}
impl Term {
/// Returns the string value of the wrapped term by delegating to the
/// `value()` method of the named node, blank node or literal it holds.
pub fn value(&self) -> &str {
    match *self {
        Term::NamedNode(ref named) => named.value(),
        Term::BlankNode(ref blank) => blank.value(),
        Term::Literal(ref lit) => lit.value(),
    }
}
pub fn is_named_node(&self) -> bool {
match self {
Term::NamedNode(_) => true,

@ -9,13 +9,14 @@ use rio::*;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Read;
use std::collections::BTreeMap;
pub fn read_ntriples<'a, R: Read + 'a>(source: R) -> impl Iterator<Item = RioResult<Triple>> {
//TODO: use read_lines to avoid allocations
BufReader::new(source)
.lines()
.flat_map(move |line| match line {
Ok(line) => match grammar::triple(line.as_str()) {
let lines = BufReader::new(source).lines();
let mut bnodes_map: BTreeMap<String, BlankNode> = BTreeMap::default();
lines.flat_map(move |line| match line {
Ok(line) => match grammar::triple(line.as_str(), &mut bnodes_map) {
Ok(triple) => Some(Ok(triple?)),
Err(error) => Some(Err(RioError::new(error))),
},

@ -4,6 +4,9 @@ use std::iter::FromIterator;
use std::char;
use std::str::FromStr;
use model::data::*;
use std::collections::BTreeMap;
#![arguments(bnodes_map: &mut BTreeMap<String, BlankNode>)]
//[2]
#[pub]
@ -60,7 +63,7 @@ STRING_LITERAL_QUOTE_simple_char -> char = c: $([^\u{0022}\u{005c}\u{000a}\u{000
//[141s]
BLANK_NODE_LABEL -> BlankNode = "_:" b:$(([0-9] / PN_CHARS_U) PN_CHARS* ("."+ PN_CHARS+)*) {
BlankNode::new(b)
bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone()
}
//[10]

@ -2,47 +2,51 @@
mod grammar {
include!(concat!(env!("OUT_DIR"), "/turtle_grammar.rs"));
}
use model::data::*;
use rio::*;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::Read;
use url::ParseOptions;
use url::Url;
//TODO: make private
/// Mutable state handed to the generated Turtle grammar (`turtleDoc`) while a
/// document is being parsed.
pub struct ParserState {
/// Base IRI passed to `Url::options().base_url(..)` by `url_parser`; `None` when no base is set.
pub base_uri: Option<Url>,
/// String-to-string table; presumably prefix label -> namespace IRI (TODO confirm against the grammar).
pub namespaces: HashMap<String, String>,
/// Subjects collected during parsing; presumably a stack for nested constructs (TODO confirm against the grammar).
pub cur_subject: Vec<NamedOrBlankNode>,
/// Predicates collected during parsing; presumably a stack mirroring `cur_subject` (TODO confirm against the grammar).
pub cur_predicate: Vec<NamedNode>,
}
use model::data::*;
use rio::*;
use std::collections::HashMap;
use std::io::BufReader;
use std::io::Read;
use url::ParseOptions;
use url::Url;
use std::collections::BTreeMap;
impl ParserState {
fn url_parser<'a>(&'a self) -> ParseOptions<'a> {
Url::options().base_url(self.base_uri.as_ref())
pub struct ParserState {
base_uri: Option<Url>,
namespaces: HashMap<String, String>,
cur_subject: Vec<NamedOrBlankNode>,
cur_predicate: Vec<NamedNode>,
bnodes_map: BTreeMap<String, BlankNode>
}
}
pub fn read_turtle<'a, R: Read + 'a>(
source: R,
base_uri: impl Into<Option<Url>>,
) -> RioResult<impl Iterator<Item = Triple>> {
let mut state = ParserState {
base_uri: base_uri.into(),
namespaces: HashMap::default(),
cur_subject: Vec::default(),
cur_predicate: Vec::default(),
};
let mut triple_buffer = Vec::default();
let mut string_buffer = String::default();
BufReader::new(source).read_to_string(&mut string_buffer)?;
match grammar::turtleDoc(&string_buffer, &mut state, &mut triple_buffer) {
Ok(_) => Ok(triple_buffer.into_iter()),
Err(error) => Err(RioError::new(error)),
impl ParserState {
fn url_parser<'a>(&'a self) -> ParseOptions<'a> {
Url::options().base_url(self.base_uri.as_ref())
}
}
pub fn read_turtle<'a, R: Read + 'a>(
source: R,
base_uri: impl Into<Option<Url>>,
) -> RioResult<impl Iterator<Item = Triple>> {
let mut state = ParserState {
base_uri: base_uri.into(),
namespaces: HashMap::default(),
cur_subject: Vec::default(),
cur_predicate: Vec::default(),
bnodes_map: BTreeMap::default()
};
let mut triple_buffer = Vec::default();
let mut string_buffer = String::default();
BufReader::new(source).read_to_string(&mut string_buffer)?;
match turtleDoc(&string_buffer, &mut state, &mut triple_buffer) {
Ok(_) => Ok(triple_buffer.into_iter()),
Err(error) => Err(RioError::new(error)),
}
}
}
pub use self::grammar::read_turtle;

@ -1,11 +1,9 @@
//See https://www.w3.org/TR/turtle/#sec-grammar
use std::char;
use model::data::*;
use model::vocab::rdf;
use model::vocab::xsd;
use std::iter;
use rio::turtle::ParserState;
#![arguments(state: &mut ParserState, buffer: &mut Vec<Triple>)]
@ -165,7 +163,7 @@ PrefixedName -> String = PNAME_LN /
//[137s]
BlankNode -> BlankNode =
b:BLANK_NODE_LABEL { BlankNode::new(b) } /
b:BLANK_NODE_LABEL { state.bnodes_map.entry(b.to_string()).or_insert_with(BlankNode::default).clone() } /
ANON { BlankNode::default() }
//[18]

Loading…
Cancel
Save