From 3ea6cbfe62f70f671d01a316b1a3fc1de7705424 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 16 May 2018 17:41:02 +0200 Subject: [PATCH] Initial commit --- .gitignore | 5 + Cargo.toml | 18 ++ LICENSE-APACHE | 201 ++++++++++++++ LICENSE-MIT | 25 ++ README.md | 22 ++ build.rs | 5 + src/lib.rs | 5 + src/model/data.rs | 444 +++++++++++++++++++++++++++++++ src/model/mod.rs | 1 + src/rio/mod.rs | 38 +++ src/rio/ntriples/grammar.rustpeg | 102 +++++++ src/rio/ntriples/mod.rs | 30 +++ 12 files changed, 896 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE-APACHE create mode 100644 LICENSE-MIT create mode 100644 README.md create mode 100644 build.rs create mode 100644 src/lib.rs create mode 100644 src/model/data.rs create mode 100644 src/model/mod.rs create mode 100644 src/rio/mod.rs create mode 100644 src/rio/ntriples/grammar.rustpeg create mode 100644 src/rio/ntriples/mod.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..90f36cf7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +target/ +**/*.rs.bk +Cargo.lock +.idea +*.iml \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..715d49ae --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "rudf" +version = "0.0.1" +authors = ["Tpt "] +license = "MIT/Apache-2.0" +readme = "README.md" +keywords = ["RDF"] +repository = "https://github.com/Tpt/rudf" +description = """ +An RDF library in Rust +""" +build = "build.rs" + +[dependencies] +lazy_static = "^1.0" + +[build-dependencies] +peg = "0.5" diff --git a/LICENSE-APACHE b/LICENSE-APACHE new file mode 100644 index 00000000..16fe87b0 --- /dev/null +++ b/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions.
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/LICENSE-MIT b/LICENSE-MIT new file mode 100644 index 00000000..2a2ac9cc --- /dev/null +++ b/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2018 RUDF developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 00000000..5dc8a3bc --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# Rudf + +This library is a work in progress of an [RDF](https://www.w3.org/RDF/) stack implementation in [Rust](https://www.rust-lang.org). + +Its goal is to provide a compliant, safe and fast implementation of W3C specifications in Rust. + + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Rudf by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..b198e889 --- /dev/null +++ b/build.rs @@ -0,0 +1,5 @@ +extern crate peg; + +fn main() { + peg::cargo_build("src/rio/ntriples/grammar.rustpeg"); +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 00000000..6ffc142c --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,5 @@ +#[macro_use] +extern crate lazy_static; + +pub mod model; +pub mod rio; diff --git a/src/model/data.rs b/src/model/data.rs new file mode 100644 index 00000000..971a39c0 --- /dev/null +++ b/src/model/data.rs @@ -0,0 +1,444 @@ +//!
Implements data structures for https://www.w3.org/TR/rdf11-concepts/ +//! Inspired by [RDFjs](http://rdf.js.org/) + +use std::fmt; +use std::option::Option; +use std::sync::Arc; +use std::sync::Mutex; + + +/// An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct NamedNode { + iri: String, +} + +impl NamedNode { + pub fn value(&self) -> &str { + &self.iri + } +} + +impl fmt::Display for NamedNode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "<{}>", self.value()) + } +} + +/// An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct BlankNode { + id: String, +} + +impl BlankNode { + pub fn value(&self) -> &str { + &self.id + } +} + +impl fmt::Display for BlankNode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "_:{}", self.value()) + } +} + +/// An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Literal { + SimpleLiteral(String), + LanguageTaggedString { value: String, language: String }, + TypedLiteral { value: String, datatype: NamedNode }, +} + +lazy_static! { + static ref XSD_STRING: NamedNode = NamedNode { + iri: "http://www.w3.org/2001/XMLSchema#string".to_owned() + }; + static ref RDF_LANG_STRING: NamedNode = NamedNode { + iri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString".to_owned() + }; +} + +impl Literal { + /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form) + pub fn value(&self) -> &str { + match self { + Literal::SimpleLiteral(value) => value, + Literal::LanguageTaggedString { value, .. } => value, + Literal::TypedLiteral { value, ..
} => value, + } + } + + /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + pub fn language(&self) -> Option<&str> { + match self { + Literal::LanguageTaggedString { language, .. } => Some(language), + _ => None, + } + } + + /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri) + /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always http://www.w3.org/1999/02/22-rdf-syntax-ns#langString + pub fn datatype(&self) -> &NamedNode { + match self { + Literal::SimpleLiteral(_) => &XSD_STRING, + Literal::LanguageTaggedString { .. } => &RDF_LANG_STRING, + Literal::TypedLiteral { datatype, .. } => datatype, + } + } + + pub fn is_plain(&self) -> bool { + match self { + Literal::SimpleLiteral(_) => true, + Literal::LanguageTaggedString { .. } => true, + _ => false, + } + } +} + +impl fmt::Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // Escape what N-Triples forbids unescaped inside a quoted literal; the backslash goes first so later escapes are not doubled. + let value = self.value().replace('\\', "\\\\").replace('"', "\\\"").replace('\n', "\\n").replace('\r', "\\r"); + match self { + Literal::SimpleLiteral(_) => write!(f, "\"{}\"", value), + Literal::LanguageTaggedString { language, .. } => write!(f, "\"{}\"@{}", value, language), + Literal::TypedLiteral { datatype, .. } => write!(f, "\"{}\"^^{}", value, datatype), + } + } +} + +/// The union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) and [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum NamedOrBlankNode { + NamedNode(NamedNode), + BlankNode(BlankNode), +} + +impl NamedOrBlankNode { + pub fn value(&self) -> &str { + match self { + NamedOrBlankNode::NamedNode(node) => node.value(), + NamedOrBlankNode::BlankNode(node) => node.value(), + } + } +} + +impl fmt::Display for NamedOrBlankNode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + NamedOrBlankNode::NamedNode(node) => node.fmt(f), + NamedOrBlankNode::BlankNode(node) => node.fmt(f), + } + } +} + +impl From<NamedNode> for NamedOrBlankNode { + fn from(node: NamedNode) -> Self { + NamedOrBlankNode::NamedNode(node) + } +} + +impl From<BlankNode> for NamedOrBlankNode { + fn from(node: BlankNode) -> Self { + NamedOrBlankNode::BlankNode(node) + } +} + +/// An RDF [term](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-term) +/// It is the union of [IRIs](https://www.w3.org/TR/rdf11-concepts/#dfn-iri), [blank nodes](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) and [literals](https://www.w3.org/TR/rdf11-concepts/#dfn-literal).
+#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub enum Term { + NamedNode(NamedNode), + BlankNode(BlankNode), + Literal(Literal), +} + +impl Term { + pub fn value(&self) -> &str { + match self { + Term::NamedNode(node) => node.value(), + Term::BlankNode(node) => node.value(), + Term::Literal(literal) => literal.value(), + } + } +} + +impl fmt::Display for Term { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Term::NamedNode(node) => node.fmt(f), + Term::BlankNode(node) => node.fmt(f), + Term::Literal(literal) => literal.fmt(f), + } + } +} + +impl From<NamedNode> for Term { + fn from(node: NamedNode) -> Self { + Term::NamedNode(node) + } +} + +impl From<BlankNode> for Term { + fn from(node: BlankNode) -> Self { + Term::BlankNode(node) + } +} + +impl From<Literal> for Term { + fn from(literal: Literal) -> Self { + Term::Literal(literal) + } +} + +impl From<NamedOrBlankNode> for Term { + fn from(resource: NamedOrBlankNode) -> Self { + match resource { + NamedOrBlankNode::NamedNode(node) => Term::NamedNode(node), + NamedOrBlankNode::BlankNode(node) => Term::BlankNode(node), + } + } +} + +/// The interface of containers that look like [RDF triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) +pub trait TripleLike { + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple + fn subject(&self) -> &NamedOrBlankNode; + + /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple, consuming it + fn subject_owned(self) -> NamedOrBlankNode; + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple + fn predicate(&self) -> &NamedNode; + + /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple, consuming it + fn predicate_owned(self) -> NamedNode; + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple + fn object(&self) -> &Term; + + /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple, consuming it + fn object_owned(self) -> Term; +} + +/// An [RDF
triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Triple { + subject: NamedOrBlankNode, + predicate: NamedNode, + object: Term, +} + +impl fmt::Display for Triple { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {} {} .", self.subject, self.predicate, self.object) + } +} + +impl TripleLike for Triple { + fn subject(&self) -> &NamedOrBlankNode { + &self.subject + } + + fn subject_owned(self) -> NamedOrBlankNode { + self.subject + } + + fn predicate(&self) -> &NamedNode { + &self.predicate + } + + fn predicate_owned(self) -> NamedNode { + self.predicate + } + + fn object(&self) -> &Term { + &self.object + } + + fn object_owned(self) -> Term { + self.object + } +} + +/// The interface of [triples](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) that are in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) +pub trait QuadLike: TripleLike { + /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is or None if it is in the [default graph](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph) + fn graph_name(&self) -> &Option<NamedOrBlankNode>; + + /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is or None if it is in the [default graph](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph) + fn graph_name_owned(self) -> Option<NamedOrBlankNode>; +} + +/// A [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct Quad { + subject: NamedOrBlankNode, + predicate: NamedNode, + object: Term, + graph_name: Option<NamedOrBlankNode>, +} + +impl fmt::Display for Quad { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.graph_name { + Some(ref graph_name) => write!( + f, + "{} {} {} {} .", +
self.subject, self.predicate, self.object, graph_name + ), + None => write!(f, "{} {} {} .", self.subject, self.predicate, self.object), + } + } +} + +impl TripleLike for Quad { + fn subject(&self) -> &NamedOrBlankNode { + &self.subject + } + + fn subject_owned(self) -> NamedOrBlankNode { + self.subject + } + + fn predicate(&self) -> &NamedNode { + &self.predicate + } + + fn predicate_owned(self) -> NamedNode { + self.predicate + } + + fn object(&self) -> &Term { + &self.object + } + + fn object_owned(self) -> Term { + self.object + } +} + +impl QuadLike for Quad { + fn graph_name(&self) -> &Option<NamedOrBlankNode> { + &self.graph_name + } + + fn graph_name_owned(self) -> Option<NamedOrBlankNode> { + self.graph_name + } +} + +/// A utility structure to generate blank node ids in a thread-safe way +#[derive(Debug, Clone)] +struct U64IDProvider { + counter: Arc<Mutex<u64>>, +} + +impl U64IDProvider { + pub fn next(&self) -> u64 { + let mut id = self.counter.lock().unwrap(); + *id += 1; + *id + } +} + +impl Default for U64IDProvider { + fn default() -> Self { + U64IDProvider { + counter: Arc::new(Mutex::new(0)), + } + } +} + +/// A structure creating RDF elements +#[derive(Debug, Clone)] +pub struct DataFactory { + blank_node_id_provider: U64IDProvider, +} + +impl Default for DataFactory { + fn default() -> Self { + DataFactory { + blank_node_id_provider: U64IDProvider::default(), + } + } +} + +impl DataFactory { + /// Builds an RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri) + pub fn named_node(&self, iri: impl Into<String>) -> NamedNode { + NamedNode { iri: iri.into() } + } + + /// Builds an RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a known id + pub fn blank_node(&self, id: impl Into<String>) -> BlankNode { + BlankNode { id: id.into() } + } + + /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id + pub fn new_blank_node(&self) -> BlankNode { +
self.blank_node(self.blank_node_id_provider.next().to_string()) + } + + /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) + pub fn simple_literal(&self, value: impl Into<String>) -> Literal { + Literal::SimpleLiteral(value.into()) + } + + /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri) + pub fn typed_literal( + &self, + value: impl Into<String>, + datatype: impl Into<NamedNode>, + ) -> Literal { + //TODO: find the best representation + Literal::TypedLiteral { + value: value.into(), + datatype: datatype.into(), + } + } + + /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + pub fn language_tagged_literal( + &self, + value: impl Into<String>, + language: impl Into<String>, + ) -> Literal { + Literal::LanguageTaggedString { + value: value.into(), + language: language.into(), + } + } + + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) + pub fn triple( + &self, + subject: impl Into<NamedOrBlankNode>, + predicate: impl Into<NamedNode>, + object: impl Into<Term>, + ) -> Triple { + Triple { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + } + } + + /// Builds an RDF [triple](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple) in an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) + pub fn quad( + &self, + subject: impl Into<NamedOrBlankNode>, + predicate: impl Into<NamedNode>, + object: impl Into<Term>, + graph_name: impl Into<Option<NamedOrBlankNode>>, + ) -> Quad { + Quad { + subject: subject.into(), + predicate: predicate.into(), + object: object.into(), + graph_name: graph_name.into(), + } + } +} diff --git a/src/model/mod.rs b/src/model/mod.rs new file mode 100644 index 00000000..7a345e4c --- /dev/null +++ b/src/model/mod.rs @@ -0,0 +1 @@ +pub mod data; diff --git a/src/rio/mod.rs b/src/rio/mod.rs new file mode 100644 index 00000000..5bbe8544 --- /dev/null +++ b/src/rio/mod.rs @@ -0,0 +1,38 @@ +use std::error::Error;
+use std::fmt; + +pub mod ntriples; + +pub type RioResult = Result; + +#[derive(Debug)] +pub struct RioError { + error: Box, +} + +impl RioError { + pub fn new(error: E) -> RioError + where + E: Into>, + { + RioError { + error: error.into(), + } + } +} + +impl fmt::Display for RioError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + self.error.fmt(f) + } +} + +impl Error for RioError { + fn description(&self) -> &str { + self.error.description() + } + + fn cause(&self) -> Option<&Error> { + Some(&*self.error) + } +} diff --git a/src/rio/ntriples/grammar.rustpeg b/src/rio/ntriples/grammar.rustpeg new file mode 100644 index 00000000..3ba8c951 --- /dev/null +++ b/src/rio/ntriples/grammar.rustpeg @@ -0,0 +1,102 @@ +//See https://www.w3.org/TR/2014/REC-n-triples-20140225/#n-triples-grammar + +use std::char; +use model::data::*; + +#![arguments(data_factory: &DataFactory)] + +//[2] +#[pub] +triple -> Option = + _ s:subject _ p:predicate _ o:object _ "." _ comment? { Some(data_factory.triple(s, p, o)) } / + _ comment? 
{ None } +//[3] +subject -> NamedOrBlankNode = + i: IRIREF { data_factory.named_node(i).into() } / + b: BLANK_NODE_LABEL { data_factory.blank_node(b).into() } +//[4] +predicate -> NamedNode = i:IRIREF { + data_factory.named_node(i) +} +//[5] +object -> Term = + i: IRIREF { data_factory.named_node(i).into() } / + b: BLANK_NODE_LABEL { data_factory.blank_node(b).into() } / + l: literal { l.into() } +//[6] +literal -> Literal = + v: STRING_LITERAL_QUOTE _ "^^" _ t:IRIREF { data_factory.typed_literal(v, data_factory.named_node(t)) } / + v: STRING_LITERAL_QUOTE _ l:LANGTAG { data_factory.language_tagged_literal(v, l) } / + v: STRING_LITERAL_QUOTE { data_factory.simple_literal(v) } + +//[144s] +LANGTAG -> String = "@" l: $([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) { + l.into() +} +//[7] +EOL = [\r\n]+ +//[8] NOTE(review): this class is laxer than the spec's IRIREF (it admits spaces, '"', '|', '^', '`' and '\'), and $() keeps the raw text, so \u escapes reach the IRI undecoded - confirm whether that is intended +IRIREF -> String = "<" _ i: $(([^<>{}] / UCHAR)*) _ ">" { + i.into() +} +//[9] +STRING_LITERAL_QUOTE -> String = "\"" l: ((NOT_BAD_LITERAL_VALUE / ECHAR / UCHAR)*) "\"" { + l.into_iter().collect() +} +NOT_BAD_LITERAL_VALUE -> char = c: $([^\u{0022}\u{005c}\u{000a}\u{000d}]) { c.chars().next().unwrap() } +//[141s] PEG repetition is greedy and never backtracks, so "must end with PN_CHARS" is spelled as: every run of dots must be followed by PN_CHARS +BLANK_NODE_LABEL -> String = "_:" b: $((PN_CHARS_U / [0-9]) PN_CHARS* ("."+ PN_CHARS+)*)
{ + b.into() +} +//[10] An escape that is not a Unicode scalar value (e.g. a surrogate) fails the rule instead of panicking +UCHAR -> char = "\\u" h: $(HEX HEX HEX HEX) {? + u32::from_str_radix(h, 16).ok().and_then(char::from_u32).ok_or("a valid Unicode scalar value") +} / "\\U" h: $(HEX HEX HEX HEX HEX HEX HEX HEX) {? + u32::from_str_radix(h, 16).ok().and_then(char::from_u32).ok_or("a valid Unicode scalar value") +} +//[153s] +ECHAR -> char = '\\' c: $([tbnrf"'\\]) { + match c { + "t" => '\u{0009}', + "b" => '\u{0008}', + "n" => '\u{000A}', + "r" => '\u{000D}', + "f" => '\u{000C}', + "\"" => '\u{0022}', + "'" => '\u{0027}', + "\\" => '\u{005C}', + _ => panic!("unexpected escaped char") // not possible + } +} +//[157s] +PN_CHARS_BASE -> char = c: $([A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}]) { c.chars().next().unwrap() } +//[158s] +PN_CHARS_U -> char = PN_CHARS_BASE / '_' { '_' } / ':' { ':' } +//[160s] +PN_CHARS -> char = PN_CHARS_U / c: $([\-0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]) { c.chars().next().unwrap() } +//[162s] +HEX -> char = c: $([0-9A-Fa-f]) { c.chars().next().unwrap() } + +//space +_ = #quiet<[ \t]*> +//comment +comment = #quiet<"#" [^\r\n]*> + + +/*grammar; + +pub NTripleLine: Option = { + Comment? => None, + Comment? => Some(t) +}; +pub NQuadLine: Option = { + Comment? => None, + Comment? => Some(t) +}; + +NTriple: Triple = "." => data_factory.triple(s, p, o); +NQuad: Quad = { + "." => data_factory.quad(s, p, o, Some(g)), + "."
=> data_factory.quad(s, p, o, None) +}; +*/ \ No newline at end of file diff --git a/src/rio/ntriples/mod.rs b/src/rio/ntriples/mod.rs new file mode 100644 index 00000000..c6a1629c --- /dev/null +++ b/src/rio/ntriples/mod.rs @@ -0,0 +1,30 @@ +//! Implements https://www.w3.org/TR/n-triples/ + +mod grammar { + include!(concat!(env!("OUT_DIR"), "/grammar.rs")); +} + +use model::data::*; +use rio::*; +use std::io::BufRead; +use std::io::BufReader; +use std::io::Read; +use std::sync::Arc; + +/// Parses `source` line by line, yielding one result per triple or per read/parse error; blank and comment-only lines are skipped +pub fn read_ntriples<'a, R: Read + 'a>( + source: R, + data_factory: &'a DataFactory, +) -> impl Iterator<Item = RioResult<Triple>> { + let factory = data_factory.clone(); //TODO: try to avoid clone here + //TODO: use read_lines to avoid allocations + BufReader::new(source) + .lines() + .flat_map(move |line| match line { + Ok(line) => match grammar::triple(line.as_str(), &factory) { + Ok(triple) => triple.map(Ok), + Err(error) => Some(Err(RioError::new(error))), + }, + Err(error) => Some(Err(RioError::new(error))), + }) +}