diff --git a/Cargo.lock b/Cargo.lock index cf7e678..2973f0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2911,21 +2911,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a" -[[package]] -name = "librocksdb-sys" -version = "0.11.0+8.3.2" -source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#13b3c2022202abff8cfe921ee926d6ca567e66e8" -dependencies = [ - "bindgen", - "bzip2-sys", - "cc", - "glob", - "libc", - "libz-sys", - "openssl", - "pkg-config", -] - [[package]] name = "libz-sys" version = "1.1.12" @@ -3376,7 +3361,7 @@ dependencies = [ [[package]] name = "ng-oxigraph" -version = "0.4.0-alpha.8-ng" +version = "0.4.0-alpha.9-ng" dependencies = [ "codspeed-criterion-compat", "digest 0.10.7", @@ -3386,20 +3371,18 @@ dependencies = [ "json-event-parser", "libc", "md-5", + "memchr", + "ng-rocksdb", "oxilangtag", "oxiri", - "oxrdf", - "oxrdfio", - "oxsdatatypes", + "peg", + "quick-xml 0.31.0", "rand 0.8.5", "regex", - "rocksdb", + "serde", "sha1", "sha2 0.10.7", "siphasher 0.3.10", - "sparesults", - "spargebra", - "sparopt", "thiserror", "zstd", ] @@ -3437,6 +3420,21 @@ dependencies = [ "zeroize", ] +[[package]] +name = "ng-rocksdb" +version = "0.21.0" +source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#95ec9536b1a4088cfa75aae2851df468e64aa451" +dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "glob", + "libc", + "libz-sys", + "openssl", + "pkg-config", +] + [[package]] name = "ng-sdk-js" version = "0.1.0" @@ -3468,7 +3466,7 @@ name = "ng-storage-rocksdb" version = "0.1.0" dependencies = [ "ng-repo", - "rocksdb", + "ng-rocksdb", "serde_bare", ] @@ -3896,51 +3894,15 @@ dependencies = [ "thiserror", ] -[[package]] -name = "oxrdfio" -version = "0.1.0-alpha.5" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - 
"oxrdf", - "oxrdfxml", - "oxttl", - "thiserror", -] - -[[package]] -name = "oxrdfxml" -version = "0.1.0-alpha.5" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxilangtag", - "oxiri", - "oxrdf", - "quick-xml 0.31.0", - "thiserror", -] - [[package]] name = "oxsdatatypes" version = "0.2.0-alpha.1" source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" dependencies = [ - "js-sys", "serde", "thiserror", ] -[[package]] -name = "oxttl" -version = "0.1.0-alpha.5" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "memchr", - "oxilangtag", - "oxiri", - "oxrdf", - "thiserror", -] - [[package]] name = "packed_simd_2" version = "0.3.8" @@ -4679,15 +4641,6 @@ dependencies = [ "winreg 0.10.1", ] -[[package]] -name = "rocksdb" -version = "0.21.0" -source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#13b3c2022202abff8cfe921ee926d6ca567e66e8" -dependencies = [ - "libc", - "librocksdb-sys", -] - [[package]] name = "rust-embed" version = "6.7.0" @@ -5239,41 +5192,6 @@ dependencies = [ "system-deps", ] -[[package]] -name = "sparesults" -version = "0.2.0-alpha.4" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "json-event-parser", - "memchr", - "oxrdf", - "quick-xml 0.31.0", - "thiserror", -] - -[[package]] -name = "spargebra" -version = "0.3.0-alpha.4" -source = "git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxilangtag", - "oxiri", - "oxrdf", - "peg", - "rand 0.8.5", - "thiserror", -] - -[[package]] -name = "sparopt" -version = "0.1.0-alpha.5-dev" -source = 
"git+https://git.nextgraph.org/NextGraph/oxigraph.git?branch=main#c7f873f904617c201e359196717eb2133d91cef5" -dependencies = [ - "oxrdf", - "rand 0.8.5", - "spargebra", -] - [[package]] name = "spin" version = "0.9.8" diff --git a/ng-oxigraph/Cargo.toml b/ng-oxigraph/Cargo.toml index 1158b43..96cce65 100644 --- a/ng-oxigraph/Cargo.toml +++ b/ng-oxigraph/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ng-oxigraph" -version = "0.4.0-alpha.8-ng" +version = "0.4.0-alpha.9-ng" authors = ["Tpt ", "Niko PLP "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -16,8 +16,15 @@ edition = "2021" rust-version = "1.70" [features] -js = ["getrandom/js", "oxsdatatypes/js", "js-sys"] - +default = ["rdf-star","sep-0002","sep-0006", "oxsdatatypes"] +js = ["getrandom/js", "js-sys"] +rdf-star = [] +custom-now = [] +xml = [] +ttl = [] +sep-0002 = [] +sep-0006 = [] +oxsdatatypes = [] [dependencies] digest = "0.10" @@ -26,22 +33,20 @@ json-event-parser = "0.2.0-alpha.2" md-5 = "0.10" oxilangtag = "0.1" oxiri = "0.2.3" -oxrdf = { version = "0.2.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "oxsdatatypes"] } -oxrdfio = { version = "0.1.0-alpha.5", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star"] } -oxsdatatypes = { version = "0.2.0-alpha.1", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main" } rand = "0.8" regex = "1.7" +serde = { version = "1.0.142", features = ["derive"] } sha1 = "0.10" sha2 = "0.10" siphasher = ">=0.3, <2.0" -sparesults = { version = "0.2.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star"] } -spargebra = { version = "0.3.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = ["rdf-star", "sep-0002", "sep-0006"] } -sparopt = { version = "0.1.0-alpha.4", git = "https://git.nextgraph.org/NextGraph/oxigraph.git", branch="main", features = 
["rdf-star", "sep-0002", "sep-0006"] } thiserror = "1.0.50" +quick-xml = ">=0.29, <0.32" +memchr = "2.5" +peg = "0.8" [target.'cfg(not(target_family = "wasm"))'.dependencies] libc = "0.2" -rocksdb = { version = "0.21.0", git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] } +ng-rocksdb = { version = "0.21.0", git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ] } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] getrandom = "0.2.8" diff --git a/ng-oxigraph/src/lib.rs b/ng-oxigraph/src/lib.rs index b36c4d6..df9b74b 100644 --- a/ng-oxigraph/src/lib.rs +++ b/ng-oxigraph/src/lib.rs @@ -5,8 +5,20 @@ #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] -pub mod io; -pub mod model; -pub mod sparql; -mod storage; -pub mod store; +pub mod oxigraph; + +pub mod oxrdf; + +pub mod oxrdfio; + +pub mod oxsdatatypes; + +pub mod oxttl; + +pub mod oxrdfxml; + +pub mod sparesults; + +pub mod spargebra; + +pub mod sparopt; diff --git a/ng-oxigraph/src/io/format.rs b/ng-oxigraph/src/oxigraph/io/format.rs similarity index 99% rename from ng-oxigraph/src/io/format.rs rename to ng-oxigraph/src/oxigraph/io/format.rs index 08b61d8..8268247 100644 --- a/ng-oxigraph/src/io/format.rs +++ b/ng-oxigraph/src/oxigraph/io/format.rs @@ -1,6 +1,6 @@ #![allow(deprecated)] -use oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; +use crate::oxrdfio::{RdfFormat, RdfParser, RdfSerializer}; /// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats. 
/// diff --git a/ng-oxigraph/src/io/mod.rs b/ng-oxigraph/src/oxigraph/io/mod.rs similarity index 98% rename from ng-oxigraph/src/io/mod.rs rename to ng-oxigraph/src/oxigraph/io/mod.rs index 1b15bc8..6c76116 100644 --- a/ng-oxigraph/src/io/mod.rs +++ b/ng-oxigraph/src/oxigraph/io/mod.rs @@ -36,4 +36,4 @@ pub use self::format::{DatasetFormat, GraphFormat}; pub use self::read::{DatasetParser, GraphParser}; #[allow(deprecated)] pub use self::write::{DatasetSerializer, GraphSerializer}; -pub use oxrdfio::*; +pub use crate::oxrdfio::*; diff --git a/ng-oxigraph/src/io/read.rs b/ng-oxigraph/src/oxigraph/io/read.rs similarity index 97% rename from ng-oxigraph/src/io/read.rs rename to ng-oxigraph/src/oxigraph/io/read.rs index 6d01f6f..ca2c62f 100644 --- a/ng-oxigraph/src/io/read.rs +++ b/ng-oxigraph/src/oxigraph/io/read.rs @@ -2,9 +2,9 @@ //! Utilities to read RDF graphs and datasets. -use crate::io::{DatasetFormat, GraphFormat}; -use crate::model::*; -use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; +use crate::oxigraph::io::{DatasetFormat, GraphFormat}; +use crate::oxigraph::model::*; +use crate::oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser}; use std::io::Read; /// Parsers for RDF graph serialization formats. diff --git a/ng-oxigraph/src/io/write.rs b/ng-oxigraph/src/oxigraph/io/write.rs similarity index 97% rename from ng-oxigraph/src/io/write.rs rename to ng-oxigraph/src/oxigraph/io/write.rs index 7f27cd9..e487720 100644 --- a/ng-oxigraph/src/io/write.rs +++ b/ng-oxigraph/src/oxigraph/io/write.rs @@ -2,9 +2,9 @@ //! Utilities to write RDF graphs and datasets. -use crate::io::{DatasetFormat, GraphFormat}; -use crate::model::*; -use oxrdfio::{RdfSerializer, ToWriteQuadWriter}; +use crate::oxigraph::io::{DatasetFormat, GraphFormat}; +use crate::oxigraph::model::*; +use crate::oxrdfio::{RdfSerializer, ToWriteQuadWriter}; use std::io::{self, Write}; /// A serializer for RDF graph serialization formats. 
diff --git a/ng-oxigraph/src/oxigraph/mod.rs b/ng-oxigraph/src/oxigraph/mod.rs new file mode 100644 index 0000000..57a6bd0 --- /dev/null +++ b/ng-oxigraph/src/oxigraph/mod.rs @@ -0,0 +1,5 @@ +pub mod io; +pub mod model; +pub mod sparql; +mod storage; +pub mod store; diff --git a/ng-oxigraph/src/model.rs b/ng-oxigraph/src/oxigraph/model.rs similarity index 88% rename from ng-oxigraph/src/model.rs rename to ng-oxigraph/src/oxigraph/model.rs index dbca934..a173d8a 100644 --- a/ng-oxigraph/src/model.rs +++ b/ng-oxigraph/src/oxigraph/model.rs @@ -17,6 +17,6 @@ //! assert_eq!(vec![triple], results); //! ``` -pub use oxrdf::*; +pub use crate::oxrdf::*; -pub use spargebra::term::GroundQuad; +pub use crate::spargebra::term::GroundQuad; diff --git a/ng-oxigraph/src/sparql/algebra.rs b/ng-oxigraph/src/oxigraph/sparql/algebra.rs similarity index 98% rename from ng-oxigraph/src/sparql/algebra.rs rename to ng-oxigraph/src/oxigraph/sparql/algebra.rs index 8b3f385..52af785 100644 --- a/ng-oxigraph/src/sparql/algebra.rs +++ b/ng-oxigraph/src/oxigraph/sparql/algebra.rs @@ -2,10 +2,11 @@ //! //! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`]. 
-use crate::model::*; -use crate::sparql::eval::Timer; -use oxsdatatypes::DayTimeDuration; -use spargebra::GraphUpdateOperation; +use crate::oxigraph::model::*; +use crate::oxigraph::sparql::eval::Timer; +use crate::oxsdatatypes::DayTimeDuration; +use crate::spargebra; +use crate::spargebra::GraphUpdateOperation; use std::fmt; use std::str::FromStr; diff --git a/ng-oxigraph/src/sparql/dataset.rs b/ng-oxigraph/src/oxigraph/sparql/dataset.rs similarity index 95% rename from ng-oxigraph/src/sparql/dataset.rs rename to ng-oxigraph/src/oxigraph/sparql/dataset.rs index 3253be1..ddd8816 100644 --- a/ng-oxigraph/src/sparql/dataset.rs +++ b/ng-oxigraph/src/oxigraph/sparql/dataset.rs @@ -1,8 +1,10 @@ -use crate::model::TermRef; -use crate::sparql::algebra::QueryDataset; -use crate::sparql::EvaluationError; -use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; -use crate::storage::{StorageError, StorageReader}; +use crate::oxigraph::model::TermRef; +use crate::oxigraph::sparql::algebra::QueryDataset; +use crate::oxigraph::sparql::EvaluationError; +use crate::oxigraph::storage::numeric_encoder::{ + insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, +}; +use crate::oxigraph::storage::{StorageError, StorageReader}; use std::cell::RefCell; use std::collections::hash_map::Entry; use std::collections::HashMap; diff --git a/ng-oxigraph/src/sparql/error.rs b/ng-oxigraph/src/oxigraph/sparql/error.rs similarity index 92% rename from ng-oxigraph/src/sparql/error.rs rename to ng-oxigraph/src/oxigraph/sparql/error.rs index 38731de..39ba505 100644 --- a/ng-oxigraph/src/sparql/error.rs +++ b/ng-oxigraph/src/oxigraph/sparql/error.rs @@ -1,8 +1,8 @@ -use crate::io::RdfParseError; -use crate::model::NamedNode; -use crate::sparql::results::QueryResultsParseError as ResultsParseError; -use crate::sparql::SparqlSyntaxError; -use crate::storage::StorageError; +use crate::oxigraph::io::RdfParseError; +use crate::oxigraph::model::NamedNode; +use 
crate::oxigraph::sparql::results::QueryResultsParseError as ResultsParseError; +use crate::oxigraph::sparql::SparqlSyntaxError; +use crate::oxigraph::storage::StorageError; use std::convert::Infallible; use std::error::Error; use std::io; diff --git a/ng-oxigraph/src/sparql/eval.rs b/ng-oxigraph/src/oxigraph/sparql/eval.rs similarity index 99% rename from ng-oxigraph/src/sparql/eval.rs rename to ng-oxigraph/src/oxigraph/sparql/eval.rs index 5065f52..d6bd322 100644 --- a/ng-oxigraph/src/sparql/eval.rs +++ b/ng-oxigraph/src/oxigraph/sparql/eval.rs @@ -1,33 +1,34 @@ -use crate::model::vocab::{rdf, xsd}; -use crate::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple}; -use crate::sparql::algebra::{Query, QueryDataset}; -use crate::sparql::dataset::DatasetView; -use crate::sparql::error::EvaluationError; -use crate::sparql::model::*; -use crate::sparql::service::ServiceHandler; -use crate::sparql::CustomFunctionRegistry; -use crate::storage::numeric_encoder::*; -use crate::storage::small_string::SmallString; +use crate::oxigraph::model::vocab::{rdf, xsd}; +use crate::oxigraph::model::{BlankNode, LiteralRef, NamedNodeRef, Term, Triple}; +use crate::oxigraph::sparql::algebra::{Query, QueryDataset}; +use crate::oxigraph::sparql::dataset::DatasetView; +use crate::oxigraph::sparql::error::EvaluationError; +use crate::oxigraph::sparql::model::*; +use crate::oxigraph::sparql::service::ServiceHandler; +use crate::oxigraph::sparql::CustomFunctionRegistry; +use crate::oxigraph::storage::numeric_encoder::*; +use crate::oxigraph::storage::small_string::SmallString; +use crate::oxrdf::{TermRef, Variable}; +use crate::oxsdatatypes::*; +use crate::spargebra; +use crate::spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression}; +use crate::spargebra::term::{ + GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, + TriplePattern, +}; +use crate::sparopt::algebra::{ + AggregateExpression, Expression, GraphPattern, 
JoinAlgorithm, LeftJoinAlgorithm, + MinusAlgorithm, OrderExpression, +}; use digest::Digest; use json_event_parser::{JsonEvent, ToWriteJsonWriter}; use md5::Md5; use oxilangtag::LanguageTag; use oxiri::Iri; -use oxrdf::{TermRef, Variable}; -use oxsdatatypes::*; use rand::random; use regex::{Regex, RegexBuilder}; use sha1::Sha1; use sha2::{Sha256, Sha384, Sha512}; -use spargebra::algebra::{AggregateFunction, Function, PropertyPathExpression}; -use spargebra::term::{ - GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, NamedNodePattern, TermPattern, - TriplePattern, -}; -use sparopt::algebra::{ - AggregateExpression, Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, - MinusAlgorithm, OrderExpression, -}; use std::cell::Cell; use std::cmp::Ordering; use std::collections::hash_map::DefaultHasher; diff --git a/ng-oxigraph/src/sparql/http/dummy.rs b/ng-oxigraph/src/oxigraph/sparql/http/dummy.rs similarity index 100% rename from ng-oxigraph/src/sparql/http/dummy.rs rename to ng-oxigraph/src/oxigraph/sparql/http/dummy.rs diff --git a/ng-oxigraph/src/sparql/http/mod.rs b/ng-oxigraph/src/oxigraph/sparql/http/mod.rs similarity index 100% rename from ng-oxigraph/src/sparql/http/mod.rs rename to ng-oxigraph/src/oxigraph/sparql/http/mod.rs diff --git a/ng-oxigraph/src/sparql/http/simple.rs b/ng-oxigraph/src/oxigraph/sparql/http/simple.rs similarity index 100% rename from ng-oxigraph/src/sparql/http/simple.rs rename to ng-oxigraph/src/oxigraph/sparql/http/simple.rs diff --git a/ng-oxigraph/src/sparql/mod.rs b/ng-oxigraph/src/oxigraph/sparql/mod.rs similarity index 93% rename from ng-oxigraph/src/sparql/mod.rs rename to ng-oxigraph/src/oxigraph/sparql/mod.rs index 089f84b..2aeb2e2 100644 --- a/ng-oxigraph/src/sparql/mod.rs +++ b/ng-oxigraph/src/oxigraph/sparql/mod.rs @@ -12,22 +12,23 @@ pub mod results; mod service; mod update; -use crate::model::{NamedNode, Term}; -pub use crate::sparql::algebra::{Query, QueryDataset, Update}; -use 
crate::sparql::dataset::DatasetView; -pub use crate::sparql::error::EvaluationError; -use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer}; -pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter}; -pub use crate::sparql::service::ServiceHandler; -use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; -pub(crate) use crate::sparql::update::evaluate_update; -use crate::storage::StorageReader; +use super::model::{NamedNode, Term}; +pub use super::sparql::algebra::{Query, QueryDataset, Update}; +use super::sparql::dataset::DatasetView; +pub use super::sparql::error::EvaluationError; +use super::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer}; +pub use super::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter}; +pub use super::sparql::service::ServiceHandler; +use super::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler}; +pub(super) use super::sparql::update::evaluate_update; +use super::storage::StorageReader; +pub use crate::oxrdf::{Variable, VariableNameParseError}; +use crate::oxsdatatypes::{DayTimeDuration, Float}; +use crate::spargebra; +pub use crate::spargebra::SparqlSyntaxError; +use crate::sparopt::algebra::GraphPattern; +use crate::sparopt::Optimizer; use json_event_parser::{JsonEvent, ToWriteJsonWriter}; -pub use oxrdf::{Variable, VariableNameParseError}; -use oxsdatatypes::{DayTimeDuration, Float}; -pub use spargebra::SparqlSyntaxError; -use sparopt::algebra::GraphPattern; -use sparopt::Optimizer; use std::collections::HashMap; use std::rc::Rc; use std::sync::Arc; diff --git a/ng-oxigraph/src/sparql/model.rs b/ng-oxigraph/src/oxigraph/sparql/model.rs similarity index 98% rename from ng-oxigraph/src/sparql/model.rs rename to ng-oxigraph/src/oxigraph/sparql/model.rs index 0fca83e..7352cf9 100644 --- a/ng-oxigraph/src/sparql/model.rs +++ b/ng-oxigraph/src/oxigraph/sparql/model.rs @@ -1,11 +1,11 @@ -use 
crate::io::{RdfFormat, RdfSerializer}; -use crate::model::*; -use crate::sparql::error::EvaluationError; -use crate::sparql::results::{ +use crate::oxigraph::io::{RdfFormat, RdfSerializer}; +use crate::oxigraph::model::*; +use crate::oxigraph::sparql::error::EvaluationError; +use crate::oxigraph::sparql::results::{ FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat, QueryResultsParseError, QueryResultsParser, QueryResultsSerializer, }; -pub use sparesults::QuerySolution; +pub use crate::sparesults::QuerySolution; use std::io::{Read, Write}; use std::sync::Arc; @@ -276,6 +276,7 @@ impl Iterator for QueryTripleIter { } } +#[cfg(feature = "rdf-star")] #[cfg(test)] #[allow(clippy::panic_in_result_fn)] mod tests { diff --git a/ng-oxigraph/src/sparql/results.rs b/ng-oxigraph/src/oxigraph/sparql/results.rs similarity index 98% rename from ng-oxigraph/src/sparql/results.rs rename to ng-oxigraph/src/oxigraph/sparql/results.rs index 00f8cc3..6dea288 100644 --- a/ng-oxigraph/src/sparql/results.rs +++ b/ng-oxigraph/src/oxigraph/sparql/results.rs @@ -41,4 +41,4 @@ //! ); //! 
``` -pub use sparesults::*; +pub use crate::sparesults::*; diff --git a/ng-oxigraph/src/sparql/service.rs b/ng-oxigraph/src/oxigraph/sparql/service.rs similarity index 92% rename from ng-oxigraph/src/sparql/service.rs rename to ng-oxigraph/src/oxigraph/sparql/service.rs index e3dd560..40e9aad 100644 --- a/ng-oxigraph/src/sparql/service.rs +++ b/ng-oxigraph/src/oxigraph/sparql/service.rs @@ -1,9 +1,9 @@ -use crate::model::NamedNode; -use crate::sparql::algebra::Query; -use crate::sparql::error::EvaluationError; -use crate::sparql::http::Client; -use crate::sparql::model::QueryResults; -use crate::sparql::results::QueryResultsFormat; +use crate::oxigraph::model::NamedNode; +use crate::oxigraph::sparql::algebra::Query; +use crate::oxigraph::sparql::error::EvaluationError; +use crate::oxigraph::sparql::http::Client; +use crate::oxigraph::sparql::model::QueryResults; +use crate::oxigraph::sparql::results::QueryResultsFormat; use std::error::Error; use std::time::Duration; diff --git a/ng-oxigraph/src/sparql/update.rs b/ng-oxigraph/src/oxigraph/sparql/update.rs similarity index 96% rename from ng-oxigraph/src/sparql/update.rs rename to ng-oxigraph/src/oxigraph/sparql/update.rs index 967de82..3ee9c8a 100644 --- a/ng-oxigraph/src/sparql/update.rs +++ b/ng-oxigraph/src/oxigraph/sparql/update.rs @@ -1,21 +1,22 @@ -use crate::io::{RdfFormat, RdfParser}; -use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad}; -use crate::sparql::algebra::QueryDataset; -use crate::sparql::dataset::DatasetView; -use crate::sparql::eval::{EncodedTuple, SimpleEvaluator}; -use crate::sparql::http::Client; -use crate::sparql::{EvaluationError, Update, UpdateOptions}; -use crate::storage::numeric_encoder::{Decoder, EncodedTerm}; -use crate::storage::StorageWriter; -use oxiri::Iri; -use spargebra::algebra::{GraphPattern, GraphTarget}; -use spargebra::term::{ +use crate::oxigraph::io::{RdfFormat, RdfParser}; +use crate::oxigraph::model::{GraphName as OxGraphName, GraphNameRef, Quad 
as OxQuad}; +use crate::oxigraph::sparql::algebra::QueryDataset; +use crate::oxigraph::sparql::dataset::DatasetView; +use crate::oxigraph::sparql::eval::{EncodedTuple, SimpleEvaluator}; +use crate::oxigraph::sparql::http::Client; +use crate::oxigraph::sparql::{EvaluationError, Update, UpdateOptions}; +use crate::oxigraph::storage::numeric_encoder::{Decoder, EncodedTerm}; +use crate::oxigraph::storage::StorageWriter; +use crate::spargebra::algebra::{GraphPattern, GraphTarget}; +use crate::spargebra::term::{ BlankNode, GraphName, GraphNamePattern, GroundQuad, GroundQuadPattern, GroundSubject, GroundTerm, GroundTermPattern, GroundTriple, GroundTriplePattern, NamedNode, NamedNodePattern, Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable, }; -use spargebra::GraphUpdateOperation; -use sparopt::Optimizer; +use crate::spargebra::GraphUpdateOperation; +use crate::sparopt; +use crate::sparopt::Optimizer; +use oxiri::Iri; use std::collections::HashMap; use std::io; use std::rc::Rc; diff --git a/ng-oxigraph/src/storage/backend/fallback.rs b/ng-oxigraph/src/oxigraph/storage/backend/fallback.rs similarity index 99% rename from ng-oxigraph/src/storage/backend/fallback.rs rename to ng-oxigraph/src/oxigraph/storage/backend/fallback.rs index 7214851..e52ff0a 100644 --- a/ng-oxigraph/src/storage/backend/fallback.rs +++ b/ng-oxigraph/src/oxigraph/storage/backend/fallback.rs @@ -1,7 +1,7 @@ //! TODO: This storage is dramatically naive. 
-use crate::storage::StorageError; -use crate::store::CorruptionError; +use crate::oxigraph::storage::StorageError; +use crate::oxigraph::store::CorruptionError; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::error::Error; diff --git a/ng-oxigraph/src/storage/backend/mod.rs b/ng-oxigraph/src/oxigraph/storage/backend/mod.rs similarity index 100% rename from ng-oxigraph/src/storage/backend/mod.rs rename to ng-oxigraph/src/oxigraph/storage/backend/mod.rs diff --git a/ng-oxigraph/src/storage/backend/oxi_rocksdb.rs b/ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs similarity index 99% rename from ng-oxigraph/src/storage/backend/oxi_rocksdb.rs rename to ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs index 37e18ee..9dbe7e3 100644 --- a/ng-oxigraph/src/storage/backend/oxi_rocksdb.rs +++ b/ng-oxigraph/src/oxigraph/storage/backend/oxi_rocksdb.rs @@ -8,10 +8,10 @@ clippy::unwrap_in_result )] -use crate::storage::error::{CorruptionError, StorageError}; +use crate::oxigraph::storage::error::{CorruptionError, StorageError}; use libc::{c_char, c_void}; +use ng_rocksdb::ffi::*; use rand::random; -use rocksdb::ffi::*; use std::borrow::Borrow; #[cfg(unix)] use std::cmp::min; diff --git a/ng-oxigraph/src/storage/binary_encoder.rs b/ng-oxigraph/src/oxigraph/storage/binary_encoder.rs similarity index 98% rename from ng-oxigraph/src/storage/binary_encoder.rs rename to ng-oxigraph/src/oxigraph/storage/binary_encoder.rs index 1e789b7..d1cf1ac 100644 --- a/ng-oxigraph/src/storage/binary_encoder.rs +++ b/ng-oxigraph/src/oxigraph/storage/binary_encoder.rs @@ -1,7 +1,7 @@ -use crate::storage::error::{CorruptionError, StorageError}; -use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash}; -use crate::storage::small_string::SmallString; -use oxsdatatypes::*; +use crate::oxigraph::storage::error::{CorruptionError, StorageError}; +use crate::oxigraph::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, 
StrHash}; +use crate::oxigraph::storage::small_string::SmallString; +use crate::oxsdatatypes::*; use std::io::Read; use std::mem::size_of; @@ -635,8 +635,8 @@ pub fn write_term(sink: &mut Vec, term: &EncodedTerm) { #[allow(clippy::panic_in_result_fn)] mod tests { use super::*; - use crate::model::TermRef; - use crate::storage::numeric_encoder::*; + use crate::oxigraph::model::TermRef; + use crate::oxigraph::storage::numeric_encoder::*; use std::cell::RefCell; use std::collections::HashMap; @@ -670,8 +670,8 @@ mod tests { #[test] fn test_encoding() { - use crate::model::vocab::xsd; - use crate::model::*; + use crate::oxigraph::model::vocab::xsd; + use crate::oxigraph::model::*; let store = MemoryStrStore::default(); let terms: Vec = vec![ diff --git a/ng-oxigraph/src/storage/error.rs b/ng-oxigraph/src/oxigraph/storage/error.rs similarity index 95% rename from ng-oxigraph/src/storage/error.rs rename to ng-oxigraph/src/oxigraph/storage/error.rs index d58d031..b974683 100644 --- a/ng-oxigraph/src/storage/error.rs +++ b/ng-oxigraph/src/oxigraph/storage/error.rs @@ -1,7 +1,7 @@ -use crate::io::{RdfFormat, RdfParseError}; -use crate::storage::numeric_encoder::EncodedTerm; +use crate::oxigraph::io::{RdfFormat, RdfParseError}; +use crate::oxigraph::storage::numeric_encoder::EncodedTerm; +use crate::oxrdf::TermRef; use oxiri::IriParseError; -use oxrdf::TermRef; use std::error::Error; use std::io; @@ -78,7 +78,7 @@ impl From for io::Error { } } -/// An error raised while loading a file into a [`Store`](crate::store::Store). +/// An error raised while loading a file into a [`Store`](crate::oxigraph::store::Store). #[derive(Debug, thiserror::Error)] pub enum LoaderError { /// An error raised while reading the file. @@ -111,7 +111,7 @@ impl From for io::Error { } } -/// An error raised while writing a file from a [`Store`](crate::store::Store). +/// An error raised while writing a file from a [`Store`](crate::oxigraph::store::Store). 
#[derive(Debug, thiserror::Error)] pub enum SerializerError { /// An error raised while writing the content. diff --git a/ng-oxigraph/src/storage/mod.rs b/ng-oxigraph/src/oxigraph/storage/mod.rs similarity index 98% rename from ng-oxigraph/src/storage/mod.rs rename to ng-oxigraph/src/oxigraph/storage/mod.rs index 8dc332e..a20740e 100644 --- a/ng-oxigraph/src/storage/mod.rs +++ b/ng-oxigraph/src/oxigraph/storage/mod.rs @@ -1,20 +1,24 @@ #![allow(clippy::same_name_method)] #[cfg(all(not(target_family = "wasm")))] -use crate::model::Quad; -use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; -use crate::storage::backend::{Reader, Transaction}; +use crate::oxigraph::model::Quad; +use crate::oxigraph::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; +use crate::oxigraph::storage::backend::{Reader, Transaction}; #[cfg(all(not(target_family = "wasm")))] -use crate::storage::binary_encoder::LATEST_STORAGE_VERSION; -use crate::storage::binary_encoder::{ +use crate::oxigraph::storage::binary_encoder::LATEST_STORAGE_VERSION; +use crate::oxigraph::storage::binary_encoder::{ decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple, write_gosp_quad, write_gpos_quad, write_gspo_quad, write_osp_quad, write_ospg_quad, write_pos_quad, write_posg_quad, write_spo_quad, write_spog_quad, write_term, QuadEncoding, WRITTEN_TERM_MAX_SIZE, }; -pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError}; +pub use crate::oxigraph::storage::error::{ + CorruptionError, LoaderError, SerializerError, StorageError, +}; #[cfg(all(not(target_family = "wasm")))] -use crate::storage::numeric_encoder::Decoder; -use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup}; +use crate::oxigraph::storage::numeric_encoder::Decoder; +use crate::oxigraph::storage::numeric_encoder::{ + insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup, +}; use backend::{ColumnFamily, 
ColumnFamilyDefinition, Db, Iter}; #[cfg(all(not(target_family = "wasm")))] use std::collections::VecDeque; diff --git a/ng-oxigraph/src/storage/numeric_encoder.rs b/ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs similarity index 99% rename from ng-oxigraph/src/storage/numeric_encoder.rs rename to ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs index bf4b070..81fa52c 100644 --- a/ng-oxigraph/src/storage/numeric_encoder.rs +++ b/ng-oxigraph/src/oxigraph/storage/numeric_encoder.rs @@ -1,9 +1,9 @@ #![allow(clippy::unreadable_literal)] -use crate::model::*; -use crate::storage::error::{CorruptionError, StorageError}; -use crate::storage::small_string::SmallString; -use oxsdatatypes::*; +use crate::oxigraph::model::*; +use crate::oxigraph::storage::error::{CorruptionError, StorageError}; +use crate::oxigraph::storage::small_string::SmallString; +use crate::oxsdatatypes::*; use siphasher::sip128::{Hasher128, SipHasher24}; use std::fmt::Debug; use std::hash::{Hash, Hasher}; diff --git a/ng-oxigraph/src/storage/small_string.rs b/ng-oxigraph/src/oxigraph/storage/small_string.rs similarity index 100% rename from ng-oxigraph/src/storage/small_string.rs rename to ng-oxigraph/src/oxigraph/storage/small_string.rs diff --git a/ng-oxigraph/src/store.rs b/ng-oxigraph/src/oxigraph/store.rs similarity index 99% rename from ng-oxigraph/src/store.rs rename to ng-oxigraph/src/oxigraph/store.rs index dc2447b..d5f105c 100644 --- a/ng-oxigraph/src/store.rs +++ b/ng-oxigraph/src/oxigraph/store.rs @@ -26,20 +26,20 @@ //! # Result::<_, Box>::Ok(()) //! 
``` #[cfg(all(not(target_family = "wasm")))] -use crate::io::RdfParseError; -use crate::io::{RdfFormat, RdfParser, RdfSerializer}; -use crate::model::*; -use crate::sparql::{ +use super::io::RdfParseError; +use super::io::{RdfFormat, RdfParser, RdfSerializer}; +use super::model::*; +use super::sparql::{ evaluate_query, evaluate_update, EvaluationError, Query, QueryExplanation, QueryOptions, QueryResults, Update, UpdateOptions, }; -use crate::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; +use super::storage::numeric_encoder::{Decoder, EncodedQuad, EncodedTerm}; #[cfg(all(not(target_family = "wasm")))] -use crate::storage::StorageBulkLoader; -use crate::storage::{ +use super::storage::StorageBulkLoader; +use super::storage::{ ChainedDecodingQuadIterator, DecodingGraphIterator, Storage, StorageReader, StorageWriter, }; -pub use crate::storage::{CorruptionError, LoaderError, SerializerError, StorageError}; +pub use super::storage::{CorruptionError, LoaderError, SerializerError, StorageError}; use std::error::Error; use std::io::{Read, Write}; #[cfg(all(not(target_family = "wasm")))] @@ -1930,7 +1930,7 @@ mod tests { #[test] fn store() -> Result<(), StorageError> { - use crate::model::*; + use super::super::model::*; let main_s = Subject::from(BlankNode::default()); let main_p = NamedNode::new("http://example.com").unwrap(); diff --git a/ng-oxigraph/src/oxrdf/README.md b/ng-oxigraph/src/oxrdf/README.md new file mode 100644 index 0000000..88ffa62 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/README.md @@ -0,0 +1,51 @@ +OxRDF +===== + +[![Latest Version](https://img.shields.io/crates/v/oxrdf.svg)](https://crates.io/crates/oxrdf) +[![Released API docs](https://docs.rs/oxrdf/badge.svg)](https://docs.rs/oxrdf) +[![Crates.io downloads](https://img.shields.io/crates/d/oxrdf)](https://crates.io/crates/oxrdf) +[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) 
+[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) + +OxRDF is a simple library providing data structures encoding [RDF 1.1 concepts](https://www.w3.org/TR/rdf11-concepts/). + +This crate is intended to be a basic building block of other crates like [Oxigraph](https://crates.io/crates/oxigraph) or [Spargebra](https://crates.io/crates/spargebra). + +Support for [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is available behind the `rdf-star` feature. + +OxRDF is inspired by [RDF/JS](https://rdf.js.org/data-model-spec/) and [Apache Commons RDF](http://commons.apache.org/proper/commons-rdf/). + +Use [`oxrdfio`](https://crates.io/crates/oxrdfio) if you need to read or write RDF files. + +Usage example: + +```rust +use oxrdf::*; + +let mut graph = Graph::default(); + +// insertion +let ex = NamedNodeRef::new("http://example.com").unwrap(); +let triple = TripleRef::new(ex, ex, ex); +graph.insert(triple); + +// simple filter +let results: Vec<_> = graph.triples_for_subject(ex).collect(); +assert_eq!(vec![triple], results); +``` + +## License + +This project is licensed under either of + +* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or + `<http://www.apache.org/licenses/LICENSE-2.0>`) +* MIT license ([LICENSE-MIT](../LICENSE-MIT) or + `<http://opensource.org/licenses/MIT>`) + +at your option. + + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/ng-oxigraph/src/oxrdf/blank_node.rs b/ng-oxigraph/src/oxrdf/blank_node.rs new file mode 100644 index 0000000..2fe02c2 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/blank_node.rs @@ -0,0 +1,403 @@ +use rand::random; +use serde::{Deserialize, Serialize}; +use std::io::Write; +use std::{fmt, str}; + +/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
+/// +/// The common way to create a new blank node is to use the [`BlankNode::default()`] function. +/// +/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new()`] function. +/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::BlankNode; +/// +/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string()); +/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct BlankNode(BlankNodeContent); + +#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] +enum BlankNodeContent { + Named(String), + Anonymous { id: u128, str: IdStr }, +} + +impl BlankNode { + /// Creates a blank node from a unique identifier. + /// + /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. + /// + /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`] + /// that creates a random ID that could be easily inlined by Oxigraph stores. + pub fn new(id: impl Into) -> Result { + let id = id.into(); + validate_blank_node_identifier(&id)?; + Ok(Self::new_unchecked(id)) + } + + /// Creates a blank node from a unique identifier without validation. + /// + /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier + /// according to N-Triples, Turtle, and SPARQL grammars. + /// + /// [`BlankNode::new()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_unchecked(id: impl Into) -> Self { + let id = id.into(); + if let Some(numerical_id) = to_integer_id(&id) { + Self::new_from_unique_id(numerical_id) + } else { + Self(BlankNodeContent::Named(id)) + } + } + + /// Creates a blank node from a unique numerical id. 
+ /// + /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]. + #[inline] + pub fn new_from_unique_id(id: u128) -> Self { + Self(BlankNodeContent::Anonymous { + id, + str: IdStr::new(id), + }) + } + + /// Returns the underlying ID of this blank node. + #[inline] + pub fn as_str(&self) -> &str { + match &self.0 { + BlankNodeContent::Named(id) => id, + BlankNodeContent::Anonymous { str, .. } => str.as_str(), + } + } + + /// Returns the underlying ID of this blank node. + #[inline] + pub fn into_string(self) -> String { + match self.0 { + BlankNodeContent::Named(id) => id, + BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(), + } + } + + #[inline] + pub fn as_ref(&self) -> BlankNodeRef<'_> { + BlankNodeRef(match &self.0 { + BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()), + BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous { + id: *id, + str: str.as_str(), + }, + }) + } +} + +impl fmt::Display for BlankNode { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl Default for BlankNode { + /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id. + #[inline] + fn default() -> Self { + // We ensure the ID does not start with a number to be also valid with RDF/XML + loop { + let id = random(); + let str = IdStr::new(id); + if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) { + return Self(BlankNodeContent::Anonymous { id, str }); + } + } + } +} + +/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). +/// +/// The common way to create a new blank node is to use the [`BlankNode::default`] trait method. +/// +/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new()`] function. 
+/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// use oxrdf::BlankNodeRef; +/// +/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string()); +/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] +pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>); + +#[derive(PartialEq, Eq, Debug, Clone, Copy, Hash)] +enum BlankNodeRefContent<'a> { + Named(&'a str), + Anonymous { id: u128, str: &'a str }, +} + +impl<'a> BlankNodeRef<'a> { + /// Creates a blank node from a unique identifier. + /// + /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars. + /// + /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`] + /// that creates a random ID that could be easily inlined by Oxigraph stores. + pub fn new(id: &'a str) -> Result { + validate_blank_node_identifier(id)?; + Ok(Self::new_unchecked(id)) + } + + /// Creates a blank node from a unique identifier without validation. + /// + /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier + /// according to N-Triples, Turtle, and SPARQL grammars. + /// + /// [`BlankNodeRef::new()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_unchecked(id: &'a str) -> Self { + if let Some(numerical_id) = to_integer_id(id) { + Self(BlankNodeRefContent::Anonymous { + id: numerical_id, + str: id, + }) + } else { + Self(BlankNodeRefContent::Named(id)) + } + } + + /// Returns the underlying ID of this blank node. + #[inline] + pub const fn as_str(self) -> &'a str { + match self.0 { + BlankNodeRefContent::Named(id) => id, + BlankNodeRefContent::Anonymous { str, ..
} => str, + } + } + + /// Returns the internal numerical ID of this blank node if it has been created using [`BlankNode::new_from_unique_id`]. + /// + /// ``` + /// use oxrdf::BlankNode; + /// + /// assert_eq!( + /// BlankNode::new_from_unique_id(128).as_ref().unique_id(), + /// Some(128) + /// ); + /// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None); + /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(()) + /// ``` + #[inline] + pub const fn unique_id(&self) -> Option { + match self.0 { + BlankNodeRefContent::Named(_) => None, + BlankNodeRefContent::Anonymous { id, .. } => Some(id), + } + } + + #[inline] + pub fn into_owned(self) -> BlankNode { + BlankNode(match self.0 { + BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()), + BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous { + id, + str: IdStr::new(id), + }, + }) + } +} + +impl fmt::Display for BlankNodeRef<'_> { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "_:{}", self.as_str()) + } +} + +impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> { + #[inline] + fn from(node: &'a BlankNode) -> Self { + node.as_ref() + } +} + +impl<'a> From> for BlankNode { + #[inline] + fn from(node: BlankNodeRef<'a>) -> Self { + node.into_owned() + } +} + +impl PartialEq for BlankNodeRef<'_> { + #[inline] + fn eq(&self, other: &BlankNode) -> bool { + *self == other.as_ref() + } +} + +impl PartialEq> for BlankNode { + #[inline] + fn eq(&self, other: &BlankNodeRef<'_>) -> bool { + self.as_ref() == *other + } +} + +#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] +struct IdStr([u8; 32]); + +impl IdStr { + #[inline] + fn new(id: u128) -> Self { + let mut str = [0; 32]; + write!(&mut str[..], "{id:x}").unwrap(); + Self(str) + } + + #[inline] + fn as_str(&self) -> &str { + let len = self.0.iter().position(|x| x == &0).unwrap_or(32); + str::from_utf8(&self.0[..len]).unwrap() + } +} + +fn validate_blank_node_identifier(id: 
&str) -> Result<(), BlankNodeIdParseError> { + let mut chars = id.chars(); + let front = chars.next().ok_or(BlankNodeIdParseError)?; + match front { + '0'..='9' + | '_' + | ':' + | 'A'..='Z' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}' => (), + _ => return Err(BlankNodeIdParseError), + } + for c in chars { + match c { + '.' // validated later + | '-' + | '0'..='9' + | '\u{00B7}' + | '\u{0300}'..='\u{036F}' + | '\u{203F}'..='\u{2040}' + | '_' + | ':' + | 'A'..='Z' + | 'a'..='z' + | '\u{00C0}'..='\u{00D6}' + | '\u{00D8}'..='\u{00F6}' + | '\u{00F8}'..='\u{02FF}' + | '\u{0370}'..='\u{037D}' + | '\u{037F}'..='\u{1FFF}' + | '\u{200C}'..='\u{200D}' + | '\u{2070}'..='\u{218F}' + | '\u{2C00}'..='\u{2FEF}' + | '\u{3001}'..='\u{D7FF}' + | '\u{F900}'..='\u{FDCF}' + | '\u{FDF0}'..='\u{FFFD}' + | '\u{10000}'..='\u{EFFFF}' => (), + _ => return Err(BlankNodeIdParseError), + } + } + + // Could not end with a dot + if id.ends_with('.') { + Err(BlankNodeIdParseError) + } else { + Ok(()) + } +} + +#[inline] +fn to_integer_id(id: &str) -> Option { + let digits = id.as_bytes(); + let mut value: u128 = 0; + if let None | Some(b'0') = digits.first() { + return None; // No empty string or leading zeros + } + for digit in digits { + value = value.checked_mul(16)?.checked_add( + match *digit { + b'0'..=b'9' => digit - b'0', + b'a'..=b'f' => digit - b'a' + 10, + _ => return None, + } + .into(), + )?; + } + Some(value) +} + +/// An error raised during [`BlankNode`] IDs validation. 
+#[derive(Debug, thiserror::Error)] +#[error("The blank node identifier is invalid")] +pub struct BlankNodeIdParseError; + +#[cfg(test)] +#[allow(clippy::panic_in_result_fn)] +mod tests { + use super::*; + + #[test] + fn as_str_partial() { + let b = BlankNode::new_from_unique_id(0x42); + assert_eq!(b.as_str(), "42"); + } + + #[test] + fn as_str_full() { + let b = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000); + assert_eq!(b.as_str(), "77776666555544443333222211110000"); + } + + #[test] + fn new_validation() { + BlankNode::new("").unwrap_err(); + BlankNode::new("a").unwrap(); + BlankNode::new("-").unwrap_err(); + BlankNode::new("a-").unwrap(); + BlankNode::new(".").unwrap_err(); + BlankNode::new("a.").unwrap_err(); + BlankNode::new("a.a").unwrap(); + } + + #[test] + fn new_numerical() { + assert_eq!( + BlankNode::new("100a").unwrap(), + BlankNode::new_from_unique_id(0x100a), + ); + assert_ne!( + BlankNode::new("100A").unwrap(), + BlankNode::new_from_unique_id(0x100a) + ); + } + + #[test] + fn test_equals() { + assert_eq!( + BlankNode::new("100a").unwrap(), + BlankNodeRef::new("100a").unwrap() + ); + assert_eq!( + BlankNode::new("zzz").unwrap(), + BlankNodeRef::new("zzz").unwrap() + ); + } +} diff --git a/ng-oxigraph/src/oxrdf/dataset.rs b/ng-oxigraph/src/oxrdf/dataset.rs new file mode 100644 index 0000000..169bdf7 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/dataset.rs @@ -0,0 +1,1641 @@ +//! [In-memory implementation](Dataset) of [RDF datasets](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +//! +//! Usage example: +//! ``` +//! use oxrdf::*; +//! +//! let mut dataset = Dataset::default(); +//! +//! // insertion +//! let ex = NamedNodeRef::new("http://example.com")?; +//! let quad = QuadRef::new(ex, ex, ex, ex); +//! dataset.insert(quad); +//! +//! // simple filter +//! let results: Vec<_> = dataset.quads_for_subject(ex).collect(); +//! assert_eq!(vec![quad], results); +//! +//! // direct access to a dataset graph +//! 
let results: Vec<_> = dataset.graph(ex).iter().collect(); +//! assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +//! +//! // Print +//! assert_eq!( +//! dataset.to_string(), +//! " .\n" +//! ); +//! # Result::<_,Box>::Ok(()) +//! ``` +//! +//! See also [`Graph`] if you only care about plain triples. + +use crate::oxrdf::interning::*; +use crate::oxrdf::*; +use std::collections::hash_map::DefaultHasher; +use std::collections::{BTreeSet, HashMap, HashSet}; +use std::fmt; +use std::hash::{Hash, Hasher}; + +/// An in-memory [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). +/// +/// It can accommodate a fairly large number of quads (in the few millions). +/// +///
It interns the strings and does not do any garbage collection yet: +/// if you insert and remove a lot of different terms, memory will grow without any reduction.
+/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut dataset = Dataset::default(); +/// +/// // insertion +/// let ex = NamedNodeRef::new("http://example.com")?; +/// let quad = QuadRef::new(ex, ex, ex, ex); +/// dataset.insert(quad); +/// +/// // simple filter +/// let results: Vec<_> = dataset.quads_for_subject(ex).collect(); +/// assert_eq!(vec![quad], results); +/// +/// // direct access to a dataset graph +/// let results: Vec<_> = dataset.graph(ex).iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[derive(Debug, Default, Clone)] +pub struct Dataset { + interner: Interner, + gspo: BTreeSet<( + InternedGraphName, + InternedSubject, + InternedNamedNode, + InternedTerm, + )>, + gpos: BTreeSet<( + InternedGraphName, + InternedNamedNode, + InternedTerm, + InternedSubject, + )>, + gosp: BTreeSet<( + InternedGraphName, + InternedTerm, + InternedSubject, + InternedNamedNode, + )>, + spog: BTreeSet<( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )>, + posg: BTreeSet<( + InternedNamedNode, + InternedTerm, + InternedSubject, + InternedGraphName, + )>, + ospg: BTreeSet<( + InternedTerm, + InternedSubject, + InternedNamedNode, + InternedGraphName, + )>, +} + +impl Dataset { + /// Creates a new dataset + pub fn new() -> Self { + Self::default() + } + + /// Provides a read-only view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in this dataset. 
+ /// + /// ``` + /// use oxrdf::*; + /// + /// let mut dataset = Dataset::default(); + /// let ex = NamedNodeRef::new("http://example.com")?; + /// dataset.insert(QuadRef::new(ex, ex, ex, ex)); + /// + /// let results: Vec<_> = dataset.graph(ex).iter().collect(); + /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn graph<'a, 'b>(&'a self, graph_name: impl Into>) -> GraphView<'a> { + let graph_name = self + .encoded_graph_name(graph_name) + .unwrap_or_else(InternedGraphName::impossible); + GraphView { + dataset: self, + graph_name, + } + } + + /// Provides a read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in this dataset. + /// + /// ``` + /// use oxrdf::*; + /// + /// let mut dataset = Dataset::default(); + /// let ex = NamedNodeRef::new("http://example.com")?; + /// + /// // We edit and query the dataset http://example.com graph + /// { + /// let mut graph = dataset.graph_mut(ex); + /// graph.insert(TripleRef::new(ex, ex, ex)); + /// let results: Vec<_> = graph.iter().collect(); + /// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); + /// } + /// + /// // We have also changed the dataset itself + /// let results: Vec<_> = dataset.iter().collect(); + /// assert_eq!(vec![QuadRef::new(ex, ex, ex, ex)], results); + /// # Result::<_,Box<dyn std::error::Error>>::Ok(()) + /// ``` + pub fn graph_mut<'a, 'b>( + &'a mut self, + graph_name: impl Into>, + ) -> GraphViewMut<'a> { + let graph_name = InternedGraphName::encoded_into(graph_name.into(), &mut self.interner); + GraphViewMut { + dataset: self, + graph_name, + } + } + + /// Returns all the quads contained by the dataset.
+ pub fn iter(&self) -> Iter<'_> { + let iter = self.spog.iter(); + Iter { + dataset: self, + inner: iter, + } + } + + pub fn quads_for_subject<'a, 'b>( + &'a self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + let subject = self + .encoded_subject(subject) + .unwrap_or_else(InternedSubject::impossible); + self.interned_quads_for_subject(&subject) + .map(move |q| self.decode_spog(q)) + } + + #[allow(clippy::map_identity)] + fn interned_quads_for_subject( + &self, + subject: &InternedSubject, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.spog + .range( + &( + subject.clone(), + InternedNamedNode::first(), + InternedTerm::first(), + InternedGraphName::first(), + ) + ..&( + subject.next(), + InternedNamedNode::first(), + InternedTerm::first(), + InternedGraphName::first(), + ), + ) + .map(|(s, p, o, g)| (s, p, o, g)) + } + + pub fn quads_for_predicate<'a, 'b>( + &'a self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + let predicate = self + .encoded_named_node(predicate) + .unwrap_or_else(InternedNamedNode::impossible); + self.interned_quads_for_predicate(predicate) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_predicate( + &self, + predicate: InternedNamedNode, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.posg + .range( + &( + predicate, + InternedTerm::first(), + InternedSubject::first(), + InternedGraphName::first(), + ) + ..&( + predicate.next(), + InternedTerm::first(), + InternedSubject::first(), + InternedGraphName::first(), + ), + ) + .map(|(p, o, s, g)| (s, p, o, g)) + } + + pub fn quads_for_object<'a, 'b>( + &'a self, + object: impl Into>, + ) -> impl Iterator> + 'a { + let object = self + .encoded_term(object) + .unwrap_or_else(InternedTerm::impossible); + + self.interned_quads_for_object(&object) + .map(move |q| self.decode_spog(q)) + } + 
+ fn interned_quads_for_object( + &self, + object: &InternedTerm, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.ospg + .range( + &( + object.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedGraphName::first(), + ) + ..&( + object.next(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedGraphName::first(), + ), + ) + .map(|(o, s, p, g)| (s, p, o, g)) + } + + pub fn quads_for_graph_name<'a, 'b>( + &'a self, + graph_name: impl Into>, + ) -> impl Iterator> + 'a { + let graph_name = self + .encoded_graph_name(graph_name) + .unwrap_or_else(InternedGraphName::impossible); + + self.interned_quads_for_graph_name(&graph_name) + .map(move |q| self.decode_spog(q)) + } + + fn interned_quads_for_graph_name( + &self, + graph_name: &InternedGraphName, + ) -> impl Iterator< + Item = ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + > + '_ { + self.gspo + .range( + &( + graph_name.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + graph_name.next(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ) + .map(|(g, s, p, o)| (s, p, o, g)) + } + + /// Checks if the dataset contains the given quad. + pub fn contains<'a>(&self, quad: impl Into>) -> bool { + if let Some(q) = self.encoded_quad(quad.into()) { + self.spog.contains(&q) + } else { + false + } + } + + /// Returns the number of quads in this dataset. + pub fn len(&self) -> usize { + self.gspo.len() + } + + /// Checks if this dataset is empty, i.e. contains no quad. + pub fn is_empty(&self) -> bool { + self.gspo.is_empty() + } + + /// Adds a quad to the dataset.
+ pub fn insert<'a>(&mut self, quad: impl Into>) -> bool { + let quad = self.encode_quad(quad.into()); + self.insert_encoded(quad) + } + + fn insert_encoded( + &mut self, + quad: ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> bool { + let (s, p, o, g) = quad; + self.gspo.insert((g.clone(), s.clone(), p, o.clone())); + self.gpos.insert((g.clone(), p, o.clone(), s.clone())); + self.gosp.insert((g.clone(), o.clone(), s.clone(), p)); + self.spog.insert((s.clone(), p, o.clone(), g.clone())); + self.posg.insert((p, o.clone(), s.clone(), g.clone())); + self.ospg.insert((o, s, p, g)) + } + + /// Removes a concrete quad from the dataset. + pub fn remove<'a>(&mut self, quad: impl Into>) -> bool { + if let Some(quad) = self.encoded_quad(quad.into()) { + self.remove_encoded(quad) + } else { + false + } + } + + fn remove_encoded( + &mut self, + quad: ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + ) -> bool { + let (s, p, o, g) = quad; + self.gspo.remove(&(g.clone(), s.clone(), p, o.clone())); + self.gpos.remove(&(g.clone(), p, o.clone(), s.clone())); + self.gosp.remove(&(g.clone(), o.clone(), s.clone(), p)); + self.spog.remove(&(s.clone(), p, o.clone(), g.clone())); + self.posg.remove(&(p, o.clone(), s.clone(), g.clone())); + self.ospg.remove(&(o, s, p, g)) + } + + /// Clears the dataset. 
+ pub fn clear(&mut self) { + self.gspo.clear(); + self.gpos.clear(); + self.gosp.clear(); + self.spog.clear(); + self.posg.clear(); + self.ospg.clear(); + } + + fn encode_quad( + &mut self, + quad: QuadRef<'_>, + ) -> ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ) { + ( + InternedSubject::encoded_into(quad.subject, &mut self.interner), + InternedNamedNode::encoded_into(quad.predicate, &mut self.interner), + InternedTerm::encoded_into(quad.object, &mut self.interner), + InternedGraphName::encoded_into(quad.graph_name, &mut self.interner), + ) + } + + fn encoded_quad( + &self, + quad: QuadRef<'_>, + ) -> Option<( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )> { + Some(( + self.encoded_subject(quad.subject)?, + self.encoded_named_node(quad.predicate)?, + self.encoded_term(quad.object)?, + self.encoded_graph_name(quad.graph_name)?, + )) + } + + pub(super) fn encoded_named_node<'a>( + &self, + node: impl Into>, + ) -> Option { + InternedNamedNode::encoded_from(node.into(), &self.interner) + } + + pub(super) fn encoded_subject<'a>( + &self, + node: impl Into>, + ) -> Option { + InternedSubject::encoded_from(node.into(), &self.interner) + } + + pub(super) fn encoded_term<'a>(&self, term: impl Into>) -> Option { + InternedTerm::encoded_from(term.into(), &self.interner) + } + + pub(super) fn encoded_graph_name<'a>( + &self, + graph_name: impl Into>, + ) -> Option { + InternedGraphName::encoded_from(graph_name.into(), &self.interner) + } + + fn decode_spog( + &self, + quad: ( + &InternedSubject, + &InternedNamedNode, + &InternedTerm, + &InternedGraphName, + ), + ) -> QuadRef<'_> { + QuadRef { + subject: quad.0.decode_from(&self.interner), + predicate: quad.1.decode_from(&self.interner), + object: quad.2.decode_from(&self.interner), + graph_name: quad.3.decode_from(&self.interner), + } + } + + fn decode_spo( + &self, + triple: (&InternedSubject, &InternedNamedNode, &InternedTerm), + ) -> TripleRef<'_> { + 
TripleRef { + subject: triple.0.decode_from(&self.interner), + predicate: triple.1.decode_from(&self.interner), + object: triple.2.decode_from(&self.interner), + } + } + + /// Canonicalizes the dataset by renaming blank nodes. + /// + /// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)): + /// ``` + /// use oxrdf::dataset::CanonicalizationAlgorithm; + /// use oxrdf::*; + /// + /// let iri = NamedNodeRef::new("http://example.com")?; + /// + /// let mut graph1 = Graph::new(); + /// let bnode1 = BlankNode::default(); + /// let g1 = BlankNode::default(); + /// graph1.insert(QuadRef::new(iri, iri, &bnode1, &g1)); + /// graph1.insert(QuadRef::new(&bnode1, iri, iri, &g1)); + /// + /// let mut graph2 = Graph::new(); + /// let bnode2 = BlankNode::default(); + /// let g2 = BlankNode::default(); + /// graph2.insert(QuadRef::new(iri, iri, &bnode2, &g2)); + /// graph2.insert(QuadRef::new(&bnode2, iri, iri, &g2)); + /// + /// assert_ne!(graph1, graph2); + /// graph1.canonicalize(CanonicalizationAlgorithm::Unstable); + /// graph2.canonicalize(CanonicalizationAlgorithm::Unstable); + /// assert_eq!(graph1, graph2); + /// # Result::<_,Box>::Ok(()) + /// ``` + /// + ///
Blank node ids depend on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes. + /// Hence, this canonicalization might not be suitable for diffs.
+ /// + ///
This implementation's worst-case complexity is *O(b!)*, where *b* is the number of blank nodes in the input dataset.
+ pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { + let bnode_mapping = self.canonicalize_interned_blank_nodes(algorithm); + let new_quads = self.map_blank_nodes(&bnode_mapping); + self.clear(); + for quad in new_quads { + self.insert_encoded(quad); + } + } + + /// Returns a map between the current dataset blank node and the canonicalized blank node + /// to create a canonical dataset. + /// + /// See also [`canonicalize`](Self::canonicalize). + pub fn canonicalize_blank_nodes( + &self, + algorithm: CanonicalizationAlgorithm, + ) -> HashMap, BlankNode> { + self.canonicalize_interned_blank_nodes(algorithm) + .into_iter() + .map(|(from, to)| (from.decode_from(&self.interner), to)) + .collect() + } + + fn canonicalize_interned_blank_nodes( + &self, + algorithm: CanonicalizationAlgorithm, + ) -> HashMap { + match algorithm { + CanonicalizationAlgorithm::Unstable => { + let bnodes = self.blank_nodes(); + let quads_per_blank_node = self.quads_per_blank_nodes(); + let (hash, partition) = self.hash_bnodes( + bnodes.into_iter().map(|bnode| (bnode, 0)).collect(), + &quads_per_blank_node, + ); + self.distinguish(hash, &partition, &quads_per_blank_node) + .into_iter() + .map(|(from, to)| (from, BlankNode::new_from_unique_id(to.into()))) + .collect() + } + } + } + + fn blank_nodes(&self) -> HashSet { + let mut bnodes = HashSet::new(); + for (g, s, _, o) in &self.gspo { + if let InternedSubject::BlankNode(bnode) = s { + bnodes.insert(*bnode); + } + #[cfg(feature = "rdf-star")] + if let InternedSubject::Triple(triple) = s { + Self::triple_blank_nodes(triple, &mut bnodes); + } + if let InternedTerm::BlankNode(bnode) = o { + bnodes.insert(*bnode); + } + #[cfg(feature = "rdf-star")] + if let InternedTerm::Triple(triple) = o { + Self::triple_blank_nodes(triple, &mut bnodes); + } + if let InternedGraphName::BlankNode(bnode) = g { + bnodes.insert(*bnode); + } + } + bnodes + } + + #[cfg(feature = "rdf-star")] + fn triple_blank_nodes(triple: &InternedTriple, 
bnodes: &mut HashSet) { + if let InternedSubject::BlankNode(bnode) = &triple.subject { + bnodes.insert(*bnode); + } else if let InternedSubject::Triple(t) = &triple.subject { + Self::triple_blank_nodes(t, bnodes); + } + if let InternedTerm::BlankNode(bnode) = &triple.object { + bnodes.insert(*bnode); + } else if let InternedTerm::Triple(t) = &triple.object { + Self::triple_blank_nodes(t, bnodes); + } + } + + fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode { + let mut map: HashMap<_, Vec<_>> = HashMap::new(); + for quad in &self.spog { + if let InternedSubject::BlankNode(bnode) = &quad.0 { + map.entry(*bnode).or_default().push(quad.clone()); + } + #[cfg(feature = "rdf-star")] + if let InternedSubject::Triple(t) = &quad.0 { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map); + } + if let InternedTerm::BlankNode(bnode) = &quad.2 { + map.entry(*bnode).or_default().push(quad.clone()); + } + #[cfg(feature = "rdf-star")] + if let InternedTerm::Triple(t) = &quad.2 { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map); + } + if let InternedGraphName::BlankNode(bnode) = &quad.3 { + map.entry(*bnode).or_default().push(quad.clone()); + } + } + map + } + + #[cfg(feature = "rdf-star")] + fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map( + quad: &( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + triple: &InternedTriple, + map: &mut QuadsPerBlankNode, + ) { + if let InternedSubject::BlankNode(bnode) = &triple.subject { + map.entry(*bnode).or_default().push(quad.clone()); + } + if let InternedSubject::Triple(t) = &triple.subject { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map); + } + if let InternedTerm::BlankNode(bnode) = &triple.object { + map.entry(*bnode).or_default().push(quad.clone()); + } + if let InternedTerm::Triple(t) = &triple.object { + Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map); + } + } + + fn 
hash_bnodes( + &self, + mut hashes: HashMap, + quads_per_blank_node: &QuadsPerBlankNode, + ) -> ( + HashMap, + Vec<(u64, Vec)>, + ) { + let mut to_hash = Vec::new(); + let mut to_do = hashes + .keys() + .map(|bnode| (*bnode, true)) + .collect::>(); + let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len()); + let mut old_partition_count = usize::MAX; + while old_partition_count != partition.len() { + old_partition_count = partition.len(); + partition.clear(); + let mut new_hashes = hashes.clone(); + for bnode in hashes.keys() { + let hash = if to_do.contains_key(bnode) { + for (s, p, o, g) in &quads_per_blank_node[bnode] { + to_hash.push(( + self.hash_subject(s, *bnode, &hashes), + self.hash_named_node(*p), + self.hash_term(o, *bnode, &hashes), + self.hash_graph_name(g, *bnode, &hashes), + )); + } + to_hash.sort_unstable(); + let hash = Self::hash_tuple((&to_hash, hashes[bnode])); + to_hash.clear(); + if hash == hashes[bnode] { + to_do.insert(*bnode, false); + } else { + new_hashes.insert(*bnode, hash); + } + hash + } else { + hashes[bnode] + }; + partition.entry(hash).or_default().push(*bnode); + } + hashes = new_hashes; + } + let mut partition: Vec<_> = partition.into_iter().collect(); + partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2))); + (hashes, partition) + } + + fn hash_named_node(&self, node: InternedNamedNode) -> u64 { + Self::hash_tuple(node.decode_from(&self.interner)) + } + + fn hash_blank_node( + node: InternedBlankNode, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap, + ) -> u64 { + if node == current_blank_node { + u64::MAX + } else { + bnodes_hash[&node] + } + } + + fn hash_subject( + &self, + node: &InternedSubject, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap, + ) -> u64 { + match node { + InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)), + InternedSubject::BlankNode(bnode) => { + Self::hash_blank_node(*bnode, 
current_blank_node, bnodes_hash) + } + #[cfg(feature = "rdf-star")] + InternedSubject::Triple(triple) => { + self.hash_triple(triple, current_blank_node, bnodes_hash) + } + } + } + + fn hash_term( + &self, + term: &InternedTerm, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap, + ) -> u64 { + match term { + InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)), + InternedTerm::BlankNode(bnode) => { + Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash) + } + InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)), + #[cfg(feature = "rdf-star")] + InternedTerm::Triple(triple) => { + self.hash_triple(triple, current_blank_node, bnodes_hash) + } + } + } + + fn hash_graph_name( + &self, + graph_name: &InternedGraphName, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap, + ) -> u64 { + match graph_name { + InternedGraphName::NamedNode(node) => { + Self::hash_tuple(node.decode_from(&self.interner)) + } + InternedGraphName::BlankNode(bnode) => { + Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash) + } + InternedGraphName::DefaultGraph => 0, + } + } + + #[cfg(feature = "rdf-star")] + fn hash_triple( + &self, + triple: &InternedTriple, + current_blank_node: InternedBlankNode, + bnodes_hash: &HashMap, + ) -> u64 { + Self::hash_tuple(( + self.hash_subject(&triple.subject, current_blank_node, bnodes_hash), + self.hash_named_node(triple.predicate), + self.hash_term(&triple.object, current_blank_node, bnodes_hash), + )) + } + + fn hash_tuple(v: impl Hash) -> u64 { + let mut hasher = DefaultHasher::new(); + v.hash(&mut hasher); + hasher.finish() + } + + fn distinguish( + &self, + hash: HashMap, + partition: &[(u64, Vec)], + quads_per_blank_node: &QuadsPerBlankNode, + ) -> HashMap { + let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1); + if let Some(b_prime) = b_prime { + b_prime + .iter() + .map(|b| { + let mut hash_prime = hash.clone(); + 
hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22))); + let (hash_prime_prime, partition_prime) = + self.hash_bnodes(hash_prime, quads_per_blank_node); + self.distinguish(hash_prime_prime, &partition_prime, quads_per_blank_node) + }) + .reduce(|a, b| { + let mut a_hashes = a.values().collect::>(); + a_hashes.sort(); + let mut b_hashes = b.values().collect::>(); // must read `b`: comparing `a` with itself always keeps the first candidate + b_hashes.sort(); + if a_hashes <= b_hashes { + a + } else { + b + } + }) + .unwrap_or_default() + } else { + hash + } + } + + #[allow(clippy::needless_collect)] + fn map_blank_nodes( + &mut self, + bnode_mapping: &HashMap, + ) -> Vec<( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )> { + let old_quads: Vec<_> = self.spog.iter().cloned().collect(); + old_quads + .into_iter() + .map(|(s, p, o, g)| { + ( + match s { + InternedSubject::NamedNode(_) => s, + InternedSubject::BlankNode(bnode) => { + InternedSubject::BlankNode(InternedBlankNode::encoded_into( + bnode_mapping[&bnode].as_ref(), + &mut self.interner, + )) + } + #[cfg(feature = "rdf-star")] + InternedSubject::Triple(triple) => { + InternedSubject::Triple(Box::new(InternedTriple::encoded_into( + self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(), + &mut self.interner, + ))) + } + }, + p, + match o { + InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o, + InternedTerm::BlankNode(bnode) => { + InternedTerm::BlankNode(InternedBlankNode::encoded_into( + bnode_mapping[&bnode].as_ref(), + &mut self.interner, + )) + } + #[cfg(feature = "rdf-star")] + InternedTerm::Triple(triple) => { + InternedTerm::Triple(Box::new(InternedTriple::encoded_into( + self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(), + &mut self.interner, + ))) + } + }, + match g { + InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g, + InternedGraphName::BlankNode(bnode) => { + InternedGraphName::BlankNode(InternedBlankNode::encoded_into( + bnode_mapping[&bnode].as_ref(), + &mut self.interner, + )) + } + },
+ ) + }) + .collect() + } + + #[cfg(feature = "rdf-star")] + fn map_triple_blank_nodes( + &mut self, + triple: &InternedTriple, + bnode_mapping: &HashMap, + ) -> Triple { + Triple { + subject: if let InternedSubject::BlankNode(bnode) = &triple.subject { + bnode_mapping[bnode].clone().into() + } else if let InternedSubject::Triple(t) = &triple.subject { + self.map_triple_blank_nodes(t, bnode_mapping).into() + } else { + triple.subject.decode_from(&self.interner).into_owned() + }, + predicate: triple.predicate.decode_from(&self.interner).into_owned(), + object: if let InternedTerm::BlankNode(bnode) = &triple.object { + bnode_mapping[bnode].clone().into() + } else if let InternedTerm::Triple(t) = &triple.object { + self.map_triple_blank_nodes(t, bnode_mapping).into() + } else { + triple.object.decode_from(&self.interner).into_owned() + }, + } + } +} + +impl PartialEq for Dataset { + fn eq(&self, other: &Self) -> bool { + if self.len() != other.len() { + return false; + } + for q in self { + if !other.contains(q) { + return false; + } + } + true + } +} + +impl Eq for Dataset {} + +impl<'a> IntoIterator for &'a Dataset { + type Item = QuadRef<'a>; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl FromIterator for Dataset { + fn from_iter>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl<'a, T: Into>> FromIterator for Dataset { + fn from_iter>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl Extend for Dataset { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(&t); + } + } +} + +impl<'a, T: Into>> Extend for Dataset { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(t); + } + } +} + +impl fmt::Display for Dataset { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{t} .")?; + } + Ok(()) + } +} + +/// A read-only view on an [RDF 
graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in a [`Dataset`]. +/// +/// It is built using the [`Dataset::graph`] method. +/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut dataset = Dataset::default(); +/// let ex = NamedNodeRef::new("http://example.com")?; +/// dataset.insert(QuadRef::new(ex, ex, ex, ex)); +/// +/// let results: Vec<_> = dataset.graph(ex).iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[derive(Clone, Debug)] +pub struct GraphView<'a> { + dataset: &'a Dataset, + graph_name: InternedGraphName, +} + +impl<'a> GraphView<'a> { + /// Returns all the triples contained by the graph. + pub fn iter(&self) -> GraphViewIter<'a> { + let iter = self.dataset.gspo.range( + &( + self.graph_name.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + self.graph_name.next(), + InternedSubject::first(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ); + GraphViewIter { + dataset: self.dataset, + inner: iter, + } + } + + pub fn triples_for_subject<'b>( + &self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + self.triples_for_interned_subject(self.dataset.encoded_subject(subject)) + } + + pub(super) fn triples_for_interned_subject( + &self, + subject: Option, + ) -> impl Iterator> + 'a { + let subject = subject.unwrap_or_else(InternedSubject::impossible); + let ds = self.dataset; + self.dataset + .gspo + .range( + &( + self.graph_name.clone(), + subject.clone(), + InternedNamedNode::first(), + InternedTerm::first(), + ) + ..&( + self.graph_name.clone(), + subject.next(), + InternedNamedNode::first(), + InternedTerm::first(), + ), + ) + .map(move |q| { + let (_, s, p, o) = q; + ds.decode_spo((s, p, o)) + }) + } + + pub fn objects_for_subject_predicate<'b>( + &self, + subject: impl Into>, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + 
self.objects_for_interned_subject_predicate( + self.dataset.encoded_subject(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub(super) fn objects_for_interned_subject_predicate( + &self, + subject: Option, + predicate: Option, + ) -> impl Iterator> + 'a { + let subject = subject.unwrap_or_else(InternedSubject::impossible); + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let ds = self.dataset; + self.dataset + .gspo + .range( + &( + self.graph_name.clone(), + subject.clone(), + predicate, + InternedTerm::first(), + ) + ..&( + self.graph_name.clone(), + subject, + predicate.next(), + InternedTerm::first(), + ), + ) + .map(move |q| q.3.decode_from(&ds.interner)) + } + + pub fn object_for_subject_predicate<'b>( + &self, + subject: impl Into>, + predicate: impl Into>, + ) -> Option> { + self.objects_for_subject_predicate(subject, predicate) + .next() + } + + pub fn predicates_for_subject_object<'b>( + &self, + subject: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.predicates_for_interned_subject_object( + self.dataset.encoded_subject(subject), + self.dataset.encoded_term(object), + ) + } + + pub(super) fn predicates_for_interned_subject_object( + &self, + subject: Option, + object: Option, + ) -> impl Iterator> + 'a { + let subject = subject.unwrap_or_else(InternedSubject::impossible); + let object = object.unwrap_or_else(InternedTerm::impossible); + let ds = self.dataset; + self.dataset + .gosp + .range( + &( + self.graph_name.clone(), + object.clone(), + subject.clone(), + InternedNamedNode::first(), + ) + ..&( + self.graph_name.clone(), + object, + subject.next(), + InternedNamedNode::first(), + ), + ) + .map(move |q| q.3.decode_from(&ds.interner)) + } + + pub fn triples_for_predicate<'b>( + &self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub(super) fn triples_for_interned_predicate( + &self, + 
predicate: Option, + ) -> impl Iterator> + 'a { + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let ds = self.dataset; + self.dataset + .gpos + .range( + &( + self.graph_name.clone(), + predicate, + InternedTerm::first(), + InternedSubject::first(), + ) + ..&( + self.graph_name.clone(), + predicate.next(), + InternedTerm::first(), + InternedSubject::first(), + ), + ) + .map(move |(_, p, o, s)| ds.decode_spo((s, p, o))) + } + + pub fn subjects_for_predicate_object<'b>( + &self, + predicate: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub(super) fn subjects_for_interned_predicate_object( + &self, + predicate: Option, + object: Option, + ) -> impl Iterator> + 'a { + let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible); + let object = object.unwrap_or_else(InternedTerm::impossible); + let ds = self.dataset; + self.dataset + .gpos + .range( + &( + self.graph_name.clone(), + predicate, + object.clone(), + InternedSubject::first(), + ) + ..&( + self.graph_name.clone(), + predicate, + object.next(), + InternedSubject::first(), + ), + ) + .map(move |q| q.3.decode_from(&ds.interner)) + } + + pub fn subject_for_predicate_object<'b>( + &self, + predicate: impl Into>, + object: impl Into>, + ) -> Option> { + self.subjects_for_predicate_object(predicate, object).next() + } + + pub fn triples_for_object<'b>( + &self, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.triples_for_interned_object(self.dataset.encoded_term(object)) + } + + pub(super) fn triples_for_interned_object( + &self, + object: Option, + ) -> impl Iterator> + 'a { + let object = object.unwrap_or_else(InternedTerm::impossible); + let ds = self.dataset; + self.dataset + .gosp + .range( + &( + self.graph_name.clone(), + object.clone(), + InternedSubject::first(), + InternedNamedNode::first(), + ) + 
..&( + self.graph_name.clone(), + object.next(), + InternedSubject::first(), + InternedNamedNode::first(), + ), + ) + .map(move |(_, o, s, p)| ds.decode_spo((s, p, o))) + } + + /// Checks if the graph contains the given triple. + pub fn contains<'b>(&self, triple: impl Into>) -> bool { + if let Some(triple) = self.encoded_triple(triple.into()) { + self.dataset.gspo.contains(&( + self.graph_name.clone(), + triple.subject, + triple.predicate, + triple.object, + )) + } else { + false + } + } + + /// Returns the number of triples in this graph. + pub fn len(&self) -> usize { + self.iter().count() + } + + /// Checks if this graph contains a triple. + pub fn is_empty(&self) -> bool { + self.iter().next().is_none() + } + + fn encoded_triple(&self, triple: TripleRef<'_>) -> Option { + Some(InternedTriple { + subject: self.dataset.encoded_subject(triple.subject)?, + predicate: self.dataset.encoded_named_node(triple.predicate)?, + object: self.dataset.encoded_term(triple.object)?, + }) + } +} + +impl<'a> IntoIterator for GraphView<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a, 'b> IntoIterator for &'b GraphView<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> fmt::Display for GraphView<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{t} .")?; + } + Ok(()) + } +} + +/// A read/write view on an [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) contained in a [`Dataset`]. +/// +/// It is built using the [`Dataset::graph_mut`] method. 
+/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut dataset = Dataset::default(); +/// let ex = NamedNodeRef::new("http://example.com")?; +/// +/// // We edit and query the dataset http://example.com graph +/// { +/// let mut graph = dataset.graph_mut(ex); +/// graph.insert(TripleRef::new(ex, ex, ex)); +/// let results: Vec<_> = graph.iter().collect(); +/// assert_eq!(vec![TripleRef::new(ex, ex, ex)], results); +/// } +/// +/// // We have also changed the dataset itself +/// let results: Vec<_> = dataset.iter().collect(); +/// assert_eq!(vec![QuadRef::new(ex, ex, ex, ex)], results); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[derive(Debug)] +pub struct GraphViewMut<'a> { + dataset: &'a mut Dataset, + graph_name: InternedGraphName, +} + +impl<'a> GraphViewMut<'a> { + fn read(&self) -> GraphView<'_> { + GraphView { + dataset: self.dataset, + graph_name: self.graph_name.clone(), + } + } + + /// Adds a triple to the graph. + pub fn insert<'b>(&mut self, triple: impl Into>) -> bool { + let triple = self.encode_triple(triple.into()); + self.dataset.insert_encoded(( + triple.subject, + triple.predicate, + triple.object, + self.graph_name.clone(), + )) + } + + /// Removes a concrete triple from the graph.
+ pub fn remove<'b>(&mut self, triple: impl Into>) -> bool { + if let Some(triple) = self.read().encoded_triple(triple.into()) { + self.dataset.remove_encoded(( + triple.subject, + triple.predicate, + triple.object, + self.graph_name.clone(), + )) + } else { + false + } + } + + fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple { + InternedTriple { + subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner), + predicate: InternedNamedNode::encoded_into( + triple.predicate, + &mut self.dataset.interner, + ), + object: InternedTerm::encoded_into(triple.object, &mut self.dataset.interner), + } + } + + /// Returns all the triples contained by the graph + pub fn iter(&'a self) -> GraphViewIter<'a> { + self.read().iter() + } + + pub fn triples_for_subject<'b>( + &'a self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + self.read() + .triples_for_interned_subject(self.dataset.encoded_subject(subject)) + } + + pub fn objects_for_subject_predicate<'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.read().objects_for_interned_subject_predicate( + self.dataset.encoded_subject(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn object_for_subject_predicate<'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> Option> { + self.read().object_for_subject_predicate(subject, predicate) + } + + pub fn predicates_for_subject_object<'b>( + &'a self, + subject: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.read().predicates_for_interned_subject_object( + self.dataset.encoded_subject(subject), + self.dataset.encoded_term(object), + ) + } + + pub fn triples_for_predicate<'b>( + &'a self, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.read() + .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub fn subjects_for_predicate_object<'b>( + &'a self, + predicate: impl Into>, + object: 
impl Into>, + ) -> impl Iterator> + 'a { + self.read().subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub fn subject_for_predicate_object<'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> Option> { + self.read().subject_for_predicate_object(predicate, object) + } + + pub fn triples_for_object<'b>( + &'a self, + object: TermRef<'b>, + ) -> impl Iterator> + 'a { + self.read() + .triples_for_interned_object(self.dataset.encoded_term(object)) + } + + /// Checks if the graph contains the given triple. + pub fn contains<'b>(&self, triple: impl Into>) -> bool { + self.read().contains(triple) + } + + /// Returns the number of triples in this graph. + pub fn len(&self) -> usize { + self.read().len() + } + + /// Checks if this graph contains no triples. + pub fn is_empty(&self) -> bool { + self.read().is_empty() + } +} + +impl<'a> Extend for GraphViewMut<'a> { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(&t); + } + } +} + +impl<'a, 'b, T: Into>> Extend for GraphViewMut<'a> { + fn extend>(&mut self, iter: I) { + for t in iter { + self.insert(t); + } + } +} + +impl<'a> IntoIterator for &'a GraphViewMut<'a> { + type Item = TripleRef<'a>; + type IntoIter = GraphViewIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> fmt::Display for GraphViewMut<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for t in self { + writeln!(f, "{t} .")?; // one triple per line with the " ." terminator, as GraphView's Display does + } + Ok(()) + } +} + +/// Iterator returned by [`Dataset::iter`].
+pub struct Iter<'a> { + dataset: &'a Dataset, + inner: std::collections::btree_set::Iter< + 'a, + ( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + ), + >, +} + +impl<'a> Iterator for Iter<'a> { + type Item = QuadRef<'a>; + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g))) + } +} + +/// Iterator returned by [`GraphView::iter`]. +pub struct GraphViewIter<'a> { + dataset: &'a Dataset, + inner: std::collections::btree_set::Range< + 'a, + ( + InternedGraphName, + InternedSubject, + InternedNamedNode, + InternedTerm, + ), + >, +} + +impl<'a> Iterator for GraphViewIter<'a> { + type Item = TripleRef<'a>; + + fn next(&mut self) -> Option { + self.inner + .next() + .map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o))) + } +} + +type QuadsPerBlankNode = HashMap< + InternedBlankNode, + Vec<( + InternedSubject, + InternedNamedNode, + InternedTerm, + InternedGraphName, + )>, +>; + +/// An algorithm used to canonicalize graph and datasets. +/// +/// See [`Graph::canonicalize`] and [`Dataset::canonicalize`]. +#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub enum CanonicalizationAlgorithm { + /// The algorithm preferred by OxRDF. + /// + ///
The canonicalization algorithm is not stable and canonical blank node ids might change between Oxigraph versions.
+ #[default] + Unstable, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_canon() { + let mut dataset = Dataset::new(); + dataset.insert(QuadRef::new( + BlankNode::default().as_ref(), + NamedNodeRef::new_unchecked("http://ex"), + BlankNode::default().as_ref(), + GraphNameRef::DefaultGraph, + )); + dataset.insert(QuadRef::new( + BlankNode::default().as_ref(), + NamedNodeRef::new_unchecked("http://ex"), + BlankNode::default().as_ref(), + GraphNameRef::DefaultGraph, + )); + dataset.canonicalize(CanonicalizationAlgorithm::Unstable); + let mut dataset2 = Dataset::new(); + dataset2.insert(QuadRef::new( + BlankNode::default().as_ref(), + NamedNodeRef::new_unchecked("http://ex"), + BlankNode::default().as_ref(), + GraphNameRef::DefaultGraph, + )); + dataset2.insert(QuadRef::new( + BlankNode::default().as_ref(), + NamedNodeRef::new_unchecked("http://ex"), + BlankNode::default().as_ref(), + GraphNameRef::DefaultGraph, + )); + dataset2.canonicalize(CanonicalizationAlgorithm::Unstable); + assert_eq!(dataset, dataset2); + } +} diff --git a/ng-oxigraph/src/oxrdf/graph.rs b/ng-oxigraph/src/oxrdf/graph.rs new file mode 100644 index 0000000..8273590 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/graph.rs @@ -0,0 +1,284 @@ +//! [In-memory implementation](Graph) of [RDF graphs](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). +//! +//! Usage example: +//! ``` +//! use oxrdf::*; +//! +//! let mut graph = Graph::default(); +//! +//! // insertion +//! let ex = NamedNodeRef::new("http://example.com")?; +//! let triple = TripleRef::new(ex, ex, ex); +//! graph.insert(triple); +//! +//! // simple filter +//! let results: Vec<_> = graph.triples_for_subject(ex).collect(); +//! assert_eq!(vec![triple], results); +//! +//! // Print +//! assert_eq!( +//! graph.to_string(), +//! " .\n" +//! ); +//! # Result::<_,Box>::Ok(()) +//! ``` +//! +//! See also [`Dataset`] if you want to get support of multiple RDF graphs at the same time. 
+ +pub use crate::oxrdf::dataset::CanonicalizationAlgorithm; +use crate::oxrdf::dataset::*; +use crate::oxrdf::*; +use std::fmt; + +/// An in-memory [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph). +/// +/// It can accommodate a fairly large number of triples (in the few millions). +/// +///
It interns the strings and does not do any garbage collection yet: +/// if you insert and remove a lot of different terms, memory will grow without any reduction.
+/// +/// Usage example: +/// ``` +/// use oxrdf::*; +/// +/// let mut graph = Graph::default(); +/// +/// // insertion +/// let ex = NamedNodeRef::new("http://example.com")?; +/// let triple = TripleRef::new(ex, ex, ex); +/// graph.insert(triple); +/// +/// // simple filter +/// let results: Vec<_> = graph.triples_for_subject(ex).collect(); +/// assert_eq!(vec![triple], results); +/// # Result::<_,Box>::Ok(()) +/// ``` +#[derive(Debug, Default, Clone)] +pub struct Graph { + dataset: Dataset, +} + +impl Graph { + /// Creates a new graph. + pub fn new() -> Self { + Self::default() + } + + fn graph(&self) -> GraphView<'_> { + self.dataset.graph(GraphNameRef::DefaultGraph) + } + + fn graph_mut(&mut self) -> GraphViewMut<'_> { + self.dataset.graph_mut(GraphNameRef::DefaultGraph) + } + + /// Returns all the triples contained by the graph. + pub fn iter(&self) -> Iter<'_> { + Iter { + inner: self.graph().iter(), + } + } + + pub fn triples_for_subject<'a, 'b>( + &'a self, + subject: impl Into>, + ) -> impl Iterator> + 'a { + self.graph() + .triples_for_interned_subject(self.dataset.encoded_subject(subject)) + } + + pub fn objects_for_subject_predicate<'a, 'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> impl Iterator> + 'a { + self.graph().objects_for_interned_subject_predicate( + self.dataset.encoded_subject(subject), + self.dataset.encoded_named_node(predicate), + ) + } + + pub fn object_for_subject_predicate<'a, 'b>( + &'a self, + subject: impl Into>, + predicate: impl Into>, + ) -> Option> { + self.graph() + .objects_for_subject_predicate(subject, predicate) + .next() + } + + pub fn predicates_for_subject_object<'a, 'b>( + &'a self, + subject: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.graph().predicates_for_interned_subject_object( + self.dataset.encoded_subject(subject), + self.dataset.encoded_term(object), + ) + } + + pub fn triples_for_predicate<'a, 'b>( + &'a self, + predicate: impl Into>, + ) -> impl Iterator> + 
'a { + self.graph() + .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate)) + } + + pub fn subjects_for_predicate_object<'a, 'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.graph().subjects_for_interned_predicate_object( + self.dataset.encoded_named_node(predicate), + self.dataset.encoded_term(object), + ) + } + + pub fn subject_for_predicate_object<'a, 'b>( + &'a self, + predicate: impl Into>, + object: impl Into>, + ) -> Option> { + self.graph().subject_for_predicate_object(predicate, object) + } + + pub fn triples_for_object<'a, 'b>( + &'a self, + object: impl Into>, + ) -> impl Iterator> + 'a { + self.graph() + .triples_for_interned_object(self.dataset.encoded_term(object)) + } + + /// Checks if the graph contains the given triple. + pub fn contains<'a>(&self, triple: impl Into>) -> bool { + self.graph().contains(triple) + } + + /// Returns the number of triples in this graph. + pub fn len(&self) -> usize { + self.dataset.len() + } + + /// Checks if this graph contains a triple. + pub fn is_empty(&self) -> bool { + self.dataset.is_empty() + } + + /// Adds a triple to the graph. + pub fn insert<'a>(&mut self, triple: impl Into>) -> bool { + self.graph_mut().insert(triple) + } + + /// Removes a concrete triple from the graph. + pub fn remove<'a>(&mut self, triple: impl Into>) -> bool { + self.graph_mut().remove(triple) + } + + /// Clears the graph. + pub fn clear(&mut self) { + self.dataset.clear() + } + + /// Canonicalizes the dataset by renaming blank nodes. 
+ /// + /// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)): + /// ``` + /// use oxrdf::graph::CanonicalizationAlgorithm; + /// use oxrdf::*; + /// + /// let iri = NamedNodeRef::new("http://example.com")?; + /// + /// let mut graph1 = Graph::new(); + /// let bnode1 = BlankNode::default(); + /// graph1.insert(TripleRef::new(iri, iri, &bnode1)); + /// graph1.insert(TripleRef::new(&bnode1, iri, iri)); + /// + /// let mut graph2 = Graph::new(); + /// let bnode2 = BlankNode::default(); + /// graph2.insert(TripleRef::new(iri, iri, &bnode2)); + /// graph2.insert(TripleRef::new(&bnode2, iri, iri)); + /// + /// assert_ne!(graph1, graph2); + /// graph1.canonicalize(CanonicalizationAlgorithm::Unstable); + /// graph2.canonicalize(CanonicalizationAlgorithm::Unstable); + /// assert_eq!(graph1, graph2); + /// # Result::<_,Box>::Ok(()) + /// ``` + /// + ///
Blank node ids depend on the current shape of the graph. Adding a new triple might change the ids of a lot of blank nodes. + /// Hence, this canonicalization might not be suitable for diffs.
+ /// + ///
This implementation's worst-case complexity is in *O(b!)*, with *b* the number of blank nodes in the input graph.
+ pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) { + self.dataset.canonicalize(algorithm) + } +} + +impl PartialEq for Graph { + fn eq(&self, other: &Self) -> bool { + self.dataset == other.dataset + } +} + +impl Eq for Graph {} + +impl<'a> IntoIterator for &'a Graph { + type Item = TripleRef<'a>; + type IntoIter = Iter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl FromIterator for Graph { + fn from_iter>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl<'a, T: Into>> FromIterator for Graph { + fn from_iter>(iter: I) -> Self { + let mut g = Self::new(); + g.extend(iter); + g + } +} + +impl Extend for Graph { + fn extend>(&mut self, iter: I) { + self.graph_mut().extend(iter) + } +} + +impl<'a, T: Into>> Extend for Graph { + fn extend>(&mut self, iter: I) { + self.graph_mut().extend(iter) + } +} + +impl fmt::Display for Graph { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.graph().fmt(f) + } +} + +/// Iterator returned by [`Graph::iter`]. +pub struct Iter<'a> { + inner: GraphViewIter<'a>, +} + +impl<'a> Iterator for Iter<'a> { + type Item = TripleRef<'a>; + + fn next(&mut self) -> Option { + self.inner.next() + } +} diff --git a/ng-oxigraph/src/oxrdf/interning.rs b/ng-oxigraph/src/oxrdf/interning.rs new file mode 100644 index 0000000..7eca09d --- /dev/null +++ b/ng-oxigraph/src/oxrdf/interning.rs @@ -0,0 +1,535 @@ +//! 
Interning of RDF elements using a hash-keyed string table + +use crate::oxrdf::*; +use std::collections::hash_map::{Entry, HashMap, RandomState}; +use std::hash::{BuildHasher, Hasher}; + +#[derive(Debug, Default, Clone)] +pub struct Interner { + hasher: RandomState, + string_for_hash: HashMap, + string_for_blank_node_id: HashMap, + #[cfg(feature = "rdf-star")] + triples: HashMap, +} + +impl Interner { + // Linear probing: when the slot holds a different string, the outer loop retries with the next slot. + fn get_or_intern(&mut self, value: &str) -> Key { + let mut hash = self.hash(value); + loop { + match self.string_for_hash.entry(hash) { + Entry::Vacant(e) => { + e.insert(value.into()); + return Key(hash); + } + Entry::Occupied(e) => { + if e.get() == value { + return Key(hash); + } else if hash == u64::MAX - 1 { + hash = 0; + } else { + hash += 1; + } + } + } + } + } + + fn get(&self, value: &str) -> Option { + let mut hash = self.hash(value); + loop { + let v = self.string_for_hash.get(&hash)?; + if v == value { + return Some(Key(hash)); + } else if hash == u64::MAX - 1 { + hash = 0; + } else { + hash += 1; + } + } + } + + fn hash(&self, value: &str) -> u64 { + let mut hasher = self.hasher.build_hasher(); + hasher.write(value.as_bytes()); + let hash = hasher.finish(); + if hash == u64::MAX { + 0 + } else { + hash + } + } + + fn resolve(&self, key: Key) -> &str { + &self.string_for_hash[&key.0] + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct Key(u64); + +impl Key { + fn first() -> Self { + Self(0) + } + + fn next(self) -> Self { + Self(self.0.saturating_add(1)) + } + + fn impossible() -> Self { + Self(u64::MAX) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct InternedNamedNode { + id: Key, +} + +impl InternedNamedNode { + pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self { + Self { + id: interner.get_or_intern(named_node.as_str()), + } + } + + pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option {
Some(Self { + id: interner.get(named_node.as_str())?, + }) + } + + pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> { + NamedNodeRef::new_unchecked(interner.resolve(self.id)) + } + + pub fn first() -> Self { + Self { id: Key::first() } + } + + pub fn next(self) -> Self { + Self { id: self.id.next() } + } + + pub fn impossible() -> Self { + Self { + id: Key::impossible(), + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum InternedBlankNode { + Number { id: u128 }, + Other { id: Key }, +} + +impl InternedBlankNode { + pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self { + if let Some(id) = blank_node.unique_id() { + interner + .string_for_blank_node_id + .entry(id) + .or_insert_with(|| blank_node.as_str().into()); + Self::Number { id } + } else { + Self::Other { + id: interner.get_or_intern(blank_node.as_str()), + } + } + } + + pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option { + if let Some(id) = blank_node.unique_id() { + interner + .string_for_blank_node_id + .contains_key(&id) + .then_some(Self::Number { id }) + } else { + Some(Self::Other { + id: interner.get(blank_node.as_str())?, + }) + } + } + + pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> { + BlankNodeRef::new_unchecked(match self { + Self::Number { id } => &interner.string_for_blank_node_id[&id], + Self::Other { id } => interner.resolve(id), + }) + } + + pub fn next(self) -> Self { + match self { + Self::Number { id } => Self::Number { + id: id.saturating_add(1), + }, + Self::Other { id } => Self::Other { id: id.next() }, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub enum InternedLiteral { + String { + value_id: Key, + }, + LanguageTaggedString { + value_id: Key, + language_id: Key, + }, + TypedLiteral { + value_id: Key, + datatype: InternedNamedNode, + }, +} + +impl InternedLiteral { + pub fn encoded_into(literal: 
LiteralRef<'_>, interner: &mut Interner) -> Self { + let value_id = interner.get_or_intern(literal.value()); + if literal.is_plain() { + if let Some(language) = literal.language() { + Self::LanguageTaggedString { + value_id, + language_id: interner.get_or_intern(language), + } + } else { + Self::String { value_id } + } + } else { + Self::TypedLiteral { + value_id, + datatype: InternedNamedNode::encoded_into(literal.datatype(), interner), + } + } + } + + pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option { + let value_id = interner.get(literal.value())?; + Some(if literal.is_plain() { + if let Some(language) = literal.language() { + Self::LanguageTaggedString { + value_id, + language_id: interner.get(language)?, + } + } else { + Self::String { value_id } + } + } else { + Self::TypedLiteral { + value_id, + datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?, + } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> { + match self { + Self::String { value_id } => { + LiteralRef::new_simple_literal(interner.resolve(*value_id)) + } + Self::LanguageTaggedString { + value_id, + language_id, + } => LiteralRef::new_language_tagged_literal_unchecked( + interner.resolve(*value_id), + interner.resolve(*language_id), + ), + Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal( + interner.resolve(*value_id), + datatype.decode_from(interner), + ), + } + } + + pub fn next(&self) -> Self { + match self { + Self::String { value_id } => Self::String { + value_id: value_id.next(), + }, + Self::LanguageTaggedString { + value_id, + language_id, + } => Self::LanguageTaggedString { + value_id: *value_id, + language_id: language_id.next(), + }, + Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral { + value_id: *value_id, + datatype: datatype.next(), + }, + } + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum InternedSubject { + 
NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), + #[cfg(feature = "rdf-star")] + Triple(Box), +} + +impl InternedSubject { + pub fn encoded_into(node: SubjectRef<'_>, interner: &mut Interner) -> Self { + match node { + SubjectRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) + } + SubjectRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) + } + #[cfg(feature = "rdf-star")] + SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into( + triple.as_ref(), + interner, + ))), + } + } + + pub fn encoded_from(node: SubjectRef<'_>, interner: &Interner) -> Option { + Some(match node { + SubjectRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) + } + SubjectRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) + } + #[cfg(feature = "rdf-star")] + SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from( + triple.as_ref(), + interner, + )?)), + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> SubjectRef<'a> { + match self { + Self::NamedNode(node) => SubjectRef::NamedNode(node.decode_from(interner)), + Self::BlankNode(node) => SubjectRef::BlankNode(node.decode_from(interner)), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => SubjectRef::Triple(&interner.triples[triple.as_ref()]), + } + } + + pub fn first() -> Self { + Self::NamedNode(InternedNamedNode::first()) + } + + pub fn next(&self) -> Self { + match self { + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => Self::Triple(Box::new(triple.next())), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum InternedGraphName { + DefaultGraph, + 
NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), +} + +impl InternedGraphName { + pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Interner) -> Self { + match node { + GraphNameRef::DefaultGraph => Self::DefaultGraph, + GraphNameRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_into(node, interner)) + } + GraphNameRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_into(node, interner)) + } + } + } + + pub fn encoded_from(node: GraphNameRef<'_>, interner: &Interner) -> Option { + Some(match node { + GraphNameRef::DefaultGraph => Self::DefaultGraph, + GraphNameRef::NamedNode(node) => { + Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?) + } + GraphNameRef::BlankNode(node) => { + Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?) + } + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> GraphNameRef<'a> { + match self { + Self::DefaultGraph => GraphNameRef::DefaultGraph, + Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)), + Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)), + } + } + + pub fn first() -> Self { + Self::DefaultGraph + } + + pub fn next(&self) -> Self { + match self { + Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()), + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub enum InternedTerm { + NamedNode(InternedNamedNode), + BlankNode(InternedBlankNode), + Literal(InternedLiteral), + #[cfg(feature = "rdf-star")] + Triple(Box), +} + +impl InternedTerm { + pub fn encoded_into(term: TermRef<'_>, interner: &mut Interner) -> Self { + match term { + TermRef::NamedNode(term) => { + Self::NamedNode(InternedNamedNode::encoded_into(term, interner)) + } + 
TermRef::BlankNode(term) => { + Self::BlankNode(InternedBlankNode::encoded_into(term, interner)) + } + TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)), + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into( + triple.as_ref(), + interner, + ))), + } + } + + pub fn encoded_from(term: TermRef<'_>, interner: &Interner) -> Option { + Some(match term { + TermRef::NamedNode(term) => { + Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?) + } + TermRef::BlankNode(term) => { + Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?) + } + TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?), + #[cfg(feature = "rdf-star")] + TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from( + triple.as_ref(), + interner, + )?)), + }) + } + + pub fn decode_from<'a>(&self, interner: &'a Interner) -> TermRef<'a> { + match self { + Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)), + Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)), + Self::Literal(term) => TermRef::Literal(term.decode_from(interner)), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => TermRef::Triple(&interner.triples[triple.as_ref()]), + } + } + + pub fn first() -> Self { + Self::NamedNode(InternedNamedNode::first()) + } + + pub fn next(&self) -> Self { + match self { + Self::NamedNode(node) => Self::NamedNode(node.next()), + Self::BlankNode(node) => Self::BlankNode(node.next()), + Self::Literal(node) => Self::Literal(node.next()), + #[cfg(feature = "rdf-star")] + Self::Triple(triple) => Self::Triple(Box::new(triple.next())), + } + } + + pub fn impossible() -> Self { + Self::NamedNode(InternedNamedNode::impossible()) + } +} + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] +pub struct InternedTriple { + pub subject: InternedSubject, + pub predicate: InternedNamedNode, + pub object: 
InternedTerm, +} + +#[cfg(feature = "rdf-star")] +impl InternedTriple { + pub fn encoded_into(triple: TripleRef<'_>, interner: &mut Interner) -> Self { + let interned_triple = Self { + subject: InternedSubject::encoded_into(triple.subject, interner), + predicate: InternedNamedNode::encoded_into(triple.predicate, interner), + object: InternedTerm::encoded_into(triple.object, interner), + }; + interner + .triples + .insert(interned_triple.clone(), triple.into_owned()); + interned_triple + } + + pub fn encoded_from(triple: TripleRef<'_>, interner: &Interner) -> Option { + let interned_triple = Self { + subject: InternedSubject::encoded_from(triple.subject, interner)?, + predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?, + object: InternedTerm::encoded_from(triple.object, interner)?, + }; + interner + .triples + .contains_key(&interned_triple) + .then_some(interned_triple) + } + + pub fn next(&self) -> Self { + Self { + subject: self.subject.clone(), + predicate: self.predicate, + object: self.object.next(), + } + } +} + +#[derive(Default, Clone)] +struct IdentityHasherBuilder; + +impl BuildHasher for IdentityHasherBuilder { + type Hasher = IdentityHasher; + + fn build_hasher(&self) -> Self::Hasher { + Self::Hasher::default() + } +} + +#[derive(Default)] +struct IdentityHasher { + value: u64, +} + +impl Hasher for IdentityHasher { + fn finish(&self) -> u64 { + self.value + } + + fn write(&mut self, _bytes: &[u8]) { + unreachable!("Should only be used on u64 values") + } + + fn write_u64(&mut self, i: u64) { + self.value = i + } +} diff --git a/ng-oxigraph/src/oxrdf/literal.rs b/ng-oxigraph/src/oxrdf/literal.rs new file mode 100644 index 0000000..b9647c9 --- /dev/null +++ b/ng-oxigraph/src/oxrdf/literal.rs @@ -0,0 +1,669 @@ +use crate::oxrdf::named_node::{NamedNode, NamedNodeRef}; +use crate::oxrdf::vocab::{rdf, xsd}; +#[cfg(feature = "oxsdatatypes")] +use crate::oxsdatatypes::*; +use oxilangtag::{LanguageTag, LanguageTagParseError}; +use 
serde::{Deserialize, Serialize}; +use std::borrow::Cow; +use std::fmt; +use std::fmt::Write; + +/// An owned RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). +/// +/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation: +/// ``` +/// # use oxilangtag::LanguageTagParseError; +/// use oxrdf::vocab::xsd; +/// use oxrdf::Literal; +/// +/// assert_eq!( +/// "\"foo\\nbar\"", +/// Literal::new_simple_literal("foo\nbar").to_string() +/// ); +/// +/// assert_eq!( +/// r#""1999-01-01"^^"#, +/// Literal::new_typed_literal("1999-01-01", xsd::DATE).to_string() +/// ); +/// +/// assert_eq!( +/// r#""foo"@en"#, +/// Literal::new_language_tagged_literal("foo", "en")?.to_string() +/// ); +/// # Result::<(), LanguageTagParseError>::Ok(()) +/// ``` +#[derive(Eq, PartialEq, Debug, Clone, Hash, Serialize, Deserialize)] +pub struct Literal(LiteralContent); + +#[derive(PartialEq, Eq, Debug, Clone, Hash, Serialize, Deserialize)] +enum LiteralContent { + String(String), + LanguageTaggedString { value: String, language: String }, + TypedLiteral { value: String, datatype: NamedNode }, +} + +impl Literal { + /// Builds an RDF [simple literal](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal). + #[inline] + pub fn new_simple_literal(value: impl Into) -> Self { + Self(LiteralContent::String(value.into())) + } + + /// Builds an RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal) with a [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). + #[inline] + pub fn new_typed_literal(value: impl Into, datatype: impl Into) -> Self { + let value = value.into(); + let datatype = datatype.into(); + Self(if datatype == xsd::STRING { + LiteralContent::String(value) + } else { + LiteralContent::TypedLiteral { value, datatype } + }) + } + + /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). 
+ #[inline] + pub fn new_language_tagged_literal( + value: impl Into, + language: impl Into, + ) -> Result { + let mut language = language.into(); + language.make_ascii_lowercase(); + Ok(Self::new_language_tagged_literal_unchecked( + value, + LanguageTag::parse(language)?.into_inner(), + )) + } + + /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// + /// It is the responsibility of the caller to check that `language` + /// is valid [BCP47](https://tools.ietf.org/html/bcp47) language tag, + /// and is lowercase. + /// + /// [`Literal::new_language_tagged_literal()`] is a safe version of this constructor and should be used for untrusted data. + #[inline] + pub fn new_language_tagged_literal_unchecked( + value: impl Into, + language: impl Into, + ) -> Self { + Self(LiteralContent::LanguageTaggedString { + value: value.into(), + language: language.into(), + }) + } + + /// The literal [lexical form](https://www.w3.org/TR/rdf11-concepts/#dfn-lexical-form). + #[inline] + pub fn value(&self) -> &str { + self.as_ref().value() + } + + /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// + /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47). + /// They are normalized to lowercase by this implementation. + #[inline] + pub fn language(&self) -> Option<&str> { + self.as_ref().language() + } + + /// The literal [datatype](https://www.w3.org/TR/rdf11-concepts/#dfn-datatype-iri). + /// + /// The datatype of [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) is always [rdf:langString](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). + /// The datatype of [simple literals](https://www.w3.org/TR/rdf11-concepts/#dfn-simple-literal) is [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). 
+ #[inline] + pub fn datatype(&self) -> NamedNodeRef<'_> { + self.as_ref().datatype() + } + + /// Checks if this literal could be seen as an RDF 1.0 [plain literal](https://www.w3.org/TR/2004/REC-rdf-concepts-20040210/#dfn-plain-literal). + /// + /// It returns true if the literal is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) + /// or has the datatype [xsd:string](https://www.w3.org/TR/xmlschema11-2/#string). + #[inline] + pub fn is_plain(&self) -> bool { + self.as_ref().is_plain() + } + + #[inline] + pub fn as_ref(&self) -> LiteralRef<'_> { + LiteralRef(match &self.0 { + LiteralContent::String(value) => LiteralRefContent::String(value), + LiteralContent::LanguageTaggedString { value, language } => { + LiteralRefContent::LanguageTaggedString { value, language } + } + LiteralContent::TypedLiteral { value, datatype } => LiteralRefContent::TypedLiteral { + value, + datatype: datatype.as_ref(), + }, + }) + } + + /// Extract components from this literal (value, datatype and language tag). 
+ #[inline] + pub fn destruct(self) -> (String, Option, Option) { + match self.0 { + LiteralContent::String(s) => (s, None, None), + LiteralContent::LanguageTaggedString { value, language } => { + (value, None, Some(language)) + } + LiteralContent::TypedLiteral { value, datatype } => (value, Some(datatype), None), + } + } +} + +impl fmt::Display for Literal { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_ref().fmt(f) + } +} + +impl<'a> From<&'a str> for Literal { + #[inline] + fn from(value: &'a str) -> Self { + Self(LiteralContent::String(value.into())) + } +} + +impl From for Literal { + #[inline] + fn from(value: String) -> Self { + Self(LiteralContent::String(value)) + } +} + +impl<'a> From> for Literal { + #[inline] + fn from(value: Cow<'a, str>) -> Self { + Self(LiteralContent::String(value.into())) + } +} + +impl From for Literal { + #[inline] + fn from(value: bool) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::BOOLEAN.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: i128) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: i64) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: i32) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: i16) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: u64) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn 
from(value: u32) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: u16) -> Self { + Self(LiteralContent::TypedLiteral { + value: value.to_string(), + datatype: xsd::INTEGER.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: f32) -> Self { + Self(LiteralContent::TypedLiteral { + value: if value == f32::INFINITY { + "INF".to_owned() + } else if value == f32::NEG_INFINITY { + "-INF".to_owned() + } else { + value.to_string() + }, + datatype: xsd::FLOAT.into(), + }) + } +} + +impl From for Literal { + #[inline] + fn from(value: f64) -> Self { + Self(LiteralContent::TypedLiteral { + value: if value == f64::INFINITY { + "INF".to_owned() + } else if value == f64::NEG_INFINITY { + "-INF".to_owned() + } else { + value.to_string() + }, + datatype: xsd::DOUBLE.into(), + }) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From for Literal { + #[inline] + fn from(value: Boolean) -> Self { + Self::new_typed_literal(value.to_string(), xsd::BOOLEAN) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From for Literal { + #[inline] + fn from(value: Float) -> Self { + Self::new_typed_literal(value.to_string(), xsd::FLOAT) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From for Literal { + #[inline] + fn from(value: Double) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DOUBLE) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From for Literal { + #[inline] + fn from(value: Integer) -> Self { + Self::new_typed_literal(value.to_string(), xsd::INTEGER) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From for Literal { + #[inline] + fn from(value: Decimal) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DECIMAL) + } +} + +#[cfg(feature = "oxsdatatypes")] +impl From for Literal { + #[inline] + fn from(value: DateTime) -> Self { + Self::new_typed_literal(value.to_string(), xsd::DATE_TIME) + } +} + +#[cfg(feature = 
"oxsdatatypes")] +impl From