Merge commit 'c20417c18c1209d5f2c61c998bd0c81d1e017016' as 'oxigraph'

pull/19/head
Niko PLP 7 months ago
commit 7ea52aa5ae
  1. oxigraph/Cargo.toml (+59)
  2. oxigraph/README.md (+82)
  3. oxigraph/src/io/format.rs (+301)
  4. oxigraph/src/io/mod.rs (+39)
  5. oxigraph/src/io/read.rs (+199)
  6. oxigraph/src/io/write.rs (+185)
  7. oxigraph/src/lib.rs (+12)
  8. oxigraph/src/model.rs (+22)
  9. oxigraph/src/sparql/algebra.rs (+311)
  10. oxigraph/src/sparql/dataset.rs (+184)
  11. oxigraph/src/sparql/error.rs (+84)
  12. oxigraph/src/sparql/eval.rs (+5870)
  13. oxigraph/src/sparql/http/dummy.rs (+34)
  14. oxigraph/src/sparql/http/mod.rs (+9)
  15. oxigraph/src/sparql/http/simple.rs (+90)
  16. oxigraph/src/sparql/mod.rs (+328)
  17. oxigraph/src/sparql/model.rs (+371)
  18. oxigraph/src/sparql/results.rs (+44)
  19. oxigraph/src/sparql/service.rs (+124)
  20. oxigraph/src/sparql/update.rs (+565)
  21. oxigraph/src/storage/backend/fallback.rs (+310)
  22. oxigraph/src/storage/backend/mod.rs (+12)
  23. oxigraph/src/storage/backend/oxi_rocksdb.rs (+1445)
  24. oxigraph/src/storage/binary_encoder.rs (+742)
  25. oxigraph/src/storage/error.rs (+139)
  26. oxigraph/src/storage/mod.rs (+1552)
  27. oxigraph/src/storage/numeric_encoder.rs (+1031)
  28. oxigraph/src/storage/small_string.rs (+177)
  29. oxigraph/src/store.rs (+2143)
  30. oxigraph/tests/rocksdb_bc_data/000003.log (BIN)
  31. oxigraph/tests/rocksdb_bc_data/CURRENT (+1)
  32. oxigraph/tests/rocksdb_bc_data/IDENTITY (+1)
  33. oxigraph/tests/rocksdb_bc_data/LOCK (+0)
  34. oxigraph/tests/rocksdb_bc_data/MANIFEST-000004 (BIN)
  35. oxigraph/tests/rocksdb_bc_data/OPTIONS-000026 (+964)
  36. oxigraph/tests/store.rs (+542)

@@ -0,0 +1,59 @@
[package]
name = "oxigraph"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition.workspace = true
rust-version.workspace = true
[features]
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
[dependencies]
digest.workspace = true
hex.workspace = true
json-event-parser.workspace = true
md-5.workspace = true
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { workspace = true, features = ["rdf-star"] }
oxsdatatypes.workspace = true
rand.workspace = true
regex.workspace = true
sha1.workspace = true
sha2.workspace = true
siphasher.workspace = true
sparesults = { workspace = true, features = ["rdf-star"] }
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror.workspace = true
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
rocksdb.workspace = true
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom.workspace = true
js-sys = { workspace = true, optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
codspeed-criterion-compat.workspace = true
zstd.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]

@@ -0,0 +1,82 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
It is based on these crates, which can be used separately:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
    * [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module).
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
To build the library locally, don't forget to fetch the submodules: either clone with `git clone --recursive https://github.com/oxigraph/oxigraph.git`, or run `git submodule update --init` in an already cloned repository.
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature:
```toml
oxigraph = { version = "*", default-features = false }
```
This is the default behavior when compiling Oxigraph to WASM.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
<http://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
<http://opensource.org/licenses/MIT>)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@@ -0,0 +1,301 @@
#![allow(deprecated)]
use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};
/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
///
/// This enumeration is non-exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum GraphFormat {
/// [N-Triples](https://www.w3.org/TR/n-triples/)
NTriples,
/// [Turtle](https://www.w3.org/TR/turtle/)
Turtle,
/// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/)
RdfXml,
}
impl GraphFormat {
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
///
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(
/// GraphFormat::NTriples.iri(),
/// "http://www.w3.org/ns/formats/N-Triples"
/// )
/// ```
#[inline]
pub fn iri(self) -> &'static str {
match self {
Self::NTriples => "http://www.w3.org/ns/formats/N-Triples",
Self::Turtle => "http://www.w3.org/ns/formats/Turtle",
Self::RdfXml => "http://www.w3.org/ns/formats/RDF_XML",
}
}
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
///
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(GraphFormat::NTriples.media_type(), "application/n-triples")
/// ```
#[inline]
pub fn media_type(self) -> &'static str {
match self {
Self::NTriples => "application/n-triples",
Self::Turtle => "text/turtle",
Self::RdfXml => "application/rdf+xml",
}
}
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
///
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(GraphFormat::NTriples.file_extension(), "nt")
/// ```
#[inline]
pub fn file_extension(self) -> &'static str {
match self {
Self::NTriples => "nt",
Self::Turtle => "ttl",
Self::RdfXml => "rdf",
}
}
/// Looks for a known format from a media type.
///
/// It supports some media type aliases.
/// For example, "application/xml" returns `GraphFormat::RdfXml` even though it is not the canonical media type.
///
/// Example:
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(
/// GraphFormat::from_media_type("text/turtle; charset=utf-8"),
/// Some(GraphFormat::Turtle)
/// )
/// ```
#[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> {
match media_type.split(';').next()?.trim() {
"application/n-triples" | "text/plain" => Some(Self::NTriples),
"text/turtle" | "application/turtle" | "application/x-turtle" => Some(Self::Turtle),
"application/rdf+xml" | "application/xml" | "text/xml" => Some(Self::RdfXml),
_ => None,
}
}
/// Looks for a known format from an extension.
///
/// It supports some aliases.
///
/// Example:
/// ```
/// use oxigraph::io::GraphFormat;
///
/// assert_eq!(
/// GraphFormat::from_extension("nt"),
/// Some(GraphFormat::NTriples)
/// )
/// ```
#[inline]
pub fn from_extension(extension: &str) -> Option<Self> {
match extension {
"nt" | "txt" => Some(Self::NTriples),
"ttl" => Some(Self::Turtle),
"rdf" | "xml" => Some(Self::RdfXml),
_ => None,
}
}
}
impl From<GraphFormat> for RdfFormat {
#[inline]
fn from(format: GraphFormat) -> Self {
match format {
GraphFormat::NTriples => Self::NTriples,
GraphFormat::Turtle => Self::Turtle,
GraphFormat::RdfXml => Self::RdfXml,
}
}
}
impl From<GraphFormat> for RdfParser {
#[inline]
fn from(format: GraphFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl From<GraphFormat> for RdfSerializer {
#[inline]
fn from(format: GraphFormat) -> Self {
RdfFormat::from(format).into()
}
}
/// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats.
///
/// This enumeration is non-exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum DatasetFormat {
/// [N-Quads](https://www.w3.org/TR/n-quads/)
NQuads,
/// [TriG](https://www.w3.org/TR/trig/)
TriG,
}
impl DatasetFormat {
/// The format canonical IRI according to the [Unique URIs for file formats registry](https://www.w3.org/ns/formats/).
///
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(
/// DatasetFormat::NQuads.iri(),
/// "http://www.w3.org/ns/formats/N-Quads"
/// )
/// ```
#[inline]
pub fn iri(self) -> &'static str {
match self {
Self::NQuads => "http://www.w3.org/ns/formats/N-Quads",
Self::TriG => "http://www.w3.org/ns/formats/TriG",
}
}
/// The format [IANA media type](https://tools.ietf.org/html/rfc2046).
///
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(DatasetFormat::NQuads.media_type(), "application/n-quads")
/// ```
#[inline]
pub fn media_type(self) -> &'static str {
match self {
Self::NQuads => "application/n-quads",
Self::TriG => "application/trig",
}
}
/// The format [IANA-registered](https://tools.ietf.org/html/rfc2046) file extension.
///
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(DatasetFormat::NQuads.file_extension(), "nq")
/// ```
#[inline]
pub fn file_extension(self) -> &'static str {
match self {
Self::NQuads => "nq",
Self::TriG => "trig",
}
}
/// Looks for a known format from a media type.
///
/// It supports some media type aliases.
///
/// Example:
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(
/// DatasetFormat::from_media_type("application/n-quads; charset=utf-8"),
/// Some(DatasetFormat::NQuads)
/// )
/// ```
#[inline]
pub fn from_media_type(media_type: &str) -> Option<Self> {
match media_type.split(';').next()?.trim() {
"application/n-quads" | "text/x-nquads" | "text/nquads" => Some(Self::NQuads),
"application/trig" | "application/x-trig" => Some(Self::TriG),
_ => None,
}
}
/// Looks for a known format from an extension.
///
/// It supports some aliases.
///
/// Example:
/// ```
/// use oxigraph::io::DatasetFormat;
///
/// assert_eq!(
/// DatasetFormat::from_extension("nq"),
/// Some(DatasetFormat::NQuads)
/// )
/// ```
#[inline]
pub fn from_extension(extension: &str) -> Option<Self> {
match extension {
"nq" | "txt" => Some(Self::NQuads),
"trig" => Some(Self::TriG),
_ => None,
}
}
}
impl From<DatasetFormat> for RdfFormat {
#[inline]
fn from(format: DatasetFormat) -> Self {
match format {
DatasetFormat::NQuads => Self::NQuads,
DatasetFormat::TriG => Self::TriG,
}
}
}
impl From<DatasetFormat> for RdfParser {
#[inline]
fn from(format: DatasetFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl From<DatasetFormat> for RdfSerializer {
#[inline]
fn from(format: DatasetFormat) -> Self {
RdfFormat::from(format).into()
}
}
impl TryFrom<DatasetFormat> for GraphFormat {
type Error = ();
/// Attempts to find a graph format that is a subset of this [`DatasetFormat`].
#[inline]
fn try_from(value: DatasetFormat) -> Result<Self, Self::Error> {
match value {
DatasetFormat::NQuads => Ok(Self::NTriples),
DatasetFormat::TriG => Ok(Self::Turtle),
}
}
}
impl TryFrom<GraphFormat> for DatasetFormat {
type Error = ();
/// Attempts to find a dataset format that is a superset of this [`GraphFormat`].
#[inline]
fn try_from(value: GraphFormat) -> Result<Self, Self::Error> {
match value {
GraphFormat::NTriples => Ok(Self::NQuads),
GraphFormat::Turtle => Ok(Self::TriG),
GraphFormat::RdfXml => Err(()),
}
}
}

@@ -0,0 +1,39 @@
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
//!
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
//!
//! Usage example converting a Turtle file to a N-Triples file:
//! ```
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
//!
//! let turtle_file = b"@base <http://example.com/> .
//! @prefix schema: <http://schema.org/> .
//! <foo> a schema:Person ;
//! schema:name \"Foo\" .
//! <bar> a schema:Person ;
//! schema:name \"Bar\" .";
//!
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
//! ";
//!
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
//! writer.write_quad(&quad.unwrap()).unwrap();
//! }
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
//! ```
mod format;
pub mod read;
pub mod write;
#[allow(deprecated)]
pub use self::format::{DatasetFormat, GraphFormat};
#[allow(deprecated)]
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;

@@ -0,0 +1,199 @@
#![allow(deprecated)]
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// A parser for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct GraphParser {
inner: RdfParser,
}
impl GraphParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into())
.without_named_graphs()
.rename_blank_nodes(),
}
}
/// Provides an IRI that can be used to resolve the file's relative IRIs.
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser =
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> {
TripleReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read triples.
/// Could be built using a [`GraphParser`].
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for TripleReader<R> {
type Item = Result<Triple, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map(Into::into).map_err(Into::into))
}
}
/// A parser for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct DatasetParser {
inner: RdfParser,
}
impl DatasetParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into()).rename_blank_nodes(),
}
}
/// Provides an IRI that can be used to resolve the file's relative IRIs.
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser =
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
/// let quads = parser
/// .read_quads(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> {
QuadReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read quads.
/// Could be built using a [`DatasetParser`].
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for QuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map_err(Into::into))
}
}

@@ -0,0 +1,185 @@
#![allow(deprecated)]
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct GraphSerializer {
inner: RdfSerializer,
}
impl GraphSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> {
TripleWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing triples.
/// Could be built using a [`GraphSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> TripleWriter<W> {
/// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.writer.write_triple(triple)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
/// A serializer for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct DatasetSerializer {
inner: RdfSerializer,
}
impl DatasetSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`QuadWriter`] allowing writing quads into the given [`Write`] implementation
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> {
QuadWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing quads.
/// Could be built using a [`DatasetSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> QuadWriter<W> {
/// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
self.writer.write_quad(quad)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}

@@ -0,0 +1,12 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![doc(test(attr(allow(deprecated))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@@ -0,0 +1,22 @@
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
//!
//! Usage example:
//!
//! ```
//! use oxigraph::model::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;
pub use spargebra::term::GroundQuad;

@@ -0,0 +1,311 @@
//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery)
//!
//! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`].
use crate::model::*;
use crate::sparql::eval::Timer;
use oxsdatatypes::DayTimeDuration;
use spargebra::GraphUpdateOperation;
use std::fmt;
use std::str::FromStr;
/// A parsed [SPARQL query](https://www.w3.org/TR/sparql11-query/).
///
/// ```
/// use oxigraph::model::NamedNode;
/// use oxigraph::sparql::Query;
///
/// let query_str = "SELECT ?s ?p ?o WHERE { ?s ?p ?o . }";
/// let mut query = Query::parse(query_str, None)?;
///
/// assert_eq!(query.to_string(), query_str);
///
/// // We edit the query dataset specification
/// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(
/// query.dataset().default_graph_graphs(),
/// Some(default.as_slice())
/// );
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Query {
pub(super) inner: spargebra::Query,
pub(super) dataset: QueryDataset,
pub(super) parsing_duration: Option<DayTimeDuration>,
}
impl Query {
/// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
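///
/// A minimal usage sketch (`http://example.com/` is an arbitrary example base IRI):
/// ```
/// use oxigraph::sparql::Query;
///
/// let query = Query::parse("SELECT * WHERE { <s> ?p ?o }", Some("http://example.com/"))?;
/// // the relative IRI <s> has been resolved against the base IRI
/// assert!(query.to_string().contains("<http://example.com/s>"));
/// # Ok::<_, oxigraph::sparql::SparqlSyntaxError>(())
/// ```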
pub fn parse(
query: &str,
base_iri: Option<&str>,
) -> Result<Self, spargebra::SparqlSyntaxError> {
let start = Timer::now();
let query = Self::from(spargebra::Query::parse(query, base_iri)?);
Ok(Self {
dataset: query.dataset,
inner: query.inner,
parsing_duration: start.elapsed(),
})
}
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
pub fn dataset(&self) -> &QueryDataset {
&self.dataset
}
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
pub fn dataset_mut(&mut self) -> &mut QueryDataset {
&mut self.dataset
}
}
impl fmt::Display for Query {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f) // TODO: override
}
}
impl FromStr for Query {
type Err = spargebra::SparqlSyntaxError;
fn from_str(query: &str) -> Result<Self, Self::Err> {
Self::parse(query, None)
}
}
impl TryFrom<&str> for Query {
type Error = spargebra::SparqlSyntaxError;
fn try_from(query: &str) -> Result<Self, Self::Error> {
Self::from_str(query)
}
}
impl TryFrom<&String> for Query {
type Error = spargebra::SparqlSyntaxError;
fn try_from(query: &String) -> Result<Self, Self::Error> {
Self::from_str(query)
}
}
impl From<spargebra::Query> for Query {
fn from(query: spargebra::Query) -> Self {
Self {
dataset: QueryDataset::from_algebra(match &query {
spargebra::Query::Select { dataset, .. }
| spargebra::Query::Construct { dataset, .. }
| spargebra::Query::Describe { dataset, .. }
| spargebra::Query::Ask { dataset, .. } => dataset,
}),
inner: query,
parsing_duration: None,
}
}
}
/// A parsed [SPARQL update](https://www.w3.org/TR/sparql11-update/).
///
/// ```
/// use oxigraph::sparql::Update;
///
/// let update_str = "CLEAR ALL ;";
/// let update = Update::parse(update_str, None)?;
///
/// assert_eq!(update.to_string().trim(), update_str);
/// # Ok::<_, oxigraph::sparql::SparqlSyntaxError>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Update {
pub(super) inner: spargebra::Update,
pub(super) using_datasets: Vec<Option<QueryDataset>>,
}
impl Update {
/// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the update.
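///
/// A minimal usage sketch:
/// ```
/// use oxigraph::sparql::Update;
///
/// let update = Update::parse("INSERT DATA { <http://example.com/s> <http://example.com/p> 1 }", None)?;
/// assert!(update.to_string().contains("INSERT DATA"));
/// # Ok::<_, oxigraph::sparql::SparqlSyntaxError>(())
/// ```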
pub fn parse(
update: &str,
base_iri: Option<&str>,
) -> Result<Self, spargebra::SparqlSyntaxError> {
Ok(spargebra::Update::parse(update, base_iri)?.into())
}
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert).
pub fn using_datasets(&self) -> impl Iterator<Item = &QueryDataset> {
self.using_datasets.iter().filter_map(Option::as_ref)
}
/// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert).
pub fn using_datasets_mut(&mut self) -> impl Iterator<Item = &mut QueryDataset> {
self.using_datasets.iter_mut().filter_map(Option::as_mut)
}
}
impl fmt::Display for Update {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}
impl FromStr for Update {
type Err = spargebra::SparqlSyntaxError;
fn from_str(update: &str) -> Result<Self, Self::Err> {
Self::parse(update, None)
}
}
impl TryFrom<&str> for Update {
type Error = spargebra::SparqlSyntaxError;
fn try_from(update: &str) -> Result<Self, Self::Error> {
Self::from_str(update)
}
}
impl TryFrom<&String> for Update {
type Error = spargebra::SparqlSyntaxError;
fn try_from(update: &String) -> Result<Self, Self::Error> {
Self::from_str(update)
}
}
impl From<spargebra::Update> for Update {
fn from(update: spargebra::Update) -> Self {
Self {
using_datasets: update
.operations
.iter()
.map(|operation| {
if let GraphUpdateOperation::DeleteInsert { using, .. } = operation {
Some(QueryDataset::from_algebra(using))
} else {
None
}
})
.collect(),
inner: update,
}
}
}
/// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct QueryDataset {
default: Option<Vec<GraphName>>,
named: Option<Vec<NamedOrBlankNode>>,
}
impl QueryDataset {
pub(crate) fn new() -> Self {
Self {
default: None,
named: None,
}
}
fn from_algebra(inner: &Option<spargebra::algebra::QueryDataset>) -> Self {
if let Some(inner) = inner {
Self {
default: Some(inner.default.iter().map(|g| g.clone().into()).collect()),
named: inner
.named
.as_ref()
.map(|named| named.iter().map(|g| g.clone().into()).collect()),
}
} else {
Self {
default: Some(vec![GraphName::DefaultGraph]),
named: None,
}
}
}
/// Checks if this dataset specification is the default one
/// (i.e. the default graph is the store default graph and all the store named graphs are available)
///
/// ```
/// use oxigraph::sparql::Query;
///
/// assert!(Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?
/// .dataset()
/// .is_default_dataset());
/// assert!(!Query::parse(
/// "SELECT ?s ?p ?o FROM <http://example.com> WHERE { ?s ?p ?o . }",
/// None
/// )?
/// .dataset()
/// .is_default_dataset());
///
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
pub fn is_default_dataset(&self) -> bool {
self.default
.as_ref()
.map_or(false, |t| t == &[GraphName::DefaultGraph])
&& self.named.is_none()
}
/// Returns the list of the store graphs that are available to the query as the default graph, or `None` if the union of all graphs is used as the default graph.
/// By default, this list contains only the store default graph.
pub fn default_graph_graphs(&self) -> Option<&[GraphName]> {
self.default.as_deref()
}
/// Sets whether the default graph for the query should be the union of all the graphs in the queried store.
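///
/// A small sketch of the effect on [`default_graph_graphs`](QueryDataset::default_graph_graphs):
/// ```
/// use oxigraph::sparql::Query;
///
/// let mut query = Query::parse("SELECT ?s WHERE { ?s ?p ?o . }", None)?;
/// query.dataset_mut().set_default_graph_as_union();
/// assert!(query.dataset().default_graph_graphs().is_none());
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```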
pub fn set_default_graph_as_union(&mut self) {
self.default = None;
}
/// Sets the list of graphs the query should consider as being part of the default graph.
///
/// By default only the store default graph is considered.
/// ```
/// use oxigraph::model::NamedNode;
/// use oxigraph::sparql::Query;
///
/// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
/// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(
/// query.dataset().default_graph_graphs(),
/// Some(default.as_slice())
/// );
///
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
pub fn set_default_graph(&mut self, graphs: Vec<GraphName>) {
self.default = Some(graphs)
}
/// Returns the list of the available named graphs for the query or `None` if all graphs are available
pub fn available_named_graphs(&self) -> Option<&[NamedOrBlankNode]> {
self.named.as_deref()
}
/// Sets the list of allowed named graphs in the query.
///
/// ```
/// use oxigraph::model::NamedNode;
/// use oxigraph::sparql::Query;
///
/// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
/// let named = vec![NamedNode::new("http://example.com")?.into()];
/// query
/// .dataset_mut()
/// .set_available_named_graphs(named.clone());
/// assert_eq!(
/// query.dataset().available_named_graphs(),
/// Some(named.as_slice())
/// );
///
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
pub fn set_available_named_graphs(&mut self, named_graphs: Vec<NamedOrBlankNode>) {
self.named = Some(named_graphs);
}
}

@@ -0,0 +1,184 @@
use crate::model::TermRef;
use crate::sparql::algebra::QueryDataset;
use crate::sparql::EvaluationError;
use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup};
use crate::storage::{StorageError, StorageReader};
use std::cell::RefCell;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::iter::empty;
pub struct DatasetView {
reader: StorageReader,
extra: RefCell<HashMap<StrHash, String>>,
dataset: EncodedDatasetSpec,
}
impl DatasetView {
pub fn new(reader: StorageReader, dataset: &QueryDataset) -> Self {
let dataset = EncodedDatasetSpec {
default: dataset
.default_graph_graphs()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
named: dataset
.available_named_graphs()
.map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
};
Self {
reader,
extra: RefCell::new(HashMap::default()),
dataset,
}
}
fn store_encoded_quads_for_pattern(
&self,
subject: Option<&EncodedTerm>,
predicate: Option<&EncodedTerm>,
object: Option<&EncodedTerm>,
graph_name: Option<&EncodedTerm>,
) -> impl Iterator<Item = Result<EncodedQuad, EvaluationError>> + 'static {
self.reader
.quads_for_pattern(subject, predicate, object, graph_name)
.map(|t| t.map_err(Into::into))
}
#[allow(clippy::needless_collect)]
pub fn encoded_quads_for_pattern(
&self,
subject: Option<&EncodedTerm>,
predicate: Option<&EncodedTerm>,
object: Option<&EncodedTerm>,
graph_name: Option<&EncodedTerm>,
) -> Box<dyn Iterator<Item = Result<EncodedQuad, EvaluationError>>> {
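// The dataset specification drives the rewriting below: quads read from the
// graphs listed as the default graph are re-emitted in the default graph,
// while named graph lookups are restricted to the allowed named graph list.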
if let Some(graph_name) = graph_name {
if graph_name.is_default_graph() {
if let Some(default_graph_graphs) = &self.dataset.default {
if default_graph_graphs.len() == 1 {
// Single graph optimization
Box::new(
self.store_encoded_quads_for_pattern(
subject,
predicate,
object,
Some(&default_graph_graphs[0]),
)
.map(|quad| {
let quad = quad?;
Ok(EncodedQuad::new(
quad.subject,
quad.predicate,
quad.object,
EncodedTerm::DefaultGraph,
))
}),
)
} else {
let iters = default_graph_graphs
.iter()
.map(|graph_name| {
self.store_encoded_quads_for_pattern(
subject,
predicate,
object,
Some(graph_name),
)
})
.collect::<Vec<_>>();
Box::new(iters.into_iter().flatten().map(|quad| {
let quad = quad?;
Ok(EncodedQuad::new(
quad.subject,
quad.predicate,
quad.object,
EncodedTerm::DefaultGraph,
))
}))
}
} else {
Box::new(
self.store_encoded_quads_for_pattern(subject, predicate, object, None)
.map(|quad| {
let quad = quad?;
Ok(EncodedQuad::new(
quad.subject,
quad.predicate,
quad.object,
EncodedTerm::DefaultGraph,
))
}),
)
}
} else if self
.dataset
.named
.as_ref()
.map_or(true, |d| d.contains(graph_name))
{
Box::new(self.store_encoded_quads_for_pattern(
subject,
predicate,
object,
Some(graph_name),
))
} else {
Box::new(empty())
}
} else if let Some(named_graphs) = &self.dataset.named {
let iters = named_graphs
.iter()
.map(|graph_name| {
self.store_encoded_quads_for_pattern(
subject,
predicate,
object,
Some(graph_name),
)
})
.collect::<Vec<_>>();
Box::new(iters.into_iter().flatten())
} else {
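// No named graph restriction: iterate over all quads but skip those in the
// default graph, since only named graphs are requested here.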
Box::new(
self.store_encoded_quads_for_pattern(subject, predicate, object, None)
.filter(|quad| match quad {
Err(_) => true,
Ok(quad) => !quad.graph_name.is_default_graph(),
}),
)
}
}
pub fn encode_term<'a>(&self, term: impl Into<TermRef<'a>>) -> EncodedTerm {
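// The `unwrap` below is safe: the insertion closure never fails, it only
// records the strings referenced by the term in the in-memory `extra` map
// so that later decoding can resolve them.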
let term = term.into();
let encoded = term.into();
insert_term(term, &encoded, &mut |key, value| {
self.insert_str(key, value);
Ok(())
})
.unwrap();
encoded
}
pub fn insert_str(&self, key: &StrHash, value: &str) {
if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) {
if !matches!(self.reader.contains_str(key), Ok(true)) {
e.insert(value.to_owned());
}
}
}
}
impl StrLookup for DatasetView {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
Ok(if let Some(value) = self.extra.borrow().get(key) {
Some(value.clone())
} else {
self.reader.get_str(key)?
})
}
}
struct EncodedDatasetSpec {
default: Option<Vec<EncodedTerm>>,
named: Option<Vec<EncodedTerm>>,
}

@@ -0,0 +1,84 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;
/// A SPARQL evaluation error.
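///
/// A minimal sketch of catching the parsing variant returned by a malformed query:
/// ```
/// use oxigraph::sparql::EvaluationError;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// assert!(matches!(
/// store.query("this is not SPARQL"),
/// Err(EvaluationError::Parsing(_))
/// ));
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```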
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum EvaluationError {
/// An error in SPARQL parsing.
#[error(transparent)]
Parsing(#[from] SparqlSyntaxError),
/// An error from the storage.
#[error(transparent)]
Storage(#[from] StorageError),
/// An error while parsing an external RDF file.
#[error(transparent)]
GraphParsing(#[from] RdfParseError),
/// An error while parsing an external result file (likely from a federated query).
#[error(transparent)]
ResultsParsing(#[from] ResultsParseError),
/// An error returned during results serialization.
#[error(transparent)]
ResultsSerialization(#[from] io::Error),
/// Error during `SERVICE` evaluation
#[error("{0}")]
Service(#[source] Box<dyn Error + Send + Sync + 'static>),
/// Error when `CREATE` tries to create an already existing graph
#[error("The graph {0} already exists")]
GraphAlreadyExists(NamedNode),
/// Error when `DROP` or `CLEAR` tries to remove a graph that does not exist
#[error("The graph {0} does not exist")]
GraphDoesNotExist(NamedNode),
/// The variable storing the `SERVICE` name is unbound
#[error("The variable encoding the service name is unbound")]
UnboundService,
/// The given `SERVICE` is not supported
#[error("The service {0} is not supported")]
UnsupportedService(NamedNode),
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
#[error("The content media type {0} is not supported")]
UnsupportedContentType(String),
/// The `SERVICE` call does not return solutions
#[error("The service is not returning solutions but a boolean or a graph")]
ServiceDoesNotReturnSolutions,
/// The results are not an RDF graph
#[error("The query results are not a RDF graph")]
NotAGraph,
}
impl From<Infallible> for EvaluationError {
#[inline]
fn from(error: Infallible) -> Self {
match error {}
}
}
impl From<EvaluationError> for io::Error {
#[inline]
fn from(error: EvaluationError) -> Self {
match error {
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error),
EvaluationError::GraphParsing(error) => error.into(),
EvaluationError::ResultsParsing(error) => error.into(),
EvaluationError::ResultsSerialization(error) => error,
EvaluationError::Storage(error) => error.into(),
EvaluationError::Service(error) => match error.downcast() {
Ok(error) => *error,
Err(error) => Self::new(io::ErrorKind::Other, error),
},
EvaluationError::GraphAlreadyExists(_)
| EvaluationError::GraphDoesNotExist(_)
| EvaluationError::UnboundService
| EvaluationError::UnsupportedService(_)
| EvaluationError::UnsupportedContentType(_)
| EvaluationError::ServiceDoesNotReturnSolutions
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error),
}
}
}

File diff suppressed because it is too large

@@ -0,0 +1,34 @@
//! Simple HTTP client
use std::io::{Empty, Error, ErrorKind, Result};
use std::time::Duration;
pub struct Client;
impl Client {
pub fn new(_timeout: Option<Duration>, _redirection_limit: usize) -> Self {
Self
}
#[allow(clippy::unused_self)]
pub fn get(&self, _url: &str, _accept: &'static str) -> Result<(String, Empty)> {
Err(Error::new(
ErrorKind::Unsupported,
"HTTP client is not available. Enable the feature 'http-client'",
))
}
#[allow(clippy::unused_self, clippy::needless_pass_by_value)]
pub fn post(
&self,
_url: &str,
_payload: Vec<u8>,
_content_type: &'static str,
_accept: &'static str,
) -> Result<(String, Empty)> {
Err(Error::new(
ErrorKind::Unsupported,
"HTTP client is not available. Enable the feature 'http-client'",
))
}
}

@@ -0,0 +1,9 @@
#[cfg(not(feature = "http-client"))]
mod dummy;
#[cfg(feature = "http-client")]
mod simple;
#[cfg(not(feature = "http-client"))]
pub use dummy::Client;
#[cfg(feature = "http-client")]
pub use simple::Client;

@@ -0,0 +1,90 @@
use oxhttp::model::{Body, HeaderName, Method, Request};
use std::io::{Error, ErrorKind, Result};
use std::time::Duration;
pub struct Client {
client: oxhttp::Client,
}
impl Client {
pub fn new(timeout: Option<Duration>, redirection_limit: usize) -> Self {
let mut client = oxhttp::Client::new()
.with_redirection_limit(redirection_limit)
.with_user_agent(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))
.unwrap();
if let Some(timeout) = timeout {
client = client.with_global_timeout(timeout);
}
Self { client }
}
pub fn get(&self, url: &str, accept: &'static str) -> Result<(String, Body)> {
let request = Request::builder(Method::GET, url.parse().map_err(invalid_input_error)?)
.with_header(HeaderName::ACCEPT, accept)
.map_err(invalid_input_error)?
.build();
let response = self.client.request(request)?;
let status = response.status();
if !status.is_successful() {
return Err(Error::new(
ErrorKind::Other,
format!(
"Error {} returned by {} with payload:\n{}",
status,
url,
response.into_body().to_string()?
),
));
}
let content_type = response
.header(&HeaderName::CONTENT_TYPE)
.ok_or_else(|| invalid_data_error(format!("No Content-Type returned by {url}")))?
.to_str()
.map_err(invalid_data_error)?
.to_owned();
Ok((content_type, response.into_body()))
}
pub fn post(
&self,
url: &str,
payload: Vec<u8>,
content_type: &'static str,
accept: &'static str,
) -> Result<(String, Body)> {
let request = Request::builder(Method::POST, url.parse().map_err(invalid_input_error)?)
.with_header(HeaderName::ACCEPT, accept)
.map_err(invalid_input_error)?
.with_header(HeaderName::CONTENT_TYPE, content_type)
.map_err(invalid_input_error)?
.with_body(payload);
let response = self.client.request(request)?;
let status = response.status();
if !status.is_successful() {
return Err(Error::new(
ErrorKind::Other,
format!(
"Error {} returned by {} with payload:\n{}",
status,
url,
response.into_body().to_string()?
),
));
}
let content_type = response
.header(&HeaderName::CONTENT_TYPE)
.ok_or_else(|| invalid_data_error(format!("No Content-Type returned by {url}")))?
.to_str()
.map_err(invalid_data_error)?
.to_owned();
Ok((content_type, response.into_body()))
}
}
fn invalid_data_error(error: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> Error {
Error::new(ErrorKind::InvalidData, error)
}
fn invalid_input_error(error: impl Into<Box<dyn std::error::Error + Send + Sync>>) -> Error {
Error::new(ErrorKind::InvalidInput, error)
}

@@ -0,0 +1,328 @@
//! [SPARQL](https://www.w3.org/TR/sparql11-overview/) implementation.
//!
//! Stores execute SPARQL. See [`Store`](crate::store::Store::query()) for an example.
mod algebra;
mod dataset;
mod error;
mod eval;
mod http;
mod model;
pub mod results;
mod service;
mod update;
use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::EvaluationError;
use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader;
use json_event_parser::{JsonEvent, ToWriteJsonWriter};
pub use oxrdf::{Variable, VariableNameParseError};
use oxsdatatypes::{DayTimeDuration, Float};
pub use spargebra::SparqlSyntaxError;
use sparopt::algebra::GraphPattern;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Arc;
use std::time::Duration;
use std::{fmt, io};
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn evaluate_query(
reader: StorageReader,
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
run_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
let query = query.try_into().map_err(Into::into)?;
let dataset = DatasetView::new(reader, &query.dataset);
let start_planning = Timer::now();
let (results, plan_node_with_stats, planning_duration) = match query.inner {
spargebra::Query::Select {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(pattern);
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_select(&pattern);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Ask {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_ask(&pattern);
(results, explanation, planning_duration)
}
spargebra::Query::Construct {
template,
pattern,
base_iri,
..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_construct(&pattern, &template);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Describe {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_describe(&pattern);
(Ok(results), explanation, planning_duration)
}
};
let explanation = QueryExplanation {
inner: plan_node_with_stats,
with_stats: run_stats,
parsing_duration: query.parsing_duration,
planning_duration,
};
Ok((results, explanation))
}
/// Options for SPARQL query evaluation.
///
/// If the `"http-client"` optional feature is enabled,
/// a simple HTTP 1.1 client is used to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
///
/// Usage example disabling the federated query support:
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// store.query_opt(
/// "SELECT * WHERE { SERVICE <https://query.wikidata.org/sparql> {} }",
/// QueryOptions::default().without_service_handler(),
/// )?;
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Clone, Default)]
pub struct QueryOptions {
service_handler: Option<Arc<dyn ServiceHandler<Error = EvaluationError>>>,
custom_functions: CustomFunctionRegistry,
http_timeout: Option<Duration>,
http_redirection_limit: usize,
without_optimizations: bool,
}
pub(crate) type CustomFunctionRegistry =
HashMap<NamedNode, Arc<dyn (Fn(&[Term]) -> Option<Term>) + Send + Sync>>;
impl QueryOptions {
/// Use a given [`ServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
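///
/// A minimal sketch with a handler that rejects every call
/// (the handler type and its behavior are illustrative only):
/// ```
/// use oxigraph::model::NamedNode;
/// use oxigraph::sparql::{EvaluationError, Query, QueryOptions, QueryResults, ServiceHandler};
///
/// struct RejectingServiceHandler;
///
/// impl ServiceHandler for RejectingServiceHandler {
/// type Error = EvaluationError;
///
/// fn handle(&self, service_name: NamedNode, _query: Query) -> Result<QueryResults, EvaluationError> {
/// Err(EvaluationError::UnsupportedService(service_name))
/// }
/// }
///
/// let options = QueryOptions::default().with_service_handler(RejectingServiceHandler);
/// # let _ = options;
/// ```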
#[inline]
#[must_use]
pub fn with_service_handler(mut self, service_handler: impl ServiceHandler + 'static) -> Self {
self.service_handler = Some(Arc::new(ErrorConversionServiceHandler::wrap(
service_handler,
)));
self
}
/// Disables the `SERVICE` calls
#[inline]
#[must_use]
pub fn without_service_handler(mut self) -> Self {
self.service_handler = Some(Arc::new(EmptyServiceHandler));
self
}
/// Sets a timeout for HTTP requests done during SPARQL evaluation.
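///
/// A minimal sketch (10 seconds is an arbitrary example value):
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use std::time::Duration;
///
/// let options = QueryOptions::default().with_http_timeout(Duration::from_secs(10));
/// # let _ = options;
/// ```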
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_timeout(mut self, timeout: Duration) -> Self {
self.http_timeout = Some(timeout);
self
}
/// Sets an upper bound on the number of HTTP redirections followed per HTTP request during SPARQL evaluation.
///
/// By default this value is `0`.
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_redirection_limit(mut self, redirection_limit: usize) -> Self {
self.http_redirection_limit = redirection_limit;
self
}
/// Adds a custom SPARQL evaluation function.
///
/// Example with a function serializing terms to N-Triples:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::sparql::{QueryOptions, QueryResults};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
///
/// if let QueryResults::Solutions(mut solutions) = store.query_opt(
/// "SELECT (<http://www.w3.org/ns/formats/N-Triples>(1) AS ?nt) WHERE {}",
/// QueryOptions::default().with_custom_function(
/// NamedNode::new("http://www.w3.org/ns/formats/N-Triples")?,
/// |args| args.get(0).map(|t| Literal::from(t.to_string()).into()),
/// ),
/// )? {
/// assert_eq!(
/// solutions.next().unwrap()?.get("nt"),
/// Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into())
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
#[must_use]
pub fn with_custom_function(
mut self,
name: NamedNode,
evaluator: impl Fn(&[Term]) -> Option<Term> + Send + Sync + 'static,
) -> Self {
self.custom_functions.insert(name, Arc::new(evaluator));
self
}
fn service_handler(&self) -> Arc<dyn ServiceHandler<Error = EvaluationError>> {
self.service_handler.clone().unwrap_or_else(|| {
if cfg!(feature = "http-client") {
Arc::new(service::SimpleServiceHandler::new(
self.http_timeout,
self.http_redirection_limit,
))
} else {
Arc::new(EmptyServiceHandler)
}
})
}
#[doc(hidden)]
#[inline]
#[must_use]
pub fn without_optimizations(mut self) -> Self {
self.without_optimizations = true;
self
}
}
/// Options for SPARQL update evaluation.
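///
/// A minimal sketch: update options are currently built from query options:
/// ```
/// use oxigraph::sparql::{QueryOptions, UpdateOptions};
///
/// let options = UpdateOptions::from(QueryOptions::default());
/// # let _ = options;
/// ```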
#[derive(Clone, Default)]
pub struct UpdateOptions {
query_options: QueryOptions,
}
impl From<QueryOptions> for UpdateOptions {
#[inline]
fn from(query_options: QueryOptions) -> Self {
Self { query_options }
}
}
/// The explanation of a query.
#[derive(Clone)]
pub struct QueryExplanation {
inner: Rc<EvalNodeWithStats>,
with_stats: bool,
parsing_duration: Option<DayTimeDuration>,
planning_duration: Option<DayTimeDuration>,
}
impl QueryExplanation {
/// Writes the explanation as JSON: an object with optional `"parsing duration in seconds"`
/// and `"planning duration in seconds"` keys followed by the `"plan"` tree.
pub fn write_in_json(&self, write: impl io::Write) -> io::Result<()> {
let mut writer = ToWriteJsonWriter::new(write);
writer.write_event(JsonEvent::StartObject)?;
if let Some(parsing_duration) = self.parsing_duration {
writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
parsing_duration.as_seconds().to_string().into(),
))?;
}
if let Some(planning_duration) = self.planning_duration {
writer.write_event(JsonEvent::ObjectKey("planning duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
planning_duration.as_seconds().to_string().into(),
))?;
}
writer.write_event(JsonEvent::ObjectKey("plan".into()))?;
self.inner.json_node(&mut writer, self.with_stats)?;
writer.write_event(JsonEvent::EndObject)
}
}
impl fmt::Debug for QueryExplanation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut obj = f.debug_struct("QueryExplanation");
if let Some(parsing_duration) = self.parsing_duration {
obj.field(
"parsing duration in seconds",
&f32::from(Float::from(parsing_duration.as_seconds())),
);
}
if let Some(planning_duration) = self.planning_duration {
obj.field(
"planning duration in seconds",
&f32::from(Float::from(planning_duration.as_seconds())),
);
}
obj.field("tree", &self.inner);
obj.finish_non_exhaustive()
}
}

@ -0,0 +1,371 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
pub use sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
pub enum QueryResults {
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
Solutions(QuerySolutionIter),
/// Result of an [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
Boolean(bool),
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
Graph(QueryTripleIter),
}
impl QueryResults {
/// Reads a SPARQL query results serialization.
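///
/// A small sketch parsing a JSON boolean result:
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let results = QueryResults::read(br#"{"boolean":true}"#.as_slice(), QueryResultsFormat::Json)?;
/// assert!(matches!(results, QueryResults::Boolean(true)));
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```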
pub fn read(
read: impl Read + 'static,
format: QueryResultsFormat,
) -> Result<Self, QueryResultsParseError> {
Ok(QueryResultsParser::from_format(format)
.parse_read(read)?
.into())
}
/// Writes the query results (solutions or boolean).
///
/// This method fails if it is called on the `Graph` results.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let store = Store::new()?;
/// let ex = NamedNodeRef::new("http://example.com")?;
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write<W: Write>(
self,
write: W,
format: QueryResultsFormat,
) -> Result<W, EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value),
Self::Solutions(solutions) => {
let mut writer = serializer
.serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions {
writer
.write(&solution?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
Self::Graph(triples) => {
let s = VariableRef::new_unchecked("subject");
let p = VariableRef::new_unchecked("predicate");
let o = VariableRef::new_unchecked("object");
let mut writer = serializer
.serialize_solutions_to_write(
write,
vec![s.into_owned(), p.into_owned(), o.into_owned()],
)
.map_err(EvaluationError::ResultsSerialization)?;
for triple in triples {
let triple = triple?;
writer
.write([
(s, &triple.subject.into()),
(p, &triple.predicate.into()),
(o, &triple.object),
])
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
}
.map_err(EvaluationError::ResultsSerialization)
}
/// Writes the graph query results.
///
/// This method fails if it is called on the `Solution` or `Boolean` results.
///
/// ```
/// use oxigraph::io::RdfFormat;
/// use oxigraph::model::*;
/// use oxigraph::store::Store;
///
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
///
/// let store = Store::new()?;
/// store.load_graph(
/// graph.as_bytes(),
/// RdfFormat::NTriples,
/// GraphName::DefaultGraph,
/// None,
/// )?;
///
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
/// graph.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write_graph<W: Write>(
self,
write: W,
format: impl Into<RdfFormat>,
) -> Result<W, EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write);
for triple in triples {
writer
.write_triple(&triple?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer
.finish()
.map_err(EvaluationError::ResultsSerialization)
} else {
Err(EvaluationError::NotAGraph)
}
}
}
impl From<QuerySolutionIter> for QueryResults {
#[inline]
fn from(value: QuerySolutionIter) -> Self {
Self::Solutions(value)
}
}
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// An iterator over [`QuerySolution`]s.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
/// for solution in solutions {
/// println!("{:?}", solution?.get("s"));
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QuerySolutionIter {
variables: Arc<[Variable]>,
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>,
}
impl QuerySolutionIter {
/// Constructs a new iterator of solutions from an ordered list of solution variables and an iterator of solution tuples
/// (each tuple uses the same ordering as the variable list, such that tuple element 0 is the value of variable 0, etc.).
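///
/// A minimal sketch with one variable and a single empty solution:
/// ```
/// use oxigraph::sparql::{EvaluationError, QuerySolutionIter, Variable};
/// use std::sync::Arc;
///
/// let variables: Arc<[Variable]> = [Variable::new("s")?].as_ref().into();
/// let iter = QuerySolutionIter::new(
///     Arc::clone(&variables),
///     vec![Ok::<_, EvaluationError>(vec![None])].into_iter(),
/// );
/// assert_eq!(iter.count(), 1);
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```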
pub fn new(
variables: Arc<[Variable]>,
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static,
) -> Self {
Self {
variables: Arc::clone(&variables),
iter: Box::new(
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())),
),
}
}
/// The variables used in the solutions.
///
/// ```
/// use oxigraph::sparql::{QueryResults, Variable};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
/// assert_eq!(
/// solutions.variables(),
/// &[Variable::new("s")?, Variable::new("o")?]
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn variables(&self) -> &[Variable] {
&self.variables
}
}
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self {
variables: reader.variables().into(),
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))),
}
}
}
impl Iterator for QuerySolutionIter {
type Item = Result<QuerySolution, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
/// An iterator over the triples that compose a graph solution.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
/// for triple in triples {
/// println!("{}", triple?);
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QueryTripleIter {
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>,
}
impl Iterator for QueryTripleIter {
type Item = Result<Triple, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
#[inline]
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
where
G: FnMut(Acc, Self::Item) -> Acc,
{
self.iter.fold(init, g)
}
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
use std::io::Cursor;
#[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::str;
for format in [
QueryResultsFormat::Json,
QueryResultsFormat::Xml,
QueryResultsFormat::Tsv,
] {
let results = vec![
QueryResults::Boolean(true),
QueryResults::Boolean(false),
QueryResults::Solutions(QuerySolutionIter::new(
[
Variable::new_unchecked("foo"),
Variable::new_unchecked("bar"),
]
.as_ref()
.into(),
Box::new(
vec![
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example.com").into()),
None,
]),
Ok(vec![
None,
Some(NamedNode::new_unchecked("http://example.com").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("foo").into()),
Some(BlankNode::new_unchecked("bar").into()),
]),
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
Ok(vec![
Some(
Literal::new_language_tagged_literal_unchecked("foo", "fr")
.into(),
),
None,
]),
Ok(vec![
Some(Literal::from(1).into()),
Some(Literal::from(true).into()),
]),
Ok(vec![
Some(Literal::from(1.33).into()),
Some(Literal::from(false).into()),
]),
Ok(vec![
Some(
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
Triple::new(
NamedNode::new_unchecked("http://example.com/os"),
NamedNode::new_unchecked("http://example.com/op"),
NamedNode::new_unchecked("http://example.com/oo"),
),
)
.into(),
),
None,
]),
]
.into_iter(),
),
)),
];
for ex in results {
let mut buffer = Vec::new();
ex.write(&mut buffer, format)?;
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?;
let mut buffer2 = Vec::new();
ex2.write(&mut buffer2, format)?;
assert_eq!(
str::from_utf8(&buffer).unwrap(),
str::from_utf8(&buffer2).unwrap()
);
}
}
Ok(())
}
}

@ -0,0 +1,44 @@
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
//!
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
//!
//! Usage example converting a JSON result file into a TSV result file:
//!
//! ```
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
//! use std::io::Result;
//!
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
//! // We start reading the JSON file to see which kind of results it contains
//! match json_parser.parse_read(json_file)? {
//! FromReadQueryResultsReader::Boolean(value) => {
//! // it's a boolean result, so we serialize it as TSV to the output buffer
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
//! }
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
//! // it's a set of solutions, so we create a writer and stream each solution from the JSON file into it
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
//! for solution in solutions_reader {
//! serialize_solutions_to_write.write(&solution?)?;
//! }
//! serialize_solutions_to_write.finish()
//! }
//! }
//! }
//!
//! // Let's test with a boolean
//! assert_eq!(
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
//! b"true"
//! );
//!
//! // And with a set of solutions
//! assert_eq!(
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
//! b"?foo\t?bar\n\"test\"\t\n"
//! );
//! ```
pub use sparesults::*;

@ -0,0 +1,124 @@
use crate::model::NamedNode;
use crate::sparql::algebra::Query;
use crate::sparql::error::EvaluationError;
use crate::sparql::http::Client;
use crate::sparql::model::QueryResults;
use crate::sparql::results::QueryResultsFormat;
use std::error::Error;
use std::time::Duration;
/// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE.
///
/// Should be given to [`QueryOptions`](super::QueryOptions::with_service_handler())
/// before evaluating a SPARQL query that uses SERVICE calls.
///
/// ```
/// use oxigraph::model::*;
/// use oxigraph::sparql::{EvaluationError, Query, QueryOptions, QueryResults, ServiceHandler};
/// use oxigraph::store::Store;
///
/// struct TestServiceHandler {
/// store: Store,
/// }
///
/// impl ServiceHandler for TestServiceHandler {
/// type Error = EvaluationError;
///
/// fn handle(
/// &self,
/// service_name: NamedNode,
/// query: Query,
/// ) -> Result<QueryResults, Self::Error> {
/// if service_name == "http://example.com/service" {
/// self.store.query(query)
/// } else {
/// panic!()
/// }
/// }
/// }
///
/// let store = Store::new()?;
/// let service = TestServiceHandler {
/// store: Store::new()?,
/// };
/// let ex = NamedNodeRef::new("http://example.com")?;
/// service
/// .store
/// .insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// if let QueryResults::Solutions(mut solutions) = store.query_opt(
/// "SELECT ?s WHERE { SERVICE <http://example.com/service> { ?s ?p ?o } }",
/// QueryOptions::default().with_service_handler(service),
/// )? {
/// assert_eq!(solutions.next().unwrap()?.get("s"), Some(&ex.into()));
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub trait ServiceHandler: Send + Sync {
/// The service evaluation error.
type Error: Error + Send + Sync + 'static;
/// Evaluates a [`Query`] against a given service identified by a [`NamedNode`].
fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error>;
}
pub struct EmptyServiceHandler;
impl ServiceHandler for EmptyServiceHandler {
type Error = EvaluationError;
fn handle(&self, name: NamedNode, _: Query) -> Result<QueryResults, Self::Error> {
Err(EvaluationError::UnsupportedService(name))
}
}
pub struct ErrorConversionServiceHandler<S: ServiceHandler> {
handler: S,
}
impl<S: ServiceHandler> ErrorConversionServiceHandler<S> {
pub fn wrap(handler: S) -> Self {
Self { handler }
}
}
impl<S: ServiceHandler> ServiceHandler for ErrorConversionServiceHandler<S> {
type Error = EvaluationError;
fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
self.handler
.handle(service_name, query)
.map_err(|e| EvaluationError::Service(Box::new(e)))
}
}
pub struct SimpleServiceHandler {
client: Client,
}
impl SimpleServiceHandler {
pub fn new(http_timeout: Option<Duration>, http_redirection_limit: usize) -> Self {
Self {
client: Client::new(http_timeout, http_redirection_limit),
}
}
}
impl ServiceHandler for SimpleServiceHandler {
type Error = EvaluationError;
fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
let (content_type, body) = self
.client
.post(
service_name.as_str(),
query.to_string().into_bytes(),
"application/sparql-query",
"application/sparql-results+json, application/sparql-results+xml",
)
.map_err(|e| EvaluationError::Service(Box::new(e)))?;
let format = QueryResultsFormat::from_media_type(&content_type)
.ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
Ok(QueryResults::read(body, format)?)
}
}

@ -0,0 +1,565 @@
use crate::io::{RdfFormat, RdfParser};
use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad};
use crate::sparql::algebra::QueryDataset;
use crate::sparql::dataset::DatasetView;
use crate::sparql::eval::{EncodedTuple, SimpleEvaluator};
use crate::sparql::http::Client;
use crate::sparql::{EvaluationError, Update, UpdateOptions};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm};
use crate::storage::StorageWriter;
use oxiri::Iri;
use spargebra::algebra::{GraphPattern, GraphTarget};
use spargebra::term::{
BlankNode, GraphName, GraphNamePattern, GroundQuad, GroundQuadPattern, GroundSubject,
GroundTerm, GroundTermPattern, GroundTriple, GroundTriplePattern, NamedNode, NamedNodePattern,
Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable,
};
use spargebra::GraphUpdateOperation;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::io;
use std::rc::Rc;
use std::sync::Arc;
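/// Evaluates all operations of a SPARQL update sequentially against `transaction`,
/// stopping at the first operation that fails.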
pub fn evaluate_update<'a, 'b: 'a>(
transaction: &'a mut StorageWriter<'b>,
update: &Update,
options: &UpdateOptions,
) -> Result<(), EvaluationError> {
SimpleUpdateEvaluator {
transaction,
base_iri: update.inner.base_iri.clone().map(Rc::new),
options: options.clone(),
client: Client::new(
options.query_options.http_timeout,
options.query_options.http_redirection_limit,
),
}
.eval_all(&update.inner.operations, &update.using_datasets)
}
struct SimpleUpdateEvaluator<'a, 'b> {
transaction: &'a mut StorageWriter<'b>,
base_iri: Option<Rc<Iri<String>>>,
options: UpdateOptions,
client: Client,
}
impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
fn eval_all(
&mut self,
updates: &[GraphUpdateOperation],
using_datasets: &[Option<QueryDataset>],
) -> Result<(), EvaluationError> {
for (update, using_dataset) in updates.iter().zip(using_datasets) {
self.eval(update, using_dataset)?;
}
Ok(())
}
fn eval(
&mut self,
update: &GraphUpdateOperation,
using_dataset: &Option<QueryDataset>,
) -> Result<(), EvaluationError> {
match update {
GraphUpdateOperation::InsertData { data } => self.eval_insert_data(data),
GraphUpdateOperation::DeleteData { data } => self.eval_delete_data(data),
GraphUpdateOperation::DeleteInsert {
delete,
insert,
pattern,
..
} => self.eval_delete_insert(
delete,
insert,
using_dataset.as_ref().unwrap_or(&QueryDataset::new()),
pattern,
),
GraphUpdateOperation::Load {
silent,
source,
destination,
} => {
if let Err(error) = self.eval_load(source, destination) {
if *silent {
Ok(())
} else {
Err(error)
}
} else {
Ok(())
}
}
GraphUpdateOperation::Clear { graph, silent } => self.eval_clear(graph, *silent),
GraphUpdateOperation::Create { graph, silent } => self.eval_create(graph, *silent),
GraphUpdateOperation::Drop { graph, silent } => self.eval_drop(graph, *silent),
}
}
fn eval_insert_data(&mut self, data: &[Quad]) -> Result<(), EvaluationError> {
let mut bnodes = HashMap::new();
for quad in data {
let quad = Self::convert_quad(quad, &mut bnodes);
self.transaction.insert(quad.as_ref())?;
}
Ok(())
}
fn eval_delete_data(&mut self, data: &[GroundQuad]) -> Result<(), EvaluationError> {
for quad in data {
let quad = Self::convert_ground_quad(quad);
self.transaction.remove(quad.as_ref())?;
}
Ok(())
}
fn eval_delete_insert(
&mut self,
delete: &[GroundQuadPattern],
insert: &[QuadPattern],
using: &QueryDataset,
algebra: &GraphPattern,
) -> Result<(), EvaluationError> {
let dataset = Rc::new(DatasetView::new(self.transaction.reader(), using));
let mut pattern = sparopt::algebra::GraphPattern::from(algebra);
if !self.options.query_options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(sparopt::algebra::GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let evaluator = SimpleEvaluator::new(
Rc::clone(&dataset),
self.base_iri.clone(),
self.options.query_options.service_handler(),
Arc::new(self.options.query_options.custom_functions.clone()),
false,
);
let mut variables = Vec::new();
let mut bnodes = HashMap::new();
let (eval, _) = evaluator.graph_pattern_evaluator(&pattern, &mut variables);
let tuples =
eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; // TODO: would be much better to stream
for tuple in tuples {
for quad in delete {
if let Some(quad) =
Self::convert_ground_quad_pattern(quad, &variables, &tuple, &dataset)?
{
self.transaction.remove(quad.as_ref())?;
}
}
for quad in insert {
if let Some(quad) =
Self::convert_quad_pattern(quad, &variables, &tuple, &dataset, &mut bnodes)?
{
self.transaction.insert(quad.as_ref())?;
}
}
bnodes.clear();
}
Ok(())
}
fn eval_load(&mut self, from: &NamedNode, to: &GraphName) -> Result<(), EvaluationError> {
let (content_type, body) = self
.client
.get(
from.as_str(),
"application/n-triples, text/turtle, application/rdf+xml",
)
.map_err(|e| EvaluationError::Service(Box::new(e)))?;
let format = RdfFormat::from_media_type(&content_type)
.ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
let to_graph_name = match to {
GraphName::NamedNode(graph_name) => graph_name.into(),
GraphName::DefaultGraph => GraphNameRef::DefaultGraph,
};
let mut parser = RdfParser::from_format(format)
.rename_blank_nodes()
.without_named_graphs()
.with_default_graph(to_graph_name);
parser = parser.with_base_iri(from.as_str()).map_err(|e| {
EvaluationError::Service(Box::new(io::Error::new(
io::ErrorKind::InvalidInput,
format!("Invalid URL: {from}: {e}"),
)))
})?;
for q in parser.parse_read(body) {
self.transaction.insert(q?.as_ref())?;
}
Ok(())
}
fn eval_create(&mut self, graph_name: &NamedNode, silent: bool) -> Result<(), EvaluationError> {
if self.transaction.insert_named_graph(graph_name.into())? || silent {
Ok(())
} else {
Err(EvaluationError::GraphAlreadyExists(graph_name.clone()))
}
}
fn eval_clear(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> {
match graph {
GraphTarget::NamedNode(graph_name) => {
if self
.transaction
.reader()
.contains_named_graph(&graph_name.as_ref().into())?
{
Ok(self.transaction.clear_graph(graph_name.into())?)
} else if silent {
Ok(())
} else {
Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
}
}
GraphTarget::DefaultGraph => {
self.transaction.clear_graph(GraphNameRef::DefaultGraph)?;
Ok(())
}
GraphTarget::NamedGraphs => Ok(self.transaction.clear_all_named_graphs()?),
GraphTarget::AllGraphs => Ok(self.transaction.clear_all_graphs()?),
}
}
fn eval_drop(&mut self, graph: &GraphTarget, silent: bool) -> Result<(), EvaluationError> {
match graph {
GraphTarget::NamedNode(graph_name) => {
if self.transaction.remove_named_graph(graph_name.into())? || silent {
Ok(())
} else {
Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
}
}
GraphTarget::DefaultGraph => {
Ok(self.transaction.clear_graph(GraphNameRef::DefaultGraph)?)
}
GraphTarget::NamedGraphs => Ok(self.transaction.remove_all_named_graphs()?),
GraphTarget::AllGraphs => Ok(self.transaction.clear()?),
}
}
fn convert_quad(quad: &Quad, bnodes: &mut HashMap<BlankNode, BlankNode>) -> OxQuad {
OxQuad {
subject: match &quad.subject {
Subject::NamedNode(subject) => subject.clone().into(),
Subject::BlankNode(subject) => Self::convert_blank_node(subject, bnodes).into(),
Subject::Triple(subject) => Self::convert_triple(subject, bnodes).into(),
},
predicate: quad.predicate.clone(),
object: match &quad.object {
Term::NamedNode(object) => object.clone().into(),
Term::BlankNode(object) => Self::convert_blank_node(object, bnodes).into(),
Term::Literal(object) => object.clone().into(),
Term::Triple(subject) => Self::convert_triple(subject, bnodes).into(),
},
graph_name: match &quad.graph_name {
GraphName::NamedNode(graph_name) => graph_name.clone().into(),
GraphName::DefaultGraph => OxGraphName::DefaultGraph,
},
}
}
fn convert_triple(triple: &Triple, bnodes: &mut HashMap<BlankNode, BlankNode>) -> Triple {
Triple {
subject: match &triple.subject {
Subject::NamedNode(subject) => subject.clone().into(),
Subject::BlankNode(subject) => Self::convert_blank_node(subject, bnodes).into(),
Subject::Triple(subject) => Self::convert_triple(subject, bnodes).into(),
},
predicate: triple.predicate.clone(),
object: match &triple.object {
Term::NamedNode(object) => object.clone().into(),
Term::BlankNode(object) => Self::convert_blank_node(object, bnodes).into(),
Term::Literal(object) => object.clone().into(),
Term::Triple(subject) => Self::convert_triple(subject, bnodes).into(),
},
}
}
fn convert_blank_node(
node: &BlankNode,
bnodes: &mut HashMap<BlankNode, BlankNode>,
) -> BlankNode {
bnodes.entry(node.clone()).or_default().clone()
}
fn convert_ground_quad(quad: &GroundQuad) -> OxQuad {
OxQuad {
subject: match &quad.subject {
GroundSubject::NamedNode(subject) => subject.clone().into(),
GroundSubject::Triple(subject) => Self::convert_ground_triple(subject).into(),
},
predicate: quad.predicate.clone(),
object: match &quad.object {
GroundTerm::NamedNode(object) => object.clone().into(),
GroundTerm::Literal(object) => object.clone().into(),
GroundTerm::Triple(subject) => Self::convert_ground_triple(subject).into(),
},
graph_name: match &quad.graph_name {
GraphName::NamedNode(graph_name) => graph_name.clone().into(),
GraphName::DefaultGraph => OxGraphName::DefaultGraph,
},
}
}
fn convert_ground_triple(triple: &GroundTriple) -> Triple {
Triple {
subject: match &triple.subject {
GroundSubject::NamedNode(subject) => subject.clone().into(),
GroundSubject::Triple(subject) => Self::convert_ground_triple(subject).into(),
},
predicate: triple.predicate.clone(),
object: match &triple.object {
GroundTerm::NamedNode(object) => object.clone().into(),
GroundTerm::Literal(object) => object.clone().into(),
GroundTerm::Triple(subject) => Self::convert_ground_triple(subject).into(),
},
}
}
fn convert_quad_pattern(
quad: &QuadPattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
bnodes: &mut HashMap<BlankNode, BlankNode>,
) -> Result<Option<OxQuad>, EvaluationError> {
Ok(Some(OxQuad {
subject: match Self::convert_term_or_var(
&quad.subject,
variables,
values,
dataset,
bnodes,
)? {
Some(Term::NamedNode(node)) => node.into(),
Some(Term::BlankNode(node)) => node.into(),
Some(Term::Triple(triple)) => triple.into(),
Some(Term::Literal(_)) | None => return Ok(None),
},
predicate: if let Some(predicate) =
Self::convert_named_node_or_var(&quad.predicate, variables, values, dataset)?
{
predicate
} else {
return Ok(None);
},
object: if let Some(object) =
Self::convert_term_or_var(&quad.object, variables, values, dataset, bnodes)?
{
object
} else {
return Ok(None);
},
graph_name: if let Some(graph_name) =
Self::convert_graph_name_or_var(&quad.graph_name, variables, values, dataset)?
{
graph_name
} else {
return Ok(None);
},
}))
}
fn convert_term_or_var(
term: &TermPattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
bnodes: &mut HashMap<BlankNode, BlankNode>,
) -> Result<Option<Term>, EvaluationError> {
Ok(match term {
TermPattern::NamedNode(term) => Some(term.clone().into()),
TermPattern::BlankNode(bnode) => Some(Self::convert_blank_node(bnode, bnodes).into()),
TermPattern::Literal(term) => Some(term.clone().into()),
TermPattern::Triple(triple) => {
Self::convert_triple_pattern(triple, variables, values, dataset, bnodes)?
.map(Into::into)
}
TermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_term(&node))
.transpose()?,
})
}
fn convert_named_node_or_var(
term: &NamedNodePattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
) -> Result<Option<NamedNode>, EvaluationError> {
Ok(match term {
NamedNodePattern::NamedNode(term) => Some(term.clone()),
NamedNodePattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_named_node(&node))
.transpose()?,
})
}
fn convert_graph_name_or_var(
term: &GraphNamePattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
) -> Result<Option<OxGraphName>, EvaluationError> {
match term {
GraphNamePattern::NamedNode(term) => Ok(Some(term.clone().into())),
GraphNamePattern::DefaultGraph => Ok(Some(OxGraphName::DefaultGraph)),
GraphNamePattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| {
Ok(if node == EncodedTerm::DefaultGraph {
OxGraphName::DefaultGraph
} else {
dataset.decode_named_node(&node)?.into()
})
})
.transpose(),
}
}
fn convert_triple_pattern(
triple: &TriplePattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
bnodes: &mut HashMap<BlankNode, BlankNode>,
) -> Result<Option<Triple>, EvaluationError> {
Ok(Some(Triple {
subject: match Self::convert_term_or_var(
&triple.subject,
variables,
values,
dataset,
bnodes,
)? {
Some(Term::NamedNode(node)) => node.into(),
Some(Term::BlankNode(node)) => node.into(),
Some(Term::Triple(triple)) => triple.into(),
Some(Term::Literal(_)) | None => return Ok(None),
},
predicate: if let Some(predicate) =
Self::convert_named_node_or_var(&triple.predicate, variables, values, dataset)?
{
predicate
} else {
return Ok(None);
},
object: if let Some(object) =
Self::convert_term_or_var(&triple.object, variables, values, dataset, bnodes)?
{
object
} else {
return Ok(None);
},
}))
}
fn convert_ground_quad_pattern(
quad: &GroundQuadPattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
) -> Result<Option<OxQuad>, EvaluationError> {
Ok(Some(OxQuad {
subject: match Self::convert_ground_term_or_var(
&quad.subject,
variables,
values,
dataset,
)? {
Some(Term::NamedNode(node)) => node.into(),
Some(Term::BlankNode(node)) => node.into(),
Some(Term::Triple(triple)) => triple.into(),
Some(Term::Literal(_)) | None => return Ok(None),
},
predicate: if let Some(predicate) =
Self::convert_named_node_or_var(&quad.predicate, variables, values, dataset)?
{
predicate
} else {
return Ok(None);
},
object: if let Some(object) =
Self::convert_ground_term_or_var(&quad.object, variables, values, dataset)?
{
object
} else {
return Ok(None);
},
graph_name: if let Some(graph_name) =
Self::convert_graph_name_or_var(&quad.graph_name, variables, values, dataset)?
{
graph_name
} else {
return Ok(None);
},
}))
}
fn convert_ground_term_or_var(
term: &GroundTermPattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
) -> Result<Option<Term>, EvaluationError> {
Ok(match term {
GroundTermPattern::NamedNode(term) => Some(term.clone().into()),
GroundTermPattern::Literal(term) => Some(term.clone().into()),
GroundTermPattern::Triple(triple) => {
Self::convert_ground_triple_pattern(triple, variables, values, dataset)?
.map(Into::into)
}
GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
.map(|node| dataset.decode_term(&node))
.transpose()?,
})
}
fn convert_ground_triple_pattern(
triple: &GroundTriplePattern,
variables: &[Variable],
values: &EncodedTuple,
dataset: &DatasetView,
) -> Result<Option<Triple>, EvaluationError> {
Ok(Some(Triple {
subject: match Self::convert_ground_term_or_var(
&triple.subject,
variables,
values,
dataset,
)? {
Some(Term::NamedNode(node)) => node.into(),
Some(Term::BlankNode(node)) => node.into(),
Some(Term::Triple(triple)) => triple.into(),
Some(Term::Literal(_)) | None => return Ok(None),
},
predicate: if let Some(predicate) =
Self::convert_named_node_or_var(&triple.predicate, variables, values, dataset)?
{
predicate
} else {
return Ok(None);
},
object: if let Some(object) =
Self::convert_ground_term_or_var(&triple.object, variables, values, dataset)?
{
object
} else {
return Ok(None);
},
}))
}
fn lookup_variable(
v: &Variable,
variables: &[Variable],
values: &EncodedTuple,
) -> Option<EncodedTerm> {
variables
.iter()
.position(|v2| v == v2)
.and_then(|i| values.get(i))
.cloned()
}
}

@ -0,0 +1,310 @@
//! TODO: This storage is dramatically naive.
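//!
//! Everything lives in a single `RwLock`-guarded map of `BTreeMap`s and every
//! scan copies the matching key range, so this is only meant as an in-memory
//! fallback for targets without RocksDB.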
use crate::storage::StorageError;
use crate::store::CorruptionError;
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use std::error::Error;
use std::mem::transmute;
use std::rc::{Rc, Weak};
use std::sync::{Arc, RwLock, RwLockWriteGuard};
pub struct ColumnFamilyDefinition {
pub name: &'static str,
pub use_iter: bool,
pub min_prefix_size: usize,
pub unordered_writes: bool,
}
#[derive(Clone)]
pub struct Db(Arc<RwLock<HashMap<ColumnFamily, BTreeMap<Vec<u8>, Vec<u8>>>>>);
impl Db {
#[allow(clippy::unnecessary_wraps)]
pub fn new(column_families: Vec<ColumnFamilyDefinition>) -> Result<Self, StorageError> {
let mut trees = HashMap::new();
for cf in column_families {
trees.insert(ColumnFamily(cf.name), BTreeMap::default());
}
trees.entry(ColumnFamily("default")).or_default(); // We make sure that the "default" column family exists.
Ok(Self(Arc::new(RwLock::new(trees))))
}
#[allow(clippy::unwrap_in_result)]
pub fn column_family(&self, name: &'static str) -> Result<ColumnFamily, StorageError> {
let column_family = ColumnFamily(name);
if self.0.read().unwrap().contains_key(&column_family) {
Ok(column_family)
} else {
Err(CorruptionError::from_missing_column_family_name(name).into())
}
}
#[must_use]
pub fn snapshot(&self) -> Reader {
Reader(InnerReader::Simple(Arc::clone(&self.0)))
}
#[allow(clippy::unwrap_in_result)]
pub fn transaction<'a, 'b: 'a, T, E: Error + 'static + From<StorageError>>(
&'b self,
f: impl Fn(Transaction<'a>) -> Result<T, E>,
) -> Result<T, E> {
f(Transaction(Rc::new(RefCell::new(self.0.write().unwrap()))))
}
}
#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct ColumnFamily(&'static str);
pub struct Reader(InnerReader);
enum InnerReader {
Simple(Arc<RwLock<HashMap<ColumnFamily, BTreeMap<Vec<u8>, Vec<u8>>>>>),
Transaction(
Weak<RefCell<RwLockWriteGuard<'static, HashMap<ColumnFamily, BTreeMap<Vec<u8>, Vec<u8>>>>>>,
),
}
impl Reader {
#[allow(clippy::unwrap_in_result)]
pub fn get(
&self,
column_family: &ColumnFamily,
key: &[u8],
) -> Result<Option<Vec<u8>>, StorageError> {
match &self.0 {
InnerReader::Simple(reader) => Ok(reader
.read()
.unwrap()
.get(column_family)
.and_then(|cf| cf.get(key).cloned())),
InnerReader::Transaction(reader) => {
if let Some(reader) = reader.upgrade() {
Ok((*reader)
.borrow()
.get(column_family)
.and_then(|cf| cf.get(key).cloned()))
} else {
Err(StorageError::Other(
"The transaction is already ended".into(),
))
}
}
}
}
#[allow(clippy::unwrap_in_result)]
pub fn contains_key(
&self,
column_family: &ColumnFamily,
key: &[u8],
) -> Result<bool, StorageError> {
match &self.0 {
InnerReader::Simple(reader) => Ok(reader
.read()
.unwrap()
.get(column_family)
.map_or(false, |cf| cf.contains_key(key))),
InnerReader::Transaction(reader) => {
if let Some(reader) = reader.upgrade() {
Ok((*reader)
.borrow()
.get(column_family)
.map_or(false, |cf| cf.contains_key(key)))
} else {
Err(StorageError::Other(
"The transaction is already ended".into(),
))
}
}
}
}
#[allow(clippy::iter_not_returning_iterator)]
pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> {
self.scan_prefix(column_family, &[])
}
#[allow(clippy::unwrap_in_result)]
pub fn scan_prefix(
&self,
column_family: &ColumnFamily,
prefix: &[u8],
) -> Result<Iter, StorageError> {
let data: Vec<_> = match &self.0 {
InnerReader::Simple(reader) => {
let trees = reader.read().unwrap();
let Some(tree) = trees.get(column_family) else {
return Ok(Iter {
iter: Vec::new().into_iter(),
current: None,
});
};
if prefix.is_empty() {
tree.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
} else {
tree.range(prefix.to_vec()..)
.take_while(|(k, _)| k.starts_with(prefix))
.map(|(k, v)| (k.clone(), v.clone()))
.collect()
}
}
InnerReader::Transaction(reader) => {
let Some(reader) = reader.upgrade() else {
return Err(StorageError::Other(
"The transaction is already ended".into(),
));
};
let trees = (*reader).borrow();
let Some(tree) = trees.get(column_family) else {
return Ok(Iter {
iter: Vec::new().into_iter(),
current: None,
});
};
if prefix.is_empty() {
tree.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
} else {
tree.range(prefix.to_vec()..)
.take_while(|(k, _)| k.starts_with(prefix))
.map(|(k, v)| (k.clone(), v.clone()))
.collect()
}
}
};
let mut iter = data.into_iter();
let current = iter.next();
Ok(Iter { iter, current })
}
#[allow(clippy::unwrap_in_result)]
pub fn len(&self, column_family: &ColumnFamily) -> Result<usize, StorageError> {
match &self.0 {
InnerReader::Simple(reader) => Ok(reader
.read()
.unwrap()
.get(column_family)
.map_or(0, BTreeMap::len)),
InnerReader::Transaction(reader) => {
if let Some(reader) = reader.upgrade() {
Ok((*reader)
.borrow()
.get(column_family)
.map_or(0, BTreeMap::len))
} else {
Err(StorageError::Other(
"The transaction is already ended".into(),
))
}
}
}
}
#[allow(clippy::unwrap_in_result)]
pub fn is_empty(&self, column_family: &ColumnFamily) -> Result<bool, StorageError> {
match &self.0 {
InnerReader::Simple(reader) => Ok(reader
.read()
.unwrap()
.get(column_family)
.map_or(true, BTreeMap::is_empty)),
InnerReader::Transaction(reader) => {
if let Some(reader) = reader.upgrade() {
Ok((*reader)
.borrow()
.get(column_family)
.map_or(true, BTreeMap::is_empty))
} else {
Err(StorageError::Other(
"The transaction is already ended".into(),
))
}
}
}
}
}
pub struct Transaction<'a>(
Rc<RefCell<RwLockWriteGuard<'a, HashMap<ColumnFamily, BTreeMap<Vec<u8>, Vec<u8>>>>>>,
);
impl Transaction<'_> {
#[allow(unsafe_code, clippy::useless_transmute)]
pub fn reader(&self) -> Reader {
// SAFETY: This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
Reader(InnerReader::Transaction(Rc::downgrade(unsafe {
transmute(&self.0)
})))
}
#[allow(clippy::unnecessary_wraps)]
pub fn contains_key_for_update(
&self,
column_family: &ColumnFamily,
key: &[u8],
) -> Result<bool, StorageError> {
Ok((*self.0)
.borrow()
.get(column_family)
.map_or(false, |cf| cf.contains_key(key)))
}
#[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)]
pub fn insert(
&mut self,
column_family: &ColumnFamily,
key: &[u8],
value: &[u8],
) -> Result<(), StorageError> {
self.0
.borrow_mut()
.get_mut(column_family)
.unwrap()
.insert(key.into(), value.into());
Ok(())
}
pub fn insert_empty(
&mut self,
column_family: &ColumnFamily,
key: &[u8],
) -> Result<(), StorageError> {
self.insert(column_family, key, &[])
}
#[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)]
pub fn remove(&mut self, column_family: &ColumnFamily, key: &[u8]) -> Result<(), StorageError> {
self.0
.borrow_mut()
.get_mut(column_family)
.unwrap()
.remove(key);
Ok(())
}
}
pub struct Iter {
iter: std::vec::IntoIter<(Vec<u8>, Vec<u8>)>,
current: Option<(Vec<u8>, Vec<u8>)>,
}
impl Iter {
pub fn key(&self) -> Option<&[u8]> {
Some(&self.current.as_ref()?.0)
}
#[allow(dead_code)]
pub fn value(&self) -> Option<&[u8]> {
Some(&self.current.as_ref()?.1)
}
pub fn next(&mut self) {
self.current = self.iter.next();
}
#[allow(clippy::unnecessary_wraps, clippy::unused_self)]
pub fn status(&self) -> Result<(), StorageError> {
Ok(())
}
}

@ -0,0 +1,12 @@
//! A storage backend: RocksDB when available, otherwise an in-memory fallback.
#[cfg(target_family = "wasm")]
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(not(target_family = "wasm"))]
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(target_family = "wasm")]
mod fallback;
#[cfg(not(target_family = "wasm"))]
mod oxi_rocksdb;

File diff suppressed because it is too large

@ -0,0 +1,742 @@
use crate::storage::error::{CorruptionError, StorageError};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash};
use crate::storage::small_string::SmallString;
use oxsdatatypes::*;
use std::io::Read;
use std::mem::size_of;
#[cfg(not(target_family = "wasm"))]
pub const LATEST_STORAGE_VERSION: u64 = 1;
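// A capacity hint for encoded terms: one type byte plus at most two 16-byte
// components (e.g. a big-big language-tagged literal); nested triples may exceed it.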
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
// Encoded term type blocks
// 1-7: usual named nodes (except prefixes, cf. later)
// 8-15: blank nodes
// 16-47: literals
// 48-55: triples
// 56-63: future use
// 64-127: default named node prefixes
// 128-255: custom named node prefixes
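// For example, a named node is written as its one-byte type id followed by the
// 16-byte big-endian `StrHash` of its IRI (see `write_term` below).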
const TYPE_NAMED_NODE_ID: u8 = 1;
const TYPE_NUMERICAL_BLANK_NODE_ID: u8 = 8;
const TYPE_SMALL_BLANK_NODE_ID: u8 = 9;
const TYPE_BIG_BLANK_NODE_ID: u8 = 10;
const TYPE_SMALL_STRING_LITERAL: u8 = 16;
const TYPE_BIG_STRING_LITERAL: u8 = 17;
const TYPE_SMALL_SMALL_LANG_STRING_LITERAL: u8 = 20;
const TYPE_SMALL_BIG_LANG_STRING_LITERAL: u8 = 21;
const TYPE_BIG_SMALL_LANG_STRING_LITERAL: u8 = 22;
const TYPE_BIG_BIG_LANG_STRING_LITERAL: u8 = 23;
const TYPE_SMALL_TYPED_LITERAL: u8 = 24;
const TYPE_BIG_TYPED_LITERAL: u8 = 25;
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 28;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 29;
const TYPE_FLOAT_LITERAL: u8 = 30;
const TYPE_DOUBLE_LITERAL: u8 = 31;
const TYPE_INTEGER_LITERAL: u8 = 32;
const TYPE_DECIMAL_LITERAL: u8 = 33;
const TYPE_DATE_TIME_LITERAL: u8 = 34;
const TYPE_TIME_LITERAL: u8 = 35;
const TYPE_DATE_LITERAL: u8 = 36;
const TYPE_G_YEAR_MONTH_LITERAL: u8 = 37;
const TYPE_G_YEAR_LITERAL: u8 = 38;
const TYPE_G_MONTH_DAY_LITERAL: u8 = 39;
const TYPE_G_DAY_LITERAL: u8 = 40;
const TYPE_G_MONTH_LITERAL: u8 = 41;
const TYPE_DURATION_LITERAL: u8 = 42;
const TYPE_YEAR_MONTH_DURATION_LITERAL: u8 = 43;
const TYPE_DAY_TIME_DURATION_LITERAL: u8 = 44;
const TYPE_TRIPLE: u8 = 48;
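/// Key component orders used by the quad indexes (S = subject, P = predicate,
/// O = object, G = graph name); the `D*` variants decode keys of the default
/// graph, which store no graph name.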
#[derive(Clone, Copy)]
pub enum QuadEncoding {
Spog,
Posg,
Ospg,
Gspo,
Gpos,
Gosp,
Dspo,
Dpos,
Dosp,
}
impl QuadEncoding {
pub fn decode(self, mut buffer: &[u8]) -> Result<EncodedQuad, StorageError> {
match self {
Self::Spog => buffer.read_spog_quad(),
Self::Posg => buffer.read_posg_quad(),
Self::Ospg => buffer.read_ospg_quad(),
Self::Gspo => buffer.read_gspo_quad(),
Self::Gpos => buffer.read_gpos_quad(),
Self::Gosp => buffer.read_gosp_quad(),
Self::Dspo => buffer.read_dspo_quad(),
Self::Dpos => buffer.read_dpos_quad(),
Self::Dosp => buffer.read_dosp_quad(),
}
}
}
pub fn decode_term(mut buffer: &[u8]) -> Result<EncodedTerm, StorageError> {
buffer.read_term()
}
pub trait TermReader {
fn read_term(&mut self) -> Result<EncodedTerm, StorageError>;
fn read_spog_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let subject = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
let graph_name = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_posg_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let predicate = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
let graph_name = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_ospg_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let object = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
let graph_name = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_gspo_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let graph_name = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_gpos_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let graph_name = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_gosp_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let graph_name = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name,
})
}
fn read_dspo_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let subject = self.read_term()?;
let predicate = self.read_term()?;
let object = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name: EncodedTerm::DefaultGraph,
})
}
fn read_dpos_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let predicate = self.read_term()?;
let object = self.read_term()?;
let subject = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name: EncodedTerm::DefaultGraph,
})
}
fn read_dosp_quad(&mut self) -> Result<EncodedQuad, StorageError> {
let object = self.read_term()?;
let subject = self.read_term()?;
let predicate = self.read_term()?;
Ok(EncodedQuad {
subject,
predicate,
object,
graph_name: EncodedTerm::DefaultGraph,
})
}
}
impl<R: Read> TermReader for R {
fn read_term(&mut self) -> Result<EncodedTerm, StorageError> {
let mut type_buffer = [0];
self.read_exact(&mut type_buffer)?;
match type_buffer[0] {
TYPE_NAMED_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedNode {
iri_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_NUMERICAL_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NumericalBlankNode {
id: u128::from_be_bytes(buffer),
})
}
TYPE_SMALL_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::SmallBlankNode(
SmallString::from_be_bytes(buffer).map_err(CorruptionError::new)?,
))
}
TYPE_BIG_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::BigBlankNode {
id_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_SMALL_SMALL_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::SmallSmallLangStringLiteral {
value: SmallString::from_be_bytes(value_buffer)
.map_err(CorruptionError::new)?,
language: SmallString::from_be_bytes(language_buffer)
.map_err(CorruptionError::new)?,
})
}
TYPE_SMALL_BIG_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::SmallBigLangStringLiteral {
value: SmallString::from_be_bytes(value_buffer)
.map_err(CorruptionError::new)?,
language_id: StrHash::from_be_bytes(language_buffer),
})
}
TYPE_BIG_SMALL_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::from_be_bytes(value_buffer),
language: SmallString::from_be_bytes(language_buffer)
.map_err(CorruptionError::new)?,
})
}
TYPE_BIG_BIG_LANG_STRING_LITERAL => {
let mut language_buffer = [0; 16];
self.read_exact(&mut language_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::from_be_bytes(value_buffer),
language_id: StrHash::from_be_bytes(language_buffer),
})
}
TYPE_SMALL_TYPED_LITERAL => {
let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::SmallTypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer),
value: SmallString::from_be_bytes(value_buffer)
.map_err(CorruptionError::new)?,
})
}
TYPE_BIG_TYPED_LITERAL => {
let mut datatype_buffer = [0; 16];
self.read_exact(&mut datatype_buffer)?;
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::BigTypedLiteral {
datatype_id: StrHash::from_be_bytes(datatype_buffer),
value_id: StrHash::from_be_bytes(value_buffer),
})
}
TYPE_SMALL_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::SmallStringLiteral(
SmallString::from_be_bytes(buffer).map_err(CorruptionError::new)?,
))
}
TYPE_BIG_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::BigStringLiteral {
value_id: StrHash::from_be_bytes(buffer),
})
}
TYPE_BOOLEAN_LITERAL_TRUE => Ok(true.into()),
TYPE_BOOLEAN_LITERAL_FALSE => Ok(false.into()),
TYPE_FLOAT_LITERAL => {
let mut buffer = [0; 4];
self.read_exact(&mut buffer)?;
Ok(Float::from_be_bytes(buffer).into())
}
TYPE_DOUBLE_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(Double::from_be_bytes(buffer).into())
}
TYPE_INTEGER_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(Integer::from_be_bytes(buffer).into())
}
TYPE_DECIMAL_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(Decimal::from_be_bytes(buffer).into())
}
TYPE_DATE_TIME_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(DateTime::from_be_bytes(buffer).into())
}
TYPE_TIME_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(Time::from_be_bytes(buffer).into())
}
TYPE_DATE_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(Date::from_be_bytes(buffer).into())
}
TYPE_G_YEAR_MONTH_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(GYearMonth::from_be_bytes(buffer).into())
}
TYPE_G_YEAR_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(GYear::from_be_bytes(buffer).into())
}
TYPE_G_MONTH_DAY_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(GMonthDay::from_be_bytes(buffer).into())
}
TYPE_G_DAY_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(GDay::from_be_bytes(buffer).into())
}
TYPE_G_MONTH_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(GMonth::from_be_bytes(buffer).into())
}
TYPE_DURATION_LITERAL => {
let mut buffer = [0; 24];
self.read_exact(&mut buffer)?;
Ok(Duration::from_be_bytes(buffer).into())
}
TYPE_YEAR_MONTH_DURATION_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(YearMonthDuration::from_be_bytes(buffer).into())
}
TYPE_DAY_TIME_DURATION_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(DayTimeDuration::from_be_bytes(buffer).into())
}
TYPE_TRIPLE => Ok(EncodedTriple {
subject: self.read_term()?,
predicate: self.read_term()?,
object: self.read_term()?,
}
.into()),
_ => Err(CorruptionError::msg("the term buffer has an invalid type id").into()),
}
}
}
pub fn write_spog_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.subject);
write_term(sink, &quad.predicate);
write_term(sink, &quad.object);
write_term(sink, &quad.graph_name);
}
pub fn write_posg_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.predicate);
write_term(sink, &quad.object);
write_term(sink, &quad.subject);
write_term(sink, &quad.graph_name);
}
pub fn write_ospg_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.object);
write_term(sink, &quad.subject);
write_term(sink, &quad.predicate);
write_term(sink, &quad.graph_name);
}
pub fn write_gspo_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.graph_name);
write_term(sink, &quad.subject);
write_term(sink, &quad.predicate);
write_term(sink, &quad.object);
}
pub fn write_gpos_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.graph_name);
write_term(sink, &quad.predicate);
write_term(sink, &quad.object);
write_term(sink, &quad.subject);
}
pub fn write_gosp_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.graph_name);
write_term(sink, &quad.object);
write_term(sink, &quad.subject);
write_term(sink, &quad.predicate);
}
pub fn write_spo_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.subject);
write_term(sink, &quad.predicate);
write_term(sink, &quad.object);
}
pub fn write_pos_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.predicate);
write_term(sink, &quad.object);
write_term(sink, &quad.subject);
}
pub fn write_osp_quad(sink: &mut Vec<u8>, quad: &EncodedQuad) {
write_term(sink, &quad.object);
write_term(sink, &quad.subject);
write_term(sink, &quad.predicate);
}
pub fn encode_term(t: &EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t);
vec
}
pub fn encode_term_pair(t1: &EncodedTerm, t2: &EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(2 * WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t1);
write_term(&mut vec, t2);
vec
}
pub fn encode_term_triple(t1: &EncodedTerm, t2: &EncodedTerm, t3: &EncodedTerm) -> Vec<u8> {
let mut vec = Vec::with_capacity(3 * WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t1);
write_term(&mut vec, t2);
write_term(&mut vec, t3);
vec
}
pub fn encode_term_quad(
t1: &EncodedTerm,
t2: &EncodedTerm,
t3: &EncodedTerm,
t4: &EncodedTerm,
) -> Vec<u8> {
let mut vec = Vec::with_capacity(4 * WRITTEN_TERM_MAX_SIZE);
write_term(&mut vec, t1);
write_term(&mut vec, t2);
write_term(&mut vec, t3);
write_term(&mut vec, t4);
vec
}
pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
match term {
EncodedTerm::DefaultGraph => (),
EncodedTerm::NamedNode { iri_id } => {
sink.push(TYPE_NAMED_NODE_ID);
sink.extend_from_slice(&iri_id.to_be_bytes());
}
EncodedTerm::NumericalBlankNode { id } => {
sink.push(TYPE_NUMERICAL_BLANK_NODE_ID);
sink.extend_from_slice(&id.to_be_bytes())
}
EncodedTerm::SmallBlankNode(id) => {
sink.push(TYPE_SMALL_BLANK_NODE_ID);
sink.extend_from_slice(&id.to_be_bytes())
}
EncodedTerm::BigBlankNode { id_id } => {
sink.push(TYPE_BIG_BLANK_NODE_ID);
sink.extend_from_slice(&id_id.to_be_bytes());
}
EncodedTerm::SmallStringLiteral(value) => {
sink.push(TYPE_SMALL_STRING_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::BigStringLiteral { value_id } => {
sink.push(TYPE_BIG_STRING_LITERAL);
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
sink.push(TYPE_SMALL_SMALL_LANG_STRING_LITERAL);
sink.extend_from_slice(&language.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
sink.push(TYPE_SMALL_BIG_LANG_STRING_LITERAL);
sink.extend_from_slice(&language_id.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
sink.push(TYPE_BIG_SMALL_LANG_STRING_LITERAL);
sink.extend_from_slice(&language.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::BigBigLangStringLiteral {
value_id,
language_id,
} => {
sink.push(TYPE_BIG_BIG_LANG_STRING_LITERAL);
sink.extend_from_slice(&language_id.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
sink.push(TYPE_SMALL_TYPED_LITERAL);
sink.extend_from_slice(&datatype_id.to_be_bytes());
sink.extend_from_slice(&value.to_be_bytes());
}
EncodedTerm::BigTypedLiteral {
value_id,
datatype_id,
} => {
sink.push(TYPE_BIG_TYPED_LITERAL);
sink.extend_from_slice(&datatype_id.to_be_bytes());
sink.extend_from_slice(&value_id.to_be_bytes());
}
EncodedTerm::BooleanLiteral(value) => sink.push(if bool::from(*value) {
TYPE_BOOLEAN_LITERAL_TRUE
} else {
TYPE_BOOLEAN_LITERAL_FALSE
}),
EncodedTerm::FloatLiteral(value) => {
sink.push(TYPE_FLOAT_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DoubleLiteral(value) => {
sink.push(TYPE_DOUBLE_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::IntegerLiteral(value) => {
sink.push(TYPE_INTEGER_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DecimalLiteral(value) => {
sink.push(TYPE_DECIMAL_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DateTimeLiteral(value) => {
sink.push(TYPE_DATE_TIME_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::TimeLiteral(value) => {
sink.push(TYPE_TIME_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DurationLiteral(value) => {
sink.push(TYPE_DURATION_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DateLiteral(value) => {
sink.push(TYPE_DATE_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::GYearMonthLiteral(value) => {
sink.push(TYPE_G_YEAR_MONTH_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::GYearLiteral(value) => {
sink.push(TYPE_G_YEAR_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::GMonthDayLiteral(value) => {
sink.push(TYPE_G_MONTH_DAY_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::GDayLiteral(value) => {
sink.push(TYPE_G_DAY_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::GMonthLiteral(value) => {
sink.push(TYPE_G_MONTH_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::YearMonthDurationLiteral(value) => {
sink.push(TYPE_YEAR_MONTH_DURATION_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::DayTimeDurationLiteral(value) => {
sink.push(TYPE_DAY_TIME_DURATION_LITERAL);
sink.extend_from_slice(&value.to_be_bytes())
}
EncodedTerm::Triple(value) => {
sink.push(TYPE_TRIPLE);
write_term(sink, &value.subject);
write_term(sink, &value.predicate);
write_term(sink, &value.object);
}
}
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
use crate::model::TermRef;
use crate::storage::numeric_encoder::*;
use std::cell::RefCell;
use std::collections::HashMap;
#[derive(Default)]
struct MemoryStrStore {
id2str: RefCell<HashMap<StrHash, String>>,
}
impl StrLookup for MemoryStrStore {
fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
Ok(self.id2str.borrow().get(key).cloned())
}
}
impl MemoryStrStore {
fn insert_term(&self, term: TermRef<'_>, encoded: &EncodedTerm) {
insert_term(term, encoded, &mut |h, v| {
self.insert_str(h, v);
Ok(())
})
.unwrap();
}
fn insert_str(&self, key: &StrHash, value: &str) {
self.id2str
.borrow_mut()
.entry(*key)
.or_insert_with(|| value.to_owned());
}
}
#[test]
fn test_encoding() {
use crate::model::vocab::xsd;
use crate::model::*;
let store = MemoryStrStore::default();
let terms: Vec<Term> = vec![
NamedNode::new_unchecked("http://foo.com").into(),
NamedNode::new_unchecked("http://bar.com").into(),
NamedNode::new_unchecked("http://foo.com").into(),
BlankNode::default().into(),
BlankNode::new_unchecked("bnode").into(),
BlankNode::new_unchecked("foo-bnode-thisisaverylargeblanknode").into(),
Literal::new_simple_literal("literal").into(),
BlankNode::new_unchecked("foo-literal-thisisaverylargestringliteral").into(),
Literal::from(true).into(),
Literal::from(1.2).into(),
Literal::from(1).into(),
Literal::from("foo-string").into(),
Literal::new_language_tagged_literal_unchecked("foo-fr", "fr").into(),
Literal::new_language_tagged_literal_unchecked(
"foo-fr-literal-thisisaverylargelanguagetaggedstringliteral",
"fr",
)
.into(),
Literal::new_language_tagged_literal_unchecked(
"foo-big",
"fr-FR-Latn-x-foo-bar-baz-bat-aaaa-bbbb-cccc",
)
.into(),
Literal::new_language_tagged_literal_unchecked(
"foo-big-literal-thisisaverylargelanguagetaggedstringliteral",
"fr-FR-Latn-x-foo-bar-baz-bat-aaaa-bbbb-cccc",
)
.into(),
Literal::new_typed_literal("-1.32", xsd::DECIMAL).into(),
Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME).into(),
Literal::new_typed_literal("2020-01-01", xsd::DATE).into(),
Literal::new_typed_literal("01:01:01Z", xsd::TIME).into(),
Literal::new_typed_literal("2020-01", xsd::G_YEAR_MONTH).into(),
Literal::new_typed_literal("2020", xsd::G_YEAR).into(),
Literal::new_typed_literal("--01-01", xsd::G_MONTH_DAY).into(),
Literal::new_typed_literal("--01", xsd::G_MONTH).into(),
Literal::new_typed_literal("---01", xsd::G_DAY).into(),
Literal::new_typed_literal("PT1S", xsd::DURATION).into(),
Literal::new_typed_literal("PT1S", xsd::DAY_TIME_DURATION).into(),
Literal::new_typed_literal("P1Y", xsd::YEAR_MONTH_DURATION).into(),
Literal::new_typed_literal("-foo", NamedNode::new_unchecked("http://foo.com")).into(),
Literal::new_typed_literal(
"-foo-thisisaverybigtypedliteralwiththefoodatatype",
NamedNode::new_unchecked("http://foo.com"),
)
.into(),
Triple::new(
NamedNode::new_unchecked("http://foo.com"),
NamedNode::new_unchecked("http://bar.com"),
Literal::from(true),
)
.into(),
];
for term in terms {
let encoded = term.as_ref().into();
store.insert_term(term.as_ref(), &encoded);
assert_eq!(encoded, term.as_ref().into());
assert_eq!(term, store.decode_term(&encoded).unwrap());
let mut buffer = Vec::new();
write_term(&mut buffer, &encoded);
assert_eq!(encoded, buffer.as_slice().read_term().unwrap());
}
}
}
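
Every arm above follows the same wire layout: a one-byte type tag followed by a fixed-width big-endian payload, so each term gets a canonical byte representation that can be compared and hashed directly. A minimal self-contained sketch of that layout (the function and value below are made up for illustration, not part of the crate):

fn encode_tagged_u128(sink: &mut Vec<u8>, tag: u8, payload: u128) {
    sink.push(tag); // 1-byte type discriminant, like TYPE_NAMED_NODE_ID above
    sink.extend_from_slice(&payload.to_be_bytes()); // fixed-width big-endian payload
}

fn main() {
    let mut sink = Vec::new();
    encode_tagged_u128(&mut sink, 1, 0xDEAD_BEEF);
    assert_eq!(sink.len(), 17); // 1 tag byte + 16 payload bytes
}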

@ -0,0 +1,139 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
/// An error related to storage operations (reads, writes...).
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
/// Error from the OS I/O layer.
#[error(transparent)]
Io(#[from] io::Error),
/// Error related to data corruption.
#[error(transparent)]
Corruption(#[from] CorruptionError),
#[doc(hidden)]
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl From<StorageError> for io::Error {
#[inline]
fn from(error: StorageError) -> Self {
match error {
StorageError::Io(error) => error,
StorageError::Corruption(error) => error.into(),
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error),
}
}
}
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct CorruptionError(#[from] CorruptionErrorKind);
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
enum CorruptionErrorKind {
#[error("{0}")]
Msg(String),
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl CorruptionError {
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
Self(CorruptionErrorKind::Other(error.into()))
}
#[inline]
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Invalid term encoding {encoded:?} for {term}"))
}
#[inline]
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Column family {name} does not exist"))
}
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn msg(msg: impl Into<String>) -> Self {
Self(CorruptionErrorKind::Msg(msg.into()))
}
}
impl From<CorruptionError> for io::Error {
#[inline]
fn from(error: CorruptionError) -> Self {
Self::new(io::ErrorKind::InvalidData, error)
}
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
#[error(transparent)]
Parsing(#[from] RdfParseError),
/// An error raised during the insertion in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// The base IRI is invalid.
#[error("Invalid base IRI '{iri}': {error}")]
InvalidBaseIri {
/// The IRI itself.
iri: String,
/// The parsing error.
#[source]
error: IriParseError,
},
}
impl From<LoaderError> for io::Error {
#[inline]
fn from(error: LoaderError) -> Self {
match error {
LoaderError::Storage(error) => error.into(),
LoaderError::Parsing(error) => error.into(),
LoaderError::InvalidBaseIri { .. } => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.
#[error(transparent)]
Io(#[from] io::Error),
/// An error raised during the lookup in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
#[error("A RDF format supporting datasets was expected, {0} found")]
DatasetFormatExpected(RdfFormat),
}
impl From<SerializerError> for io::Error {
#[inline]
fn from(error: SerializerError) -> Self {
match error {
SerializerError::Storage(error) => error.into(),
SerializerError::Io(error) => error,
SerializerError::DatasetFormatExpected(_) => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
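
Each error type above converts into io::Error, so callers already working in io::Result can propagate storage and loader failures with `?` alone. A hedged usage sketch (the function itself is hypothetical; load_from_read is the loading method exercised by the test suite further down):

use std::io;
use oxigraph::io::RdfFormat;
use oxigraph::store::Store;

fn load_ntriples(store: &Store, data: &[u8]) -> io::Result<()> {
    // load_from_read returns Result<(), LoaderError>; the From<LoaderError> for
    // io::Error impl above lets `?` perform the conversion implicitly.
    store.load_from_read(RdfFormat::NTriples, data)?;
    Ok(())
}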

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,177 @@
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::ops::Deref;
use std::str::{FromStr, Utf8Error};
use std::{fmt, str};
/// A small inline string
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
pub struct SmallString {
inner: [u8; 16],
}
impl SmallString {
#[inline]
pub const fn new() -> Self {
Self { inner: [0; 16] }
}
#[inline]
pub fn from_utf8(bytes: &[u8]) -> Result<Self, BadSmallStringError> {
Self::from_str(str::from_utf8(bytes).map_err(BadSmallStringError::BadUtf8)?)
}
#[inline]
pub fn from_be_bytes(bytes: [u8; 16]) -> Result<Self, BadSmallStringError> {
    let len = usize::from(bytes[15]);
    if len > 15 {
        return Err(BadSmallStringError::TooLong(len));
    }
    // We check that the content is valid UTF-8
    str::from_utf8(&bytes[..len]).map_err(BadSmallStringError::BadUtf8)?;
    Ok(Self { inner: bytes })
}
#[inline]
pub fn len(&self) -> usize {
self.inner[15].into()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
#[allow(unsafe_code)]
pub fn as_str(&self) -> &str {
// SAFETY: safe because we ensured it in constructors
unsafe { str::from_utf8_unchecked(self.as_bytes()) }
}
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.inner[..self.len()]
}
#[inline]
pub fn to_be_bytes(self) -> [u8; 16] {
self.inner
}
}
impl Deref for SmallString {
type Target = str;
#[inline]
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl AsRef<str> for SmallString {
#[inline]
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl Borrow<str> for SmallString {
#[inline]
fn borrow(&self) -> &str {
self.as_str()
}
}
impl fmt::Debug for SmallString {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_str().fmt(f)
}
}
impl fmt::Display for SmallString {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.as_str().fmt(f)
}
}
impl PartialEq for SmallString {
#[inline]
fn eq(&self, other: &Self) -> bool {
self.as_str() == other.as_str()
}
}
impl Eq for SmallString {}
impl PartialOrd for SmallString {
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for SmallString {
#[inline]
fn cmp(&self, other: &Self) -> Ordering {
self.as_str().cmp(other.as_str())
}
}
impl Hash for SmallString {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}
impl From<SmallString> for String {
#[inline]
fn from(value: SmallString) -> Self {
value.as_str().into()
}
}
impl<'a> From<&'a SmallString> for &'a str {
#[inline]
fn from(value: &'a SmallString) -> Self {
value.as_str()
}
}
impl FromStr for SmallString {
type Err = BadSmallStringError;
#[inline]
fn from_str(value: &str) -> Result<Self, Self::Err> {
if value.len() <= 15 {
let mut inner = [0; 16];
inner[..value.len()].copy_from_slice(value.as_bytes());
inner[15] = value
.len()
.try_into()
.map_err(|_| Self::Err::TooLong(value.len()))?;
Ok(Self { inner })
} else {
Err(Self::Err::TooLong(value.len()))
}
}
}
impl<'a> TryFrom<&'a str> for SmallString {
type Error = BadSmallStringError;
#[inline]
fn try_from(value: &'a str) -> Result<Self, Self::Error> {
Self::from_str(value)
}
}
#[derive(Debug, Clone, Copy, thiserror::Error)]
pub enum BadSmallStringError {
#[error("small strings could only contain at most 15 characters, found {0}")]
TooLong(usize),
#[error(transparent)]
BadUtf8(#[from] Utf8Error),
}
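
A short sketch of how the inline layout behaves: up to 15 bytes of UTF-8 live in place and the 16th byte stores the length, so values round-trip losslessly through [u8; 16] (the test name is made up; it assumes the module's types above):

#[test]
fn small_string_layout_sketch() -> Result<(), BadSmallStringError> {
    let s = SmallString::from_str("hello")?;
    assert_eq!(s.len(), 5);
    let bytes = s.to_be_bytes();
    assert_eq!(bytes[15], 5); // the length lives in the last byte
    assert_eq!(&bytes[..5], b"hello"); // the payload is stored inline
    assert_eq!(SmallString::from_be_bytes(bytes)?.as_str(), "hello");
    assert!(SmallString::from_str("sixteen chars!!!").is_err()); // 16 bytes is too long
    Ok(())
}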

File diff suppressed because it is too large

@ -0,0 +1 @@
f08a4c5b-0479-408c-80d3-7d4b10d7c7aa

@ -0,0 +1,964 @@
# This is a RocksDB option file.
#
# For detailed file format spec, please refer to the example file
# in examples/rocksdb_option_file_example.ini
#
[Version]
rocksdb_version=6.7.3
options_file_version=1.1
[DBOptions]
write_dbid_to_manifest=false
avoid_unnecessary_blocking_io=false
two_write_queues=false
allow_ingest_behind=false
writable_file_max_buffer_size=1048576
avoid_flush_during_shutdown=false
avoid_flush_during_recovery=false
info_log_level=INFO_LEVEL
access_hint_on_compaction_start=NORMAL
allow_concurrent_memtable_write=true
enable_pipelined_write=false
stats_dump_period_sec=600
stats_persist_period_sec=600
strict_bytes_per_sync=false
WAL_ttl_seconds=0
WAL_size_limit_MB=0
max_subcompactions=1
dump_malloc_stats=false
db_log_dir=
wal_recovery_mode=kPointInTimeRecovery
log_file_time_to_roll=0
enable_write_thread_adaptive_yield=true
recycle_log_file_num=0
table_cache_numshardbits=6
atomic_flush=false
preserve_deletes=false
stats_history_buffer_size=1048576
max_open_files=-1
max_file_opening_threads=16
delete_obsolete_files_period_micros=21600000000
max_background_flushes=-1
write_thread_slow_yield_usec=3
base_background_compactions=-1
manual_wal_flush=false
wal_dir=tests/rockdb_bc_data
max_background_compactions=-1
bytes_per_sync=0
max_background_jobs=2
use_fsync=false
unordered_write=false
fail_if_options_file_error=false
random_access_max_buffer_size=1048576
compaction_readahead_size=0
wal_bytes_per_sync=0
new_table_reader_for_compaction_inputs=false
skip_stats_update_on_db_open=false
persist_stats_to_disk=false
skip_log_error_on_recovery=false
log_readahead_size=0
is_fd_close_on_exec=true
use_adaptive_mutex=false
error_if_exists=false
write_thread_max_yield_usec=100
enable_thread_tracking=false
db_write_buffer_size=0
create_missing_column_families=true
paranoid_checks=true
create_if_missing=true
max_manifest_file_size=1073741824
allow_2pc=false
max_total_wal_size=0
use_direct_io_for_flush_and_compaction=false
manifest_preallocation_size=4194304
use_direct_reads=false
delayed_write_rate=16777216
allow_fallocate=true
max_write_batch_group_size_bytes=1048576
keep_log_file_num=1000
allow_mmap_reads=false
max_log_file_size=0
allow_mmap_writes=false
advise_random_on_open=true
[CFOptions "default"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "default"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "id2str"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "id2str"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "spog"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "spog"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "posg"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "posg"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "ospg"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "ospg"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "gspo"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "gspo"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "gpos"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "gpos"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "gosp"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "gosp"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "dspo"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "dspo"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "dpos"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "dpos"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
[CFOptions "dosp"]
sample_for_compression=0
compaction_pri=kMinOverlappingRatio
merge_operator=nullptr
compaction_filter_factory=nullptr
memtable_factory=SkipListFactory
memtable_insert_with_hint_prefix_extractor=nullptr
comparator=leveldb.BytewiseComparator
target_file_size_base=67108864
max_sequential_skip_in_iterations=8
compaction_style=kCompactionStyleLevel
max_bytes_for_level_base=268435456
bloom_locality=0
write_buffer_size=67108864
compression_per_level=
memtable_huge_page_size=0
max_successive_merges=0
arena_block_size=8388608
memtable_whole_key_filtering=false
target_file_size_multiplier=1
max_bytes_for_level_multiplier_additional=1:1:1:1:1:1:1
num_levels=7
min_write_buffer_number_to_merge=1
max_write_buffer_number_to_maintain=0
max_write_buffer_number=2
compression=kSnappyCompression
level0_stop_writes_trigger=36
level0_slowdown_writes_trigger=20
compaction_filter=nullptr
level0_file_num_compaction_trigger=4
max_compaction_bytes=1677721600
compaction_options_universal={stop_style=kCompactionStopStyleTotalSize;compression_size_percent=-1;allow_trivial_move=false;max_merge_width=4294967295;max_size_amplification_percent=200;min_merge_width=2;size_ratio=1;}
memtable_prefix_bloom_size_ratio=0.000000
max_write_buffer_size_to_maintain=0
hard_pending_compaction_bytes_limit=274877906944
ttl=2592000
table_factory=BlockBasedTable
soft_pending_compaction_bytes_limit=68719476736
prefix_extractor=nullptr
bottommost_compression=kDisableCompressionOption
force_consistency_checks=false
paranoid_file_checks=false
compaction_options_fifo={allow_compaction=false;max_table_files_size=1073741824;}
max_bytes_for_level_multiplier=10.000000
optimize_filters_for_hits=false
level_compaction_dynamic_level_bytes=false
inplace_update_num_locks=10000
inplace_update_support=false
periodic_compaction_seconds=0
disable_auto_compactions=false
report_bg_io_stats=false
[TableOptions/BlockBasedTable "dosp"]
pin_top_level_index_and_filter=true
enable_index_compression=true
read_amp_bytes_per_bit=8589934592
format_version=2
block_align=false
metadata_block_size=4096
block_size_deviation=10
partition_filters=false
block_size=4096
index_block_restart_interval=1
no_block_cache=false
checksum=kCRC32c
whole_key_filtering=true
index_shortening=kShortenSeparators
data_block_index_type=kDataBlockBinarySearch
index_type=kBinarySearch
verify_compression=false
filter_policy=nullptr
data_block_hash_table_util_ratio=0.750000
pin_l0_filter_and_index_blocks_in_cache=false
block_restart_interval=16
cache_index_and_filter_blocks_with_high_priority=true
cache_index_and_filter_blocks=false
hash_index_allow_collision=true
flush_block_policy_factory=FlushBlockBySizePolicyFactory
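
For orientation, the column families in this fixture mirror the store's quad indexes: id2str maps term hashes back to their strings, while the four-letter names are quad orderings (s = subject, p = predicate, o = object, g = graph name), with the d-prefixed families presumably holding default-graph triples. A sketch of that reading (the constant names are made up; the mapping is inferred from the section names above):

const NAMED_GRAPH_INDEXES: [&str; 6] = ["spog", "posg", "ospg", "gspo", "gpos", "gosp"];
const DEFAULT_GRAPH_INDEXES: [&str; 3] = ["dspo", "dpos", "dosp"];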

@ -0,0 +1,542 @@
#![cfg(test)]
#![allow(clippy::panic_in_result_fn)]
use oxigraph::io::RdfFormat;
use oxigraph::model::vocab::{rdf, xsd};
use oxigraph::model::*;
use oxigraph::store::Store;
#[cfg(all(not(target_family = "wasm")))]
use rand::random;
#[cfg(all(not(target_family = "wasm")))]
use std::env::temp_dir;
use std::error::Error;
#[cfg(all(not(target_family = "wasm")))]
use std::fs::{create_dir_all, remove_dir_all, File};
#[cfg(all(not(target_family = "wasm")))]
use std::io::Write;
#[cfg(all(not(target_family = "wasm")))]
use std::iter::empty;
#[cfg(all(target_os = "linux"))]
use std::iter::once;
#[cfg(all(not(target_family = "wasm")))]
use std::path::{Path, PathBuf};
#[cfg(all(target_os = "linux"))]
use std::process::Command;
#[allow(clippy::non_ascii_literal)]
const DATA: &str = r#"
@prefix schema: <http://schema.org/> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
wd:Q90 a schema:City ;
schema:name "Paris"@fr , "la ville lumière"@fr ;
schema:country wd:Q142 ;
schema:population 2000000 ;
schema:startDate "-300"^^xsd:gYear ;
schema:url "https://www.paris.fr/"^^xsd:anyURI ;
schema:postalCode "75001" .
"#;
#[allow(clippy::non_ascii_literal)]
const GRAPH_DATA: &str = r#"
@prefix schema: <http://schema.org/> .
@prefix wd: <http://www.wikidata.org/entity/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
GRAPH <http://www.wikidata.org/wiki/Special:EntityData/Q90> {
wd:Q90 a schema:City ;
schema:name "Paris"@fr , "la ville lumière"@fr ;
schema:country wd:Q142 ;
schema:population 2000000 ;
schema:startDate "-300"^^xsd:gYear ;
schema:url "https://www.paris.fr/"^^xsd:anyURI ;
schema:postalCode "75001" .
}
"#;
const NUMBER_OF_TRIPLES: usize = 8;
fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>> {
let graph_name = graph_name.into();
let paris = NamedNodeRef::new_unchecked("http://www.wikidata.org/entity/Q90");
let france = NamedNodeRef::new_unchecked("http://www.wikidata.org/entity/Q142");
let city = NamedNodeRef::new_unchecked("http://schema.org/City");
let name = NamedNodeRef::new_unchecked("http://schema.org/name");
let country = NamedNodeRef::new_unchecked("http://schema.org/country");
let population = NamedNodeRef::new_unchecked("http://schema.org/population");
let start_date = NamedNodeRef::new_unchecked("http://schema.org/startDate");
let url = NamedNodeRef::new_unchecked("http://schema.org/url");
let postal_code = NamedNodeRef::new_unchecked("http://schema.org/postalCode");
vec![
QuadRef::new(paris, rdf::TYPE, city, graph_name),
QuadRef::new(
paris,
name,
LiteralRef::new_language_tagged_literal_unchecked("Paris", "fr"),
graph_name,
),
QuadRef::new(
paris,
name,
LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{E8}re", "fr"),
graph_name,
),
QuadRef::new(paris, country, france, graph_name),
QuadRef::new(
paris,
population,
LiteralRef::new_typed_literal("2000000", xsd::INTEGER),
graph_name,
),
QuadRef::new(
paris,
start_date,
LiteralRef::new_typed_literal("-300", xsd::G_YEAR),
graph_name,
),
QuadRef::new(
paris,
url,
LiteralRef::new_typed_literal("https://www.paris.fr/", xsd::ANY_URI),
graph_name,
),
QuadRef::new(
paris,
postal_code,
LiteralRef::new_simple_literal("75001"),
graph_name,
),
]
}
#[test]
fn test_load_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?);
}
store.validate()?;
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store
.bulk_loader()
.load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?);
}
store.validate()?;
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.bulk_loader().on_parse_error(|_| Ok(())).load_from_read(
RdfFormat::NTriples,
b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> .".as_slice(),
)?;
assert_eq!(store.len()?, 1);
assert!(store.contains(QuadRef::new(
NamedNodeRef::new_unchecked("http://example.com"),
NamedNodeRef::new_unchecked("http://example.com"),
NamedNodeRef::new_unchecked("http://example.com"),
GraphNameRef::DefaultGraph
))?);
store.validate()?;
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_bulk_load_empty() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.bulk_loader().load_quads(empty::<Quad>())?;
assert!(store.is_empty()?);
store.validate()?;
Ok(())
}
#[test]
fn test_load_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store.load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
for q in quads(NamedNodeRef::new_unchecked(
"http://www.wikidata.org/wiki/Special:EntityData/Q90",
)) {
assert!(store.contains(q)?);
}
store.validate()?;
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
store
.bulk_loader()
.load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
let graph_name =
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90");
for q in quads(graph_name) {
assert!(store.contains(q)?);
}
assert!(store.contains_named_graph(graph_name)?);
store.validate()?;
Ok(())
}
#[test]
fn test_load_graph_generates_new_blank_nodes() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
for _ in 0..2 {
store.load_from_read(
RdfFormat::NTriples,
"_:a <http://example.com/p> <http://example.com/p> .".as_bytes(),
)?;
}
assert_eq!(store.len()?, 2);
Ok(())
}
#[test]
fn test_dump_graph() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
for q in quads(GraphNameRef::DefaultGraph) {
store.insert(q)?;
}
let mut buffer = Vec::new();
store.dump_graph_to_write(GraphNameRef::DefaultGraph, RdfFormat::NTriples, &mut buffer)?;
assert_eq!(
buffer.into_iter().filter(|c| *c == b'\n').count(),
NUMBER_OF_TRIPLES
);
Ok(())
}
#[test]
fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
let store = Store::new()?;
for q in quads(GraphNameRef::DefaultGraph) {
store.insert(q)?;
}
let buffer = store.dump_to_write(RdfFormat::NQuads, Vec::new())?;
assert_eq!(
buffer.into_iter().filter(|c| *c == b'\n').count(),
NUMBER_OF_TRIPLES
);
Ok(())
}
#[test]
fn test_snapshot_isolation_iterator() -> Result<(), Box<dyn Error>> {
let quad = QuadRef::new(
NamedNodeRef::new("http://example.com/s")?,
NamedNodeRef::new("http://example.com/p")?,
NamedNodeRef::new("http://example.com/o")?,
NamedNodeRef::new("http://www.wikidata.org/wiki/Special:EntityData/Q90")?,
);
let store = Store::new()?;
store.insert(quad)?;
let iter = store.iter();
store.remove(quad)?;
assert_eq!(
iter.collect::<Result<Vec<_>, _>>()?,
vec![quad.into_owned()]
);
store.validate()?;
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<(), Box<dyn Error>> {
let quad = QuadRef::new(
NamedNodeRef::new_unchecked("http://example.com/s"),
NamedNodeRef::new_unchecked("http://example.com/p"),
NamedNodeRef::new_unchecked("http://example.com/o"),
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"),
);
let store = Store::new()?;
store.remove(quad)?;
store.bulk_loader().load_quads([quad.into_owned()])?;
assert_eq!(store.len()?, 1);
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_open_bad_dir() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default();
create_dir_all(&dir.0)?;
{
File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
}
assert!(Store::open(&dir.0).is_err());
Ok(())
}
#[test]
#[cfg(all(target_os = "linux"))]
fn test_bad_stt_open() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default();
let store = Store::open(&dir.0)?;
remove_dir_all(&dir.0)?;
store
.bulk_loader()
.load_quads(once(Quad::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
NamedNode::new_unchecked("http://example.com/o"),
GraphName::DefaultGraph,
)))
.unwrap_err();
Ok(())
}
// #[test]
// #[cfg(all(not(target_family = "wasm")))]
// fn test_backup() -> Result<(), Box<dyn Error>> {
// let quad = QuadRef::new(
// NamedNodeRef::new_unchecked("http://example.com/s"),
// NamedNodeRef::new_unchecked("http://example.com/p"),
// NamedNodeRef::new_unchecked("http://example.com/o"),
// GraphNameRef::DefaultGraph,
// );
// let store_dir = TempDir::default();
// let backup_from_rw_dir = TempDir::default();
// let backup_from_ro_dir = TempDir::default();
// let backup_from_secondary_dir = TempDir::default();
// let store = Store::open(&store_dir)?;
// store.insert(quad)?;
// let secondary_store = Store::open_secondary(&store_dir)?;
// store.flush()?;
// store.backup(&backup_from_rw_dir)?;
// secondary_store.backup(&backup_from_secondary_dir)?;
// store.remove(quad)?;
// assert!(!store.contains(quad)?);
// let backup_from_rw = Store::open_read_only(&backup_from_rw_dir.0)?;
// backup_from_rw.validate()?;
// assert!(backup_from_rw.contains(quad)?);
// backup_from_rw.backup(&backup_from_ro_dir)?;
// let backup_from_ro = Store::open_read_only(&backup_from_ro_dir.0)?;
// backup_from_ro.validate()?;
// assert!(backup_from_ro.contains(quad)?);
// let backup_from_secondary = Store::open_read_only(&backup_from_secondary_dir.0)?;
// backup_from_secondary.validate()?;
// assert!(backup_from_secondary.contains(quad)?);
// Ok(())
// }
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_bad_backup() -> Result<(), Box<dyn Error>> {
let store_dir = TempDir::default();
let backup_dir = TempDir::default();
create_dir_all(&backup_dir.0)?;
Store::open(&store_dir)?.backup(&backup_dir.0).unwrap_err();
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_backup_on_in_memory() -> Result<(), Box<dyn Error>> {
let backup_dir = TempDir::default();
Store::new()?.backup(&backup_dir).unwrap_err();
Ok(())
}
#[test]
#[cfg(all(target_os = "linux"))]
fn test_backward_compatibility() -> Result<(), Box<dyn Error>> {
// We run this twice to check that the data is properly persisted once the store is closed
for _ in 0..2 {
let store = Store::open("tests/rocksdb_bc_data")?;
for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?);
}
let graph_name =
NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90");
for q in quads(graph_name) {
assert!(store.contains(q)?);
}
assert!(store.contains_named_graph(graph_name)?);
assert_eq!(
vec![NamedOrBlankNode::from(graph_name)],
store.named_graphs().collect::<Result<Vec<_>, _>>()?
);
}
reset_dir("tests/rocksdb_bc_data")?;
Ok(())
}
// #[test]
// #[cfg(all(not(target_family = "wasm")))]
// fn test_secondary() -> Result<(), Box<dyn Error>> {
// let quad = QuadRef::new(
// NamedNodeRef::new_unchecked("http://example.com/s"),
// NamedNodeRef::new_unchecked("http://example.com/p"),
// NamedNodeRef::new_unchecked("http://example.com/o"),
// GraphNameRef::DefaultGraph,
// );
// let primary_dir = TempDir::default();
// // We open the store
// let primary = Store::open(&primary_dir)?;
// let secondary = Store::open_secondary(&primary_dir)?;
// // We insert a quad
// primary.insert(quad)?;
// primary.flush()?;
// // It is readable from both stores
// for store in &[&primary, &secondary] {
// assert!(store.contains(quad)?);
// assert_eq!(
// store.iter().collect::<Result<Vec<_>, _>>()?,
// vec![quad.into_owned()]
// );
// }
// // We validate the states
// primary.validate()?;
// secondary.validate()?;
// // We close the primary store and remove its content
// drop(primary);
// remove_dir_all(&primary_dir)?;
// // The secondary store is still readable
// assert!(secondary.contains(quad)?);
// secondary.validate()?;
// Ok(())
// }
// #[test]
// #[cfg(all(not(target_family = "wasm")))]
// fn test_open_secondary_bad_dir() -> Result<(), Box<dyn Error>> {
// let primary_dir = TempDir::default();
// create_dir_all(&primary_dir.0)?;
// {
// File::create(primary_dir.0.join("CURRENT"))?.write_all(b"foo")?;
// }
// assert!(Store::open_secondary(&primary_dir).is_err());
// Ok(())
// }
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_read_only() -> Result<(), Box<dyn Error>> {
let s = NamedNodeRef::new_unchecked("http://example.com/s");
let p = NamedNodeRef::new_unchecked("http://example.com/p");
let first_quad = QuadRef::new(
s,
p,
NamedNodeRef::new_unchecked("http://example.com/o"),
GraphNameRef::DefaultGraph,
);
let second_quad = QuadRef::new(
s,
p,
NamedNodeRef::new_unchecked("http://example.com/o2"),
GraphNameRef::DefaultGraph,
);
let store_dir = TempDir::default();
// We write to the store and close it
{
let read_write = Store::open(&store_dir)?;
read_write.insert(first_quad)?;
read_write.flush()?;
}
// We open as read-only
let read_only = Store::open_read_only(&store_dir, None)?;
assert!(read_only.contains(first_quad)?);
assert_eq!(
read_only.iter().collect::<Result<Vec<_>, _>>()?,
vec![first_quad.into_owned()]
);
read_only.validate()?;
// We open as read-write again
let read_write = Store::open(&store_dir)?;
read_write.insert(second_quad)?;
read_write.flush()?;
read_write.optimize()?; // Make sure everything is flushed to disk
// The new quad is in the read-write instance but not the read-only instance
assert!(read_write.contains(second_quad)?);
assert!(!read_only.contains(second_quad)?);
read_only.validate()?;
Ok(())
}
#[test]
#[cfg(all(not(target_family = "wasm")))]
fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> {
let dir = TempDir::default();
create_dir_all(&dir.0)?;
{
File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
}
assert!(Store::open_read_only(&dir, None).is_err());
Ok(())
}
#[cfg(all(target_os = "linux"))]
fn reset_dir(dir: &str) -> Result<(), Box<dyn Error>> {
assert!(Command::new("git")
.args(["clean", "-fX", dir])
.status()?
.success());
assert!(Command::new("git")
.args(["checkout", "HEAD", "--", dir])
.status()?
.success());
Ok(())
}
#[cfg(all(not(target_family = "wasm")))]
struct TempDir(PathBuf);
#[cfg(all(not(target_family = "wasm")))]
impl Default for TempDir {
fn default() -> Self {
Self(temp_dir().join(format!("oxigraph-test-{}", random::<u128>())))
}
}
#[cfg(all(not(target_family = "wasm")))]
impl AsRef<Path> for TempDir {
fn as_ref(&self) -> &Path {
&self.0
}
}
#[cfg(all(not(target_family = "wasm")))]
impl Drop for TempDir {
fn drop(&mut self) {
if self.0.is_dir() {
remove_dir_all(&self.0).unwrap();
}
}
}
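
The tests above cover loading, dumping, isolation and persistence but not SPARQL evaluation; a minimal hedged sketch of querying the same Store (assuming the query method and QueryResults type exposed by this crate's sparql module):

use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;
use std::error::Error;

fn count_cities(store: &Store) -> Result<usize, Box<dyn Error>> {
    // Counts subjects typed as schema:City, matching the DATA constant above
    if let QueryResults::Solutions(solutions) =
        store.query("SELECT ?s WHERE { ?s a <http://schema.org/City> }")?
    {
        Ok(solutions.count())
    } else {
        Ok(0)
    }
}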