diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7f3a8e42..c0b3ac23 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,8 +51,10 @@ jobs: with: python-version: 3.5 - run: python -m venv python/tests/venv - - run: source python/tests/venv/bin/activate && pip install --upgrade pip maturin + - run: source python/tests/venv/bin/activate && pip install --upgrade pip maturin sphinx - run: source tests/venv/bin/activate && maturin develop working-directory: ./python - run: source venv/bin/activate && python -m unittest working-directory: ./python/tests + - run: source ../tests/venv/bin/activate && sphinx-build -M doctest . build + working-directory: ./python/docs diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..da00c0c2 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,8 @@ +version: 2 +python: + version: 3.7 + install: + - method: pip + path: python +sphinx: + configuration: python/docs/conf.py \ No newline at end of file diff --git a/README.md b/README.md index 713daa87..30fbeb20 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Oxigraph Oxigraph is a work in progress graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. There is no released version yet. -The storage format is not stable yet and may be brocken at any time. +The storage format is not stable yet and may be broken at any time. Its goal is to provide a compliant, safe and fast graph database based on the [RocksDB](https://rocksdb.org/) and [Sled](https://sled.rs/) key-value stores. It is written in Rust. 
diff --git a/js/README.md b/js/README.md index 714c27ba..0785dbdf 100644 --- a/js/README.md +++ b/js/README.md @@ -3,6 +3,7 @@ Oxigraph for JavaScript [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph) +[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) This package provides a JavaScript API on top of Oxigraph compiled with WebAssembly. @@ -10,7 +11,7 @@ Oxigraph is a work in progress graph database written in Rust implementing the [ It is a work in progress and currently offers a simple in-memory store with [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) capabilities. -The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/). +The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). It is distributed using a [a NPM package](https://www.npmjs.com/package/oxigraph) that should work with nodeJS. 
@@ -148,7 +149,7 @@ The available formats are: * [TriG](https://www.w3.org/TR/trig/): `application/trig` * [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` * [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` -* [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` +* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` Example of loading a Turtle file into the named graph `` with the base IRI `http://example.com`: ```js diff --git a/lib/src/io/format.rs b/lib/src/io/format.rs index 8e2bfac1..4e2ceea9 100644 --- a/lib/src/io/format.rs +++ b/lib/src/io/format.rs @@ -26,7 +26,7 @@ pub enum GraphFormat { NTriples, /// [Turtle](https://www.w3.org/TR/turtle/) Turtle, - /// [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) + /// [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RdfXml, } diff --git a/lib/src/io/read.rs b/lib/src/io/read.rs index 0d90037e..1d49eecd 100644 --- a/lib/src/io/read.rs +++ b/lib/src/io/read.rs @@ -17,7 +17,7 @@ use std::io::BufRead; /// It currently supports the following formats: /// * [N-Triples](https://www.w3.org/TR/n-triples/) (`GraphFormat::NTriples`) /// * [Turtle](https://www.w3.org/TR/turtle/) (`GraphFormat::Turtle`) -/// * [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) (`GraphFormat::RdfXml`) +/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) (`GraphFormat::RdfXml`) /// /// ``` /// use oxigraph::io::{GraphFormat, GraphParser}; diff --git a/lib/src/io/write.rs b/lib/src/io/write.rs index 3a202bd2..eae9516a 100644 --- a/lib/src/io/write.rs +++ b/lib/src/io/write.rs @@ -13,7 +13,7 @@ use std::io::Write; /// It currently supports the following formats: /// * [N-Triples](https://www.w3.org/TR/n-triples/) (`GraphFormat::NTriples`) /// * [Turtle](https://www.w3.org/TR/turtle/) (`GraphFormat::Turtle`) -/// * [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) (`GraphFormat::RdfXml`) +/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) 
(`GraphFormat::RdfXml`) /// /// ``` /// use oxigraph::io::{GraphFormat, GraphSerializer}; diff --git a/python/Cargo.toml b/python/Cargo.toml index df308c5e..20333c25 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "oxigraph_python" +name = "pyoxigraph" version = "0.1.0" authors = ["Tpt "] license = "MIT/Apache-2.0" @@ -13,7 +13,8 @@ edition = "2018" [lib] crate-type = ["cdylib"] -name = "oxigraph" +name = "pyoxigraph" +doctest = false [dependencies] oxigraph = {path = "../lib", features=["sled"]} diff --git a/python/README.md b/python/README.md index 8eda2658..91369cea 100644 --- a/python/README.md +++ b/python/README.md @@ -1,16 +1,24 @@ -Oxigraph for Python -=================== +Oxigraph for Python (`pyoxigraph`) +================================== [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) +[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) -This package provides a Python API on top of Oxigraph. +This Python package provides a Python API on top of Oxigraph named `pyoxigraph`. -Oxigraph is a work in progress graph database written in Rust implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. +Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. It offers two stores with [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/) capabilities. One of the store is in-memory, and the other one is disk based. -The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/). 
+It also provides a set of utility functions for reading, writing and processing RDF files. + +The stores are also able to load and dump RDF data serialized in +[Turtle](https://www.w3.org/TR/turtle/), +[TriG](https://www.w3.org/TR/trig/), +[N-Triples](https://www.w3.org/TR/n-triples/), +[N-Quads](https://www.w3.org/TR/n-quads/) and +[RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). ## Install @@ -21,205 +29,6 @@ This could be done using the usual `pip install maturin`. This wheel could be installed using `pip install PATH` in the current Python environment where `PATH` is the path to the built Oxigraph wheel. -## Example - -Insert the triple ` "example"` and print the name of `` in SPARQL: -```python -from oxigraph import * - -store = MemoryStore() -ex = NamedNode('http://example/') -schemaName = NamedNode('http://schema.org/name') -store.add((ex, schemaName, Literal('example'))) -for binding in store.query('SELECT ?name WHERE { ?name }'): - print(binding['name'].value) -``` - -## API - -### Model - -Oxigraph provides python classes for the basic RDF model elements. - -#### `NamedNode` - -An RDF [IRI](https://www.w3.org/TR/rdf11-concepts/#dfn-iri). -```python -from oxigraph import NamedNode - -assert NamedNode('http://example.com/foo').value == 'http://example.com/foo' -assert str(NamedNode('http://example.com/foo')) == '' -``` - -#### `BlankNode` - -An RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). -```python -from oxigraph import BlankNode - -assert BlankNode('foo').value == 'foo' -assert str(BlankNode('foo')) == '_:foo' -``` - -#### `Literal` - -An RDF [literal](https://www.w3.org/TR/rdf11-concepts/#dfn-literal). 
-```python -from oxigraph import NamedNode, Literal - -assert Literal('foo').value == 'foo' -assert str(Literal('foo')) == '"foo"' - -assert Literal('foo', language='en').language == 'en' -assert str(Literal('foo', language='en')) == '"foo"@en' - -assert Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer')).datatype == NamedNode('http://www.w3.org/2001/XMLSchema#integer') -assert str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) == '"11"^^' -``` - -#### `DefaultGraph` - -The RDF [default graph name](https://www.w3.org/TR/rdf11-concepts/#dfn-default-graph). -```python -from oxigraph import DefaultGraph - -DefaultGraph() -``` - -### Stores - -Oxigraph provides two stores: - -* `MemoryStore` that stores the RDF quads in memory -* `SledStore` that stores the graph on disk using [Sled](https://github.com/spacejam/sled). - -Both stores provide a similar API. They encode an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset). - -#### Constructor - -##### `MemoryStore` - -It could be constructed using: -```python -from oxigraph import MemoryStore - -store = MemoryStore() -``` - -##### `SledStore` - -The following code creates a store using the directory `foo/bar` for storage. -```python -from oxigraph import SledStore - -store = SledStore('foo/bar') -``` - -It is also possible to use a temporary directory that will be removed when the `SledStore` Python object is dropped: -```python -from oxigraph import SledStore - -store = SledStore() -``` - -#### `add` - -To add a quad in the store: -```python -s = NamedNode('http://example.com/subject') -p = NamedNode('http://example.com/predicate') -o = NamedNode('http://example.com/object') -g = NamedNode('http://example.com/graph') -store.add((s, p, o, g)) -``` - -If a triple is provided, it is added to the default graph i.e. 
`store.add((s, p, o, g))` is the same as `store.add((s, p, o, DefaultGraph()))` - -#### `remove` - -To remove a quad from the store: -```python -store.remove((s, p, o, g)) -``` - -#### `__contains__` - -Checks if a quad is in the store: -```python -assert (s, p, o, g) in store -``` - -#### `__len__` - -Returns the number of quads in the store: -```python -assert len(store) == 1 -``` - -#### `__iter__` - -Iterates on all quads in the store: -```python -assert list(iter(store)) == [(s, p, o, g)] -``` - -#### `match` - -Returns all the quads matching a given pattern using an iterator. - -Return all the quads with the subject `s`: -```python -assert list(store.match(s, None, None, None)) == [(s, p, o, g)] -``` - -Return all the quads in the default graph: -```python -assert list(store.match(s, None, None, DefaultGraph())) == [] -``` - -#### `query` - -Executes a [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/). - -The `ASK` queries return a boolean: -```python -assert store.query('ASK { ?s ?s ?s }') -``` - -The `SELECT` queries return an iterator of query solutions that could be indexed by variable name or position in the `SELECT` clause: -```python -for solution in store.query('SELECT ?s WHERE { ?s ?p ?o }'): - assert solution[0] == solution['s'] -``` - -The `CONSTRUCT` and `DESCRIBE` queries return an iterator of triples: -```python -for (s, p, o) in store.query('CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }'): - print(s) -``` - -#### `load` - -Loads serialized RDF triples or quad into the store. -The method arguments are: -1. `data`: the serialized RDF triples or quads. -2. `mime_type`: the MIME type of the serialization. See below for the supported mime types. -3. `base_iri`: the base IRI used to resolve the relative IRIs in the serialization. -4. `to_named_graph`: for triple serialization formats, the name of the named graph the triple should be loaded to. 
- -The available formats are: -* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` -* [TriG](https://www.w3.org/TR/trig/): `application/trig` -* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` -* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` -* [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` - -Example of loading a Turtle file into the named graph `` with the base IRI `http://example.com`: -```python -store.load(' <> .', mime_type='text/turtle', base_iri="http://example.com", to_graph=NamedNode('http://example.com/graph')) -``` - - ## How to contribute The Oxigraph bindings are written in Rust using [PyO3](https://github.com/PyO3/pyo3). diff --git a/python/docs/conf.py b/python/docs/conf.py new file mode 100644 index 00000000..6d1c8e52 --- /dev/null +++ b/python/docs/conf.py @@ -0,0 +1,37 @@ +import datetime +import sys +from pathlib import Path + +import pyoxigraph + +sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) + +# -- Project information ----------------------------------------------------- + +project = "Oxigraph" +copyright = f"{datetime.date.today().year}, Oxigraph contributors" +author = pyoxigraph.__author__ +version = pyoxigraph.__version__ +release = pyoxigraph.__version__ + +# -- General configuration --------------------------------------------------- + +extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx"] + +exclude_patterns = ["build", "Thumbs.db", ".DS_Store"] + +# -- Options for HTML output ------------------------------------------------- + +html_theme = "classic" +html_static_path = [] +html_logo = "../../logo.svg" +html_favicon = "../../logo.svg" +html_theme_options = {"body_max_width": None} + +# -- Options for doctests ------------------------------------------------- + +doctest_global_setup = "from pyoxigraph import *\nimport io" + +# -- Options for intersphinx ------------------------------------------------- + 
+intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} diff --git a/python/docs/index.rst b/python/docs/index.rst new file mode 100644 index 00000000..03af302a --- /dev/null +++ b/python/docs/index.rst @@ -0,0 +1,53 @@ +Oxigraph Python (``pyoxigraph`` |release|) +========================================== + +Oxigraph is a graph database implementing the `SPARQL <https://www.w3.org/TR/sparql11-overview/>`_ standard. + +It offers two stores with `SPARQL 1.1 Query <https://www.w3.org/TR/sparql11-query/>`_ capabilities. +One of the stores is in-memory, and the other one is disk based. + +It also provides a set of utility functions for reading, writing and processing RDF files. + +The stores are also able to load and dump RDF data serialized in +`Turtle <https://www.w3.org/TR/turtle/>`_, +`TriG <https://www.w3.org/TR/trig/>`_, +`N-Triples <https://www.w3.org/TR/n-triples/>`_, +`N-Quads <https://www.w3.org/TR/n-quads/>`_ and +`RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_. + +Oxigraph source code is on `GitHub <https://github.com/oxigraph/oxigraph>`_. + + +Installation +"""""""""""" + +Just run the usual ``pip install pyoxigraph``. + + +Example +""""""" + +Insert the triple ``<http://example/> <http://schema.org/name> "example"`` and print the name of ``<http://example/>`` in SPARQL: + +:: + + from pyoxigraph import * + + store = MemoryStore() + ex = NamedNode('http://example/') + schemaName = NamedNode('http://schema.org/name') + store.add(Quad(ex, schemaName, Literal('example'))) + for binding in store.query('SELECT ?name WHERE { <http://example/> <http://schema.org/name> ?name }'): + print(binding['name'].value) + + +Table of contents +""""""""""""""""" + +.. toctree:: + :maxdepth: 2 + + model + io + store/memory + store/sled diff --git a/python/docs/io.rst b/python/docs/io.rst new file mode 100644 index 00000000..8be36f43 --- /dev/null +++ b/python/docs/io.rst @@ -0,0 +1,14 @@ +RDF Parsing and Serialization +============================= + +Oxigraph provides functions to parse and serialize RDF files: + + +Parsing +""""""" +.. autofunction:: pyoxigraph.parse + + +Serialization +""""""""""""" +.. 
autofunction:: pyoxigraph.serialize diff --git a/python/docs/model.rst b/python/docs/model.rst new file mode 100644 index 00000000..e6044c7e --- /dev/null +++ b/python/docs/model.rst @@ -0,0 +1,37 @@ +RDF Model +========= + +Oxigraph provides python classes to represent basic RDF concepts: + + +`IRIs <https://www.w3.org/TR/rdf11-concepts/#dfn-iri>`_ +""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: pyoxigraph.NamedNode + :members: + + +`Blank Nodes <https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node>`_ +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: pyoxigraph.BlankNode + :members: + + +`Literals <https://www.w3.org/TR/rdf11-concepts/#dfn-literal>`_ +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: pyoxigraph.Literal + :members: + + +`Triple <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple>`_ +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: pyoxigraph.Triple + :members: + + +Quads (`triples <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple>`_ in a `RDF dataset <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_) +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.. autoclass:: pyoxigraph.Quad + :members: + +.. autoclass:: pyoxigraph.DefaultGraph + :members: diff --git a/python/docs/store/memory.rst b/python/docs/store/memory.rst new file mode 100644 index 00000000..adb3af50 --- /dev/null +++ b/python/docs/store/memory.rst @@ -0,0 +1,5 @@ +In-Memory Store +=============== + +.. autoclass:: pyoxigraph.MemoryStore + :members: diff --git a/python/docs/store/sled.rst b/python/docs/store/sled.rst new file mode 100644 index 00000000..417c3163 --- /dev/null +++ b/python/docs/store/sled.rst @@ -0,0 +1,5 @@ +Disk-based Store +================ + +.. 
autoclass:: pyoxigraph.SledStore + :members: diff --git a/python/src/io.rs b/python/src/io.rs new file mode 100644 index 00000000..63f733be --- /dev/null +++ b/python/src/io.rs @@ -0,0 +1,247 @@ +use crate::model::{PyQuad, PyTriple}; +use crate::store_utils::map_io_err; +use oxigraph::io::read::{QuadReader, TripleReader}; +use oxigraph::io::{ + DatasetFormat, DatasetParser, DatasetSerializer, GraphFormat, GraphParser, GraphSerializer, +}; +use pyo3::exceptions::ValueError; +use pyo3::prelude::*; +use pyo3::types::PyBytes; +use pyo3::wrap_pyfunction; +use pyo3::PyIterProtocol; +use std::io; +use std::io::{BufReader, Read, Write}; + +pub fn add_to_module(module: &PyModule) -> PyResult<()> { + module.add_wrapped(wrap_pyfunction!(parse))?; + module.add_wrapped(wrap_pyfunction!(serialize)) +} + +/// Parses RDF graph and dataset serialization formats +/// +/// It currently supports the following formats: +/// +/// * `N-Triples `_ (``application/n-triples``) +/// * `N-Quads `_ (``application/n-quads``) +/// * `Turtle `_ (``text/turtle``) +/// * `TriG `_ (``application/trig``) +/// * `RDF/XML `_ (``application/rdf+xml``) +/// +/// It supports also some MIME type aliases. +/// For example ``application/turtle`` could also be used for `Turtle `_ +/// and ``application/xml`` for `RDF/XML `_. +/// +/// :param input: The binary I/O object to read from. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'rb')``. 
+/// :type input: io.RawIOBase or io.BufferedIOBase +/// :param mime_type: the MIME type of the RDF serialization +/// :type mime_type: str +/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done +/// :type base_iri: str or None +/// :return: an iterator of RDF triples or quads depending on the format +/// :rtype: iter(Triple) or iter(Quad) +/// :raises ValueError: if the MIME type is not supported +/// :raises SyntaxError: if the provided data is invalid +/// +/// >>> input = io.BytesIO(b'

"1" .') +/// >>> list(parse(input, "text/turtle", base_iri="http://example.com/")) +/// [ predicate= object=>>] +#[pyfunction] +#[text_signature = "(input, /, mime_type, *, base_iri = None)"] +pub fn parse( + input: &PyAny, + mime_type: &str, + base_iri: Option<&str>, + py: Python<'_>, +) -> PyResult { + let input = BufReader::new(PyFileLike::new(input.to_object(py))); + if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { + let mut parser = GraphParser::from_format(graph_format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| ValueError::py_err(e.to_string()))?; + } + Ok(PyTripleReader { + inner: parser.read_triples(input).map_err(map_io_err)?, + } + .into_py(py)) + } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { + let mut parser = DatasetParser::from_format(dataset_format); + if let Some(base_iri) = base_iri { + parser = parser + .with_base_iri(base_iri) + .map_err(|e| ValueError::py_err(e.to_string()))?; + } + Ok(PyQuadReader { + inner: parser.read_quads(input).map_err(map_io_err)?, + } + .into_py(py)) + } else { + Err(ValueError::py_err(format!( + "Not supported MIME type: {}", + mime_type + ))) + } +} + +/// Serializes an RDF graph or dataset +/// +/// It currently supports the following formats: +/// +/// * `N-Triples `_ (``application/n-triples``) +/// * `N-Quads `_ (``application/n-quads``) +/// * `Turtle `_ (``text/turtle``) +/// * `TriG `_ (``application/trig``) +/// * `RDF/XML `_ (``application/rdf+xml``) +/// +/// It supports also some MIME type aliases. +/// For example ``application/turtle`` could also be used for `Turtle `_ +/// and ``application/xml`` for `RDF/XML `_. +/// +/// :param input: the RDF triples and quads to serialize +/// :type input: iterable(Triple) or iterable(Quad) +/// :param output: The binary I/O object to write to. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'wb')``. 
+/// :type output: io.RawIOBase or io.BufferedIOBase +/// :param mime_type: the MIME type of the RDF serialization +/// :type mime_type: str +/// :return: :py:const:`None`, the serialized RDF is written into ``output`` +/// :rtype: None +/// :raises ValueError: if the MIME type is not supported +/// :raises TypeError: if a triple is given during a quad format serialization or reverse +/// +/// >>> output = io.BytesIO() +/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, "text/turtle") +/// >>> output.getvalue() +/// b'<http://example.com> <http://example.com/p> "1" .\n' +#[pyfunction] +#[text_signature = "(input, output, /, mime_type)"] +pub fn serialize(input: &PyAny, output: &PyAny, mime_type: &str, py: Python<'_>) -> PyResult<()> { + let output = PyFileLike::new(output.to_object(py)); + if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { + let mut writer = GraphSerializer::from_format(graph_format) + .triple_writer(output) + .map_err(map_io_err)?; + for i in input.iter()? 
{ + writer + .write(&*i?.downcast::>()?.borrow()) + .map_err(map_io_err)?; + } + writer.finish().map_err(map_io_err)?; + Ok(()) + } else { + Err(ValueError::py_err(format!( + "Not supported MIME type: {}", + mime_type + ))) + } +} + +#[pyclass(unsendable, name= TripleReader)] +pub struct PyTripleReader { + inner: TripleReader>, +} + +#[pyproto] +impl PyIterProtocol for PyTripleReader { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + slf.inner + .next() + .map(|q| Ok(q.map_err(map_io_err)?.into())) + .transpose() + } +} + +#[pyclass(unsendable, name= QuadReader)] +pub struct PyQuadReader { + inner: QuadReader>, +} + +#[pyproto] +impl PyIterProtocol for PyQuadReader { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> PyResult> { + slf.inner + .next() + .map(|q| Ok(q.map_err(map_io_err)?.into())) + .transpose() + } +} + +pub struct PyFileLike { + inner: PyObject, +} + +impl PyFileLike { + pub fn new(inner: PyObject) -> Self { + Self { inner } + } +} + +impl Read for PyFileLike { + fn read(&mut self, mut buf: &mut [u8]) -> io::Result { + let gil = Python::acquire_gil(); + let py = gil.python(); + let read = self + .inner + .call_method(py, "read", (buf.len(),), None) + .map_err(|e| to_io_err(e, py))?; + let bytes: &PyBytes = read.cast_as(py).map_err(|e| to_io_err(e, py))?; + buf.write_all(bytes.as_bytes())?; + Ok(bytes.len()?) + } +} + +impl Write for PyFileLike { + fn write(&mut self, buf: &[u8]) -> io::Result { + let gil = Python::acquire_gil(); + let py = gil.python(); + Ok(usize::extract( + self.inner + .call_method(py, "write", (PyBytes::new(py, buf),), None) + .map_err(|e| to_io_err(e, py))? + .as_ref(py), + ) + .map_err(|e| to_io_err(e, py))?) 
+ } + + fn flush(&mut self) -> io::Result<()> { + let gil = Python::acquire_gil(); + let py = gil.python(); + self.inner.call_method(py, "flush", (), None)?; + Ok(()) + } +} + +fn to_io_err(error: impl Into, py: Python<'_>) -> io::Error { + if let Ok(message) = error + .into() + .to_object(py) + .call_method(py, "__str__", (), None) + { + if let Ok(message) = message.extract::(py) { + io::Error::new(io::ErrorKind::Other, message) + } else { + io::Error::new(io::ErrorKind::Other, "An unknown error has occurred") + } + } else { + io::Error::new(io::ErrorKind::Other, "An unknown error has occurred") + } +} diff --git a/python/src/lib.rs b/python/src/lib.rs index 5b8cd3b8..e3e413aa 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -8,6 +8,7 @@ unused_qualifications )] +mod io; mod memory_store; mod model; mod sled_store; @@ -18,14 +19,20 @@ use crate::model::*; use crate::sled_store::*; use pyo3::prelude::*; -/// Oxigraph library +/// Oxigraph Python bindings #[pymodule] -fn oxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> { +fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> { + module.add("__package__", "pyoxigraph")?; + module.add("__version__", env!("CARGO_PKG_VERSION"))?; + module.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?; + module.add_class::()?; module.add_class::()?; module.add_class::()?; module.add_class::()?; + module.add_class::()?; + module.add_class::()?; module.add_class::()?; module.add_class::()?; - Ok(()) + io::add_to_module(module) } diff --git a/python/src/memory_store.rs b/python/src/memory_store.rs index ff4a36fb..6049497b 100644 --- a/python/src/memory_store.rs +++ b/python/src/memory_store.rs @@ -1,3 +1,4 @@ +use crate::io::PyFileLike; use crate::model::*; use crate::store_utils::*; use oxigraph::io::{DatasetFormat, GraphFormat}; @@ -7,12 +8,22 @@ use oxigraph::MemoryStore; use pyo3::basic::CompareOp; use pyo3::exceptions::{NotImplementedError, ValueError}; use pyo3::prelude::*; -use 
pyo3::types::PyTuple; use pyo3::{PyIterProtocol, PyObjectProtocol, PySequenceProtocol}; -use std::io::Cursor; +use std::io::BufReader; +/// In-memory store. +/// It encodes a `RDF dataset `_ and allows to query and update it using SPARQL. +/// +/// +/// The :py:func:`str` function provides a serialization of the store data compatible with NTriples, Turtle and SPARQL: +/// +/// >>> store = MemoryStore() +/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) +/// >>> str(store) +/// ' "1" .\n' #[pyclass(name = MemoryStore)] #[derive(Eq, PartialEq, Clone)] +#[text_signature = "()"] pub struct PyMemoryStore { inner: MemoryStore, } @@ -26,17 +37,55 @@ impl PyMemoryStore { } } - fn add(&self, quad: &PyTuple) -> PyResult<()> { - self.inner.insert(extract_quad(quad)?); - Ok(()) + /// Adds a quad to the store + /// + /// :param quad: the quad to add + /// :type quad: Quad + /// + /// >>> store = MemoryStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) + /// >>> list(store) + /// [ predicate= object=> graph_name=>] + #[text_signature = "($self, quad)"] + fn add(&self, quad: PyQuad) { + self.inner.insert(quad.into()); } - fn remove(&self, quad: &PyTuple) -> PyResult<()> { - self.inner.remove(&extract_quad(quad)?); - Ok(()) + /// Removes a quad from the store + /// + /// :param quad: the quad to remove + /// :type quad: Quad + /// + /// >>> store = MemoryStore() + /// >>> quad = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) + /// >>> store.add(quad) + /// >>> store.remove(quad) + /// >>> list(store) + /// [] + #[text_signature = "($self, quad)"] + fn remove(&self, quad: &PyQuad) { + self.inner.remove(quad); } - fn r#match( + /// Looks for the quads matching a given pattern + /// + /// :param subject: the quad subject 
or :py:const:`None` to match everything. + /// :type subject: NamedNode or BlankNode or None + /// :param predicate: the quad predicate or :py:const:`None` to match everything. + /// :type predicate: NamedNode or None + /// :param object: the quad object or :py:const:`None` to match everything. + /// :type object: NamedNode or BlankNode or Literal or None + /// :param graph: the quad graph name. To match only the default graph, use :py:class:`DefaultGraph`. To match everything use :py:const:`None`. + /// :type graph: NamedNode or BlankNode or DefaultGraph or None + /// :return: an iterator of the quads matching the pattern + /// :rtype: iter(Quad) + /// + /// >>> store = MemoryStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) + /// >>> list(store.quads_for_pattern(NamedNode('http://example.com'), None, None, None)) + /// [ predicate= object=> graph_name=>] + #[text_signature = "($self, subject, predicate, object, graph_name = None)"] + fn quads_for_pattern( &self, subject: &PyAny, predicate: &PyAny, @@ -55,37 +104,93 @@ impl PyMemoryStore { }) } + /// Executes a `SPARQL 1.1 query `_. + /// + /// :param query: the query to execute + /// :type query: str + /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of solution bindings for ``SELECT`` queries. 
+ /// :rtype: iter(QuerySolution) or iter(Triple) or bool + /// :raises SyntaxError: if the provided query is invalid + /// + /// ``SELECT`` query: + /// + /// >>> store = MemoryStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) + /// >>> list(solution['s'] for solution in store.query('SELECT ?s WHERE { ?s ?p ?o }')) + /// [] + /// + /// ``CONSTRUCT`` query: + /// + /// >>> store = MemoryStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) + /// >>> list(store.query('CONSTRUCT WHERE { ?s ?p ?o }')) + /// [ predicate= object=>>] + /// + /// ``ASK`` query: + /// + /// >>> store = MemoryStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) + /// >>> store.query('ASK { ?s ?p ?o }') + /// True + #[text_signature = "($self, query)"] fn query(&self, query: &str, py: Python<'_>) -> PyResult { - let results = self - .inner - .query(query, QueryOptions::default()) - .map_err(map_evaluation_error)?; + let results = py.allow_threads(move || { + self.inner + .query(query, QueryOptions::default()) + .map_err(map_evaluation_error) + })?; query_results_to_python(py, results) } - #[args(data, mime_type, "*", base_iri = "\"\"", to_graph = "None")] + /// Loads an RDF serialization into the store + /// + /// It currently supports the following formats: + /// + /// * `N-Triples `_ (``application/n-triples``) + /// * `N-Quads `_ (``application/n-quads``) + /// * `Turtle `_ (``text/turtle``) + /// * `TriG `_ (``application/trig``) + /// * `RDF/XML `_ (``application/rdf+xml``) + /// + /// It supports also some MIME type aliases. + /// For example ``application/turtle`` could also be used for `Turtle `_ + /// and ``application/xml`` for `RDF/XML `_. + /// + /// :param input: The binary I/O object to read from. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'rb')``. 
+ /// :type input: io.RawIOBase or io.BufferedIOBase + /// :param mime_type: the MIME type of the RDF serialization + /// :type mime_type: str + /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done + /// :type base_iri: str or None + /// :param to_graph: if it is a file composed of triples, the graph in which store the triples. By default, the default graph is used. + /// :type to_graph: NamedNode or BlankNode or DefaultGraph or None + /// :raises ValueError: if the MIME type is not supported or the `to_graph` parameter is given with a quad file. + /// :raises SyntaxError: if the provided data is invalid + /// + /// >>> store = MemoryStore() + /// >>> store.load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) + /// >>> list(store) + /// [ predicate= object=> graph_name=>] + #[text_signature = "($self, input, /, mime_type, *, base_iri = None, to_graph = None)"] + #[args(input, mime_type, "*", base_iri = "None", to_graph = "None")] fn load( &self, - data: &str, + input: &PyAny, mime_type: &str, - base_iri: &str, + base_iri: Option<&str>, to_graph: Option<&PyAny>, + py: Python<'_>, ) -> PyResult<()> { let to_graph_name = if let Some(graph_name) = to_graph { Some(extract_graph_name(graph_name)?) } else { None }; - let base_iri = if base_iri.is_empty() { - None - } else { - Some(base_iri) - }; - + let input = BufReader::new(PyFileLike::new(input.to_object(py))); if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { self.inner .load_graph( - Cursor::new(data), + input, graph_format, &to_graph_name.unwrap_or(GraphName::DefaultGraph), base_iri, @@ -98,7 +203,75 @@ impl PyMemoryStore { )); } self.inner - .load_dataset(Cursor::new(data), dataset_format, base_iri) + .load_dataset(input, dataset_format, base_iri) + .map_err(map_io_err) + } else { + Err(ValueError::py_err(format!( + "Not supported MIME type: {}", + mime_type + ))) + } + } + + /// Dumps the store quads or triples into a file + /// + /// It currently supports the following formats: + /// + /// * `N-Triples `_ (``application/n-triples``) + /// * `N-Quads `_ (``application/n-quads``) + /// * `Turtle `_ (``text/turtle``) + /// * `TriG `_ (``application/trig``) + /// * `RDF/XML `_ (``application/rdf+xml``) + /// + /// It supports also some MIME type aliases. + /// For example ``application/turtle`` could also be used for `Turtle `_ + /// and ``application/xml`` for `RDF/XML `_. + /// + /// :param output: The binary I/O object to write to. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'wb')``. 
+ /// :type output: io.RawIOBase or io.BufferedIOBase + /// :param mime_type: the MIME type of the RDF serialization + /// :type mime_type: str + /// :param from_graph: if a triple based format is requested, the store graph from which to dump the triples. By default, the default graph is used. + /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None + /// :raises ValueError: if the MIME type is not supported or the `from_graph` parameter is given with a quad syntax. + /// + /// >>> store = MemoryStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) + /// >>> output = io.BytesIO() + /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) + /// >>> output.getvalue() + /// b'<http://example.com> <http://example.com/p> "1" .\n' + #[text_signature = "($self, output, /, mime_type, *, from_graph = None)"] + #[args(output, mime_type, "*", from_graph = "None")] + fn dump( + &self, + output: &PyAny, + mime_type: &str, + from_graph: Option<&PyAny>, + py: Python<'_>, + ) -> PyResult<()> { + let from_graph_name = if let Some(graph_name) = from_graph { + Some(extract_graph_name(graph_name)?) 
+ } else { + None + }; + let output = PyFileLike::new(output.to_object(py)); + if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { + self.inner + .dump_graph( + output, + graph_format, + &from_graph_name.unwrap_or(GraphName::DefaultGraph), + ) + .map_err(map_io_err) + } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { + if from_graph_name.is_some() { + return Err(ValueError::py_err( + "The target graph name parameter is not available for dataset formats", + )); + } + self.inner + .dump_dataset(output, dataset_format) .map_err(map_io_err) } else { Err(ValueError::py_err(format!( @@ -130,13 +303,13 @@ impl PyObjectProtocol for PyMemoryStore { } #[pyproto] -impl PySequenceProtocol for PyMemoryStore { +impl<'p> PySequenceProtocol<'p> for PyMemoryStore { fn __len__(&self) -> usize { self.inner.len() } - fn __contains__(&self, quad: &PyTuple) -> PyResult { - Ok(self.inner.contains(&extract_quad(quad)?)) + fn __contains__(&self, quad: PyQuad) -> bool { + self.inner.contains(&quad) } } @@ -160,7 +333,7 @@ impl PyIterProtocol for QuadIter { slf.into() } - fn __next__(mut slf: PyRefMut) -> Option<(PyObject, PyObject, PyObject, PyObject)> { - slf.inner.next().map(move |q| quad_to_python(slf.py(), q)) + fn __next__(mut slf: PyRefMut) -> Option { + slf.inner.next().map(|q| q.into()) } } diff --git a/python/src/model.rs b/python/src/model.rs index 237fe4ee..6c4c9c13 100644 --- a/python/src/model.rs +++ b/python/src/model.rs @@ -1,14 +1,25 @@ use oxigraph::model::*; use pyo3::basic::CompareOp; -use pyo3::exceptions::{NotImplementedError, TypeError, ValueError}; +use pyo3::exceptions::{IndexError, NotImplementedError, TypeError, ValueError}; use pyo3::prelude::*; -use pyo3::types::PyTuple; -use pyo3::PyObjectProtocol; +use pyo3::{PyIterProtocol, PyMappingProtocol, PyObjectProtocol}; use std::collections::hash_map::DefaultHasher; use std::hash::Hash; use std::hash::Hasher; - +use std::vec::IntoIter; + +/// An RDF `node identified by 
an IRI `_ +/// +/// :param value: the IRI as a string +/// :type value: str +/// :raises ValueError: if the IRI is not valid according to `RFC 3987 `_ +/// +/// The :py:func:`str` function provides a serialization compatible with NTriples, Turtle and SPARQL: +/// +/// >>> str(NamedNode('http://example.com')) +/// '' #[pyclass(name = NamedNode)] +#[text_signature = "(value)"] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct PyNamedNode { inner: NamedNode, @@ -53,6 +64,11 @@ impl PyNamedNode { .into()) } + /// :return: the named node IRI + /// :rtype: str + /// + /// >>> NamedNode("http://example.com").value + /// 'http://example.com' #[getter] fn value(&self) -> &str { self.inner.as_str() @@ -66,7 +82,9 @@ impl PyObjectProtocol for PyNamedNode { } fn __repr__(&self) -> String { - format!("", self.inner.as_str()) + let mut buffer = String::new(); + named_node_repr(self.inner.as_ref(), &mut buffer); + buffer } fn __hash__(&self) -> u64 { @@ -78,7 +96,17 @@ impl PyObjectProtocol for PyNamedNode { } } +/// An RDF `blank node `_ +/// +/// :param value: the `blank node ID `_ (if not present, a random blank node ID is automatically generated). 
+/// :type value: str, optional +/// +/// The :py:func:`str` function provides a serialization compatible with NTriples, Turtle and SPARQL: +/// +/// >>> str(BlankNode('ex')) +/// '_:ex' #[pyclass(name = BlankNode)] +#[text_signature = "(value)"] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyBlankNode { inner: BlankNode, @@ -126,6 +154,11 @@ impl PyBlankNode { .into()) } + /// :return: the `blank node ID `_ + /// :rtype: str + /// + /// >>> BlankNode("ex").value + /// 'ex' #[getter] fn value(&self) -> &str { self.inner.as_str() @@ -139,7 +172,9 @@ impl PyObjectProtocol for PyBlankNode { } fn __repr__(&self) -> String { - format!("", self.inner.as_str()) + let mut buffer = String::new(); + blank_node_repr(self.inner.as_ref(), &mut buffer); + buffer } fn __hash__(&self) -> u64 { @@ -151,7 +186,26 @@ impl PyObjectProtocol for PyBlankNode { } } +/// An RDF `literal `_ +/// +/// :param value: the literal value or `lexical form `_ +/// :type value: str +/// :param datatype: the literal `datatype IRI `_. 
+/// :type datatype: NamedNode, optional +/// :param language: the literal `language tag `_ +/// :type language: str, optional +/// :raises ValueError: if the language tag is not valid according to `RFC 5646 `_ (`BCP 47 `_) +/// +/// The :py:func:`str` function provides a serialization compatible with NTriples, Turtle and SPARQL: +/// +/// >>> str(Literal('example')) +/// '"example"' +/// >>> str(Literal('example', language='en')) +/// '"example"@en' +/// >>> str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) +/// '"11"^^' #[pyclass(name = Literal)] +#[text_signature = "(value, *, datatype = None, language = None)"] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyLiteral { inner: Literal, @@ -178,7 +232,7 @@ impl From for Term { #[pymethods] impl PyLiteral { #[new] - #[args(value, "*", language = "None", datatype = "None")] + #[args(value, "*", datatype = "None", language = "None")] fn new( value: String, language: Option, @@ -202,16 +256,37 @@ impl PyLiteral { .into()) } + /// :return: the literal value or `lexical form `_ + /// :rtype: str + /// + /// >>> Literal("example").value + /// 'example' #[getter] fn value(&self) -> &str { self.inner.value() } + /// :return: the literal `language tag `_ + /// :rtype: str or None + /// + /// >>> Literal('example', language='en').language + /// 'en' + /// >>> Literal('example').language + /// #[getter] fn language(&self) -> Option<&str> { self.inner.language() } + /// :return: the literal `datatype IRI `_ + /// :rtype: NamedNode + /// + /// >>> Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer')).datatype + /// + /// >>> Literal('example').datatype + /// + /// >>> Literal('example', language='en').datatype + /// #[getter] fn datatype(&self) -> PyNamedNode { self.inner.datatype().into_owned().into() @@ -225,12 +300,9 @@ impl PyObjectProtocol for PyLiteral { } fn __repr__(&self) -> String { - format!( - "", - self.inner.value(), - 
self.inner.language().unwrap_or(""), - self.inner.datatype().as_str() - ) + let mut buffer = String::new(); + literal_repr(self.inner.as_ref(), &mut buffer); + buffer } fn __hash__(&self) -> u64 { @@ -242,6 +314,7 @@ impl PyObjectProtocol for PyLiteral { } } +/// The RDF `default graph name `_ #[pyclass(name = DefaultGraph)] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub struct PyDefaultGraph {} @@ -284,12 +357,351 @@ impl PyObjectProtocol for PyDefaultGraph { } } +/// An RDF `triple `_ +/// +/// :param subject: the triple subject +/// :type subject: NamedNode or BlankNode +/// :param predicate: the triple predicate +/// :type predicate: NamedNode +/// :param object: the triple object +/// :type object: NamedNode or BlankNode or Literal +/// +/// The :py:func:`str` function provides a serialization compatible with NTriples, Turtle and SPARQL: +/// +/// >>> str(Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) +/// ' "1" .' +/// +/// A triple could also be easily destructed into its components: +/// +/// >>> (s, p, o) = Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')) +#[pyclass(name = Triple)] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[text_signature = "(subject, predicate, object)"] +pub struct PyTriple { + inner: Triple, +} + +impl From for PyTriple { + fn from(inner: Triple) -> Self { + Self { inner } + } +} + +impl From for Triple { + fn from(node: PyTriple) -> Self { + node.inner + } +} + +impl<'a> From<&'a PyTriple> for TripleRef<'a> { + fn from(node: &'a PyTriple) -> Self { + node.inner.as_ref() + } +} + +#[pymethods] +impl PyTriple { + #[new] + fn new(subject: &PyAny, predicate: &PyAny, object: &PyAny) -> PyResult { + Ok(Triple::new( + extract_named_or_blank_node(subject)?, + extract_named_node(predicate)?, + extract_term(object)?, + ) + .into()) + } + + /// :return: the triple subject + /// :rtype: NamedNode or BlankNode + /// + /// >>> 
Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')).subject + /// + #[getter] + fn subject(&self, py: Python<'_>) -> PyObject { + named_or_blank_node_to_python(py, self.inner.subject.clone()) + } + + /// :return: the triple predicate + /// :rtype: NamedNode + /// + /// >>> Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')).predicate + /// + #[getter] + fn predicate(&self) -> PyNamedNode { + self.inner.predicate.clone().into() + } + + /// :return: the triple object + /// :rtype: NamedNode or BlankNode or Literal + /// + /// >>> Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')).object + /// > + #[getter] + fn object(&self, py: Python<'_>) -> PyObject { + term_to_python(py, self.inner.object.clone()) + } +} + +#[pyproto] +impl PyObjectProtocol for PyTriple { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __repr__(&self) -> String { + let mut buffer = String::new(); + buffer.push_str(") -> TripleComponentsIter { + TripleComponentsIter { + inner: vec![ + slf.inner.subject.clone().into(), + slf.inner.predicate.clone().into(), + slf.inner.object.clone(), + ] + .into_iter(), + } + } +} + +/// An RDF `triple `_ +/// in a `RDF dataset `_ +/// +/// :param subject: the quad subject +/// :type subject: NamedNode or BlankNode +/// :param predicate: the quad predicate +/// :type predicate: NamedNode +/// :param object: the quad object +/// :type object: NamedNode or BlankNode or Literal +/// :param graph_name: the quad graph name. If not present, the default graph is assumed. +/// :type graph_name: NamedNode or BlankNode or DefaultGraph or None +/// +/// The :py:func:`str` function provides a serialization compatible with NTriples, Turtle and SPARQL: +/// +/// >>> str(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) +/// '<http://example.com> <http://example.com/p> "1" <http://example.com/g> .' 
+/// +/// >>> str(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), DefaultGraph())) +/// ' "1" .' +/// +/// A quad could also be easily destructed into its components: +/// +/// >>> (s, p, o, g) = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) +#[pyclass(name = Quad)] +#[text_signature = "(subject, predicate, object, graph_name = None)"] +#[derive(Eq, PartialEq, Debug, Clone, Hash)] +pub struct PyQuad { + inner: Quad, +} + +impl From for PyQuad { + fn from(inner: Quad) -> Self { + Self { inner } + } +} + +impl From for Quad { + fn from(node: PyQuad) -> Self { + node.inner + } +} + +impl<'a> From<&'a PyQuad> for QuadRef<'a> { + fn from(node: &'a PyQuad) -> Self { + node.inner.as_ref() + } +} + +#[pymethods] +impl PyQuad { + #[new] + fn new( + subject: &PyAny, + predicate: &PyAny, + object: &PyAny, + graph_name: Option<&PyAny>, + ) -> PyResult { + Ok(Quad::new( + extract_named_or_blank_node(subject)?, + extract_named_node(predicate)?, + extract_term(object)?, + if let Some(graph_name) = graph_name { + extract_graph_name(graph_name)? 
+ } else { + GraphName::DefaultGraph + }, + ) + .into()) + } + + /// :return: the quad subject + /// :rtype: NamedNode or BlankNode + /// + /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).subject + /// + #[getter] + fn subject(&self, py: Python<'_>) -> PyObject { + named_or_blank_node_to_python(py, self.inner.subject.clone()) + } + + /// :return: the quad predicate + /// :rtype: NamedNode + /// + /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).predicate + /// + #[getter] + fn predicate(&self) -> PyNamedNode { + self.inner.predicate.clone().into() + } + + /// :return: the quad object + /// :rtype: NamedNode or BlankNode or Literal + /// + /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).object + /// > + #[getter] + fn object(&self, py: Python<'_>) -> PyObject { + term_to_python(py, self.inner.object.clone()) + } + + /// :return: the quad graph name + /// :rtype: NamedNode or BlankNode or DefaultGraph + /// + /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).graph_name + /// + #[getter] + fn graph_name(&self, py: Python<'_>) -> PyObject { + graph_name_to_python(py, self.inner.graph_name.clone()) + } + + /// :return: the quad underlying triple + /// :rtype: Triple + /// + /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).triple + /// predicate= object=>> + #[getter] + fn triple(&self) -> PyTriple { + Triple::from(self.inner.clone()).into() + } +} + +#[pyproto] +impl PyObjectProtocol for PyQuad { + fn __str__(&self) -> String { + self.inner.to_string() + } + + fn __repr__(&self) -> String { + let mut buffer = String::new(); + buffer.push_str(") -> QuadComponentsIter { + 
QuadComponentsIter { + inner: vec![ + Some(slf.inner.subject.clone().into()), + Some(slf.inner.predicate.clone().into()), + Some(slf.inner.object.clone()), + match slf.inner.graph_name.clone() { + GraphName::NamedNode(node) => Some(node.into()), + GraphName::BlankNode(node) => Some(node.into()), + GraphName::DefaultGraph => None, + }, + ] + .into_iter(), + } + } +} + pub fn extract_named_node(py: &PyAny) -> PyResult { if let Ok(node) = py.downcast::>() { Ok(node.borrow().clone().into()) } else { Err(TypeError::py_err(format!( - "{} is not a RDF named node", + "{} is not an RDF named node", py.get_type().name(), ))) } @@ -302,7 +714,7 @@ pub fn extract_named_or_blank_node(py: &PyAny) -> PyResult { Ok(node.borrow().clone().into()) } else { Err(TypeError::py_err(format!( - "{} is not a RDF named or blank node", + "{} is not an RDF named or blank node", py.get_type().name(), ))) } @@ -324,7 +736,7 @@ pub fn extract_term(py: &PyAny) -> PyResult { Ok(literal.borrow().clone().into()) } else { Err(TypeError::py_err(format!( - "{} is not a RDF named or blank node", + "{} is not an RDF named or blank node", py.get_type().name(), ))) } @@ -361,42 +773,6 @@ pub fn graph_name_to_python(py: Python<'_>, name: GraphName) -> PyObject { } } -pub fn triple_to_python(py: Python<'_>, triple: Triple) -> (PyObject, PyObject, PyObject) { - ( - named_or_blank_node_to_python(py, triple.subject), - PyNamedNode::from(triple.predicate).into_py(py), - term_to_python(py, triple.object), - ) -} - -pub fn extract_quad(tuple: &PyTuple) -> PyResult { - let len = tuple.len(); - if len != 3 && len != 4 { - return Err(TypeError::py_err( - "A quad should be tuple with 3 or 4 elements", - )); - } - Ok(Quad { - subject: extract_named_or_blank_node(tuple.get_item(0))?, - predicate: extract_named_node(tuple.get_item(1))?, - object: extract_term(tuple.get_item(2))?, - graph_name: if len == 4 { - extract_graph_name(tuple.get_item(3))? 
- } else { - GraphName::DefaultGraph - }, - }) -} - -pub fn quad_to_python(py: Python<'_>, quad: Quad) -> (PyObject, PyObject, PyObject, PyObject) { - ( - named_or_blank_node_to_python(py, quad.subject), - PyNamedNode::from(quad.predicate).into_py(py), - term_to_python(py, quad.object), - graph_name_to_python(py, quad.graph_name), - ) -} - fn eq_compare(a: &T, b: &T, op: CompareOp) -> PyResult { match op { CompareOp::Eq => Ok(a == b), @@ -415,8 +791,88 @@ fn eq_ord_compare(a: &T, b: &T, op: CompareOp) -> bool { CompareOp::Ge => a >= b, } } + fn hash(t: &impl Hash) -> u64 { let mut s = DefaultHasher::new(); t.hash(&mut s); s.finish() } + +fn named_node_repr(node: NamedNodeRef<'_>, buffer: &mut String) { + buffer.push_str(""), + } +} + +#[pyclass(unsendable)] +pub struct TripleComponentsIter { + inner: IntoIter, +} + +#[pyproto] +impl PyIterProtocol for TripleComponentsIter { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> Option { + slf.inner.next().map(move |t| term_to_python(slf.py(), t)) + } +} + +#[pyclass(unsendable)] +pub struct QuadComponentsIter { + inner: IntoIter>, +} + +#[pyproto] +impl PyIterProtocol for QuadComponentsIter { + fn __iter__(slf: PyRefMut) -> Py { + slf.into() + } + + fn __next__(mut slf: PyRefMut) -> Option { + slf.inner.next().map(move |t| { + if let Some(t) = t { + term_to_python(slf.py(), t) + } else { + PyDefaultGraph {}.into_py(slf.py()) + } + }) + } +} diff --git a/python/src/sled_store.rs b/python/src/sled_store.rs index 60882ee0..75278397 100644 --- a/python/src/sled_store.rs +++ b/python/src/sled_store.rs @@ -1,3 +1,4 @@ +use crate::io::PyFileLike; use crate::model::*; use crate::store_utils::*; use oxigraph::io::{DatasetFormat, GraphFormat}; @@ -6,12 +7,29 @@ use oxigraph::sparql::QueryOptions; use oxigraph::SledStore; use pyo3::exceptions::ValueError; use pyo3::prelude::*; -use pyo3::types::PyTuple; use pyo3::{PyIterProtocol, PyObjectProtocol, PySequenceProtocol}; use std::io; -use 
std::io::Cursor; +use std::io::BufReader; +/// Store based on the `Sled <https://sled.rs/>`_ key-value database. +/// +/// Disk-based store. +/// It encodes a `RDF dataset <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_ and allows to query and update it using SPARQL. +/// +/// :param path: the path of the directory in which Sled should read and write its data. If the directory does not exist, it is created. If no directory is provided a temporary one is created and removed when the Python garbage collector removes the store. +/// :type path: str or None +/// :raises IOError: if the target directory contains invalid data or could not be accessed +/// +/// Warning: Sled is not stable yet and might break its storage format. +/// +/// The :py:func:`str` function provides a serialization of the store data compatible with NTriples, Turtle and SPARQL: +/// +/// >>> store = SledStore() +/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) +/// >>> str(store) +/// '<http://example.com> <http://example.com/p> "1" <http://example.com/g> .\n' +#[pyclass(name = SledStore)] +#[text_signature = "(path = None)"] #[derive(Clone)] pub struct PySledStore { inner: SledStore, @@ -23,22 +41,66 @@ impl PySledStore { fn new(path: Option<&str>) -> PyResult { Ok(Self { inner: if let Some(path) = path { - SledStore::open(path).map_err(map_io_err)? + SledStore::open(path) } else { - SledStore::new().map_err(map_io_err)? 
- }, + SledStore::new() + } + .map_err(map_io_err)?, }) } - fn add(&self, quad: &PyTuple) -> PyResult<()> { - self.inner.insert(&extract_quad(quad)?).map_err(map_io_err) + /// Adds a quad to the store + /// + /// :param quad: the quad to add + /// :type quad: Quad + /// :raises IOError: if an I/O error happens during the quad insertion + /// + /// >>> store = SledStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) + /// >>> list(store) + /// [ predicate= object=> graph_name=>] + #[text_signature = "($self, quad)"] + fn add(&self, quad: &PyQuad) -> PyResult<()> { + self.inner.insert(quad).map_err(map_io_err) } - fn remove(&self, quad: &PyTuple) -> PyResult<()> { - self.inner.remove(&extract_quad(quad)?).map_err(map_io_err) + /// Removes a quad from the store + /// + /// :param quad: the quad to remove + /// :type quad: Quad + /// :raises IOError: if an I/O error happens during the quad removal + /// + /// >>> store = SledStore() + /// >>> quad = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) + /// >>> store.add(quad) + /// >>> store.remove(quad) + /// >>> list(store) + /// [] + #[text_signature = "($self, quad)"] + fn remove(&self, quad: &PyQuad) -> PyResult<()> { + self.inner.remove(quad).map_err(map_io_err) } - fn r#match( + /// Looks for the quads matching a given pattern + /// + /// :param subject: the quad subject or :py:const:`None` to match everything. + /// :type subject: NamedNode or BlankNode or None + /// :param predicate: the quad predicate or :py:const:`None` to match everything. + /// :type predicate: NamedNode or None + /// :param object: the quad object or :py:const:`None` to match everything. + /// :type object: NamedNode or BlankNode or Literal or None + /// :param graph: the quad graph name. To match only the default graph, use :py:class:`DefaultGraph`. 
To match everything use :py:const:`None`. + /// :type graph: NamedNode or BlankNode or DefaultGraph or None + /// :return: an iterator of the quads matching the pattern + /// :rtype: iter(Quad) + /// :raises IOError: if an I/O error happens during the quads lookup + /// + /// >>> store = SledStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) + /// >>> list(store.quads_for_pattern(NamedNode('http://example.com'), None, None, None)) + /// [ predicate= object=> graph_name=>] + #[text_signature = "($self, subject, predicate, object, graph_name = None)"] + fn quads_for_pattern( &self, subject: &PyAny, predicate: &PyAny, @@ -57,37 +119,95 @@ impl PySledStore { }) } + /// Executes a `SPARQL 1.1 query `_. + /// + /// :param query: the query to execute + /// :type query: str + /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of solution bindings for ``SELECT`` queries. 
+ /// :rtype: iter(QuerySolution) or iter(Triple) or bool + /// :raises SyntaxError: if the provided query is invalid + /// :raises IOError: if an I/O error happens while reading the store + /// + /// ``SELECT`` query: + /// + /// >>> store = SledStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) + /// >>> list(solution['s'] for solution in store.query('SELECT ?s WHERE { ?s ?p ?o }')) + /// [] + /// + /// ``CONSTRUCT`` query: + /// + /// >>> store = SledStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) + /// >>> list(store.query('CONSTRUCT WHERE { ?s ?p ?o }')) + /// [ predicate= object=>>] + /// + /// ``ASK`` query: + /// + /// >>> store = SledStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) + /// >>> store.query('ASK { ?s ?p ?o }') + /// True + #[text_signature = "($self, query)"] fn query(&self, query: &str, py: Python<'_>) -> PyResult { - let results = self - .inner - .query(query, QueryOptions::default()) - .map_err(map_evaluation_error)?; + let results = py.allow_threads(move || { + self.inner + .query(query, QueryOptions::default()) + .map_err(map_evaluation_error) + })?; query_results_to_python(py, results) } - #[args(data, mime_type, "*", base_iri = "\"\"", to_graph = "None")] + /// Loads an RDF serialization into the store + /// + /// It currently supports the following formats: + /// + /// * `N-Triples `_ (``application/n-triples``) + /// * `N-Quads `_ (``application/n-quads``) + /// * `Turtle `_ (``text/turtle``) + /// * `TriG `_ (``application/trig``) + /// * `RDF/XML `_ (``application/rdf+xml``) + /// + /// It supports also some MIME type aliases. + /// For example ``application/turtle`` could also be used for `Turtle `_ + /// and ``application/xml`` for `RDF/XML `_. + /// + /// :param input: The binary I/O object to read from. 
For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'rb')``. + /// :type input: io.RawIOBase or io.BufferedIOBase + /// :param mime_type: the MIME type of the RDF serialization + /// :type mime_type: str + /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done + /// :type base_iri: str or None + /// :param to_graph: if it is a file composed of triples, the graph in which store the triples. By default, the default graph is used. + /// :type to_graph: NamedNode or BlankNode or DefaultGraph or None + /// :raises ValueError: if the MIME type is not supported or the `to_graph` parameter is given with a quad file. + /// :raises SyntaxError: if the provided data is invalid + /// :raises IOError: if an I/O error happens during a quad insertion + /// + /// >>> store = SledStore() + /// >>> store.load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) + /// >>> list(store) + /// [ predicate= object=> graph_name=>] + #[text_signature = "($self, data, /, mime_type, *, base_iri = None, to_graph = None)"] + #[args(input, mime_type, "*", base_iri = "None", to_graph = "None")] fn load( &self, - data: &str, + input: &PyAny, mime_type: &str, - base_iri: &str, + base_iri: Option<&str>, to_graph: Option<&PyAny>, + py: Python<'_>, ) -> PyResult<()> { let to_graph_name = if let Some(graph_name) = to_graph { Some(extract_graph_name(graph_name)?) } else { None }; - let base_iri = if base_iri.is_empty() { - None - } else { - Some(base_iri) - }; - + let input = BufReader::new(PyFileLike::new(input.to_object(py))); if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { self.inner .load_graph( - Cursor::new(data), + input, graph_format, &to_graph_name.unwrap_or(GraphName::DefaultGraph), base_iri, @@ -100,7 +220,76 @@ impl PySledStore { )); } self.inner - .load_dataset(Cursor::new(data), dataset_format, base_iri) + .load_dataset(input, dataset_format, base_iri) + .map_err(map_io_err) + } else { + Err(ValueError::py_err(format!( + "Not supported MIME type: {}", + mime_type + ))) + } + } + + /// Dumps the store quads or triples into a file + /// + /// It currently supports the following formats: + /// + /// * `N-Triples `_ (``application/n-triples``) + /// * `N-Quads `_ (``application/n-quads``) + /// * `Turtle `_ (``text/turtle``) + /// * `TriG `_ (``application/trig``) + /// * `RDF/XML `_ (``application/rdf+xml``) + /// + /// It supports also some MIME type aliases. + /// For example ``application/turtle`` could also be used for `Turtle `_ + /// and ``application/xml`` for `RDF/XML `_. + /// + /// :param output: The binary I/O object to write to. For example, it could be a file opened in binary mode with ``open('my_file.ttl', 'wb')``. 
+ /// :type output: io.RawIOBase or io.BufferedIOBase + /// :param mime_type: the MIME type of the RDF serialization + /// :type mime_type: str + /// :param from_graph: if a triple based format is requested, the store graph from which to dump the triples. By default, the default graph is used. + /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None + /// :raises ValueError: if the MIME type is not supported or the `from_graph` parameter is given with a quad syntax. + /// :raises IOError: if an I/O error happens during a quad lookup + /// + /// >>> store = SledStore() + /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) + /// >>> output = io.BytesIO() + /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) + /// >>> output.getvalue() + /// b'<http://example.com> <http://example.com/p> "1" .\n' + #[text_signature = "($self, output, /, mime_type, *, from_graph = None)"] + #[args(output, mime_type, "*", from_graph = "None")] + fn dump( + &self, + output: &PyAny, + mime_type: &str, + from_graph: Option<&PyAny>, + py: Python<'_>, + ) -> PyResult<()> { + let from_graph_name = if let Some(graph_name) = from_graph { + Some(extract_graph_name(graph_name)?) 
+ } else { + None + }; + let output = PyFileLike::new(output.to_object(py)); + if let Some(graph_format) = GraphFormat::from_media_type(mime_type) { + self.inner + .dump_graph( + output, + graph_format, + &from_graph_name.unwrap_or(GraphName::DefaultGraph), + ) + .map_err(map_io_err) + } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) { + if from_graph_name.is_some() { + return Err(ValueError::py_err( + "The target graph name parameter is not available for dataset formats", + )); + } + self.inner + .dump_dataset(output, dataset_format) .map_err(map_io_err) } else { Err(ValueError::py_err(format!( @@ -128,10 +317,8 @@ impl PySequenceProtocol for PySledStore { self.inner.len() } - fn __contains__(&self, quad: &PyTuple) -> PyResult { - self.inner - .contains(&extract_quad(quad)?) - .map_err(map_io_err) + fn __contains__(&self, quad: PyQuad) -> PyResult { + self.inner.contains(&quad).map_err(map_io_err) } } @@ -155,12 +342,10 @@ impl PyIterProtocol for QuadIter { slf.into() } - fn __next__( - mut slf: PyRefMut, - ) -> PyResult> { + fn __next__(mut slf: PyRefMut) -> PyResult> { slf.inner .next() - .map(move |q| Ok(quad_to_python(slf.py(), q.map_err(map_io_err)?))) + .map(|q| Ok(q.map_err(map_io_err)?.into())) .transpose() } } diff --git a/python/src/store_utils.rs b/python/src/store_utils.rs index ff9b5e8c..139786d1 100644 --- a/python/src/store_utils.rs +++ b/python/src/store_utils.rs @@ -3,10 +3,9 @@ use oxigraph::model::*; use oxigraph::sparql::{ EvaluationError, QueryResult, QuerySolution, QuerySolutionsIterator, QueryTriplesIterator, }; -use pyo3::exceptions::{IOError, RuntimeError, TypeError, ValueError}; +use pyo3::exceptions::{IOError, RuntimeError, SyntaxError, TypeError, ValueError}; use pyo3::prelude::*; use pyo3::{PyIterProtocol, PyMappingProtocol, PyNativeType, PyObjectProtocol}; -use std::fmt::Write; use std::io; pub fn extract_quads_pattern( @@ -56,7 +55,7 @@ pub fn query_results_to_python(py: Python<'_>, results: QueryResult) 
-> PyResult }) } -#[pyclass(unsendable)] +#[pyclass(unsendable, name = QuerySolution)] pub struct PyQuerySolution { inner: QuerySolution, } @@ -64,9 +63,13 @@ pub struct PyQuerySolution { #[pyproto] impl PyObjectProtocol for PyQuerySolution { fn __repr__(&self) -> String { - let mut buffer = "'); buffer @@ -131,20 +134,21 @@ impl PyIterProtocol for TripleResultIter { slf.into() } - fn __next__(mut slf: PyRefMut) -> PyResult> { + fn __next__(mut slf: PyRefMut) -> PyResult> { Ok(slf .inner .next() .transpose() .map_err(map_evaluation_error)? - .map(move |t| triple_to_python(slf.py(), t))) + .map(|t| t.into())) } } pub fn map_io_err(error: io::Error) -> PyErr { match error.kind() { - io::ErrorKind::InvalidInput | io::ErrorKind::InvalidData => { - ValueError::py_err(error.to_string()) + io::ErrorKind::InvalidInput => ValueError::py_err(error.to_string()), + io::ErrorKind::InvalidData | io::ErrorKind::UnexpectedEof => { + SyntaxError::py_err(error.to_string()) } _ => IOError::py_err(error.to_string()), } @@ -152,7 +156,7 @@ pub fn map_io_err(error: io::Error) -> PyErr { pub fn map_evaluation_error(error: EvaluationError) -> PyErr { match error { - EvaluationError::Parsing(error) => ValueError::py_err(error.to_string()), + EvaluationError::Parsing(error) => SyntaxError::py_err(error.to_string()), EvaluationError::Io(error) => map_io_err(error), EvaluationError::Query(error) => ValueError::py_err(error.to_string()), _ => RuntimeError::py_err(error.to_string()), diff --git a/python/tests/test_model.py b/python/tests/test_model.py index 7b17ad77..72587612 100644 --- a/python/tests/test_model.py +++ b/python/tests/test_model.py @@ -1,5 +1,5 @@ import unittest -from oxigraph import * +from pyoxigraph import * XSD_STRING = NamedNode("http://www.w3.org/2001/XMLSchema#string") XSD_INTEGER = NamedNode("http://www.w3.org/2001/XMLSchema#integer") @@ -65,5 +65,120 @@ class TestLiteral(unittest.TestCase): # TODO self.assertNotEqual(Literal('foo'), BlankNode('foo')) +class 
TestTriple(unittest.TestCase): + def test_constructor(self): + t = Triple( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ) + self.assertEqual(t.subject, NamedNode("http://example.com/s")) + self.assertEqual(t.predicate, NamedNode("http://example.com/p")) + self.assertEqual(t.object, NamedNode("http://example.com/o")) + + def test_mapping(self): + t = Triple( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ) + self.assertEqual(t[0], NamedNode("http://example.com/s")) + self.assertEqual(t[1], NamedNode("http://example.com/p")) + self.assertEqual(t[2], NamedNode("http://example.com/o")) + + def test_destruct(self): + (s, p, o) = Triple( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ) + self.assertEqual(s, NamedNode("http://example.com/s")) + self.assertEqual(p, NamedNode("http://example.com/p")) + self.assertEqual(o, NamedNode("http://example.com/o")) + + def test_string(self): + self.assertEqual( + str( + Triple( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ) + ), + " .", + ) + + +class TestQuad(unittest.TestCase): + def test_constructor(self): + t = Quad( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + NamedNode("http://example.com/g"), + ) + self.assertEqual(t.subject, NamedNode("http://example.com/s")) + self.assertEqual(t.predicate, NamedNode("http://example.com/p")) + self.assertEqual(t.object, NamedNode("http://example.com/o")) + self.assertEqual(t.graph_name, NamedNode("http://example.com/g")) + self.assertEqual( + t.triple, + Triple( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ), + ) + self.assertEqual( + Quad( + NamedNode("http://example.com/s"), + 
NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ), + Quad( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + DefaultGraph(), + ), + ) + + def test_mapping(self): + t = Quad( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + NamedNode("http://example.com/g"), + ) + self.assertEqual(t[0], NamedNode("http://example.com/s")) + self.assertEqual(t[1], NamedNode("http://example.com/p")) + self.assertEqual(t[2], NamedNode("http://example.com/o")) + self.assertEqual(t[3], NamedNode("http://example.com/g")) + + def test_destruct(self): + (s, p, o, g) = Quad( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + NamedNode("http://example.com/g"), + ) + self.assertEqual(s, NamedNode("http://example.com/s")) + self.assertEqual(p, NamedNode("http://example.com/p")) + self.assertEqual(o, NamedNode("http://example.com/o")) + self.assertEqual(g, NamedNode("http://example.com/g")) + + def test_string(self): + self.assertEqual( + str( + Triple( + NamedNode("http://example.com/s"), + NamedNode("http://example.com/p"), + NamedNode("http://example.com/o"), + ) + ), + " .", + ) + + if __name__ == "__main__": unittest.main() diff --git a/python/tests/test_store.py b/python/tests/test_store.py index a6e48e7e..63be3f21 100644 --- a/python/tests/test_store.py +++ b/python/tests/test_store.py @@ -1,7 +1,8 @@ import unittest from abc import ABC, abstractmethod +from io import BytesIO -from oxigraph import * +from pyoxigraph import * foo = NamedNode("http://foo") bar = NamedNode("http://bar") @@ -16,80 +17,81 @@ class TestAbstractStore(unittest.TestCase, ABC): def test_add(self): store = self.store() - store.add((foo, bar, baz)) - store.add((foo, bar, baz, DefaultGraph())) - store.add((foo, bar, baz, graph)) + store.add(Quad(foo, bar, baz)) + store.add(Quad(foo, bar, baz, 
DefaultGraph())) + store.add(Quad(foo, bar, baz, graph)) self.assertEqual(len(store), 2) def test_remove(self): store = self.store() - store.add((foo, bar, baz)) - store.add((foo, bar, baz, DefaultGraph())) - store.add((foo, bar, baz, graph)) - store.remove((foo, bar, baz)) + store.add(Quad(foo, bar, baz)) + store.add(Quad(foo, bar, baz, DefaultGraph())) + store.add(Quad(foo, bar, baz, graph)) + store.remove(Quad(foo, bar, baz)) self.assertEqual(len(store), 1) def test_len(self): store = self.store() - store.add((foo, bar, baz)) - store.add((foo, bar, baz, graph)) + store.add(Quad(foo, bar, baz)) + store.add(Quad(foo, bar, baz, graph)) self.assertEqual(len(store), 2) def test_in(self): store = self.store() - store.add((foo, bar, baz)) - store.add((foo, bar, baz, DefaultGraph())) - store.add((foo, bar, baz, graph)) - self.assertTrue((foo, bar, baz) in store) - self.assertTrue((foo, bar, baz, DefaultGraph()) in store) - self.assertTrue((foo, bar, baz, graph) in store) - self.assertTrue((foo, bar, baz, foo) not in store) + store.add(Quad(foo, bar, baz)) + store.add(Quad(foo, bar, baz, DefaultGraph())) + store.add(Quad(foo, bar, baz, graph)) + self.assertTrue(Quad(foo, bar, baz) in store) + self.assertTrue(Quad(foo, bar, baz, DefaultGraph()) in store) + self.assertTrue(Quad(foo, bar, baz, graph) in store) + self.assertTrue(Quad(foo, bar, baz, foo) not in store) def test_iter(self): store = self.store() - store.add((foo, bar, baz, DefaultGraph())) - store.add((foo, bar, baz, graph)) + store.add(Quad(foo, bar, baz, DefaultGraph())) + store.add(Quad(foo, bar, baz, graph)) self.assertEqual( - set(store), {(foo, bar, baz, DefaultGraph()), (foo, bar, baz, graph)} + set(store), + {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, ) - def test_match(self): + def test_quads_for_pattern(self): store = self.store() - store.add((foo, bar, baz, DefaultGraph())) - store.add((foo, bar, baz, graph)) + store.add(Quad(foo, bar, baz, DefaultGraph())) + store.add(Quad(foo, 
bar, baz, graph)) self.assertEqual( - set(store.match(None, None, None)), - {(foo, bar, baz, DefaultGraph()), (foo, bar, baz, graph)}, + set(store.quads_for_pattern(None, None, None)), + {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, ) self.assertEqual( - set(store.match(foo, None, None)), - {(foo, bar, baz, DefaultGraph()), (foo, bar, baz, graph)}, + set(store.quads_for_pattern(foo, None, None)), + {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, ) self.assertEqual( - set(store.match(None, None, None, graph)), {(foo, bar, baz, graph)}, + set(store.quads_for_pattern(None, None, None, graph)), {Quad(foo, bar, baz, graph)}, ) self.assertEqual( - set(store.match(foo, None, None, DefaultGraph())), - {(foo, bar, baz, DefaultGraph())}, + set(store.quads_for_pattern(foo, None, None, DefaultGraph())), + {Quad(foo, bar, baz, DefaultGraph())}, ) def test_ask_query(self): store = self.store() - store.add((foo, foo, foo)) + store.add(Quad(foo, foo, foo)) self.assertTrue(store.query("ASK { ?s ?s ?s }")) self.assertFalse(store.query("ASK { FILTER(false) }")) def test_construct_query(self): store = self.store() - store.add((foo, bar, baz)) + store.add(Quad(foo, bar, baz)) self.assertEqual( set(store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }")), - {(foo, bar, baz)}, + {Triple(foo, bar, baz)}, ) def test_select_query(self): store = self.store() - store.add((foo, bar, baz)) + store.add(Quad(foo, bar, baz)) results = list(store.query("SELECT ?s WHERE { ?s ?p ?o }")) self.assertEqual(len(results), 1) self.assertEqual(results[0][0], foo) @@ -98,45 +100,65 @@ class TestAbstractStore(unittest.TestCase, ABC): def test_load_ntriples_to_default_graph(self): store = self.store() store.load( - " .", + BytesIO(b" ."), mime_type="application/n-triples", ) - self.assertEqual(set(store), {(foo, bar, baz, DefaultGraph())}) + self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) def test_load_ntriples_to_named_graph(self): store = 
self.store() store.load( - " .", + BytesIO(b" ."), mime_type="application/n-triples", to_graph=graph, ) - self.assertEqual(set(store), {(foo, bar, baz, graph)}) + self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) def test_load_turtle_with_base_iri(self): store = self.store() store.load( - " <> .", + BytesIO(b" <> ."), mime_type="text/turtle", base_iri="http://baz", ) - self.assertEqual(set(store), {(foo, bar, baz, DefaultGraph())}) + self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) def test_load_nquads(self): store = self.store() store.load( - " .", + BytesIO(b" ."), mime_type="application/n-quads", ) - self.assertEqual(set(store), {(foo, bar, baz, graph)}) + self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) def test_load_trig_with_base_iri(self): store = self.store() store.load( - " { <> . }", + BytesIO(b" { <> . }"), mime_type="application/trig", base_iri="http://baz", ) - self.assertEqual(set(store), {(foo, bar, baz, graph)}) + self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) + + def test_dump_ntriples(self): + store = self.store() + store.add(Quad(foo, bar, baz, graph)) + output = BytesIO() + store.dump(output, "application/n-triples", from_graph=graph) + self.assertEqual( + output.getvalue(), + b" .\n", + ) + + def test_dump_nquads(self): + store = self.store() + store.add(Quad(foo, bar, baz, graph)) + output = BytesIO() + store.dump(output, "application/n-quads") + self.assertEqual( + output.getvalue(), + b" .\n", + ) class TestMemoryStore(TestAbstractStore):