Python: query results I/O

pull/631/head
Tpt 1 year ago committed by Thomas Tanon
parent 180ae22293
commit a6f32390df
  1. 11
      python/docs/sparql.rst
  2. 132
      python/src/io.rs
  3. 6
      python/src/lib.rs
  4. 297
      python/src/sparql.rs
  5. 53
      python/src/store.rs
  6. 47
      python/tests/test_io.py
  7. 33
      python/tests/test_store.py

@ -9,7 +9,6 @@ Variable
.. autoclass:: pyoxigraph.Variable .. autoclass:: pyoxigraph.Variable
:members: :members:
``SELECT`` solutions ``SELECT`` solutions
"""""""""""""""""""" """"""""""""""""""""
.. autoclass:: pyoxigraph.QuerySolutions .. autoclass:: pyoxigraph.QuerySolutions
@ -17,7 +16,17 @@ Variable
.. autoclass:: pyoxigraph.QuerySolution .. autoclass:: pyoxigraph.QuerySolution
:members: :members:
``ASK`` results
"""""""""""""""
.. autoclass:: pyoxigraph.QueryBoolean
:members:
``CONSTRUCT`` results ``CONSTRUCT`` results
""""""""""""""""""""" """""""""""""""""""""
.. autoclass:: pyoxigraph.QueryTriples .. autoclass:: pyoxigraph.QueryTriples
:members: :members:
Query results parsing
"""""""""""""""""""""
.. autoclass:: pyoxigraph.parse_query_results
:members:

@ -3,10 +3,11 @@
use crate::model::{PyQuad, PyTriple}; use crate::model::{PyQuad, PyTriple};
use oxigraph::io::{FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer}; use oxigraph::io::{FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer};
use oxigraph::model::QuadRef; use oxigraph::model::QuadRef;
use oxigraph::sparql::results::QueryResultsFormat;
use pyo3::exceptions::{PySyntaxError, PyValueError}; use pyo3::exceptions::{PySyntaxError, PyValueError};
use pyo3::intern;
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use pyo3::{intern, wrap_pyfunction};
use std::cmp::max; use std::cmp::max;
use std::error::Error; use std::error::Error;
use std::ffi::OsStr; use std::ffi::OsStr;
@ -14,11 +15,6 @@ use std::fs::File;
use std::io::{self, BufWriter, Cursor, Read, Write}; use std::io::{self, BufWriter, Cursor, Read, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
pub fn add_to_module(module: &PyModule) -> PyResult<()> {
module.add_wrapped(wrap_pyfunction!(parse))?;
module.add_wrapped(wrap_pyfunction!(serialize))
}
/// Parses RDF graph and dataset serialization formats. /// Parses RDF graph and dataset serialization formats.
/// ///
/// It currently supports the following formats: /// It currently supports the following formats:
@ -63,7 +59,7 @@ pub fn parse(
py: Python<'_>, py: Python<'_>,
) -> PyResult<PyObject> { ) -> PyResult<PyObject> {
let file_path = input.extract::<PathBuf>().ok(); let file_path = input.extract::<PathBuf>().ok();
let format = rdf_format(format, file_path.as_deref())?; let format = parse_format(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path { let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)? PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else { } else {
@ -128,40 +124,31 @@ pub fn serialize<'a>(
format: Option<&str>, format: Option<&str>,
py: Python<'a>, py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> { ) -> PyResult<Option<&'a PyBytes>> {
let file_path = output.and_then(|output| output.extract::<PathBuf>().ok()); PyWritable::do_write(
let format = rdf_format(format, file_path.as_deref())?; |output, format| {
let output = if let Some(output) = output { let mut writer = RdfSerializer::from_format(format).serialize_to_write(output);
if let Some(file_path) = &file_path { for i in input.iter()? {
PyWritable::from_file(file_path, py).map_err(map_io_err)? let i = i?;
} else { if let Ok(triple) = i.extract::<PyRef<PyTriple>>() {
PyWritable::from_data(output) writer.write_triple(&*triple)
} } else {
} else { let quad = i.extract::<PyRef<PyQuad>>()?;
PyWritable::Bytes(Vec::new()) let quad = QuadRef::from(&*quad);
}; if !quad.graph_name.is_default_graph() && !format.supports_datasets() {
let mut writer = RdfSerializer::from_format(format).serialize_to_write(BufWriter::new(output)); return Err(PyValueError::new_err(
for i in input.iter()? { "The {format} format does not support named graphs",
let i = i?; ));
if let Ok(triple) = i.extract::<PyRef<PyTriple>>() { }
writer.write_triple(&*triple) writer.write_quad(quad)
} else { }
let quad = i.extract::<PyRef<PyQuad>>()?; .map_err(map_io_err)?;
let quad = QuadRef::from(&*quad);
if !quad.graph_name.is_default_graph() && !format.supports_datasets() {
return Err(PyValueError::new_err(
"The {format} format does not support named graphs",
));
} }
writer.write_quad(quad) writer.finish().map_err(map_io_err)
} },
.map_err(map_io_err)?; output,
} format,
writer py,
.finish() )
.map_err(map_io_err)?
.into_inner()
.map_err(|e| map_io_err(e.into_error()))?
.close(py)
} }
#[pyclass(name = "QuadReader", module = "pyoxigraph")] #[pyclass(name = "QuadReader", module = "pyoxigraph")]
@ -228,15 +215,33 @@ pub enum PyWritable {
} }
impl PyWritable { impl PyWritable {
pub fn from_file(file: &Path, py: Python<'_>) -> io::Result<Self> { pub fn do_write<'a, F: Format>(
Ok(Self::File(py.allow_threads(|| File::create(file))?)) write: impl FnOnce(BufWriter<Self>, F) -> PyResult<BufWriter<Self>>,
} output: Option<&PyAny>,
format: Option<&str>,
pub fn from_data(data: &PyAny) -> Self { py: Python<'a>,
Self::Io(PyIo(data.into())) ) -> PyResult<Option<&'a PyBytes>> {
let file_path = output.and_then(|output| output.extract::<PathBuf>().ok());
let format = parse_format::<F>(format, file_path.as_deref())?;
let output = if let Some(output) = output {
if let Some(file_path) = &file_path {
Self::File(
py.allow_threads(|| File::create(file_path))
.map_err(map_io_err)?,
)
} else {
Self::Io(PyIo(output.into()))
}
} else {
PyWritable::Bytes(Vec::new())
};
let writer = write(BufWriter::new(output), format)?;
py.allow_threads(|| writer.into_inner())
.map_err(|e| map_io_err(e.into_error()))?
.close(py)
} }
pub fn close(self, py: Python<'_>) -> PyResult<Option<&PyBytes>> { fn close(self, py: Python<'_>) -> PyResult<Option<&PyBytes>> {
match self { match self {
Self::Bytes(bytes) => Ok(Some(PyBytes::new(py, &bytes))), Self::Bytes(bytes) => Ok(Some(PyBytes::new(py, &bytes))),
Self::File(mut file) => { Self::File(mut file) => {
@ -320,7 +325,32 @@ impl Write for PyIo {
} }
} }
pub fn rdf_format(format: Option<&str>, path: Option<&Path>) -> PyResult<RdfFormat> { pub trait Format: Sized {
fn from_media_type(media_type: &str) -> Option<Self>;
fn from_extension(extension: &str) -> Option<Self>;
}
impl Format for RdfFormat {
fn from_media_type(media_type: &str) -> Option<Self> {
Self::from_media_type(media_type)
}
fn from_extension(extension: &str) -> Option<Self> {
Self::from_extension(extension)
}
}
impl Format for QueryResultsFormat {
fn from_media_type(media_type: &str) -> Option<Self> {
Self::from_media_type(media_type)
}
fn from_extension(extension: &str) -> Option<Self> {
Self::from_extension(extension)
}
}
pub fn parse_format<F: Format>(format: Option<&str>, path: Option<&Path>) -> PyResult<F> {
let format = if let Some(format) = format { let format = if let Some(format) = format {
format format
} else if let Some(path) = path { } else if let Some(path) = path {
@ -338,11 +368,11 @@ pub fn rdf_format(format: Option<&str>, path: Option<&Path>) -> PyResult<RdfForm
)); ));
}; };
if format.contains('/') { if format.contains('/') {
RdfFormat::from_media_type(format).ok_or_else(|| { F::from_media_type(format).ok_or_else(|| {
PyValueError::new_err(format!("Not supported RDF format media type: {format}")) PyValueError::new_err(format!("Not supported RDF format media type: {format}"))
}) })
} else { } else {
RdfFormat::from_extension(format).ok_or_else(|| { F::from_extension(format).ok_or_else(|| {
PyValueError::new_err(format!("Not supported RDF format extension: {format}")) PyValueError::new_err(format!("Not supported RDF format extension: {format}"))
}) })
} }
@ -404,7 +434,7 @@ pub fn map_parse_error(error: ParseError, file_path: Option<PathBuf>) -> PyErr {
/// ///
/// Code from pyo3: https://github.com/PyO3/pyo3/blob/a67180c8a42a0bc0fdc45b651b62c0644130cf47/src/python.rs#L366 /// Code from pyo3: https://github.com/PyO3/pyo3/blob/a67180c8a42a0bc0fdc45b651b62c0644130cf47/src/python.rs#L366
#[allow(unsafe_code)] #[allow(unsafe_code)]
pub fn allow_threads_unsafe<T>(f: impl FnOnce() -> T) -> T { pub fn allow_threads_unsafe<T>(_py: Python<'_>, f: impl FnOnce() -> T) -> T {
struct RestoreGuard { struct RestoreGuard {
tstate: *mut pyo3::ffi::PyThreadState, tstate: *mut pyo3::ffi::PyThreadState,
} }

@ -10,6 +10,7 @@ mod model;
mod sparql; mod sparql;
mod store; mod store;
use crate::io::*;
use crate::model::*; use crate::model::*;
use crate::sparql::*; use crate::sparql::*;
use crate::store::*; use crate::store::*;
@ -34,5 +35,8 @@ fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
module.add_class::<PyQuerySolution>()?; module.add_class::<PyQuerySolution>()?;
module.add_class::<PyQueryBoolean>()?; module.add_class::<PyQueryBoolean>()?;
module.add_class::<PyQueryTriples>()?; module.add_class::<PyQueryTriples>()?;
io::add_to_module(module) module.add_wrapped(wrap_pyfunction!(parse))?;
module.add_wrapped(wrap_pyfunction!(parse_query_results))?;
module.add_wrapped(wrap_pyfunction!(serialize))?;
Ok(())
} }

@ -1,14 +1,23 @@
use crate::io::{allow_threads_unsafe, map_io_err, map_parse_error}; use crate::io::*;
use crate::map_storage_error;
use crate::model::*; use crate::model::*;
use crate::store::map_storage_error;
use oxigraph::io::RdfSerializer;
use oxigraph::model::Term; use oxigraph::model::Term;
use oxigraph::sparql::*; use oxigraph::sparql::results::{
ParseError, QueryResultsParser, QueryResultsReader, QueryResultsSerializer, SolutionsReader,
};
use oxigraph::sparql::{
EvaluationError, Query, QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter,
Variable,
};
use pyo3::basic::CompareOp; use pyo3::basic::CompareOp;
use pyo3::exceptions::{ use pyo3::exceptions::{
PyNotImplementedError, PyRuntimeError, PySyntaxError, PyTypeError, PyValueError, PyNotImplementedError, PyRuntimeError, PySyntaxError, PyTypeError, PyValueError,
}; };
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes;
use std::io::BufReader;
use std::path::PathBuf;
use std::vec::IntoIter; use std::vec::IntoIter;
pub fn parse_query( pub fn parse_query(
@ -17,8 +26,9 @@ pub fn parse_query(
use_default_graph_as_union: bool, use_default_graph_as_union: bool,
default_graph: Option<&PyAny>, default_graph: Option<&PyAny>,
named_graphs: Option<&PyAny>, named_graphs: Option<&PyAny>,
py: Python<'_>,
) -> PyResult<Query> { ) -> PyResult<Query> {
let mut query = allow_threads_unsafe(|| Query::parse(query, base_iri)) let mut query = allow_threads_unsafe(py, || Query::parse(query, base_iri))
.map_err(|e| map_evaluation_error(e.into()))?; .map_err(|e| map_evaluation_error(e.into()))?;
if use_default_graph_as_union && default_graph.is_some() { if use_default_graph_as_union && default_graph.is_some() {
@ -63,7 +73,10 @@ pub fn parse_query(
pub fn query_results_to_python(py: Python<'_>, results: QueryResults) -> PyObject { pub fn query_results_to_python(py: Python<'_>, results: QueryResults) -> PyObject {
match results { match results {
QueryResults::Solutions(inner) => PyQuerySolutions { inner }.into_py(py), QueryResults::Solutions(inner) => PyQuerySolutions {
inner: PyQuerySolutionsVariant::Query(inner),
}
.into_py(py),
QueryResults::Graph(inner) => PyQueryTriples { inner }.into_py(py), QueryResults::Graph(inner) => PyQueryTriples { inner }.into_py(py),
QueryResults::Boolean(inner) => PyQueryBoolean { inner }.into_py(py), QueryResults::Boolean(inner) => PyQueryBoolean { inner }.into_py(py),
} }
@ -172,7 +185,11 @@ impl SolutionValueIter {
/// [<QuerySolution s=<NamedNode value=http://example.com>>] /// [<QuerySolution s=<NamedNode value=http://example.com>>]
#[pyclass(unsendable, name = "QuerySolutions", module = "pyoxigraph")] #[pyclass(unsendable, name = "QuerySolutions", module = "pyoxigraph")]
pub struct PyQuerySolutions { pub struct PyQuerySolutions {
inner: QuerySolutionIter, inner: PyQuerySolutionsVariant,
}
enum PyQuerySolutionsVariant {
Query(QuerySolutionIter),
Reader(SolutionsReader<BufReader<PyReadable>>),
} }
#[pymethods] #[pymethods]
@ -185,22 +202,99 @@ impl PyQuerySolutions {
/// [<Variable value=s>] /// [<Variable value=s>]
#[getter] #[getter]
fn variables(&self) -> Vec<PyVariable> { fn variables(&self) -> Vec<PyVariable> {
self.inner match &self.inner {
.variables() PyQuerySolutionsVariant::Query(inner) => {
.iter() inner.variables().iter().map(|v| v.clone().into()).collect()
.map(|v| v.clone().into()) }
.collect() PyQuerySolutionsVariant::Reader(inner) => {
inner.variables().iter().map(|v| v.clone().into()).collect()
}
}
}
/// Writes the query results into a file.
///
/// It currently supports the following formats:
///
/// * `XML <https://www.w3.org/TR/rdf-sparql-XMLres/>`_ (``application/sparql-results+xml`` or ``srx``)
/// * `JSON <https://www.w3.org/TR/sparql11-results-json/>`_ (``application/sparql-results+json`` or ``srj``)
/// * `CSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/csv`` or ``csv``)
/// * `TSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/tab-separated-values`` or ``tsv``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported.
/// :raises OSError: if an error happens during a file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> results = store.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
/// >>> results.serialize(format="json")
/// b'{"head":{"vars":["s","p","o"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"},"p":{"type":"uri","value":"http://example.com/p"},"o":{"type":"literal","value":"1"}}]}}'
#[pyo3(signature = (output = None, /, format = None))]
fn serialize<'a>(
&mut self,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
PyWritable::do_write(
|output, format| {
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(
output,
match &self.inner {
PyQuerySolutionsVariant::Query(inner) => inner.variables().to_vec(),
PyQuerySolutionsVariant::Reader(inner) => inner.variables().to_vec(),
},
)
.map_err(map_io_err)?;
match &mut self.inner {
PyQuerySolutionsVariant::Query(inner) => {
for solution in inner {
writer
.write(&solution.map_err(map_evaluation_error)?)
.map_err(map_io_err)?;
}
}
PyQuerySolutionsVariant::Reader(inner) => {
for solution in inner {
writer
.write(&solution.map_err(map_query_results_parse_error)?)
.map_err(map_io_err)?;
}
}
}
writer.finish().map_err(map_io_err)
},
output,
format,
py,
)
} }
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<Self> { fn __iter__(slf: PyRef<'_, Self>) -> PyRef<Self> {
slf slf
} }
fn __next__(&mut self) -> PyResult<Option<PyQuerySolution>> { fn __next__(&mut self, py: Python<'_>) -> PyResult<Option<PyQuerySolution>> {
Ok(allow_threads_unsafe(|| self.inner.next()) Ok(match &mut self.inner {
.transpose() PyQuerySolutionsVariant::Query(inner) => allow_threads_unsafe(py, || {
.map_err(map_evaluation_error)? inner.next().transpose().map_err(map_evaluation_error)
.map(move |inner| PyQuerySolution { inner })) }),
PyQuerySolutionsVariant::Reader(inner) => inner
.next()
.transpose()
.map_err(map_query_results_parse_error),
}?
.map(move |inner| PyQuerySolution { inner }))
} }
} }
@ -219,6 +313,52 @@ pub struct PyQueryBoolean {
#[pymethods] #[pymethods]
impl PyQueryBoolean { impl PyQueryBoolean {
/// Writes the query results into a file.
///
/// It currently supports the following formats:
///
/// * `XML <https://www.w3.org/TR/rdf-sparql-XMLres/>`_ (``application/sparql-results+xml`` or ``srx``)
/// * `JSON <https://www.w3.org/TR/sparql11-results-json/>`_ (``application/sparql-results+json`` or ``srj``)
/// * `CSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/csv`` or ``csv``)
/// * `TSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/tab-separated-values`` or ``tsv``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported.
/// :raises OSError: if an error happens during a file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> results = store.query("ASK { ?s ?p ?o }")
/// >>> results.serialize(format="json")
/// b'{"head":{},"boolean":true}'
#[pyo3(signature = (output = None, /, format = None))]
fn serialize<'a>(
&mut self,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
PyWritable::do_write(
|output, format| {
py.allow_threads(|| {
QueryResultsSerializer::from_format(format)
.write_boolean_result(output, self.inner)
.map_err(map_io_err)
})
},
output,
format,
py,
)
}
fn __bool__(&self) -> bool { fn __bool__(&self) -> bool {
self.inner self.inner
} }
@ -249,29 +389,129 @@ pub struct PyQueryTriples {
#[pymethods] #[pymethods]
impl PyQueryTriples { impl PyQueryTriples {
/// Writes the query results into a file.
///
/// It currently supports the following formats:
///
/// * `N-Triples <https://www.w3.org/TR/n-triples/>`_ (``application/n-triples`` or ``nt``)
/// * `N-Quads <https://www.w3.org/TR/n-quads/>`_ (``application/n-quads`` or ``nq``)
/// * `Turtle <https://www.w3.org/TR/turtle/>`_ (``text/turtle`` or ``ttl``)
/// * `TriG <https://www.w3.org/TR/trig/>`_ (``application/trig`` or ``trig``)
/// * `N3 <https://w3c.github.io/N3/spec/>`_ (``text/n3`` or ``n3``)
/// * `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_ (``application/rdf+xml`` or ``rdf``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported.
/// :raises OSError: if an error happens during a file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }")
/// >>> results.serialize(format="nt")
/// b'<http://example.com> <http://example.com/p> "1" .\n'
#[pyo3(signature = (output = None, /, format = None))]
fn serialize<'a>(
&mut self,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
PyWritable::do_write(
|output, format| {
let mut writer = RdfSerializer::from_format(format).serialize_to_write(output);
for triple in &mut self.inner {
writer
.write_triple(&triple.map_err(map_evaluation_error)?)
.map_err(map_io_err)?;
}
writer.finish().map_err(map_io_err)
},
output,
format,
py,
)
}
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<Self> { fn __iter__(slf: PyRef<'_, Self>) -> PyRef<Self> {
slf slf
} }
fn __next__(&mut self) -> PyResult<Option<PyTriple>> { fn __next__(&mut self, py: Python<'_>) -> PyResult<Option<PyTriple>> {
Ok(allow_threads_unsafe(|| self.inner.next()) Ok(allow_threads_unsafe(py, || self.inner.next())
.transpose() .transpose()
.map_err(map_evaluation_error)? .map_err(map_evaluation_error)?
.map(Into::into)) .map(Into::into))
} }
} }
/// Parses SPARQL query results.
///
/// It currently supports the following formats:
///
/// * `XML <https://www.w3.org/TR/rdf-sparql-XMLres/>`_ (``application/sparql-results+xml`` or ``srx``)
/// * `JSON <https://www.w3.org/TR/sparql11-results-json/>`_ (``application/sparql-results+json`` or ``srj``)
/// * `CSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/csv`` or ``csv``)
/// * `TSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/tab-separated-values`` or ``tsv``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :return: an iterator of :py:class:`QuerySolution` or a :py:func:`bool`.
/// :rtype: QuerySolutions or QueryBoolean
/// :raises ValueError: if the format is not supported.
/// :raises SyntaxError: if the provided data is invalid.
///
/// >>> input = io.BytesIO(b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t1\n')
/// >>> list(parse_query_results(input, "text/tsv"))
/// [<QuerySolution s=<NamedNode value=http://example.com/s> p=<NamedNode value=http://example.com/s> o=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#integer>>>]
///
/// >>> input = io.BytesIO(b'{"head":{},"boolean":true}')
/// >>> parse_query_results(input, "application/sparql-results+json")
/// <QueryBoolean true>
#[pyfunction]
#[pyo3(signature = (input, /, format = None))]
pub fn parse_query_results(
input: &PyAny,
format: Option<&str>,
py: Python<'_>,
) -> PyResult<PyObject> {
let file_path = input.extract::<PathBuf>().ok();
let format = parse_format(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyReadable::from_data(input)
};
let results = QueryResultsParser::from_format(format)
.read_results(BufReader::new(input))
.map_err(map_query_results_parse_error)?;
Ok(match results {
QueryResultsReader::Solutions(inner) => PyQuerySolutions {
inner: PyQuerySolutionsVariant::Reader(inner),
}
.into_py(py),
QueryResultsReader::Boolean(inner) => PyQueryBoolean { inner }.into_py(py),
})
}
pub fn map_evaluation_error(error: EvaluationError) -> PyErr { pub fn map_evaluation_error(error: EvaluationError) -> PyErr {
match error { match error {
EvaluationError::Parsing(error) => PySyntaxError::new_err(error.to_string()), EvaluationError::Parsing(error) => PySyntaxError::new_err(error.to_string()),
EvaluationError::Storage(error) => map_storage_error(error), EvaluationError::Storage(error) => map_storage_error(error),
EvaluationError::GraphParsing(error) => map_parse_error(error, None), EvaluationError::GraphParsing(error) => map_parse_error(error, None),
EvaluationError::ResultsParsing(error) => match error { EvaluationError::ResultsParsing(error) => map_query_results_parse_error(error),
oxigraph::sparql::results::ParseError::Syntax(error) => {
PySyntaxError::new_err(error.to_string())
}
oxigraph::sparql::results::ParseError::Io(error) => map_io_err(error),
},
EvaluationError::ResultsSerialization(error) => map_io_err(error), EvaluationError::ResultsSerialization(error) => map_io_err(error),
EvaluationError::Service(error) => match error.downcast() { EvaluationError::Service(error) => match error.downcast() {
Ok(error) => map_io_err(*error), Ok(error) => map_io_err(*error),
@ -280,3 +520,10 @@ pub fn map_evaluation_error(error: EvaluationError) -> PyErr {
_ => PyRuntimeError::new_err(error.to_string()), _ => PyRuntimeError::new_err(error.to_string()),
} }
} }
pub fn map_query_results_parse_error(error: ParseError) -> PyErr {
match error {
ParseError::Syntax(error) => PySyntaxError::new_err(error.to_string()),
ParseError::Io(error) => map_io_err(error),
}
}

@ -1,17 +1,17 @@
#![allow(clippy::needless_option_as_deref)] #![allow(clippy::needless_option_as_deref)]
use crate::io::{ use crate::io::{
allow_threads_unsafe, map_io_err, map_parse_error, rdf_format, PyReadable, PyWritable, allow_threads_unsafe, map_io_err, map_parse_error, parse_format, PyReadable, PyWritable,
}; };
use crate::model::*; use crate::model::*;
use crate::sparql::*; use crate::sparql::*;
use oxigraph::io::RdfFormat;
use oxigraph::model::{GraphName, GraphNameRef}; use oxigraph::model::{GraphName, GraphNameRef};
use oxigraph::sparql::Update; use oxigraph::sparql::Update;
use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store}; use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store};
use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use std::io::BufWriter;
use std::path::PathBuf; use std::path::PathBuf;
/// RDF store. /// RDF store.
@ -251,7 +251,7 @@ impl PyStore {
/// :type default_graph: NamedNode or BlankNode or DefaultGraph or list(NamedNode or BlankNode or DefaultGraph) or None, optional /// :type default_graph: NamedNode or BlankNode or DefaultGraph or list(NamedNode or BlankNode or DefaultGraph) or None, optional
/// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available. /// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available.
/// :type named_graphs: list(NamedNode or BlankNode) or None, optional /// :type named_graphs: list(NamedNode or BlankNode) or None, optional
/// :return: a :py:class:`QueryBoolean` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries. /// :return: a :py:func:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries.
/// :rtype: QuerySolutions or QueryBoolean or QueryTriples /// :rtype: QuerySolutions or QueryBoolean or QueryTriples
/// :raises SyntaxError: if the provided query is invalid. /// :raises SyntaxError: if the provided query is invalid.
/// :raises OSError: if an error happens while reading the store. /// :raises OSError: if an error happens while reading the store.
@ -292,9 +292,10 @@ impl PyStore {
use_default_graph_as_union, use_default_graph_as_union,
default_graph, default_graph,
named_graphs, named_graphs,
py,
)?; )?;
let results = let results =
allow_threads_unsafe(|| self.inner.query(query)).map_err(map_evaluation_error)?; allow_threads_unsafe(py, || self.inner.query(query)).map_err(map_evaluation_error)?;
Ok(query_results_to_python(py, results)) Ok(query_results_to_python(py, results))
} }
@ -393,7 +394,7 @@ impl PyStore {
None None
}; };
let file_path = input.extract::<PathBuf>().ok(); let file_path = input.extract::<PathBuf>().ok();
let format = rdf_format(format, file_path.as_deref())?; let format = parse_format::<RdfFormat>(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path { let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)? PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else { } else {
@ -462,7 +463,7 @@ impl PyStore {
None None
}; };
let file_path = input.extract::<PathBuf>().ok(); let file_path = input.extract::<PathBuf>().ok();
let format = rdf_format(format, file_path.as_deref())?; let format = parse_format::<RdfFormat>(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path { let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)? PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else { } else {
@ -505,7 +506,7 @@ impl PyStore {
/// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional
/// :rtype: bytes or None /// :rtype: bytes or None
/// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs. /// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs.
/// :raises OSError: if an error happens during a quad lookup /// :raises OSError: if an error happens during a quad lookup or file writing.
/// ///
/// >>> store = Store() /// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
@ -531,29 +532,21 @@ impl PyStore {
} else { } else {
None None
}; };
let file_path = output.and_then(|output| output.extract::<PathBuf>().ok()); PyWritable::do_write::<RdfFormat>(
let format = rdf_format(format, file_path.as_deref())?; |output, format| {
let output = if let Some(output) = output { py.allow_threads(|| {
if let Some(file_path) = &file_path { if let Some(from_graph_name) = &from_graph_name {
PyWritable::from_file(file_path, py).map_err(map_io_err)? self.inner.dump_graph(output, format, from_graph_name)
} else { } else {
PyWritable::from_data(output) self.inner.dump_dataset(output, format)
} }
} else { .map_err(map_serializer_error)
PyWritable::Bytes(Vec::new()) })
}; },
py.allow_threads(|| { output,
let output = BufWriter::new(output); format,
if let Some(from_graph_name) = &from_graph_name { py,
self.inner.dump_graph(output, format, from_graph_name) )
} else {
self.inner.dump_dataset(output, format)
}
.map_err(map_serializer_error)?
.into_inner()
.map_err(|e| map_io_err(e.into_error()))
})?
.close(py)
} }
/// Returns an iterator over all the store named graphs. /// Returns an iterator over all the store named graphs.

@ -3,7 +3,16 @@ import unittest
from io import BytesIO, StringIO, UnsupportedOperation from io import BytesIO, StringIO, UnsupportedOperation
from tempfile import NamedTemporaryFile, TemporaryFile from tempfile import NamedTemporaryFile, TemporaryFile
from pyoxigraph import Literal, NamedNode, Quad, parse, serialize from pyoxigraph import (
Literal,
NamedNode,
Quad,
QueryBoolean,
QuerySolutions,
parse,
parse_query_results,
serialize,
)
EXAMPLE_TRIPLE = Quad( EXAMPLE_TRIPLE = Quad(
NamedNode("http://example.com/foo"), NamedNode("http://example.com/foo"),
@ -131,7 +140,7 @@ class TestParse(unittest.TestCase):
class TestSerialize(unittest.TestCase): class TestSerialize(unittest.TestCase):
def test_serialize_to_bytes(self) -> None: def test_serialize_to_bytes(self) -> None:
self.assertEqual( self.assertEqual(
serialize([EXAMPLE_TRIPLE.triple], None, "text/turtle").decode(), (serialize([EXAMPLE_TRIPLE.triple], None, "text/turtle") or b"").decode(),
'<http://example.com/foo> <http://example.com/p> "éù" .\n', '<http://example.com/foo> <http://example.com/p> "éù" .\n',
) )
@ -162,3 +171,37 @@ class TestSerialize(unittest.TestCase):
output.getvalue(), output.getvalue(),
b'<http://example.com/g> {\n\t<http://example.com/foo> <http://example.com/p> "1" .\n}\n', b'<http://example.com/g> {\n\t<http://example.com/foo> <http://example.com/p> "1" .\n}\n',
) )
class TestParseQuerySolutions(unittest.TestCase):
def test_parse_file(self) -> None:
with NamedTemporaryFile(suffix=".tsv") as fp:
fp.write(
b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t"1"\n'
)
fp.flush()
r = parse_query_results(fp.name)
self.assertIsInstance(r, QuerySolutions)
results = list(r) # type: ignore[arg-type]
self.assertEqual(results[0]["s"], NamedNode("http://example.com/s"))
self.assertEqual(results[0][2], Literal("1"))
def test_parse_not_existing_file(self) -> None:
with self.assertRaises(IOError) as _:
parse_query_results(
"/tmp/not-existing-oxigraph-file.ttl", "application/json"
)
def test_parse_str_io(self) -> None:
result = parse_query_results(StringIO("true"), "tsv")
self.assertIsInstance(result, QueryBoolean)
self.assertTrue(result)
def test_parse_bytes_io(self) -> None:
result = parse_query_results(BytesIO(b"false"), "tsv")
self.assertIsInstance(result, QueryBoolean)
self.assertFalse(result)
def test_parse_io_error(self) -> None:
with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp:
parse_query_results(fp, "srx")

@ -189,6 +189,39 @@ class TestStore(unittest.TestCase):
) )
self.assertEqual(len(list(results)), 2) self.assertEqual(len(list(results)), 2)
def test_select_query_dump(self) -> None:
store = Store()
store.add(Quad(foo, bar, baz))
results = store.query("SELECT ?s WHERE { ?s ?p ?o }")
output = BytesIO()
results.serialize(output, "csv")
self.assertEqual(
output.getvalue().decode(),
"s\r\nhttp://foo\r\n",
)
def test_ask_query_dump(self) -> None:
store = Store()
store.add(Quad(foo, bar, baz))
results = store.query("ASK { ?s ?p ?o }")
output = BytesIO()
results.serialize(output, "csv")
self.assertEqual(
output.getvalue().decode(),
"true",
)
def test_construct_query_dump(self) -> None:
store = Store()
store.add(Quad(foo, bar, baz))
results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")
output = BytesIO()
results.serialize(output, "nt")
self.assertEqual(
output.getvalue().decode(),
"<http://foo> <http://bar> <http://baz> .\n",
)
def test_update_insert_data(self) -> None: def test_update_insert_data(self) -> None:
store = Store() store = Store()
store.update("INSERT DATA { <http://foo> <http://foo> <http://foo> }") store.update("INSERT DATA { <http://foo> <http://foo> <http://foo> }")

Loading…
Cancel
Save