Python: query results I/O

pull/631/head
Tpt 1 year ago committed by Thomas Tanon
parent 180ae22293
commit a6f32390df
  1. 11
      python/docs/sparql.rst
  2. 132
      python/src/io.rs
  3. 6
      python/src/lib.rs
  4. 297
      python/src/sparql.rs
  5. 53
      python/src/store.rs
  6. 47
      python/tests/test_io.py
  7. 33
      python/tests/test_store.py

@ -9,7 +9,6 @@ Variable
.. autoclass:: pyoxigraph.Variable
:members:
``SELECT`` solutions
""""""""""""""""""""
.. autoclass:: pyoxigraph.QuerySolutions
@ -17,7 +16,17 @@ Variable
.. autoclass:: pyoxigraph.QuerySolution
:members:
``ASK`` results
"""""""""""""""
.. autoclass:: pyoxigraph.QueryBoolean
:members:
``CONSTRUCT`` results
"""""""""""""""""""""
.. autoclass:: pyoxigraph.QueryTriples
:members:
Query results parsing
"""""""""""""""""""""
.. autoclass:: pyoxigraph.parse_query_results
:members:

@ -3,10 +3,11 @@
use crate::model::{PyQuad, PyTriple};
use oxigraph::io::{FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer};
use oxigraph::model::QuadRef;
use oxigraph::sparql::results::QueryResultsFormat;
use pyo3::exceptions::{PySyntaxError, PyValueError};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::{intern, wrap_pyfunction};
use std::cmp::max;
use std::error::Error;
use std::ffi::OsStr;
@ -14,11 +15,6 @@ use std::fs::File;
use std::io::{self, BufWriter, Cursor, Read, Write};
use std::path::{Path, PathBuf};
pub fn add_to_module(module: &PyModule) -> PyResult<()> {
module.add_wrapped(wrap_pyfunction!(parse))?;
module.add_wrapped(wrap_pyfunction!(serialize))
}
/// Parses RDF graph and dataset serialization formats.
///
/// It currently supports the following formats:
@ -63,7 +59,7 @@ pub fn parse(
py: Python<'_>,
) -> PyResult<PyObject> {
let file_path = input.extract::<PathBuf>().ok();
let format = rdf_format(format, file_path.as_deref())?;
let format = parse_format(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else {
@ -128,40 +124,31 @@ pub fn serialize<'a>(
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
let file_path = output.and_then(|output| output.extract::<PathBuf>().ok());
let format = rdf_format(format, file_path.as_deref())?;
let output = if let Some(output) = output {
if let Some(file_path) = &file_path {
PyWritable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyWritable::from_data(output)
}
} else {
PyWritable::Bytes(Vec::new())
};
let mut writer = RdfSerializer::from_format(format).serialize_to_write(BufWriter::new(output));
for i in input.iter()? {
let i = i?;
if let Ok(triple) = i.extract::<PyRef<PyTriple>>() {
writer.write_triple(&*triple)
} else {
let quad = i.extract::<PyRef<PyQuad>>()?;
let quad = QuadRef::from(&*quad);
if !quad.graph_name.is_default_graph() && !format.supports_datasets() {
return Err(PyValueError::new_err(
"The {format} format does not support named graphs",
));
PyWritable::do_write(
|output, format| {
let mut writer = RdfSerializer::from_format(format).serialize_to_write(output);
for i in input.iter()? {
let i = i?;
if let Ok(triple) = i.extract::<PyRef<PyTriple>>() {
writer.write_triple(&*triple)
} else {
let quad = i.extract::<PyRef<PyQuad>>()?;
let quad = QuadRef::from(&*quad);
if !quad.graph_name.is_default_graph() && !format.supports_datasets() {
return Err(PyValueError::new_err(
"The {format} format does not support named graphs",
));
}
writer.write_quad(quad)
}
.map_err(map_io_err)?;
}
writer.write_quad(quad)
}
.map_err(map_io_err)?;
}
writer
.finish()
.map_err(map_io_err)?
.into_inner()
.map_err(|e| map_io_err(e.into_error()))?
.close(py)
writer.finish().map_err(map_io_err)
},
output,
format,
py,
)
}
#[pyclass(name = "QuadReader", module = "pyoxigraph")]
@ -228,15 +215,33 @@ pub enum PyWritable {
}
impl PyWritable {
pub fn from_file(file: &Path, py: Python<'_>) -> io::Result<Self> {
Ok(Self::File(py.allow_threads(|| File::create(file))?))
}
pub fn from_data(data: &PyAny) -> Self {
Self::Io(PyIo(data.into()))
pub fn do_write<'a, F: Format>(
write: impl FnOnce(BufWriter<Self>, F) -> PyResult<BufWriter<Self>>,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
let file_path = output.and_then(|output| output.extract::<PathBuf>().ok());
let format = parse_format::<F>(format, file_path.as_deref())?;
let output = if let Some(output) = output {
if let Some(file_path) = &file_path {
Self::File(
py.allow_threads(|| File::create(file_path))
.map_err(map_io_err)?,
)
} else {
Self::Io(PyIo(output.into()))
}
} else {
PyWritable::Bytes(Vec::new())
};
let writer = write(BufWriter::new(output), format)?;
py.allow_threads(|| writer.into_inner())
.map_err(|e| map_io_err(e.into_error()))?
.close(py)
}
pub fn close(self, py: Python<'_>) -> PyResult<Option<&PyBytes>> {
fn close(self, py: Python<'_>) -> PyResult<Option<&PyBytes>> {
match self {
Self::Bytes(bytes) => Ok(Some(PyBytes::new(py, &bytes))),
Self::File(mut file) => {
@ -320,7 +325,32 @@ impl Write for PyIo {
}
}
pub fn rdf_format(format: Option<&str>, path: Option<&Path>) -> PyResult<RdfFormat> {
pub trait Format: Sized {
fn from_media_type(media_type: &str) -> Option<Self>;
fn from_extension(extension: &str) -> Option<Self>;
}
impl Format for RdfFormat {
fn from_media_type(media_type: &str) -> Option<Self> {
Self::from_media_type(media_type)
}
fn from_extension(extension: &str) -> Option<Self> {
Self::from_extension(extension)
}
}
impl Format for QueryResultsFormat {
fn from_media_type(media_type: &str) -> Option<Self> {
Self::from_media_type(media_type)
}
fn from_extension(extension: &str) -> Option<Self> {
Self::from_extension(extension)
}
}
pub fn parse_format<F: Format>(format: Option<&str>, path: Option<&Path>) -> PyResult<F> {
let format = if let Some(format) = format {
format
} else if let Some(path) = path {
@ -338,11 +368,11 @@ pub fn rdf_format(format: Option<&str>, path: Option<&Path>) -> PyResult<RdfForm
));
};
if format.contains('/') {
RdfFormat::from_media_type(format).ok_or_else(|| {
F::from_media_type(format).ok_or_else(|| {
PyValueError::new_err(format!("Not supported RDF format media type: {format}"))
})
} else {
RdfFormat::from_extension(format).ok_or_else(|| {
F::from_extension(format).ok_or_else(|| {
PyValueError::new_err(format!("Not supported RDF format extension: {format}"))
})
}
@ -404,7 +434,7 @@ pub fn map_parse_error(error: ParseError, file_path: Option<PathBuf>) -> PyErr {
///
/// Code from pyo3: https://github.com/PyO3/pyo3/blob/a67180c8a42a0bc0fdc45b651b62c0644130cf47/src/python.rs#L366
#[allow(unsafe_code)]
pub fn allow_threads_unsafe<T>(f: impl FnOnce() -> T) -> T {
pub fn allow_threads_unsafe<T>(_py: Python<'_>, f: impl FnOnce() -> T) -> T {
struct RestoreGuard {
tstate: *mut pyo3::ffi::PyThreadState,
}

@ -10,6 +10,7 @@ mod model;
mod sparql;
mod store;
use crate::io::*;
use crate::model::*;
use crate::sparql::*;
use crate::store::*;
@ -34,5 +35,8 @@ fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
module.add_class::<PyQuerySolution>()?;
module.add_class::<PyQueryBoolean>()?;
module.add_class::<PyQueryTriples>()?;
io::add_to_module(module)
module.add_wrapped(wrap_pyfunction!(parse))?;
module.add_wrapped(wrap_pyfunction!(parse_query_results))?;
module.add_wrapped(wrap_pyfunction!(serialize))?;
Ok(())
}

@ -1,14 +1,23 @@
use crate::io::{allow_threads_unsafe, map_io_err, map_parse_error};
use crate::map_storage_error;
use crate::io::*;
use crate::model::*;
use crate::store::map_storage_error;
use oxigraph::io::RdfSerializer;
use oxigraph::model::Term;
use oxigraph::sparql::*;
use oxigraph::sparql::results::{
ParseError, QueryResultsParser, QueryResultsReader, QueryResultsSerializer, SolutionsReader,
};
use oxigraph::sparql::{
EvaluationError, Query, QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter,
Variable,
};
use pyo3::basic::CompareOp;
use pyo3::exceptions::{
PyNotImplementedError, PyRuntimeError, PySyntaxError, PyTypeError, PyValueError,
};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use std::io::BufReader;
use std::path::PathBuf;
use std::vec::IntoIter;
pub fn parse_query(
@ -17,8 +26,9 @@ pub fn parse_query(
use_default_graph_as_union: bool,
default_graph: Option<&PyAny>,
named_graphs: Option<&PyAny>,
py: Python<'_>,
) -> PyResult<Query> {
let mut query = allow_threads_unsafe(|| Query::parse(query, base_iri))
let mut query = allow_threads_unsafe(py, || Query::parse(query, base_iri))
.map_err(|e| map_evaluation_error(e.into()))?;
if use_default_graph_as_union && default_graph.is_some() {
@ -63,7 +73,10 @@ pub fn parse_query(
pub fn query_results_to_python(py: Python<'_>, results: QueryResults) -> PyObject {
match results {
QueryResults::Solutions(inner) => PyQuerySolutions { inner }.into_py(py),
QueryResults::Solutions(inner) => PyQuerySolutions {
inner: PyQuerySolutionsVariant::Query(inner),
}
.into_py(py),
QueryResults::Graph(inner) => PyQueryTriples { inner }.into_py(py),
QueryResults::Boolean(inner) => PyQueryBoolean { inner }.into_py(py),
}
@ -172,7 +185,11 @@ impl SolutionValueIter {
/// [<QuerySolution s=<NamedNode value=http://example.com>>]
#[pyclass(unsendable, name = "QuerySolutions", module = "pyoxigraph")]
pub struct PyQuerySolutions {
inner: QuerySolutionIter,
inner: PyQuerySolutionsVariant,
}
enum PyQuerySolutionsVariant {
Query(QuerySolutionIter),
Reader(SolutionsReader<BufReader<PyReadable>>),
}
#[pymethods]
@ -185,22 +202,99 @@ impl PyQuerySolutions {
/// [<Variable value=s>]
#[getter]
fn variables(&self) -> Vec<PyVariable> {
self.inner
.variables()
.iter()
.map(|v| v.clone().into())
.collect()
match &self.inner {
PyQuerySolutionsVariant::Query(inner) => {
inner.variables().iter().map(|v| v.clone().into()).collect()
}
PyQuerySolutionsVariant::Reader(inner) => {
inner.variables().iter().map(|v| v.clone().into()).collect()
}
}
}
/// Writes the query results into a file.
///
/// It currently supports the following formats:
///
/// * `XML <https://www.w3.org/TR/rdf-sparql-XMLres/>`_ (``application/sparql-results+xml`` or ``srx``)
/// * `JSON <https://www.w3.org/TR/sparql11-results-json/>`_ (``application/sparql-results+json`` or ``srj``)
/// * `CSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/csv`` or ``csv``)
/// * `TSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/tab-separated-values`` or ``tsv``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported.
/// :raises OSError: if an error happens during a file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> results = store.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
/// >>> results.serialize(format="json")
/// b'{"head":{"vars":["s","p","o"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"},"p":{"type":"uri","value":"http://example.com/p"},"o":{"type":"literal","value":"1"}}]}}'
#[pyo3(signature = (output = None, /, format = None))]
fn serialize<'a>(
&mut self,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
PyWritable::do_write(
|output, format| {
let mut writer = QueryResultsSerializer::from_format(format)
.solutions_writer(
output,
match &self.inner {
PyQuerySolutionsVariant::Query(inner) => inner.variables().to_vec(),
PyQuerySolutionsVariant::Reader(inner) => inner.variables().to_vec(),
},
)
.map_err(map_io_err)?;
match &mut self.inner {
PyQuerySolutionsVariant::Query(inner) => {
for solution in inner {
writer
.write(&solution.map_err(map_evaluation_error)?)
.map_err(map_io_err)?;
}
}
PyQuerySolutionsVariant::Reader(inner) => {
for solution in inner {
writer
.write(&solution.map_err(map_query_results_parse_error)?)
.map_err(map_io_err)?;
}
}
}
writer.finish().map_err(map_io_err)
},
output,
format,
py,
)
}
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<Self> {
slf
}
fn __next__(&mut self) -> PyResult<Option<PyQuerySolution>> {
Ok(allow_threads_unsafe(|| self.inner.next())
.transpose()
.map_err(map_evaluation_error)?
.map(move |inner| PyQuerySolution { inner }))
fn __next__(&mut self, py: Python<'_>) -> PyResult<Option<PyQuerySolution>> {
Ok(match &mut self.inner {
PyQuerySolutionsVariant::Query(inner) => allow_threads_unsafe(py, || {
inner.next().transpose().map_err(map_evaluation_error)
}),
PyQuerySolutionsVariant::Reader(inner) => inner
.next()
.transpose()
.map_err(map_query_results_parse_error),
}?
.map(move |inner| PyQuerySolution { inner }))
}
}
@ -219,6 +313,52 @@ pub struct PyQueryBoolean {
#[pymethods]
impl PyQueryBoolean {
/// Writes the query results into a file.
///
/// It currently supports the following formats:
///
/// * `XML <https://www.w3.org/TR/rdf-sparql-XMLres/>`_ (``application/sparql-results+xml`` or ``srx``)
/// * `JSON <https://www.w3.org/TR/sparql11-results-json/>`_ (``application/sparql-results+json`` or ``srj``)
/// * `CSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/csv`` or ``csv``)
/// * `TSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/tab-separated-values`` or ``tsv``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported.
/// :raises OSError: if an error happens during a file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> results = store.query("ASK { ?s ?p ?o }")
/// >>> results.serialize(format="json")
/// b'{"head":{},"boolean":true}'
#[pyo3(signature = (output = None, /, format = None))]
fn serialize<'a>(
&mut self,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
PyWritable::do_write(
|output, format| {
py.allow_threads(|| {
QueryResultsSerializer::from_format(format)
.write_boolean_result(output, self.inner)
.map_err(map_io_err)
})
},
output,
format,
py,
)
}
fn __bool__(&self) -> bool {
self.inner
}
@ -249,29 +389,129 @@ pub struct PyQueryTriples {
#[pymethods]
impl PyQueryTriples {
/// Writes the query results into a file.
///
/// It currently supports the following formats:
///
/// * `N-Triples <https://www.w3.org/TR/n-triples/>`_ (``application/n-triples`` or ``nt``)
/// * `N-Quads <https://www.w3.org/TR/n-quads/>`_ (``application/n-quads`` or ``nq``)
/// * `Turtle <https://www.w3.org/TR/turtle/>`_ (``text/turtle`` or ``ttl``)
/// * `TriG <https://www.w3.org/TR/trig/>`_ (``application/trig`` or ``trig``)
/// * `N3 <https://w3c.github.io/N3/spec/>`_ (``text/n3`` or ``n3``)
/// * `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_ (``application/rdf+xml`` or ``rdf``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/turtle`` could also be used for `Turtle <https://www.w3.org/TR/turtle/>`_
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:func:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported.
/// :raises OSError: if an error happens during a file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
/// >>> results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }")
/// >>> results.serialize(format="nt")
/// b'<http://example.com> <http://example.com/p> "1" .\n'
#[pyo3(signature = (output = None, /, format = None))]
fn serialize<'a>(
&mut self,
output: Option<&PyAny>,
format: Option<&str>,
py: Python<'a>,
) -> PyResult<Option<&'a PyBytes>> {
PyWritable::do_write(
|output, format| {
let mut writer = RdfSerializer::from_format(format).serialize_to_write(output);
for triple in &mut self.inner {
writer
.write_triple(&triple.map_err(map_evaluation_error)?)
.map_err(map_io_err)?;
}
writer.finish().map_err(map_io_err)
},
output,
format,
py,
)
}
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<Self> {
slf
}
fn __next__(&mut self) -> PyResult<Option<PyTriple>> {
Ok(allow_threads_unsafe(|| self.inner.next())
fn __next__(&mut self, py: Python<'_>) -> PyResult<Option<PyTriple>> {
Ok(allow_threads_unsafe(py, || self.inner.next())
.transpose()
.map_err(map_evaluation_error)?
.map(Into::into))
}
}
/// Parses SPARQL query results.
///
/// It currently supports the following formats:
///
/// * `XML <https://www.w3.org/TR/rdf-sparql-XMLres/>`_ (``application/sparql-results+xml`` or ``srx``)
/// * `JSON <https://www.w3.org/TR/sparql11-results-json/>`_ (``application/sparql-results+json`` or ``srj``)
/// * `CSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/csv`` or ``csv``)
/// * `TSV <https://www.w3.org/TR/sparql11-results-csv-tsv/>`_ (``text/tab-separated-values`` or ``tsv``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :return: an iterator of :py:class:`QuerySolution` or a :py:func:`bool`.
/// :rtype: QuerySolutions or QueryBoolean
/// :raises ValueError: if the format is not supported.
/// :raises SyntaxError: if the provided data is invalid.
///
/// >>> input = io.BytesIO(b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t1\n')
/// >>> list(parse_query_results(input, "text/tsv"))
/// [<QuerySolution s=<NamedNode value=http://example.com/s> p=<NamedNode value=http://example.com/s> o=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#integer>>>]
///
/// >>> input = io.BytesIO(b'{"head":{},"boolean":true}')
/// >>> parse_query_results(input, "application/sparql-results+json")
/// <QueryBoolean true>
#[pyfunction]
#[pyo3(signature = (input, /, format = None))]
pub fn parse_query_results(
input: &PyAny,
format: Option<&str>,
py: Python<'_>,
) -> PyResult<PyObject> {
let file_path = input.extract::<PathBuf>().ok();
let format = parse_format(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyReadable::from_data(input)
};
let results = QueryResultsParser::from_format(format)
.read_results(BufReader::new(input))
.map_err(map_query_results_parse_error)?;
Ok(match results {
QueryResultsReader::Solutions(inner) => PyQuerySolutions {
inner: PyQuerySolutionsVariant::Reader(inner),
}
.into_py(py),
QueryResultsReader::Boolean(inner) => PyQueryBoolean { inner }.into_py(py),
})
}
pub fn map_evaluation_error(error: EvaluationError) -> PyErr {
match error {
EvaluationError::Parsing(error) => PySyntaxError::new_err(error.to_string()),
EvaluationError::Storage(error) => map_storage_error(error),
EvaluationError::GraphParsing(error) => map_parse_error(error, None),
EvaluationError::ResultsParsing(error) => match error {
oxigraph::sparql::results::ParseError::Syntax(error) => {
PySyntaxError::new_err(error.to_string())
}
oxigraph::sparql::results::ParseError::Io(error) => map_io_err(error),
},
EvaluationError::ResultsParsing(error) => map_query_results_parse_error(error),
EvaluationError::ResultsSerialization(error) => map_io_err(error),
EvaluationError::Service(error) => match error.downcast() {
Ok(error) => map_io_err(*error),
@ -280,3 +520,10 @@ pub fn map_evaluation_error(error: EvaluationError) -> PyErr {
_ => PyRuntimeError::new_err(error.to_string()),
}
}
pub fn map_query_results_parse_error(error: ParseError) -> PyErr {
match error {
ParseError::Syntax(error) => PySyntaxError::new_err(error.to_string()),
ParseError::Io(error) => map_io_err(error),
}
}

@ -1,17 +1,17 @@
#![allow(clippy::needless_option_as_deref)]
use crate::io::{
allow_threads_unsafe, map_io_err, map_parse_error, rdf_format, PyReadable, PyWritable,
allow_threads_unsafe, map_io_err, map_parse_error, parse_format, PyReadable, PyWritable,
};
use crate::model::*;
use crate::sparql::*;
use oxigraph::io::RdfFormat;
use oxigraph::model::{GraphName, GraphNameRef};
use oxigraph::sparql::Update;
use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store};
use pyo3::exceptions::{PyRuntimeError, PyValueError};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use std::io::BufWriter;
use std::path::PathBuf;
/// RDF store.
@ -251,7 +251,7 @@ impl PyStore {
/// :type default_graph: NamedNode or BlankNode or DefaultGraph or list(NamedNode or BlankNode or DefaultGraph) or None, optional
/// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available.
/// :type named_graphs: list(NamedNode or BlankNode) or None, optional
/// :return: a :py:class:`QueryBoolean` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries.
/// :return: a :py:func:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries.
/// :rtype: QuerySolutions or QueryBoolean or QueryTriples
/// :raises SyntaxError: if the provided query is invalid.
/// :raises OSError: if an error happens while reading the store.
@ -292,9 +292,10 @@ impl PyStore {
use_default_graph_as_union,
default_graph,
named_graphs,
py,
)?;
let results =
allow_threads_unsafe(|| self.inner.query(query)).map_err(map_evaluation_error)?;
allow_threads_unsafe(py, || self.inner.query(query)).map_err(map_evaluation_error)?;
Ok(query_results_to_python(py, results))
}
@ -393,7 +394,7 @@ impl PyStore {
None
};
let file_path = input.extract::<PathBuf>().ok();
let format = rdf_format(format, file_path.as_deref())?;
let format = parse_format::<RdfFormat>(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else {
@ -462,7 +463,7 @@ impl PyStore {
None
};
let file_path = input.extract::<PathBuf>().ok();
let format = rdf_format(format, file_path.as_deref())?;
let format = parse_format::<RdfFormat>(format, file_path.as_deref())?;
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else {
@ -505,7 +506,7 @@ impl PyStore {
/// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional
/// :rtype: bytes or None
/// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs.
/// :raises OSError: if an error happens during a quad lookup
/// :raises OSError: if an error happens during a quad lookup or file writing.
///
/// >>> store = Store()
/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')))
@ -531,29 +532,21 @@ impl PyStore {
} else {
None
};
let file_path = output.and_then(|output| output.extract::<PathBuf>().ok());
let format = rdf_format(format, file_path.as_deref())?;
let output = if let Some(output) = output {
if let Some(file_path) = &file_path {
PyWritable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyWritable::from_data(output)
}
} else {
PyWritable::Bytes(Vec::new())
};
py.allow_threads(|| {
let output = BufWriter::new(output);
if let Some(from_graph_name) = &from_graph_name {
self.inner.dump_graph(output, format, from_graph_name)
} else {
self.inner.dump_dataset(output, format)
}
.map_err(map_serializer_error)?
.into_inner()
.map_err(|e| map_io_err(e.into_error()))
})?
.close(py)
PyWritable::do_write::<RdfFormat>(
|output, format| {
py.allow_threads(|| {
if let Some(from_graph_name) = &from_graph_name {
self.inner.dump_graph(output, format, from_graph_name)
} else {
self.inner.dump_dataset(output, format)
}
.map_err(map_serializer_error)
})
},
output,
format,
py,
)
}
/// Returns an iterator over all the store named graphs.

@ -3,7 +3,16 @@ import unittest
from io import BytesIO, StringIO, UnsupportedOperation
from tempfile import NamedTemporaryFile, TemporaryFile
from pyoxigraph import Literal, NamedNode, Quad, parse, serialize
from pyoxigraph import (
Literal,
NamedNode,
Quad,
QueryBoolean,
QuerySolutions,
parse,
parse_query_results,
serialize,
)
EXAMPLE_TRIPLE = Quad(
NamedNode("http://example.com/foo"),
@ -131,7 +140,7 @@ class TestParse(unittest.TestCase):
class TestSerialize(unittest.TestCase):
def test_serialize_to_bytes(self) -> None:
self.assertEqual(
serialize([EXAMPLE_TRIPLE.triple], None, "text/turtle").decode(),
(serialize([EXAMPLE_TRIPLE.triple], None, "text/turtle") or b"").decode(),
'<http://example.com/foo> <http://example.com/p> "éù" .\n',
)
@ -162,3 +171,37 @@ class TestSerialize(unittest.TestCase):
output.getvalue(),
b'<http://example.com/g> {\n\t<http://example.com/foo> <http://example.com/p> "1" .\n}\n',
)
class TestParseQuerySolutions(unittest.TestCase):
def test_parse_file(self) -> None:
with NamedTemporaryFile(suffix=".tsv") as fp:
fp.write(
b'?s\t?p\t?o\n<http://example.com/s>\t<http://example.com/s>\t"1"\n'
)
fp.flush()
r = parse_query_results(fp.name)
self.assertIsInstance(r, QuerySolutions)
results = list(r) # type: ignore[arg-type]
self.assertEqual(results[0]["s"], NamedNode("http://example.com/s"))
self.assertEqual(results[0][2], Literal("1"))
def test_parse_not_existing_file(self) -> None:
with self.assertRaises(IOError) as _:
parse_query_results(
"/tmp/not-existing-oxigraph-file.ttl", "application/json"
)
def test_parse_str_io(self) -> None:
result = parse_query_results(StringIO("true"), "tsv")
self.assertIsInstance(result, QueryBoolean)
self.assertTrue(result)
def test_parse_bytes_io(self) -> None:
result = parse_query_results(BytesIO(b"false"), "tsv")
self.assertIsInstance(result, QueryBoolean)
self.assertFalse(result)
def test_parse_io_error(self) -> None:
with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp:
parse_query_results(fp, "srx")

@ -189,6 +189,39 @@ class TestStore(unittest.TestCase):
)
self.assertEqual(len(list(results)), 2)
def test_select_query_dump(self) -> None:
store = Store()
store.add(Quad(foo, bar, baz))
results = store.query("SELECT ?s WHERE { ?s ?p ?o }")
output = BytesIO()
results.serialize(output, "csv")
self.assertEqual(
output.getvalue().decode(),
"s\r\nhttp://foo\r\n",
)
def test_ask_query_dump(self) -> None:
store = Store()
store.add(Quad(foo, bar, baz))
results = store.query("ASK { ?s ?p ?o }")
output = BytesIO()
results.serialize(output, "csv")
self.assertEqual(
output.getvalue().decode(),
"true",
)
def test_construct_query_dump(self) -> None:
store = Store()
store.add(Quad(foo, bar, baz))
results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")
output = BytesIO()
results.serialize(output, "nt")
self.assertEqual(
output.getvalue().decode(),
"<http://foo> <http://bar> <http://baz> .\n",
)
def test_update_insert_data(self) -> None:
store = Store()
store.update("INSERT DATA { <http://foo> <http://foo> <http://foo> }")

Loading…
Cancel
Save