#![allow(clippy::needless_option_as_deref)]
use crate::model::{PyQuad, PyTriple};
use oxigraph::io::{FromReadQuadReader, ParseError, RdfFormat, RdfParser, RdfSerializer};
use oxigraph::model::QuadRef;
use pyo3::exceptions::{PySyntaxError, PyValueError};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::{intern, wrap_pyfunction};
use std::cmp::max;
use std::error::Error;
use std::fs::File;
use std::io::{self, BufWriter, Cursor, Read, Write};
use std::path::{Path, PathBuf};
use std::sync::OnceLock;
pub fn add_to_module(module: &PyModule) -> PyResult<()> {
module.add_wrapped(wrap_pyfunction!(parse))?;
module.add_wrapped(wrap_pyfunction!(serialize))
}
/// Parses RDF graph and dataset serialization formats.
///
/// It currently supports the following formats:
///
/// * `N-Triples `_ (``application/n-triples`` or ``nt``)
/// * `N-Quads `_ (``application/n-quads`` or ``nq``)
/// * `Turtle `_ (``text/turtle`` or ``ttl``)
/// * `TriG `_ (``application/trig`` or ``trig``)
/// * `N3 `_ (``text/n3`` or ``n3``)
/// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/turtle`` could also be used for `Turtle `_
/// and ``application/xml`` or ``xml`` for `RDF/XML `_.
///
/// :param input: The binary I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`.
/// :type format: str
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
/// :type base_iri: str or None, optional
/// :param without_named_graphs: Sets that the parser must fail if parsing a named graph.
/// :type without_named_graphs: bool, optional
/// :param rename_blank_nodes: Renames the blank nodes ids from the ones set in the serialization to random ids. This allows to avoid id conflicts when merging graphs together.
/// :type rename_blank_nodes: bool, optional
/// :return: an iterator of RDF triples or quads depending on the format.
/// :rtype: iterator(Quad)
/// :raises ValueError: if the format is not supported.
/// :raises SyntaxError: if the provided data is invalid.
///
/// >>> input = io.BytesIO(b'
"1" .')
/// >>> list(parse(input, "text/turtle", base_iri="http://example.com/"))
/// [ predicate= object=> graph_name=>]
#[pyfunction]
#[pyo3(signature = (input, format, *, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))]
pub fn parse(
input: PyObject,
format: &str,
base_iri: Option<&str>,
without_named_graphs: bool,
rename_blank_nodes: bool,
py: Python<'_>,
) -> PyResult {
let format = rdf_format(format)?;
let file_path = input.extract::(py).ok();
let input = if let Some(file_path) = &file_path {
PyReadable::from_file(file_path, py).map_err(map_io_err)?
} else {
PyReadable::from_data(input, py)
};
let mut parser = RdfParser::from_format(format);
if let Some(base_iri) = base_iri {
parser = parser
.with_base_iri(base_iri)
.map_err(|e| PyValueError::new_err(e.to_string()))?;
}
if without_named_graphs {
parser = parser.without_named_graphs();
}
if rename_blank_nodes {
parser = parser.rename_blank_nodes();
}
Ok(PyQuadReader {
inner: parser.parse_read(input),
file_path,
}
.into_py(py))
}
/// Serializes an RDF graph or dataset.
///
/// It currently supports the following formats:
///
/// * `N-Triples `_ (``application/n-triples`` or ``nt``)
/// * `N-Quads `_ (``application/n-quads`` or ``nq``)
/// * `Turtle `_ (``text/turtle`` or ``ttl``)
/// * `TriG `_ (``application/trig`` or ``trig``)
/// * `N3 `_ (``text/n3`` or ``n3``)
/// * `RDF/XML `_ (``application/rdf+xml`` or ``rdf``)
///
/// It supports also some media type and extension aliases.
/// For example, ``application/turtle`` could also be used for `Turtle `_
/// and ``application/xml`` or ``xml`` for `RDF/XML `_.
///
/// :param input: the RDF triples and quads to serialize.
/// :type input: iterable(Triple) or iterable(Quad)
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``.
/// :type output: io(bytes) or str or pathlib.Path
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`.
/// :type format: str
/// :rtype: None
/// :raises ValueError: if the format is not supported.
/// :raises TypeError: if a triple is given during a quad format serialization or reverse.
///
/// >>> output = io.BytesIO()
/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, "text/turtle")
/// >>> output.getvalue()
/// b' "1" .\n'
#[pyfunction]
pub fn serialize(input: &PyAny, output: PyObject, format: &str, py: Python<'_>) -> PyResult<()> {
let format = rdf_format(format)?;
let output = if let Ok(path) = output.extract::(py) {
PyWritable::from_file(&path, py).map_err(map_io_err)?
} else {
PyWritable::from_data(output)
};
let mut writer = RdfSerializer::from_format(format).serialize_to_write(BufWriter::new(output));
for i in input.iter()? {
let i = i?;
if let Ok(triple) = i.extract::>() {
writer.write_triple(&*triple)
} else {
let quad = i.extract::>()?;
let quad = QuadRef::from(&*quad);
if !quad.graph_name.is_default_graph() && !format.supports_datasets() {
return Err(PyValueError::new_err(
"The {format} format does not support named graphs",
));
}
writer.write_quad(quad)
}
.map_err(map_io_err)?;
}
writer
.finish()
.map_err(map_io_err)?
.into_inner()
.map_err(|e| map_io_err(e.into_error()))?
.close()
.map_err(map_io_err)
}
#[pyclass(name = "QuadReader", module = "pyoxigraph")]
pub struct PyQuadReader {
inner: FromReadQuadReader,
file_path: Option,
}
#[pymethods]
impl PyQuadReader {
fn __iter__(slf: PyRef<'_, Self>) -> PyRef {
slf
}
fn __next__(&mut self, py: Python<'_>) -> PyResult