Python: exposes the Dataset class

pull/832/head
Tpt 9 months ago committed by Thomas Tanon
parent 2998f795fd
commit f7023a381e
  1. 3
      lib/oxrdf/src/dataset.rs
  2. 3
      lib/oxrdf/src/graph.rs
  3. 9
      python/docs/model.rst
  4. 322
      python/src/dataset.rs
  5. 4
      python/src/lib.rs

@ -504,8 +504,7 @@ impl Dataset {
}
}
/// Applies on the dataset the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf).
/// Canonicalizes the dataset by renaming blank nodes.
///
/// Usage example ([Dataset isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-dataset-isomorphism)):
/// ```

@ -184,8 +184,7 @@ impl Graph {
self.dataset.clear()
}
/// Applies on the graph the canonicalization process described in
/// [Canonical Forms for Isomorphic and Equivalent RDF Graphs: Algorithms for Leaning and Labelling Blank Nodes, Aidan Hogan, 2017](http://aidanhogan.com/docs/rdf-canonicalisation.pdf).
/// Canonicalizes the graph by renaming blank nodes.
///
/// Usage example ([Graph isomorphism](https://www.w3.org/TR/rdf11-concepts/#dfn-graph-isomorphism)):
/// ```

@ -36,3 +36,12 @@ Quads (`triples <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-triple>`_ in a `R
.. autoclass:: DefaultGraph
:members:
`Datasets <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.. autoclass:: Dataset
:members:
.. autoclass:: CanonicalizationAlgorithm
:members:

@ -0,0 +1,322 @@
use crate::model::{hash, PyGraphNameRef, PyNamedNodeRef, PyQuad, PySubjectRef, PyTermRef};
use oxigraph::model::dataset::{CanonicalizationAlgorithm, Dataset};
use oxigraph::model::{Quad, QuadRef};
use pyo3::exceptions::PyKeyError;
use pyo3::prelude::*;
/// An in-memory `RDF dataset <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_.
///
/// It is able to hold a fairly large number of quads (a few millions).
///
/// Prefer :py:class:`Store` when on-disk persistence or SPARQL is required.
///
/// Warning: Strings are interned and are not garbage collected yet:
/// inserting and removing a lot of different terms makes the memory usage grow without ever shrinking.
///
/// :param quads: some quads to initialize the dataset with.
/// :type quads: collections.abc.Iterable[Quad]
///
/// The :py:class:`str` function provides an N-Quads serialization:
///
/// >>> str(Dataset([Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))]))
/// '<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n'
#[pyclass(module = "pyoxigraph", name = "Dataset")]
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PyDataset {
    // The wrapped Rust dataset; every Python-facing method delegates to it.
    inner: Dataset,
}
#[pymethods]
impl PyDataset {
#[new]
#[pyo3(signature = (quads = None))]
fn new(quads: Option<&PyAny>) -> PyResult<Self> {
let mut inner = Dataset::new();
if let Some(quads) = quads {
for quad in quads.iter()? {
inner.insert(&*quad?.extract::<PyRef<'_, PyQuad>>()?);
}
}
Ok(Self { inner })
}
/// Looks for the quads with the given subject.
///
/// :param subject: the quad subject.
/// :type subject: NamedNode or BlankNode or Triple
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_subject(NamedNode('http://example.com')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
pub fn quads_for_subject(&self, subject: &PyAny) -> PyResult<QuadIter> {
Ok(QuadIter {
inner: self
.inner
.quads_for_subject(&PySubjectRef::try_from(subject)?)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
})
}
/// Looks for the quads with the given predicate.
///
/// :param predicate: the quad predicate.
/// :type subject: NamedNode
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_predicate(NamedNode('http://example.com/p')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
pub fn quads_for_predicate(&self, predicate: &PyAny) -> PyResult<QuadIter> {
Ok(QuadIter {
inner: self
.inner
.quads_for_predicate(&PyNamedNodeRef::try_from(predicate)?)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
})
}
/// Looks for the quads with the given object.
///
/// :param object: the quad object.
/// :type object: NamedNode or BlankNode or Literal or Triple
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_object(Literal('1')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
pub fn quads_for_object(&self, object: &PyAny) -> PyResult<QuadIter> {
Ok(QuadIter {
inner: self
.inner
.quads_for_object(&PyTermRef::try_from(object)?)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
})
}
/// Looks for the quads with the given graph name.
///
/// :param graph_name: the quad graph name.
/// :type graph_name: NamedNode or BlankNode or Literal or Triple
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_graph_name(NamedNode('http://example.com/g')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
pub fn quads_for_graph_name(&self, graph_name: &PyAny) -> PyResult<QuadIter> {
Ok(QuadIter {
inner: self
.inner
.quads_for_graph_name(&PyGraphNameRef::try_from(graph_name)?)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
})
}
/// Adds a quad to the dataset.
///
/// :param quad: the quad to add.
/// :type quad: Quad
/// :rtype: None
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset()
/// >>> dataset.add(quad)
/// >>> quad in dataset
/// True
fn add(&mut self, quad: &PyQuad) {
self.inner.insert(quad);
}
/// Removes a quad from the dataset and raises an exception if it is not in the set.
///
/// :param quad: the quad to remove.
/// :type quad: Quad
/// :rtype: None
/// :raises KeyError: if the element was not in the set.
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset([quad])
/// >>> dataset.remove(quad)
/// >>> quad in dataset
/// False
fn remove(&mut self, quad: &PyQuad) -> PyResult<()> {
if self.inner.remove(quad) {
Ok(())
} else {
Err(PyKeyError::new_err(format!(
"{} is not in the Dataset",
QuadRef::from(quad)
)))
}
}
/// Removes a quad from the dataset if it is present.
///
/// :param quad: the quad to remove.
/// :type quad: Quad
/// :rtype: None
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset([quad])
/// >>> dataset.discard(quad)
/// >>> quad in dataset
/// False
fn discard(&mut self, quad: &PyQuad) {
self.inner.remove(quad);
}
/// Removes all quads from the dataset.
///
/// :rtype: None
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset([quad])
/// >>> dataset.clear()
/// >>> len(dataset)
/// 0
fn clear(&mut self) {
self.inner.clear()
}
/// Canonicalizes the dataset by renaming blank nodes.
///
/// Warning: Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
/// Hence, this canonization might not be suitable for diffs.
///
/// Warning: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
///
/// :param algorithm: the canonicalization algorithm to use.
/// :type algorithm: CanonicalizationAlgorithm
/// :rtype: None
///
/// >>> d1 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
/// >>> d2 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
/// >>> d1 == d2
/// False
/// >>> d1.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
/// >>> d2.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
/// >>> d1 == d2
/// True
fn canonicalize(&mut self, algorithm: &PyCanonicalizationAlgorithm) {
self.inner.canonicalize(algorithm.inner)
}
fn __str__(&self) -> String {
self.inner.to_string()
}
fn __bool__(&self) -> bool {
self.inner.is_empty()
}
fn __eq__(&self, other: &Self) -> bool {
self.inner == other.inner
}
fn __ne__(&self, other: &Self) -> bool {
self.inner != other.inner
}
fn __len__(&self) -> usize {
self.inner.len()
}
fn __contains__(&self, quad: &PyQuad) -> bool {
self.inner.contains(quad)
}
fn __iter__(&self) -> QuadIter {
// TODO: very inefficient
QuadIter {
inner: self
.inner
.iter()
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
}
}
}
// Python iterator over quads that were copied out of a dataset.
// It owns its elements, so it holds no reference to the dataset it came from.
#[pyclass(module = "pyoxigraph", unsendable)]
pub struct QuadIter {
    // Remaining quads, consumed front to back by `__next__`.
    inner: std::vec::IntoIter<Quad>,
}
#[pymethods]
impl QuadIter {
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}
fn __next__(&mut self) -> Option<PyQuad> {
Some(self.inner.next()?.into())
}
}
/// RDF canonicalization algorithms.
///
/// The following algorithms are supported:
/// * :py:attr:`CanonicalizationAlgorithm.UNSTABLE`: an unstable algorithm preferred by PyOxigraph.
#[pyclass(module = "pyoxigraph", name = "CanonicalizationAlgorithm")]
#[derive(Clone)]
pub struct PyCanonicalizationAlgorithm {
    // The wrapped Rust algorithm selector, passed to `Dataset::canonicalize`.
    inner: CanonicalizationAlgorithm,
}
#[pymethods]
impl PyCanonicalizationAlgorithm {
    /// The algorithm preferred by PyOxigraph.
    ///
    /// Warning: Might change between Oxigraph versions. No stability guarantees.
    #[classattr]
    const UNSTABLE: Self = Self {
        inner: CanonicalizationAlgorithm::Unstable,
    };

    fn __repr__(&self) -> String {
        // The catch-all arm covers any variant this binding does not name explicitly.
        let label = match self.inner {
            CanonicalizationAlgorithm::Unstable => "unstable",
            _ => "unknown",
        };
        format!("<CanonicalizationAlgorithm {label}>")
    }

    fn __hash__(&self) -> u64 {
        hash(&self.inner)
    }

    fn __eq__(&self, other: &Self) -> bool {
        self.inner == other.inner
    }

    fn __ne__(&self, other: &Self) -> bool {
        !self.__eq__(other)
    }

    /// :rtype: CanonicalizationAlgorithm
    fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
        // Hands back the same object instead of building a copy.
        slf
    }

    /// :type memo: typing.Any
    /// :rtype: CanonicalizationAlgorithm
    // `memo` is required by the `copy.deepcopy` protocol but is not used here.
    #[allow(unused_variables)]
    fn __deepcopy__<'py>(slf: PyRef<'py, Self>, memo: &'_ PyAny) -> PyRef<'py, Self> {
        slf
    }
}

@ -4,11 +4,13 @@
unused_qualifications
)]
mod dataset;
mod io;
mod model;
mod sparql;
mod store;
use crate::dataset::*;
use crate::io::*;
use crate::model::*;
use crate::sparql::*;
@ -28,6 +30,7 @@ fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
module.add_class::<PyDefaultGraph>()?;
module.add_class::<PyTriple>()?;
module.add_class::<PyQuad>()?;
module.add_class::<PyDataset>()?;
module.add_class::<PyStore>()?;
module.add_class::<PyVariable>()?;
module.add_class::<PyQuerySolutions>()?;
@ -36,6 +39,7 @@ fn pyoxigraph(_py: Python<'_>, module: &PyModule) -> PyResult<()> {
module.add_class::<PyQueryTriples>()?;
module.add_class::<PyRdfFormat>()?;
module.add_class::<PyQueryResultsFormat>()?;
module.add_class::<PyCanonicalizationAlgorithm>()?;
module.add_wrapped(wrap_pyfunction!(parse))?;
module.add_wrapped(wrap_pyfunction!(parse_query_results))?;
module.add_wrapped(wrap_pyfunction!(serialize))?;

Loading…
Cancel
Save