Fork of https://github.com/oxigraph/oxigraph.git for the purpose of NextGraph project
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 
oxigraph/python/src/dataset.rs

327 lines
12 KiB

use crate::model::{hash, PyGraphNameRef, PyNamedNodeRef, PyQuad, PySubjectRef, PyTermRef};
use oxigraph::model::dataset::{CanonicalizationAlgorithm, Dataset};
use oxigraph::model::{Quad, QuadRef};
use pyo3::exceptions::PyKeyError;
use pyo3::prelude::*;
/// An in-memory `RDF dataset <https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset>`_.
///
/// It can accommodate a fairly large number of quads (in the few millions).
///
/// Use :py:class:`Store` if you need on-disk persistence or SPARQL.
///
/// Warning: It interns the strings and does not do any garbage collection yet:
/// if you insert and remove a lot of different terms, memory will grow without any reduction.
///
/// :param quads: some quads to initialize the dataset with.
/// :type quads: collections.abc.Iterable[Quad] or None, optional
///
/// The :py:class:`str` function provides an N-Quads serialization:
///
/// >>> str(Dataset([Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))]))
/// '<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n'
#[pyclass(name = "Dataset", module = "pyoxigraph")]
#[derive(Eq, PartialEq, Debug, Clone)]
pub struct PyDataset {
inner: Dataset,
}
#[pymethods]
impl PyDataset {
#[new]
#[pyo3(signature = (quads = None))]
fn new(quads: Option<&Bound<'_, PyAny>>) -> PyResult<Self> {
let mut inner = Dataset::new();
if let Some(quads) = quads {
for quad in quads.iter()? {
inner.insert(&*quad?.extract::<PyRef<'_, PyQuad>>()?);
}
}
Ok(Self { inner })
}
/// Looks for the quads with the given subject.
///
/// :param subject: the quad subject.
/// :type subject: NamedNode or BlankNode or Triple
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_subject(NamedNode('http://example.com')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
#[allow(clippy::needless_pass_by_value)]
pub fn quads_for_subject(&self, subject: PySubjectRef<'_>) -> QuadIter {
QuadIter {
inner: self
.inner
.quads_for_subject(&subject)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
}
}
/// Looks for the quads with the given predicate.
///
/// :param predicate: the quad predicate.
/// :type predicate: NamedNode
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_predicate(NamedNode('http://example.com/p')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
#[allow(clippy::needless_pass_by_value)]
pub fn quads_for_predicate(&self, predicate: PyNamedNodeRef<'_>) -> QuadIter {
QuadIter {
inner: self
.inner
.quads_for_predicate(&predicate)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
}
}
/// Looks for the quads with the given object.
///
/// :param object: the quad object.
/// :type object: NamedNode or BlankNode or Literal or Triple
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_object(Literal('1')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
#[allow(clippy::needless_pass_by_value)]
pub fn quads_for_object(&self, object: PyTermRef<'_>) -> QuadIter {
QuadIter {
inner: self
.inner
.quads_for_object(&object)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
}
}
/// Looks for the quads with the given graph name.
///
/// :param graph_name: the quad graph name.
/// :type graph_name: NamedNode or BlankNode or DefaultGraph
/// :return: an iterator of the quads.
/// :rtype: collections.abc.Iterator[Quad]
///
/// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))])
/// >>> list(store.quads_for_graph_name(NamedNode('http://example.com/g')))
/// [<Quad subject=<NamedNode value=http://example.com> predicate=<NamedNode value=http://example.com/p> object=<Literal value=1 datatype=<NamedNode value=http://www.w3.org/2001/XMLSchema#string>> graph_name=<NamedNode value=http://example.com/g>>]
#[allow(clippy::needless_pass_by_value)]
pub fn quads_for_graph_name(&self, graph_name: PyGraphNameRef<'_>) -> QuadIter {
QuadIter {
inner: self
.inner
.quads_for_graph_name(&graph_name)
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
}
}
/// Adds a quad to the dataset.
///
/// :param quad: the quad to add.
/// :type quad: Quad
/// :rtype: None
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset()
/// >>> dataset.add(quad)
/// >>> quad in dataset
/// True
fn add(&mut self, quad: &PyQuad) {
self.inner.insert(quad);
}
/// Removes a quad from the dataset and raises an exception if it is not in the set.
///
/// :param quad: the quad to remove.
/// :type quad: Quad
/// :rtype: None
/// :raises KeyError: if the element was not in the set.
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset([quad])
/// >>> dataset.remove(quad)
/// >>> quad in dataset
/// False
fn remove(&mut self, quad: &PyQuad) -> PyResult<()> {
if self.inner.remove(quad) {
Ok(())
} else {
Err(PyKeyError::new_err(format!(
"{} is not in the Dataset",
QuadRef::from(quad)
)))
}
}
/// Removes a quad from the dataset if it is present.
///
/// :param quad: the quad to remove.
/// :type quad: Quad
/// :rtype: None
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset([quad])
/// >>> dataset.discard(quad)
/// >>> quad in dataset
/// False
fn discard(&mut self, quad: &PyQuad) {
self.inner.remove(quad);
}
/// Removes all quads from the dataset.
///
/// :rtype: None
///
/// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))
/// >>> dataset = Dataset([quad])
/// >>> dataset.clear()
/// >>> len(dataset)
/// 0
fn clear(&mut self) {
self.inner.clear()
}
/// Canonicalizes the dataset by renaming blank nodes.
///
/// Warning: Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes.
/// Hence, this canonization might not be suitable for diffs.
///
/// Warning: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset.
///
/// :param algorithm: the canonicalization algorithm to use.
/// :type algorithm: CanonicalizationAlgorithm
/// :rtype: None
///
/// >>> d1 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
/// >>> d2 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())])
/// >>> d1 == d2
/// False
/// >>> d1.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
/// >>> d2.canonicalize(CanonicalizationAlgorithm.UNSTABLE)
/// >>> d1 == d2
/// True
fn canonicalize(&mut self, algorithm: &PyCanonicalizationAlgorithm) {
self.inner.canonicalize(algorithm.inner)
}
fn __str__(&self) -> String {
self.inner.to_string()
}
fn __bool__(&self) -> bool {
self.inner.is_empty()
}
fn __eq__(&self, other: &Self) -> bool {
self.inner == other.inner
}
fn __ne__(&self, other: &Self) -> bool {
self.inner != other.inner
}
fn __len__(&self) -> usize {
self.inner.len()
}
fn __contains__(&self, quad: &PyQuad) -> bool {
self.inner.contains(quad)
}
fn __iter__(&self) -> QuadIter {
// TODO: very inefficient
QuadIter {
inner: self
.inner
.iter()
.map(QuadRef::into_owned)
.collect::<Vec<_>>()
.into_iter(),
}
}
}
#[pyclass(unsendable, module = "pyoxigraph")]
pub struct QuadIter {
inner: std::vec::IntoIter<Quad>,
}
#[pymethods]
impl QuadIter {
fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}
fn __next__(&mut self) -> Option<PyQuad> {
Some(self.inner.next()?.into())
}
}
/// RDF canonicalization algorithms.
///
/// The following algorithms are supported:
///
/// * :py:attr:`CanonicalizationAlgorithm.UNSTABLE`: an unstable algorithm preferred by PyOxigraph.
#[pyclass(name = "CanonicalizationAlgorithm", module = "pyoxigraph")]
#[derive(Clone)]
pub struct PyCanonicalizationAlgorithm {
inner: CanonicalizationAlgorithm,
}
#[pymethods]
impl PyCanonicalizationAlgorithm {
/// The algorithm preferred by PyOxigraph.
///
/// Warning: Might change between Oxigraph versions. No stability guaranties.
#[classattr]
const UNSTABLE: Self = Self {
inner: CanonicalizationAlgorithm::Unstable,
};
fn __repr__(&self) -> String {
format!(
"<CanonicalizationAlgorithm {}>",
match self.inner {
CanonicalizationAlgorithm::Unstable => "unstable",
_ => "unknown",
}
)
}
fn __hash__(&self) -> u64 {
hash(&self.inner)
}
fn __eq__(&self, other: &Self) -> bool {
self.inner == other.inner
}
fn __ne__(&self, other: &Self) -> bool {
self.inner != other.inner
}
/// :rtype: CanonicalizationAlgorithm
fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
slf
}
/// :type memo: typing.Any
/// :rtype: CanonicalizationAlgorithm
#[allow(unused_variables)]
fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> {
slf
}
}