Python: any os.PathLike path is now supported

Improves stub generation
pull/665/head
Tpt 11 months ago committed by Thomas Tanon
parent 8a7c6cf2c1
commit 48db7f872b
  1. 127
      python/generate_stubs.py
  2. 8
      python/src/io.rs
  3. 10
      python/src/sparql.rs
  4. 20
      python/src/store.rs

@ -5,65 +5,60 @@ import inspect
import logging
import re
import subprocess
from functools import reduce
from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union
def _path_to_type(*elements: str) -> ast.AST:
base: ast.AST = ast.Name(id=elements[0], ctx=AST_LOAD)
def path_to_type(*elements: str) -> ast.AST:
base: ast.AST = ast.Name(id=elements[0], ctx=ast.Load())
for e in elements[1:]:
base = ast.Attribute(value=base, attr=e, ctx=AST_LOAD)
base = ast.Attribute(value=base, attr=e, ctx=ast.Load())
return base
AST_LOAD = ast.Load()
AST_ELLIPSIS = ast.Ellipsis()
AST_STORE = ast.Store()
AST_TYPING_ANY = _path_to_type("typing", "Any")
GENERICS = {
"iterable": _path_to_type("typing", "Iterable"),
"iterator": _path_to_type("typing", "Iterator"),
"list": _path_to_type("typing", "List"),
"io": _path_to_type("typing", "IO"),
}
OBJECT_MEMBERS = dict(inspect.getmembers(object))
BUILTINS: Dict[str, Union[None, Tuple[List[ast.AST], ast.AST]]] = {
"__annotations__": None,
"__bool__": ([], _path_to_type("bool")),
"__bytes__": ([], _path_to_type("bytes")),
"__bool__": ([], path_to_type("bool")),
"__bytes__": ([], path_to_type("bytes")),
"__class__": None,
"__contains__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__contains__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__del__": None,
"__delattr__": ([_path_to_type("str")], _path_to_type("None")),
"__delitem__": ([AST_TYPING_ANY], AST_TYPING_ANY),
"__delattr__": ([path_to_type("str")], path_to_type("None")),
"__delitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")),
"__dict__": None,
"__dir__": None,
"__doc__": None,
"__eq__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__format__": ([_path_to_type("str")], _path_to_type("str")),
"__ge__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__getattribute__": ([_path_to_type("str")], AST_TYPING_ANY),
"__getitem__": ([AST_TYPING_ANY], AST_TYPING_ANY),
"__gt__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__hash__": ([], _path_to_type("int")),
"__init__": ([], _path_to_type("None")),
"__eq__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__format__": ([path_to_type("str")], path_to_type("str")),
"__ge__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__getattribute__": ([path_to_type("str")], path_to_type("typing", "Any")),
"__getitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")),
"__gt__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__hash__": ([], path_to_type("int")),
"__init__": ([], path_to_type("None")),
"__init_subclass__": None,
"__iter__": ([], AST_TYPING_ANY),
"__le__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__len__": ([], _path_to_type("int")),
"__lt__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__iter__": ([], path_to_type("typing", "Any")),
"__le__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__len__": ([], path_to_type("int")),
"__lt__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__module__": None,
"__ne__": ([AST_TYPING_ANY], _path_to_type("bool")),
"__ne__": ([path_to_type("typing", "Any")], path_to_type("bool")),
"__new__": None,
"__next__": ([], AST_TYPING_ANY),
"__next__": ([], path_to_type("typing", "Any")),
"__reduce__": None,
"__reduce_ex__": None,
"__repr__": ([], _path_to_type("str")),
"__setattr__": ([_path_to_type("str"), AST_TYPING_ANY], _path_to_type("None")),
"__setitem__": ([AST_TYPING_ANY, AST_TYPING_ANY], AST_TYPING_ANY),
"__repr__": ([], path_to_type("str")),
"__setattr__": (
[path_to_type("str"), path_to_type("typing", "Any")],
path_to_type("None"),
),
"__setitem__": (
[path_to_type("typing", "Any"), path_to_type("typing", "Any")],
path_to_type("typing", "Any"),
),
"__sizeof__": None,
"__str__": ([], _path_to_type("str")),
"__str__": ([], path_to_type("str")),
"__subclasshook__": None,
}
@ -136,11 +131,11 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i
elif member_name == "__match_args__":
constants.append(
ast.AnnAssign(
target=ast.Name(id=member_name, ctx=AST_STORE),
target=ast.Name(id=member_name, ctx=ast.Store()),
annotation=ast.Subscript(
value=_path_to_type("typing", "Tuple"),
slice=ast.Tuple(elts=[_path_to_type("str"), ast.Ellipsis()], ctx=AST_LOAD),
ctx=AST_LOAD,
value=path_to_type("tuple"),
slice=ast.Tuple(elts=[path_to_type("str"), ast.Ellipsis()], ctx=ast.Load()),
ctx=ast.Load(),
),
value=ast.Constant(member_value),
simple=1,
@ -156,8 +151,8 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i
bases=[],
keywords=[],
body=(([doc_comment] if doc_comment else []) + attributes + methods + magic_methods + constants)
or [AST_ELLIPSIS],
decorator_list=[_path_to_type("typing", "final")],
or [ast.Ellipsis()],
decorator_list=[path_to_type("typing", "final")],
)
@ -182,8 +177,8 @@ def data_descriptor_stub(
)
assign = ast.AnnAssign(
target=ast.Name(id=data_desc_name, ctx=AST_STORE),
annotation=annotation or AST_TYPING_ANY,
target=ast.Name(id=data_desc_name, ctx=ast.Store()),
annotation=annotation or path_to_type("typing", "Any"),
simple=1,
)
doc_comment = build_doc_comment(doc_comment) if doc_comment else None
@ -212,7 +207,7 @@ def function_stub(
return ast.FunctionDef(
fn_name,
arguments_stub(fn_name, fn_def, doc or "", element_path, types_to_import),
body or [AST_ELLIPSIS],
body or [ast.Ellipsis()],
decorator_list=decorator_list,
returns=returns_stub(fn_name, doc, element_path, types_to_import) if doc else None,
lineno=0,
@ -352,11 +347,11 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S
# let's first parse nested parenthesis
stack: List[List[Any]] = [[]]
for token in tokens:
if token == "(":
if token == "[":
children: List[str] = []
stack[-1].append(children)
stack.append(children)
elif token == ")":
elif token == "]":
stack.pop()
else:
stack[-1].append(token)
@ -376,39 +371,31 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S
new_elements: List[ast.AST] = []
for group in or_groups:
if len(group) == 1 and isinstance(group[0], str):
parts = group[0].split(".")
if any(not p for p in parts):
raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}")
if len(parts) > 1:
types_to_import.add(parts[0])
new_elements.append(_path_to_type(*parts))
new_elements.append(concatenated_path_to_type(group[0], element_path, types_to_import))
elif len(group) == 2 and isinstance(group[0], str) and isinstance(group[1], list):
if group[0] not in GENERICS:
raise ValueError(
f"Constructor {group[0]} is not supported in type '{type_str}' used by {'.'.join(element_path)}"
)
new_elements.append(
ast.Subscript(
value=GENERICS[group[0]],
value=concatenated_path_to_type(group[0], element_path, types_to_import),
slice=parse_sequence(group[1]),
ctx=AST_LOAD,
ctx=ast.Load(),
)
)
else:
raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}")
return (
ast.Subscript(
value=_path_to_type("typing", "Union"),
slice=ast.Tuple(elts=new_elements, ctx=AST_LOAD),
ctx=AST_LOAD,
)
if len(new_elements) > 1
else new_elements[0]
)
return reduce(lambda left, right: ast.BinOp(left=left, op=ast.BitOr(), right=right), new_elements)
return parse_sequence(stack[0])
def concatenated_path_to_type(path: str, element_path: List[str], types_to_import: Set[str]) -> ast.AST:
    """Turn a dotted name such as ``typing.Any`` into an AST expression.

    The name is split on ``.`` and handed to ``path_to_type``. When the name
    is qualified (more than one segment), everything before the last segment
    is recorded in ``types_to_import``.

    :param path: the dotted type name to convert.
    :param element_path: path of the element using the type, only used to build the error message.
    :param types_to_import: set that receives the qualifying prefix of ``path``, if any.
    :return: the AST node built by ``path_to_type`` from the segments.
    :raises ValueError: if any dot-separated segment of ``path`` is empty.
    """
    segments = path.split(".")
    # An empty segment means a leading, trailing or doubled dot (or an empty name).
    if not all(segments):
        raise ValueError(f"Not able to parse type '{path}' used by {'.'.join(element_path)}")
    *qualifier, _ = segments
    if qualifier:
        types_to_import.add(".".join(qualifier))
    return path_to_type(*segments)
def build_doc_comment(doc: str) -> Optional[ast.Expr]:
lines = [line.strip() for line in doc.split("\n")]
clean_lines = []

@ -31,7 +31,7 @@ use std::sync::OnceLock;
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -41,7 +41,7 @@ use std::sync::OnceLock;
/// :param rename_blank_nodes: Renames the blank node identifiers from the ones set in the serialization to random ids. This avoids identifier conflicts when merging graphs together.
/// :type rename_blank_nodes: bool, optional
/// :return: an iterator of RDF triples or quads depending on the format.
/// :rtype: iterator(Quad)
/// :rtype: collections.abc.Iterator[Quad]
/// :raises ValueError: if the format is not supported.
/// :raises SyntaxError: if the provided data is invalid.
/// :raises OSError: if a system error happens while reading the file.
@ -101,9 +101,9 @@ pub fn parse(
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: the RDF triples and quads to serialize.
/// :type input: iterable(Triple) or iterable(Quad)
/// :type input: collections.abc.Iterable[Triple] or collections.abc.Iterable[Quad]
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set.

@ -195,7 +195,7 @@ enum PyQuerySolutionsVariant {
#[pymethods]
impl PyQuerySolutions {
/// :return: the ordered list of all variables that could appear in the query results
/// :rtype: list(Variable)
/// :rtype: list[Variable]
///
/// >>> store = Store()
/// >>> store.query('SELECT ?s WHERE { ?s ?p ?o }').variables
@ -225,7 +225,7 @@ impl PyQuerySolutions {
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
/// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
@ -325,7 +325,7 @@ impl PyQueryBoolean {
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
/// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
@ -403,7 +403,7 @@ impl PyQueryTriples {
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :rtype: bytes or None
@ -461,7 +461,7 @@ impl PyQueryTriples {
/// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`.

@ -28,7 +28,7 @@ use std::path::PathBuf;
/// :param path: the path of the directory in which the store should read and write its data. If the directory does not exist, it is created.
/// If no directory is provided a temporary one is created and removed when the Python garbage collector removes the store.
/// In this case, the store data are kept in memory and never written on disk.
/// :type path: str or pathlib.Path or None, optional
/// :type path: str or os.PathLike[str] or None, optional
/// :raises OSError: if the target directory contains invalid data or could not be accessed.
///
/// The :py:class:`str` function provides a serialization of the store in NQuads:
@ -137,7 +137,7 @@ impl PyStore {
/// The :py:func:`bulk_extend` method is also available for much faster loading of a large number of quads but without transactional guarantees.
///
/// :param quads: the quads to add.
/// :type quads: iterable(Quad)
/// :type quads: collections.abc.Iterable[Quad]
/// :rtype: None
/// :raises OSError: if an error happens during the quad insertion.
///
@ -162,7 +162,7 @@ impl PyStore {
/// Only a part of the data might be written to the store.
///
/// :param quads: the quads to add.
/// :type quads: iterable(Quad)
/// :type quads: collections.abc.Iterable[Quad]
/// :rtype: None
/// :raises OSError: if an error happens during the quad insertion.
///
@ -210,7 +210,7 @@ impl PyStore {
/// :param graph_name: the quad graph name. To match only the default graph, use :py:class:`DefaultGraph`. To match everything use :py:const:`None`.
/// :type graph_name: NamedNode or BlankNode or DefaultGraph or None, optional
/// :return: an iterator of the quads matching the pattern.
/// :rtype: iterator(Quad)
/// :rtype: collections.abc.Iterator[Quad]
/// :raises OSError: if an error happens during the quads lookup.
///
/// >>> store = Store()
@ -246,9 +246,9 @@ impl PyStore {
/// :param use_default_graph_as_union: if the SPARQL query should look for triples in all the dataset graphs by default (i.e. without `GRAPH` operations). Disabled by default.
/// :type use_default_graph_as_union: bool, optional
/// :param default_graph: list of the graphs that should be used as the query default graph. By default, the store default graph is used.
/// :type default_graph: NamedNode or BlankNode or DefaultGraph or list(NamedNode or BlankNode or DefaultGraph) or None, optional
/// :type default_graph: NamedNode or BlankNode or DefaultGraph or list[NamedNode or BlankNode or DefaultGraph] or None, optional
/// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available.
/// :type named_graphs: list(NamedNode or BlankNode) or None, optional
/// :type named_graphs: list[NamedNode or BlankNode] or None, optional
/// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries.
/// :rtype: QuerySolutions or QueryBoolean or QueryTriples
/// :raises SyntaxError: if the provided query is invalid.
@ -361,7 +361,7 @@ impl PyStore {
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -430,7 +430,7 @@ impl PyStore {
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
/// :type input: io(bytes) or io(str) or str or pathlib.Path
/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@ -497,7 +497,7 @@ impl PyStore {
/// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
///
/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
/// :type output: io(bytes) or str or pathlib.Path or None, optional
/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
/// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
/// :type format: str or None, optional
/// :param from_graph: the store graph from which to dump the triples. Required if the serialization format does not support named graphs. If it does support named graphs, the full dataset is written.
@ -551,7 +551,7 @@ impl PyStore {
/// Returns an iterator over all the store named graphs.
///
/// :return: an iterator of the store graph names.
/// :rtype: iterator(NamedNode or BlankNode)
/// :rtype: collections.abc.Iterator[NamedNode or BlankNode]
/// :raises OSError: if an error happens during the named graphs lookup.
///
/// >>> store = Store()

Loading…
Cancel
Save