Python: any os.PathLike path is now supported

Improves stub generation
2 years ago · 48db7f872b
parent 8a7c6cf2c1
commit 48db7f872b
4 changed files with 76 additions and 89 deletions
--- a/python/generate_stubs.py
+++ b/python/generate_stubs.py
@@ -5,65 +5,60 @@ import inspect
 import logging
 import re
 import subprocess
+from functools import reduce
 from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union


-def _path_to_type(*elements: str) -> ast.AST:
-    base: ast.AST = ast.Name(id=elements[0], ctx=AST_LOAD)
+def path_to_type(*elements: str) -> ast.AST:
+    base: ast.AST = ast.Name(id=elements[0], ctx=ast.Load())
    for e in elements[1:]:
-        base = ast.Attribute(value=base, attr=e, ctx=AST_LOAD)
+        base = ast.Attribute(value=base, attr=e, ctx=ast.Load())
    return base


-AST_LOAD = ast.Load()
-AST_ELLIPSIS = ast.Ellipsis()
-AST_STORE = ast.Store()
-AST_TYPING_ANY = _path_to_type("typing", "Any")
-GENERICS = {
-    "iterable": _path_to_type("typing", "Iterable"),
-    "iterator": _path_to_type("typing", "Iterator"),
-    "list": _path_to_type("typing", "List"),
-    "io": _path_to_type("typing", "IO"),
-}
 OBJECT_MEMBERS = dict(inspect.getmembers(object))
-
-
 BUILTINS: Dict[str, Union[None, Tuple[List[ast.AST], ast.AST]]] = {
    "__annotations__": None,
-    "__bool__": ([], _path_to_type("bool")),
-    "__bytes__": ([], _path_to_type("bytes")),
+    "__bool__": ([], path_to_type("bool")),
+    "__bytes__": ([], path_to_type("bytes")),
    "__class__": None,
-    "__contains__": ([AST_TYPING_ANY], _path_to_type("bool")),
+    "__contains__": ([path_to_type("typing", "Any")], path_to_type("bool")),
    "__del__": None,
-    "__delattr__": ([_path_to_type("str")], _path_to_type("None")),
-    "__delitem__": ([AST_TYPING_ANY], AST_TYPING_ANY),
+    "__delattr__": ([path_to_type("str")], path_to_type("None")),
+    "__delitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")),
    "__dict__": None,
    "__dir__": None,
    "__doc__": None,
-    "__eq__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__format__": ([_path_to_type("str")], _path_to_type("str")),
-    "__ge__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__getattribute__": ([_path_to_type("str")], AST_TYPING_ANY),
-    "__getitem__": ([AST_TYPING_ANY], AST_TYPING_ANY),
-    "__gt__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__hash__": ([], _path_to_type("int")),
-    "__init__": ([], _path_to_type("None")),
+    "__eq__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__format__": ([path_to_type("str")], path_to_type("str")),
+    "__ge__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__getattribute__": ([path_to_type("str")], path_to_type("typing", "Any")),
+    "__getitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")),
+    "__gt__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__hash__": ([], path_to_type("int")),
+    "__init__": ([], path_to_type("None")),
    "__init_subclass__": None,
-    "__iter__": ([], AST_TYPING_ANY),
-    "__le__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__len__": ([], _path_to_type("int")),
-    "__lt__": ([AST_TYPING_ANY], _path_to_type("bool")),
+    "__iter__": ([], path_to_type("typing", "Any")),
+    "__le__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__len__": ([], path_to_type("int")),
+    "__lt__": ([path_to_type("typing", "Any")], path_to_type("bool")),
    "__module__": None,
-    "__ne__": ([AST_TYPING_ANY], _path_to_type("bool")),
+    "__ne__": ([path_to_type("typing", "Any")], path_to_type("bool")),
    "__new__": None,
-    "__next__": ([], AST_TYPING_ANY),
+    "__next__": ([], path_to_type("typing", "Any")),
    "__reduce__": None,
    "__reduce_ex__": None,
-    "__repr__": ([], _path_to_type("str")),
-    "__setattr__": ([_path_to_type("str"), AST_TYPING_ANY], _path_to_type("None")),
-    "__setitem__": ([AST_TYPING_ANY, AST_TYPING_ANY], AST_TYPING_ANY),
+    "__repr__": ([], path_to_type("str")),
+    "__setattr__": (
+        [path_to_type("str"), path_to_type("typing", "Any")],
+        path_to_type("None"),
+    ),
+    "__setitem__": (
+        [path_to_type("typing", "Any"), path_to_type("typing", "Any")],
+        path_to_type("typing", "Any"),
+    ),
    "__sizeof__": None,
-    "__str__": ([], _path_to_type("str")),
+    "__str__": ([], path_to_type("str")),
    "__subclasshook__": None,
 }

@@ -136,11 +131,11 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i
        elif member_name == "__match_args__":
            constants.append(
                ast.AnnAssign(
-                    target=ast.Name(id=member_name, ctx=AST_STORE),
+                    target=ast.Name(id=member_name, ctx=ast.Store()),
                    annotation=ast.Subscript(
-                        value=_path_to_type("typing", "Tuple"),
-                        slice=ast.Tuple(elts=[_path_to_type("str"), ast.Ellipsis()], ctx=AST_LOAD),
-                        ctx=AST_LOAD,
+                        value=path_to_type("tuple"),
+                        slice=ast.Tuple(elts=[path_to_type("str"), ast.Ellipsis()], ctx=ast.Load()),
+                        ctx=ast.Load(),
                    ),
                    value=ast.Constant(member_value),
                    simple=1,
@@ -156,8 +151,8 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i
        bases=[],
        keywords=[],
        body=(([doc_comment] if doc_comment else []) + attributes + methods + magic_methods + constants)
-        or [AST_ELLIPSIS],
-        decorator_list=[_path_to_type("typing", "final")],
+        or [ast.Ellipsis()],
+        decorator_list=[path_to_type("typing", "final")],
    )


@@ -182,8 +177,8 @@ def data_descriptor_stub(
            )

    assign = ast.AnnAssign(
-        target=ast.Name(id=data_desc_name, ctx=AST_STORE),
-        annotation=annotation or AST_TYPING_ANY,
+        target=ast.Name(id=data_desc_name, ctx=ast.Store()),
+        annotation=annotation or path_to_type("typing", "Any"),
        simple=1,
    )
    doc_comment = build_doc_comment(doc_comment) if doc_comment else None
@@ -212,7 +207,7 @@ def function_stub(
    return ast.FunctionDef(
        fn_name,
        arguments_stub(fn_name, fn_def, doc or "", element_path, types_to_import),
-        body or [AST_ELLIPSIS],
+        body or [ast.Ellipsis()],
        decorator_list=decorator_list,
        returns=returns_stub(fn_name, doc, element_path, types_to_import) if doc else None,
        lineno=0,
@@ -352,11 +347,11 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S
    # let's first parse nested square brackets
    stack: List[List[Any]] = [[]]
    for token in tokens:
-        if token == "(":
+        if token == "[":
            children: List[str] = []
            stack[-1].append(children)
            stack.append(children)
-        elif token == ")":
+        elif token == "]":
            stack.pop()
        else:
            stack[-1].append(token)
@@ -376,39 +371,31 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S
        new_elements: List[ast.AST] = []
        for group in or_groups:
            if len(group) == 1 and isinstance(group[0], str):
-                parts = group[0].split(".")
-                if any(not p for p in parts):
-                    raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}")
-                if len(parts) > 1:
-                    types_to_import.add(parts[0])
-                new_elements.append(_path_to_type(*parts))
+                new_elements.append(concatenated_path_to_type(group[0], element_path, types_to_import))
            elif len(group) == 2 and isinstance(group[0], str) and isinstance(group[1], list):
-                if group[0] not in GENERICS:
-                    raise ValueError(
-                        f"Constructor {group[0]} is not supported in type '{type_str}' used by {'.'.join(element_path)}"
-                    )
                new_elements.append(
                    ast.Subscript(
-                        value=GENERICS[group[0]],
+                        value=concatenated_path_to_type(group[0], element_path, types_to_import),
                        slice=parse_sequence(group[1]),
-                        ctx=AST_LOAD,
+                        ctx=ast.Load(),
                    )
                )
            else:
                raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}")
-        return (
-            ast.Subscript(
-                value=_path_to_type("typing", "Union"),
-                slice=ast.Tuple(elts=new_elements, ctx=AST_LOAD),
-                ctx=AST_LOAD,
-            )
-            if len(new_elements) > 1
-            else new_elements[0]
-        )
+        return reduce(lambda left, right: ast.BinOp(left=left, op=ast.BitOr(), right=right), new_elements)

    return parse_sequence(stack[0])


+def concatenated_path_to_type(path: str, element_path: List[str], types_to_import: Set[str]) -> ast.AST:
+    parts = path.split(".")
+    if any(not p for p in parts):
+        raise ValueError(f"Not able to parse type '{path}' used by {'.'.join(element_path)}")
+    if len(parts) > 1:
+        types_to_import.add(".".join(parts[:-1]))
+    return path_to_type(*parts)
+
+
 def build_doc_comment(doc: str) -> Optional[ast.Expr]:
    lines = [line.strip() for line in doc.split("\n")]
    clean_lines = []
--- a/python/src/io.rs
+++ b/python/src/io.rs
@@ -31,7 +31,7 @@ use std::sync::OnceLock;
 /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
 ///
 /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-/// :type input: io(bytes) or io(str) or str or pathlib.Path
+/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
 /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
 /// :type format: str or None, optional
 /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@@ -41,7 +41,7 @@ use std::sync::OnceLock;
 /// :param rename_blank_nodes: Renames the blank node identifiers from the ones set in the serialization to random ids. This avoids identifier conflicts when merging graphs together.
 /// :type rename_blank_nodes: bool, optional
 /// :return: an iterator of RDF triples or quads depending on the format.
-/// :rtype: iterator(Quad)
+/// :rtype: collections.abc.Iterator[Quad]
 /// :raises ValueError: if the format is not supported.
 /// :raises SyntaxError: if the provided data is invalid.
 /// :raises OSError: if a system error happens while reading the file.
@@ -101,9 +101,9 @@ pub fn parse(
 /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
 ///
 /// :param input: the RDF triples and quads to serialize.
-/// :type input: iterable(Triple) or iterable(Quad)
+/// :type input: collections.abc.Iterable[Triple] or collections.abc.Iterable[Quad]
 /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-/// :type output: io(bytes) or str or pathlib.Path or None, optional
+/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
 /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
 /// :type format: str or None, optional
 /// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set.
--- a/python/src/sparql.rs
+++ b/python/src/sparql.rs
@@ -195,7 +195,7 @@ enum PyQuerySolutionsVariant {
 #[pymethods]
 impl PyQuerySolutions {
    /// :return: the ordered list of all variables that could appear in the query results
-    /// :rtype: list(Variable)
+    /// :rtype: list[Variable]
    ///
    /// >>> store = Store()
    /// >>> store.query('SELECT ?s WHERE { ?s ?p ?o }').variables
@@ -225,7 +225,7 @@ impl PyQuerySolutions {
    /// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
    ///
    /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
    /// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
    /// :type format: str or None, optional
    /// :rtype: bytes or None
@@ -325,7 +325,7 @@ impl PyQueryBoolean {
    /// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
    ///
    /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
    /// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
    /// :type format: str or None, optional
    /// :rtype: bytes or None
@@ -403,7 +403,7 @@ impl PyQueryTriples {
    /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
    ///
    /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
    /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
    /// :type format: str or None, optional
    /// :rtype: bytes or None
@@ -461,7 +461,7 @@ impl PyQueryTriples {
 /// For example, ``application/json`` could also be used for `JSON <https://www.w3.org/TR/sparql11-results-json/>`_.
 ///
 /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-/// :type input: io(bytes) or io(str) or str or pathlib.Path
+/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
 /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
 /// :type format: str or None, optional
 /// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`.
--- a/python/src/store.rs
+++ b/python/src/store.rs
@@ -28,7 +28,7 @@ use std::path::PathBuf;
 /// :param path: the path of the directory in which the store should read and write its data. If the directory does not exist, it is created.
 ///              If no directory is provided a temporary one is created and removed when the Python garbage collector removes the store.
 ///              In this case, the store data are kept in memory and never written on disk.
-/// :type path: str or pathlib.Path or None, optional
+/// :type path: str or os.PathLike[str] or None, optional
 /// :raises OSError: if the target directory contains invalid data or could not be accessed.
 ///
 /// The :py:class:`str` function provides a serialization of the store in NQuads:
@@ -137,7 +137,7 @@ impl PyStore {
    /// The :py:func:`bulk_extend` method is also available for much faster loading of a large number of quads but without transactional guarantees.
    ///
    /// :param quads: the quads to add.
-    /// :type quads: iterable(Quad)
+    /// :type quads: collections.abc.Iterable[Quad]
    /// :rtype: None
    /// :raises OSError: if an error happens during the quad insertion.
    ///
@@ -162,7 +162,7 @@ impl PyStore {
    /// Only a part of the data might be written to the store.
    ///
    /// :param quads: the quads to add.
-    /// :type quads: iterable(Quad)
+    /// :type quads: collections.abc.Iterable[Quad]
    /// :rtype: None
    /// :raises OSError: if an error happens during the quad insertion.
    ///
@@ -210,7 +210,7 @@ impl PyStore {
    /// :param graph_name: the quad graph name. To match only the default graph, use :py:class:`DefaultGraph`. To match everything use :py:const:`None`.
    /// :type graph_name: NamedNode or BlankNode or DefaultGraph or None, optional
    /// :return: an iterator of the quads matching the pattern.
-    /// :rtype: iterator(Quad)
+    /// :rtype: collections.abc.Iterator[Quad]
    /// :raises OSError: if an error happens during the quads lookup.
    ///
    /// >>> store = Store()
@@ -246,9 +246,9 @@ impl PyStore {
    /// :param use_default_graph_as_union: if the SPARQL query should look for triples in all the dataset graphs by default (i.e. without `GRAPH` operations). Disabled by default.
    /// :type use_default_graph_as_union: bool, optional
    /// :param default_graph: list of the graphs that should be used as the query default graph. By default, the store default graph is used.
-    /// :type default_graph: NamedNode or BlankNode or DefaultGraph or list(NamedNode or BlankNode or DefaultGraph) or None, optional
+    /// :type default_graph: NamedNode or BlankNode or DefaultGraph or list[NamedNode or BlankNode or DefaultGraph] or None, optional
    /// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available.
-    /// :type named_graphs: list(NamedNode or BlankNode) or None, optional
+    /// :type named_graphs: list[NamedNode or BlankNode] or None, optional
    /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries.
    /// :rtype: QuerySolutions or QueryBoolean or QueryTriples
    /// :raises SyntaxError: if the provided query is invalid.
@@ -361,7 +361,7 @@ impl PyStore {
    /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
    ///
    /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-    /// :type input: io(bytes) or io(str) or str or pathlib.Path
+    /// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
    /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`.  If :py:const:`None`, the format is guessed from the file name extension.
    /// :type format: str or None, optional
    /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@@ -430,7 +430,7 @@ impl PyStore {
    /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
    ///
    /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-    /// :type input: io(bytes) or io(str) or str or pathlib.Path
+    /// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
    /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`.  If :py:const:`None`, the format is guessed from the file name extension.
    /// :type format: str or None, optional
    /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@@ -497,7 +497,7 @@ impl PyStore {
    /// and ``application/xml`` or ``xml`` for `RDF/XML <https://www.w3.org/TR/rdf-syntax-grammar/>`_.
    ///
    /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
    /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`.  If :py:const:`None`, the format is guessed from the file name extension.
    /// :type format: str or None, optional
    /// :param from_graph: the store graph from which to dump the triples. Required if the serialization format does not support named graphs. If it does support named graphs, the full dataset is written.
@@ -551,7 +551,7 @@ impl PyStore {
    /// Returns an iterator over all the store named graphs.
    ///
    /// :return: an iterator of the store graph names.
-    /// :rtype: iterator(NamedNode or BlankNode)
+    /// :rtype: collections.abc.Iterator[NamedNode or BlankNode]
    /// :raises OSError: if an error happens during the named graphs lookup.
    ///
    /// >>> store = Store()