From 48db7f872be6a87e5282e4b9472e859839cd4fae Mon Sep 17 00:00:00 2001
From: Tpt
Date: Fri, 13 Oct 2023 15:49:39 +0200
Subject: [PATCH] Python: any os.PathLike path is now supported

Improves stub generation
---
 python/generate_stubs.py | 127 ++++++++++++++++++---------------------
 python/src/io.rs         |   8 +--
 python/src/sparql.rs     |  10 +--
 python/src/store.rs      |  20 +++---
 4 files changed, 76 insertions(+), 89 deletions(-)

diff --git a/python/generate_stubs.py b/python/generate_stubs.py
index 5498f2db..7d0469ec 100644
--- a/python/generate_stubs.py
+++ b/python/generate_stubs.py
@@ -5,65 +5,60 @@ import inspect
 import logging
 import re
 import subprocess
+from functools import reduce
 from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union
 
 
-def _path_to_type(*elements: str) -> ast.AST:
-    base: ast.AST = ast.Name(id=elements[0], ctx=AST_LOAD)
+def path_to_type(*elements: str) -> ast.AST:
+    base: ast.AST = ast.Name(id=elements[0], ctx=ast.Load())
     for e in elements[1:]:
-        base = ast.Attribute(value=base, attr=e, ctx=AST_LOAD)
+        base = ast.Attribute(value=base, attr=e, ctx=ast.Load())
     return base
 
 
-AST_LOAD = ast.Load()
-AST_ELLIPSIS = ast.Ellipsis()
-AST_STORE = ast.Store()
-AST_TYPING_ANY = _path_to_type("typing", "Any")
-GENERICS = {
-    "iterable": _path_to_type("typing", "Iterable"),
-    "iterator": _path_to_type("typing", "Iterator"),
-    "list": _path_to_type("typing", "List"),
-    "io": _path_to_type("typing", "IO"),
-}
 OBJECT_MEMBERS = dict(inspect.getmembers(object))
-
-
 BUILTINS: Dict[str, Union[None, Tuple[List[ast.AST], ast.AST]]] = {
     "__annotations__": None,
-    "__bool__": ([], _path_to_type("bool")),
-    "__bytes__": ([], _path_to_type("bytes")),
+    "__bool__": ([], path_to_type("bool")),
+    "__bytes__": ([], path_to_type("bytes")),
     "__class__": None,
-    "__contains__": ([AST_TYPING_ANY], _path_to_type("bool")),
+    "__contains__": ([path_to_type("typing", "Any")], path_to_type("bool")),
     "__del__": None,
-    "__delattr__": ([_path_to_type("str")], _path_to_type("None")),
-    "__delitem__": ([AST_TYPING_ANY], AST_TYPING_ANY),
+    "__delattr__": ([path_to_type("str")], path_to_type("None")),
+    "__delitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")),
     "__dict__": None,
     "__dir__": None,
     "__doc__": None,
-    "__eq__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__format__": ([_path_to_type("str")], _path_to_type("str")),
-    "__ge__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__getattribute__": ([_path_to_type("str")], AST_TYPING_ANY),
-    "__getitem__": ([AST_TYPING_ANY], AST_TYPING_ANY),
-    "__gt__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__hash__": ([], _path_to_type("int")),
-    "__init__": ([], _path_to_type("None")),
+    "__eq__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__format__": ([path_to_type("str")], path_to_type("str")),
+    "__ge__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__getattribute__": ([path_to_type("str")], path_to_type("typing", "Any")),
+    "__getitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")),
+    "__gt__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__hash__": ([], path_to_type("int")),
+    "__init__": ([], path_to_type("None")),
     "__init_subclass__": None,
-    "__iter__": ([], AST_TYPING_ANY),
-    "__le__": ([AST_TYPING_ANY], _path_to_type("bool")),
-    "__len__": ([], _path_to_type("int")),
-    "__lt__": ([AST_TYPING_ANY], _path_to_type("bool")),
+    "__iter__": ([], path_to_type("typing", "Any")),
+    "__le__": ([path_to_type("typing", "Any")], path_to_type("bool")),
+    "__len__": ([], path_to_type("int")),
+    "__lt__": ([path_to_type("typing", "Any")], path_to_type("bool")),
     "__module__": None,
-    "__ne__": ([AST_TYPING_ANY], _path_to_type("bool")),
+    "__ne__": ([path_to_type("typing", "Any")], path_to_type("bool")),
     "__new__": None,
-    "__next__": ([], AST_TYPING_ANY),
+    "__next__": ([], path_to_type("typing", "Any")),
     "__reduce__": None,
     "__reduce_ex__": None,
-    "__repr__": ([], _path_to_type("str")),
-    "__setattr__": ([_path_to_type("str"), AST_TYPING_ANY], _path_to_type("None")),
-    "__setitem__": ([AST_TYPING_ANY, AST_TYPING_ANY], AST_TYPING_ANY),
+    "__repr__": ([], path_to_type("str")),
+    "__setattr__": (
+        [path_to_type("str"), path_to_type("typing", "Any")],
+        path_to_type("None"),
+    ),
+    "__setitem__": (
+        [path_to_type("typing", "Any"), path_to_type("typing", "Any")],
+        path_to_type("typing", "Any"),
+    ),
     "__sizeof__": None,
-    "__str__": ([], _path_to_type("str")),
+    "__str__": ([], path_to_type("str")),
     "__subclasshook__": None,
 }
 
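[Editor's note] For readers skimming the hunk above: `path_to_type` chains `ast.Attribute` nodes onto an `ast.Name`, and each call site now builds a fresh node instead of reusing the old `AST_*` singleton objects in several trees. A minimal sanity check of what the helper emits, assuming Python 3.9+ for `ast.unparse`:

    import ast

    def path_to_type(*elements: str) -> ast.AST:
        # "a.b.c" becomes Name("a") wrapped in successive Attribute nodes.
        base: ast.AST = ast.Name(id=elements[0], ctx=ast.Load())
        for e in elements[1:]:
            base = ast.Attribute(value=base, attr=e, ctx=ast.Load())
        return base

    print(ast.unparse(path_to_type("typing", "Any")))  # typing.Any
    print(ast.unparse(path_to_type("os", "PathLike")))  # os.PathLike
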
path_to_type("int")), + "__lt__": ([path_to_type("typing", "Any")], path_to_type("bool")), "__module__": None, - "__ne__": ([AST_TYPING_ANY], _path_to_type("bool")), + "__ne__": ([path_to_type("typing", "Any")], path_to_type("bool")), "__new__": None, - "__next__": ([], AST_TYPING_ANY), + "__next__": ([], path_to_type("typing", "Any")), "__reduce__": None, "__reduce_ex__": None, - "__repr__": ([], _path_to_type("str")), - "__setattr__": ([_path_to_type("str"), AST_TYPING_ANY], _path_to_type("None")), - "__setitem__": ([AST_TYPING_ANY, AST_TYPING_ANY], AST_TYPING_ANY), + "__repr__": ([], path_to_type("str")), + "__setattr__": ( + [path_to_type("str"), path_to_type("typing", "Any")], + path_to_type("None"), + ), + "__setitem__": ( + [path_to_type("typing", "Any"), path_to_type("typing", "Any")], + path_to_type("typing", "Any"), + ), "__sizeof__": None, - "__str__": ([], _path_to_type("str")), + "__str__": ([], path_to_type("str")), "__subclasshook__": None, } @@ -136,11 +131,11 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i elif member_name == "__match_args__": constants.append( ast.AnnAssign( - target=ast.Name(id=member_name, ctx=AST_STORE), + target=ast.Name(id=member_name, ctx=ast.Store()), annotation=ast.Subscript( - value=_path_to_type("typing", "Tuple"), - slice=ast.Tuple(elts=[_path_to_type("str"), ast.Ellipsis()], ctx=AST_LOAD), - ctx=AST_LOAD, + value=path_to_type("tuple"), + slice=ast.Tuple(elts=[path_to_type("str"), ast.Ellipsis()], ctx=ast.Load()), + ctx=ast.Load(), ), value=ast.Constant(member_value), simple=1, @@ -156,8 +151,8 @@ def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_i bases=[], keywords=[], body=(([doc_comment] if doc_comment else []) + attributes + methods + magic_methods + constants) - or [AST_ELLIPSIS], - decorator_list=[_path_to_type("typing", "final")], + or [ast.Ellipsis()], + decorator_list=[path_to_type("typing", "final")], ) @@ -182,8 +177,8 @@ def data_descriptor_stub( ) assign = ast.AnnAssign( - target=ast.Name(id=data_desc_name, ctx=AST_STORE), - annotation=annotation or AST_TYPING_ANY, + target=ast.Name(id=data_desc_name, ctx=ast.Store()), + annotation=annotation or path_to_type("typing", "Any"), simple=1, ) doc_comment = build_doc_comment(doc_comment) if doc_comment else None @@ -212,7 +207,7 @@ def function_stub( return ast.FunctionDef( fn_name, arguments_stub(fn_name, fn_def, doc or "", element_path, types_to_import), - body or [AST_ELLIPSIS], + body or [ast.Ellipsis()], decorator_list=decorator_list, returns=returns_stub(fn_name, doc, element_path, types_to_import) if doc else None, lineno=0, @@ -352,11 +347,11 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S # let's first parse nested parenthesis stack: List[List[Any]] = [[]] for token in tokens: - if token == "(": + if token == "[": children: List[str] = [] stack[-1].append(children) stack.append(children) - elif token == ")": + elif token == "]": stack.pop() else: stack[-1].append(token) @@ -376,39 +371,31 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S new_elements: List[ast.AST] = [] for group in or_groups: if len(group) == 1 and isinstance(group[0], str): - parts = group[0].split(".") - if any(not p for p in parts): - raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}") - if len(parts) > 1: - types_to_import.add(parts[0]) - new_elements.append(_path_to_type(*parts)) + 
@@ -376,39 +371,31 @@ def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: S
         new_elements: List[ast.AST] = []
         for group in or_groups:
             if len(group) == 1 and isinstance(group[0], str):
-                parts = group[0].split(".")
-                if any(not p for p in parts):
-                    raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}")
-                if len(parts) > 1:
-                    types_to_import.add(parts[0])
-                new_elements.append(_path_to_type(*parts))
+                new_elements.append(concatenated_path_to_type(group[0], element_path, types_to_import))
             elif len(group) == 2 and isinstance(group[0], str) and isinstance(group[1], list):
-                if group[0] not in GENERICS:
-                    raise ValueError(
-                        f"Constructor {group[0]} is not supported in type '{type_str}' used by {'.'.join(element_path)}"
-                    )
                 new_elements.append(
                     ast.Subscript(
-                        value=GENERICS[group[0]],
+                        value=concatenated_path_to_type(group[0], element_path, types_to_import),
                         slice=parse_sequence(group[1]),
-                        ctx=AST_LOAD,
+                        ctx=ast.Load(),
                     )
                 )
             else:
                 raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}")
-        return (
-            ast.Subscript(
-                value=_path_to_type("typing", "Union"),
-                slice=ast.Tuple(elts=new_elements, ctx=AST_LOAD),
-                ctx=AST_LOAD,
-            )
-            if len(new_elements) > 1
-            else new_elements[0]
-        )
+        return reduce(lambda left, right: ast.BinOp(left=left, op=ast.BitOr(), right=right), new_elements)
 
     return parse_sequence(stack[0])
 
 
+def concatenated_path_to_type(path: str, element_path: List[str], types_to_import: Set[str]) -> ast.AST:
+    parts = path.split(".")
+    if any(not p for p in parts):
+        raise ValueError(f"Not able to parse type '{path}' used by {'.'.join(element_path)}")
+    if len(parts) > 1:
+        types_to_import.add(".".join(parts[:-1]))
+    return path_to_type(*parts)
+
+
 def build_doc_comment(doc: str) -> Optional[ast.Expr]:
     lines = [line.strip() for line in doc.split("\n")]
     clean_lines = []
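[Editor's note] The `reduce` over `ast.BitOr` is what turns multi-alternative docstring types into PEP 604 unions (`X | Y`) in the generated stubs, replacing the old `typing.Union[...]` subscript. A self-contained sketch of the same fold, assuming Python 3.9+ for `ast.unparse`:

    import ast
    from functools import reduce

    def name(n: str) -> ast.expr:
        return ast.Name(id=n, ctx=ast.Load())

    # Alternatives for "str | os.PathLike[str] | None", built the same
    # way the updated parse_sequence builds them.
    alternatives = [
        name("str"),
        ast.Subscript(
            value=ast.Attribute(value=name("os"), attr="PathLike", ctx=ast.Load()),
            slice=name("str"),
            ctx=ast.Load(),
        ),
        ast.Constant(value=None),
    ]
    union = reduce(lambda left, right: ast.BinOp(left=left, op=ast.BitOr(), right=right), alternatives)
    print(ast.unparse(union))  # str | os.PathLike[str] | None
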
diff --git a/python/src/io.rs b/python/src/io.rs
index 73395d71..19c0b631 100644
--- a/python/src/io.rs
+++ b/python/src/io.rs
@@ -31,7 +31,7 @@ use std::sync::OnceLock;
 /// and ``application/xml`` or ``xml`` for `RDF/XML `_.
 ///
 /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-/// :type input: io(bytes) or io(str) or str or pathlib.Path
+/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
 /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
 /// :type format: str or None, optional
 /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@@ -41,7 +41,7 @@ use std::sync::OnceLock;
 /// :param rename_blank_nodes: Renames the blank node identifiers from the ones set in the serialization to random ids. This avoids identifier conflicts when merging graphs together.
 /// :type rename_blank_nodes: bool, optional
 /// :return: an iterator of RDF triples or quads depending on the format.
-/// :rtype: iterator(Quad)
+/// :rtype: collections.abc.Iterator[Quad]
 /// :raises ValueError: if the format is not supported.
 /// :raises SyntaxError: if the provided data is invalid.
 /// :raises OSError: if a system error happens while reading the file.
@@ -101,9 +101,9 @@ pub fn parse(
 /// and ``application/xml`` or ``xml`` for `RDF/XML `_.
 ///
 /// :param input: the RDF triples and quads to serialize.
-/// :type input: iterable(Triple) or iterable(Quad)
+/// :type input: collections.abc.Iterable[Triple] or collections.abc.Iterable[Quad]
 /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-/// :type output: io(bytes) or str or pathlib.Path or None, optional
+/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
 /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
 /// :type format: str or None, optional
 /// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set.
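[Editor's note] The user-visible effect of the `io.rs` docstring changes above: `pyoxigraph.parse` now advertises (and the bindings accept) any `os.PathLike[str]`, not just `str` paths. A short usage sketch; the file name is illustrative, and per the docstring the format can be omitted when it is guessable from the extension:

    from pathlib import Path

    from pyoxigraph import parse

    # pathlib.Path implements os.PathLike[str], so it can be passed directly.
    for quad in parse(Path("my_file.ttl"), format="text/turtle"):
        print(quad)
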
diff --git a/python/src/sparql.rs b/python/src/sparql.rs
index 98698708..207a4818 100644
--- a/python/src/sparql.rs
+++ b/python/src/sparql.rs
@@ -195,7 +195,7 @@ enum PyQuerySolutionsVariant {
 #[pymethods]
 impl PyQuerySolutions {
     /// :return: the ordered list of all variables that could appear in the query results
-    /// :rtype: list(Variable)
+    /// :rtype: list[Variable]
     ///
     /// >>> store = Store()
     /// >>> store.query('SELECT ?s WHERE { ?s ?p ?o }').variables
@@ -225,7 +225,7 @@ impl PyQuerySolutions {
     /// For example, ``application/json`` could also be used for `JSON `_.
     ///
     /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
     /// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
     /// :type format: str or None, optional
     /// :rtype: bytes or None
@@ -325,7 +325,7 @@ impl PyQueryBoolean {
     /// For example, ``application/json`` could also be used for `JSON `_.
     ///
     /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
     /// :param format: the format of the query results serialization using a media type like ``text/csv`` or an extension like `csv`. If :py:const:`None`, the format is guessed from the file name extension.
     /// :type format: str or None, optional
     /// :rtype: bytes or None
@@ -403,7 +403,7 @@ impl PyQueryTriples {
     /// and ``application/xml`` or ``xml`` for `RDF/XML `_.
     ///
     /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
     /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
     /// :type format: str or None, optional
     /// :rtype: bytes or None
@@ -461,7 +461,7 @@ impl PyQueryTriples {
 /// For example, ``application/json`` could also be used for `JSON `_.
 ///
 /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-/// :type input: io(bytes) or io(str) or str or pathlib.Path
+/// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
 /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
 /// :type format: str or None, optional
 /// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`.
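[Editor's note] On the results side, the `serialize` methods documented above now accept `os.PathLike` outputs as well. A hedged sketch (the store contents and output file name are illustrative; with no explicit format, the docstrings above say it is guessed from the extension):

    from pathlib import Path

    from pyoxigraph import Store

    store = Store()
    solutions = store.query("SELECT ?s WHERE { ?s ?p ?o }")
    # Write the solutions to a pathlib.Path target as CSV.
    solutions.serialize(Path("results.csv"), format="text/csv")
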
diff --git a/python/src/store.rs b/python/src/store.rs
index 5574e652..54fad079 100644
--- a/python/src/store.rs
+++ b/python/src/store.rs
@@ -28,7 +28,7 @@ use std::path::PathBuf;
 /// :param path: the path of the directory in which the store should read and write its data. If the directory does not exist, it is created.
 /// If no directory is provided, a temporary one is created and removed when the Python garbage collector removes the store.
 /// In this case, the store data are kept in memory and never written to disk.
-/// :type path: str or pathlib.Path or None, optional
+/// :type path: str or os.PathLike[str] or None, optional
 /// :raises OSError: if the target directory contains invalid data or could not be accessed.
 ///
 /// The :py:class:`str` function provides a serialization of the store in NQuads:
@@ -137,7 +137,7 @@ impl PyStore {
     /// The :py:func:`bulk_extend` method is also available for much faster loading of a large number of quads but without transactional guarantees.
     ///
     /// :param quads: the quads to add.
-    /// :type quads: iterable(Quad)
+    /// :type quads: collections.abc.Iterable[Quad]
     /// :rtype: None
     /// :raises OSError: if an error happens during the quad insertion.
     ///
@@ -162,7 +162,7 @@ impl PyStore {
     /// Only a part of the data might be written to the store.
     ///
     /// :param quads: the quads to add.
-    /// :type quads: iterable(Quad)
+    /// :type quads: collections.abc.Iterable[Quad]
     /// :rtype: None
     /// :raises OSError: if an error happens during the quad insertion.
     ///
@@ -210,7 +210,7 @@ impl PyStore {
     /// :param graph_name: the quad graph name. To match only the default graph, use :py:class:`DefaultGraph`. To match everything use :py:const:`None`.
     /// :type graph_name: NamedNode or BlankNode or DefaultGraph or None, optional
     /// :return: an iterator of the quads matching the pattern.
-    /// :rtype: iterator(Quad)
+    /// :rtype: collections.abc.Iterator[Quad]
     /// :raises OSError: if an error happens during the quads lookup.
     ///
     /// >>> store = Store()
@@ -246,9 +246,9 @@ impl PyStore {
     /// :param use_default_graph_as_union: if the SPARQL query should look for triples in all the dataset graphs by default (i.e. without `GRAPH` operations). Disabled by default.
     /// :type use_default_graph_as_union: bool, optional
     /// :param default_graph: list of the graphs that should be used as the query default graph. By default, the store default graph is used.
-    /// :type default_graph: NamedNode or BlankNode or DefaultGraph or list(NamedNode or BlankNode or DefaultGraph) or None, optional
+    /// :type default_graph: NamedNode or BlankNode or DefaultGraph or list[NamedNode or BlankNode or DefaultGraph] or None, optional
     /// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available.
-    /// :type named_graphs: list(NamedNode or BlankNode) or None, optional
+    /// :type named_graphs: list[NamedNode or BlankNode] or None, optional
     /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries.
     /// :rtype: QuerySolutions or QueryBoolean or QueryTriples
     /// :raises SyntaxError: if the provided query is invalid.
@@ -361,7 +361,7 @@ impl PyStore {
     /// and ``application/xml`` or ``xml`` for `RDF/XML `_.
     ///
     /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-    /// :type input: io(bytes) or io(str) or str or pathlib.Path
+    /// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
     /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
     /// :type format: str or None, optional
     /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@@ -430,7 +430,7 @@ impl PyStore {
     /// and ``application/xml`` or ``xml`` for `RDF/XML `_.
     ///
     /// :param input: The I/O object or file path to read from. For example, it could be a file path as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``.
-    /// :type input: io(bytes) or io(str) or str or pathlib.Path
+    /// :type input: typing.IO[bytes] or typing.IO[str] or str or os.PathLike[str]
     /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
     /// :type format: str or None, optional
     /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done.
@@ -497,7 +497,7 @@ impl PyStore {
     /// and ``application/xml`` or ``xml`` for `RDF/XML `_.
    ///
     /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content.
-    /// :type output: io(bytes) or str or pathlib.Path or None, optional
+    /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional
     /// :param format: the format of the RDF serialization using a media type like ``text/turtle`` or an extension like `ttl`. If :py:const:`None`, the format is guessed from the file name extension.
     /// :type format: str or None, optional
     /// :param from_graph: the store graph from which to dump the triples. Required if the serialization format does not support named graphs. If it does support named graphs, the full dataset is written.
@@ -551,7 +551,7 @@ impl PyStore {
     /// Returns an iterator over all the store named graphs.
     ///
     /// :return: an iterator of the store graph names.
-    /// :rtype: iterator(NamedNode or BlankNode)
+    /// :rtype: collections.abc.Iterator[NamedNode or BlankNode]
     /// :raises OSError: if an error happens during the named graphs lookup.
     ///
     /// >>> store = Store()
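[Editor's note] Taken together, the `Store` API now accepts `os.PathLike` everywhere a path was already allowed: the store directory, the `load`/`bulk_load` inputs, and the `dump` output. A closing sketch (directory and file names are illustrative):

    from pathlib import Path

    from pyoxigraph import Store

    store = Store(Path("my_store_dir"))  # on-disk store directory as a Path
    store.load(Path("my_file.ttl"), format="text/turtle")
    store.dump(Path("dump.nq"), format="application/n-quads")
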