From 77edc05ced492155819761dbd5a33a27b7162b00 Mon Sep 17 00:00:00 2001 From: Niko PLP Date: Tue, 9 Apr 2024 20:34:46 +0300 Subject: [PATCH] use nextgraph's version of rocksdb --- .gitmodules | 3 - Cargo.lock | 276 +--- Cargo.toml | 3 - cli/Cargo.toml | 3 +- lib/oxigraph/Cargo.toml | 5 +- lib/oxigraph/src/model.rs | 2 + lib/oxigraph/src/storage/backend/mod.rs | 4 +- .../backend/{rocksdb.rs => oxi_rocksdb.rs} | 2 +- oxrocksdb-sys/rocksdb | 1 - python/Cargo.toml | 38 - python/README.md | 83 -- python/docs/conf.py | 38 - python/docs/index.rst | 79 - python/docs/io.rst | 21 - python/docs/migration.rst | 47 - python/docs/model.rst | 47 - python/docs/sparql.rst | 33 - python/docs/store.rst | 6 - python/generate_stubs.py | 438 ------ python/mypy_allowlist.txt | 2 - python/pyproject.toml | 59 - python/requirements.dev.txt | 6 - python/src/dataset.rs | 327 ----- python/src/io.rs | 640 -------- python/src/lib.rs | 47 - python/src/model.rs | 1281 ----------------- python/src/sparql.rs | 740 ---------- python/src/store.rs | 865 ----------- python/tests/test_doc.py | 37 - python/tests/test_io.py | 266 ---- python/tests/test_model.py | 376 ----- python/tests/test_store.py | 418 ------ 32 files changed, 74 insertions(+), 6119 deletions(-) rename lib/oxigraph/src/storage/backend/{rocksdb.rs => oxi_rocksdb.rs} (99%) delete mode 160000 oxrocksdb-sys/rocksdb delete mode 100644 python/Cargo.toml delete mode 100644 python/README.md delete mode 100644 python/docs/conf.py delete mode 100644 python/docs/index.rst delete mode 100644 python/docs/io.rst delete mode 100644 python/docs/migration.rst delete mode 100644 python/docs/model.rst delete mode 100644 python/docs/sparql.rst delete mode 100644 python/docs/store.rst delete mode 100644 python/generate_stubs.py delete mode 100644 python/mypy_allowlist.txt delete mode 100644 python/pyproject.toml delete mode 100644 python/requirements.dev.txt delete mode 100644 python/src/dataset.rs delete mode 100644 python/src/io.rs delete mode 100644 python/src/lib.rs delete mode 100644 python/src/model.rs delete mode 100644 python/src/sparql.rs delete mode 100644 python/src/store.rs delete mode 100644 python/tests/test_doc.py delete mode 100644 python/tests/test_io.py delete mode 100644 python/tests/test_model.py delete mode 100644 python/tests/test_store.py diff --git a/.gitmodules b/.gitmodules index fba23723..f9da0706 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,9 +7,6 @@ [submodule "bench/bsbm-tools"] path = bench/bsbm-tools url = https://github.com/Tpt/bsbm-tools.git -[submodule "oxrocksdb-sys/rocksdb"] - path = oxrocksdb-sys/rocksdb - url = https://github.com/oxigraph/rocksdb.git [submodule "oxrocksdb-sys/lz4"] path = oxrocksdb-sys/lz4 url = https://github.com/lz4/lz4.git diff --git a/Cargo.lock b/Cargo.lock index 2b579b9a..1748576d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,17 +154,16 @@ checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "bindgen" -version = "0.69.4" +version = "0.65.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" dependencies = [ - "bitflags 2.5.0", + "bitflags 1.3.2", "cexpr", "clang-sys", - "itertools 0.12.1", "lazy_static", "lazycell", - "log", + "peeking_take_while", "prettyplease", "proc-macro2", "quote", @@ -172,7 +171,6 @@ dependencies = [ "rustc-hash", "shlex", "syn", - "which", ] [[package]] @@ -219,6 +217,17 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cast" version = "0.3.0" @@ -316,7 +325,7 @@ version = "4.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90239a040c80f5e14809ca132ddc4176ab33d5e17e49691793296e3fcb34d72f" dependencies = [ - "heck 0.5.0", + "heck", "proc-macro2", "quote", "syn", @@ -422,7 +431,7 @@ dependencies = [ "clap", "criterion-plot", "is-terminal", - "itertools 0.10.5", + "itertools", "num-traits", "once_cell", "oorandom", @@ -443,7 +452,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools 0.10.5", + "itertools", ] [[package]] @@ -672,12 +681,6 @@ dependencies = [ "crunchy", ] -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - [[package]] name = "heck" version = "0.5.0" @@ -696,15 +699,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "home" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" -dependencies = [ - "windows-sys 0.52.0", -] - [[package]] name = "httparse" version = "1.8.0" @@ -737,12 +731,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "indoc" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" - [[package]] name = "is-terminal" version = "0.4.12" @@ -763,15 +751,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" -dependencies = [ - "either", -] - [[package]] name = "itoa" version = "1.0.10" @@ -844,21 +823,37 @@ dependencies = [ ] [[package]] -name = "linux-raw-sys" -version = "0.4.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" +name = "librocksdb-sys" +version = "0.11.0+8.3.2" +source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#cedbf494b4ec11638f1e0b7446731e0b73573352" +dependencies = [ + "bindgen", + "bzip2-sys", + "cc", + "glob", + "libc", + "libz-sys", + "openssl", + "pkg-config", +] [[package]] -name = "lock_api" -version = "0.4.11" +name = "libz-sys" +version = "1.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" dependencies = [ - "autocfg", - "scopeguard", + "cc", + "pkg-config", + "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" + [[package]] name = "log" version = "0.4.21" @@ -881,15 +876,6 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" -[[package]] -name = "memoffset" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" -dependencies = [ - "autocfg", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1007,6 +993,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "openssl-src" +version = "300.2.3+3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cff92b6f71555b61bb9315f7c64da3ca43d87531622120fea0195fc761b4843" +dependencies = [ + "cc", +] + [[package]] name = "openssl-sys" version = "0.9.101" @@ -1015,6 +1010,7 @@ checksum = "dda2b0f344e78efc2facf7d195d098df0dd72151b26ab98da807afc26c198dff" dependencies = [ "cc", "libc", + "openssl-src", "pkg-config", "vcpkg", ] @@ -1052,10 +1048,10 @@ dependencies = [ "oxiri", "oxrdf", "oxrdfio", - "oxrocksdb-sys", "oxsdatatypes", "rand", "regex", + "rocksdb", "sha1", "sha2", "siphasher", @@ -1158,16 +1154,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "oxrocksdb-sys" -version = "0.4.0-alpha.7-dev" -dependencies = [ - "bindgen", - "cc", - "libc", - "pkg-config", -] - [[package]] name = "oxsdatatypes" version = "0.2.0-alpha.1" @@ -1189,27 +1175,10 @@ dependencies = [ ] [[package]] -name = "parking_lot" -version = "0.12.1" +name = "peeking_take_while" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets 0.48.5", -] +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" [[package]] name = "peg" @@ -1284,12 +1253,6 @@ dependencies = [ "plotters-backend", ] -[[package]] -name = "portable-atomic" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" - [[package]] name = "powerfmt" version = "0.2.0" @@ -1351,77 +1314,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "pyo3" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a02a88a17e74cadbc8ce77855e1d6c8ad0ab82901a4a9b5046bd01c1c0bd95cd" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "memoffset", - "parking_lot", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5eb0b6ecba38961f6f4bd6cd5906dfab3cd426ff37b2eed5771006aa31656f1" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba8a6e48a29b5d22e4fdaf132d8ba8d3203ee9f06362d48f244346902a594ec3" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e80493c5965f94a747d0782a607b2328a4eea5391327b152b00e2f3b001cede" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcd7d86f42004025200e12a6a8119bd878329e6fddef8178eaafa4e4b5906c5b" -dependencies = [ - "heck 0.4.1", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "pyoxigraph" -version = "0.4.0-alpha.7-dev" -dependencies = [ - "oxigraph", - "pyo3", -] - [[package]] name = "quick-xml" version = "0.31.0" @@ -1491,15 +1383,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "regex" version = "1.10.4" @@ -1544,6 +1427,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rocksdb" +version = "0.21.0" +source = "git+https://git.nextgraph.org/NextGraph/rust-rocksdb.git?branch=master#cedbf494b4ec11638f1e0b7446731e0b73573352" +dependencies = [ + "libc", + "librocksdb-sys", +] + [[package]] name = "rustc-demangle" version = "0.1.23" @@ -1647,12 +1539,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "security-framework" version = "2.9.2" @@ -1741,12 +1627,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" -[[package]] -name = "smallvec" -version = "1.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" - [[package]] name = "sparesults" version = "0.2.0-alpha.4" @@ -1816,12 +1696,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "target-lexicon" -version = "0.12.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" - [[package]] name = "tempfile" version = "3.10.1" @@ -1992,12 +1866,6 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" -[[package]] -name = "unindent" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" - [[package]] name = "untrusted" version = "0.9.0" @@ -2131,18 +1999,6 @@ dependencies = [ "rustls-pki-types", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi" version = "0.2.8" diff --git a/Cargo.toml b/Cargo.toml index 8e9e1205..a88c8e25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,8 +12,6 @@ members = [ "lib/spargebra", "lib/sparopt", "lib/sparql-smith", - "oxrocksdb-sys", - "python", "testsuite" ] resolver = "2" @@ -71,7 +69,6 @@ oxigraph = { version = "=0.4.0-alpha.7-dev", path = "lib/oxigraph" } oxrdf = { version = "=0.2.0-alpha.4", path = "lib/oxrdf" } oxrdfio = { version = "=0.1.0-alpha.5", path = "lib/oxrdfio" } oxrdfxml = { version = "=0.1.0-alpha.5", path = "lib/oxrdfxml" } -oxrocksdb-sys = { version = "=0.4.0-alpha.7-dev", path = "./oxrocksdb-sys" } oxsdatatypes = { version = "=0.2.0-alpha.1", path = "lib/oxsdatatypes" } oxttl = { version = "=0.1.0-alpha.5", path = "lib/oxttl" } sparesults = { version = "=0.2.0-alpha.4", path = "lib/sparesults" } diff --git a/cli/Cargo.toml b/cli/Cargo.toml index c2ebf2ec..5560844f 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -20,9 +20,8 @@ path = "src/main.rs" doc = false [features] -default = ["native-tls"] +default = [] native-tls = ["oxigraph/http-client-native-tls"] -rocksdb-pkg-config = ["oxigraph/rocksdb-pkg-config"] rustls-native = ["oxigraph/http-client-rustls-native"] rustls-webpki = ["oxigraph/http-client-rustls-webpki"] diff --git a/lib/oxigraph/Cargo.toml b/lib/oxigraph/Cargo.toml index 68b59dee..ac619163 100644 --- a/lib/oxigraph/Cargo.toml +++ b/lib/oxigraph/Cargo.toml @@ -17,13 +17,12 @@ rust-version.workspace = true [features] default = ["rocksdb"] -rocksdb = ["oxrocksdb-sys"] +rocksdb = ["dep:rocksdb"] js = ["getrandom/js", "oxsdatatypes/js", "js-sys"] http-client = ["oxhttp"] http-client-native-tls = ["http-client", "oxhttp/native-tls"] http-client-rustls-webpki = ["http-client", "oxhttp/rustls-ring-webpki"] http-client-rustls-native = ["http-client", "oxhttp/rustls-ring-native"] -rocksdb-pkg-config = ["oxrocksdb-sys/pkg-config"] rocksdb-debug = [] [dependencies] @@ -49,7 +48,7 @@ thiserror.workspace = true [target.'cfg(not(target_family = "wasm"))'.dependencies] libc.workspace = true oxhttp = { workspace = true, optional = true } -oxrocksdb-sys = { workspace = true, optional = true } +rocksdb = {git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [ ], optional = true } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] getrandom.workspace = true diff --git a/lib/oxigraph/src/model.rs b/lib/oxigraph/src/model.rs index 3a9fd053..dbca934b 100644 --- a/lib/oxigraph/src/model.rs +++ b/lib/oxigraph/src/model.rs @@ -18,3 +18,5 @@ //! ``` pub use oxrdf::*; + +pub use spargebra::term::GroundQuad; diff --git a/lib/oxigraph/src/storage/backend/mod.rs b/lib/oxigraph/src/storage/backend/mod.rs index 0fc4f90f..db2ebd5f 100644 --- a/lib/oxigraph/src/storage/backend/mod.rs +++ b/lib/oxigraph/src/storage/backend/mod.rs @@ -4,9 +4,9 @@ #[cfg(any(target_family = "wasm", not(feature = "rocksdb")))] pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; #[cfg(all(not(target_family = "wasm"), feature = "rocksdb"))] -pub use rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; +pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction}; #[cfg(any(target_family = "wasm", not(feature = "rocksdb")))] mod fallback; #[cfg(all(not(target_family = "wasm"), feature = "rocksdb"))] -mod rocksdb; +mod oxi_rocksdb; diff --git a/lib/oxigraph/src/storage/backend/rocksdb.rs b/lib/oxigraph/src/storage/backend/oxi_rocksdb.rs similarity index 99% rename from lib/oxigraph/src/storage/backend/rocksdb.rs rename to lib/oxigraph/src/storage/backend/oxi_rocksdb.rs index b665401b..0f194a6e 100644 --- a/lib/oxigraph/src/storage/backend/rocksdb.rs +++ b/lib/oxigraph/src/storage/backend/oxi_rocksdb.rs @@ -10,8 +10,8 @@ use crate::storage::error::{CorruptionError, StorageError}; use libc::c_void; -use oxrocksdb_sys::*; use rand::random; +use rocksdb::ffi::*; use std::borrow::Borrow; #[cfg(unix)] use std::cmp::min; diff --git a/oxrocksdb-sys/rocksdb b/oxrocksdb-sys/rocksdb deleted file mode 160000 index aecd720a..00000000 --- a/oxrocksdb-sys/rocksdb +++ /dev/null @@ -1 +0,0 @@ -Subproject commit aecd720a5fc2bf7eae9649265a017b68605a8c87 diff --git a/python/Cargo.toml b/python/Cargo.toml deleted file mode 100644 index e1abf1be..00000000 --- a/python/Cargo.toml +++ /dev/null @@ -1,38 +0,0 @@ -[package] -name = "pyoxigraph" -version.workspace = true -authors.workspace = true -license.workspace = true -readme = "README.md" -keywords = ["RDF", "SPARQL", "graph-database", "database"] -repository = "https://github.com/oxigraph/oxigraph/tree/main/python" -homepage = "https://pyoxigraph.readthedocs.io/" -description = "Python bindings of Oxigraph, a SPARQL database and RDF toolkit" -edition.workspace = true -rust-version.workspace = true -publish = false - -[lib] -crate-type = ["cdylib"] -name = "pyoxigraph" -doctest = false -doc = false - -[features] -abi3 = ["pyo3/abi3-py38"] -rocksdb-pkg-config = ["oxigraph/rocksdb-pkg-config"] - -[dependencies] -pyo3 = { workspace = true, features = ["extension-module"] } - -[target.'cfg(any(target_family = "windows", target_os = "macos", target_os = "ios"))'.dependencies] -oxigraph = { workspace = true, features = ["http-client-native-tls"] } - -[target.'cfg(target_family = "wasm")'.dependencies] -oxigraph.workspace = true - -[target.'cfg(not(any(target_family = "windows", target_os = "macos", target_os = "ios", target_family = "wasm")))'.dependencies] -oxigraph = { workspace = true, features = ["http-client-rustls-native"] } - -[lints] -workspace = true diff --git a/python/README.md b/python/README.md deleted file mode 100644 index d9fae275..00000000 --- a/python/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# Pyoxigraph (Oxigraph for Python) - -[![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/) -[![Conda](https://img.shields.io/conda/vn/conda-forge/pyoxigraph)](https://anaconda.org/conda-forge/pyoxigraph) -![PyPI - Implementation](https://img.shields.io/pypi/implementation/pyoxigraph) -![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyoxigraph) -[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) -[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community) - -Pyoxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. -It is a Python library written on top of [Oxigraph](https://crates.io/crates/oxigraph). - -Pyoxigraph offers two stores with [SPARQL 1.1](https://www.w3.org/TR/sparql11-overview/) capabilities. -One of the store is in-memory, and the other one is disk based. - -It also provides a set of utility functions for reading, writing and processing RDF files in -[Turtle](https://www.w3.org/TR/turtle/), -[TriG](https://www.w3.org/TR/trig/), -[N-Triples](https://www.w3.org/TR/n-triples/), -[N-Quads](https://www.w3.org/TR/n-quads/) and -[RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/). - -Pyoxigraph is distributed [on Pypi](https://pypi.org/project/pyoxigraph/) and [on conda-forge](https://anaconda.org/conda-forge/pyoxigraph). -Run `pip install pyoxigraph` to install it. - -There exists also a small library providing [rdflib](https://rdflib.readthedocs.io) stores using pyoxigraph: [oxrdflib](https://github.com/oxigraph/oxrdflib). - -Pyoxigraph documentation is [available on the Oxigraph website](https://pyoxigraph.readthedocs.io/). - -## Build the development version - -To build and install the development version of pyoxigraph you need to clone this git repository including submodules (`git clone --recursive https://github.com/oxigraph/oxigraph.git`) -and to run `pip install .` in the `python` directory (the one this README is in). - -Note that by default the installation will not use [cpython stable ABI](https://docs.python.org/3/c-api/stable.html). -Use `--features abi3` feature to use cpython stable ABI. - -## Help - -Feel free to use [GitHub discussions](https://github.com/oxigraph/oxigraph/discussions) or [the Gitter chat](https://gitter.im/oxigraph/community) to ask questions or talk about Oxigraph. -[Bug reports](https://github.com/oxigraph/oxigraph/issues) are also very welcome. - -If you need advanced support or are willing to pay to get some extra features, feel free to reach out to [Tpt](https://github.com/Tpt). - -## How to contribute - -Pyoxigraph is written in Rust using [PyO3](https://github.com/PyO3/pyo3). - -Pyoxigraph is built using [Maturin](https://github.com/PyO3/maturin). -Maturin could be installed using the `pip install 'maturin>=0.9,<0.10'`. -To install a development version of Oxigraph just run `maturin develop` in this README directory. - -### Tests - -The Python bindings tests are written in Python. -To run them use `python -m unittest` in the `tests` directory. - -### Docs - -The Sphinx documentation can be generated and viewed in the browser using the following command: - -``` -sphinx-autobuild docs docs/_build/html -``` - -Note that you will need to have [sphinx-autobuild](https://pypi.org/project/sphinx-autobuild/) installed. - -Alternatively, you can use `sphinx-build` with Python's `http.server` to achieve the same thing. - -## License - -This project is licensed under either of - -- Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or - http://www.apache.org/licenses/LICENSE-2.0) -- MIT license ([LICENSE-MIT](../LICENSE-MIT) or - http://opensource.org/licenses/MIT) - -at your option. - -### Contribution - -Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. diff --git a/python/docs/conf.py b/python/docs/conf.py deleted file mode 100644 index 007e6426..00000000 --- a/python/docs/conf.py +++ /dev/null @@ -1,38 +0,0 @@ -import datetime -import sys -from pathlib import Path - -import pyoxigraph - -sys.path.insert(0, str(Path(__file__).parent.parent.absolute())) - -# -- Project information ----------------------------------------------------- - -project = "pyoxigraph" -copyright = f"{datetime.date.today().year}, Oxigraph contributors" -author = pyoxigraph.__author__ -version = pyoxigraph.__version__ -release = pyoxigraph.__version__ - -# -- General configuration --------------------------------------------------- - -extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest", "sphinx.ext.intersphinx"] - -exclude_patterns = ["build", "Thumbs.db", ".DS_Store"] - -# -- Options for HTML output ------------------------------------------------- - -html_theme = "furo" -html_static_path = [] -html_logo = "../../logo.svg" -html_favicon = "../../logo.svg" -html_theme_options = {"body_max_width": None} -html_baseurl = "https://pyoxigraph.readthedocs.io/en/stable/" - -# -- Options for doctests ------------------------------------------------- - -doctest_global_setup = "from pyoxigraph import *\nimport io" - -# -- Options for intersphinx ------------------------------------------------- - -intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} diff --git a/python/docs/index.rst b/python/docs/index.rst deleted file mode 100644 index 47ba57d2..00000000 --- a/python/docs/index.rst +++ /dev/null @@ -1,79 +0,0 @@ -pyoxigraph |release| -==================== - -.. image:: https://img.shields.io/pypi/v/pyoxigraph - :alt: PyPI - :target: https://pypi.org/project/pyoxigraph/ -.. image:: https://img.shields.io/conda/vn/conda-forge/pyoxigraph - :alt: conda-forge - :target: https://anaconda.org/conda-forge/pyoxigraph -.. image:: https://img.shields.io/pypi/implementation/pyoxigraph - :alt: PyPI - Implementation -.. image:: https://img.shields.io/pypi/pyversions/pyoxigraph - :alt: PyPI - Python Version -.. image:: https://img.shields.io/pypi/l/pyoxigraph - :alt: PyPI - License - - -Pyoxigraph is a Python graph database library implementing the `SPARQL `_ standard. - -It is built on top of `Oxigraph `_ using `PyO3 `_. - -It also provides a set of utility functions for reading, writing, and processing RDF files in -`Turtle `_, -`TriG `_, -`N-Triples `_, -`N-Quads `_ and -`RDF/XML `_. - -Pyoxigraph is distributed `on Pypi `_ and `on conda-forge `_. - -There is also a small library providing a `rdflib `_ store using pyoxigraph: `oxrdflib `_. - -Oxigraph and pyoxigraph source code are on `GitHub `_. - - -Installation -"""""""""""" - -Pyoxigraph is distributed on `Pypi `_. - -To install it, run the usual ``pip install pyoxigraph`` - - -Example -""""""" - -Insert the triple `` "example"`` and print the name of ```` in SPARQL: - -:: - - from pyoxigraph import * - - store = Store() - ex = NamedNode('http://example/') - schema_name = NamedNode('http://schema.org/name') - store.add(Quad(ex, schema_name, Literal('example'))) - for binding in store.query('SELECT ?name WHERE { ?name }'): - print(binding['name'].value) - - -Table of contents -""""""""""""""""" - -.. toctree:: - - model - io - store - sparql - migration - - -Help -"""" - -Feel free to use `GitHub discussions `_ or `the Gitter chat `_ to ask questions or talk about Oxigraph. -`Bug reports `_ are also very welcome. - -If you need advanced support or are willing to pay to get some extra features, feel free to reach out to `Tpt `_. diff --git a/python/docs/io.rst b/python/docs/io.rst deleted file mode 100644 index edf3fba0..00000000 --- a/python/docs/io.rst +++ /dev/null @@ -1,21 +0,0 @@ -RDF Parsing and Serialization -============================= -.. py:currentmodule:: pyoxigraph - -Oxigraph provides functions to parse and serialize RDF files: - - -Parsing -""""""" -.. autofunction:: parse - - -Serialization -""""""""""""" -.. autofunction:: serialize - - -Formats -""""""" -.. autoclass:: RdfFormat - :members: diff --git a/python/docs/migration.rst b/python/docs/migration.rst deleted file mode 100644 index 3fe44d80..00000000 --- a/python/docs/migration.rst +++ /dev/null @@ -1,47 +0,0 @@ -Migration Guide -=============== - -From 0.3 to 0.4 -""""""""""""""" - -* Python 3.7 and ``musllinux_1_1`` support have been removed. -* :py:class:`OSError` is now raised instead of :py:class:`IOError` on OS errors. -* The ``mime_type`` parameter have been renamed to ``format`` in I/O functions. - Using :py:class:`RdfFormat` is recommended to describe formats. -* Boolean SPARQL results are now encoded with the :py:class:`QueryBoolean` class and not a simple :py:class:`bool`. -* A `path` parameter has been added to all I/O method to read from a file. - The existing ``input`` parameter now consider :py:class:`str` values to be a serialization to parse. - For example, ``parse(path="foo.ttl")`` will parse the file ``foo.ttl`` whereas ``parse("foo", format=RdfFormat.N_TRIPLES)`` will parse a N-Triples file which content is ``foo``. - - -From 0.2 to 0.3 -""""""""""""""" - -* Python 3.6 and ``manylinux2010`` (`PEP 571 `_) support have been removed. The new minimal versions are Python 3.7 and ``manylinux2014`` (`PEP 599 `_). -* The on-disk storage system has been rebuilt on top of `RocksDB `_. - It is now implemented by the :py:class:`.Store` class that keeps the same API as the late :py:class:`.SledStore` class. - - To migrate you have to dump the store content using pyoxigraph **0.2** and the following code: - - .. code-block:: python - - from pyoxigraph import SledStore - store = SledStore('MY_STORAGE_PATH') - with open('temp_file.nq', 'wb') as fp: - store.dump(fp, "application/n-quads") - - And then upgrade to pyoxigraph **0.3** and run: - - .. code-block:: python - - from pyoxigraph import Store - store = Store('MY_NEW_STORAGE_PATH') - with open('temp_file.nq', 'rb') as fp: - store.bulk_load(fp, "application/n-quads") - -* The in-memory storage class :py:class:`.MemoryStore` has been merged into the :py:class:`.Store` class that provides the exact same API as the late :py:class:`.MemoryStore`. - On platforms other than Linux, a temporary directory is created when opening the :py:class:`.Store` and automatically removed when it is garbage collected. No data is written in this directory. -* :py:class:`.Store` operations are now transactional using the "repeatable read" isolation level: - the store only exposes changes that have been "committed" (i.e. no partial writes) - and the exposed state does not change for the complete duration of a read operation (e.g. a SPARQL query) or a read/write operation (e.g. a SPARQL update). -* `RDF-star `_ is now supported (including serialization formats and SPARQL-star). :py:class:`.Triple` can now be used in :py:attr:`.Triple.object`, :py:attr:`.Triple.object`, :py:attr:`.Quad.subject` and :py:attr:`.Quad.object`. diff --git a/python/docs/model.rst b/python/docs/model.rst deleted file mode 100644 index 99893190..00000000 --- a/python/docs/model.rst +++ /dev/null @@ -1,47 +0,0 @@ -RDF Model -========= -.. py:currentmodule:: pyoxigraph - -Oxigraph provides python classes to represents basic RDF concepts: - - -`IRIs `_ -""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: NamedNode - :members: - - -`Blank Nodes `_ -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: BlankNode - :members: - - -`Literals `_ -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: Literal - :members: - - -`Triples `_ -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: Triple - :members: - - -Quads (`triples `_ in a `RDF dataset `_) -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: Quad - :members: - -.. autoclass:: DefaultGraph - :members: - - -`Datasets `_ -""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" -.. autoclass:: Dataset - :members: - -.. autoclass:: CanonicalizationAlgorithm - :members: diff --git a/python/docs/sparql.rst b/python/docs/sparql.rst deleted file mode 100644 index 824a42cb..00000000 --- a/python/docs/sparql.rst +++ /dev/null @@ -1,33 +0,0 @@ -SPARQL utility objects -====================== -.. py:currentmodule:: pyoxigraph - -Oxigraph provides also some utilities related to SPARQL queries: - -Variable -"""""""" -.. autoclass:: Variable - :members: - -``SELECT`` solutions -"""""""""""""""""""" -.. autoclass:: QuerySolutions - :members: -.. autoclass:: QuerySolution - :members: - -``ASK`` results -""""""""""""""" -.. autoclass:: QueryBoolean - :members: - -``CONSTRUCT`` results -""""""""""""""""""""" -.. autoclass:: QueryTriples - :members: - -Query results parsing -""""""""""""""""""""" -.. autofunction:: parse_query_results -.. autoclass:: QueryResultsFormat - :members: diff --git a/python/docs/store.rst b/python/docs/store.rst deleted file mode 100644 index b6af6ef5..00000000 --- a/python/docs/store.rst +++ /dev/null @@ -1,6 +0,0 @@ -RDF Store -========= -.. py:currentmodule:: pyoxigraph - -.. autoclass:: Store - :members: diff --git a/python/generate_stubs.py b/python/generate_stubs.py deleted file mode 100644 index 5626c8de..00000000 --- a/python/generate_stubs.py +++ /dev/null @@ -1,438 +0,0 @@ -import argparse -import ast -import importlib -import inspect -import logging -import re -import subprocess -from functools import reduce -from typing import Any, Dict, List, Mapping, Optional, Set, Tuple, Union - - -def path_to_type(*elements: str) -> ast.AST: - base: ast.AST = ast.Name(id=elements[0], ctx=ast.Load()) - for e in elements[1:]: - base = ast.Attribute(value=base, attr=e, ctx=ast.Load()) - return base - - -OBJECT_MEMBERS = dict(inspect.getmembers(object)) -BUILTINS: Dict[str, Union[None, Tuple[List[ast.AST], ast.AST]]] = { - "__annotations__": None, - "__bool__": ([], path_to_type("bool")), - "__bytes__": ([], path_to_type("bytes")), - "__class__": None, - "__contains__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__del__": None, - "__delattr__": ([path_to_type("str")], path_to_type("None")), - "__delitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")), - "__dict__": None, - "__dir__": None, - "__doc__": None, - "__eq__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__format__": ([path_to_type("str")], path_to_type("str")), - "__ge__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__getattribute__": ([path_to_type("str")], path_to_type("typing", "Any")), - "__getitem__": ([path_to_type("typing", "Any")], path_to_type("typing", "Any")), - "__gt__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__hash__": ([], path_to_type("int")), - "__init__": ([], path_to_type("None")), - "__init_subclass__": None, - "__iter__": ([], path_to_type("typing", "Any")), - "__le__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__len__": ([], path_to_type("int")), - "__lt__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__module__": None, - "__ne__": ([path_to_type("typing", "Any")], path_to_type("bool")), - "__new__": None, - "__next__": ([], path_to_type("typing", "Any")), - "__reduce__": None, - "__reduce_ex__": None, - "__repr__": ([], path_to_type("str")), - "__setattr__": ( - [path_to_type("str"), path_to_type("typing", "Any")], - path_to_type("None"), - ), - "__setitem__": ( - [path_to_type("typing", "Any"), path_to_type("typing", "Any")], - path_to_type("typing", "Any"), - ), - "__sizeof__": None, - "__str__": ([], path_to_type("str")), - "__subclasshook__": None, -} - - -def module_stubs(module: Any) -> ast.Module: - types_to_import = {"typing"} - classes = [] - functions = [] - for member_name, member_value in inspect.getmembers(module): - element_path = [module.__name__, member_name] - if member_name.startswith("__"): - pass - elif inspect.isclass(member_value): - classes.append(class_stubs(member_name, member_value, element_path, types_to_import)) - elif inspect.isbuiltin(member_value): - functions.append( - function_stub( - member_name, - member_value, - element_path, - types_to_import, - in_class=False, - ) - ) - else: - logging.warning(f"Unsupported root construction {member_name}") - return ast.Module( - body=[ast.Import(names=[ast.alias(name=t)]) for t in sorted(types_to_import)] + classes + functions, - type_ignores=[], - ) - - -def class_stubs(cls_name: str, cls_def: Any, element_path: List[str], types_to_import: Set[str]) -> ast.ClassDef: - attributes: List[ast.AST] = [] - methods: List[ast.AST] = [] - magic_methods: List[ast.AST] = [] - constants: List[ast.AST] = [] - for member_name, member_value in inspect.getmembers(cls_def): - current_element_path = [*element_path, member_name] - if member_name == "__init__": - try: - inspect.signature(cls_def) # we check it actually exists - methods = [ - function_stub( - member_name, - cls_def, - current_element_path, - types_to_import, - in_class=True, - ), - *methods, - ] - except ValueError as e: - if "no signature found" not in str(e): - raise ValueError(f"Error while parsing signature of {cls_name}.__init_") from e - elif member_value == OBJECT_MEMBERS.get(member_name) or BUILTINS.get(member_name, ()) is None: - pass - elif inspect.isdatadescriptor(member_value): - attributes.extend(data_descriptor_stub(member_name, member_value, current_element_path, types_to_import)) - elif inspect.isroutine(member_value): - (magic_methods if member_name.startswith("__") else methods).append( - function_stub( - member_name, - member_value, - current_element_path, - types_to_import, - in_class=True, - ) - ) - elif member_name == "__match_args__": - constants.append( - ast.AnnAssign( - target=ast.Name(id=member_name, ctx=ast.Store()), - annotation=ast.Subscript( - value=path_to_type("tuple"), - slice=ast.Tuple(elts=[path_to_type("str"), ast.Ellipsis()], ctx=ast.Load()), - ctx=ast.Load(), - ), - value=ast.Constant(member_value), - simple=1, - ) - ) - elif member_value is not None: - constants.append( - ast.AnnAssign( - target=ast.Name(id=member_name, ctx=ast.Store()), - annotation=concatenated_path_to_type( - member_value.__class__.__name__, element_path, types_to_import - ), - value=ast.Ellipsis(), - simple=1, - ) - ) - else: - logging.warning(f"Unsupported member {member_name} of class {'.'.join(element_path)}") - - doc = inspect.getdoc(cls_def) - doc_comment = build_doc_comment(doc) if doc else None - return ast.ClassDef( - cls_name, - bases=[], - keywords=[], - body=(([doc_comment] if doc_comment else []) + attributes + methods + magic_methods + constants) - or [ast.Ellipsis()], - decorator_list=[path_to_type("typing", "final")], - ) - - -def data_descriptor_stub( - data_desc_name: str, - data_desc_def: Any, - element_path: List[str], - types_to_import: Set[str], -) -> Union[Tuple[ast.AnnAssign, ast.Expr], Tuple[ast.AnnAssign]]: - annotation = None - doc_comment = None - - doc = inspect.getdoc(data_desc_def) - if doc is not None: - annotation = returns_stub(data_desc_name, doc, element_path, types_to_import) - m = re.findall(r"^ *:return: *(.*) *$", doc, re.MULTILINE) - if len(m) == 1: - doc_comment = m[0] - elif len(m) > 1: - raise ValueError( - f"Multiple return annotations found with :return: in {'.'.join(element_path)} documentation" - ) - - assign = ast.AnnAssign( - target=ast.Name(id=data_desc_name, ctx=ast.Store()), - annotation=annotation or path_to_type("typing", "Any"), - simple=1, - ) - doc_comment = build_doc_comment(doc_comment) if doc_comment else None - return (assign, doc_comment) if doc_comment else (assign,) - - -def function_stub( - fn_name: str, - fn_def: Any, - element_path: List[str], - types_to_import: Set[str], - *, - in_class: bool, -) -> ast.FunctionDef: - body: List[ast.AST] = [] - doc = inspect.getdoc(fn_def) - if doc is not None: - doc_comment = build_doc_comment(doc) - if doc_comment is not None: - body.append(doc_comment) - - decorator_list = [] - if in_class and hasattr(fn_def, "__self__"): - decorator_list.append(ast.Name("staticmethod")) - - return ast.FunctionDef( - fn_name, - arguments_stub(fn_name, fn_def, doc or "", element_path, types_to_import), - body or [ast.Ellipsis()], - decorator_list=decorator_list, - returns=returns_stub(fn_name, doc, element_path, types_to_import) if doc else None, - lineno=0, - ) - - -def arguments_stub( - callable_name: str, - callable_def: Any, - doc: str, - element_path: List[str], - types_to_import: Set[str], -) -> ast.arguments: - real_parameters: Mapping[str, inspect.Parameter] = inspect.signature(callable_def).parameters - if callable_name == "__init__": - real_parameters = { - "self": inspect.Parameter("self", inspect.Parameter.POSITIONAL_ONLY), - **real_parameters, - } - - parsed_param_types = {} - optional_params = set() - - # Types for magic functions types - builtin = BUILTINS.get(callable_name) - if isinstance(builtin, tuple): - param_names = list(real_parameters.keys()) - if param_names and param_names[0] == "self": - del param_names[0] - for name, t in zip(param_names, builtin[0]): - parsed_param_types[name] = t - - # Types from comment - for match in re.findall(r"^ *:type *([a-z_]+): ([^\n]*) *$", doc, re.MULTILINE): - if match[0] not in real_parameters: - raise ValueError( - f"The parameter {match[0]} of {'.'.join(element_path)} " - "is defined in the documentation but not in the function signature" - ) - type = match[1] - if type.endswith(", optional"): - optional_params.add(match[0]) - type = type[:-10] - parsed_param_types[match[0]] = convert_type_from_doc(type, element_path, types_to_import) - - # we parse the parameters - posonlyargs = [] - args = [] - vararg = None - kwonlyargs = [] - kw_defaults = [] - kwarg = None - defaults = [] - for param in real_parameters.values(): - if param.name != "self" and param.name not in parsed_param_types: - raise ValueError( - f"The parameter {param.name} of {'.'.join(element_path)} " - "has no type definition in the function documentation" - ) - param_ast = ast.arg(arg=param.name, annotation=parsed_param_types.get(param.name)) - - default_ast = None - if param.default != param.empty: - default_ast = ast.Constant(param.default) - if param.name not in optional_params: - raise ValueError( - f"Parameter {param.name} of {'.'.join(element_path)} " - "is optional according to the type but not flagged as such in the doc" - ) - elif param.name in optional_params: - raise ValueError( - f"Parameter {param.name} of {'.'.join(element_path)} " - "is optional according to the documentation but has no default value" - ) - - if param.kind == param.POSITIONAL_ONLY: - posonlyargs.append(param_ast) - defaults.append(default_ast) - elif param.kind == param.POSITIONAL_OR_KEYWORD: - args.append(param_ast) - defaults.append(default_ast) - elif param.kind == param.VAR_POSITIONAL: - vararg = param_ast - elif param.kind == param.KEYWORD_ONLY: - kwonlyargs.append(param_ast) - kw_defaults.append(default_ast) - elif param.kind == param.VAR_KEYWORD: - kwarg = param_ast - - return ast.arguments( - posonlyargs=posonlyargs, - args=args, - vararg=vararg, - kwonlyargs=kwonlyargs, - kw_defaults=kw_defaults, - defaults=defaults, - kwarg=kwarg, - ) - - -def returns_stub(callable_name: str, doc: str, element_path: List[str], types_to_import: Set[str]) -> Optional[ast.AST]: - m = re.findall(r"^ *:rtype: *([^\n]*) *$", doc, re.MULTILINE) - if len(m) == 0: - builtin = BUILTINS.get(callable_name) - if isinstance(builtin, tuple) and builtin[1] is not None: - return builtin[1] - raise ValueError( - f"The return type of {'.'.join(element_path)} " - "has no type definition using :rtype: in the function documentation" - ) - if len(m) > 1: - raise ValueError(f"Multiple return type annotations found with :rtype: for {'.'.join(element_path)}") - return convert_type_from_doc(m[0], element_path, types_to_import) - - -def convert_type_from_doc(type_str: str, element_path: List[str], types_to_import: Set[str]) -> ast.AST: - type_str = type_str.strip() - return parse_type_to_ast(type_str, element_path, types_to_import) - - -def parse_type_to_ast(type_str: str, element_path: List[str], types_to_import: Set[str]) -> ast.AST: - # let's tokenize - tokens = [] - current_token = "" - for c in type_str: - if "a" <= c <= "z" or "A" <= c <= "Z" or c == ".": - current_token += c - else: - if current_token: - tokens.append(current_token) - current_token = "" - if c != " ": - tokens.append(c) - if current_token: - tokens.append(current_token) - - # let's first parse nested parenthesis - stack: List[List[Any]] = [[]] - for token in tokens: - if token == "[": - children: List[str] = [] - stack[-1].append(children) - stack.append(children) - elif token == "]": - stack.pop() - else: - stack[-1].append(token) - - # then it's easy - def parse_sequence(sequence: List[Any]) -> ast.AST: - # we split based on "or" - or_groups: List[List[str]] = [[]] - for e in sequence: - if e == "or": - or_groups.append([]) - else: - or_groups[-1].append(e) - if any(not g for g in or_groups): - raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}") - - new_elements: List[ast.AST] = [] - for group in or_groups: - if len(group) == 1 and isinstance(group[0], str): - new_elements.append(concatenated_path_to_type(group[0], element_path, types_to_import)) - elif len(group) == 2 and isinstance(group[0], str) and isinstance(group[1], list): - new_elements.append( - ast.Subscript( - value=concatenated_path_to_type(group[0], element_path, types_to_import), - slice=parse_sequence(group[1]), - ctx=ast.Load(), - ) - ) - else: - raise ValueError(f"Not able to parse type '{type_str}' used by {'.'.join(element_path)}") - return reduce(lambda left, right: ast.BinOp(left=left, op=ast.BitOr(), right=right), new_elements) - - return parse_sequence(stack[0]) - - -def concatenated_path_to_type(path: str, element_path: List[str], types_to_import: Set[str]) -> ast.AST: - parts = path.split(".") - if any(not p for p in parts): - raise ValueError(f"Not able to parse type '{path}' used by {'.'.join(element_path)}") - if len(parts) > 1: - types_to_import.add(".".join(parts[:-1])) - return path_to_type(*parts) - - -def build_doc_comment(doc: str) -> Optional[ast.Expr]: - lines = [line.strip() for line in doc.split("\n")] - clean_lines = [] - for line in lines: - if line.startswith((":type", ":rtype")): - continue - clean_lines.append(line) - text = "\n".join(clean_lines).strip() - return ast.Expr(value=ast.Constant(text)) if text else None - - -def format_with_ruff(file: str) -> None: - subprocess.check_call(["python", "-m", "ruff", "format", file]) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Extract Python type stub from a python module.") - parser.add_argument("module_name", help="Name of the Python module for which generate stubs") - parser.add_argument( - "out", - help="Name of the Python stub file to write to", - type=argparse.FileType("wt"), - ) - parser.add_argument("--ruff", help="Formats the generated stubs using Ruff", action="store_true") - args = parser.parse_args() - stub_content = ast.unparse(module_stubs(importlib.import_module(args.module_name))) - args.out.write(stub_content) - if args.ruff: - format_with_ruff(args.out.name) diff --git a/python/mypy_allowlist.txt b/python/mypy_allowlist.txt deleted file mode 100644 index bae62500..00000000 --- a/python/mypy_allowlist.txt +++ /dev/null @@ -1,2 +0,0 @@ -pyoxigraph.pyoxigraph -pyoxigraph.DefaultGraph.__init__ \ No newline at end of file diff --git a/python/pyproject.toml b/python/pyproject.toml deleted file mode 100644 index e1003177..00000000 --- a/python/pyproject.toml +++ /dev/null @@ -1,59 +0,0 @@ -[build-system] -requires = ["maturin~=1.0"] -build-backend = "maturin" - -[project] -# Most of the metadata are in Cargo.toml and injected by maturin -name = "pyoxigraph" -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Rust", - "Topic :: Database :: Database Engines/Servers", - "Topic :: Software Development :: Libraries :: Python Modules", -] -requires-python = ">=3.8" - -[project.urls] -Changelog = "https://github.com/oxigraph/oxigraph/blob/main/CHANGELOG.md" -Documentation = "https://pyoxigraph.readthedocs.io/" -Homepage = "https://pyoxigraph.readthedocs.io/" -Source = "https://github.com/oxigraph/oxigraph/tree/main/python" -Tracker = "https://github.com/oxigraph/oxigraph/issues" - -[tool.maturin] -strip = true - -[tool.ruff] -line-length = 120 - -[tool.ruff.lint] -select = [ - "ARG", - "B", - "C40", - "E", - "F", - "FBT", - "I", - "ICN", - "N", - "PIE", - "PTH", - "RET", - "RUF", - "SIM", - "T10", - "TCH", - "TID", - "UP", - "W", - "YTT" -] diff --git a/python/requirements.dev.txt b/python/requirements.dev.txt deleted file mode 100644 index f91877f3..00000000 --- a/python/requirements.dev.txt +++ /dev/null @@ -1,6 +0,0 @@ -furo -maturin~=1.0 -mypy~=1.0 -ruff~=0.3.0 -sphinx~=7.0 -sphinx-lint~=0.9.1 diff --git a/python/src/dataset.rs b/python/src/dataset.rs deleted file mode 100644 index 25eb729e..00000000 --- a/python/src/dataset.rs +++ /dev/null @@ -1,327 +0,0 @@ -use crate::model::{hash, PyGraphNameRef, PyNamedNodeRef, PyQuad, PySubjectRef, PyTermRef}; -use oxigraph::model::dataset::{CanonicalizationAlgorithm, Dataset}; -use oxigraph::model::{Quad, QuadRef}; -use pyo3::exceptions::PyKeyError; -use pyo3::prelude::*; - -/// An in-memory `RDF dataset `_. -/// -/// It can accommodate a fairly large number of quads (in the few millions). -/// -/// Use :py:class:`Store` if you need on-disk persistence or SPARQL. -/// -/// Warning: It interns the strings and does not do any garbage collection yet: -/// if you insert and remove a lot of different terms, memory will grow without any reduction. -/// -/// :param quads: some quads to initialize the dataset with. -/// :type quads: collections.abc.Iterable[Quad] or None, optional -/// -/// The :py:class:`str` function provides an N-Quads serialization: -/// -/// >>> str(Dataset([Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g'))])) -/// ' .\n' -#[pyclass(name = "Dataset", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone)] -pub struct PyDataset { - inner: Dataset, -} - -#[pymethods] -impl PyDataset { - #[new] - #[pyo3(signature = (quads = None))] - fn new(quads: Option<&Bound<'_, PyAny>>) -> PyResult { - let mut inner = Dataset::new(); - if let Some(quads) = quads { - for quad in quads.iter()? { - inner.insert(&*quad?.extract::>()?); - } - } - Ok(Self { inner }) - } - - /// Looks for the quads with the given subject. - /// - /// :param subject: the quad subject. - /// :type subject: NamedNode or BlankNode or Triple - /// :return: an iterator of the quads. - /// :rtype: collections.abc.Iterator[Quad] - /// - /// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))]) - /// >>> list(store.quads_for_subject(NamedNode('http://example.com'))) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - pub fn quads_for_subject(&self, subject: PySubjectRef<'_>) -> QuadIter { - QuadIter { - inner: self - .inner - .quads_for_subject(&subject) - .map(QuadRef::into_owned) - .collect::>() - .into_iter(), - } - } - - /// Looks for the quads with the given predicate. - /// - /// :param predicate: the quad predicate. - /// :type predicate: NamedNode - /// :return: an iterator of the quads. - /// :rtype: collections.abc.Iterator[Quad] - /// - /// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))]) - /// >>> list(store.quads_for_predicate(NamedNode('http://example.com/p'))) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - pub fn quads_for_predicate(&self, predicate: PyNamedNodeRef<'_>) -> QuadIter { - QuadIter { - inner: self - .inner - .quads_for_predicate(&predicate) - .map(QuadRef::into_owned) - .collect::>() - .into_iter(), - } - } - - /// Looks for the quads with the given object. - /// - /// :param object: the quad object. - /// :type object: NamedNode or BlankNode or Literal or Triple - /// :return: an iterator of the quads. - /// :rtype: collections.abc.Iterator[Quad] - /// - /// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))]) - /// >>> list(store.quads_for_object(Literal('1'))) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - pub fn quads_for_object(&self, object: PyTermRef<'_>) -> QuadIter { - QuadIter { - inner: self - .inner - .quads_for_object(&object) - .map(QuadRef::into_owned) - .collect::>() - .into_iter(), - } - } - - /// Looks for the quads with the given graph name. - /// - /// :param graph_name: the quad graph name. - /// :type graph_name: NamedNode or BlankNode or DefaultGraph - /// :return: an iterator of the quads. - /// :rtype: collections.abc.Iterator[Quad] - /// - /// >>> store = Dataset([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))]) - /// >>> list(store.quads_for_graph_name(NamedNode('http://example.com/g'))) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - pub fn quads_for_graph_name(&self, graph_name: PyGraphNameRef<'_>) -> QuadIter { - QuadIter { - inner: self - .inner - .quads_for_graph_name(&graph_name) - .map(QuadRef::into_owned) - .collect::>() - .into_iter(), - } - } - - /// Adds a quad to the dataset. - /// - /// :param quad: the quad to add. - /// :type quad: Quad - /// :rtype: None - /// - /// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g')) - /// >>> dataset = Dataset() - /// >>> dataset.add(quad) - /// >>> quad in dataset - /// True - fn add(&mut self, quad: &PyQuad) { - self.inner.insert(quad); - } - - /// Removes a quad from the dataset and raises an exception if it is not in the set. - /// - /// :param quad: the quad to remove. - /// :type quad: Quad - /// :rtype: None - /// :raises KeyError: if the element was not in the set. - /// - /// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g')) - /// >>> dataset = Dataset([quad]) - /// >>> dataset.remove(quad) - /// >>> quad in dataset - /// False - fn remove(&mut self, quad: &PyQuad) -> PyResult<()> { - if self.inner.remove(quad) { - Ok(()) - } else { - Err(PyKeyError::new_err(format!( - "{} is not in the Dataset", - QuadRef::from(quad) - ))) - } - } - - /// Removes a quad from the dataset if it is present. - /// - /// :param quad: the quad to remove. - /// :type quad: Quad - /// :rtype: None - /// - /// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g')) - /// >>> dataset = Dataset([quad]) - /// >>> dataset.discard(quad) - /// >>> quad in dataset - /// False - fn discard(&mut self, quad: &PyQuad) { - self.inner.remove(quad); - } - - /// Removes all quads from the dataset. - /// - /// :rtype: None - /// - /// >>> quad = Quad(NamedNode('http://example.com/s'), NamedNode('http://example.com/p'), NamedNode('http://example.com/o'), NamedNode('http://example.com/g')) - /// >>> dataset = Dataset([quad]) - /// >>> dataset.clear() - /// >>> len(dataset) - /// 0 - fn clear(&mut self) { - self.inner.clear() - } - - /// Canonicalizes the dataset by renaming blank nodes. - /// - /// Warning: Blank node ids depends on the current shape of the graph. Adding a new quad might change the ids of a lot of blank nodes. - /// Hence, this canonization might not be suitable for diffs. - /// - /// Warning: This implementation worst-case complexity is in *O(b!)* with *b* the number of blank nodes in the input dataset. - /// - /// :param algorithm: the canonicalization algorithm to use. - /// :type algorithm: CanonicalizationAlgorithm - /// :rtype: None - /// - /// >>> d1 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())]) - /// >>> d2 = Dataset([Quad(BlankNode(), NamedNode('http://example.com/p'), BlankNode())]) - /// >>> d1 == d2 - /// False - /// >>> d1.canonicalize(CanonicalizationAlgorithm.UNSTABLE) - /// >>> d2.canonicalize(CanonicalizationAlgorithm.UNSTABLE) - /// >>> d1 == d2 - /// True - fn canonicalize(&mut self, algorithm: &PyCanonicalizationAlgorithm) { - self.inner.canonicalize(algorithm.inner) - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __bool__(&self) -> bool { - self.inner.is_empty() - } - - fn __eq__(&self, other: &Self) -> bool { - self.inner == other.inner - } - - fn __ne__(&self, other: &Self) -> bool { - self.inner != other.inner - } - - fn __len__(&self) -> usize { - self.inner.len() - } - - fn __contains__(&self, quad: &PyQuad) -> bool { - self.inner.contains(quad) - } - - fn __iter__(&self) -> QuadIter { - // TODO: very inefficient - QuadIter { - inner: self - .inner - .iter() - .map(QuadRef::into_owned) - .collect::>() - .into_iter(), - } - } -} - -#[pyclass(unsendable, module = "pyoxigraph")] -pub struct QuadIter { - inner: std::vec::IntoIter, -} - -#[pymethods] -impl QuadIter { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self) -> Option { - Some(self.inner.next()?.into()) - } -} - -/// RDF canonicalization algorithms. -/// -/// The following algorithms are supported: -/// -/// * :py:attr:`CanonicalizationAlgorithm.UNSTABLE`: an unstable algorithm preferred by PyOxigraph. -#[pyclass(name = "CanonicalizationAlgorithm", module = "pyoxigraph")] -#[derive(Clone)] -pub struct PyCanonicalizationAlgorithm { - inner: CanonicalizationAlgorithm, -} - -#[pymethods] -impl PyCanonicalizationAlgorithm { - /// The algorithm preferred by PyOxigraph. - /// - /// Warning: Might change between Oxigraph versions. No stability guaranties. - #[classattr] - const UNSTABLE: Self = Self { - inner: CanonicalizationAlgorithm::Unstable, - }; - - fn __repr__(&self) -> String { - format!( - "", - match self.inner { - CanonicalizationAlgorithm::Unstable => "unstable", - _ => "unknown", - } - ) - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __eq__(&self, other: &Self) -> bool { - self.inner == other.inner - } - - fn __ne__(&self, other: &Self) -> bool { - self.inner != other.inner - } - - /// :rtype: CanonicalizationAlgorithm - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: CanonicalizationAlgorithm - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } -} diff --git a/python/src/io.rs b/python/src/io.rs deleted file mode 100644 index d03adeb6..00000000 --- a/python/src/io.rs +++ /dev/null @@ -1,640 +0,0 @@ -#![allow(clippy::needless_option_as_deref)] - -use crate::model::{hash, PyQuad, PyTriple}; -use oxigraph::io::{FromReadQuadReader, RdfFormat, RdfParseError, RdfParser, RdfSerializer}; -use oxigraph::model::QuadRef; -use pyo3::exceptions::{PyDeprecationWarning, PySyntaxError, PyValueError}; -use pyo3::intern; -use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyString}; -use std::cmp::max; -use std::ffi::OsStr; -use std::fs::File; -use std::io::{self, BufWriter, Cursor, Read, Write}; -use std::path::{Path, PathBuf}; -use std::sync::OnceLock; - -/// Parses RDF graph and dataset serialization formats. -/// -/// It currently supports the following formats: -/// -/// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) -/// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) -/// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) -/// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) -/// * `N3 `_ (:py:attr:`RdfFormat.N3`) -/// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) -/// -/// It supports also some media type and extension aliases. -/// For example, ``application/turtle`` could also be used for `Turtle `_ -/// and ``application/xml`` or ``xml`` for `RDF/XML `_. -/// -/// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. -/// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional -/// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. -/// :type format: RdfFormat or None, optional -/// :param path: The file path to read from. Replaces the ``input`` parameter. -/// :type path: str or os.PathLike[str] or None, optional -/// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. -/// :type base_iri: str or None, optional -/// :param without_named_graphs: Sets that the parser must fail when parsing a named graph. -/// :type without_named_graphs: bool, optional -/// :param rename_blank_nodes: Renames the blank nodes identifiers from the ones set in the serialization to random ids. This allows to avoid identifier conflicts when merging graphs together. -/// :type rename_blank_nodes: bool, optional -/// :return: an iterator of RDF triples or quads depending on the format. -/// :rtype: collections.abc.Iterator[Quad] -/// :raises ValueError: if the format is not supported. -/// :raises SyntaxError: if the provided data is invalid. -/// :raises OSError: if a system error happens while reading the file. -/// -/// >>> list(parse(input=b'

"1" .', format=RdfFormat.TURTLE, base_iri="http://example.com/")) -/// [ predicate= object=> graph_name=>] -#[pyfunction] -#[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, without_named_graphs = false, rename_blank_nodes = false))] -pub fn parse( - input: Option, - format: Option, - path: Option, - base_iri: Option<&str>, - without_named_graphs: bool, - rename_blank_nodes: bool, - py: Python<'_>, -) -> PyResult { - let input = PyReadable::from_args(&path, input, py)?; - let format = lookup_rdf_format(format, path.as_deref())?; - let mut parser = RdfParser::from_format(format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - } - if without_named_graphs { - parser = parser.without_named_graphs(); - } - if rename_blank_nodes { - parser = parser.rename_blank_nodes(); - } - Ok(PyQuadReader { - inner: parser.parse_read(input), - file_path: path, - } - .into_py(py)) -} - -/// Serializes an RDF graph or dataset. -/// -/// It currently supports the following formats: -/// -/// * `canonical `_ `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) -/// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) -/// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) -/// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) -/// * `N3 `_ (:py:attr:`RdfFormat.N3`) -/// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) -/// -/// It supports also some media type and extension aliases. -/// For example, ``application/turtle`` could also be used for `Turtle `_ -/// and ``application/xml`` or ``xml`` for `RDF/XML `_. -/// -/// :param input: the RDF triples and quads to serialize. -/// :type input: collections.abc.Iterable[Triple] or collections.abc.Iterable[Quad] -/// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. -/// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional -/// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. -/// :type format: RdfFormat or None, optional -/// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set. -/// :rtype: bytes or None -/// :raises ValueError: if the format is not supported. -/// :raises TypeError: if a triple is given during a quad format serialization or reverse. -/// :raises OSError: if a system error happens while writing the file. -/// -/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], format=RdfFormat.TURTLE) -/// b' "1" .\n' -/// -/// >>> import io -/// >>> output = io.BytesIO() -/// >>> serialize([Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))], output, RdfFormat.TURTLE) -/// >>> output.getvalue() -/// b' "1" .\n' -#[pyfunction] -#[pyo3(signature = (input, output = None, format = None))] -pub fn serialize<'py>( - input: &Bound<'py, PyAny>, - output: Option, - format: Option, - py: Python<'py>, -) -> PyResult>> { - PyWritable::do_write( - |output, file_path| { - let format = lookup_rdf_format(format, file_path.as_deref())?; - let mut writer = RdfSerializer::from_format(format).serialize_to_write(output); - for i in input.iter()? { - let i = i?; - if let Ok(triple) = i.extract::>() { - writer.write_triple(&*triple) - } else { - let quad = i.extract::>()?; - let quad = QuadRef::from(&*quad); - if !quad.graph_name.is_default_graph() && !format.supports_datasets() { - return Err(PyValueError::new_err( - "The {format} format does not support named graphs", - )); - } - writer.write_quad(quad) - }?; - } - Ok(writer.finish()?) - }, - output, - py, - ) -} - -#[pyclass(name = "QuadReader", module = "pyoxigraph")] -pub struct PyQuadReader { - inner: FromReadQuadReader, - file_path: Option, -} - -#[pymethods] -impl PyQuadReader { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self, py: Python<'_>) -> PyResult> { - py.allow_threads(|| { - Ok(self - .inner - .next() - .transpose() - .map_err(|e| map_parse_error(e, self.file_path.clone()))? - .map(PyQuad::from)) - }) - } -} - -/// RDF serialization formats. -/// -/// The following formats are supported: -/// -/// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) -/// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) -/// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) -/// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) -/// * `N3 `_ (:py:attr:`RdfFormat.N3`) -/// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) -/// -/// >>> RdfFormat.N3.media_type -/// 'text/n3' -#[pyclass(name = "RdfFormat", module = "pyoxigraph")] -#[derive(Clone)] -pub struct PyRdfFormat { - inner: RdfFormat, -} - -#[pymethods] -impl PyRdfFormat { - /// `N3 `_ - #[classattr] - const N3: Self = Self { - inner: RdfFormat::N3, - }; - /// `N-Quads `_ - #[classattr] - const N_QUADS: Self = Self { - inner: RdfFormat::NQuads, - }; - /// `N-Triples `_ - #[classattr] - const N_TRIPLES: Self = Self { - inner: RdfFormat::NTriples, - }; - /// `RDF/XML `_ - #[classattr] - const RDF_XML: Self = Self { - inner: RdfFormat::RdfXml, - }; - /// `TriG `_ - #[classattr] - const TRIG: Self = Self { - inner: RdfFormat::TriG, - }; - /// `Turtle `_ - #[classattr] - const TURTLE: Self = Self { - inner: RdfFormat::Turtle, - }; - - /// :return: the format canonical IRI according to the `Unique URIs for file formats registry `_. - /// :rtype: str - /// - /// >>> RdfFormat.N_TRIPLES.iri - /// 'http://www.w3.org/ns/formats/N-Triples' - #[getter] - fn iri(&self) -> &'static str { - self.inner.iri() - } - - /// :return: the format `IANA media type `_. - /// :rtype: str - /// - /// >>> RdfFormat.N_TRIPLES.media_type - /// 'application/n-triples' - #[getter] - fn media_type(&self) -> &'static str { - self.inner.media_type() - } - - /// :return: the format `IANA-registered `_ file extension. - /// :rtype: str - /// - /// >>> RdfFormat.N_TRIPLES.file_extension - /// 'nt' - #[getter] - pub fn file_extension(&self) -> &'static str { - self.inner.file_extension() - } - - /// :return: the format name. - /// :rtype: str - /// - /// >>> RdfFormat.N_TRIPLES.name - /// 'N-Triples' - #[getter] - pub const fn name(&self) -> &'static str { - self.inner.name() - } - - /// :return: if the formats supports `RDF datasets `_ and not only `RDF graphs `_. - /// :rtype: bool - /// - /// >>> RdfFormat.N_TRIPLES.supports_datasets - /// False - /// >>> RdfFormat.N_QUADS.supports_datasets - /// True - #[getter] - pub fn supports_datasets(&self) -> bool { - self.inner.supports_datasets() - } - - /// :return: if the formats supports `RDF-star quoted triples `_. - /// :rtype: bool - /// - /// >>> RdfFormat.N_TRIPLES.supports_rdf_star - /// True - /// >>> RdfFormat.RDF_XML.supports_rdf_star - /// False - #[getter] - pub const fn supports_rdf_star(&self) -> bool { - self.inner.supports_rdf_star() - } - - /// Looks for a known format from a media type. - /// - /// It supports some media type aliases. - /// For example, "application/xml" is going to return RDF/XML even if it is not its canonical media type. - /// - /// :param media_type: the media type. - /// :type media_type: str - /// :return: :py:class:`RdfFormat` if the media type is known or :py:const:`None` if not. - /// :rtype: RdfFormat or None - /// - /// >>> RdfFormat.from_media_type("text/turtle; charset=utf-8") - /// - #[staticmethod] - pub fn from_media_type(media_type: &str) -> Option { - Some(Self { - inner: RdfFormat::from_media_type(media_type)?, - }) - } - - /// Looks for a known format from an extension. - /// - /// It supports some aliases. - /// - /// :param extension: the extension. - /// :type extension: str - /// :return: :py:class:`RdfFormat` if the extension is known or :py:const:`None` if not. - /// :rtype: RdfFormat or None - /// - /// >>> RdfFormat.from_extension("nt") - /// - #[staticmethod] - pub fn from_extension(extension: &str) -> Option { - Some(Self { - inner: RdfFormat::from_extension(extension)?, - }) - } - - fn __str__(&self) -> &'static str { - self.inner.name() - } - - fn __repr__(&self) -> String { - format!("", self.inner.name()) - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __eq__(&self, other: &Self) -> bool { - self.inner == other.inner - } - - fn __ne__(&self, other: &Self) -> bool { - self.inner != other.inner - } - - /// :rtype: RdfFormat - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: RdfFormat - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } -} - -pub enum PyReadable { - Bytes(Cursor>), - Io(PyIo), - File(File), -} - -impl PyReadable { - pub fn from_args( - path: &Option, - input: Option, - py: Python<'_>, - ) -> PyResult { - match (path, input) { - (Some(_), Some(_)) => Err(PyValueError::new_err( - "input and file_path can't be both set at the same time", - )), - (Some(path), None) => Ok(Self::File(py.allow_threads(|| File::open(path))?)), - (None, Some(input)) => Ok(input.into()), - (None, None) => Err(PyValueError::new_err( - "Either input or file_path must be set", - )), - } - } -} - -impl Read for PyReadable { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - match self { - Self::Bytes(bytes) => bytes.read(buf), - Self::Io(io) => io.read(buf), - Self::File(file) => file.read(buf), - } - } -} - -#[derive(FromPyObject)] -pub enum PyReadableInput { - String(String), - Bytes(Vec), - Io(PyObject), -} - -impl From for PyReadable { - fn from(input: PyReadableInput) -> Self { - match input { - PyReadableInput::String(string) => Self::Bytes(Cursor::new(string.into_bytes())), - PyReadableInput::Bytes(bytes) => Self::Bytes(Cursor::new(bytes)), - PyReadableInput::Io(io) => Self::Io(PyIo(io)), - } - } -} - -pub enum PyWritable { - Bytes(Vec), - Io(PyIo), - File(File), -} - -impl PyWritable { - pub fn do_write( - write: impl FnOnce(BufWriter, Option) -> PyResult>, - output: Option, - py: Python<'_>, - ) -> PyResult>> { - let (output, file_path) = match output { - Some(PyWritableOutput::Path(file_path)) => ( - Self::File(py.allow_threads(|| File::create(&file_path))?), - Some(file_path), - ), - Some(PyWritableOutput::Io(object)) => (Self::Io(PyIo(object)), None), - None => (Self::Bytes(Vec::new()), None), - }; - let writer = write(BufWriter::new(output), file_path)?; - py.allow_threads(|| writer.into_inner())?.close(py) - } - - fn close(self, py: Python<'_>) -> PyResult>> { - match self { - Self::Bytes(bytes) => Ok(Some(PyBytes::new_bound(py, &bytes))), - Self::File(mut file) => { - py.allow_threads(|| { - file.flush()?; - file.sync_all() - })?; - Ok(None) - } - Self::Io(mut io) => { - py.allow_threads(|| io.flush())?; - Ok(None) - } - } - } -} - -impl Write for PyWritable { - fn write(&mut self, buf: &[u8]) -> io::Result { - match self { - Self::Bytes(bytes) => bytes.write(buf), - Self::Io(io) => io.write(buf), - Self::File(file) => file.write(buf), - } - } - - fn flush(&mut self) -> io::Result<()> { - match self { - Self::Bytes(_) => Ok(()), - Self::Io(io) => io.flush(), - Self::File(file) => file.flush(), - } - } -} - -#[derive(FromPyObject)] -pub enum PyWritableOutput { - Path(PathBuf), - Io(PyObject), -} - -pub struct PyIo(PyObject); - -impl Read for PyIo { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - Python::with_gil(|py| { - if buf.is_empty() { - return Ok(0); - } - let to_read = max(1, buf.len() / 4); // We divide by 4 because TextIO works with number of characters and not with number of bytes - let read = self - .0 - .bind(py) - .call_method1(intern!(py, "read"), (to_read,))?; - Ok(if let Ok(bytes) = read.extract::<&[u8]>() { - buf[..bytes.len()].copy_from_slice(bytes); - bytes.len() - } else { - // TODO: Python 3.10+ use directly .extract<&str> - let string = read.extract::>()?; - let str = string.to_cow()?; - buf[..str.len()].copy_from_slice(str.as_bytes()); - str.len() - }) - }) - } -} - -impl Write for PyIo { - fn write(&mut self, buf: &[u8]) -> io::Result { - Python::with_gil(|py| { - Ok(self - .0 - .bind(py) - .call_method1(intern!(py, "write"), (PyBytes::new_bound(py, buf),))? - .extract::()?) - }) - } - - fn flush(&mut self) -> io::Result<()> { - Python::with_gil(|py| { - self.0.bind(py).call_method0(intern!(py, "flush"))?; - Ok(()) - }) - } -} - -pub fn lookup_rdf_format( - format: Option, - path: Option<&Path>, -) -> PyResult { - if let Some(format) = format { - return match format { - PyRdfFormatInput::Object(format) => Ok(format.inner), - PyRdfFormatInput::MediaType(media_type) => { - deprecation_warning("Using string to specify a RDF format is deprecated, please use a RdfFormat object instead.")?; - RdfFormat::from_media_type(&media_type).ok_or_else(|| { - PyValueError::new_err(format!( - "The media type {media_type} is not supported by pyoxigraph" - )) - }) - } - }; - } - let Some(path) = path else { - return Err(PyValueError::new_err( - "The format parameter is required when a file path is not given", - )); - }; - let Some(ext) = path.extension().and_then(OsStr::to_str) else { - return Err(PyValueError::new_err(format!( - "The file name {} has no extension to guess a file format from", - path.display() - ))); - }; - RdfFormat::from_extension(ext) - .ok_or_else(|| PyValueError::new_err(format!("Not supported RDF format extension: {ext}"))) -} - -#[derive(FromPyObject)] -pub enum PyRdfFormatInput { - Object(PyRdfFormat), - MediaType(String), -} - -pub fn map_parse_error(error: RdfParseError, file_path: Option) -> PyErr { - match error { - RdfParseError::Syntax(error) => { - // Python 3.9 does not support end line and end column - if python_version() >= (3, 10) { - let params = if let Some(location) = error.location() { - ( - file_path, - Some(location.start.line + 1), - Some(location.start.column + 1), - None::>, - Some(location.end.line + 1), - Some(location.end.column + 1), - ) - } else { - (None, None, None, None, None, None) - }; - PySyntaxError::new_err((error.to_string(), params)) - } else { - let params = if let Some(location) = error.location() { - ( - file_path, - Some(location.start.line + 1), - Some(location.start.column + 1), - None::>, - ) - } else { - (None, None, None, None) - }; - PySyntaxError::new_err((error.to_string(), params)) - } - } - RdfParseError::Io(error) => error.into(), - } -} - -/// Release the GIL -/// There should not be ANY use of pyo3 code inside of this method!!! -/// -/// Code from pyo3: https://github.com/PyO3/pyo3/blob/a67180c8a42a0bc0fdc45b651b62c0644130cf47/src/python.rs#L366 -#[allow(unsafe_code)] -pub fn allow_threads_unsafe(_py: Python<'_>, f: impl FnOnce() -> T) -> T { - struct RestoreGuard { - tstate: *mut pyo3::ffi::PyThreadState, - } - - impl Drop for RestoreGuard { - fn drop(&mut self) { - // SAFETY: not cloned so called once - unsafe { - pyo3::ffi::PyEval_RestoreThread(self.tstate); - } - } - } - - // SAFETY: we have the restore part in Drop to make sure it's properly executed - let tstate = unsafe { pyo3::ffi::PyEval_SaveThread() }; - let _guard = RestoreGuard { tstate }; - f() -} - -pub fn python_version() -> (u8, u8) { - static VERSION: OnceLock<(u8, u8)> = OnceLock::new(); - *VERSION.get_or_init(|| { - Python::with_gil(|py| { - let v = py.version_info(); - (v.major, v.minor) - }) - }) -} - -pub fn deprecation_warning(message: &str) -> PyResult<()> { - Python::with_gil(|py| { - PyErr::warn_bound(py, &py.get_type_bound::(), message, 0) - }) -} diff --git a/python/src/lib.rs b/python/src/lib.rs deleted file mode 100644 index 89e3d502..00000000 --- a/python/src/lib.rs +++ /dev/null @@ -1,47 +0,0 @@ -#![allow( - clippy::unused_self, - clippy::trivially_copy_pass_by_ref, - unused_qualifications -)] - -mod dataset; -mod io; -mod model; -mod sparql; -mod store; - -use crate::dataset::*; -use crate::io::*; -use crate::model::*; -use crate::sparql::*; -use crate::store::*; -use pyo3::prelude::*; - -/// Oxigraph Python bindings -#[pymodule] -fn pyoxigraph(_py: Python<'_>, module: &Bound<'_, PyModule>) -> PyResult<()> { - module.add("__package__", "pyoxigraph")?; - module.add("__version__", env!("CARGO_PKG_VERSION"))?; - module.add("__author__", env!("CARGO_PKG_AUTHORS").replace(':', "\n"))?; - - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_class::()?; - module.add_wrapped(wrap_pyfunction!(parse))?; - module.add_wrapped(wrap_pyfunction!(parse_query_results))?; - module.add_wrapped(wrap_pyfunction!(serialize))?; - Ok(()) -} diff --git a/python/src/model.rs b/python/src/model.rs deleted file mode 100644 index a930db2c..00000000 --- a/python/src/model.rs +++ /dev/null @@ -1,1281 +0,0 @@ -use oxigraph::model::*; -use pyo3::basic::CompareOp; -use pyo3::exceptions::{PyIndexError, PyNotImplementedError, PyTypeError, PyValueError}; -use pyo3::prelude::*; -use pyo3::types::{PyDict, PyTuple}; -use pyo3::PyTypeInfo; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; -use std::vec::IntoIter; - -/// An RDF `node identified by an IRI `_. -/// -/// :param value: the IRI as a string. -/// :type value: str -/// :raises ValueError: if the IRI is not valid according to `RFC 3987 `_. -/// -/// The :py:class:`str` function provides a serialization compatible with NTriples, Turtle, and SPARQL: -/// -/// >>> str(NamedNode('http://example.com')) -/// '' -#[pyclass(frozen, name = "NamedNode", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] -pub struct PyNamedNode { - inner: NamedNode, -} - -impl From for PyNamedNode { - fn from(inner: NamedNode) -> Self { - Self { inner } - } -} - -impl From for NamedNode { - fn from(node: PyNamedNode) -> Self { - node.inner - } -} - -impl From for NamedOrBlankNode { - fn from(node: PyNamedNode) -> Self { - node.inner.into() - } -} - -impl From for Subject { - fn from(node: PyNamedNode) -> Self { - node.inner.into() - } -} - -impl From for Term { - fn from(node: PyNamedNode) -> Self { - node.inner.into() - } -} - -impl From for GraphName { - fn from(node: PyNamedNode) -> Self { - node.inner.into() - } -} - -#[pymethods] -impl PyNamedNode { - #[new] - fn new(value: String) -> PyResult { - Ok(NamedNode::new(value) - .map_err(|e| PyValueError::new_err(e.to_string()))? - .into()) - } - - /// :return: the named node IRI. - /// :rtype: str - /// - /// >>> NamedNode("http://example.com").value - /// 'http://example.com' - #[getter] - fn value(&self) -> &str { - self.inner.as_str() - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __repr__(&self) -> String { - let mut buffer = String::new(); - named_node_repr(self.inner.as_ref(), &mut buffer); - buffer - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __richcmp__(&self, other: &Bound<'_, PyAny>, op: CompareOp) -> PyResult { - if let Ok(other) = other.extract::>() { - Ok(op.matches(self.cmp(&other))) - } else if PyBlankNode::is_type_of_bound(other) - || PyLiteral::is_type_of_bound(other) - || PyDefaultGraph::is_type_of_bound(other) - { - eq_compare_other_type(op) - } else { - Err(PyTypeError::new_err( - "NamedNode could only be compared with RDF terms", - )) - } - } - - /// :rtype: typing.Any - fn __getnewargs__(&self) -> (&str,) { - (self.value(),) - } - - /// :rtype: NamedNode - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: NamedNode - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } - - #[classattr] - fn __match_args__() -> (&'static str,) { - ("value",) - } -} - -/// An RDF `blank node `_. -/// -/// :param value: the `blank node identifier `_ (if not present, a random blank node identifier is automatically generated). -/// :type value: str or None, optional -/// :raises ValueError: if the blank node identifier is invalid according to NTriples, Turtle, and SPARQL grammars. -/// -/// The :py:class:`str` function provides a serialization compatible with NTriples, Turtle, and SPARQL: -/// -/// >>> str(BlankNode('ex')) -/// '_:ex' -#[pyclass(frozen, name = "BlankNode", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct PyBlankNode { - inner: BlankNode, -} - -impl From for PyBlankNode { - fn from(inner: BlankNode) -> Self { - Self { inner } - } -} - -impl From for BlankNode { - fn from(node: PyBlankNode) -> Self { - node.inner - } -} - -impl From for NamedOrBlankNode { - fn from(node: PyBlankNode) -> Self { - node.inner.into() - } -} - -impl From for Subject { - fn from(node: PyBlankNode) -> Self { - node.inner.into() - } -} - -impl From for Term { - fn from(node: PyBlankNode) -> Self { - node.inner.into() - } -} - -impl From for GraphName { - fn from(node: PyBlankNode) -> Self { - node.inner.into() - } -} - -#[pymethods] -impl PyBlankNode { - #[new] - #[pyo3(signature = (value = None))] - fn new(value: Option) -> PyResult { - Ok(if let Some(value) = value { - BlankNode::new(value).map_err(|e| PyValueError::new_err(e.to_string()))? - } else { - BlankNode::default() - } - .into()) - } - - /// :return: the `blank node identifier `_. - /// :rtype: str - /// - /// >>> BlankNode("ex").value - /// 'ex' - #[getter] - fn value(&self) -> &str { - self.inner.as_str() - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __repr__(&self) -> String { - let mut buffer = String::new(); - blank_node_repr(self.inner.as_ref(), &mut buffer); - buffer - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __richcmp__(&self, other: &Bound<'_, PyAny>, op: CompareOp) -> PyResult { - if let Ok(other) = other.extract::>() { - eq_compare(self, &other, op) - } else if PyNamedNode::is_type_of_bound(other) - || PyLiteral::is_type_of_bound(other) - || PyDefaultGraph::is_type_of_bound(other) - { - eq_compare_other_type(op) - } else { - Err(PyTypeError::new_err( - "BlankNode could only be compared with RDF terms", - )) - } - } - - /// :rtype: typing.Any - fn __getnewargs__(&self) -> (&str,) { - (self.value(),) - } - - /// :rtype: BlankNode - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: BlankNode - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } - - #[classattr] - fn __match_args__() -> (&'static str,) { - ("value",) - } -} - -/// An RDF `literal `_. -/// -/// :param value: the literal value or `lexical form `_. -/// :type value: str -/// :param datatype: the literal `datatype IRI `_. -/// :type datatype: NamedNode or None, optional -/// :param language: the literal `language tag `_. -/// :type language: str or None, optional -/// :raises ValueError: if the language tag is not valid according to `RFC 5646 `_ (`BCP 47 `_). -/// -/// The :py:class:`str` function provides a serialization compatible with NTriples, Turtle, and SPARQL: -/// -/// >>> str(Literal('example')) -/// '"example"' -/// >>> str(Literal('example', language='en')) -/// '"example"@en' -/// >>> str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) -/// '"11"^^' -#[pyclass(frozen, name = "Literal", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct PyLiteral { - inner: Literal, -} - -impl From for PyLiteral { - fn from(inner: Literal) -> Self { - Self { inner } - } -} - -impl From for Literal { - fn from(literal: PyLiteral) -> Self { - literal.inner - } -} - -impl From for Term { - fn from(node: PyLiteral) -> Self { - node.inner.into() - } -} - -#[pymethods] -impl PyLiteral { - #[new] - #[pyo3(signature = (value, *, datatype = None, language = None))] - fn new( - value: String, - datatype: Option, - language: Option, - ) -> PyResult { - Ok(if let Some(language) = language { - if let Some(datatype) = datatype { - if datatype.value() != "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" { - return Err(PyValueError::new_err( - "The literals with a language tag must use the rdf:langString datatype", - )); - } - } - Literal::new_language_tagged_literal(value, language) - .map_err(|e| PyValueError::new_err(e.to_string()))? - } else if let Some(datatype) = datatype { - Literal::new_typed_literal(value, datatype) - } else { - Literal::new_simple_literal(value) - } - .into()) - } - - /// :return: the literal value or `lexical form `_. - /// :rtype: str - /// - /// >>> Literal("example").value - /// 'example' - #[getter] - fn value(&self) -> &str { - self.inner.value() - } - - /// :return: the literal `language tag `_. - /// :rtype: str or None - /// - /// >>> Literal('example', language='en').language - /// 'en' - /// >>> Literal('example').language - #[getter] - fn language(&self) -> Option<&str> { - self.inner.language() - } - - /// :return: the literal `datatype IRI `_. - /// :rtype: NamedNode - /// - /// >>> Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer')).datatype - /// - /// >>> Literal('example').datatype - /// - /// >>> Literal('example', language='en').datatype - /// - #[getter] - fn datatype(&self) -> PyNamedNode { - self.inner.datatype().into_owned().into() - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __repr__(&self) -> String { - let mut buffer = String::new(); - literal_repr(self.inner.as_ref(), &mut buffer); - buffer - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __richcmp__(&self, other: &Bound<'_, PyAny>, op: CompareOp) -> PyResult { - if let Ok(other) = other.extract::>() { - eq_compare(self, &other, op) - } else if PyNamedNode::is_type_of_bound(other) - || PyBlankNode::is_type_of_bound(other) - || PyDefaultGraph::is_type_of_bound(other) - { - eq_compare_other_type(op) - } else { - Err(PyTypeError::new_err( - "Literal could only be compared with RDF terms", - )) - } - } - - /// :rtype: typing.Any - fn __getnewargs_ex__<'a, 'py>( - &'a self, - py: Python<'py>, - ) -> PyResult<((&'a str,), Bound<'py, PyDict>)> { - let kwargs = PyDict::new_bound(py); - if let Some(language) = self.language() { - kwargs.set_item("language", language)?; - } else { - kwargs.set_item("datatype", self.datatype().into_py(py))?; - } - Ok(((self.value(),), kwargs)) - } - - /// :rtype: Literal - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: Literal - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } - - #[classattr] - fn __match_args__() -> (&'static str,) { - ("value",) - } -} - -/// The RDF `default graph name `_. -#[pyclass(frozen, name = "DefaultGraph", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] -pub struct PyDefaultGraph; - -impl From for GraphName { - fn from(_: PyDefaultGraph) -> Self { - Self::DefaultGraph - } -} - -#[pymethods] -impl PyDefaultGraph { - #[new] - fn new() -> Self { - Self {} - } - - fn __str__(&self) -> &str { - "DEFAULT" - } - - fn __repr__(&self) -> &str { - "" - } - - fn __hash__(&self) -> u64 { - 0 - } - - fn __richcmp__(&self, other: &Bound<'_, PyAny>, op: CompareOp) -> PyResult { - if let Ok(other) = other.extract::>() { - eq_compare(self, &other, op) - } else if PyNamedNode::is_type_of_bound(other) - || PyBlankNode::is_type_of_bound(other) - || PyLiteral::is_type_of_bound(other) - { - eq_compare_other_type(op) - } else { - Err(PyTypeError::new_err( - "DefaultGraph could only be compared with RDF terms", - )) - } - } - - /// :rtype: typing.Any - fn __getnewargs__<'py>(&self, py: Python<'py>) -> Bound<'py, PyTuple> { - PyTuple::empty_bound(py) - } - - /// :rtype: DefaultGraph - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: DefaultGraph - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } -} - -#[derive(FromPyObject)] -pub enum PyNamedOrBlankNode { - NamedNode(PyNamedNode), - BlankNode(PyBlankNode), -} - -impl From for NamedOrBlankNode { - fn from(node: PyNamedOrBlankNode) -> Self { - match node { - PyNamedOrBlankNode::NamedNode(node) => node.into(), - PyNamedOrBlankNode::BlankNode(node) => node.into(), - } - } -} - -impl From for PyNamedOrBlankNode { - fn from(node: NamedOrBlankNode) -> Self { - match node { - NamedOrBlankNode::NamedNode(node) => Self::NamedNode(node.into()), - NamedOrBlankNode::BlankNode(node) => Self::BlankNode(node.into()), - } - } -} - -impl IntoPy for PyNamedOrBlankNode { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - Self::NamedNode(node) => node.into_py(py), - Self::BlankNode(node) => node.into_py(py), - } - } -} - -#[derive(FromPyObject)] -pub enum PySubject { - NamedNode(PyNamedNode), - BlankNode(PyBlankNode), - Triple(PyTriple), -} - -impl From for Subject { - fn from(node: PySubject) -> Self { - match node { - PySubject::NamedNode(node) => node.into(), - PySubject::BlankNode(node) => node.into(), - PySubject::Triple(triple) => triple.into(), - } - } -} - -impl From for PySubject { - fn from(node: Subject) -> Self { - match node { - Subject::NamedNode(node) => Self::NamedNode(node.into()), - Subject::BlankNode(node) => Self::BlankNode(node.into()), - Subject::Triple(triple) => Self::Triple(triple.as_ref().clone().into()), - } - } -} - -impl IntoPy for PySubject { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - Self::NamedNode(node) => node.into_py(py), - Self::BlankNode(node) => node.into_py(py), - Self::Triple(triple) => triple.into_py(py), - } - } -} - -#[derive(FromPyObject)] -pub enum PyTerm { - NamedNode(PyNamedNode), - BlankNode(PyBlankNode), - Literal(PyLiteral), - Triple(PyTriple), -} - -impl From for Term { - fn from(term: PyTerm) -> Self { - match term { - PyTerm::NamedNode(node) => node.into(), - PyTerm::BlankNode(node) => node.into(), - PyTerm::Literal(literal) => literal.into(), - PyTerm::Triple(triple) => triple.into(), - } - } -} - -impl From for PyTerm { - fn from(term: Term) -> Self { - match term { - Term::NamedNode(node) => Self::NamedNode(node.into()), - Term::BlankNode(node) => Self::BlankNode(node.into()), - Term::Literal(literal) => Self::Literal(literal.into()), - Term::Triple(triple) => Self::Triple(triple.as_ref().clone().into()), - } - } -} - -impl IntoPy for PyTerm { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - Self::NamedNode(node) => node.into_py(py), - Self::BlankNode(node) => node.into_py(py), - Self::Literal(literal) => literal.into_py(py), - Self::Triple(triple) => triple.into_py(py), - } - } -} - -/// An RDF `triple `_. -/// -/// :param subject: the triple subject. -/// :type subject: NamedNode or BlankNode or Triple -/// :param predicate: the triple predicate. -/// :type predicate: NamedNode -/// :param object: the triple object. -/// :type object: NamedNode or BlankNode or Literal or Triple -/// -/// The :py:class:`str` function provides a serialization compatible with NTriples, Turtle, and SPARQL: -/// -/// >>> str(Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) -/// ' "1"' -/// -/// A triple could also be easily destructed into its components: -/// -/// >>> (s, p, o) = Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')) -#[pyclass(frozen, sequence, name = "Triple", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct PyTriple { - inner: Triple, -} - -impl From for PyTriple { - fn from(inner: Triple) -> Self { - Self { inner } - } -} - -impl From for Triple { - fn from(triple: PyTriple) -> Self { - triple.inner - } -} - -impl<'a> From<&'a PyTriple> for TripleRef<'a> { - fn from(triple: &'a PyTriple) -> Self { - triple.inner.as_ref() - } -} - -impl From for Subject { - fn from(triple: PyTriple) -> Self { - triple.inner.into() - } -} - -impl From for Term { - fn from(triple: PyTriple) -> Self { - triple.inner.into() - } -} - -#[pymethods] -impl PyTriple { - #[new] - fn new(subject: PySubject, predicate: PyNamedNode, object: PyTerm) -> Self { - Triple::new(subject, predicate, object).into() - } - - /// :return: the triple subject. - /// :rtype: NamedNode or BlankNode or Triple - /// - /// >>> Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')).subject - /// - #[getter] - fn subject(&self) -> PySubject { - self.inner.subject.clone().into() - } - - /// :return: the triple predicate. - /// :rtype: NamedNode - /// - /// >>> Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')).predicate - /// - #[getter] - fn predicate(&self) -> PyNamedNode { - self.inner.predicate.clone().into() - } - - /// :return: the triple object. - /// :rtype: NamedNode or BlankNode or Literal or Triple - /// - /// >>> Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')).object - /// > - #[getter] - fn object(&self) -> PyTerm { - self.inner.object.clone().into() - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __repr__(&self) -> String { - let mut buffer = String::new(); - triple_repr(self.inner.as_ref(), &mut buffer); - buffer - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __richcmp__(&self, other: &Self, op: CompareOp) -> PyResult { - eq_compare(self, other, op) - } - - fn __len__(&self) -> usize { - 3 - } - - fn __getitem__(&self, input: usize) -> PyResult { - match input { - 0 => Ok(Term::from(self.inner.subject.clone()).into()), - 1 => Ok(Term::from(self.inner.predicate.clone()).into()), - 2 => Ok(self.inner.object.clone().into()), - _ => Err(PyIndexError::new_err("A triple has only 3 elements")), - } - } - - fn __iter__(&self) -> TripleComponentsIter { - TripleComponentsIter { - inner: vec![ - self.inner.subject.clone().into(), - self.inner.predicate.clone().into(), - self.inner.object.clone(), - ] - .into_iter(), - } - } - - /// :rtype: typing.Any - fn __getnewargs__(&self) -> (PySubject, PyNamedNode, PyTerm) { - (self.subject(), self.predicate(), self.object()) - } - - /// :rtype: Triple - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: Triple - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } - - #[classattr] - fn __match_args__() -> (&'static str, &'static str, &'static str) { - ("subject", "predicate", "object") - } -} - -#[derive(FromPyObject)] -pub enum PyGraphName { - NamedNode(PyNamedNode), - BlankNode(PyBlankNode), - DefaultGraph(PyDefaultGraph), -} - -impl From for GraphName { - fn from(graph_name: PyGraphName) -> Self { - match graph_name { - PyGraphName::NamedNode(node) => node.into(), - PyGraphName::BlankNode(node) => node.into(), - PyGraphName::DefaultGraph(default_graph) => default_graph.into(), - } - } -} - -impl From for PyGraphName { - fn from(graph_name: GraphName) -> Self { - match graph_name { - GraphName::NamedNode(node) => Self::NamedNode(node.into()), - GraphName::BlankNode(node) => Self::BlankNode(node.into()), - GraphName::DefaultGraph => Self::DefaultGraph(PyDefaultGraph::new()), - } - } -} - -impl IntoPy for PyGraphName { - fn into_py(self, py: Python<'_>) -> PyObject { - match self { - Self::NamedNode(node) => node.into_py(py), - Self::BlankNode(node) => node.into_py(py), - Self::DefaultGraph(node) => node.into_py(py), - } - } -} - -/// An RDF `triple `_. -/// in a `RDF dataset `_. -/// -/// :param subject: the quad subject. -/// :type subject: NamedNode or BlankNode or Triple -/// :param predicate: the quad predicate. -/// :type predicate: NamedNode -/// :param object: the quad object. -/// :type object: NamedNode or BlankNode or Literal or Triple -/// :param graph_name: the quad graph name. If not present, the default graph is assumed. -/// :type graph_name: NamedNode or BlankNode or DefaultGraph or None, optional -/// -/// The :py:class:`str` function provides a serialization compatible with NTriples, Turtle, and SPARQL: -/// -/// >>> str(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) -/// ' "1" ' -/// -/// >>> str(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), DefaultGraph())) -/// ' "1"' -/// -/// A quad could also be easily destructed into its components: -/// -/// >>> (s, p, o, g) = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) -#[pyclass(frozen, sequence, name = "Quad", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct PyQuad { - inner: Quad, -} - -impl From for PyQuad { - fn from(inner: Quad) -> Self { - Self { inner } - } -} - -impl From for Quad { - fn from(node: PyQuad) -> Self { - node.inner - } -} - -impl<'a> From<&'a PyQuad> for QuadRef<'a> { - fn from(node: &'a PyQuad) -> Self { - node.inner.as_ref() - } -} - -#[pymethods] -impl PyQuad { - #[new] - #[pyo3(signature = (subject, predicate, object, graph_name = None))] - fn new( - subject: PySubject, - predicate: PyNamedNode, - object: PyTerm, - graph_name: Option, - ) -> Self { - Quad::new( - subject, - predicate, - object, - graph_name.unwrap_or(PyGraphName::DefaultGraph(PyDefaultGraph {})), - ) - .into() - } - - /// :return: the quad subject. - /// :rtype: NamedNode or BlankNode or Triple - /// - /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).subject - /// - #[getter] - fn subject(&self) -> PySubject { - self.inner.subject.clone().into() - } - - /// :return: the quad predicate. - /// :rtype: NamedNode - /// - /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).predicate - /// - #[getter] - fn predicate(&self) -> PyNamedNode { - self.inner.predicate.clone().into() - } - - /// :return: the quad object. - /// :rtype: NamedNode or BlankNode or Literal or Triple - /// - /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).object - /// > - #[getter] - fn object(&self) -> PyTerm { - self.inner.object.clone().into() - } - - /// :return: the quad graph name. - /// :rtype: NamedNode or BlankNode or DefaultGraph - /// - /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).graph_name - /// - #[getter] - fn graph_name(&self) -> PyGraphName { - self.inner.graph_name.clone().into() - } - - /// :return: the quad underlying triple. - /// :rtype: Triple - /// - /// >>> Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')).triple - /// predicate= object=>> - #[getter] - fn triple(&self) -> PyTriple { - Triple::from(self.inner.clone()).into() - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __repr__(&self) -> String { - let mut buffer = String::new(); - buffer.push_str(" QuadComponentsIter { - QuadComponentsIter { - inner: vec![ - Some(self.inner.subject.clone().into()), - Some(self.inner.predicate.clone().into()), - Some(self.inner.object.clone()), - match self.inner.graph_name.clone() { - GraphName::NamedNode(node) => Some(node.into()), - GraphName::BlankNode(node) => Some(node.into()), - GraphName::DefaultGraph => None, - }, - ] - .into_iter(), - } - } - - /// :rtype: typing.Any - fn __getnewargs__(&self) -> (PySubject, PyNamedNode, PyTerm, PyGraphName) { - ( - self.subject(), - self.predicate(), - self.object(), - self.graph_name(), - ) - } - - /// :rtype: Quad - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: Quad - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } - - #[classattr] - fn __match_args__() -> (&'static str, &'static str, &'static str, &'static str) { - ("subject", "predicate", "object", "graph_name") - } -} - -/// A SPARQL query variable. -/// -/// :param value: the variable name as a string. -/// :type value: str -/// :raises ValueError: if the variable name is invalid according to the SPARQL grammar. -/// -/// The :py:class:`str` function provides a serialization compatible with SPARQL: -/// -/// >>> str(Variable('foo')) -/// '?foo' -#[pyclass(frozen, name = "Variable", module = "pyoxigraph")] -#[derive(Eq, PartialEq, Debug, Clone, Hash)] -pub struct PyVariable { - inner: Variable, -} - -impl From for PyVariable { - fn from(inner: Variable) -> Self { - Self { inner } - } -} - -impl From for Variable { - fn from(variable: PyVariable) -> Self { - variable.inner - } -} - -impl<'a> From<&'a PyVariable> for &'a Variable { - fn from(variable: &'a PyVariable) -> Self { - &variable.inner - } -} - -#[pymethods] -impl PyVariable { - #[new] - fn new(value: String) -> PyResult { - Ok(Variable::new(value) - .map_err(|e| PyValueError::new_err(e.to_string()))? - .into()) - } - - /// :return: the variable name. - /// :rtype: str - /// - /// >>> Variable("foo").value - /// 'foo' - #[getter] - fn value(&self) -> &str { - self.inner.as_str() - } - - fn __str__(&self) -> String { - self.inner.to_string() - } - - fn __repr__(&self) -> String { - format!("", self.inner.as_str()) - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __richcmp__(&self, other: &Self, op: CompareOp) -> PyResult { - eq_compare(self, other, op) - } - - /// :rtype: typing.Any - fn __getnewargs__(&self) -> (&str,) { - (self.value(),) - } - - /// :rtype: Variable - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: Variable - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } - - #[classattr] - fn __match_args__() -> (&'static str,) { - ("value",) - } -} - -#[derive(FromPyObject)] -pub struct PyNamedNodeRef<'a>(PyRef<'a, PyNamedNode>); - -impl<'a> From<&'a PyNamedNodeRef<'a>> for NamedNodeRef<'a> { - fn from(value: &'a PyNamedNodeRef<'a>) -> Self { - value.0.inner.as_ref() - } -} - -#[derive(FromPyObject)] -pub enum PyNamedOrBlankNodeRef<'a> { - NamedNode(PyRef<'a, PyNamedNode>), - BlankNode(PyRef<'a, PyBlankNode>), -} - -impl<'a> From<&'a PyNamedOrBlankNodeRef<'a>> for NamedOrBlankNodeRef<'a> { - fn from(value: &'a PyNamedOrBlankNodeRef<'a>) -> Self { - match value { - PyNamedOrBlankNodeRef::NamedNode(value) => value.inner.as_ref().into(), - PyNamedOrBlankNodeRef::BlankNode(value) => value.inner.as_ref().into(), - } - } -} - -#[derive(FromPyObject)] -pub enum PySubjectRef<'a> { - NamedNode(PyRef<'a, PyNamedNode>), - BlankNode(PyRef<'a, PyBlankNode>), - Triple(PyRef<'a, PyTriple>), -} - -impl<'a> From<&'a PySubjectRef<'a>> for SubjectRef<'a> { - fn from(value: &'a PySubjectRef<'a>) -> Self { - match value { - PySubjectRef::NamedNode(value) => value.inner.as_ref().into(), - PySubjectRef::BlankNode(value) => value.inner.as_ref().into(), - PySubjectRef::Triple(value) => (&value.inner).into(), - } - } -} - -#[derive(FromPyObject)] -pub enum PyTermRef<'a> { - NamedNode(PyRef<'a, PyNamedNode>), - BlankNode(PyRef<'a, PyBlankNode>), - Literal(PyRef<'a, PyLiteral>), - Triple(PyRef<'a, PyTriple>), -} - -impl<'a> From<&'a PyTermRef<'a>> for TermRef<'a> { - fn from(value: &'a PyTermRef<'a>) -> Self { - match value { - PyTermRef::NamedNode(value) => value.inner.as_ref().into(), - PyTermRef::BlankNode(value) => value.inner.as_ref().into(), - PyTermRef::Literal(value) => value.inner.as_ref().into(), - PyTermRef::Triple(value) => (&value.inner).into(), - } - } -} - -impl<'a> From<&'a PyTermRef<'a>> for Term { - fn from(value: &'a PyTermRef<'a>) -> Self { - TermRef::from(value).into() - } -} - -#[derive(FromPyObject)] -pub enum PyGraphNameRef<'a> { - NamedNode(PyRef<'a, PyNamedNode>), - BlankNode(PyRef<'a, PyBlankNode>), - DefaultGraph(PyRef<'a, PyDefaultGraph>), -} - -impl<'a> From<&'a PyGraphNameRef<'a>> for GraphNameRef<'a> { - fn from(value: &'a PyGraphNameRef<'a>) -> Self { - match value { - PyGraphNameRef::NamedNode(value) => value.inner.as_ref().into(), - PyGraphNameRef::BlankNode(value) => value.inner.as_ref().into(), - PyGraphNameRef::DefaultGraph(_) => Self::DefaultGraph, - } - } -} - -fn eq_compare(a: &T, b: &T, op: CompareOp) -> PyResult { - match op { - CompareOp::Eq => Ok(a == b), - CompareOp::Ne => Ok(a != b), - _ => Err(PyNotImplementedError::new_err( - "Ordering is not implemented", - )), - } -} - -fn eq_compare_other_type(op: CompareOp) -> PyResult { - match op { - CompareOp::Eq => Ok(false), - CompareOp::Ne => Ok(true), - _ => Err(PyNotImplementedError::new_err( - "Ordering is not implemented", - )), - } -} - -pub(crate) fn hash(t: &impl Hash) -> u64 { - let mut s = DefaultHasher::new(); - t.hash(&mut s); - s.finish() -} - -fn named_node_repr(node: NamedNodeRef<'_>, buffer: &mut String) { - buffer.push_str(""), - } -} - -fn triple_repr(triple: TripleRef<'_>, buffer: &mut String) { - buffer.push_str(", -} - -#[pymethods] -impl TripleComponentsIter { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self) -> Option { - self.inner.next().map(PyTerm::from) - } -} - -#[pyclass(module = "pyoxigraph")] -pub struct QuadComponentsIter { - inner: IntoIter>, -} - -#[pymethods] -impl QuadComponentsIter { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self, py: Python<'_>) -> Option { - self.inner.next().map(move |t| { - if let Some(t) = t { - PyTerm::from(t).into_py(py) - } else { - PyDefaultGraph {}.into_py(py) - } - }) - } -} diff --git a/python/src/sparql.rs b/python/src/sparql.rs deleted file mode 100644 index f2a40c85..00000000 --- a/python/src/sparql.rs +++ /dev/null @@ -1,740 +0,0 @@ -use crate::io::*; -use crate::model::*; -use crate::store::map_storage_error; -use oxigraph::io::RdfSerializer; -use oxigraph::model::Term; -use oxigraph::sparql::results::{ - FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat, - QueryResultsParseError, QueryResultsParser, QueryResultsSerializer, -}; -use oxigraph::sparql::{ - EvaluationError, Query, QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter, - Variable, -}; -use pyo3::basic::CompareOp; -use pyo3::exceptions::{PyRuntimeError, PySyntaxError, PyValueError}; -use pyo3::prelude::*; -use pyo3::types::{PyBytes, PyString}; -use std::ffi::OsStr; -use std::io; -use std::path::{Path, PathBuf}; -use std::vec::IntoIter; - -pub fn parse_query( - query: &str, - base_iri: Option<&str>, - use_default_graph_as_union: bool, - default_graph: Option<&Bound<'_, PyAny>>, - named_graphs: Option<&Bound<'_, PyAny>>, - py: Python<'_>, -) -> PyResult { - let mut query = allow_threads_unsafe(py, || Query::parse(query, base_iri)) - .map_err(|e| map_evaluation_error(e.into()))?; - - if use_default_graph_as_union && default_graph.is_some() { - return Err(PyValueError::new_err( - "The query() method use_default_graph_as_union and default_graph arguments should not be set at the same time", - )); - } - - if use_default_graph_as_union { - query.dataset_mut().set_default_graph_as_union(); - } - - if let Some(default_graph) = default_graph { - if let Ok(default_graphs) = default_graph.iter() { - query.dataset_mut().set_default_graph( - default_graphs - .map(|graph| Ok(graph?.extract::()?.into())) - .collect::>()?, - ) - } else if let Ok(default_graph) = default_graph.extract::() { - query - .dataset_mut() - .set_default_graph(vec![default_graph.into()]); - } else { - return Err(PyValueError::new_err( - format!("The query() method default_graph argument should be a NamedNode, a BlankNode, the DefaultGraph or a not empty list of them. {} found", default_graph.get_type() - ))); - } - } - - if let Some(named_graphs) = named_graphs { - query.dataset_mut().set_available_named_graphs( - named_graphs - .iter()? - .map(|graph| Ok(graph?.extract::()?.into())) - .collect::>()?, - ) - } - - Ok(query) -} - -pub fn query_results_to_python(py: Python<'_>, results: QueryResults) -> PyObject { - match results { - QueryResults::Solutions(inner) => PyQuerySolutions { - inner: PyQuerySolutionsVariant::Query(inner), - } - .into_py(py), - QueryResults::Graph(inner) => PyQueryTriples { inner }.into_py(py), - QueryResults::Boolean(inner) => PyQueryBoolean { inner }.into_py(py), - } -} - -/// Tuple associating variables and terms that are the result of a SPARQL ``SELECT`` query. -/// -/// It is the equivalent of a row in SQL. -/// -/// It could be indexes by variable name (:py:class:`Variable` or :py:class:`str`) or position in the tuple (:py:class:`int`). -/// Unpacking also works. -/// -/// >>> store = Store() -/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) -/// >>> solution = next(store.query('SELECT ?s ?p ?o WHERE { ?s ?p ?o }')) -/// >>> solution[Variable('s')] -/// -/// >>> solution['s'] -/// -/// >>> solution[0] -/// -/// >>> s, p, o = solution -/// >>> s -/// -#[pyclass(frozen, name = "QuerySolution", module = "pyoxigraph")] -pub struct PyQuerySolution { - inner: QuerySolution, -} - -#[pymethods] -impl PyQuerySolution { - fn __repr__(&self) -> String { - let mut buffer = String::new(); - buffer.push_str("'); - buffer - } - - fn __eq__(&self, other: &Self) -> bool { - self.inner == other.inner - } - - fn __ne__(&self, other: &Self) -> bool { - self.inner != other.inner - } - - fn __len__(&self) -> usize { - self.inner.len() - } - - fn __getitem__(&self, key: PySolutionKey<'_>) -> PyResult> { - Ok(match key { - PySolutionKey::Usize(key) => self.inner.get(key), - PySolutionKey::Str(key) => self.inner.get(key.to_cow()?.as_ref()), - PySolutionKey::Variable(key) => self.inner.get(<&Variable>::from(&*key)), - } - .map(|term| PyTerm::from(term.clone()))) - } - - #[allow(clippy::unnecessary_to_owned)] - fn __iter__(&self) -> SolutionValueIter { - SolutionValueIter { - inner: self.inner.values().to_vec().into_iter(), - } - } -} - -#[derive(FromPyObject)] -pub enum PySolutionKey<'a> { - Usize(usize), - Str(Bound<'a, PyString>), // TODO: Python 3.10+: use &str - Variable(PyRef<'a, PyVariable>), -} - -#[pyclass(module = "pyoxigraph")] -pub struct SolutionValueIter { - inner: IntoIter>, -} - -#[pymethods] -impl SolutionValueIter { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self) -> Option> { - self.inner.next().map(|v| v.map(PyTerm::from)) - } -} - -/// An iterator of :py:class:`QuerySolution` returned by a SPARQL ``SELECT`` query -/// -/// >>> store = Store() -/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) -/// >>> list(store.query('SELECT ?s WHERE { ?s ?p ?o }')) -/// [>] -#[pyclass(unsendable, name = "QuerySolutions", module = "pyoxigraph")] -pub struct PyQuerySolutions { - inner: PyQuerySolutionsVariant, -} -enum PyQuerySolutionsVariant { - Query(QuerySolutionIter), - Reader { - iter: FromReadSolutionsReader, - file_path: Option, - }, -} - -#[pymethods] -impl PyQuerySolutions { - /// :return: the ordered list of all variables that could appear in the query results - /// :rtype: list[Variable] - /// - /// >>> store = Store() - /// >>> store.query('SELECT ?s WHERE { ?s ?p ?o }').variables - /// [] - #[getter] - fn variables(&self) -> Vec { - match &self.inner { - PyQuerySolutionsVariant::Query(inner) => { - inner.variables().iter().map(|v| v.clone().into()).collect() - } - PyQuerySolutionsVariant::Reader { iter, .. } => { - iter.variables().iter().map(|v| v.clone().into()).collect() - } - } - } - - /// Writes the query results into a file. - /// - /// It currently supports the following formats: - /// - /// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) - /// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) - /// * `CSV `_ (:py:attr:`QueryResultsFormat.CSV`) - /// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) - /// - /// It supports also some media type and extension aliases. - /// For example, ``application/json`` could also be used for `JSON `_. - /// - /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. - /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the query results serialization. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: QueryResultsFormat or None, optional - /// :rtype: bytes or None - /// :raises ValueError: if the format is not supported. - /// :raises OSError: if a system error happens while writing the file. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> results = store.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o }") - /// >>> results.serialize(format=QueryResultsFormat.JSON) - /// b'{"head":{"vars":["s","p","o"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"},"p":{"type":"uri","value":"http://example.com/p"},"o":{"type":"literal","value":"1"}}]}}' - #[pyo3(signature = (output = None, format = None))] - fn serialize<'py>( - &mut self, - output: Option, - format: Option, - py: Python<'py>, - ) -> PyResult>> { - PyWritable::do_write( - |output, file_path| { - let format = lookup_query_results_format(format, file_path.as_deref())?; - let mut writer = QueryResultsSerializer::from_format(format) - .serialize_solutions_to_write( - output, - match &self.inner { - PyQuerySolutionsVariant::Query(inner) => inner.variables().to_vec(), - PyQuerySolutionsVariant::Reader { iter, .. } => { - iter.variables().to_vec() - } - }, - )?; - match &mut self.inner { - PyQuerySolutionsVariant::Query(inner) => { - for solution in inner { - writer.write(&solution.map_err(map_evaluation_error)?)?; - } - } - PyQuerySolutionsVariant::Reader { iter, file_path } => { - for solution in iter { - writer.write(&solution.map_err(|e| { - map_query_results_parse_error(e, file_path.clone()) - })?)?; - } - } - } - - Ok(writer.finish()?) - }, - output, - py, - ) - } - - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self, py: Python<'_>) -> PyResult> { - Ok(match &mut self.inner { - PyQuerySolutionsVariant::Query(inner) => allow_threads_unsafe(py, || { - inner.next().transpose().map_err(map_evaluation_error) - }), - PyQuerySolutionsVariant::Reader { iter, file_path } => iter - .next() - .transpose() - .map_err(|e| map_query_results_parse_error(e, file_path.clone())), - }? - .map(move |inner| PyQuerySolution { inner })) - } -} - -/// A boolean returned by a SPARQL ``ASK`` query. -/// -/// It can be easily casted to a regular boolean using the :py:func:`bool` function. -/// -/// >>> store = Store() -/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) -/// >>> bool(store.query('ASK { ?s ?p ?o }')) -/// True -#[pyclass(unsendable, name = "QueryBoolean", module = "pyoxigraph")] -pub struct PyQueryBoolean { - inner: bool, -} - -#[pymethods] -impl PyQueryBoolean { - /// Writes the query results into a file. - /// - /// It currently supports the following formats: - /// - /// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) - /// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) - /// * `CSV `_ (:py:attr:`QueryResultsFormat.CSV`) - /// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) - /// - /// It supports also some media type and extension aliases. - /// For example, ``application/json`` could also be used for `JSON `_. - /// - /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. - /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the query results serialization. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: QueryResultsFormat or None, optional - /// :rtype: bytes or None - /// :raises ValueError: if the format is not supported. - /// :raises OSError: if a system error happens while writing the file. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> results = store.query("ASK { ?s ?p ?o }") - /// >>> results.serialize(format=QueryResultsFormat.JSON) - /// b'{"head":{},"boolean":true}' - #[pyo3(signature = (output = None, format = None))] - fn serialize<'py>( - &mut self, - output: Option, - format: Option, - py: Python<'py>, - ) -> PyResult>> { - PyWritable::do_write( - |output, file_path| { - let format = lookup_query_results_format(format, file_path.as_deref())?; - py.allow_threads(|| { - Ok(QueryResultsSerializer::from_format(format) - .serialize_boolean_to_write(output, self.inner)?) - }) - }, - output, - py, - ) - } - - fn __bool__(&self) -> bool { - self.inner - } - - fn __richcmp__(&self, other: &Self, op: CompareOp) -> bool { - op.matches(self.inner.cmp(&other.inner)) - } - - fn __hash__(&self) -> u64 { - self.inner.into() - } - - fn __repr__(&self) -> String { - format!("", self.inner) - } -} - -/// An iterator of :py:class:`Triple` returned by a SPARQL ``CONSTRUCT`` or ``DESCRIBE`` query -/// -/// >>> store = Store() -/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) -/// >>> list(store.query('CONSTRUCT WHERE { ?s ?p ?o }')) -/// [ predicate= object=>>] -#[pyclass(unsendable, name = "QueryTriples", module = "pyoxigraph")] -pub struct PyQueryTriples { - inner: QueryTripleIter, -} - -#[pymethods] -impl PyQueryTriples { - /// Writes the query results into a file. - /// - /// It currently supports the following formats: - /// - /// * `canonical `_ `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) - /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) - /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) - /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) - /// * `N3 `_ (:py:attr:`RdfFormat.N3`) - /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) - /// - /// It supports also some media type and extension aliases. - /// For example, ``application/turtle`` could also be used for `Turtle `_ - /// and ``application/xml`` or ``xml`` for `RDF/XML `_. - /// - /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. - /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: RdfFormat or None, optional - /// :rtype: bytes or None - /// :raises ValueError: if the format is not supported. - /// :raises OSError: if a system error happens while writing the file. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }") - /// >>> results.serialize(format=RdfFormat.N_TRIPLES) - /// b' "1" .\n' - #[pyo3(signature = (output = None, format = None))] - fn serialize<'py>( - &mut self, - output: Option, - format: Option, - py: Python<'py>, - ) -> PyResult>> { - PyWritable::do_write( - |output, file_path| { - let format = lookup_rdf_format(format, file_path.as_deref())?; - let mut writer = RdfSerializer::from_format(format).serialize_to_write(output); - for triple in &mut self.inner { - writer.write_triple(&triple.map_err(map_evaluation_error)?)?; - } - Ok(writer.finish()?) - }, - output, - py, - ) - } - - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self, py: Python<'_>) -> PyResult> { - Ok(allow_threads_unsafe(py, || self.inner.next()) - .transpose() - .map_err(map_evaluation_error)? - .map(Into::into)) - } -} - -/// Parses SPARQL query results. -/// -/// It currently supports the following formats: -/// -/// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) -/// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) -/// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) -/// -/// It supports also some media type and extension aliases. -/// For example, ``application/json`` could also be used for `JSON `_. -/// -/// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. -/// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional -/// :param format: the format of the query results serialization. If :py:const:`None`, the format is guessed from the file name extension. -/// :type format: QueryResultsFormat or None, optional -/// :param path: The file path to read from. Replaces the ``input`` parameter. -/// :type path: str or os.PathLike[str] or None, optional -/// :return: an iterator of :py:class:`QuerySolution` or a :py:class:`bool`. -/// :rtype: QuerySolutions or QueryBoolean -/// :raises ValueError: if the format is not supported. -/// :raises SyntaxError: if the provided data is invalid. -/// :raises OSError: if a system error happens while reading the file. -/// -/// >>> list(parse_query_results('?s\t?p\t?o\n\t\t1\n', QueryResultsFormat.TSV)) -/// [ p= o=>>] -/// -/// >>> parse_query_results('{"head":{},"boolean":true}', QueryResultsFormat.JSON) -/// -#[pyfunction] -#[pyo3(signature = (input = None, format = None, *, path = None))] -pub fn parse_query_results( - input: Option, - format: Option, - path: Option, - py: Python<'_>, -) -> PyResult { - let input = PyReadable::from_args(&path, input, py)?; - let format = lookup_query_results_format(format, path.as_deref())?; - let results = QueryResultsParser::from_format(format) - .parse_read(input) - .map_err(|e| map_query_results_parse_error(e, path.clone()))?; - Ok(match results { - FromReadQueryResultsReader::Solutions(iter) => PyQuerySolutions { - inner: PyQuerySolutionsVariant::Reader { - iter, - file_path: path, - }, - } - .into_py(py), - FromReadQueryResultsReader::Boolean(inner) => PyQueryBoolean { inner }.into_py(py), - }) -} - -/// `SPARQL query `_ results serialization formats. -/// -/// The following formats are supported: -/// -/// * `XML `_ (:py:attr:`QueryResultsFormat.XML`) -/// * `JSON `_ (:py:attr:`QueryResultsFormat.JSON`) -/// * `CSV `_ (:py:attr:`QueryResultsFormat.CSV`) -/// * `TSV `_ (:py:attr:`QueryResultsFormat.TSV`) -#[pyclass(name = "QueryResultsFormat", module = "pyoxigraph")] -#[derive(Clone)] -pub struct PyQueryResultsFormat { - inner: QueryResultsFormat, -} - -#[pymethods] -impl PyQueryResultsFormat { - /// `SPARQL Query Results CSV Format `_ - #[classattr] - const CSV: Self = Self { - inner: QueryResultsFormat::Csv, - }; - /// `SPARQL Query Results JSON Format `_ - #[classattr] - const JSON: Self = Self { - inner: QueryResultsFormat::Json, - }; - /// `SPARQL Query Results TSV Format `_ - #[classattr] - const TSV: Self = Self { - inner: QueryResultsFormat::Tsv, - }; - /// `SPARQL Query Results XML Format `_ - #[classattr] - const XML: Self = Self { - inner: QueryResultsFormat::Xml, - }; - - /// :return: the format canonical IRI according to the `Unique URIs for file formats registry `_. - /// :rtype: str - /// - /// >>> QueryResultsFormat.JSON.iri - /// 'http://www.w3.org/ns/formats/SPARQL_Results_JSON' - #[getter] - fn iri(&self) -> &'static str { - self.inner.iri() - } - - /// :return: the format `IANA media type `_. - /// :rtype: str - /// - /// >>> QueryResultsFormat.JSON.media_type - /// 'application/sparql-results+json' - #[getter] - fn media_type(&self) -> &'static str { - self.inner.media_type() - } - - /// :return: the format `IANA-registered `_ file extension. - /// :rtype: str - /// - /// >>> QueryResultsFormat.JSON.file_extension - /// 'srj' - #[getter] - fn file_extension(&self) -> &'static str { - self.inner.file_extension() - } - - /// :return: the format name. - /// :rtype: str - /// - /// >>> QueryResultsFormat.JSON.name - /// 'SPARQL Results in JSON' - #[getter] - pub const fn name(&self) -> &'static str { - self.inner.name() - } - - /// Looks for a known format from a media type. - /// - /// It supports some media type aliases. - /// For example, "application/xml" is going to return :py:const:`QueryResultsFormat.XML` even if it is not its canonical media type. - /// - /// :param media_type: the media type. - /// :type media_type: str - /// :return: :py:class:`QueryResultsFormat` if the media type is known or :py:const:`None` if not. - /// :rtype: QueryResultsFormat or None - /// - /// >>> QueryResultsFormat.from_media_type("application/sparql-results+json; charset=utf-8") - /// - #[staticmethod] - fn from_media_type(media_type: &str) -> Option { - Some(Self { - inner: QueryResultsFormat::from_media_type(media_type)?, - }) - } - - /// Looks for a known format from an extension. - /// - /// It supports some aliases. - /// - /// :param extension: the extension. - /// :type extension: str - /// :return: :py:class:`QueryResultsFormat` if the extension is known or :py:const:`None` if not. - /// :rtype: QueryResultsFormat or None - /// - /// >>> QueryResultsFormat.from_extension("json") - /// - #[staticmethod] - fn from_extension(extension: &str) -> Option { - Some(Self { - inner: QueryResultsFormat::from_extension(extension)?, - }) - } - - fn __str__(&self) -> &'static str { - self.inner.name() - } - - fn __repr__(&self) -> String { - format!("", self.inner.name()) - } - - fn __hash__(&self) -> u64 { - hash(&self.inner) - } - - fn __eq__(&self, other: &Self) -> bool { - self.inner == other.inner - } - - fn __ne__(&self, other: &Self) -> bool { - self.inner != other.inner - } - - /// :rtype: QueryResultsFormat - fn __copy__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - /// :type memo: typing.Any - /// :rtype: QueryResultsFormat - #[allow(unused_variables)] - fn __deepcopy__<'a>(slf: PyRef<'a, Self>, memo: &'_ Bound<'_, PyAny>) -> PyRef<'a, Self> { - slf - } -} - -pub fn lookup_query_results_format( - format: Option, - path: Option<&Path>, -) -> PyResult { - if let Some(format) = format { - return match format { - PyQueryResultsFormatInput::Object(format) => Ok(format.inner), - PyQueryResultsFormatInput::MediaType(media_type) => { - deprecation_warning("Using a string to specify a query results format is deprecated, please use a QueryResultsFormat object instead.")?; - QueryResultsFormat::from_media_type(&media_type).ok_or_else(|| { - PyValueError::new_err(format!( - "The media type {media_type} is not supported by pyoxigraph" - )) - }) - } - }; - } - let Some(path) = path else { - return Err(PyValueError::new_err( - "The format parameter is required when a file path is not given", - )); - }; - let Some(ext) = path.extension().and_then(OsStr::to_str) else { - return Err(PyValueError::new_err(format!( - "The file name {} has no extension to guess a file format from", - path.display() - ))); - }; - QueryResultsFormat::from_extension(ext) - .ok_or_else(|| PyValueError::new_err(format!("Not supported RDF format extension: {ext}"))) -} - -#[derive(FromPyObject)] -pub enum PyQueryResultsFormatInput { - Object(PyQueryResultsFormat), - MediaType(String), -} - -pub fn map_evaluation_error(error: EvaluationError) -> PyErr { - match error { - EvaluationError::Parsing(error) => PySyntaxError::new_err(error.to_string()), - EvaluationError::Storage(error) => map_storage_error(error), - EvaluationError::GraphParsing(error) => map_parse_error(error, None), - EvaluationError::ResultsParsing(error) => map_query_results_parse_error(error, None), - EvaluationError::ResultsSerialization(error) => error.into(), - EvaluationError::Service(error) => match error.downcast::() { - Ok(error) => (*error).into(), - Err(error) => PyRuntimeError::new_err(error.to_string()), - }, - _ => PyRuntimeError::new_err(error.to_string()), - } -} - -pub fn map_query_results_parse_error( - error: QueryResultsParseError, - file_path: Option, -) -> PyErr { - match error { - QueryResultsParseError::Syntax(error) => { - // Python 3.9 does not support end line and end column - if python_version() >= (3, 10) { - let params = if let Some(location) = error.location() { - ( - file_path, - Some(location.start.line + 1), - Some(location.start.column + 1), - None::>, - Some(location.end.line + 1), - Some(location.end.column + 1), - ) - } else { - (None, None, None, None, None, None) - }; - PySyntaxError::new_err((error.to_string(), params)) - } else { - let params = if let Some(location) = error.location() { - ( - file_path, - Some(location.start.line + 1), - Some(location.start.column + 1), - None::>, - ) - } else { - (None, None, None, None) - }; - PySyntaxError::new_err((error.to_string(), params)) - } - } - QueryResultsParseError::Io(error) => error.into(), - } -} diff --git a/python/src/store.rs b/python/src/store.rs deleted file mode 100644 index fb83a4e3..00000000 --- a/python/src/store.rs +++ /dev/null @@ -1,865 +0,0 @@ -#![allow(clippy::needless_option_as_deref)] - -use crate::io::{ - allow_threads_unsafe, lookup_rdf_format, map_parse_error, PyRdfFormatInput, PyReadable, - PyReadableInput, PyWritable, PyWritableOutput, -}; -use crate::model::*; -use crate::sparql::*; -use oxigraph::io::RdfParser; -use oxigraph::model::GraphNameRef; -use oxigraph::sparql::Update; -use oxigraph::store::{self, LoaderError, SerializerError, StorageError, Store}; -use pyo3::exceptions::{PyRuntimeError, PyValueError}; -use pyo3::prelude::*; -use pyo3::types::PyBytes; -use std::path::PathBuf; - -/// RDF store. -/// -/// It encodes a `RDF dataset `_ and allows to query it using SPARQL. -/// It is based on the `RocksDB `_ key-value database. -/// -/// This store ensures the "repeatable read" isolation level: the store only exposes changes that have -/// been "committed" (i.e. no partial writes) and the exposed state does not change for the complete duration -/// of a read operation (e.g. a SPARQL query) or a read/write operation (e.g. a SPARQL update). -/// -/// The :py:class:`Store` constructor opens a read-write instance. -/// To open a static read-only instance use :py:func:`Store.read_only` -/// and to open a read-only instance that tracks a read-write instance use :py:func:`Store.secondary`. -/// -/// :param path: the path of the directory in which the store should read and write its data. If the directory does not exist, it is created. -/// If no directory is provided a temporary one is created and removed when the Python garbage collector removes the store. -/// In this case, the store data are kept in memory and never written on disk. -/// :type path: str or os.PathLike[str] or None, optional -/// :raises OSError: if the target directory contains invalid data or could not be accessed. -/// -/// The :py:class:`str` function provides a serialization of the store in NQuads: -/// -/// >>> store = Store() -/// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) -/// >>> str(store) -/// ' "1" .\n' -#[pyclass(frozen, name = "Store", module = "pyoxigraph")] -#[derive(Clone)] -pub struct PyStore { - inner: Store, -} - -#[pymethods] -impl PyStore { - #[cfg(not(target_family = "wasm"))] - #[new] - #[pyo3(signature = (path = None))] - fn new(path: Option, py: Python<'_>) -> PyResult { - py.allow_threads(|| { - Ok(Self { - inner: if let Some(path) = path { - Store::open(path) - } else { - Store::new() - } - .map_err(map_storage_error)?, - }) - }) - } - - #[cfg(target_family = "wasm")] - #[new] - fn new(py: Python<'_>) -> PyResult { - py.allow_threads(|| { - Ok(Self { - inner: Store::new().map_err(map_storage_error)?, - }) - }) - } - - /// Opens a read-only store from disk. - /// - /// Opening as read-only while having an other process writing the database is undefined behavior. - /// :py:func:`Store.secondary` should be used in this case. - /// - /// :param path: path to the primary read-write instance data. - /// :type path: str - /// :return: the opened store. - /// :rtype: Store - /// :raises OSError: if the target directory contains invalid data or could not be accessed. - #[cfg(not(target_family = "wasm"))] - #[staticmethod] - fn read_only(path: &str, py: Python<'_>) -> PyResult { - py.allow_threads(|| { - Ok(Self { - inner: Store::open_read_only(path).map_err(map_storage_error)?, - }) - }) - } - - /// Opens a read-only clone of a running read-write store. - /// - /// Changes done while this process is running will be replicated after a possible lag. - /// - /// It should only be used if a primary instance opened with :py:func:`Store` is running at the same time. - /// - /// If you want to simple read-only store use :py:func:`Store.read_only`. - /// - /// :param primary_path: path to the primary read-write instance data. - /// :type primary_path: str - /// :param secondary_path: path to an other directory for the secondary instance cache. If not given a temporary directory will be used. - /// :type secondary_path: str or None, optional - /// :return: the opened store. - /// :rtype: Store - /// :raises OSError: if the target directories contain invalid data or could not be accessed. - #[cfg(not(target_family = "wasm"))] - #[staticmethod] - #[pyo3(signature = (primary_path, secondary_path = None))] - fn secondary( - primary_path: &str, - secondary_path: Option<&str>, - py: Python<'_>, - ) -> PyResult { - py.allow_threads(|| { - Ok(Self { - inner: if let Some(secondary_path) = secondary_path { - Store::open_persistent_secondary(primary_path, secondary_path) - } else { - Store::open_secondary(primary_path) - } - .map_err(map_storage_error)?, - }) - }) - } - - /// Adds a quad to the store. - /// - /// :param quad: the quad to add. - /// :type quad: Quad - /// :rtype: None - /// :raises OSError: if an error happens during the quad insertion. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> list(store) - /// [ predicate= object=> graph_name=>] - fn add(&self, quad: &PyQuad, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| { - self.inner.insert(quad).map_err(map_storage_error)?; - Ok(()) - }) - } - - /// Adds atomically a set of quads to this store. - /// - /// Insertion is done in a transactional manner: either the full operation succeeds or nothing is written to the database. - /// The :py:func:`bulk_extend` method is also available for much faster loading of a large number of quads but without transactional guarantees. - /// - /// :param quads: the quads to add. - /// :type quads: collections.abc.Iterable[Quad] - /// :rtype: None - /// :raises OSError: if an error happens during the quad insertion. - /// - /// >>> store = Store() - /// >>> store.extend([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))]) - /// >>> list(store) - /// [ predicate= object=> graph_name=>] - fn extend(&self, quads: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<()> { - let quads = quads - .iter()? - .map(|q| q?.extract()) - .collect::>>()?; - py.allow_threads(|| { - self.inner.extend(quads).map_err(map_storage_error)?; - Ok(()) - }) - } - - /// Adds a set of quads to this store. - /// - /// This function is designed to be as fast as possible **without** transactional guarantees. - /// Only a part of the data might be written to the store. - /// - /// :param quads: the quads to add. - /// :type quads: collections.abc.Iterable[Quad] - /// :rtype: None - /// :raises OSError: if an error happens during the quad insertion. - /// - /// >>> store = Store() - /// >>> store.bulk_extend([Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))]) - /// >>> list(store) - /// [ predicate= object=> graph_name=>] - #[cfg(not(target_family = "wasm"))] - fn bulk_extend(&self, quads: &Bound<'_, PyAny>) -> PyResult<()> { - self.inner - .bulk_loader() - .load_ok_quads::( - quads.iter()?.map(|q| q?.extract::()), - )?; - Ok(()) - } - - /// Removes a quad from the store. - /// - /// :param quad: the quad to remove. - /// :type quad: Quad - /// :rtype: None - /// :raises OSError: if an error happens during the quad removal. - /// - /// >>> store = Store() - /// >>> quad = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) - /// >>> store.add(quad) - /// >>> store.remove(quad) - /// >>> list(store) - /// [] - fn remove(&self, quad: &PyQuad, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| { - self.inner.remove(quad).map_err(map_storage_error)?; - Ok(()) - }) - } - - /// Looks for the quads matching a given pattern. - /// - /// :param subject: the quad subject or :py:const:`None` to match everything. - /// :type subject: NamedNode or BlankNode or Triple or None - /// :param predicate: the quad predicate or :py:const:`None` to match everything. - /// :type predicate: NamedNode or None - /// :param object: the quad object or :py:const:`None` to match everything. - /// :type object: NamedNode or BlankNode or Literal or Triple or None - /// :param graph_name: the quad graph name. To match only the default graph, use :py:class:`DefaultGraph`. To match everything use :py:const:`None`. - /// :type graph_name: NamedNode or BlankNode or DefaultGraph or None, optional - /// :return: an iterator of the quads matching the pattern. - /// :rtype: collections.abc.Iterator[Quad] - /// :raises OSError: if an error happens during the quads lookup. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> list(store.quads_for_pattern(NamedNode('http://example.com'), None, None, None)) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - #[pyo3(signature = (subject, predicate, object, graph_name = None))] - fn quads_for_pattern( - &self, - subject: Option>, - predicate: Option>, - object: Option>, - graph_name: Option>, - ) -> QuadIter { - QuadIter { - inner: self.inner.quads_for_pattern( - subject.as_ref().map(Into::into), - predicate.as_ref().map(Into::into), - object.as_ref().map(Into::into), - graph_name.as_ref().map(Into::into), - ), - } - } - - /// Executes a `SPARQL 1.1 query `_. - /// - /// :param query: the query to execute. - /// :type query: str - /// :param base_iri: the base IRI used to resolve the relative IRIs in the SPARQL query or :py:const:`None` if relative IRI resolution should not be done. - /// :type base_iri: str or None, optional - /// :param use_default_graph_as_union: if the SPARQL query should look for triples in all the dataset graphs by default (i.e. without `GRAPH` operations). Disabled by default. - /// :type use_default_graph_as_union: bool, optional - /// :param default_graph: list of the graphs that should be used as the query default graph. By default, the store default graph is used. - /// :type default_graph: NamedNode or BlankNode or DefaultGraph or list[NamedNode or BlankNode or DefaultGraph] or None, optional - /// :param named_graphs: list of the named graphs that could be used in SPARQL `GRAPH` clause. By default, all the store named graphs are available. - /// :type named_graphs: list[NamedNode or BlankNode] or None, optional - /// :return: a :py:class:`bool` for ``ASK`` queries, an iterator of :py:class:`Triple` for ``CONSTRUCT`` and ``DESCRIBE`` queries and an iterator of :py:class:`QuerySolution` for ``SELECT`` queries. - /// :rtype: QuerySolutions or QueryBoolean or QueryTriples - /// :raises SyntaxError: if the provided query is invalid. - /// :raises OSError: if an error happens while reading the store. - /// - /// ``SELECT`` query: - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> [solution['s'] for solution in store.query('SELECT ?s WHERE { ?s ?p ?o }')] - /// [] - /// - /// ``CONSTRUCT`` query: - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> list(store.query('CONSTRUCT WHERE { ?s ?p ?o }')) - /// [ predicate= object=>>] - /// - /// ``ASK`` query: - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> bool(store.query('ASK { ?s ?p ?o }')) - /// True - #[pyo3(signature = (query, *, base_iri = None, use_default_graph_as_union = false, default_graph = None, named_graphs = None))] - fn query( - &self, - query: &str, - base_iri: Option<&str>, - use_default_graph_as_union: bool, - default_graph: Option<&Bound<'_, PyAny>>, - named_graphs: Option<&Bound<'_, PyAny>>, - py: Python<'_>, - ) -> PyResult { - let query = parse_query( - query, - base_iri, - use_default_graph_as_union, - default_graph, - named_graphs, - py, - )?; - let results = - allow_threads_unsafe(py, || self.inner.query(query)).map_err(map_evaluation_error)?; - Ok(query_results_to_python(py, results)) - } - - /// Executes a `SPARQL 1.1 update `_. - /// - /// Updates are applied in a transactional manner: either the full operation succeeds or nothing is written to the database. - /// - /// :param update: the update to execute. - /// :type update: str - /// :param base_iri: the base IRI used to resolve the relative IRIs in the SPARQL update or :py:const:`None` if relative IRI resolution should not be done. - /// :type base_iri: str or None, optional - /// :rtype: None - /// :raises SyntaxError: if the provided update is invalid. - /// :raises OSError: if an error happens while reading the store. - /// - /// ``INSERT DATA`` update: - /// - /// >>> store = Store() - /// >>> store.update('INSERT DATA { "1" }') - /// >>> list(store) - /// [ predicate= object=> graph_name=>] - /// - /// ``DELETE DATA`` update: - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> store.update('DELETE DATA { "1" }') - /// >>> list(store) - /// [] - /// - /// ``DELETE`` update: - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> store.update('DELETE WHERE { ?p ?o }') - /// >>> list(store) - /// [] - #[pyo3(signature = (update, *, base_iri = None))] - fn update(&self, update: &str, base_iri: Option<&str>, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| { - let update = - Update::parse(update, base_iri).map_err(|e| map_evaluation_error(e.into()))?; - self.inner.update(update).map_err(map_evaluation_error) - }) - } - - /// Loads an RDF serialization into the store. - /// - /// Loads are applied in a transactional manner: either the full operation succeeds or nothing is written to the database. - /// The :py:func:`bulk_load` method is also available for much faster loading of big files but without transactional guarantees. - /// - /// Beware, the full file is loaded into memory. - /// - /// It currently supports the following formats: - /// - /// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) - /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) - /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) - /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) - /// * `N3 `_ (:py:attr:`RdfFormat.N3`) - /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) - /// - /// It supports also some media type and extension aliases. - /// For example, ``application/turtle`` could also be used for `Turtle `_ - /// and ``application/xml`` or ``xml`` for `RDF/XML `_. - /// - /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. - /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional - /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: RdfFormat or None, optional - /// :param path: The file path to read from. Replaces the ``input`` parameter. - /// :type path: str or os.PathLike[str] or None, optional - /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. - /// :type base_iri: str or None, optional - /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. - /// :type to_graph: NamedNode or BlankNode or DefaultGraph or None, optional - /// :rtype: None - /// :raises ValueError: if the format is not supported. - /// :raises SyntaxError: if the provided data is invalid. - /// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file. - /// - /// >>> store = Store() - /// >>> store.load(input='

"1" .', format=RdfFormat.TURTLE, base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) - /// >>> list(store) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))] - fn load( - &self, - input: Option, - format: Option, - path: Option, - base_iri: Option<&str>, - to_graph: Option>, - py: Python<'_>, - ) -> PyResult<()> { - let to_graph_name = to_graph.as_ref().map(GraphNameRef::from); - let input = PyReadable::from_args(&path, input, py)?; - let format = lookup_rdf_format(format, path.as_deref())?; - py.allow_threads(|| { - let mut parser = RdfParser::from_format(format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - } - if let Some(to_graph_name) = to_graph_name { - parser = parser.with_default_graph(to_graph_name); - } - self.inner - .load_from_read(parser, input) - .map_err(|e| map_loader_error(e, path)) - }) - } - - /// Loads an RDF serialization into the store. - /// - /// This function is designed to be as fast as possible on big files **without** transactional guarantees. - /// If the file is invalid only a piece of it might be written to the store. - /// - /// The :py:func:`load` method is also available for loads with transactional guarantees. - /// - /// It currently supports the following formats: - /// - /// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) - /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) - /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) - /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) - /// * `N3 `_ (:py:attr:`RdfFormat.N3`) - /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) - /// - /// It supports also some media type and extension aliases. - /// For example, ``application/turtle`` could also be used for `Turtle `_ - /// and ``application/xml`` or ``xml`` for `RDF/XML `_. - /// - /// :param input: The :py:class:`str`, :py:class:`bytes` or I/O object to read from. For example, it could be the file content as a string or a file reader opened in binary mode with ``open('my_file.ttl', 'rb')``. - /// :type input: bytes or str or typing.IO[bytes] or typing.IO[str] or None, optional - /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: RdfFormat or None, optional - /// :param path: The file path to read from. Replaces the ``input`` parameter. - /// :type path: str or os.PathLike[str] or None, optional - /// :param base_iri: the base IRI used to resolve the relative IRIs in the file or :py:const:`None` if relative IRI resolution should not be done. - /// :type base_iri: str or None, optional - /// :param to_graph: if it is a file composed of triples, the graph in which the triples should be stored. By default, the default graph is used. - /// :type to_graph: NamedNode or BlankNode or DefaultGraph or None, optional - /// :rtype: None - /// :raises ValueError: if the format is not supported. - /// :raises SyntaxError: if the provided data is invalid. - /// :raises OSError: if an error happens during a quad insertion or if a system error happens while reading the file. - /// - /// >>> store = Store() - /// >>> store.bulk_load(input=b'

"1" .', format=RdfFormat.TURTLE, base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) - /// >>> list(store) - /// [ predicate= object=> graph_name=>] - #[allow(clippy::needless_pass_by_value)] - #[pyo3(signature = (input = None, format = None, *, path = None, base_iri = None, to_graph = None))] - fn bulk_load( - &self, - input: Option, - format: Option, - path: Option, - base_iri: Option<&str>, - to_graph: Option>, - py: Python<'_>, - ) -> PyResult<()> { - let to_graph_name = to_graph.as_ref().map(GraphNameRef::from); - let input = PyReadable::from_args(&path, input, py)?; - let format = lookup_rdf_format(format, path.as_deref())?; - py.allow_threads(|| { - let mut parser = RdfParser::from_format(format); - if let Some(base_iri) = base_iri { - parser = parser - .with_base_iri(base_iri) - .map_err(|e| PyValueError::new_err(e.to_string()))?; - } - if let Some(to_graph_name) = to_graph_name { - parser = parser.with_default_graph(to_graph_name); - } - self.inner - .load_from_read(parser, input) - .map_err(|e| map_loader_error(e, path)) - }) - } - - /// Dumps the store quads or triples into a file. - /// - /// It currently supports the following formats: - /// - /// * `N-Triples `_ (:py:attr:`RdfFormat.N_TRIPLES`) - /// * `N-Quads `_ (:py:attr:`RdfFormat.N_QUADS`) - /// * `Turtle `_ (:py:attr:`RdfFormat.TURTLE`) - /// * `TriG `_ (:py:attr:`RdfFormat.TRIG`) - /// * `N3 `_ (:py:attr:`RdfFormat.N3`) - /// * `RDF/XML `_ (:py:attr:`RdfFormat.RDF_XML`) - /// - /// It supports also some media type and extension aliases. - /// For example, ``application/turtle`` could also be used for `Turtle `_ - /// and ``application/xml`` or ``xml`` for `RDF/XML `_. - /// - /// :param output: The binary I/O object or file path to write to. For example, it could be a file path as a string or a file writer opened in binary mode with ``open('my_file.ttl', 'wb')``. If :py:const:`None`, a :py:class:`bytes` buffer is returned with the serialized content. - /// :type output: typing.IO[bytes] or str or os.PathLike[str] or None, optional - /// :param format: the format of the RDF serialization. If :py:const:`None`, the format is guessed from the file name extension. - /// :type format: RdfFormat or None, optional - /// :param from_graph: the store graph from which dump the triples. Required if the serialization format does not support named graphs. If it does supports named graphs the full dataset is written. - /// :type from_graph: NamedNode or BlankNode or DefaultGraph or None, optional - /// :return: :py:class:`bytes` with the serialization if the ``output`` parameter is :py:const:`None`, :py:const:`None` if ``output`` is set. - /// :rtype: bytes or None - /// :raises ValueError: if the format is not supported or the `from_graph` parameter is not given with a syntax not supporting named graphs. - /// :raises OSError: if an error happens during a quad lookup or file writing. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) - /// >>> store.dump(format=RdfFormat.TRIG) - /// b' "1" .\n' - /// - /// >>> import io - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> output = io.BytesIO() - /// >>> store.dump(output, RdfFormat.TURTLE, from_graph=NamedNode("http://example.com/g")) - /// >>> output.getvalue() - /// b' "1" .\n' - #[allow(clippy::needless_pass_by_value)] - #[pyo3(signature = (output = None, format = None, *, from_graph = None))] - fn dump<'py>( - &self, - output: Option, - format: Option, - from_graph: Option>, - py: Python<'py>, - ) -> PyResult>> { - let from_graph_name = from_graph.as_ref().map(GraphNameRef::from); - PyWritable::do_write( - |output, file_path| { - py.allow_threads(|| { - let format = lookup_rdf_format(format, file_path.as_deref())?; - if let Some(from_graph_name) = from_graph_name { - self.inner - .dump_graph_to_write(from_graph_name, format, output) - } else { - self.inner.dump_to_write(format, output) - } - .map_err(map_serializer_error) - }) - }, - output, - py, - ) - } - - /// Returns an iterator over all the store named graphs. - /// - /// :return: an iterator of the store graph names. - /// :rtype: collections.abc.Iterator[NamedNode or BlankNode] - /// :raises OSError: if an error happens during the named graphs lookup. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> list(store.named_graphs()) - /// [] - fn named_graphs(&self) -> GraphNameIter { - GraphNameIter { - inner: self.inner.named_graphs(), - } - } - - /// Returns if the store contains the given named graph. - /// - /// :param graph_name: the name of the named graph. - /// :type graph_name: NamedNode or BlankNode or DefaultGraph - /// :rtype: bool - /// :raises OSError: if an error happens during the named graph lookup. - /// - /// >>> store = Store() - /// >>> store.add_graph(NamedNode('http://example.com/g')) - /// >>> store.contains_named_graph(NamedNode('http://example.com/g')) - /// True - #[allow(clippy::needless_pass_by_value)] - fn contains_named_graph( - &self, - graph_name: PyGraphNameRef<'_>, - py: Python<'_>, - ) -> PyResult { - let graph_name = GraphNameRef::from(&graph_name); - py.allow_threads(|| { - match graph_name { - GraphNameRef::DefaultGraph => Ok(true), - GraphNameRef::NamedNode(graph_name) => self.inner.contains_named_graph(graph_name), - GraphNameRef::BlankNode(graph_name) => self.inner.contains_named_graph(graph_name), - } - .map_err(map_storage_error) - }) - } - - /// Adds a named graph to the store. - /// - /// :param graph_name: the name of the name graph to add. - /// :type graph_name: NamedNode or BlankNode or DefaultGraph - /// :rtype: None - /// :raises OSError: if an error happens during the named graph insertion. - /// - /// >>> store = Store() - /// >>> store.add_graph(NamedNode('http://example.com/g')) - /// >>> list(store.named_graphs()) - /// [] - #[allow(clippy::needless_pass_by_value)] - fn add_graph(&self, graph_name: PyGraphNameRef<'_>, py: Python<'_>) -> PyResult<()> { - let graph_name = GraphNameRef::from(&graph_name); - py.allow_threads(|| { - match graph_name { - GraphNameRef::DefaultGraph => Ok(()), - GraphNameRef::NamedNode(graph_name) => { - self.inner.insert_named_graph(graph_name).map(|_| ()) - } - GraphNameRef::BlankNode(graph_name) => { - self.inner.insert_named_graph(graph_name).map(|_| ()) - } - } - .map_err(map_storage_error) - }) - } - - /// Clears a graph from the store without removing it. - /// - /// :param graph_name: the name of the name graph to clear. - /// :type graph_name: NamedNode or BlankNode or DefaultGraph - /// :rtype: None - /// :raises OSError: if an error happens during the operation. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> store.clear_graph(NamedNode('http://example.com/g')) - /// >>> list(store) - /// [] - /// >>> list(store.named_graphs()) - /// [] - #[allow(clippy::needless_pass_by_value)] - fn clear_graph(&self, graph_name: PyGraphNameRef<'_>, py: Python<'_>) -> PyResult<()> { - let graph_name = GraphNameRef::from(&graph_name); - py.allow_threads(|| { - self.inner - .clear_graph(graph_name) - .map_err(map_storage_error) - }) - } - - /// Removes a graph from the store. - /// - /// The default graph will not be removed but just cleared. - /// - /// :param graph_name: the name of the name graph to remove. - /// :type graph_name: NamedNode or BlankNode or DefaultGraph - /// :rtype: None - /// :raises OSError: if an error happens during the named graph removal. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> store.remove_graph(NamedNode('http://example.com/g')) - /// >>> list(store.named_graphs()) - /// [] - #[allow(clippy::needless_pass_by_value)] - fn remove_graph(&self, graph_name: PyGraphNameRef<'_>, py: Python<'_>) -> PyResult<()> { - let graph_name = GraphNameRef::from(&graph_name); - py.allow_threads(|| { - match graph_name { - GraphNameRef::DefaultGraph => self.inner.clear_graph(GraphNameRef::DefaultGraph), - GraphNameRef::NamedNode(graph_name) => { - self.inner.remove_named_graph(graph_name).map(|_| ()) - } - GraphNameRef::BlankNode(graph_name) => { - self.inner.remove_named_graph(graph_name).map(|_| ()) - } - } - .map_err(map_storage_error) - }) - } - - /// Clears the store by removing all its contents. - /// - /// :rtype: None - /// :raises OSError: if an error happens during the operation. - /// - /// >>> store = Store() - /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) - /// >>> store.clear() - /// >>> list(store) - /// [] - /// >>> list(store.named_graphs()) - /// [] - fn clear(&self, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| self.inner.clear().map_err(map_storage_error)) - } - - /// Flushes all buffers and ensures that all writes are saved on disk. - /// - /// Flushes are automatically done using background threads but might lag a little bit. - /// - /// :rtype: None - /// :raises OSError: if an error happens during the flush. - #[cfg(not(target_family = "wasm"))] - fn flush(&self, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| self.inner.flush().map_err(map_storage_error)) - } - - /// Optimizes the database for future workload. - /// - /// Useful to call after a batch upload or another similar operation. - /// - /// :rtype: None - /// :raises OSError: if an error happens during the optimization. - #[cfg(not(target_family = "wasm"))] - fn optimize(&self, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| self.inner.optimize().map_err(map_storage_error)) - } - - /// Creates database backup into the `target_directory`. - /// - /// After its creation, the backup is usable using :py:class:`Store` constructor. - /// like a regular pyxigraph database and operates independently from the original database. - /// - /// Warning: Backups are only possible for on-disk databases created by providing a path to :py:class:`Store` constructor. - /// Temporary in-memory databases created without path are not compatible with the backup system. - /// - /// Warning: An error is raised if the ``target_directory`` already exists. - /// - /// If the target directory is in the same file system as the current database, - /// the database content will not be fully copied - /// but hard links will be used to point to the original database immutable snapshots. - /// This allows cheap regular backups. - /// - /// If you want to move your data to another RDF storage system, you should have a look at the :py:func:`dump_dataset` function instead. - /// - /// :param target_directory: the directory name to save the database to. - /// :type target_directory: str or os.PathLike[str] - /// :rtype: None - /// :raises OSError: if an error happens during the backup. - #[cfg(not(target_family = "wasm"))] - fn backup(&self, target_directory: PathBuf, py: Python<'_>) -> PyResult<()> { - py.allow_threads(|| { - self.inner - .backup(target_directory) - .map_err(map_storage_error) - }) - } - - fn __str__(&self, py: Python<'_>) -> String { - py.allow_threads(|| self.inner.to_string()) - } - - fn __bool__(&self) -> PyResult { - Ok(!self.inner.is_empty().map_err(map_storage_error)?) - } - - fn __len__(&self) -> PyResult { - self.inner.len().map_err(map_storage_error) - } - - fn __contains__(&self, quad: &PyQuad) -> PyResult { - self.inner.contains(quad).map_err(map_storage_error) - } - - fn __iter__(&self) -> QuadIter { - QuadIter { - inner: self.inner.iter(), - } - } -} - -#[pyclass(unsendable, module = "pyoxigraph")] -pub struct QuadIter { - inner: store::QuadIter, -} - -#[pymethods] -impl QuadIter { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self) -> PyResult> { - self.inner - .next() - .map(|q| Ok(q.map_err(map_storage_error)?.into())) - .transpose() - } -} - -#[pyclass(unsendable, module = "pyoxigraph")] -pub struct GraphNameIter { - inner: store::GraphNameIter, -} - -#[pymethods] -impl GraphNameIter { - fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { - slf - } - - fn __next__(&mut self) -> PyResult> { - self.inner - .next() - .map(|q| Ok(q.map_err(map_storage_error)?.into())) - .transpose() - } -} - -pub fn map_storage_error(error: StorageError) -> PyErr { - match error { - StorageError::Io(error) => error.into(), - _ => PyRuntimeError::new_err(error.to_string()), - } -} - -pub fn map_loader_error(error: LoaderError, file_path: Option) -> PyErr { - match error { - LoaderError::Storage(error) => map_storage_error(error), - LoaderError::Parsing(error) => map_parse_error(error, file_path), - LoaderError::InvalidBaseIri { .. } => PyValueError::new_err(error.to_string()), - } -} - -pub fn map_serializer_error(error: SerializerError) -> PyErr { - match error { - SerializerError::Storage(error) => map_storage_error(error), - SerializerError::Io(error) => error.into(), - SerializerError::DatasetFormatExpected(_) => PyValueError::new_err(error.to_string()), - } -} - -enum PythonOrStorageError { - Python(PyErr), - Storage(StorageError), -} - -impl From for PythonOrStorageError { - fn from(error: PyErr) -> Self { - Self::Python(error) - } -} - -impl From for PythonOrStorageError { - fn from(error: StorageError) -> Self { - Self::Storage(error) - } -} -impl From for PyErr { - fn from(error: PythonOrStorageError) -> Self { - match error { - PythonOrStorageError::Python(error) => error, - PythonOrStorageError::Storage(error) => map_storage_error(error), - } - } -} diff --git a/python/tests/test_doc.py b/python/tests/test_doc.py deleted file mode 100644 index c0ba505b..00000000 --- a/python/tests/test_doc.py +++ /dev/null @@ -1,37 +0,0 @@ -# type: ignore -import inspect -from doctest import DocTestFinder, DocTestSuite - -import pyoxigraph - - -class ExtendedDocTestFinder(DocTestFinder): - """ - More aggressive doctest lookup - """ - - def _find(self, tests, obj, name, module, source_lines, globs, seen): - # If we've already processed this object, then ignore it. - if id(obj) in seen: - return - seen[id(obj)] = 1 - - # Find a test for this object, and add it to the list of tests. - test = self._get_test(obj, name, module, globs, source_lines) - if test is not None: - tests.append(test) - - # Look for tests in a module's contained objects. - if inspect.ismodule(obj) or inspect.isclass(obj): - for valname, val in obj.__dict__.items(): - if valname == "__doc__": - continue - # Special handling for staticmethod/classmethod. - if isinstance(val, (staticmethod, classmethod)): - val = val.__func__ - self._find(tests, val, f"{name}.{valname}", module, source_lines, globs, seen) - - -def load_tests(_loader, tests, _ignore): - tests.addTests(DocTestSuite(pyoxigraph, test_finder=ExtendedDocTestFinder())) - return tests diff --git a/python/tests/test_io.py b/python/tests/test_io.py deleted file mode 100644 index fe137eff..00000000 --- a/python/tests/test_io.py +++ /dev/null @@ -1,266 +0,0 @@ -import sys -import unittest -from io import BytesIO, StringIO, UnsupportedOperation -from tempfile import NamedTemporaryFile, TemporaryFile - -from pyoxigraph import ( - Literal, - NamedNode, - Quad, - QueryBoolean, - QueryResultsFormat, - QuerySolutions, - RdfFormat, - parse, - parse_query_results, - serialize, -) - -EXAMPLE_TRIPLE = Quad( - NamedNode("http://example.com/foo"), - NamedNode("http://example.com/p"), - Literal("éù"), -) -EXAMPLE_QUAD = Quad( - NamedNode("http://example.com/foo"), - NamedNode("http://example.com/p"), - Literal("1"), - NamedNode("http://example.com/g"), -) - - -class TestParse(unittest.TestCase): - def test_parse_file(self) -> None: - with NamedTemporaryFile(suffix=".ttl") as fp: - fp.write('

"éù" .'.encode()) - fp.flush() - self.assertEqual( - list(parse(path=fp.name, base_iri="http://example.com/")), - [EXAMPLE_TRIPLE], - ) - - def test_parse_not_existing_file(self) -> None: - with self.assertRaises(IOError) as _: - parse(path="/tmp/not-existing-oxigraph-file.ttl", format=RdfFormat.TURTLE) - - def test_parse_str(self) -> None: - self.assertEqual( - list( - parse( - '

"éù" .', - RdfFormat.TURTLE, - base_iri="http://example.com/", - ) - ), - [EXAMPLE_TRIPLE], - ) - - def test_parse_bytes(self) -> None: - self.assertEqual( - list( - parse( - '

"éù" .'.encode(), - RdfFormat.TURTLE, - base_iri="http://example.com/", - ) - ), - [EXAMPLE_TRIPLE], - ) - - def test_parse_str_io(self) -> None: - self.assertEqual( - list( - parse( - StringIO('

"éù" .'), - RdfFormat.TURTLE, - base_iri="http://example.com/", - ) - ), - [EXAMPLE_TRIPLE], - ) - - def test_parse_long_str_io(self) -> None: - self.assertEqual( - list( - parse( - StringIO('

"éù" .\n' * 1024), - RdfFormat.TURTLE, - base_iri="http://example.com/", - ) - ), - [EXAMPLE_TRIPLE] * 1024, - ) - - def test_parse_bytes_io(self) -> None: - self.assertEqual( - list( - parse( - BytesIO('

"éù" .'.encode()), - RdfFormat.TURTLE, - base_iri="http://example.com/", - ) - ), - [EXAMPLE_TRIPLE], - ) - - def test_parse_io_error(self) -> None: - with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: - list(parse(fp, RdfFormat.N_TRIPLES)) - - def test_parse_quad(self) -> None: - self.assertEqual( - list( - parse( - ' {

"1" }', - RdfFormat.TRIG, - base_iri="http://example.com/", - ) - ), - [EXAMPLE_QUAD], - ) - - def test_parse_syntax_error(self) -> None: - with NamedTemporaryFile() as fp: - fp.write(b"@base .\n") - fp.write(b' "p" "1"') - fp.flush() - with self.assertRaises(SyntaxError) as ctx: - list(parse(path=fp.name, format=RdfFormat.TURTLE)) - self.assertEqual(ctx.exception.filename, fp.name) - self.assertEqual(ctx.exception.lineno, 2) - self.assertEqual(ctx.exception.offset, 7) - if sys.version_info >= (3, 10): - self.assertEqual(ctx.exception.end_lineno, 2) - self.assertEqual(ctx.exception.end_offset, 10) - - def test_parse_without_named_graphs(self) -> None: - with self.assertRaises(SyntaxError) as _: - list( - parse( - ' {

"1" }', - RdfFormat.TRIG, - base_iri="http://example.com/", - without_named_graphs=True, - ) - ) - - def test_parse_rename_blank_nodes(self) -> None: - self.assertNotEqual( - list( - parse( - '_:s "o" .', - RdfFormat.N_TRIPLES, - rename_blank_nodes=True, - ) - ), - list( - parse( - '_:s "o" .', - RdfFormat.N_TRIPLES, - rename_blank_nodes=True, - ) - ), - ) - - -class TestSerialize(unittest.TestCase): - def test_serialize_to_bytes(self) -> None: - self.assertEqual( - (serialize([EXAMPLE_TRIPLE.triple], None, RdfFormat.TURTLE) or b"").decode(), - ' "éù" .\n', - ) - - def test_serialize_to_bytes_io(self) -> None: - output = BytesIO() - serialize([EXAMPLE_TRIPLE.triple], output, RdfFormat.TURTLE) - self.assertEqual( - output.getvalue().decode(), - ' "éù" .\n', - ) - - def test_serialize_to_file(self) -> None: - with NamedTemporaryFile(suffix=".ttl") as fp: - serialize([EXAMPLE_TRIPLE], fp.name) - self.assertEqual( - fp.read().decode(), - ' "éù" .\n', - ) - - def test_serialize_io_error(self) -> None: - with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("rb") as fp: - serialize([EXAMPLE_TRIPLE], fp, RdfFormat.TURTLE) - - def test_serialize_quad(self) -> None: - output = BytesIO() - serialize([EXAMPLE_QUAD], output, RdfFormat.TRIG) - self.assertEqual( - output.getvalue(), - b' {\n\t "1" .\n}\n', - ) - - -class TestParseQuerySolutions(unittest.TestCase): - def test_parse_file(self) -> None: - with NamedTemporaryFile(suffix=".tsv") as fp: - fp.write(b'?s\t?p\t?o\n\t\t"1"\n') - fp.flush() - r = parse_query_results(path=fp.name) - self.assertIsInstance(r, QuerySolutions) - results = list(r) # type: ignore[arg-type] - self.assertEqual(results[0]["s"], NamedNode("http://example.com/s")) - self.assertEqual(results[0][2], Literal("1")) - - def test_parse_not_existing_file(self) -> None: - with self.assertRaises(IOError) as _: - parse_query_results(path="/tmp/not-existing-oxigraph-file.ttl", format=QueryResultsFormat.JSON) - - def test_parse_str(self) -> None: - result = parse_query_results("true", QueryResultsFormat.TSV) - self.assertIsInstance(result, QueryBoolean) - self.assertTrue(result) - - def test_parse_bytes(self) -> None: - result = parse_query_results(b"false", QueryResultsFormat.TSV) - self.assertIsInstance(result, QueryBoolean) - self.assertFalse(result) - - def test_parse_str_io(self) -> None: - result = parse_query_results("true", QueryResultsFormat.TSV) - self.assertIsInstance(result, QueryBoolean) - self.assertTrue(result) - - def test_parse_bytes_io(self) -> None: - result = parse_query_results(BytesIO(b"false"), QueryResultsFormat.TSV) - self.assertIsInstance(result, QueryBoolean) - self.assertFalse(result) - - def test_parse_io_error(self) -> None: - with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: - parse_query_results(fp, QueryResultsFormat.XML) - - def test_parse_syntax_error_json(self) -> None: - with NamedTemporaryFile() as fp: - fp.write(b"{]") - fp.flush() - with self.assertRaises(SyntaxError) as ctx: - list(parse_query_results(path=fp.name, format=QueryResultsFormat.JSON)) # type: ignore[arg-type] - self.assertEqual(ctx.exception.filename, fp.name) - self.assertEqual(ctx.exception.lineno, 1) - self.assertEqual(ctx.exception.offset, 2) - if sys.version_info >= (3, 10): - self.assertEqual(ctx.exception.end_lineno, 1) - self.assertEqual(ctx.exception.end_offset, 3) - - def test_parse_syntax_error_tsv(self) -> None: - with NamedTemporaryFile() as fp: - fp.write(b"?a\t?test\n") - fp.write(b"1\t\n") - fp.flush() - with self.assertRaises(SyntaxError) as ctx: - list(parse_query_results(path=fp.name, format=QueryResultsFormat.TSV)) # type: ignore[arg-type] - self.assertEqual(ctx.exception.filename, fp.name) - self.assertEqual(ctx.exception.lineno, 2) - self.assertEqual(ctx.exception.offset, 3) - if sys.version_info >= (3, 10): - self.assertEqual(ctx.exception.end_lineno, 2) - self.assertEqual(ctx.exception.end_offset, 9) diff --git a/python/tests/test_model.py b/python/tests/test_model.py deleted file mode 100644 index 6bed69fd..00000000 --- a/python/tests/test_model.py +++ /dev/null @@ -1,376 +0,0 @@ -import copy -import pickle -import sys -import unittest - -from pyoxigraph import ( - BlankNode, - DefaultGraph, - Literal, - NamedNode, - Quad, - Triple, - Variable, -) - -XSD_STRING = NamedNode("http://www.w3.org/2001/XMLSchema#string") -XSD_INTEGER = NamedNode("http://www.w3.org/2001/XMLSchema#integer") -RDF_LANG_STRING = NamedNode("http://www.w3.org/1999/02/22-rdf-syntax-ns#langString") - - -def match_works(test: unittest.TestCase, matched_value: str, constraint: str) -> None: - """Hack for Python < 3.10 compatibility""" - if sys.version_info < (3, 10): - return test.skipTest("match has been introduced by Python 3.10") - found = True - exec( - f""" -match {matched_value}: - case {constraint}: - found = True -""" - ) - test.assertTrue(found) - return None - - -class TestNamedNode(unittest.TestCase): - def test_constructor(self) -> None: - self.assertEqual(NamedNode("http://foo").value, "http://foo") - - def test_string(self) -> None: - self.assertEqual(str(NamedNode("http://foo")), "") - - def test_equal(self) -> None: - self.assertEqual(NamedNode("http://foo"), NamedNode("http://foo")) - self.assertNotEqual(NamedNode("http://foo"), NamedNode("http://bar")) - - def test_pickle(self) -> None: - node = NamedNode("http://foo") - self.assertEqual(pickle.loads(pickle.dumps(node)), node) - self.assertEqual(copy.copy(node), node) - self.assertEqual(copy.deepcopy(node), node) - - def test_basic_match(self) -> None: - match_works(self, 'NamedNode("http://foo")', 'NamedNode("http://foo")') - - def test_wildcard_match(self) -> None: - match_works(self, 'NamedNode("http://foo")', "NamedNode(x)") - - -class TestBlankNode(unittest.TestCase): - def test_constructor(self) -> None: - self.assertEqual(BlankNode("foo").value, "foo") - self.assertNotEqual(BlankNode(), BlankNode()) - - def test_string(self) -> None: - self.assertEqual(str(BlankNode("foo")), "_:foo") - - def test_equal(self) -> None: - self.assertEqual(BlankNode("foo"), BlankNode("foo")) - self.assertNotEqual(BlankNode("foo"), BlankNode("bar")) - self.assertNotEqual(BlankNode("foo"), NamedNode("http://foo")) - self.assertNotEqual(NamedNode("http://foo"), BlankNode("foo")) - - def test_pickle(self) -> None: - node = BlankNode("foo") - self.assertEqual(pickle.loads(pickle.dumps(node)), node) - self.assertEqual(copy.copy(node), node) - self.assertEqual(copy.deepcopy(node), node) - - auto = BlankNode() - self.assertEqual(pickle.loads(pickle.dumps(auto)), auto) - self.assertEqual(copy.copy(auto), auto) - self.assertEqual(copy.deepcopy(auto), auto) - - def test_basic_match(self) -> None: - match_works(self, 'BlankNode("foo")', 'BlankNode("foo")') - - def test_wildcard_match(self) -> None: - match_works(self, 'BlankNode("foo")', "BlankNode(x)") - - -class TestLiteral(unittest.TestCase): - def test_constructor(self) -> None: - self.assertEqual(Literal("foo").value, "foo") - self.assertEqual(Literal("foo").datatype, XSD_STRING) - - self.assertEqual(Literal("foo", language="en").value, "foo") - self.assertEqual(Literal("foo", language="en").language, "en") - self.assertEqual(Literal("foo", language="en").datatype, RDF_LANG_STRING) - - self.assertEqual(Literal("foo", datatype=XSD_INTEGER).value, "foo") - self.assertEqual(Literal("foo", datatype=XSD_INTEGER).datatype, XSD_INTEGER) - - def test_string(self) -> None: - self.assertEqual(str(Literal("foo")), '"foo"') - self.assertEqual(str(Literal("foo", language="en")), '"foo"@en') - self.assertEqual( - str(Literal("foo", datatype=XSD_INTEGER)), - '"foo"^^', - ) - - def test_equals(self) -> None: - self.assertEqual(Literal("foo", datatype=XSD_STRING), Literal("foo")) - self.assertEqual( - Literal("foo", language="en", datatype=RDF_LANG_STRING), - Literal("foo", language="en"), - ) - self.assertNotEqual(NamedNode("http://foo"), Literal("foo")) - self.assertNotEqual(Literal("foo"), NamedNode("http://foo")) - self.assertNotEqual(BlankNode("foo"), Literal("foo")) - self.assertNotEqual(Literal("foo"), BlankNode("foo")) - - def test_pickle(self) -> None: - simple = Literal("foo") - self.assertEqual(pickle.loads(pickle.dumps(simple)), simple) - self.assertEqual(copy.copy(simple), simple) - self.assertEqual(copy.deepcopy(simple), simple) - - lang_tagged = Literal("foo", language="en") - self.assertEqual(pickle.loads(pickle.dumps(lang_tagged)), lang_tagged) - self.assertEqual(copy.copy(lang_tagged), lang_tagged) - self.assertEqual(copy.deepcopy(lang_tagged), lang_tagged) - - number = Literal("1", datatype=XSD_INTEGER) - self.assertEqual(pickle.loads(pickle.dumps(number)), number) - self.assertEqual(copy.copy(number), number) - self.assertEqual(copy.deepcopy(number), number) - - def test_basic_match(self) -> None: - match_works(self, 'Literal("foo", language="en")', 'Literal("foo", language="en")') - match_works( - self, - 'Literal("1", datatype=XSD_INTEGER)', - 'Literal("1", datatype=NamedNode("http://www.w3.org/2001/XMLSchema#integer"))', - ) - - def test_wildcard_match(self) -> None: - match_works(self, 'Literal("foo", language="en")', "Literal(v, language=l)") - match_works(self, 'Literal("1", datatype=XSD_INTEGER)', "Literal(v, datatype=d)") - - -class TestTriple(unittest.TestCase): - def test_constructor(self) -> None: - t = Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ) - self.assertEqual(t.subject, NamedNode("http://example.com/s")) - self.assertEqual(t.predicate, NamedNode("http://example.com/p")) - self.assertEqual(t.object, NamedNode("http://example.com/o")) - - def test_rdf_star_constructor(self) -> None: - t = Triple( - Triple( - NamedNode("http://example.com/ss"), - NamedNode("http://example.com/sp"), - NamedNode("http://example.com/so"), - ), - NamedNode("http://example.com/p"), - Triple( - NamedNode("http://example.com/os"), - NamedNode("http://example.com/op"), - NamedNode("http://example.com/oo"), - ), - ) - self.assertEqual( - t.subject, - Triple( - NamedNode("http://example.com/ss"), - NamedNode("http://example.com/sp"), - NamedNode("http://example.com/so"), - ), - ) - self.assertEqual(t.predicate, NamedNode("http://example.com/p")) - self.assertEqual( - t.object, - Triple( - NamedNode("http://example.com/os"), - NamedNode("http://example.com/op"), - NamedNode("http://example.com/oo"), - ), - ) - - def test_mapping(self) -> None: - t = Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ) - self.assertEqual(t[0], NamedNode("http://example.com/s")) - self.assertEqual(t[1], NamedNode("http://example.com/p")) - self.assertEqual(t[2], NamedNode("http://example.com/o")) - - def test_destruct(self) -> None: - (s, p, o) = Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ) - self.assertEqual(s, NamedNode("http://example.com/s")) - self.assertEqual(p, NamedNode("http://example.com/p")) - self.assertEqual(o, NamedNode("http://example.com/o")) - - def test_string(self) -> None: - self.assertEqual( - str( - Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ) - ), - " ", - ) - - def test_pickle(self) -> None: - triple = Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ) - self.assertEqual(pickle.loads(pickle.dumps(triple)), triple) - self.assertEqual(copy.copy(triple), triple) - self.assertEqual(copy.deepcopy(triple), triple) - - def test_match(self) -> None: - match_works( - self, - 'Triple(NamedNode("http://example.com/s"), NamedNode("http://example.com/p"), ' - 'NamedNode("http://example.com/o"))', - 'Triple(NamedNode("http://example.com/s"), NamedNode(p), o)', - ) - - -class TestDefaultGraph(unittest.TestCase): - def test_equal(self) -> None: - self.assertEqual(DefaultGraph(), DefaultGraph()) - self.assertNotEqual(DefaultGraph(), NamedNode("http://bar")) - - def test_pickle(self) -> None: - self.assertEqual(pickle.loads(pickle.dumps(DefaultGraph())), DefaultGraph()) - self.assertEqual(copy.copy(DefaultGraph()), DefaultGraph()) - self.assertEqual(copy.deepcopy(DefaultGraph()), DefaultGraph()) - - def test_match(self) -> None: - match_works(self, "DefaultGraph()", "DefaultGraph()") - - -class TestQuad(unittest.TestCase): - def test_constructor(self) -> None: - t = Quad( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - NamedNode("http://example.com/g"), - ) - self.assertEqual(t.subject, NamedNode("http://example.com/s")) - self.assertEqual(t.predicate, NamedNode("http://example.com/p")) - self.assertEqual(t.object, NamedNode("http://example.com/o")) - self.assertEqual(t.graph_name, NamedNode("http://example.com/g")) - self.assertEqual( - t.triple, - Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ), - ) - self.assertEqual( - Quad( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ), - Quad( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - DefaultGraph(), - ), - ) - - def test_mapping(self) -> None: - t = Quad( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - NamedNode("http://example.com/g"), - ) - self.assertEqual(t[0], NamedNode("http://example.com/s")) - self.assertEqual(t[1], NamedNode("http://example.com/p")) - self.assertEqual(t[2], NamedNode("http://example.com/o")) - self.assertEqual(t[3], NamedNode("http://example.com/g")) - - def test_destruct(self) -> None: - (s, p, o, g) = Quad( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - NamedNode("http://example.com/g"), - ) - self.assertEqual(s, NamedNode("http://example.com/s")) - self.assertEqual(p, NamedNode("http://example.com/p")) - self.assertEqual(o, NamedNode("http://example.com/o")) - self.assertEqual(g, NamedNode("http://example.com/g")) - - def test_string(self) -> None: - self.assertEqual( - str( - Triple( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - ) - ), - " ", - ) - - def test_pickle(self) -> None: - quad = Quad( - NamedNode("http://example.com/s"), - NamedNode("http://example.com/p"), - NamedNode("http://example.com/o"), - NamedNode("http://example.com/g"), - ) - self.assertEqual(pickle.loads(pickle.dumps(quad)), quad) - self.assertEqual(copy.copy(quad), quad) - self.assertEqual(copy.deepcopy(quad), quad) - - def test_match(self) -> None: - match_works( - self, - 'Quad(NamedNode("http://example.com/s"), NamedNode("http://example.com/p"), ' - 'NamedNode("http://example.com/o"), NamedNode("http://example.com/g"))', - 'Quad(NamedNode("http://example.com/s"), NamedNode(p), o, NamedNode("http://example.com/g"))', - ) - - -class TestVariable(unittest.TestCase): - def test_constructor(self) -> None: - self.assertEqual(Variable("foo").value, "foo") - - def test_string(self) -> None: - self.assertEqual(str(Variable("foo")), "?foo") - - def test_equal(self) -> None: - self.assertEqual(Variable("foo"), Variable("foo")) - self.assertNotEqual(Variable("foo"), Variable("bar")) - - def test_pickle(self) -> None: - v = Variable("foo") - self.assertEqual(pickle.loads(pickle.dumps(v)), v) - self.assertEqual(copy.copy(v), v) - self.assertEqual(copy.deepcopy(v), v) - - def test_basic_match(self) -> None: - match_works(self, 'Variable("foo")', 'Variable("foo")') - - def test_wildcard_match(self) -> None: - match_works(self, 'Variable("foo")', "Variable(x)") - - -if __name__ == "__main__": - unittest.main() diff --git a/python/tests/test_store.py b/python/tests/test_store.py deleted file mode 100644 index abda28ca..00000000 --- a/python/tests/test_store.py +++ /dev/null @@ -1,418 +0,0 @@ -import gc -import sys -import unittest -from io import BytesIO, StringIO, UnsupportedOperation -from pathlib import Path -from tempfile import NamedTemporaryFile, TemporaryDirectory, TemporaryFile -from typing import Any - -from pyoxigraph import ( - BlankNode, - DefaultGraph, - NamedNode, - Quad, - QueryBoolean, - QueryResultsFormat, - QuerySolution, - QuerySolutions, - QueryTriples, - RdfFormat, - Store, - Triple, - Variable, -) - -foo = NamedNode("http://foo") -bar = NamedNode("http://bar") -baz = NamedNode("http://baz") -triple = Triple(foo, foo, foo) -graph = NamedNode("http://graph") -is_wasm = sys.platform == "emscripten" - - -class TestStore(unittest.TestCase): - def test_add(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - store.add(Quad(foo, bar, baz, DefaultGraph())) - store.add(Quad(foo, bar, baz, graph)) - store.add(Quad(triple, bar, baz)) - store.add(Quad(foo, bar, triple)) - self.assertEqual(len(store), 4) - - def test_extend(self) -> None: - store = Store() - store.extend( - ( - Quad(foo, bar, baz), - Quad(foo, bar, baz, graph), - Quad(foo, bar, baz, DefaultGraph()), - ) - ) - self.assertEqual(len(store), 2) - - @unittest.skipIf(is_wasm, "Not supported with WASM") - def test_bulk_extend(self) -> None: - store = Store() - store.bulk_extend( - ( - Quad(foo, bar, baz), - Quad(foo, bar, baz, graph), - Quad(foo, bar, baz, DefaultGraph()), - ) - ) - self.assertEqual(len(store), 2) - - def test_remove(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - store.add(Quad(foo, bar, baz, DefaultGraph())) - store.add(Quad(foo, bar, baz, graph)) - store.remove(Quad(foo, bar, baz)) - self.assertEqual(len(store), 1) - - def test_len(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - store.add(Quad(foo, bar, baz, graph)) - self.assertEqual(len(store), 2) - - def test_in(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - store.add(Quad(foo, bar, baz, DefaultGraph())) - store.add(Quad(foo, bar, baz, graph)) - self.assertIn(Quad(foo, bar, baz), store) - self.assertIn(Quad(foo, bar, baz, DefaultGraph()), store) - self.assertIn(Quad(foo, bar, baz, graph), store) - self.assertNotIn(Quad(foo, bar, baz, foo), store) - - def test_iter(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, DefaultGraph())) - store.add(Quad(foo, bar, baz, graph)) - self.assertEqual( - set(store), - {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, - ) - - def test_quads_for_pattern(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, DefaultGraph())) - store.add(Quad(foo, bar, baz, graph)) - self.assertEqual( - set(store.quads_for_pattern(None, None, None)), - {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, - ) - self.assertEqual( - set(store.quads_for_pattern(foo, None, None)), - {Quad(foo, bar, baz, DefaultGraph()), Quad(foo, bar, baz, graph)}, - ) - self.assertEqual( - set(store.quads_for_pattern(None, None, None, graph)), - {Quad(foo, bar, baz, graph)}, - ) - self.assertEqual( - set(store.quads_for_pattern(foo, None, None, DefaultGraph())), - {Quad(foo, bar, baz, DefaultGraph())}, - ) - - def test_ask_query(self) -> None: - store = Store() - store.add(Quad(foo, foo, foo)) - self.assertTrue(store.query("ASK { ?s ?s ?s }")) - self.assertFalse(store.query("ASK { FILTER(false) }")) - - def test_construct_query(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - results: Any = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }") - self.assertIsInstance(results, QueryTriples) - self.assertEqual( - set(results), - {Triple(foo, bar, baz)}, - ) - - def test_select_query(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - solutions: Any = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }") - self.assertIsInstance(solutions, QuerySolutions) - self.assertEqual(solutions.variables, [Variable("s"), Variable("o")]) - solution = next(solutions) - self.assertIsInstance(solution, QuerySolution) - self.assertEqual(solution[0], foo) - self.assertEqual(solution[1], baz) - self.assertEqual(solution["s"], foo) - self.assertEqual(solution["o"], baz) - self.assertEqual(solution[Variable("s")], foo) - self.assertEqual(solution[Variable("o")], baz) - s, o = solution - self.assertEqual(s, foo) - self.assertEqual(o, baz) - - def test_select_query_union_default_graph(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, graph)) - results: Any = store.query("SELECT ?s WHERE { ?s ?p ?o }") - self.assertEqual(len(list(results)), 0) - results = store.query("SELECT ?s WHERE { ?s ?p ?o }", use_default_graph_as_union=True) - self.assertEqual(len(list(results)), 1) - results = store.query( - "SELECT ?s WHERE { ?s ?p ?o }", - use_default_graph_as_union=True, - named_graphs=[graph], - ) - self.assertEqual(len(list(results)), 1) - - def test_select_query_with_default_graph(self) -> None: - store = Store() - graph_bnode = BlankNode("g") - store.add(Quad(foo, bar, baz, graph)) - store.add(Quad(foo, bar, foo)) - store.add(Quad(foo, bar, bar, graph_bnode)) - results: Any = store.query("SELECT ?s WHERE { ?s ?p ?o }") - self.assertEqual(len(list(results)), 1) - results = store.query("SELECT ?s WHERE { ?s ?p ?o }", default_graph=graph) - self.assertEqual(len(list(results)), 1) - results = store.query( - "SELECT ?s WHERE { ?s ?p ?o }", - default_graph=[DefaultGraph(), graph, graph_bnode], - ) - self.assertEqual(len(list(results)), 3) - - def test_select_query_with_named_graph(self) -> None: - store = Store() - graph_bnode = BlankNode("g") - store.add(Quad(foo, bar, baz, graph)) - store.add(Quad(foo, bar, foo)) - store.add(Quad(foo, bar, bar, graph_bnode)) - store.add(Quad(foo, bar, bar, foo)) - results: Any = store.query( - "SELECT ?s WHERE { GRAPH ?g { ?s ?p ?o } }", - named_graphs=[graph, graph_bnode], - ) - self.assertEqual(len(list(results)), 2) - - def test_select_query_dump(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - results: QuerySolutions = store.query("SELECT ?s WHERE { ?s ?p ?o }") # type: ignore[assignment] - self.assertIsInstance(results, QuerySolutions) - output = BytesIO() - results.serialize(output, QueryResultsFormat.CSV) - self.assertEqual( - output.getvalue().decode(), - "s\r\nhttp://foo\r\n", - ) - - def test_ask_query_dump(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - results: QueryBoolean = store.query("ASK { ?s ?p ?o }") # type: ignore[assignment] - self.assertIsInstance(results, QueryBoolean) - output = BytesIO() - results.serialize(output, QueryResultsFormat.CSV) - self.assertEqual( - output.getvalue().decode(), - "true", - ) - - def test_construct_query_dump(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz)) - results: QueryTriples = store.query("CONSTRUCT WHERE { ?s ?p ?o }") # type: ignore[assignment] - self.assertIsInstance(results, QueryTriples) - output = BytesIO() - results.serialize(output, RdfFormat.N_TRIPLES) - self.assertEqual( - output.getvalue().decode(), - " .\n", - ) - - def test_update_insert_data(self) -> None: - store = Store() - store.update("INSERT DATA { }") - self.assertEqual(len(store), 1) - - def test_update_delete_data(self) -> None: - store = Store() - store.add(Quad(foo, foo, foo)) - store.update("DELETE DATA { }") - self.assertEqual(len(store), 0) - - def test_update_delete_where(self) -> None: - store = Store() - store.add(Quad(foo, foo, foo)) - store.update("DELETE WHERE { ?v ?v ?v }") - self.assertEqual(len(store), 0) - - @unittest.skipIf(is_wasm, "Not supported with WASM") - def test_update_load(self) -> None: - store = Store() - store.update("LOAD ") - self.assertGreater(len(store), 100) - - def test_update_star(self) -> None: - store = Store() - store.update("PREFIX : INSERT DATA { :alice :claims << :bob :age 23 >> }") - results: Any = store.query( - "PREFIX : SELECT ?p ?a WHERE { ?p :claims << :bob :age ?a >> }" - ) - self.assertEqual(len(list(results)), 1) - - def test_load_ntriples_to_default_graph(self) -> None: - store = Store() - store.load( - b" .", - RdfFormat.N_TRIPLES, - ) - self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) - - def test_load_ntriples_to_named_graph(self) -> None: - store = Store() - store.load( - " .", - RdfFormat.N_TRIPLES, - to_graph=graph, - ) - self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) - - def test_load_turtle_with_base_iri(self) -> None: - store = Store() - store.load( - BytesIO(b" <> ."), - RdfFormat.TURTLE, - base_iri="http://baz", - ) - self.assertEqual(set(store), {Quad(foo, bar, baz, DefaultGraph())}) - - def test_load_nquads(self) -> None: - store = Store() - store.load( - StringIO(" ."), - RdfFormat.N_QUADS, - ) - self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) - - def test_load_trig_with_base_iri(self) -> None: - store = Store() - store.load( - " { <> . }", - RdfFormat.TRIG, - base_iri="http://baz", - ) - self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) - - def test_load_file(self) -> None: - with NamedTemporaryFile(suffix=".nq") as fp: - fp.write(b" .") - fp.flush() - store = Store() - store.load(path=fp.name) - self.assertEqual(set(store), {Quad(foo, bar, baz, graph)}) - - def test_load_with_io_error(self) -> None: - with self.assertRaises(UnsupportedOperation) as _, TemporaryFile("wb") as fp: - Store().load(fp, RdfFormat.N_TRIPLES) - - def test_dump_ntriples(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, graph)) - output = BytesIO() - store.dump(output, RdfFormat.N_TRIPLES, from_graph=graph) - self.assertEqual( - output.getvalue(), - b" .\n", - ) - - def test_dump_nquads(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, graph)) - self.assertEqual( - store.dump(format=RdfFormat.N_QUADS), - b" .\n", - ) - - def test_dump_trig(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, graph)) - store.add(Quad(foo, bar, baz)) - output = BytesIO() - store.dump(output, RdfFormat.TRIG) - self.assertEqual( - output.getvalue(), - b" .\n" - b" {\n\t .\n}\n", - ) - - def test_dump_file(self) -> None: - with NamedTemporaryFile(delete=False) as fp: - store = Store() - store.add(Quad(foo, bar, baz, graph)) - file_name = Path(fp.name) - store.dump(file_name, RdfFormat.N_QUADS) - self.assertEqual( - file_name.read_text(), - " .\n", - ) - - def test_dump_with_io_error(self) -> None: - store = Store() - store.add(Quad(foo, bar, bar)) - with self.assertRaises(OSError) as _, TemporaryFile("rb") as fp: - store.dump(fp, RdfFormat.TRIG) - - def test_write_in_read(self) -> None: - store = Store() - store.add(Quad(foo, bar, bar)) - store.add(Quad(foo, bar, baz)) - for triple in store: - store.add(Quad(triple.object, triple.predicate, triple.subject)) - self.assertEqual(len(store), 4) - - def test_add_graph(self) -> None: - store = Store() - store.add_graph(graph) - self.assertEqual(list(store.named_graphs()), [graph]) - - def test_remove_graph(self) -> None: - store = Store() - store.add(Quad(foo, bar, baz, graph)) - store.add_graph(NamedNode("http://graph2")) - store.remove_graph(graph) - store.remove_graph(NamedNode("http://graph2")) - self.assertEqual(list(store.named_graphs()), []) - self.assertEqual(list(store), []) - - @unittest.skipIf(is_wasm, "Not supported with WASM") - def test_read_only(self) -> None: - quad = Quad(foo, bar, baz, graph) - with TemporaryDirectory() as dir: - store = Store(dir) - store.add(quad) - del store - gc.collect() - store = Store.read_only(dir) - self.assertEqual(list(store), [quad]) - - @unittest.skipIf(is_wasm, "Not supported with WASM") - def test_secondary(self) -> None: - quad = Quad(foo, bar, baz, graph) - with TemporaryDirectory() as dir: - store = Store(dir) - store.add(quad) - store.flush() - - secondary_store = Store.secondary(dir) - self.assertEqual(list(secondary_store), [quad]) - - store.remove(quad) - store.flush() - self.assertEqual(list(secondary_store), []) - del secondary_store - del store - - -if __name__ == "__main__": - unittest.main()