Compare commits


552 Commits

Author SHA1 Message Date
Niko PLP c7f873f904 adding serde to the data model: triple 8 months ago
Niko PLP 7b0b60cda0 adding serde to the data model 8 months ago
Niko PLP a4e2847810 test 8 months ago
Niko PLP 41e2e7206e test 8 months ago
Niko PLP aca204d9e5 trying sync write 8 months ago
Niko PLP b3ae51da82 removed key in new() 9 months ago
Niko PLP c2d31daa1c fix opt_bytes_to_ptr 9 months ago
Niko PLP f3ae4d8074 remove secondary storage 9 months ago
Niko PLP 53834396aa fix removed feature rocksdb 9 months ago
Niko PLP a9ff0524e2 remove feature rocksdb, added key to open storage 9 months ago
Niko PLP 0d82c473f7 encryption key for rocksdb env 9 months ago
Niko PLP 6226e1fba6 libc version reduced 9 months ago
Niko PLP fdcaf65a8d removed cli 9 months ago
Niko PLP 77edc05ced use nextgraph's version of rocksdb 9 months ago
Tpt 427d675c9b Upgrades GitHub actions 9 months ago
Tpt 1a40ab2017 Fixes some typos 9 months ago
Tpt cbb72c7be6 sparopt: Avoid join reordering in SERVICE 10 months ago
Tpt 58699f36f3 Removes dependency on escargot 10 months ago
Tpt 83aa8170ea Makes test_debian_compatibility more robust 10 months ago
Tpt 1d5843fddc Upgrades to PyO3 0.21 10 months ago
Tpt 0f0c1d2742 Releases v0.4.0-alpha.6 10 months ago
Tpt 184b8367dc Fixes compatibility with latest OxIRI version 10 months ago
Tpt ba396bad10 Releases v0.4.0-alpha.5 10 months ago
Tpt 75695dcdf7 Upgrades dependencies 10 months ago
Tpt 4c27b43e41 JS: Drop older NodeJS and WebBrowsers support 10 months ago
Tpt dcfcdd359c Python: Fixes Dataset.quads_for_graph_name type 10 months ago
Tpt f7023a381e Python: exposes the Dataset class 10 months ago
Tpt 2998f795fd Uploads code coverage to codecov 10 months ago
Tpt 4705d75893 makes newer Clippy happy 10 months ago
Tpt 9b9cf9cbcb Adds packaging status to the README 10 months ago
Tpt 583d64e0c0 Fuzz image: do not install C++ compiler and make sure submodules are cloned 10 months ago
Tpt a0cc75b7cc Python: bulk_loader: fixes type annotations 10 months ago
Tpt be44451679 CLI: Documents better the dump --graph argument 10 months ago
Tpt 43ef3e9e8a Upgrades to RocksDB 9.0.0 10 months ago
Tpt 0ac70e73dc Adds an enum for CanonicalizationAlgorithm 10 months ago
Tpt f5b975e4d1 Bump versions and fixes spargebra version 10 months ago
Tpt bd5e54a00a Makes new Clippy happy 10 months ago
Tpt c57615519b Properly publishes Oxigraph crates 10 months ago
Tpt 130f090555 Fixes dependency version 10 months ago
Tpt bdde46b5c7 Releases v0.4.0-alpha.4 10 months ago
Tpt 2b656df6ee Makes new Clippy happy 10 months ago
Tpt 8e2548467c Upgrades dependencies including RocksDB 8.11.3 10 months ago
Tpt fee7bf0d8a Python: strips builds by default 10 months ago
Tpt e0087c56b3 Makes RocksDB backend optional but enabled by default 10 months ago
Tpt 0b5790a18f JS: Allows to set SPARQL base IRI and union graph 11 months ago
Tpt f7d132f317 Follow up on Triple::from_terms 11 months ago
Jesse Wright d361e1d283 feat: add term casting 11 months ago
Tpt 1424181379 Support Tokio async in SPARQL TSV results parser 11 months ago
Tpt 01d73fa62d CI: Read MSRV from Cargo.toml 11 months ago
Tpt accadaac34 CI: Uses 1.76.0 for Clippy 11 months ago
Tpt 7d45ea43f5 Adds Tokio async to SPARQL XML results parser 11 months ago
Tpt c13cb8db7c Fix MSRV test and upgrades dependencies 11 months ago
Tpt 6c7514d058 OxRDF: fixes running doc tests with rdf-star disabled 11 months ago
Tpt e48b268fc5 Adds an async SPARQL JSON results reader 11 months ago
Tpt c277804026 RocksDB: uses multi-columns flush 11 months ago
Tpt efae84b5f8 Convert from spargebra Update to oxigraph Update 11 months ago
Tpt 269c73a7c2 Upgrades to Ruff 0.2 11 months ago
Yuri Astrakhan ea300e9081 Normalize unicode refs 11 months ago
Yuri Astrakhan a078b12508 Bump Cargo.lock 11 months ago
Yuri Astrakhan 0400f04915 Error renaming 11 months ago
Tpt 655ecd3e91 Convert error to thiserror 11 months ago
Yuri Astrakhan 1c3f054836 Convert error to thiserror 11 months ago
Yuri Astrakhan f5de5d3e98 use github action to install cargo tools 11 months ago
Yuri Astrakhan 089875ad21 A few more minor lints, keyword fix 11 months ago
Tpt 9e3758e2c9 Makes QueryResults::write return the Write impl 11 months ago
Tpt be26d210f1 Removes unused StrLookup::contains_str 11 months ago
Yuri Astrakhan a924df0e0a Clean workspace dependency list and updates dependencies 11 months ago
Tpt 0b1aabfcdd Moves main crate to lib/oxigraph and centralizes dependencies 11 months ago
Tpt 70a4ff231b Runs SPARQL 1.2 testsuite 11 months ago
Tpt d49fb47767 Adds a link to RDFa and JSON-LD parsers in Rust 11 months ago
Yuri Astrakhan c15233e964 do not order trait methods 11 months ago
Yuri Astrakhan 1e37577b71 Optimize some code, lints 11 months ago
Yuri Astrakhan 1e4326a2c5 Optimize format performance 11 months ago
Tpt c0d245871c Simplifies the reexport of sub crates 12 months ago
Tpt 2b6ac5c195 Release v0.4.0-alpha.3 12 months ago
Tpt ec030fb652 Python: test Pyodide wheel 12 months ago
Tpt 2a81106c34 Python: use rustls by default on anything that is not Windows/macOS/iOS 12 months ago
Tpt 46d3ed3f99 Removes all debuginfo from release build 12 months ago
Tpt ef765666be Serialization: allows to set prefixes 12 months ago
Tpt 0a7cea5e25 Updates dependencies 12 months ago
Yuri Astrakhan 18bf383701 Remove tabs from BNF comments 12 months ago
Tpt 54489aacfb oxttl and oxrdfio: improves prefixes and base_iri getters 12 months ago
Yuri Astrakhan 6494ba6e31 keep concrete types 12 months ago
Yuri Astrakhan 185d83838c Linting: Impl ordering, Self refs 12 months ago
Tpt d838d55f02 Uses nightly rustfmt on imports and comments 12 months ago
etiennept f354bc7546 JS: avoids directory copies during build 12 months ago
Yuri Astrakhan a976eb3efc Remove use_self allow clippy 12 months ago
Yuri Astrakhan 522bda2906 Add WKT_LITERAL vocabulary support 12 months ago
Yuri Astrakhan 5be6f55155 A few more self-fixes 12 months ago
Yuri Astrakhan 405b95b4bd Minor linting fixes 12 months ago
Yuri Astrakhan 5f603bc4fe Fix CI status badges 12 months ago
Yuri Astrakhan 2b8df24b8b Use `Self::AssocName` to simplify declarations 12 months ago
Yuri Astrakhan 51941c0dc5 Simplify complex strings with raw literals, readme fix 12 months ago
Yuri Astrakhan d4bfcd3b24 Add debug print to test_debian_compatibility 12 months ago
Tpt df040400c5 Turtle: fixes parsing bug with escaped dot at the end of a local name 1 year ago
Tpt b08c201074 CI: attempt to fix debian compatibility test 1 year ago
Tpt c2040a30fd oxttl: Exposes the base IRI 1 year ago
Tpt c2df0b829d CI: Uploads all Python wheels in one batch 1 year ago
Tpt cffc536eb9 Releases v0.4.0-alpha.2 1 year ago
Tpt 5cf8025aec CI: Fuzzer: Removes unused clone 1 year ago
Tpt d4eaa3c5ef Docker: use semver tagging 1 year ago
Tpt dcabf50ab6 Fixes Docker image 1 year ago
Tpt 0d23f4ae48 Architecture diagram 1 year ago
Tpt d1da94b08b Runs sphinx-lint in the CI 1 year ago
Tpt f01796b1a4 Python: Runs doctests as part of the unittests 1 year ago
Tpt 93eab63868 Python: QuerySolution is thread safe 1 year ago
Tpt 42a66f62b9 Enables cargo semver-checks again 1 year ago
Tpt f2a2bd5b5d CI: Improves Python build and upload 1 year ago
Tpt 391e8d7662 Fixes and ensures 32bits x86 support 1 year ago
Tpt a5781d1187 Releases v0.4.0-alpha.1 1 year ago
Tpt a84b898fda Parsers: adds "unchecked" option for faster parsing 1 year ago
Tpt d170b53609 N3: Fixes stack overflow errors 1 year ago
Tpt 790501e1b3 Setup codspeed and improves benchmarks 1 year ago
Tpt bdf5d593ee CI: Share code to setup Rust 1 year ago
Tpt 1761672b41 Upgrades Pyo3 1 year ago
Tpt bde73e5d72 Updates README 1 year ago
Tpt 4c79e7ee78 Upgrades testsuite 1 year ago
Tpt 5cc3e37876 Upgrades Cargo lock file 1 year ago
Tpt 8104f9e1de Merge remote-tracking branch 'origin/main' into next 1 year ago
Tpt ed94f56ab4 Fixes linux aarch64 build 1 year ago
Tpt 025bd2afd2 Uses new cargo lint configuration system 1 year ago
Tpt 4756217787 Adds "since" to the #[deprecated] annotations 1 year ago
Tpt 2e9ac3cc1a Python Store.backup: allows pathlib 1 year ago
Tpt 604d1bbe2e BulkLoader: rename set_* methods to with_* methods 1 year ago
Tpt 4084acb9b8 Renames dump_dataset and dump_graph to dump_to_write and dump_graph_to_write 1 year ago
Tpt 2a135283d5 Lints against 1.74 1 year ago
etiennept 735db897ff Update package.json 1 year ago
Tpt 4b3f3f3278 RDF/XML: properly serialize predicates with xmlns: prefix 1 year ago
Tpt 3241f47059 Docker: use bookworm 1 year ago
Tpt 4841f89072 Server: allows Content-Encoding: gzip requests 1 year ago
Tpt efd5eec65d Introduces load_from_read instead of load_graph and load_dataset 1 year ago
Tpt 899e553249 Makes SPARQL query result Send and Sync 1 year ago
Tpt 03afe5c6c6 Releases v0.3.22 1 year ago
Tpt d88c2e0a8a Adds compatibility with lazy_static with spin_no_std feature 1 year ago
Tpt 9979a3d503 Allows newer dependency versions 1 year ago
Dan Brickley 389d993dc4 Update README.md 1 year ago
Tpt 4f404ab650 Python: allows again to use string for types (but with a deprecation warning) 1 year ago
Tpt f445166942 JS: Applies new biome lints 1 year ago
Tpt e1ff1d919c Releases v0.3.21 1 year ago
Tpt 31733beda8 Python: generate sdist on Linux 1 year ago
Tpt aa82fc8157 Python: builds for Python 12 and pypy on Linux 1 year ago
Tpt 6d1d752e01 Upgrades RocksDB to 8.8.1 1 year ago
Tpt 756c5394d0 Adds Tokio support to SPARQL results serializers 1 year ago
Tpt d1cb4cecbd OxRDF: makes more function const 1 year ago
Tpt 99c3a4cce4 CI: Adds a test with latest dependency versions 1 year ago
Tpt 48174cac12 Distributes Pypy wheels for linux 1 year ago
Tpt 9af2717502 Bulk loader: do not fail on empty files 1 year ago
Tpt a9fee4f6b8 Upgrades dependencies 1 year ago
Tpt f8034c68e9 SPARQL: refactor AggregateExpression 1 year ago
Tpt 98caee8f92 RDF/XML: avoids to serialize special XML namespaces 1 year ago
Tpt ddf589ea14 Python: Introduces enums for RDF and SPARQL result formats 1 year ago
Tpt d19947414e The N-Triples serializer outputs canonical N-Triples 1 year ago
Tpt cc41448b18 Python: harmonizes parse_query_results and parse signatures 1 year ago
Tpt e6d98445e6 Makes newer Clippy happy 1 year ago
Tpt 261f9c64a5 Python: I/O adds a `path` parameter to read/write from/to a file 1 year ago
Tpt 48db7f872b Python: any os.PathLike path is now supported 1 year ago
Tpt 8a7c6cf2c1 Uses Ruff instead of Black 1 year ago
Tpt ab5f5c1c60 Upgrades quick-xml 1 year ago
Tpt a2a6c5a41e Python: annotate Triple and Quad with sequence behaviors 1 year ago
Tpt a259879ef1 Releases v0.3.20 1 year ago
Tpt ea80c11d6e CI: Run clippy on all targets 1 year ago
Tpt 1dfad23e4b Upgrades RocksDB 1 year ago
Tpt 5647624012 Testsuite: executes C14N tests 1 year ago
dependabot[bot] e87bff6e6e Bump rustix from 0.37.23 to 0.37.25 1 year ago
Tpt b1e33293a5 Upgrades RocksDB 1 year ago
Tpt ef429e6d1b Uses anyhow context function more often 1 year ago
Tpt b0a01e65fa SPARQL: makes sure that STR is working properly on small IRIs 1 year ago
Tpt 517df6d59e Testsuite: Makes use of mf:assumedTestBase 1 year ago
Tpt 832a4ba27d JS: Upgrades Biome 1 year ago
Vilnis Termanis c1b57f460b SPARQL: Fix ASK + FROM combination 1 year ago
Tpt 8d348b2a6f Upgrades PyO3 1 year ago
Tpt b777d0110a Makes recent Clippy happy 1 year ago
Tpt 64f45cd11b Makes recent Clippy happy 1 year ago
Tpt 70b1c52166 Python: uses 3.12 in CI + builds for it 1 year ago
Tpt 38844f6436 sparesults: use Read instead of BufRead as input type 1 year ago
Tpt d280f7d2f7 Adds basic location support to sparesults SyntaxError 1 year ago
Tpt dbb39d867a Upgrades json-event-parser 1 year ago
Tpt 5e3a2fc89d Uses Rustls for portable Linux binaries 1 year ago
Tpt c5f02d9263 Upgrades oxhttp 1 year ago
Tpt 90b7b128f2 Upgrades MSRV to 1.70 1 year ago
Tpt 9b985295ae Drops Musl linux 1.1 support 1 year ago
Tpt 108721624f Improves bulk loader doc comments 1 year ago
Tpt 67fd726f9d Makes SPARQL results TSV work with a Read implementation 1 year ago
Tpt 412ca37b3c Makes sparesults parser API closer to oxrdfio 1 year ago
Tpt 7a3e07d98d sparesults: no more hidden flush 1 year ago
Tpt 6a21cb0625 Makes sparesults formatter API closer to oxrdfio 1 year ago
Tpt 0783d1dcda Splits sparesults lib.rs 1 year ago
Tpt 8ee30cf001 Removes .devcontainer 1 year ago
Tpt 8c8ca54596 CI: Increases fuzzing time 1 year ago
Tpt 7c4578f5f5 ReadTheDocs: updates Rust version 1 year ago
Tpt 4c97637e4b Python: improves documentation 1 year ago
Tpt a6f32390df Python: query results I/O 1 year ago
Tpt 180ae22293 Python: introduces QueryBoolean class 1 year ago
Tpt a8f98a0056 Python: makes serialization method output bytes if no output is specified 1 year ago
Tpt 1b511ed018 Python: guess file type from file extension 1 year ago
Tpt 87d2006b6e RocksDB: adds a feature for dynamic linking directly in the oxigraph crates 1 year ago
Tpt be074000cc Allows dynamic RocksDB linking 1 year ago
Tpt 3c51dd31bc Move back MSRV to 1.67 but keep Cargo.lock to 1.70 1 year ago
Tpt 555f6b8d7c xsd:duration: properly fails when building not-serializable durations 1 year ago
Tpt bdedcc47e3 Renames oxigraph-server to oxigraph(-cli) 1 year ago
Tpt 99abe69ba1 oxttl: Exposes prefixes 1 year ago
Tpt be002dd51e Migrates to new sha1 name 1 year ago
Tpt 6edfb7a2f4 Python: Adds location data to SyntaxError 1 year ago
Tpt 13c3515d7b OxTTL: return file position in errors 1 year ago
Tpt 8193cac86d Testsuite: avoid creating and dropping Stores 1 year ago
Tpt b1c90b599b Testsuite: simplifies error handling 1 year ago
Tpt 1d55635fe2 Migrates from Rome to Biome 1 year ago
Tpt 1eaa77ad93 Uses new rustdoc <div class="warning"> style 1 year ago
Tpt 7fe055d2b4 Exposes SPARQL results I/O in Oxigraph and improve EvaluationError 1 year ago
Tpt 9da26c6f95 Makes media type to format more robust 1 year ago
Tpt f10e5a40a3 Enables more Clippy lints 1 year ago
Tpt 024bc7b8e8 Simplifies Gitter link 1 year ago
Tpt 6611b491b1 Sets Rust minimum version to 1.70 1 year ago
Tpt 666a00cfab Upgrades webkpi to avoid RUSTSEC-2023-0052 1 year ago
Tpt c9ec5f7c0c Python and JS: Renames "mime_type" parameter to "format" 1 year ago
Tpt d44f9bee7a I/O adds extra #[must_use] annotations 1 year ago
Tpt 570f21748d Cargo.toml: share some common fields in the workspace 1 year ago
Tpt d2306cea52 Improves type inference on zero-args functions 1 year ago
Tpt 9e76323e2b CI: Add cargo caching for linux wheel build 1 year ago
Tpt 872111ab88 Makes Graph and Dataset implement Clone 1 year ago
Tpt 3de3f9c4bc Server: Adds ValueHint annotations 1 year ago
Tpt 010196c974 Makes parse_ and serialize_ method take owned Parser and Serializer 1 year ago
Tpt bbf184f7ae Isomorphism: makes sure that new hashes depends on the old ones 1 year ago
Tpt 4568ae4209 JS: Adds more information on how to use dev tools 1 year ago
Tpt 788450932a Server: removes the "location" argument to relevant commands 1 year ago
Tpt f586cc048f Fuzzer: ensure that NQuad/TriG segmentation does not affect results 1 year ago
Tpt 88e49f6c66 Server: adds the "convert" command 1 year ago
Tpt 807cf0d436 Isomorphism: make sure to also take quoted triples into account and fixes interning stability issue 1 year ago
Tpt 5fee36e587 Improves naming of artifacts built by the CI 1 year ago
Tpt c6e55c706a RDF serialization: removes implicit flush 1 year ago
Tpt 7c227830e9 Upgrades RocksDB 1 year ago
Tpt f878463828 Releases v0.3.19 1 year ago
Tpt bb7379addb Upgrades dependencies 1 year ago
Tpt 382aa2e01f Python: allows giving pathlib.Path for input 1 year ago
Tpt 3bb05e2af2 Adds documentation field to Cargo.toml 1 year ago
Tpt 4a798ed3ea Python: use OSError instead of IOError to map io::Error 1 year ago
Tpt f183196859 Adopt new I/O API for parsing 1 year ago
Tpt 217abaf7ee Adopt new I/O API for serialization 1 year ago
Tpt 7cd383af79 Introduces OxRDF I/O stand-alone crate 1 year ago
Tpt 73af297b4c Adds From<Iri> to NamedNode 1 year ago
Tpt b06d6506cb Fixes "let else" formatting 1 year ago
Tpt 12a738279f Python: allows giving pathlib.Path for input 1 year ago
Tpt 4cb377bda4 Adds documentation field to Cargo.toml 1 year ago
Tpt afaabf6110 Simplifies OxTTL lexer buffer management 1 year ago
Tpt 4f7445104a Testsuite: Upgrades to new RDF syntax test structure 1 year ago
Tpt 3adf33d2f4 Renames "parse_from_read" to "parse_read" 1 year ago
Tpt 922023b1da Parsers do not need BufRead anymore 1 year ago
Tpt 077c1fc1a8 Improves XSD errors and code organization 1 year ago
Tpt b22e74379a Run RDF canon tests to check isomorphism 1 year ago
Tpt 1e1ed65d3b Stop assuming JS platform when compiling to wasm32-unknown-unknown 2 years ago
Tpt 8a398db20e SPARQL: Do not unescape unicode escape everywhere but only in IRIs and strings 2 years ago
Tpt 00f179058e Upgrades minimal Python version to 3.8 2 years ago
Thomas 8e770fbb5d SPARQL: Removes intermediate query plan representation 2 years ago
Thomas c31ba0e823 Makes sparopt figure out good join keys 2 years ago
Tpt cdabe52847 RDF I/O: adds basic Tokio support 2 years ago
Thomas 501f9ce6f9 Makes profiler independent from query plan 2 years ago
Thomas 24a1dd2556 Applies some recent Clippy lints 2 years ago
Tpt c8e718ed2d Properly document features in docs.rs 2 years ago
Tpt db7fab0f20 Run Clippy on more configurations 2 years ago
Tpt f6c8358b24 Refactor parsers error types 2 years ago
Tpt 69d8ce6b4e Migrates RDF/XML parser from Rio 2 years ago
Thomas 94986a0d28 Fixes a testsuite typo 2 years ago
Thomas b69e0d38f6 Fixes a testsuite typo 2 years ago
Tpt 98ac089984 Adds TryFrom<DayTimeDuration> to std::time::Duration 2 years ago
Tpt 001b6e07b7 Enforces "return self not must use" lint 2 years ago
Tpt 86f14ce96f Improves oxttl documentation 2 years ago
Tpt cb9922379c Uses "let else" syntax where relevant 2 years ago
Dan Yamamoto 5085a60a87 Apply suggestions from code review 2 years ago
Dan Yamamoto 43e6ce87f8 OxRDF: Add extra literal escaping to generate canonical N-Triples and N-Quads 2 years ago
Tpt 71b1768d28 New N3/Turtle/TriG/N-Triple/N-Quad parsers and serializers 2 years ago
Tpt a1cbfdf67d Improves some code style details 2 years ago
Tpt 6cc7488905 SPARQL: requires decimal digits for DECIMAL serialization 2 years ago
Tpt a27f31b84e Python: Removes DefaultGraph.value 2 years ago
Tpt 785df9b00b Makes use of io::read_to_string 2 years ago
Tpt 76deca135c BulkLoader: Uses thread::scope 2 years ago
Tpt 2281575c14 GraphName: implements Default 2 years ago
Tpt 5af06e926a Removes deprecated methods from oxsdatatypes 2 years ago
Tpt 01caaa5d70 calendar subtraction: it should return xsd:dayTimeDuration following XPath 2 years ago
Tpt 81895cb6bc xsd:integer: checked_div is integer division and not regular division according to XPath 2 years ago
Tpt 40b10cdabc Adds a naive standalone query optimizer 2 years ago
Tpt 7c0563cb1b XSD type structs: use by-value instead of by-ref in methods 2 years ago
Tpt a8abf26913 Enables some extra Clippy lints 2 years ago
Tpt c016116b09 Makes Clippy 1.65 happy 2 years ago
Tpt ae294683d6 Upgrades MSRV to 1.65 2 years ago
Tpt ab17138f33 Starts 0.4.0-alpha.1 2 years ago
Tpt 8e76341bb2 Releases v0.3.18 2 years ago
Tpt f47306a4c5 Duration: ensures that the smallest supported duration can be parsed and serialized 2 years ago
Tpt acf83d4a31 Fixes || evaluation when all alternatives are false 2 years ago
Tpt f23ef514e4 Adds a link to the v0.4 development branch 2 years ago
Tpt 9a4f726aa4 Python: fixes Store.contains_named_graph return type 2 years ago
Tpt 99186c1e7d Releases v0.3.17 2 years ago
Tpt 7a1cce527d Upgrades Maturin to 1.0 2 years ago
Tpt 1c1531f640 Simplifies some rust doc links 2 years ago
Tpt 03f7641355 Enables docker cross compilation to arm64 2 years ago
Tpt 8c68cf4041 Makes SPARQL AVG aggregate function simpler 2 years ago
Tpt 1f89bef860 Python doc: cleans up links to RDF specifications 2 years ago
Tpt ac61adc9c2 CI: Temporarily disable SPARQL XML results fuzzing 2 years ago
Tpt edec370f0a Makes xsd:dateTime year parsing stricter 2 years ago
Tpt 5f2c9a3b92 Cleans up documentation links to RDF specifications 2 years ago
Tpt 8c62137a01 Upgrades PyO3 to 0.19 2 years ago
Tpt f72a9600ae CI: Makes sure to update APT cache before installing packages 2 years ago
Tpt 57d39cad24 Makes Clippy happy 2 years ago
Tpt a7758484a5 Python: Annotates immutable classes as frozen 2 years ago
Tpt 5d253c6afb SPARQL XML results: circumvent quick-xml crash 2 years ago
Tpt 3e51020222 CI: Fuzz SPARQL query results I/O 2 years ago
Tpt adda2d2d7e Makes hash join into for loop join optimization more aggressive 2 years ago
Tpt 22f990344f Fixes hash left join into for loop left join optimization 2 years ago
Tpt 4cc9e4008b Fixes empty IN expression optimization 2 years ago
Tpt 0a064a8704 SPARQL: Prevents ForLoopLeftJoin with MINUS 2 years ago
Thomas d2804d8a8d Python: fixes parsing from text I/O with not-ASCII char 2 years ago
Tpt d500614fcc Fuzzer: do not consider OFFSET and LIMIT for now 2 years ago
Tpt 2650c5ed13 Speeds up sparql_eval fuzzer by hardcoding size_hint 2 years ago
Tpt 7b9e9f9694 SPARQL: Makes average function works with yearMonthDuration and dayTimeDuration 2 years ago
Tpt d992fb7545 Fixes cross compilation of RocksDB with zig targeting macOS 2 years ago
Tpt b2d625e10e XSD: Drops nom dependency 2 years ago
Tpt 63945638ea XSD: Adds checked_neg operation 2 years ago
Tpt 5bfbbdbd3f Python: Adds Store.contains_named_graph 2 years ago
Tpt eb40457d5c Upgrades RDF-tests 2 years ago
Tpt d24461fc42 XSD: Improves arithmetic computations 2 years ago
Tpt 8bec2e2ff9 SPARQL-star parser: align with SPARQL 1.2 draft 2 years ago
Tpt 9a6233b511 SPARQL parser: removes reference to rule numbers 2 years ago
Tpt d26731432c Server: Use target graph name as base URI 2 years ago
Tpt 38af275451 Server: Improves systemd support 2 years ago
Tpt cb89166380 RocksDB: Avoids building twice util/crc32c_arm64.cc on aarch64 2 years ago
Tpt 5ce24dda01 Dependabot: Adds JS dependencies 2 years ago
Tpt 05fbb0e071 JS: Upgrades Rome formatter 2 years ago
Thomas Tanon 8f3af5a7fc Revert "Python: drops compatibility with Musl 1.1" 2 years ago
dependabot[bot] a25bf55919 Update maturin requirement from ~=0.14.0 to ~=0.15.1 in /python 2 years ago
Tpt f9d7b93abf Python: drops compatibility with Musl 1.1 2 years ago
Tpt e96672a2a8 SPARQL plan: allows AND and OR to have more than 2 children 2 years ago
Tpt cfe52db3a3 Spargebra: Makes GroundTermPattern::Triple properly gated with the rdf-star feature 2 years ago
Tpt 7175784356 Improves Clippy lint list 2 years ago
Tpt a2d8bcaaa3 Starts 0.3.17 dev 2 years ago
Tpt f520de8893 Releases v0.3.16 2 years ago
Tpt 8e3ee3b6dd Upgrades RocksDB to 8.1.1 2 years ago
Yaroslav Halchenko 6f37c4c9c9 Move shellcheck into tests workflow collecting all kinds of testing 2 years ago
Yaroslav Halchenko aeeabf5d1c Replace very cute (ab)use of array to get first element with explicit awk 2 years ago
Yaroslav Halchenko 029fbf470e Disable shellcheck warning about loop running once -- intended 2 years ago
Yaroslav Halchenko a3294a8abd Remove unused JAVA_HOME. If to be used by child processes -- should have been exported 2 years ago
Yaroslav Halchenko 80ce67e6dd Run bench/bsbm_ with set -eu to exit right away if something abnormal happens 2 years ago
Yaroslav Halchenko 38357dd9b5 Add github action to shellcheck main on push and PRs 2 years ago
Thomas 704440538d Adds EXISTS operation to the profiler output 2 years ago
Yaroslav Halchenko bbe9bd0303 Make all Thomases into one 2 years ago
Tpt cf03da0fab CI: fixes linux cross-compilation v2 2 years ago
Tpt b8c5628e3b CI: fixes linux cross-compilation 2 years ago
Tpt 79c5e3918e SPARQL plan: fixes used variable lookup in quoted triple patterns 2 years ago
Tpt b630ab4185 Adds some extra tests covering property path with shared variables 2 years ago
Tpt 3e0f6b5405 Applies oxigraph crate clippy lint to the complete project 2 years ago
Maximilian Goisser 1d02098b70 Fix missing GSPO compaction 2 years ago
Tpt c3cf8e2002 Start 0.3.16 dev 2 years ago
Tpt c6b8c754ee Releases v0.3.15 2 years ago
Tpt d653e0645b Updates dependencies 2 years ago
Tpt 56e105bc04 Server: prints the query explanation with indentation 2 years ago
Tpt d587d3b2bb xsd:decimal parsing: properly detect underflow 2 years ago
Tpt 284e79521d Server: Adds an option to allow CORS 2 years ago
Tpt 86bbebf93c Fixes evaluation of empty aggregation without GROUP BY 2 years ago
Tpt feeaf17fe6 Test: fixes SPARQL query results isomorphism 2 years ago
Thomas f41c499ef3 Consolidates Rust benchmarks 2 years ago
Tpt a977adff91 Allows to collect RocksDB statistics 2 years ago
Tpt d74fc58a1c Flamegraph: allows to use the inferno generator tool 2 years ago
Tpt 60ffd99ad8 Exposes the query profiler to the CLI app 2 years ago
Tpt dcd59ac4dd Adds a way to add profiling to the evaluator 2 years ago
Tpt 81793bc221 Makes nested PlanNode easy to clone 2 years ago
Tpt 9dc1106b9a SPARQL query plan: attach original RDF terms 2 years ago
Tpt 6af6c9c0eb Bulk loader: fixes the progress indicator display 2 years ago
Tpt 7787be6e84 Bulk loader: pre-allocate some buffers 2 years ago
Tpt f4b99e6953 Updates version to 0.3.15-dev 2 years ago
Tpt ef65d53190 Python: Adds __match_args__ definition where relevant 2 years ago
Tpt 76dec0b6a8 Docker image: uses distroless base 2 years ago
Tpt 13976014e7 Bulk loader: reduces default system parameters 2 years ago
Tpt f29a49bcd2 ReadTheDocs: updates rust to 1.64 2 years ago
Tpt 5ce23665f8 CI: Python: Avoids generating target specific wheels on nightly builds 2 years ago
Tpt 51c896fe03 CI: Python: Installs Black for stubs formatting 2 years ago
Tpt 0c407cd041 CI: fixes release artifacts upload 2 years ago
Tpt 9c32f07e87 Python: Fixes the secondary test on Windows 2 years ago
Tpt 5852d0b4df CI: Evaluates Python tests on Windows 2 years ago
Tpt 88732f7dc7 Fixes typos in CHANGELOG 2 years ago
Tpt 5849c6fdbe Releases v0.3.14 2 years ago
Tpt 3fb6beb0ba Upgrades dependencies 2 years ago
Tpt f9c58602a0 Server: typos in documentation 2 years ago
Tpt cdf76307d9 Upgrades RocksDB to 8.0.0 2 years ago
Tpt a164b268c2 OxRDF: drops lasso dependency 2 years ago
Tpt 21994d39fd Server: adds explicit flush calls 2 years ago
Tpt 20dc1f26df Uses new quick-xml APIs for XML results serialization 2 years ago
Tpt 0f43ef19e3 Python: QuerySolution: implements equality 2 years ago
Tpt bdb803dab5 JSON SPARQL results: allows the "head" key to be at the end of the document 2 years ago
Tpt c40c81447e Python: Optimizes copy on basic model classes 2 years ago
Tpt d4e964ac47 Python: Allow to pickle all basic model classes 2 years ago
Tpt 935e778db1 Python: adds proper module metadata to classes 2 years ago
Tpt 7b74fa9b0a Stop advertising the Docker images on Docker Hub 2 years ago
Tpt 28def4001b Python: Uses Ruff linter 2 years ago
Tpt fbcbd60c0e Python: adds Store.bulk_extend 2 years ago
Tpt 0e00e8209a Python: adds Store.extend 2 years ago
Tpt e553b6374a Python: uses some recent PyO3 syntactic sugars 2 years ago
Tpt 23e47bcc5e Server: Adds the optimize command 2 years ago
Tpt 3d61867386 Server: Fixes support of --location before command name 2 years ago
Tpt bf36e60b34 Server: Adds query and update commands 2 years ago
Tpt beca5e88ca Server: use option instead of positional arguments 2 years ago
Thomas Tanon e90d98bb2c Add issue template 2 years ago
Tpt c4a5b65ac0 Adds link to the conda-forge package 2 years ago
Tpt d8fa540b97 Python: Exposes read-only and secondary store 2 years ago
Tpt 9b20dbe6dc CI: Merges nightly artifacts and release artifacts generation 2 years ago
Tpt 85d4c70171 CI: Do not run SPARQL result format fuzzing 2 years ago
Tpt 9d6b72e9c4 Sparql smith: Fixes PropertyListPath generation 2 years ago
Tpt 53edaf9d11 Server: Add a nicer error when writes are not allowed 2 years ago
Tpt 5eaa388312 Server: adds the "backup" command to copy the database 2 years ago
Tpt 26f4e2dc98 Allows to have secondary instances in memory 2 years ago
Tpt 5f68cb3746 Cleans up RocksDB wrapper code and implements backup from read-only and secondary instances 2 years ago
Tpt 1ffb559ee2 Oxrdf: Makes Display for Graph and Dataset return proper NTriples and NQuads 2 years ago
Tpt 1570a3a4f1 Makes Clippy happy 2 years ago
Tpt 6d4a15d067 Server: Load command: makes the "--file" option positional 2 years ago
Tpt d42e2a818c Server: Adds "dump" command 2 years ago
Tpt 84d6d48b0e Upgrades Rust dependencies 2 years ago
Tpt df55148355 Server: Adds serve-read-only and serve-secondary commands 2 years ago
Tpt b2385509a6 Improves read only and secondary test coverage 2 years ago
Tpt 855c39146d Replaces Store::open_with_options with explicit variants 2 years ago
Tpt df2233c51c JS: Optimizes release builds for size 2 years ago
Tpt 9729ec8ed3 Adds features annotations to docs.rs 2 years ago
Tpt cdd8866fd3 Runs rustdoc as part of the CI 2 years ago
Tpt f8486364b3 ClusterFuzzLite: adds fuzzing for TSV and JSON result formats 2 years ago
Tpt fab5db9511 Fuzzer: SPARQL results: Makes the fuzzers test serialization too 2 years ago
Tpt 9063867ec9 QuerySolution: implements Eq and Debug 2 years ago
Tpt 31c6bb7815 TSV SPARQL results: Add spaces around quoted triples "<<" 2 years ago
Tpt 7e7489499d OxRDF: Fixes Term::from_str() with a blank node in object position of a quoted triple 2 years ago
Tpt 42cd6b0094 XML parser results: clears the temporary data buffer while parsing 2 years ago
Tpt 6d09d77c61 CSV and TSV SPARQL results: always print trailing line jumps 2 years ago
Tpt a51509dcd3 TSV SPARQL results: Properly quote \t and use short Turtle serialization everywhere possible 2 years ago
Tpt a271e39fa0 Upgrades to RocksDB 7.10.2 2 years ago
Thomas Tanon cbc24950e3 Server REAME: adds a line about pre-built binaries 2 years ago
Tpt c3f0aa94bf CI: Builds also the server for ARM linux 2 years ago
Tpt 306271df61 CI: Builds also the server for ARM macs 2 years ago
Tpt af02d5e1c4 Release: stop pushing Python and JS artifact to GitHub release 2 years ago
Tpt c8caf805fa Uses clang-format for C/C++ code 2 years ago
Tpt 03df957427 Server: Allows loading from stdin 2 years ago
Tpt 4ce1b0e241 Server: Adds "graph" and "format" options to the loader 2 years ago
Tpt ef2701dc0c Makes new Clippy happy 2 years ago
Benedikt Seidl 2b271e45ac Add test for OpenAsReadOnly and OpenAsSecondary 2 years ago
Benedikt Seidl f7637ee5a5 Add option to open database in read-only mode 2 years ago
Benedikt Seidl aa9476b9cc Add option to open rocksdb in secondary mode 2 years ago
Tpt 62ff6ec138 Updates version numbers 2 years ago
Tpt c25a76c1f3 Releases v0.3.13 2 years ago
Tpt 7b81955d72 Tests: using new SPARQL tests directories 2 years ago
Tpt 909a906d2a XSD: Adds tests for "minimal conformance" 2 years ago
Tpt cb2c891979 Fixes formatting of times with decimal second number lower than 10 2 years ago
Tpt 7a0c457867 Updates version numbers 2 years ago
Tpt d80cdf3054 CI: Install llvm-symbolizer before running address sanitizer 2 years ago
Tpt 0668983cd6 Releases v0.3.12 2 years ago
Tpt b267d5ea07 Upgrades dependencies 2 years ago
Tpt c60dd0d3ca Installs cargo deny directly 2 years ago
Tpt 0786c40a5e Adds cargo-semver-checks to the CI 2 years ago
Tpt 524903b03d Uses -dev versions for versioning during development 2 years ago
Tpt f15101a2b3 Js: Uses Rome instead of JS standard 2 years ago
dependabot[bot] 70d4eef803 Update mypy requirement from ~=0.991.0 to ~=1.0 in /python 2 years ago
Tpt afdb1f76e8 Adds a typo linter as part of the CI 2 years ago
dependabot[bot] 6dc4aefe99 Update black requirement from ~=22.10 to ~=23.1 in /python 2 years ago
dependabot[bot] bd77bce2cd Bump docker/build-push-action from 3 to 4 2 years ago
Thomas 339a619f28 WIP: Releases musllinux wheels 2 years ago
Tpt 8684b82893 Properly expose oxigraph::sparql::QueryDataset 2 years ago
Tpt 2d19a19320 Fixes TriG serialization 2 years ago
Tpt 7fcf9e1051 Sponsors: + Magnus Bakken 2 years ago
Tpt 6375481a80 Testsuite: makes format detection more strict 2 years ago
Tpt 323ad73831 Testsuite: do not run rejected tests 2 years ago
Tpt 6cabf6da15 Oxrdf: adds NamedOrBlankNode to GraphName conversion functions 2 years ago
Tpt 575bb8d253 Reduces source code archive by removing .git 2 years ago
Tpt ca415ec044 Reduces RocksDB file size 2 years ago
Tpt f47b2b1a7b Fixes oxsdatatypes keywords 2 years ago
Tpt 44fc4eef1a Releases v0.3.11 2 years ago
Tpt 54356f5273 Updates dependencies 2 years ago
Tpt 3d9cbc5d14 Migrates to pyO3 0.18 2 years ago
Tpt 0c23589187 Upgrades to RocksDB 7.9.2 2 years ago
Tpt 47e5ef329e Adds a script to add RDF4J to the benchmark 2 years ago
Thomas Tanon c71f2d66b1 Increases fuzzer pruning timeout 2 years ago
Tpt 22a3c21c4e Improves oxsdatatypes documentation 2 years ago
Tpt 20928b82fa SPARQL smith: adds more functions 2 years ago
Tpt f969a66d05 Uses "ParseDecimalError" naming just like "ParseFloatError" 2 years ago
Thomas Tanon ea0b4e22e7 Increases fuzzer pruning timeout 2 years ago
Tpt 07e105e1be Property path: faster eval for closed path 2 years ago
Tpt 0a78eacfcd SPARQL: Makes sure there are no duplicates when evaluating (a|b) property paths 2 years ago
Tpt a7bc31b446 SPARQL regex: compile ahead of time if possible 2 years ago
Tpt 5c055e0d12 Removes Eq derive on query plan 2 years ago
Tpt 9fe5436f94 SPARQL smith: removes Debug derives 2 years ago
Tpt 1fa0633db3 Library: adds basic WASI support 2 years ago
Tpt f6e9ceccc1 Add back oxrocksdb-sys to the main workspace 2 years ago
Tpt d97eb9eb31 Properly exclude oxrocksdb-sys from the main workspace 2 years ago
Tpt 4927b3148e Moves oxrocksdb-sys out of the workspace 2 years ago
Tpt 54ce7410d2 CI: Adds dependency caching 2 years ago
Tpt d453721e8b Python: Uses typing.io for I/O types annotations 2 years ago
Tpt 719cde2eac Implements SEP-0002 ADJUST function 2 years ago
Tpt 3485833875 Makes Datetime.checked_sub return DayTimeDuration 2 years ago
Tpt 78c4e750ae Improves TimezoneOffset API 2 years ago
Tpt fe2b7c2e76 Attempt to setup clusterfuzzlite 2 years ago
Tpt 6539f0a72e SPARQL test: displays query results diffs 2 years ago
Tpt 76dd879ea6 Implements LATERAL join 2 years ago
Tpt 3f3523963d Maturin: fails if manylinux tag is not properly added 2 years ago
Tpt 14121b21f4 Upgrades RocksDB to v7.8.3 2 years ago
Tpt 854e29ee38 Makes XSD datatypes a standalone crate 2 years ago
Tpt 027da6d639 Python: Makes abi3 optional 2 years ago
Tpt 53913b7e96 CI: Removes test on push 2 years ago
Tpt de4c5abd9c Simplifies test manifest parser 2 years ago
Tpt 7fdd045516 Improves error handling code in testsuite and server 2 years ago
Tpt 1ded5ac4b4 SPARQL eval: Makes sure to avoid overflow when estimating result size upper bound 2 years ago
Tpt be3b009f5d CI: Validates Python 3.7 compatibility 2 years ago
Tpt 92feec7e98 CI: Validates Rust 1.60 compatibility 2 years ago
Tpt 686e1edc8e Set Rust MSRV to 1.60 2 years ago
Tpt 808c9db007 String formatting: uses inline syntax 2 years ago
Tpt f21ab0ea6a Python macOS: Do not build universal2 wheels anymore 2 years ago
Tpt e055c7b5f8 Releases v0.3.10 2 years ago
Tpt 59359b13d9 Manylinux wheels: install rustup "by hand" 2 years ago
Thomas Tanon 149d600e65 Adds DOI to CITATION.cff 2 years ago
Tpt a8f666fb5d Fuzzer: compares SPARQL evaluation with and without the optimizer 2 years ago
Tpt 24371412b4 SPARQL-smith: Adds FILTER EXISTS and literals in queries 2 years ago
Tpt d7e4d5583f SPARQL: Adds an option to disable the optimizer 2 years ago
Tpt 2ca8bd19d3 PlanBuilder: no need for &mut references 2 years ago
Tpt d65e587756 Properly evaluates property paths with unknown graphs 2 years ago
Tpt b6c9a5b429 Fixes ZeroOrX path evaluation on terms that are not in the dataset but only in the query 2 years ago
Tpt 07b5c32935 Upgrades Rio and sysinfo 2 years ago
Tpt 576760e417 Cargo publish: cargo now properly waits for the index propagation 2 years ago
Thomas Tanon 65ed5471da Uses quick-xml 0.26 2 years ago
Tpt e5b15031b6 Dependabot: only increase version number if necessary 2 years ago
Tpt 3712142e6f Python: makes type stubs and code valid according to Mypy strict mode 2 years ago
Tpt 45c541edad Releases v0.3.9 2 years ago
Tpt 45cd47d3c1 Updates dependencies 2 years ago
Tpt 7568aaab7b Removes posonlyargs usages 2 years ago
Tpt 6b02ac3c10 SPARQL UPDATE: Fixes Halloween problem 2 years ago
Tpt b7059d07e8 Adds very basic SPARQL evaluation fuzzer 2 years ago
Tpt 0ccdea2ff1 SPARQL: properly validates VALUE clauses 2 years ago
Thomas Tanon a6de2e59a5 Suggest to use -T instead of --data in cUrl commands 2 years ago
Tpt dd9201e466 Python wheel: releases also an ARM-only wheel 2 years ago
Tpt ecd4b423dc Server: Avoids unwrap() in test 2 years ago
Tpt 127ffc3547 Adds bulk loader option to HTTP API 2 years ago
Tpt 112631a0d7 Server: store POST: avoids arbitrary choice if the mime type matches both a graph and dataset format 2 years ago
Tpt 38fdffc147 Server: avoids big if let blocks 2 years ago
Tpt 74c565a690 Server: avoids some duplicated code 2 years ago
Tpt 3f7ff6843d Server: simplifies error related code 2 years ago
Tpt 7f89baad87 Do not run address sanitizer on server tests 2 years ago
Tpt 2de13a9498 Server: refactor bulk load code 2 years ago
Tpt 796780cd12 Attempt to make CLI test pass 2 years ago
Tpt c9762fc280 Uses anyhow for errors 2 years ago
Tpt ea4ae6bc48 Basic CLI integration tests 2 years ago
Tpt 7581d9a6b2 Makes Clippy happy 2 years ago
Tpt cda6b09d79 Fixes NOW() evaluation 2 years ago
Tpt c2fd3920b5 Upgrades rdf-tests and removes a workaround 2 years ago
Tpt 54c66279f3 Set version constraints for Python build dependency 2 years ago
Tpt 841c9a2066 Better validation of test manifests 2 years ago
Tpt 579c876f98 README: + Sponsors 2 years ago
Tpt 59aea75a30 Uses try_into for slice to array conversions 2 years ago
Tpt 9dc8dce69c Adds dependabot for GitHub actions 2 years ago
Tpt be51f90352 Server: Uses PathBuf instead of string for I/O 2 years ago
Tpt 1317fef237 Fixes Python type annotation 2 years ago
Tpt 7a259955d2 Makes Clippy happy 2 years ago
Thomas Tanon ffa16b3afd CITATION.cff: Fixes license field 2 years ago
Maximilian Goisser 325dc59f84 Add conversion from spargebra Query into oxigraph Query 2 years ago
Files changed (number of changed lines in parentheses):

1. .clusterfuzzlite/Dockerfile (4)
2. .clusterfuzzlite/build.sh (30)
3. .clusterfuzzlite/project.yaml (1)
4. .devcontainer/Dockerfile (21)
5. .devcontainer/devcontainer.json (69)
6. .github/DEPENDABOT.yml (16)
7. .github/ISSUE_TEMPLATE/bug_report.md (16)
8. .github/ISSUE_TEMPLATE/feature-request.md (20)
9. .github/ISSUE_TEMPLATE/question.md (10)
10. .github/actions/setup-rust/action.yml (27)
11. .github/workflows/artifacts.yml (372)
12. .github/workflows/install_rocksdb.sh (11)
13. .github/workflows/manylinux_build.sh (23)
14. .github/workflows/musllinux_build.sh (19)
15. .github/workflows/release.yml (276)
16. .github/workflows/tests.yml (508)
17. .gitmodules (10)
18. .mailmap (3)
19. .readthedocs.yaml (2)
20. CHANGELOG.md (328)
21. CITATION.cff (5)
22. Cargo.lock (1490)
23. Cargo.toml (266)
24. README.md (49)
25. bench/bsbm_blazegraph.sh (17)
26. bench/bsbm_graphdb.sh (21)
27. bench/bsbm_jena.sh (21)
28. bench/bsbm_oxigraph.sh (18)
29. bench/bsbm_rdf4j.sh (49)
30. bench/bsbm_virtuoso.sh (10)
31. bench/explanation_to_flamegraph.py (63)
32. bench/explanation_to_trace.py (52)
33. clippy.toml (2)
34. deny.toml (16)
35. docs/arch-diagram.svg (120)
36. docs/arch-diagram.txt (35)
37. fuzz/Cargo.toml (33)
38. fuzz/fuzz_targets/n3.rs (28)
39. fuzz/fuzz_targets/nquads.rs (84)
40. fuzz/fuzz_targets/rdf_xml.rs (35)
41. fuzz/fuzz_targets/sparql_eval.rs (61)
42. fuzz/fuzz_targets/sparql_query.rs (7)
43. fuzz/fuzz_targets/sparql_results_json.rs (15)
44. fuzz/fuzz_targets/sparql_results_tsv.rs (10)
45. fuzz/fuzz_targets/sparql_results_xml.rs (10)
46. fuzz/fuzz_targets/sparql_update.rs (6)
47. fuzz/fuzz_targets/trig.rs (166)
48. fuzz/src/lib.rs (1)
49. fuzz/src/result_format.rs (63)
50. js/Cargo.toml (27)
51. js/README.md (69)
52. js/biome.json (14)
53. js/build_package.js (42)
54. js/package-lock.json (1027)
55. js/package.json (37)
56. js/src/lib.rs (2)
57. js/src/model.rs (50)
58. js/src/store.rs (130)
59. js/src/utils.rs (1)
60. js/test/model.mjs (86)
61. js/test/store.mjs (361)
62. lib/Cargo.toml (63)
63. lib/README.md (85)
64. lib/benches/store.rs (208)
65. lib/oxigraph/Cargo.toml (59)
66. lib/oxigraph/README.md (82)
67. lib/oxigraph/src/io/format.rs (93)
68. lib/oxigraph/src/io/mod.rs (39)
69. lib/oxigraph/src/io/read.rs (199)
70. lib/oxigraph/src/io/write.rs (185)
71. lib/oxigraph/src/lib.rs (12)
72. lib/oxigraph/src/model.rs (22)
73. lib/oxigraph/src/sparql/algebra.rs (144)
74. lib/oxigraph/src/sparql/dataset.rs (8)
75. lib/oxigraph/src/sparql/error.rs (84)
76. lib/oxigraph/src/sparql/eval.rs (5870)
77. lib/oxigraph/src/sparql/http/dummy.rs (16)
78. lib/oxigraph/src/sparql/http/mod.rs (9)
79. lib/oxigraph/src/sparql/http/simple.rs (21)
80. lib/oxigraph/src/sparql/mod.rs (328)
81. lib/oxigraph/src/sparql/model.rs (371)
82. lib/oxigraph/src/sparql/results.rs (44)
83. lib/oxigraph/src/sparql/service.rs (73)
84. lib/oxigraph/src/sparql/update.rs (101)
85. lib/oxigraph/src/storage/backend/fallback.rs (78)
86. lib/oxigraph/src/storage/backend/mod.rs (12)
87. lib/oxigraph/src/storage/backend/oxi_rocksdb.rs (1445)
88. lib/oxigraph/src/storage/binary_encoder.rs (100)
89. lib/oxigraph/src/storage/error.rs (139)
90. lib/oxigraph/src/storage/mod.rs (392)
91. lib/oxigraph/src/storage/numeric_encoder.rs (127)
92. lib/oxigraph/src/storage/small_string.rs (54)
93. lib/oxigraph/src/store.rs (1382)
94. lib/oxigraph/tests/rocksdb_bc_data/000003.log (0)
95. lib/oxigraph/tests/rocksdb_bc_data/CURRENT (0)
96. lib/oxigraph/tests/rocksdb_bc_data/IDENTITY (0)
97. lib/oxigraph/tests/rocksdb_bc_data/LOCK (0)
98. lib/oxigraph/tests/rocksdb_bc_data/MANIFEST-000004 (0)
99. lib/oxigraph/tests/rocksdb_bc_data/OPTIONS-000026 (0)
100. lib/oxigraph/tests/store.rs (315)
101. Some files were not shown because too many files have changed in this diff.

.clusterfuzzlite/Dockerfile
@@ -0,0 +1,4 @@
FROM gcr.io/oss-fuzz-base/base-builder-rust:v1
COPY . $SRC/oxigraph
WORKDIR oxigraph
COPY .clusterfuzzlite/build.sh $SRC/

.clusterfuzzlite/build.sh
@@ -0,0 +1,30 @@
#!/bin/bash -eu
shopt -s globstar
function build_seed_corpus() {
  mkdir "/tmp/oxigraph_$1"
  for file in **/*."$2"
  do
    hash=$(sha256sum "$file" | awk '{print $1;}')
    cp "$file" "/tmp/oxigraph_$1/$hash"
  done
  zip "$1_seed_corpus.zip" /tmp/"oxigraph_$1"/*
  rm -r "/tmp/oxigraph_$1"
}

cd "$SRC"/oxigraph
git submodule init
git submodule update
cargo fuzz build -O --debug-assertions
for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml n3 nquads trig rdf_xml
do
  cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/
done
build_seed_corpus sparql_results_json srj
build_seed_corpus sparql_results_tsv tsv
build_seed_corpus sparql_results_xml srx
build_seed_corpus n3 n3
build_seed_corpus nquads nq
build_seed_corpus trig trig
build_seed_corpus rdf_xml rdf
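
For context (not part of this diff): this build script is the hook that ClusterFuzzLite runs inside the OSS-Fuzz base image declared in the Dockerfile above; it builds the cargo-fuzz targets and packages seed corpora per format. A minimal sketch of the kind of GitHub Actions workflow that would drive it follows; the workflow name, action versions, and fuzz-seconds value are illustrative assumptions, not taken from this repository:

# Hypothetical ClusterFuzzLite caller (illustrative only), executes .clusterfuzzlite/build.sh.
name: ClusterFuzzLite PR fuzzing
on:
  pull_request:
jobs:
  fuzzing:
    runs-on: ubuntu-latest
    steps:
      - uses: google/clusterfuzzlite/actions/build_fuzzers@v1
        with:
          language: rust
      - uses: google/clusterfuzzlite/actions/run_fuzzers@v1
        with:
          fuzz-seconds: 300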

.devcontainer/Dockerfile
@@ -1,21 +0,0 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust/.devcontainer/base.Dockerfile
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye
ARG VARIANT="bullseye"
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT}
# [Optional] Uncomment this section to install additional packages.
RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
    && apt-get -y install --no-install-recommends \
        python3 \
        python3-venv \
        python-is-python3 \
        libclang-dev
ENV VIRTUAL_ENV=/opt/venv
RUN python -m venv $VIRTUAL_ENV
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
RUN pip install --no-cache-dir -r python/requirements.dev.txt
# Change owner to the devcontainer user
RUN chown -R 1000:1000 $VIRTUAL_ENV

.devcontainer/devcontainer.json
@@ -1,69 +0,0 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.241.1/containers/rust
{
  "name": "Rust",
  "build": {
    "dockerfile": "Dockerfile",
    "args": {
      // Use the VARIANT arg to pick a Debian OS version: buster, bullseye
      // Use bullseye when on local on arm64/Apple Silicon.
      "VARIANT": "bullseye"
    }
  },
  "runArgs": ["--cap-add=SYS_PTRACE", "--security-opt", "seccomp=unconfined"],

  // Configure tool-specific properties.
  "customizations": {
    // Configure properties specific to VS Code.
    "vscode": {
      // Set *default* container specific settings.json values on container create.
      "settings": {
        "lldb.executable": "/usr/bin/lldb",
        // VS Code don't watch files under ./target
        "files.watcherExclude": {
          "**/target/**": true
        },
        "rust-analyzer.checkOnSave.command": "clippy",
        "python.defaultInterpreterPath": "/opt/venv/bin/python",
        "python.linting.enabled": true,
        "python.linting.pylintEnabled": true,
        "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8",
        "python.formatting.blackPath": "/usr/local/py-utils/bin/black",
        "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf",
        "python.linting.banditPath": "/usr/local/py-utils/bin/bandit",
        "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8",
        "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
        "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
        "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
        "python.linting.pylintPath": "/opt/venv/bin/pylint",
        "python.testing.pytestPath": "/opt/venv/bin/pytest"
      },
      // Add the IDs of extensions you want installed when the container is created.
      "extensions": [
        "vadimcn.vscode-lldb",
        "mutantdino.resourcemonitor",
        "rust-lang.rust-analyzer",
        "tamasfe.even-better-toml",
        "serayuzgur.crates",
        "ms-python.python",
        "ms-python.vscode-pylance",
        "esbenp.prettier-vscode",
        "stardog-union.stardog-rdf-grammars"
      ]
    }
  },

  // Use 'forwardPorts' to make a list of ports inside the container available locally.
  // "forwardPorts": [],

  // Use 'postCreateCommand' to run commands after the container is created.
  "postCreateCommand": "git submodule update --init && cargo build",

  // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
  "remoteUser": "vscode",
  "features": {
    "python": "3.10"
  }
}

.github/DEPENDABOT.yml
@@ -0,0 +1,16 @@
version: 2
updates:
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: weekly
  - package-ecosystem: "pip"
    directory: "/python/"
    versioning-strategy: increase-if-necessary
    schedule:
      interval: weekly
  - package-ecosystem: "npm"
    directory: "/js/"
    versioning-strategy: increase-if-necessary
    schedule:
      interval: weekly

.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,16 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: ''
---
**Describe the bug**
A clear and concise description of what the bug is.
**To Reproduce**
Steps to reproduce the behavior:
1. Which version of Oxigraph are you using? On which platform?
2. A command-line or a code snippet that triggers the bug.

.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,20 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: ''
---
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
**Additional context**
Please link to other systems implementing the feature, specification of it if it exists and/or existing documentation about this feature.

.github/ISSUE_TEMPLATE/question.md
@@ -0,0 +1,10 @@
---
name: Question
about: Please don't use issues but the Q&A section of the "discussions" space
title: ''
labels: question
assignees: ''
---

.github/actions/setup-rust/action.yml
@@ -0,0 +1,27 @@
name: 'Setup Rust'
description: 'Setup Rust using Rustup'
inputs:
  version:
    description: 'Rust version to use. By default latest stable version'
    required: false
    default: 'stable'
  component:
    description: 'Rust extra component to install like clippy'
    required: false
  target:
    description: 'Rust extra target to install like wasm32-unknown-unknown'
    required: false
runs:
  using: "composite"
  steps:
    - run: rustup update
      shell: bash
    - run: rustup default ${{ inputs.version }}
      shell: bash
    - run: rustup component add ${{ inputs.component }}
      shell: bash
      if: ${{ inputs.component }}
    - run: rustup target add ${{ inputs.target }}
      shell: bash
      if: ${{ inputs.target }}
    - uses: Swatinem/rust-cache@v2
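
For context (not part of this diff): a composite action like this is consumed from workflow jobs through a relative `uses:` path, with the optional inputs passed under `with:`; the artifacts workflow below does exactly that with the `target` input. A minimal sketch of a caller, where the job name and pinned toolchain version are illustrative only:

# Illustrative caller of ./.github/actions/setup-rust (not part of this diff).
jobs:
  clippy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - uses: ./.github/actions/setup-rust
        with:
          version: 1.76.0      # optional; defaults to 'stable'
          component: clippy
      - run: cargo clippy --all-targets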

@ -1,9 +1,13 @@
name: Nightly artifacts name: Artifacts
on: on:
push: push:
branches: branches:
- main - main
- next
release:
types:
- published
concurrency: concurrency:
group: ${{ github.workflow }}-${{ github.ref }} group: ${{ github.workflow }}-${{ github.ref }}
@ -11,17 +15,42 @@ concurrency:
jobs: jobs:
binary_linux: binary_linux:
runs-on: ubuntu-latest runs-on: ubuntu-20.04
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- run: cargo build --release - uses: ./.github/actions/setup-rust
working-directory: ./server with:
- uses: actions/upload-artifact@v3 target: aarch64-unknown-linux-gnu
- run: |
sudo apt-get update && sudo apt-get install -y g++-aarch64-linux-gnu
mkdir .cargo
echo -e "[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml
- run: cargo build --release --no-default-features --features rustls-native
working-directory: ./cli
- run: cargo build --release --target aarch64-unknown-linux-gnu --no-default-features --features rustls-native
working-directory: ./cli
env:
BINDGEN_EXTRA_CLANG_ARGS: --sysroot /usr/aarch64-linux-gnu
- uses: actions/upload-artifact@v4
with:
name: oxigraph_x86_64_linux_gnu
path: target/release/oxigraph
- uses: actions/upload-artifact@v4
with:
name: oxigraph_aarch64_linux_gnu
path: target/aarch64-unknown-linux-gnu/release/oxigraph
- run: mv target/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_x86_64_linux_gnu
if: github.event_name == 'release'
- run: mv target/aarch64-unknown-linux-gnu/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_aarch64_linux_gnu
if: github.event_name == 'release'
- uses: softprops/action-gh-release@v1
with: with:
name: oxigraph_server_x86_64_linux_gnu files: |
path: target/release/oxigraph_server oxigraph_${{ github.event.release.tag_name }}_x86_64_linux_gnu
oxigraph_${{ github.event.release.tag_name }}_aarch64_linux_gnu
if: github.event_name == 'release'
binary_mac: binary_mac:
runs-on: macos-latest runs-on: macos-latest
@ -30,43 +59,124 @@ jobs:
SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk' SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
MACOSX_DEPLOYMENT_TARGET: '10.14' MACOSX_DEPLOYMENT_TARGET: '10.14'
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- run: rustup update - uses: ./.github/actions/setup-rust
with:
target: aarch64-apple-darwin
- run: cargo build --release - run: cargo build --release
working-directory: ./server working-directory: ./cli
- uses: actions/upload-artifact@v3 - run: cargo build --release --target aarch64-apple-darwin
working-directory: ./cli
- uses: actions/upload-artifact@v4
with:
name: oxigraph_x86_64_apple
path: target/release/oxigraph
- uses: actions/upload-artifact@v4
with:
name: oxigraph_aarch64_apple
path: target/aarch64-apple-darwin/release/oxigraph
- run: mv target/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_x86_64_apple
if: github.event_name == 'release'
- run: mv target/aarch64-apple-darwin/release/oxigraph oxigraph_${{ github.event.release.tag_name }}_aarch64_apple
if: github.event_name == 'release'
- uses: softprops/action-gh-release@v1
with: with:
name: oxigraph_server_x86_64_apple files: |
path: target/release/oxigraph_server oxigraph_${{ github.event.release.tag_name }}_x86_64_apple
oxigraph_${{ github.event.release.tag_name }}_aarch64_apple
if: github.event_name == 'release'
binary_windows: binary_windows:
runs-on: windows-latest runs-on: windows-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- run: rustup update - uses: ./.github/actions/setup-rust
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse - run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo build --release - run: cargo build --release
working-directory: ./server working-directory: ./cli
- uses: actions/upload-artifact@v3 - uses: actions/upload-artifact@v4
with:
name: oxigraph_x86_64_windows_msvc
path: target/release/oxigraph.exe
- run: mv target/release/oxigraph.exe oxigraph_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
if: github.event_name == 'release'
- uses: softprops/action-gh-release@v1
with: with:
name: oxigraph_server_x86_64_windows_msvc files: oxigraph_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
path: target/release/oxigraph_server.exe if: github.event_name == 'release'
python_sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
working-directory: ./python
- run: maturin sdist -m python/Cargo.toml
- uses: actions/upload-artifact@v4
with:
name: pyoxigraph_sdist
path: target/wheels/*.tar.gz
wheel_linux: wheel_linux:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy:
matrix:
architecture: [ "x86_64", "aarch64" ]
continue-on-error: true
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: docker/setup-qemu-action@v2
with:
platforms: linux/${{ matrix.architecture }}
if: github.event_name == 'release' && matrix.architecture != 'x86_64'
- uses: ./.github/actions/setup-rust
- run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/manylinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/manylinux_build_script.sh
- run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/manylinux2014_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/manylinux_build_script.sh
if: github.event_name == 'release' || matrix.architecture == 'x86_64'
- uses: actions/upload-artifact@v4
with:
name: pyoxigraph_${{ matrix.architecture }}_linux_gnu
path: target/wheels/*.whl
wheel_linux_musl:
runs-on: ubuntu-latest
strategy:
matrix:
architecture: [ "x86_64", "aarch64" ]
continue-on-error: true
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
with: with:
submodules: true submodules: true
- run: sed 's/%arch%/x86_64/g' .github/workflows/manylinux_build.sh > .github/workflows/manylinux_build_script.sh - uses: docker/setup-qemu-action@v2
- run: docker run -v "$(pwd)":/workdir --platform linux/x86_64 quay.io/pypa/manylinux2014_x86_64 /bin/bash /workdir/.github/workflows/manylinux_build_script.sh with:
- uses: actions/upload-artifact@v3 platforms: linux/${{ matrix.architecture }}
if: github.event_name == 'release' && matrix.architecture != 'x86_64'
- uses: ./.github/actions/setup-rust
- run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/musllinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/musllinux_build_script.sh
- run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_2_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh
if: github.event_name == 'release' || matrix.architecture == 'x86_64'
- uses: actions/upload-artifact@v4
with: with:
name: pyoxigraph_x86_64_linux name: pyoxigraph_${{ matrix.architecture }}_linux_musl
path: target/wheels/*.whl path: target/wheels/*.whl
wheel_mac: wheel_mac:
@ -76,85 +186,237 @@ jobs:
SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
MACOSX_DEPLOYMENT_TARGET: '10.14'
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: aarch64-apple-darwin
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: maturin build --release --features abi3
working-directory: ./python
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
working-directory: ./python
- run: maturin build --release --target universal2-apple-darwin --features abi3
working-directory: ./python
- run: maturin build --release --features abi3
working-directory: ./python
if: github.event_name == 'release'
- run: maturin build --release --target aarch64-apple-darwin --features abi3
working-directory: ./python
if: github.event_name == 'release'
- uses: actions/upload-artifact@v4
with:
name: pyoxigraph_macos
path: target/wheels/*.whl
wheel_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: pip install -r python/requirements.dev.txt
- run: maturin build --release --features abi3
working-directory: ./python
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
working-directory: ./python
- run: maturin build --release -m python/Cargo.toml --features abi3
- uses: actions/upload-artifact@v4
with:
name: pyoxigraph_windows
path: target/wheels/*.whl
publish_pypi:
if: github.event_name == 'release'
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/pyoxigraph
permissions:
id-token: write
needs:
- python_sdist
- wheel_windows
- wheel_mac
- wheel_linux
- wheel_linux_musl
steps:
- uses: actions/download-artifact@v4
with:
pattern: pyoxigraph_*
path: dist
merge-multiple: true
- uses: pypa/gh-action-pypi-publish@release/v1
with:
packages-dir: dist
skip-existing: true
npm_tarball:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: wasm-pack }
- uses: actions/setup-node@v4
with:
node-version: 16
cache: npm
cache-dependency-path: "js/package.json"
registry-url: https://registry.npmjs.org
- run: npm run pack
working-directory: ./js
- uses: actions/upload-artifact@v4
with:
name: oxigraph_wasm_npm
path: js/*.tgz
- run: npm run release
working-directory: ./js
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
if: github.event_name == 'release'
docker:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{github.actor}}
password: ${{secrets.GITHUB_TOKEN}}
- uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
if: github.event_name == 'release'
- uses: docker/metadata-action@v5
id: docker_meta
with:
images: |
${{ github.repository }},enable=${{ github.event_name == 'release' }}
ghcr.io/${{ github.repository }}
tags: |
type=ref,event=branch
type=ref,event=pr
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
- uses: docker/build-push-action@v5
with:
context: .
file: server/Dockerfile
platforms: linux/amd64,linux/arm64
pull: true
push: true
tags: ${{ steps.docker_meta.outputs.tags }}
labels: ${{ steps.docker_meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
publish_crates:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- run: rustup update
- run: cargo login $CRATES_IO_TOKEN
env:
CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
- run: cargo publish
working-directory: ./oxrocksdb-sys
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxsdatatypes
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxrdf
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxrdfxml
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxttl
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxrdfio
continue-on-error: true
- run: cargo publish
working-directory: ./lib/sparesults
continue-on-error: true
- run: cargo publish
working-directory: ./lib/spargebra
continue-on-error: true
- run: cargo publish
working-directory: ./lib/sparopt
continue-on-error: true
- run: cargo publish
working-directory: ./lib/sparql-smith
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxigraph
continue-on-error: true
- run: cargo publish
working-directory: ./cli
homebrew:
if: "github.event_name == 'release' && !contains('-', github.event.release.tag_name)"
runs-on: ubuntu-latest
needs: full_archive
steps:
- uses: actions/checkout@v4
with:
repository: oxigraph/homebrew-oxigraph
token: ${{ secrets.FULL_ACCESS_TOKEN }}
- run: |
wget "https://github.com/oxigraph/oxigraph/releases/download/${{ github.event.release.tag_name }}/oxigraph_${{ github.event.release.tag_name }}.tar.gz"
SHA=`shasum -a 256 "oxigraph_${{ github.event.release.tag_name }}.tar.gz" | awk '{ print $1 }'`
rm "oxigraph_${{ github.event.release.tag_name }}.tar.gz"
sed -i "s/download\/.*\.tar/download\/${{ github.event.release.tag_name }}\/oxigraph_${{ github.event.release.tag_name }}.tar/g" Formula/oxigraph.rb
sed -i "s/sha256 \".*\"/sha256 \"$SHA\"/g" Formula/oxigraph.rb
git config user.name github-actions
git config user.email github-actions@github.com
git add .
git diff-index --quiet HEAD || git commit -m "Upgrades to ${{ github.event.release.tag_name }}"
git push
full_archive:
if: github.event_name == 'release'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- run: |
rm -rf .git bench fuzz
zip -r oxigraph_${{ github.event.release.tag_name }}.zip .
tar -czf /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
mv /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
- uses: softprops/action-gh-release@v1
with:
files: |
oxigraph_${{ github.event.release.tag_name }}.zip
oxigraph_${{ github.event.release.tag_name }}.tar.gz

@ -0,0 +1,11 @@
if [ -f "rocksdb" ]
then
cd rocksdb || exit
else
git clone https://github.com/facebook/rocksdb.git
cd rocksdb || exit
git checkout v8.0.0
make shared_lib
fi
sudo make install-shared
sudo ldconfig /usr/local/lib

@ -2,12 +2,23 @@ cd /workdir
yum -y install centos-release-scl-rh
yum -y install llvm-toolset-7.0
source scl_source enable llvm-toolset-7.0
curl https://static.rust-lang.org/rustup/dist/%arch%-unknown-linux-gnu/rustup-init --output rustup-init
chmod +x rustup-init
./rustup-init -y --profile minimal
source "$HOME/.cargo/env"
export PATH="${PATH}:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
cd python
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements.dev.txt
maturin develop --release
python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
maturin build --release --features abi3 --compatibility manylinux2014
if [ %for_each_version% ]; then
for VERSION in 8 9 10 11 12; do
maturin build --release --interpreter "python3.$VERSION" --compatibility manylinux2014
done
for VERSION in 9 10; do
maturin build --release --interpreter "pypy3.$VERSION" --compatibility manylinux2014
done
fi

@ -0,0 +1,19 @@
cd /workdir
apk add clang-dev
curl https://static.rust-lang.org/rustup/dist/%arch%-unknown-linux-musl/rustup-init --output rustup-init
chmod +x rustup-init
./rustup-init -y --profile minimal
source "$HOME/.cargo/env"
export PATH="${PATH}:/opt/python/cp37-cp37m/bin:/opt/python/cp38-cp38/bin:/opt/python/cp39-cp39/bin:/opt/python/cp310-cp310/bin:/opt/python/cp311-cp311/bin"
cd python
python3.12 -m venv venv
source venv/bin/activate
pip install -r requirements.dev.txt
maturin develop --release
python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
maturin build --release --features abi3 --compatibility musllinux_1_2
if [ %for_each_version% ]; then
for VERSION in 8 9 10 11 12; do
maturin build --release --interpreter "python3.$VERSION" --compatibility musllinux_1_2
done
fi

@ -1,276 +0,0 @@
name: Release artifacts
on:
release:
types: [ published ]
jobs:
push_server_to_docker_registry:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: docker/setup-buildx-action@v2
- uses: docker/metadata-action@v4
id: docker_meta
with:
images: |
${{ github.repository }}
ghcr.io/${{ github.repository }}
- uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{github.actor}}
password: ${{secrets.GITHUB_TOKEN}}
- uses: docker/build-push-action@v3
with:
context: .
file: server/Dockerfile
pull: true
push: true
tags: ${{ steps.docker_meta.outputs.tags }}
labels: ${{ steps.docker_meta.outputs.labels }}
cache-from: type=gha
cache-to: type=gha,mode=max
publish_crates:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- run: cargo login $CRATES_IO_TOKEN
env:
CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }}
- run: cargo publish
working-directory: ./oxrocksdb-sys
continue-on-error: true
- run: cargo publish
working-directory: ./lib/oxrdf
continue-on-error: true
- run: sleep 60
- run: cargo publish
working-directory: ./lib/sparesults
continue-on-error: true
- run: cargo publish
working-directory: ./lib/spargebra
continue-on-error: true
- run: sleep 60
- run: cargo publish
working-directory: ./lib
continue-on-error: true
- run: sleep 60
- run: cargo publish
working-directory: ./server
publish_pypi_linux:
runs-on: ubuntu-latest
strategy:
matrix:
architecture: [ "x86_64", "aarch64" ]
continue-on-error: true
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: docker/setup-qemu-action@v2
with:
platforms: linux/${{ matrix.architecture }}
if: matrix.architecture != 'x86_64'
- run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/manylinux_build.sh > .github/workflows/manylinux_build_script.sh
- run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/manylinux2014_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/manylinux_build_script.sh
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}
packages_dir: target/wheels
- uses: softprops/action-gh-release@v1
with:
files: target/wheels/*.whl
publish_pypi_mac:
runs-on: macos-latest
env:
DEVELOPER_DIR: '/Applications/Xcode.app/Contents/Developer'
SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
MACOSX_DEPLOYMENT_TARGET: '10.14'
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- run: rustup update && rustup target add aarch64-apple-darwin
- run: pip install -r python/requirements.dev.txt
- run: maturin build --release -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: maturin publish --no-sdist --universal2 -m python/Cargo.toml -u __token__ -p ${{ secrets.PYPI_PASSWORD }}
- run: maturin publish --no-sdist -m python/Cargo.toml -u __token__ -p ${{ secrets.PYPI_PASSWORD }}
- uses: softprops/action-gh-release@v1
with:
files: target/wheels/*.whl
publish_pypi_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- run: rustup update
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: pip install -r python/requirements.dev.txt
- run: maturin build --release -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: maturin publish --no-sdist -m python/Cargo.toml -u __token__ -p ${{ secrets.PYPI_PASSWORD }}
- uses: softprops/action-gh-release@v1
with:
files: target/wheels/*.whl
publish_pypi_stdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/setup-python@v4
with:
python-version: "3.10"
- run: rustup update
- run: pip install -r python/requirements.dev.txt
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --black
working-directory: ./python
- run: maturin sdist -m python/Cargo.toml
- uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_PASSWORD }}
packages_dir: target/wheels
- uses: softprops/action-gh-release@v1
with:
files: target/wheels/*.tar.gz
publish_npm:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/setup-node@v3
with:
node-version: 16
registry-url: https://registry.npmjs.org
- run: rustup update
- run: cargo install wasm-pack
- run: npm install
working-directory: ./js
- run: npm run release
working-directory: ./js
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
- run: npm run pack
working-directory: ./js
- uses: softprops/action-gh-release@v1
with:
files: js/*.tgz
publish_full_archive:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: |
zip -r oxigraph_${{ github.event.release.tag_name }}.zip .
tar -czf /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
mv /tmp/oxigraph_${{ github.event.release.tag_name }}.tar.gz .
- uses: softprops/action-gh-release@v1
with:
files: |
oxigraph_${{ github.event.release.tag_name }}.zip
oxigraph_${{ github.event.release.tag_name }}.tar.gz
publish_homebrew:
if: "!contains('-', github.event.release.tag_name)"
runs-on: ubuntu-latest
needs: publish_full_archive
steps:
- uses: actions/checkout@v3
with:
repository: oxigraph/homebrew-oxigraph
token: ${{ secrets.FULL_ACCESS_TOKEN }}
- run: |
wget "https://github.com/oxigraph/oxigraph/releases/download/${{ github.event.release.tag_name }}/oxigraph_${{ github.event.release.tag_name }}.tar.gz"
SHA=`shasum -a 256 "oxigraph_${{ github.event.release.tag_name }}.tar.gz" | awk '{ print $1 }'`
rm "oxigraph_${{ github.event.release.tag_name }}.tar.gz"
sed -i "s/download\/.*\.tar/download\/${{ github.event.release.tag_name }}\/oxigraph_${{ github.event.release.tag_name }}.tar/g" Formula/oxigraph.rb
sed -i "s/sha256 \".*\"/sha256 \"$SHA\"/g" Formula/oxigraph.rb
git config user.name github-actions
git config user.email github-actions@github.com
git add .
git diff-index --quiet HEAD || git commit -m "Upgrades to ${{ github.event.release.tag_name }}"
git push
publish_binary_linux:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: cargo build --release
working-directory: ./server
- run: mv target/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_x86_64_linux_gnu
- uses: softprops/action-gh-release@v1
with:
files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_linux_gnu
publish_binary_mac:
runs-on: macos-latest
env:
DEVELOPER_DIR: '/Applications/Xcode.app/Contents/Developer'
SDKROOT: '/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk'
MACOSX_DEPLOYMENT_TARGET: '10.14'
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- run: cargo build --release
working-directory: ./server
- run: mv target/release/oxigraph_server oxigraph_server_${{ github.event.release.tag_name }}_x86_64_apple
- uses: softprops/action-gh-release@v1
with:
files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_apple
publish_binary_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v3
with:
submodules: true
- run: rustup update
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo build --release
working-directory: ./server
- run: mv target/release/oxigraph_server.exe oxigraph_server_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe
- uses: softprops/action-gh-release@v1
with:
files: oxigraph_server_${{ github.event.release.tag_name }}_x86_64_windows_msvc.exe

@ -1,16 +1,12 @@
name: Change tests
on:
pull_request:
branches:
- main
- next
schedule:
- cron: "12 3 * * *"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@ -20,72 +16,261 @@ jobs:
fmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
with:
component: rustfmt
- run: cargo fmt -- --check
clippy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
component: clippy
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxsdatatypes
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdf
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfxml
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxttl
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
working-directory: ./lib/oxrdfio
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy --all-targets --features async-tokio -- -D warnings -D clippy::all
working-directory: ./lib/sparesults
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/spargebra
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/sparopt
- run: cargo clippy --all-targets --no-default-features -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./python
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./cli
- run: cargo clippy --all-targets -- -D warnings -D clippy::all
working-directory: ./testsuite
clippy_wasm_js:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-unknown-unknown
component: clippy
- run: cargo clippy --lib --tests --target wasm32-unknown-unknown -- -D warnings -D clippy::all
working-directory: ./js
clippy_wasi:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-wasi
component: clippy
- run: cargo clippy --lib --tests --target wasm32-wasi -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --target wasm32-wasi --features abi3 --no-default-features -- -D warnings -D clippy::all
working-directory: ./python
clippy_wasm_emscripten:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-unknown-emscripten
component: clippy
- run: cargo clippy --lib --tests --target wasm32-unknown-emscripten -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
- run: cargo clippy --target wasm32-unknown-emscripten --features abi3 -- -D warnings -D clippy::all
working-directory: ./python
clippy_wasm_unknown:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
target: wasm32-unknown-unknown
component: clippy
- run: cargo clippy --lib --tests --target wasm32-unknown-unknown --features getrandom/custom --features oxsdatatypes/custom-now -- -D warnings -D clippy::all
working-directory: ./lib/oxigraph
deny:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: taiki-e/install-action@v2
with: { tool: cargo-deny }
- run: cargo deny check
semver_checks:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-semver-checks }
- uses: actions/cache@v4
with:
path: rocksdb
key: ${{ runner.os }}-rocksdb-8.0.0
- run: bash .github/workflows/install_rocksdb.sh
- run: cargo semver-checks check-release --exclude oxrocksdb-sys --exclude oxigraph-js --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
test_linux_x86_64:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: cargo test
test_linux_i686:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: i686-unknown-linux-gnu
- run: sudo apt-get update && sudo apt-get install -y g++-multilib
- run: cargo test --target i686-unknown-linux-gnu --no-default-features --features http-client-rustls-native
working-directory: ./lib/oxigraph
test_linux_msv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Read MSRV from Cargo.toml
id: metadata
run: echo "rust-version=$(sed -ne 's/rust-version *= *\"\(.*\)\"/\1/p' Cargo.toml)" >> $GITHUB_OUTPUT
- uses: ./.github/actions/setup-rust
with:
version: ${{ steps.metadata.outputs.rust-version }}
- run: rustup toolchain install nightly
- run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions && cargo update -p bumpalo --precise 3.14.0
- run: cargo test
test_linux_latest:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: rm Cargo.lock && cargo update
- run: cargo test
test_linux_address_sanitizer:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: nightly
- run: sudo apt-get update && sudo apt-get install -y llvm
- run: cargo test --tests --target x86_64-unknown-linux-gnu --workspace --exclude pyoxigraph --exclude oxigraph-testsuite --exclude oxigraph-cli
env:
RUST_BACKTRACE: 1
RUSTFLAGS: -Z sanitizer=address
test_linux_dynamic_linking:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/cache@v4
with:
path: rocksdb
key: ${{ runner.os }}-rocksdb-8.0.0
- run: bash .github/workflows/install_rocksdb.sh
- run: cargo test --tests --features oxrocksdb-sys/pkg-config
test_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- run: Remove-Item -LiteralPath "C:\msys64\" -Force -Recurse
- run: cargo test
test_wasi:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
target: wasm32-wasi
- uses: taiki-e/install-action@v2
with: { tool: "wasmtime,cargo-wasi" }
- run: cargo wasi test --workspace --exclude oxigraph-js --exclude oxigraph-cli --exclude oxigraph-testsuite --exclude oxrocksdb-sys --exclude pyoxigraph
rustdoc:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
with:
version: 1.76.0
- run: cargo doc
env:
RUSTDOCFLAGS: -D warnings
js:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: wasm-pack }
- uses: actions/setup-node@v4
with:
node-version: 18
cache: npm
cache-dependency-path: "js/package.json"
- run: npm install
working-directory: ./js
- run: npm test
@ -94,27 +279,258 @@ jobs:
python:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python -m unittest
working-directory: ./python/tests
- run: sphinx-build -M doctest . build
working-directory: ./python/docs
- run: sphinx-build -M html . build
working-directory: ./python/docs
- run: python generate_stubs.py pyoxigraph pyoxigraph.pyi --ruff
working-directory: ./python
- run: python -m mypy.stubtest pyoxigraph --allowlist=mypy_allowlist.txt
working-directory: ./python
- run: python -m mypy generate_stubs.py tests --strict
working-directory: ./python
- run: python -m ruff format --check .
working-directory: ./python
- run: python -m ruff check --output-format=github .
working-directory: ./python working-directory: ./python
- run: sphinx-lint docs
working-directory: ./python
python_msv:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- name: Read MSRV from Cargo.toml
id: metadata
run: echo "rust-version=$(sed -ne 's/rust-version *= *\"\(.*\)\"/\1/p' Cargo.toml)" >> $GITHUB_OUTPUT
- uses: ./.github/actions/setup-rust
with:
version: ${{ steps.metadata.outputs.rust-version }}
- run: rustup toolchain install nightly
- uses: actions/setup-python@v5
with:
python-version: "3.8"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: rm Cargo.lock && cargo +nightly update -Z direct-minimal-versions
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python -m unittest
working-directory: ./python/tests
python_pypy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
with:
python-version: "pypy3.10"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install -r python/requirements.dev.txt
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python -m unittest
working-directory: ./python/tests
python_windows:
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- run: pip install "maturin~=1.0"
- run: maturin build -m python/Cargo.toml
- run: pip install --no-index --find-links=target/wheels/ pyoxigraph
- run: rm -r target/wheels
- run: python -m unittest
working-directory: ./python/tests
python_pyodide:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/setup-python@v5
with:
python-version: "3.11"
cache: pip
cache-dependency-path: '**/requirements.dev.txt'
- uses: ./.github/actions/setup-rust
with:
version: nightly
target: wasm32-unknown-emscripten
- run: |
pip install pyodide-build
echo EMSCRIPTEN_VERSION=$(pyodide config get emscripten_version) >> $GITHUB_ENV
- uses: mymindstorm/setup-emsdk@v13
with:
version: ${{ env.EMSCRIPTEN_VERSION }}
- run: pyodide build
working-directory: ./python
- run: |
pyodide venv venv
source venv/bin/activate
pip install --no-index --find-links=../dist/ pyoxigraph
python -m unittest
working-directory: ./python/tests
typos:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: taiki-e/install-action@v2
with: { tool: typos-cli }
- run: typos
clang_fmt:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get update && sudo apt-get install -y clang-format
- run: clang-format --Werror --dry-run oxrocksdb-sys/api/*
fuzz_changes:
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
steps:
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
github-token: ${{ secrets.GITHUB_TOKEN }}
sanitizer: address
storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 900
mode: code-change
sanitizer: address
minimize-crashes: true
parallel-fuzzing: true
storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
fuzz_repo:
if: github.event_name != 'pull_request'
runs-on: ubuntu-latest
steps:
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
github-token: ${{ secrets.GITHUB_TOKEN }}
sanitizer: address
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 7200
mode: batch
sanitizer: address
minimize-crashes: true
parallel-fuzzing: true
storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
continue-on-error: true
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 3600
mode: prune
storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
continue-on-error: true
fuzz_coverage:
if: github.event_name != 'pull_request'
needs: fuzz_repo
runs-on: ubuntu-latest
steps:
- uses: google/clusterfuzzlite/actions/build_fuzzers@v1
with:
language: rust
sanitizer: coverage
- uses: google/clusterfuzzlite/actions/run_fuzzers@v1
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
fuzz-seconds: 3600
mode: coverage
sanitizer: coverage
storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git
shellcheck:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: sudo apt-get update && sudo apt-get install -y shellcheck
- run: git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck
spec_links:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- run: python lints/test_spec_links.py
debian_compatibility:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: ./.github/actions/setup-rust
- run: python lints/test_debian_compatibility.py
codspeed:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-codspeed }
- run: cargo codspeed build -p oxigraph --features http-client-native-tls
- run: cargo codspeed build -p oxigraph-testsuite
- uses: CodSpeedHQ/action@v2
with:
run: cargo codspeed run
token: ${{ secrets.CODSPEED_TOKEN }}
codecov:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: true
- uses: ./.github/actions/setup-rust
- uses: taiki-e/install-action@v2
with: { tool: cargo-llvm-cov }
- run: cargo llvm-cov --codecov --output-path codecov.json
- uses: codecov/codecov-action@v4
with:
files: codecov.json
flags: rust
fail_ci_if_error: true
token: ${{ secrets.CODECOV_TOKEN }}

.gitmodules

@ -7,9 +7,13 @@
[submodule "bench/bsbm-tools"] [submodule "bench/bsbm-tools"]
path = bench/bsbm-tools path = bench/bsbm-tools
url = https://github.com/Tpt/bsbm-tools.git url = https://github.com/Tpt/bsbm-tools.git
[submodule "oxrocksdb-sys/rocksdb"]
path = oxrocksdb-sys/rocksdb
url = https://github.com/facebook/rocksdb.git
[submodule "oxrocksdb-sys/lz4"] [submodule "oxrocksdb-sys/lz4"]
path = oxrocksdb-sys/lz4 path = oxrocksdb-sys/lz4
url = https://github.com/lz4/lz4.git url = https://github.com/lz4/lz4.git
[submodule "testsuite/N3"]
path = testsuite/N3
url = https://github.com/w3c/N3.git
branch = master
[submodule "testsuite/rdf-canon"]
path = testsuite/rdf-canon
url = https://github.com/w3c/rdf-canon.git

@ -0,0 +1,3 @@
Thomas Tanon <thomas@pellissier-tanon.fr> <thomaspt@hotmail.fr> <Tpt@users.noreply.github.com>
Thomas Tanon <thomas@pellissier-tanon.fr>
Thomas Tanon <thomas.pellissier-tanon@helsing.ai>

@ -7,7 +7,7 @@ build:
os: "ubuntu-22.04" os: "ubuntu-22.04"
tools: tools:
python: "3" python: "3"
rust: "1.61" rust: "1.70"
apt_packages: apt_packages:
- clang - clang

@ -1,3 +1,317 @@
## [0.4.0-alpha.6] - 2024-03-25
### Changed
- Fixes compatibility with OxIRI 0.2.3.
## [0.4.0-alpha.5] - 2024-03-23
### Added
- Python: `Dataset` class
### Changed
- Rust: `Dataset::canonicalize` and `Graph::canonicalize` now take a `CanonicalizationAlgorithm` parameter to choose which algorithm to use (see the sketch after this list).
- Upgrades RocksDB to 9.0.0
- JS: Drops support for NodeJS 12-16 and older web browsers (Edge before Chromium...).
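For illustration, a minimal sketch of the new parameter (a hedged example, not taken from the release notes; we assume `CanonicalizationAlgorithm` is exposed under `oxrdf::dataset`, and the example IRIs are ours):

```rust
use oxrdf::dataset::CanonicalizationAlgorithm;
use oxrdf::{BlankNode, Dataset, GraphNameRef, NamedNodeRef, QuadRef};

fn main() {
    let mut dataset = Dataset::new();
    let subject = BlankNode::default();
    let predicate = NamedNodeRef::new("http://example.com/p").unwrap();
    let object = NamedNodeRef::new("http://example.com/o").unwrap();
    dataset.insert(QuadRef::new(&subject, predicate, object, GraphNameRef::DefaultGraph));
    // Relabels blank nodes deterministically; the `Unstable` variant signals
    // that the generated labels may change between Oxigraph versions.
    dataset.canonicalize(CanonicalizationAlgorithm::Unstable);
}
```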
## [0.4.0-alpha.4] - 2024-03-07
### Added
- Rust: `From<spargebra::Update` on `Update`.
- `sparesults`: Tokio Async readers for SPARQL query results (XML, JSON and TSV).
- `oxrdf`: `Triple::from_terms` constructor.
- JS: options to set query and update base IRI and query union default graph.
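A hedged sketch of the new conversion (the `spargebra::Update::parse` call follows the public API as we understand it; the update string is illustrative):

```rust
use oxigraph::sparql::Update;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Parse the update with spargebra, then convert it into an executable oxigraph Update
    let parsed = spargebra::Update::parse(
        "INSERT DATA { <http://example.com/s> <http://example.com/p> <http://example.com/o> }",
        None, // no base IRI
    )?;
    let update = Update::from(parsed);
    let _ = update; // ready to be passed to Store::update
    Ok(())
}
```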
### Changed
- Uses RocksDB atomic multi-columns flush.
- Rust: RocksDB is optional but enabled by default.
- Upgrades RocksDB to 8.11.3.
- Rust: `oxigraph` crate has been moved to `lib/oxigraph`.
- Rust: `QueryResults::write` returns the `Write` impl and not `()`.
- Rust: uses `thiserror` for errors.
- Rust: rename `oxrdfio::{Parse,Syntax}Error` to `oxrdfio::Rdf{Parse,Syntax}Error`,
`oxrdfxml::{Parse,Syntax}Error` to `oxrdfxml::RdfXml{Parse,Syntax}Error`,
`oxttl::{Parse,Syntax}Error` to `oxttl::Turtle{Parse,Syntax}Error`,
`sparesults::{Parse,Syntax}Error` to `sparesults::QueryResults{Parse,Syntax}Error` and
`spargebra::SyntaxError` to `spargebra::SparqlSyntaxError`.
## [0.4.0-alpha.3] - 2024-01-25
### Added
- `oxttl`: expose base IRIs.
- `oxttl`: allows injecting prefixes for serialization (see the sketch after this list).
- `oxrdf`: `vocab::geosparql::WKT_LITERAL`.
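A minimal sketch of prefix injection, assuming the alpha-era `oxttl` surface (`with_prefix` on the serializer builder, `serialize_to_write`/`finish` around the writer); the `ex` prefix and IRIs are ours:

```rust
use oxrdf::{NamedNodeRef, TripleRef};
use oxttl::TurtleSerializer;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let s = NamedNodeRef::new("http://example.com/s")?;
    let p = NamedNodeRef::new("http://example.com/p")?;
    // The injected prefix lets the serializer emit `ex:s ex:p ex:s .`
    // instead of spelling out the full IRIs.
    let mut writer = TurtleSerializer::new()
        .with_prefix("ex", "http://example.com/")?
        .serialize_to_write(Vec::new());
    writer.write_triple(TripleRef::new(s, p, s))?;
    println!("{}", String::from_utf8(writer.finish()?)?);
    Ok(())
}
```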
### Changed
- Turtle: Fixes parsing bug with escaped dot at the end of local name.
- `oxttl`: Changes `prefixes` getter return type.
- JS: simplify build.
- Python: uses rustls by default on all platforms that are not Windows/macOS/iOS/WASM.
- Strips debug info of the Rust std library in release build.
## [0.4.0-alpha.2] - 2024-01-08
### Added
- i686 linux support
### Changed
- Docker: fixes Docker image Glib version error.
- Docker: tags now use semver, e.g. `0.3.22` and not `v0.3.22`. Pre-releases are also not tagged `latest` anymore.
- Python: `QuerySolution` is now thread safe.
## [0.4.0-alpha.1] - 2024-01-03
### Added
- `sparopt` crate: A new still quite naive query optimizer.
- `oxttl` crate: An N-Triples/N-Quads/Turtle/TriG/N3 parser and serializer compatible with Tokio.
- `oxrdfxml` crate: An RDF/XML parser and serializer compatible with Tokio.
- `oxrdfio` crate: A stand-alone crate with oxigraph I/O related APIs.
- Rust: SPARQL results I/O is now exposed in the `oxigraph` crate (`oxigraph::sparql::results` module).
- Rust: It is now possible to dynamically link rocksdb with the `rocksdb-pkg-config` feature.
- Python: error location is now included in some `SyntaxError` exceptions.
- Python: the file type can be guessed from the file path extension during parsing and serialization.
- Python: the serialization method returns a `bytes` value if no output-related argument is given.
- Python: SPARQL query results I/O is now exposed (`parse_query_results` function and `.serialize` method).
- Python: `RdfFormat` and `QueryResultsFormat` enum to encode supported formats.
- CLI: a `convert` command to convert RDF file between different formats.
### Removed
- Rust: automated flush at the end of serialization. This should be done explicitly now.
- oxsdatatypes: Deprecated methods.
- Python: 3.7 and Musl linux 1.1 support.
- Python: `GraphName.value`.
### Changed
- SPARQL: a digit after `.` is now required for `xsd:decimal`.
- SPARQL: calendar subtraction returns `xsd:dayTimeDuration` and not `xsd:duration`.
- SPARQL: Unicode escapes (`\u` and `\U`) are now only supported in IRIs and strings and not everywhere.
- Literal serialization now produces canonical N-Triples according to the RDF 1.2 and RDF Dataset Canonicalization drafts
- Rust: MSRV is now 1.70.
- Rust: makes `GraphName` implement `Default`.
- Rust: `wasm32-unknown-unknown` does not assume a JS platform by default. Enable the `js` feature for that.
- Rust: Parsers take `Read` and not `BufRead` for input.
- Rust: `GraphFormat` and `DatasetFormat` have been merged into `RdfFormat`.
- Rust: `GraphParser` and `DatasetParser` have been merged into `RdfParser`.
- Rust: `GraphSerializer` and `DatasetSerializer` have been merged into `RdfSerializer`.
- Rust: query results are now `Send` and `Sync`.
- Rust: `Store.load_graph` and `Store.load_dataset` have been merged into a `load_from_read` method (see the sketch after this list).
- Rust: `Store.dump_graph` and `Store.dump_dataset` have been renamed to `dump_graph_to_write` and `dump_to_write`.
- Rust: `BulkLoader.set_*` methods have been renamed to `BulkLoader.with_*`.
- oxsdatatypes: passes parameters by value instead of by reference when relevant.
- oxsdatatypes: error types have been redesigned.
- oxsdatatypes: returns an error when building a non-serializable duration (year-month and day-time of opposite signs).
- sparesults: renames some methods to move closer to the new oxrdfio crate.
- Python: raise `OSError` instead of `IOError` on OS errors.
- Python: the `mime_type` parameter has been renamed to `format`.
- Python: boolean SPARQL results are now encoded with a `QueryBoolean` class and not a simple `bool`.
- Python: a `path` parameter has been added to all I/O methods to read from a file.
The existing `input` parameter now considers `str` values to be a serialization to parse.
- JS: the `mime_type` parameter has been renamed to `format`.
- CLI: the `oxigraph_server` binary has been renamed to `oxigraph`.
- CLI: the `--location` argument is now part of the sub-commands where it is relevant.
`oxigraph_server --location foo serve` is not possible anymore.
One needs to write `oxigraph serve --location foo`.
- CLI: it is now possible to upload gzip-encoded files to the HTTP API with the `Content-Encoding: gzip` header.
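A hedged sketch of the merged loading and dumping API, assuming the 0.4 alpha signatures (`load_from_read` taking a format plus any `Read`, `dump_to_write` returning the writer on success):

```rust
use oxigraph::io::RdfFormat;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // One loading method for graphs and datasets: the format drives the parser
    store.load_from_read(
        RdfFormat::Turtle,
        "<http://example.com/s> <http://example.com/p> <http://example.com/o> .".as_bytes(),
    )?;
    // Dump the whole store as N-Quads into an in-memory buffer
    let nquads = store.dump_to_write(RdfFormat::NQuads, Vec::new())?;
    println!("{}", String::from_utf8(nquads)?);
    Ok(())
}
```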
## [0.3.22] - 2023-11-29
### Changed
- Allows to compile with more recent `bindgen` and `cc`
- Fixes compatibility with `spin_no_std` feature of `lazy_static`
## [0.3.21] - 2023-11-29
### Changed
- Bulk loader: do not fail when loading empty files.
- Python: fixes source distribution.
- Upgrades RocksDB to 7.8.1.
## [0.3.20] - 2023-10-23
### Changed
- SPARQL: fixes `STR` evaluation on small IRIs (less than 16 bytes) that was broken.
- SPARQL update: fixes `WHERE` clause evaluation that was generating too many solutions in some cases.
- Upgrades RocksDB to 8.7.1.
## [0.3.19] - 2023-08-18
### Added
- Python: allows giving a `pathlib.Path` argument when a path is expected.
- Cargo.toml: add a documentation field to link to docs.rs documentation.
### Changed
- Upgrades RocksDB to 8.3.2.
## [0.3.18] - 2023-06-13
### Changed
- SPARQL: fixes evaluation of `||` when all alternatives are `false`. The results must be `false` and not an error.
- SPARQL: `xsd:duration`: it is now possible to properly parse and serialize all values that can be internally represented by the encoding used by Oxigraph.
- Python: fixes `Store.contains_named_graph` return type annotation.
## [0.3.17] - 2023-06-11
### Added
- SPARQL: support of `xsd:duration` to SPARQL `SUM` aggregate function.
- Server: support for systemd "notify" service option.
- Server: uses the target graph name as base URI when `POST`ing and `PUT`ing data.
- Python: `Store.contains_named_graph` method.
### Changed
- SPARQL: `xsd:decimal` multiplication and division have been improved to avoid raising overflow because of internal operations in some cases.
- SPARQL: `xsd:decimal` parser is now able to properly parse all numbers that can be internally represented by the encoding used by Oxigraph.
- SPARQL: `xsd:decimal` avoids bad overflow on unary `-` operator.
- SPARQL: reduces loss of precision when casting `xsd:decimal` to `xsd:float` or `xsd:double`.
- SPARQL: improves a bit the AST in case of a lot of `&&`, `||` or `IN` operators.
- SPARQL: fixes some optimization bugs around `MINUS` inside `OPTIONAL`.
- SPARQL: makes the choice of for-loop joins over hash joins a bit more aggressive.
- Avoids random crashes when building RocksDB for aarch64.
- Python: fixes support of text I/O with non-ASCII characters.
- Python: upgrades PyO3 to 0.19.
- `spargebra`: `GroundTermPattern::Triple` is now properly gated behind the `"rdf-star"` feature.
- `oxsdatatypes`: Deprecates `is_naan` that is renamed to `is_nan` (we like bread but prefer numbers).
- `oxsdatatypes`: Adds `checked_neg` methods on numerical types.
- `oxsdatatypes`: Drops `nom` dependency and uses hand-written parsers instead.
## [0.3.16] - 2023-04-29
### Changed
- Fixes flush and compaction on the GSPO index. It might improve Oxigraph performance and storage space.
- SPARQL: fixes some optimizations in the presence of quoted triples with nested variables.
- SPARQL profiler: adds EXISTS operation to the explanation and profiling tree.
- Upgrades RocksDB to 8.1.1.
## [0.3.15] - 2023-04-18
### Added
- Rust: adds a `Store.explain_query_opt` method that allows getting an explanation of the evaluation with profiling statistics.
- Server: adds explanation and profiling to the `query` action (`--explain`, `--explain-file` and `--stats` options).
- Python: implements the `__match_args__` attribute on terms (literals, triples, quads...) to allow `match` operator usage.
- Server: adds the `--cors` option to the `serve` actions to allow cross-origin resource sharing.
### Changed
- SPARQL: fixes evaluation of empty aggregation without GROUP BY: aggregators now return their default value (0 for COUNT...) and not an empty row.
- SPARQL: fixes parsing of decimal with more than 19 digits in the fractional part.
- Server docker image: switch to the smaller distroless base images.
- Bulk loader: by default only uses 2 concurrent threads and around 2GB of RAM.
- Server load: progress is now printed to stderr much more regularly.
## [0.3.14] - 2023-03-19
### Added
- Read-only and secondary RocksDB storage. Allows opening the data as read-only or following a primary instance.
- Server: adds multiple commands:
- `serve-secondary` and `serve-read-only` to serve the HTTP server in secondary and read-only modes.
- `dump` to dump the database content to a file.
- `query` and `update` to execute SPARQL query and updates.
- `backup` to do a database backup.
- `optimize` to optimize the database storage.
- Server: adds `format` and `graph` options to the `load` command in order to select the loaded data format and the target graph.
- Python: `Store.extend` and `Store.bulk_extend` methods.
- Python: allows pickling basic data model classes.
### Changed
- Upgrades RocksDB to 8.0.0.
- Server: listening to localhost now properly listens on both IPv4 and IPv6.
- RDF/XML and XML parser results: avoid an ever growing buffer.
- JSON SPARQL results: allows the "head" key to be at the end of the document.
- TSV SPARQL results: properly quote `\t` and print trailing line jumps.
- `Term::from_str`: fixes parsing of blank nodes in object position of quoted triples.
- `QuerySolution`: implements `Eq` and `Debug`.
- JS: Reduces WASM build size.
- OxRDF: fixes `Graph` and `Dataset` serialization to output proper N-Triples and N-Quads.
## [0.3.13] - 2023-02-23
### Changed
- Fixes serialization of times with a decimal number of seconds lower than 10 (now `01:01:01.01` and not `01:01:1.01`).
- Turtle and TriG: fixes parsing for prefixes named after keywords (`prefix`, `base`, `graph`, `true` and `false`).
## [0.3.12] - 2023-02-18
### Added
- `From<NamedOrBlankNode>` for `GraphName` (and similarly for `*Ref`).
- Prebuilt Python wheels for Linux with [musl libc](https://www.musl-libc.org/).
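A small sketch of the new conversion (hedged; the graph IRI is illustrative):

```rust
use oxrdf::{GraphName, NamedNode, NamedOrBlankNode};

fn main() -> Result<(), oxrdf::IriParseError> {
    let node: NamedOrBlankNode = NamedNode::new("http://example.com/g")?.into();
    // New in 0.3.12: a GraphName can be built directly from a NamedOrBlankNode
    let graph_name = GraphName::from(node);
    assert_ne!(graph_name, GraphName::DefaultGraph);
    Ok(())
}
```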
### Changed
- Fixes TriG serialization.
- `QueryDataset` is now properly exposed in the public Rust API.
## [0.3.11] - 2023-01-18
### Added
- SPARQL: Implementation of the `LATERAL` operator following [SPARQL SEP-0006](https://github.com/w3c/sparql-12/blob/main/SEP/SEP-0006/sep-0006.md).
Support is behind the `sep-0006` feature in `spargebra` and enabled by default in Oxigraph (see the sketch after this list).
- SPARQL: Implementation of the `ADJUST` function following [SPARQL SEP-0002](https://github.com/w3c/sparql-12/blob/main/SEP/SEP-0002/sep-0002.md).
Support is behind the `sep-0002` feature in `spargebra` and enabled by default in Oxigraph.
- Rust: There is a new stand-alone crate `oxsdatatypes` implementing Rust structs for the common XML schema datatypes.
It was part of the Oxigraph crate and it might be useful for other SPARQL or XPath implementations.
- Rust: The `oxigraph` crate can now be compiled for `wasm32-wasi` with the basic in-memory backend but without RocksDB.
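A hedged sketch of a `LATERAL` query through the Rust API (the vocabulary IRIs are illustrative; `Store::query` is the usual entry point):

```rust
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // LATERAL re-evaluates its right-hand side once per binding of the
    // left-hand side, so the inner sub-select can see ?s (SEP-0006 semantics)
    let query = r#"
        SELECT * WHERE {
            ?s a <http://example.com/Person> .
            LATERAL {
                SELECT ?name WHERE { ?s <http://example.com/name> ?name } LIMIT 1
            }
        }
    "#;
    let _results = store.query(query)?;
    Ok(())
}
```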
### Changed
- SPARQL: The property path evaluator was sometimes emitting duplicates when evaluating disjunctive patterns (`a|b`). It is now fixed.
- SPARQL: If written explicitly in the SPARQL query, the regular expressions are now compiled once and not for each row.
- SPARQL: Property path evaluation with both start and end variables bound has been optimized.
- SPARQL: Casts to `xsd:decimal` from `xsd:float` and `xsd:double` now properly fails on overflow instead of overflowing the internal 128-bits representation.
This follows [XPath casting rules](https://www.w3.org/TR/xpath-functions-31/#casting-to-decimal).
- Rust: The minimal supported Rust version is set at 1.60 and enforced using the CI.
- Python: Local builds will now target the specific Python version instead of [abi3](https://docs.python.org/3/c-api/stable.html).
abi3 wheels are still released on Pypi alongside new Python-version specific builds for Linux+GNU.
- SPARQL: Fixes a panic when the estimated upper bound of the results size was overflowing a `usize`.
- Python: Uses `typing.IO` in Python stubs instead of narrower interfaces.
- Upgrades RocksDB to 7.9.2, `quick-xml` to 0.27 and `pyo3` to 0.18.
## [0.3.10] - 2022-12-21
### Added
- SPARQL: Property paths with an unbound graph are now evaluated.
For example, `SELECT * WHERE { GRAPH ?g { ?s ex:p+ ?o } }` now works.
- SPARQL: The query optimizer is now fuzzed for better testing.
### Changed
- SPARQL: Evaluation of zero-length paths with both start and end unbounded now return only terms defined in the current graph but not terms only defined in the query.
For example, in `SELECT * WHERE { VALUES ?s { 1 } { ?s ex:p? ?o } }` the literal `1` won't be returned anymore if not in the queried graph.
- Python: type stubs are now compatible with Python 3.7 and Mypy strict mode.
- RDF/XML: allows entities declared using other entities.
- Upgrades `quick-xml` to 0.26.
## [0.3.9] - 2022-12-07
### Added
- Server: The `/store` endpoint now has a `no_transaction` HTTP option for `POST` and `PUT` requests to get better performance at the cost of transactional guarantees.
- Server: The `/store` endpoint now has a `lenient` HTTP option for `POST` and `PUT` requests to ignore syntax errors (requires the `no_transaction` option).
- Server: allows paths that are not valid UTF-8 in file path CLI arguments.
- Rust: `From<spargebra::Query>` to `oxigraph::Query` (thanks to @hobofan).
### Changed
- SPARQL: `NOW()` function properly returns the current time and not 1970-01-01
- SPARQL: fixes serialization of SPARQL queries (property path and STRSTARTS function).
- SPARQL: slightly optimize aggregates by avoiding an unneeded projection.
- SPARQL: the parser now cleanly fails if invalid `VALUES` clauses are provided.
- SPARQL: In DELETE/INSERT UPDATE the currently written values can't be read anymore ("Halloween problem").
- `oxrdf`: makes Clippy run without warnings when `rdf-star` is disabled.
- Python: makes type annotations compatible with Python 3.7.
- Python: makes sure the parameter default value is always included in the type annotation.
## [0.3.8] - 2022-10-22
### Changed
@ -126,8 +440,8 @@
## [0.3.0-beta.1] - 2022-01-29
### Added
- [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) support. `Triple` is now a possible `Term`. Serialization formats and SPARQL support have been updated to match the [latest version of the specification draft](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html).
- Fast data bulk load with the `Store` `bulk_load_dataset` and `bulk_load_graph` methods and a special command-line option of the server.
- It is now possible to quickly backup the database using the `backup` method.
- Rust: `*Syntax::from_extension` to easily guess a graph/dataset/sparql result format from a file extension.
- Rust: Custom SPARQL functions are now supported using `QueryOptions::with_custom_function` (see the sketch below).
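A hedged sketch of a custom function with the 0.3-era API as we understand it (`with_custom_function` taking a `NamedNode` and a closure over `&[Term]`; the function IRI and its logic are ours):

```rust
use oxigraph::model::{Literal, NamedNode, Term};
use oxigraph::sparql::QueryOptions;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    let options = QueryOptions::default().with_custom_function(
        NamedNode::new("http://example.com/str-len")?,
        |args: &[Term]| {
            // Return the length of a literal's lexical form, or None on bad input
            if let [Term::Literal(literal)] = args {
                Some(Literal::from(literal.value().len() as i64).into())
            } else {
                None
            }
        },
    );
    let query = r#"SELECT (<http://example.com/str-len>("abc") AS ?len) WHERE {}"#;
    let _results = store.query_opt(query, options)?;
    Ok(())
}
```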
@ -136,14 +450,14 @@
- `Store` operations are now transactional using the "repeatable read" isolation level:
the store only exposes changes that have been "committed" (i.e. no partial writes) and the exposed state does not change for the complete duration of a read operation (e.g. a SPARQL query) or a read/write operation (e.g. a SPARQL update).
the `Store` `transaction` method now allows doing read/write transactions.
- `RDF-star <https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html>`_ is now supported (including serialization formats and SPARQL-star). :py:class:`.Triple` can now be used in :py:attr:`.Triple.subject`, :py:attr:`.Triple.object`, :py:attr:`.Quad.subject` and :py:attr:`.Quad.object`.
### Changed
- SPARQL: It is now possible to compare `rdf:langString` literals with the same language tag.
- SPARQL: The parser now validates more carefully the inputs following the SPARQL specification and test suite.
- SPARQL: Variable scoping was buggy with "FILTER EXISTS". It is now fixed.
- Rust: RDF model, SPARQL parser and SPARQL result parsers have been moved to stand-alone reusable libraries.
- Rust: HTTPS is not supported by default with the `http_client` option. You need to enable the `native-tls` or the `rustls` feature of the `oxhttp` crate to enable a TLS layer.
- Rust: The error types have been cleaned.
Most of the `Store` methods now return a `StorageError` that is more descriptive than the previous `std::io::Error`.
The new error types all implement `Into<std::io::Error>` for easy conversion.
@ -151,12 +465,12 @@
It is the type used for the `subject` field of the `Triple` and `Quad` structs.
- Rust: The SPARQL algebra is no longer publicly exposed in the `oxigraph` crate. The new `oxalgebra` crate exposes it.
- Rust: The `UpdateOptions` API has been rewritten. It can now be built using `From<QueryOptions>` or `Default`.
- Server: The command-line API has been redesigned. See the [server README](server/README.md) for more information.
- Server: The HTTP implementation is now provided by [`oxhttp`](https://github.com/oxigraph/oxhttp).
- Server: The HTTP response bodies are now generated on the fly instead of being buffered.
- Python: The `SledStore` and `MemoryStore` classes have been removed in favor of the `Store` class.
- JS: The `MemoryStore` class has been renamed to `Store`.
- JS: The [RDF/JS `DataFactory` interface](http://rdf.js.org/data-model-spec/#datafactory-interface) is now implemented by the `oxigraph` module itself and the `MemoryStore.dataFactory` property has been removed.
- The implementation of SPARQL evaluation has been improved for better performance (especially of joins).
- The TLS implementation used in SPARQL HTTP calls is now [rustls](https://github.com/rustls/rustls) and not [native-tls](https://github.com/sfackler/rust-native-tls). The host system certificate registry is still used.
- Spargebra: The basic RDF terms are now the ones of the `oxrdf` crate.
@ -172,7 +486,7 @@ Many thanks to [Thad Guidry](https://github.com/thadguidry), [James Overton](htt
## [0.2.5] - 2021-07-11
### Added
- [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) parser.
- Python wheels for macOS are now universal2 binaries.
### Changed

@ -5,7 +5,10 @@ authors:
family-names: Pellissier Tanon
email: thomas@pellissier-tanon.fr
orcid: "https://orcid.org/0000-0002-0620-6486"
doi: 10.5281/zenodo.7408022
license:
- Apache-2.0
- MIT
message: "If you use this software, please cite it as below."
repository-code: "https://github.com/oxigraph/oxigraph"
title: Oxigraph

Cargo.lock generated (file diff suppressed because it is too large)

@ -1,16 +1,272 @@
[workspace]
members = [
    "js",
    "lib/oxigraph",
    "lib/oxrdf",
    "lib/oxrdfio",
    "lib/oxrdfxml",
    "lib/oxsdatatypes",
    "lib/oxttl",
    "lib/sparesults",
    "lib/spargebra",
    "lib/sparopt",
    "lib/sparql-smith",
    "testsuite"
]
resolver = "2"
[workspace.package]
version = "0.4.0-alpha.7-dev"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
edition = "2021"
rust-version = "1.70"
[workspace.dependencies]
rocksdb = { git = "https://git.nextgraph.org/NextGraph/rust-rocksdb.git", branch = "master", features = [] }
serde = { version = "1.0.142", features = ["derive"] }
anyhow = "1.0.72"
arbitrary = "1.3"
assert_cmd = "2.0"
assert_fs = "1.0"
bindgen = ">=0.60, <0.70"
cc = "1.0.73"
clap = "4.0"
codspeed-criterion-compat = "2.3.3"
console_error_panic_hook = "0.1.7"
digest = "0.10"
flate2 = "1.0"
getrandom = "0.2.8"
hex = "0.4"
js-sys = "0.3.60"
json-event-parser = "0.2.0-alpha.2"
md-5 = "0.10"
memchr = "2.5"
oxilangtag = "0.1"
oxiri = "0.2.3"
peg = "0.8"
pkg-config = "0.3.25"
predicates = ">=2.0, <4.0"
pyo3 = "0.21.0"
quick-xml = ">=0.29, <0.32"
rand = "0.8"
rayon-core = "1.11"
regex = "1.7"
sha1 = "0.10"
sha2 = "0.10"
siphasher = ">=0.3, <2.0"
text-diff = "0.4"
thiserror = "1.0.50"
time = "0.3"
tokio = "1.29"
url = "2.4"
wasm-bindgen = "0.2.83"
zstd = ">=0.12, <0.14"
# Internal dependencies
oxigraph = { version = "=0.4.0-alpha.7-dev", path = "lib/oxigraph" }
oxrdf = { version = "=0.2.0-alpha.4", path = "lib/oxrdf" }
oxrdfio = { version = "=0.1.0-alpha.5", path = "lib/oxrdfio" }
oxrdfxml = { version = "=0.1.0-alpha.5", path = "lib/oxrdfxml" }
oxsdatatypes = { version = "=0.2.0-alpha.1", path = "lib/oxsdatatypes" }
oxttl = { version = "=0.1.0-alpha.5", path = "lib/oxttl" }
sparesults = { version = "=0.2.0-alpha.4", path = "lib/sparesults" }
spargebra = { version = "=0.3.0-alpha.4", path = "lib/spargebra" }
sparopt = { version = "=0.1.0-alpha.5-dev", path = "lib/sparopt" }
[workspace.lints.rust]
absolute_paths_not_starting_with_crate = "warn"
elided_lifetimes_in_paths = "warn"
explicit_outlives_requirements = "warn"
let_underscore_drop = "warn"
macro_use_extern_crate = "warn"
# TODO missing_docs = "warn"
trivial_casts = "warn"
trivial_numeric_casts = "warn"
unsafe_code = "warn"
unused_import_braces = "warn"
unused_lifetimes = "warn"
unused_macro_rules = "warn"
unused_qualifications = "warn"
[workspace.lints.clippy]
allow_attributes = "warn"
allow_attributes_without_reason = "warn"
as_underscore = "warn"
assertions_on_result_states = "warn"
bool_to_int_with_if = "warn"
borrow_as_ptr = "warn"
case_sensitive_file_extension_comparisons = "warn"
cast_lossless = "warn"
cast_possible_truncation = "warn"
cast_possible_wrap = "warn"
cast_precision_loss = "warn"
cast_ptr_alignment = "warn"
cast_sign_loss = "warn"
checked_conversions = "warn"
clone_on_ref_ptr = "warn"
cloned_instead_of_copied = "warn"
copy_iterator = "warn"
create_dir = "warn"
dbg_macro = "warn"
decimal_literal_representation = "warn"
default_trait_access = "warn"
default_union_representation = "warn"
deref_by_slicing = "warn"
disallowed_script_idents = "warn"
doc_link_with_quotes = "warn"
empty_drop = "warn"
empty_enum = "warn"
empty_structs_with_brackets = "warn"
enum_glob_use = "warn"
error_impl_error = "warn"
exit = "warn"
expect_used = "warn"
expl_impl_clone_on_copy = "warn"
explicit_deref_methods = "warn"
explicit_into_iter_loop = "warn"
explicit_iter_loop = "warn"
filetype_is_file = "warn"
filter_map_next = "warn"
flat_map_option = "warn"
fn_params_excessive_bools = "warn"
fn_to_numeric_cast_any = "warn"
format_push_string = "warn"
from_iter_instead_of_collect = "warn"
get_unwrap = "warn"
host_endian_bytes = "warn"
if_not_else = "warn"
if_then_some_else_none = "warn"
ignored_unit_patterns = "warn"
implicit_clone = "warn"
implicit_hasher = "warn"
inconsistent_struct_constructor = "warn"
index_refutable_slice = "warn"
inefficient_to_string = "warn"
infinite_loop = "warn"
inline_always = "warn"
inline_asm_x86_att_syntax = "warn"
inline_asm_x86_intel_syntax = "warn"
into_iter_without_iter = "warn"
invalid_upcast_comparisons = "warn"
items_after_statements = "warn"
iter_not_returning_iterator = "warn"
iter_without_into_iter = "warn"
large_digit_groups = "warn"
large_futures = "warn"
large_include_file = "warn"
large_stack_arrays = "warn"
large_types_passed_by_value = "warn"
let_underscore_must_use = "warn"
let_underscore_untyped = "warn"
linkedlist = "warn"
lossy_float_literal = "warn"
macro_use_imports = "warn"
manual_assert = "warn"
manual_instant_elapsed = "warn"
manual_let_else = "warn"
manual_ok_or = "warn"
manual_string_new = "warn"
many_single_char_names = "warn"
map_unwrap_or = "warn"
match_bool = "warn"
match_on_vec_items = "warn"
match_same_arms = "warn"
match_wild_err_arm = "warn"
match_wildcard_for_single_variants = "warn"
maybe_infinite_iter = "warn"
mem_forget = "warn"
mismatching_type_param_order = "warn"
missing_assert_message = "warn"
missing_asserts_for_indexing = "warn"
missing_fields_in_debug = "warn"
multiple_inherent_impl = "warn"
mut_mut = "warn"
mutex_atomic = "warn"
naive_bytecount = "warn"
needless_bitwise_bool = "warn"
needless_continue = "warn"
needless_for_each = "warn"
needless_pass_by_value = "warn"
needless_raw_string_hashes = "warn"
needless_raw_strings = "warn"
negative_feature_names = "warn"
no_effect_underscore_binding = "warn"
no_mangle_with_rust_abi = "warn"
non_ascii_literal = "warn"
panic = "warn"
panic_in_result_fn = "warn"
partial_pub_fields = "warn"
print_stderr = "warn"
print_stdout = "warn"
ptr_as_ptr = "warn"
ptr_cast_constness = "warn"
pub_without_shorthand = "warn"
range_minus_one = "warn"
range_plus_one = "warn"
rc_buffer = "warn"
rc_mutex = "warn"
redundant_closure_for_method_calls = "warn"
redundant_else = "warn"
redundant_feature_names = "warn"
redundant_type_annotations = "warn"
ref_binding_to_reference = "warn"
ref_option_ref = "warn"
ref_patterns = "warn"
rest_pat_in_fully_bound_structs = "warn"
return_self_not_must_use = "warn"
same_functions_in_if_condition = "warn"
same_name_method = "warn"
semicolon_inside_block = "warn"
shadow_same = "warn"
should_panic_without_expect = "warn"
single_match_else = "warn"
stable_sort_primitive = "warn"
str_to_string = "warn"
string_add = "warn"
string_add_assign = "warn"
string_lit_chars_any = "warn"
string_to_string = "warn"
struct_excessive_bools = "warn"
struct_field_names = "warn"
suspicious_xor_used_as_pow = "warn"
tests_outside_test_module = "warn"
todo = "warn"
transmute_ptr_to_ptr = "warn"
trivially_copy_pass_by_ref = "warn"
try_err = "warn"
unchecked_duration_subtraction = "warn"
undocumented_unsafe_blocks = "warn"
unicode_not_nfc = "warn"
unimplemented = "warn"
uninlined_format_args = "warn"
unnecessary_box_returns = "warn"
unnecessary_join = "warn"
unnecessary_safety_comment = "warn"
unnecessary_safety_doc = "warn"
unnecessary_self_imports = "warn"
unnecessary_wraps = "warn"
unneeded_field_pattern = "warn"
unnested_or_patterns = "warn"
unreadable_literal = "warn"
unsafe_derive_deserialize = "warn"
unseparated_literal_suffix = "warn"
unused_async = "warn"
unused_self = "warn"
unwrap_in_result = "warn"
use_debug = "warn"
used_underscore_binding = "warn"
verbose_bit_mask = "warn"
verbose_file_reads = "warn"
wildcard_dependencies = "warn"
zero_sized_map_values = "warn"
[profile.release]
lto = true
codegen-units = 1
strip = "debuginfo"
[profile.release.package.oxigraph-js]
codegen-units = 1
opt-level = "z"
strip = "debuginfo"

@ -4,9 +4,10 @@
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![tests status](https://github.com/oxigraph/oxigraph/actions/workflows/tests.yml/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![artifacts status](https://github.com/oxigraph/oxigraph/actions/workflows/artifacts.yml/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
[![Twitter URL](https://img.shields.io/twitter/url?style=social&url=https%3A%2F%2Ftwitter.com%2Foxigraph)](https://twitter.com/oxigraph)
Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
@ -19,6 +20,12 @@ Oxigraph is in heavy development and SPARQL query evaluation has not been optimi
The development roadmap uses [GitHub milestones](https://github.com/oxigraph/oxigraph/milestones?direction=desc&sort=completeness&state=open).
Oxigraph's internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
Oxigraph implements the following specifications:
- [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
- [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
- [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
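For example, evaluating a SPARQL query against a `Store` takes only a few lines. A minimal sketch using the public `oxigraph` API; the query is illustrative:

```rust
use oxigraph::sparql::QueryResults;
use oxigraph::store::Store;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let store = Store::new()?;
    // SELECT * returns every triple of the default graph as variable bindings.
    if let QueryResults::Solutions(solutions) = store.query("SELECT * WHERE { ?s ?p ?o }")? {
        for solution in solutions {
            println!("{:?}", solution?);
        }
    }
    Ok(())
}
```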
It is split into multiple parts:
- [The database written as a Rust library](https://crates.io/crates/oxigraph). Its source code is in the `lib` directory.
@ -27,15 +34,22 @@ It is split into multiple parts:
- [`pyoxigraph` that exposes Oxigraph to the Python world](https://pyoxigraph.readthedocs.io/). Its source code is in the `python` directory. [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
- [JavaScript bindings for Oxigraph](https://www.npmjs.com/package/oxigraph). WebAssembly is used to package Oxigraph into a NodeJS-compatible NPM package. Its source code is in the `js` directory.
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
- [Oxigraph binary](https://crates.io/crates/oxigraph-cli) that provides a standalone command-line tool for manipulating RDF data and spawning a web server implementing the [SPARQL 1.1 Protocol](https://www.w3.org/TR/sparql11-protocol/) and the [SPARQL 1.1 Graph Store Protocol](https://www.w3.org/TR/sparql11-http-rdf-update/). Its source code is in the `cli` directory.
Note that it was previously named [Oxigraph server](https://crates.io/crates/oxigraph-server).
[![Latest Version](https://img.shields.io/crates/v/oxigraph-cli.svg)](https://crates.io/crates/oxigraph-cli)
Also, some parts of Oxigraph are available as standalone Rust crates:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module); see the sketch below the diagram. It relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triples, N-Quads, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats.
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
The library layers in Oxigraph: each element depends on the elements below it.
![Oxigraph libraries architecture diagram](./docs/arch-diagram.svg)
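As an illustration of the unified `oxrdfio` API mentioned in the crate list above, a minimal sketch assuming the alpha-series `RdfParser::parse_read` interface (the same entry point handles Turtle, TriG, N-Triples, N-Quads and RDF/XML); the data is illustrative:

```rust
use oxrdfio::{RdfFormat, RdfParser};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let turtle = br#"<http://example.com/s> <http://example.com/p> "o" ."#;
    // Parsing yields quads; for graph formats the graph name is the default graph.
    for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle.as_slice()) {
        println!("{}", quad?);
    }
    Ok(())
}
```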
A preliminary benchmark [is provided](bench/README.md). There is also [a document describing Oxigraph technical architecture](https://github.com/oxigraph/oxigraph/wiki/Architecture).
@ -51,6 +65,7 @@ Feel free to use [GitHub discussions](https://github.com/oxigraph/oxigraph/discu
If you need advanced support or are willing to pay to get some extra features, feel free to reach out to [Tpt](https://github.com/Tpt/).
## License
This project is licensed under either of
@ -65,3 +80,15 @@ at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
## Sponsors
* [RelationLabs](https://relationlabs.ai/) that is building [Relation-Graph](https://github.com/relationlabs/Relation-Graph), a SPARQL database module for the [Substrate blockchain platform](https://substrate.io/) based on Oxigraph.
* [Field 33](https://field33.com) that was building [an ontology management platform](https://plow.pm/).
* [Magnus Bakken](https://github.com/magbak) who is building [Data Treehouse](https://www.data-treehouse.com/), a time-series + RDF datalake platform, and [chrontext](https://github.com/magbak/chrontext), a SPARQL query endpoint on top of joint RDF and time series databases.
* [DeciSym.AI](https://www.decisym.ai/) a cyber security consulting company providing RDF-based software.
* [ACE IoT Solutions](https://aceiotsolutions.com/), a building IOT platform.
* [Albin Larsson](https://byabbe.se/) who is building [GovDirectory](https://www.govdirectory.org/), a directory of public agencies based on Wikidata.
And [others](https://github.com/sponsors/Tpt). Many thanks to them!

@ -2,18 +2,19 @@
DATASET_SIZE=100000
PARALLELISM=16
set -eu
wget -nc https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_RELEASE_2_1_5/blazegraph.jar
cd bsbm-tools || exit
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
/usr/lib/jvm/java-8-openjdk/bin/java -server -jar ../blazegraph.jar &
sleep 10
curl -f -X POST -H 'Content-Type:text/turtle' -T "explore-${DATASET_SIZE}.nt" http://localhost:9999/blazegraph/sparql
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql -u http://localhost:9999/blazegraph/sparql -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql
kill $!
rm -f blazegraph.jnl
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data

@ -3,21 +3,22 @@
DATASET_SIZE=100000
PARALLELISM=16
VERSION="9.3.3"
set -eu
cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
export JAVA_HOME=/usr/lib/jvm/java-11-openjdk
../graphdb-free-9.3.3/bin/graphdb -s -Dgraphdb.logger.root.level=WARN &
sleep 10
curl -f -X POST http://localhost:7200/rest/repositories -H 'Content-Type:application/json' -d '
{"id":"bsbm","params":{"ruleset":{"label":"Ruleset","name":"ruleset","value":"empty"},"title":{"label":"Repository title","name":"title","value":"GraphDB Free repository"},"checkForInconsistencies":{"label":"Check for inconsistencies","name":"checkForInconsistencies","value":"false"},"disableSameAs":{"label":"Disable owl:sameAs","name":"disableSameAs","value":"true"},"baseURL":{"label":"Base URL","name":"baseURL","value":"http://example.org/owlim#"},"repositoryType":{"label":"Repository type","name":"repositoryType","value":"file-repository"},"id":{"label":"Repository ID","name":"id","value":"repo-bsbm"},"storageFolder":{"label":"Storage folder","name":"storageFolder","value":"storage"}},"title":"BSBM","type":"free"}
'
curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZE}.nt" http://localhost:7200/repositories/bsbm/statements
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm -u http://localhost:7200/repositories/bsbm/statements -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm
kill $!
sleep 5
rm -rf ../graphdb-free-9.3.3/data
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data

@ -3,11 +3,12 @@
DATASET_SIZE=100000 # number of products in the dataset. Around 350 triples are generated per product.
PARALLELISM=16
VERSION="4.3.2"
set -eu
wget -nc https://downloads.apache.org/jena/binaries/apache-jena-${VERSION}.zip
cd bsbm-tools || exit
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
unzip ../"apache-jena-${VERSION}.zip"
./apache-jena-${VERSION}/bin/tdb2.tdbloader --loader=parallel --loc=td_data "explore-${DATASET_SIZE}.nt"
wget https://downloads.apache.org/jena/binaries/apache-jena-fuseki-${VERSION}.zip
unzip apache-jena-fuseki-${VERSION}.zip
@ -19,9 +20,9 @@ sleep 10
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query
kill $!
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data
rm -rf run
rm -rf apache-jena-${VERSION}
rm -rf apache-jena-fuseki-${VERSION}

@ -2,18 +2,20 @@
DATASET_SIZE=100000 # number of products in the dataset. Around 350 triples are generated per product.
PARALLELISM=16
set -eu
cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
cargo build --release --manifest-path="../../cli/Cargo.toml"
VERSION=$(./../../target/release/oxigraph --version | sed 's/oxigraph //g')
./../../target/release/oxigraph --location oxigraph_data load --file "explore-${DATASET_SIZE}.nt"
./../../target/release/oxigraph --location oxigraph_data serve --bind 127.0.0.1:7878 &
sleep 1
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" "http://127.0.0.1:7878/query"
kill $!
rm -rf oxigraph_data
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data

@ -0,0 +1,49 @@
#!/usr/bin/env bash
DATASET_SIZE=100000
PARALLELISM=16
VERSION="4.2.2"
TOMCAT_VERSION="9.0.71"
set -eu
wget -nc -O "rdf4j-${VERSION}.zip" "https://www.eclipse.org/downloads/download.php?file=/rdf4j/eclipse-rdf4j-${VERSION}-sdk.zip&mirror_id=1"
wget -nc -O "tomcat-${TOMCAT_VERSION}.zip" "https://dlcdn.apache.org/tomcat/tomcat-9/v${TOMCAT_VERSION}/bin/apache-tomcat-${TOMCAT_VERSION}.zip"
cd bsbm-tools || exit
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
wget -nc -O "rdf4j-${VERSION}.zip" "https://www.eclipse.org/downloads/download.php?file=/rdf4j/eclipse-rdf4j-${VERSION}-sdk.zip&mirror_id=1"
unzip ../"rdf4j-${VERSION}.zip"
unzip ../"tomcat-${TOMCAT_VERSION}.zip"
CATALINA_HOME="$(pwd)/apache-tomcat-${TOMCAT_VERSION}"
export CATALINA_HOME
export JAVA_OPTS="-Dorg.eclipse.rdf4j.appdata.basedir=${CATALINA_HOME}/rdf4j"
cp "eclipse-rdf4j-${VERSION}"/war/rdf4j-server.war "${CATALINA_HOME}"/webapps/
chmod +x "${CATALINA_HOME}"/bin/*.sh
"${CATALINA_HOME}"/bin/startup.sh
sleep 30
curl -f -X PUT http://localhost:8080/rdf4j-server/repositories/bsbm -H 'Content-Type:text/turtle' -d '
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
@prefix rep: <http://www.openrdf.org/config/repository#>.
@prefix sr: <http://www.openrdf.org/config/repository/sail#>.
@prefix sail: <http://www.openrdf.org/config/sail#>.
[] a rep:Repository ;
rep:repositoryID "bsbm" ;
rdfs:label "BSBM" ;
rep:repositoryImpl [
rep:repositoryType "openrdf:SailRepository" ;
sr:sailImpl [
sail:sailType "rdf4j:LmdbStore"
]
] .
'
sleep 10
curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZE}.nt" http://localhost:8080/rdf4j-server/repositories/bsbm/statements
./testdriver -mt ${PARALLELISM} -ucf usecases/explore/sparql.txt -o "../bsbm.explore.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm
./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm -u http://localhost:8080/rdf4j-server/repositories/bsbm/statements -udataset "explore-update-${DATASET_SIZE}.nt"
#./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm
"${CATALINA_HOME}"/bin/shutdown.sh
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data
rm -rf "eclipse-rdf4j-${VERSION}"
rm -rf "apache-tomcat-${TOMCAT_VERSION}"

@ -3,6 +3,8 @@
DATASET_SIZE=100000 # number of products in the dataset. Around 350 triples are generated per product.
PARALLELISM=16
VERSION="7.2.5"
set -eu
cd bsbm-tools
./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}"
cp ../virtuoso-opensource/database/virtuoso.ini.sample virtuoso.ini
@ -18,7 +20,7 @@ EOF
# ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.virtuoso.${DATASET_SIZE}.${PARALLELISM}.${PARALLELISM}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' -u 'http://dba:dba@localhost:8890/sparql-auth?graph-uri=urn:graph:test' -udataset "explore-update-${DATASET_SIZE}.nt"
# ./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test'
kill $!
rm -rf ../database
rm -f "explore-${DATASET_SIZE}.nt"
rm -f "explore-update-${DATASET_SIZE}.nt"
rm -rf td_data

@ -0,0 +1,63 @@
"""
Converts a SPARQL query JSON explanation file to a flamegraph.
Usage: python explanation_to_flamegraph.py explanation.json flamegraph.svg
"""
import json
import subprocess
from argparse import ArgumentParser
from pathlib import Path
from shutil import which
from tempfile import NamedTemporaryFile
parser = ArgumentParser(
    prog='OxigraphFlamegraph',
    description='Builds a flamegraph from the Oxigraph query explanation JSON format')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('flamegraph_svg', type=Path)
args = parser.parse_args()


def trace_line(label: str, value: float):
    return f"{label} {int(value * 1_000_000)}"


with args.json_explanation.open('rt') as fp:
    explanation = json.load(fp)
trace = []
if "parsing duration in seconds" in explanation:
    trace.append(trace_line("parsing", explanation['parsing duration in seconds']))
if "planning duration in seconds" in explanation:
    trace.append(trace_line("planning", explanation['planning duration in seconds']))
already_used_names = {}


def add_to_trace(node, path):
    path = f"{path};{node['name'].replace(' ', '`')}"
    if path in already_used_names:
        already_used_names[path] += 1
        path = f"{path}`{already_used_names[path]}"
    else:
        already_used_names[path] = 0
    # Self time: the node duration minus the time spent in its children.
    samples = node['duration in seconds'] - sum(child['duration in seconds'] for child in node.get("children", ()))
    if int(samples * 1_000_000) > 0:
        trace.append(trace_line(path, samples))
    for child in node.get("children", ()):
        add_to_trace(child, path)


add_to_trace(explanation["plan"], 'eval')
inferno = which('inferno-flamegraph')
flamegraph_pl = which('flamegraph.pl')
if inferno:
    args.flamegraph_svg.write_text(
        subprocess.run([inferno], input='\n'.join(trace), stdout=subprocess.PIPE, text=True).stdout)
elif flamegraph_pl:
    with NamedTemporaryFile('w+t') as fp:
        fp.write('\n'.join(trace))
        fp.flush()
        args.flamegraph_svg.write_text(
            subprocess.run([flamegraph_pl, fp.name], stdout=subprocess.PIPE, text=True).stdout)
else:
    raise Exception(
        'This script requires either the inferno-flamegraph tool from https://github.com/jonhoo/inferno or the flamegraph.pl script from https://github.com/brendangregg/FlameGraph to be installed and in $PATH.')

@ -0,0 +1,52 @@
"""
Converts a SPARQL query JSON explanation file to a tracing event file compatible with Chrome.
Usage: python explanation_to_trace.py explanation.json trace.json
"""
import json
from argparse import ArgumentParser
from pathlib import Path
parser = ArgumentParser(
    prog='OxigraphTracing',
    description='Builds a Trace Event Format file from the Oxigraph query explanation JSON format')
parser.add_argument('json_explanation', type=Path)
parser.add_argument('json_trace_event', type=Path)
args = parser.parse_args()

with args.json_explanation.open('rt') as fp:
    explanation = json.load(fp)
trace = []


def trace_element(name: str, cat: str, start_s: float, duration_s: float):
    return {
        "name": name,
        "cat": cat,
        "ph": "X",
        "ts": int(start_s * 1_000_000),
        "dur": int(duration_s * 1_000_000),
        "pid": 1
    }


def add_to_trace(node, path, start_time: float):
    path = f"{path};{node['name'].replace(' ', '`')}"
    trace.append(trace_element(node["name"], node["name"].split("(")[0], start_time, node["duration in seconds"]))
    for child in node.get("children", ()):
        add_to_trace(child, path, start_time)
        start_time += child["duration in seconds"]


current_time = 0
if "parsing duration in seconds" in explanation:
    d = explanation["parsing duration in seconds"]
    trace.append(trace_element("parsing", "parsing", current_time, d))
    current_time += d
if "planning duration in seconds" in explanation:
    d = explanation["planning duration in seconds"]
    trace.append(trace_element("planning", "planning", current_time, d))
    current_time += d
add_to_trace(explanation["plan"], 'eval', current_time)
with args.json_trace_event.open("wt") as fp:
    json.dump(trace, fp)

@ -1,4 +1,4 @@
avoid-breaking-exported-api = false
cognitive-complexity-threshold = 50
too-many-arguments-threshold = 10
type-complexity-threshold = 500

@ -1,12 +1,18 @@
[advisories]
version = 2
ignore = ["RUSTSEC-2018-0015"]
[licenses]
version = 2
allow = [
    "Apache-2.0",
    "Apache-2.0 WITH LLVM-exception",
    "BSD-3-Clause",
    "ISC",
    "MIT",
    "OpenSSL",
    "Unicode-DFS-2016"
]
[[licenses.clarify]]
name = "ring"

@ -0,0 +1,120 @@
<svg xmlns="http://www.w3.org/2000/svg" width="624" height="384" class="svgbob">
<style>.svgbob line, .svgbob path, .svgbob circle, .svgbob rect, .svgbob polygon {
stroke: black;
stroke-width: 2;
stroke-opacity: 1;
fill-opacity: 1;
stroke-linecap: round;
stroke-linejoin: miter;
}
.svgbob text {
white-space: pre;
fill: black;
font-family: Iosevka Fixed, monospace;
font-size: 14px;
}
.svgbob rect.backdrop {
stroke: none;
fill: white;
}
.svgbob .broken {
stroke-dasharray: 8;
}
.svgbob .filled {
fill: black;
}
.svgbob .bg_filled {
fill: white;
stroke-width: 1;
}
.svgbob .nofill {
fill: white;
}
.svgbob .end_marked_arrow {
marker-end: url(#arrow);
}
.svgbob .start_marked_arrow {
marker-start: url(#arrow);
}
.svgbob .end_marked_diamond {
marker-end: url(#diamond);
}
.svgbob .start_marked_diamond {
marker-start: url(#diamond);
}
.svgbob .end_marked_circle {
marker-end: url(#circle);
}
.svgbob .start_marked_circle {
marker-start: url(#circle);
}
.svgbob .end_marked_open_circle {
marker-end: url(#open_circle);
}
.svgbob .start_marked_open_circle {
marker-start: url(#open_circle);
}
.svgbob .end_marked_big_open_circle {
marker-end: url(#big_open_circle);
}
.svgbob .start_marked_big_open_circle {
marker-start: url(#big_open_circle);
}<!--separator-->.svgbob .r{
fill: papayawhip;
}
.svgbob .p{
fill: lightyellow;
}
.svgbob .j{
fill: lightgreen;
}
</style>
<defs>
<marker id="arrow" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,0 0,4 4,2 0,0"></polygon>
</marker>
<marker id="diamond" viewBox="-2 -2 8 8" refX="4" refY="2" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<polygon points="0,2 2,0 4,2 2,4 0,2"></polygon>
</marker>
<marker id="circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="filled"></circle>
</marker>
<marker id="open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="2" class="bg_filled"></circle>
</marker>
<marker id="big_open_circle" viewBox="0 0 8 8" refX="4" refY="4" markerWidth="7" markerHeight="7"
orient="auto-start-reverse">
<circle cx="4" cy="4" r="3" class="bg_filled"></circle>
</marker>
</defs>
<rect class="backdrop" x="0" y="0" width="624" height="384"></rect>
<rect x="4" y="8" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="28">oxigraph CLI</text>
<rect x="244" y="8" width="136" height="32" class="solid nofill p" rx="0"></rect>
<text x="258" y="28">pyoxigraph</text>
<rect x="468" y="8" width="144" height="32" class="solid nofill j" rx="0"></rect>
<text x="482" y="28">oxigraph</text>
<text x="554" y="28">JS</text>
<rect x="4" y="72" width="608" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="92">oxigraph</text>
<rect x="68" y="136" width="232" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="156">oxrdfio</text>
<rect x="348" y="136" width="112" height="32" class="solid nofill r" rx="0"></rect>
<text x="362" y="156">sparopt</text>
<rect x="68" y="200" width="96" height="32" class="solid nofill r" rx="0"></rect>
<text x="82" y="220">oxttl</text>
<rect x="180" y="200" width="120" height="32" class="solid nofill r" rx="0"></rect>
<text x="194" y="220">oxrdfxml</text>
<rect x="316" y="200" width="144" height="32" class="solid nofill r" rx="0"></rect>
<text x="330" y="220">spargebra</text>
<rect x="476" y="200" width="136" height="32" class="solid nofill r" rx="0"></rect>
<text x="490" y="220">sparesults</text>
<rect x="36" y="264" width="576" height="32" class="solid nofill r" rx="0"></rect>
<text x="50" y="284">oxrdf</text>
<rect x="4" y="328" width="152" height="32" class="solid nofill r" rx="0"></rect>
<text x="18" y="348">oxsdatatypes</text>
</svg>

After

Width:  |  Height:  |  Size: 4.6 KiB

@ -0,0 +1,35 @@
+------------------+ +----------------+ +-----------------+
+ oxigraph CLI {r} + + pyoxigraph {p} + + oxigraph JS {j} +
+------------------+ +----------------+ +-----------------+
+---------------------------------------------------------------------------+
+ oxigraph (Rust) {r} +
+---------------------------------------------------------------------------+
+----------------------------+ +-------------+
+ oxrdfio {r} + + sparopt {r} +
+----------------------------+ +-------------+
+-----------+ +--------------+ +-----------------+ +----------------+
+ oxttl {r} + + oxrdfxml {r} + + spargebra {r} + + sparesults {r} +
+-----------+ +--------------+ +-----------------+ +----------------+
+-----------------------------------------------------------------------+
+ oxrdf {r} +
+-----------------------------------------------------------------------+
+------------------+
+ oxsdatatypes {r} +
+------------------+
# Legend:
r = {
fill: papayawhip;
}
p = {
fill: lightyellow;
}
j = {
fill: lightgreen;
}

@ -1,7 +1,6 @@
[package]
name = "oxigraph-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
@ -9,12 +8,38 @@ edition = "2021"
cargo-fuzz = true
[dependencies]
anyhow = "1.0.72"
libfuzzer-sys = "0.4"
oxigraph = { path = "../lib/oxigraph", default-features = false }
oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] }
oxrdfxml = { path = "../lib/oxrdfxml" }
oxttl = { path = "../lib/oxttl", features = ["rdf-star"] }
sparesults = { path = "../lib/sparesults", features = ["rdf-star"] } sparesults = { path = "../lib/sparesults", features = ["rdf-star"] }
spargebra = { path = "../lib/spargebra", features = ["rdf-star", "sep-0006"] }
sparql-smith = { path = "../lib/sparql-smith", features = ["sep-0006"] }
[profile.release]
codegen-units = 1
debug = true
[workspace]
[[bin]]
name = "nquads"
path = "fuzz_targets/nquads.rs"
[[bin]]
name = "n3"
path = "fuzz_targets/n3.rs"
[[bin]]
name = "rdf_xml"
path = "fuzz_targets/rdf_xml.rs"
[[bin]]
name = "sparql_eval"
path = "fuzz_targets/sparql_eval.rs"
[[bin]]
name = "sparql_query"
path = "fuzz_targets/sparql_query.rs"
@ -34,3 +59,7 @@ path = "fuzz_targets/sparql_results_xml.rs"
[[bin]]
name = "sparql_results_tsv"
path = "fuzz_targets/sparql_results_tsv.rs"
[[bin]]
name = "trig"
path = "fuzz_targets/trig.rs"

@ -0,0 +1,28 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxttl::N3Parser;
fuzz_target!(|data: &[u8]| {
let mut quads = Vec::new();
let mut parser = N3Parser::new()
.with_base_iri("http://example.com/")
.unwrap()
.parse();
for chunk in data.split(|c| *c == 0xFF) {
parser.extend_from_slice(chunk);
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
}
parser.end();
while let Some(result) = parser.read_next() {
if let Ok(quad) = result {
quads.push(quad);
}
}
assert!(parser.is_end());
//TODO: serialize
});

@ -0,0 +1,84 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::Quad;
use oxttl::{NQuadsParser, NQuadsSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = NQuadsParser::new().with_quoted_triples();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(quads, errors)
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
assert_eq!(quads, quads_without_split);
assert_eq!(errors, errors_without_split);
// We test also unchecked if valid
if errors.is_empty() {
let (quads_unchecked, errors_unchecked) = parse(data.split(|c| *c == 0xFF), true);
assert!(errors_unchecked.is_empty());
assert_eq!(quads, quads_unchecked);
}
// We serialize
let mut writer = NQuadsSerializer::new().serialize_to_write(Vec::new());
for quad in &quads {
writer.write_quad(quad).unwrap();
}
let new_serialization = writer.finish();
// We parse the serialization
let new_quads = NQuadsParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});

@ -0,0 +1,35 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdfxml::{RdfXmlParser, RdfXmlSerializer};
fuzz_target!(|data: &[u8]| {
// We parse
let triples = RdfXmlParser::new()
.parse_read(data)
.flatten()
.collect::<Vec<_>>();
// We serialize
let mut writer = RdfXmlSerializer::new().serialize_to_write(Vec::new());
for triple in &triples {
writer.write_triple(triple).unwrap();
}
let new_serialization = writer.finish().unwrap();
// We parse the serialization
let new_triples = RdfXmlParser::new()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {triples:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_triples, triples);
});

@ -0,0 +1,61 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxigraph::io::RdfFormat;
use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter};
use oxigraph::store::Store;
use std::sync::OnceLock;
fuzz_target!(|data: sparql_smith::Query| {
static STORE: OnceLock<Store> = OnceLock::new();
let store = STORE.get_or_init(|| {
let store = Store::new().unwrap();
store
.load_from_read(RdfFormat::TriG, sparql_smith::DATA_TRIG.as_bytes())
.unwrap();
store
});
let query_str = data.to_string();
if let Ok(query) = Query::parse(&query_str, None) {
let options = QueryOptions::default();
let with_opt = store.query_opt(query.clone(), options.clone()).unwrap();
let without_opt = store
.query_opt(query, options.without_optimizations())
.unwrap();
match (with_opt, without_opt) {
(QueryResults::Solutions(with_opt), QueryResults::Solutions(without_opt)) => {
assert_eq!(
query_solutions_key(with_opt, query_str.contains(" REDUCED ")),
query_solutions_key(without_opt, query_str.contains(" REDUCED "))
)
}
(QueryResults::Graph(_), QueryResults::Graph(_)) => unimplemented!(),
(QueryResults::Boolean(with_opt), QueryResults::Boolean(without_opt)) => {
assert_eq!(with_opt, without_opt)
}
_ => panic!("Different query result types"),
}
}
});
fn query_solutions_key(iter: QuerySolutionIter, is_reduced: bool) -> String {
// TODO: ordering
let mut b = iter
.into_iter()
.map(|t| {
let mut b = t
.unwrap()
.iter()
.map(|(var, val)| format!("{var}: {val}"))
.collect::<Vec<_>>();
b.sort_unstable();
b.join(" ")
})
.collect::<Vec<_>>();
b.sort_unstable();
if is_reduced {
b.dedup();
}
b.join("\n")
}

@ -1,10 +1,7 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use spargebra::Query;

fuzz_target!(|data: &str| {
    let _ = Query::parse(data, None);
});

@ -1,15 +1,6 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;

fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Json, data));

@ -1,10 +1,6 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxigraph_fuzz::result_format::fuzz_result_format;
use sparesults::QueryResultsFormat;

fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Tsv, data));

@ -1,10 +1,6 @@
 #![no_main]
 use libfuzzer_sys::fuzz_target;
-use sparesults::{QueryResultsFormat, QueryResultsParser, QueryResultsReader};
+use oxigraph_fuzz::result_format::fuzz_result_format;
+use sparesults::QueryResultsFormat;

-fuzz_target!(|data: &[u8]| {
-    let parser = QueryResultsParser::from_format(QueryResultsFormat::Xml);
-    if let Ok(QueryResultsReader::Solutions(solutions)) = parser.read_results(data) {
-        for _ in solutions {}
-    }
-});
+fuzz_target!(|data: &[u8]| fuzz_result_format(QueryResultsFormat::Xml, data));

@ -3,8 +3,6 @@ use libfuzzer_sys::fuzz_target;
 use spargebra::Update;
 use std::str;

-fuzz_target!(|data: &[u8]| {
-    if let Ok(data) = str::from_utf8(data) {
-        Update::parse(data, None);
-    }
+fuzz_target!(|data: &str| {
+    let _ = Update::parse(data, None);
 });
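
The switch from `&[u8]` to `&str` inputs in these targets leans on libFuzzer's `Arbitrary` integration, which decodes raw fuzz bytes into typed values; the `sparql_smith::Query` target above takes the same idea further and derives a whole structured input. A rough sketch of that mechanism, assuming the `arbitrary` crate with its `derive` feature (the `Dimensions` struct is hypothetical, not part of this diff):

```rust
// Sketch of structured fuzzing inputs via the arbitrary crate
// (assumed dependency: arbitrary = { version = "1", features = ["derive"] }).
use arbitrary::{Arbitrary, Unstructured};

#[derive(Debug, Arbitrary)]
struct Dimensions {
    width: u8,
    height: u8,
}

fn main() {
    // The fuzzer would supply these bytes; here they are fixed.
    let raw = [3u8, 7];
    let mut u = Unstructured::new(&raw);
    let dims = Dimensions::arbitrary(&mut u).unwrap();
    // The property under test must hold for every decoded input.
    assert_eq!(
        u64::from(dims.width) * u64::from(dims.height),
        u64::from(dims.height) * u64::from(dims.width)
    );
}
```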

@ -0,0 +1,166 @@
#![no_main]
use libfuzzer_sys::fuzz_target;
use oxrdf::graph::CanonicalizationAlgorithm;
use oxrdf::{Dataset, GraphName, Quad, Subject, Term, Triple};
use oxttl::{TriGParser, TriGSerializer};
fn parse<'a>(
chunks: impl IntoIterator<Item = &'a [u8]>,
unchecked: bool,
) -> (Vec<Quad>, Vec<String>, Vec<(String, String)>) {
let mut quads = Vec::new();
let mut errors = Vec::new();
let mut parser = TriGParser::new()
.with_quoted_triples()
.with_base_iri("http://example.com/")
.unwrap();
if unchecked {
parser = parser.unchecked();
}
let mut reader = parser.parse();
for chunk in chunks {
reader.extend_from_slice(chunk);
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
}
reader.end();
while let Some(result) = reader.read_next() {
match result {
Ok(quad) => quads.push(quad),
Err(error) => errors.push(error.to_string()),
}
}
assert!(reader.is_end());
(
quads,
errors,
reader
.prefixes()
.map(|(k, v)| (k.to_owned(), v.to_owned()))
.collect(),
)
}
fn count_triple_blank_nodes(triple: &Triple) -> usize {
(match &triple.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &triple.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
})
}
fn count_quad_blank_nodes(quad: &Quad) -> usize {
(match &quad.subject {
Subject::BlankNode(_) => 1,
Subject::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + (match &quad.object {
Term::BlankNode(_) => 1,
Term::Triple(t) => count_triple_blank_nodes(t),
_ => 0,
}) + usize::from(matches!(quad.graph_name, GraphName::BlankNode(_)))
}
fn serialize_quads(quads: &[Quad], prefixes: Vec<(String, String)>) -> Vec<u8> {
let mut serializer = TriGSerializer::new();
for (prefix_name, prefix_iri) in prefixes {
serializer = serializer.with_prefix(prefix_name, prefix_iri).unwrap();
}
let mut writer = serializer.serialize_to_write(Vec::new());
for quad in quads {
writer.write_quad(quad).unwrap();
}
writer.finish().unwrap()
}
fuzz_target!(|data: &[u8]| {
// We parse with splitting
let (quads, errors, prefixes) = parse(data.split(|c| *c == 0xFF), false);
// We parse without splitting
let (quads_without_split, errors_without_split, _) = parse(
[data
.iter()
.copied()
.filter(|c| *c != 0xFF)
.collect::<Vec<_>>()
.as_slice()],
false,
);
let (quads_unchecked, errors_unchecked, _) = parse(data.split(|c| *c == 0xFF), true);
if errors.is_empty() {
assert!(errors_unchecked.is_empty());
}
let bnodes_count = quads.iter().map(count_quad_blank_nodes).sum::<usize>();
if bnodes_count == 0 {
assert_eq!(
quads,
quads_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
assert_eq!(
quads,
quads_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
} else if bnodes_count <= 4 {
let mut dataset_with_split = quads.iter().collect::<Dataset>();
let mut dataset_without_split = quads_without_split.iter().collect::<Dataset>();
dataset_with_split.canonicalize(CanonicalizationAlgorithm::Unstable);
dataset_without_split.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(
dataset_with_split,
dataset_without_split,
"With split:\n{}\nWithout split:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_without_split, Vec::new()))
);
if errors.is_empty() {
let mut dataset_unchecked = quads_unchecked.iter().collect::<Dataset>();
dataset_unchecked.canonicalize(CanonicalizationAlgorithm::Unstable);
assert_eq!(
dataset_with_split,
dataset_unchecked,
"Validating:\n{}\nUnchecked:\n{}",
String::from_utf8_lossy(&serialize_quads(&quads, Vec::new())),
String::from_utf8_lossy(&serialize_quads(&quads_unchecked, Vec::new()))
);
}
}
assert_eq!(errors, errors_without_split);
// We serialize
let new_serialization = serialize_quads(&quads, prefixes);
// We parse the serialization
let new_quads = TriGParser::new()
.with_quoted_triples()
.parse_read(new_serialization.as_slice())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| {
format!(
"Error on {:?} from {quads:?} based on {:?}: {e}",
String::from_utf8_lossy(&new_serialization),
String::from_utf8_lossy(data)
)
})
.unwrap();
// We check the roundtrip has not changed anything
assert_eq!(new_quads, quads);
});
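
For readers unfamiliar with the low-level oxttl API exercised here: `TriGParser::new().parse()` returns a push-based reader that accepts input in arbitrary chunks, which is what lets this target split the document at `0xFF` bytes and still expect identical quads. A minimal sketch of that feeding pattern using the same calls as the target above (the example document is made up):

```rust
// Minimal sketch of the push-based oxttl reader: feed bytes in arbitrary
// chunks and pull quads out as soon as they are complete.
use oxttl::TriGParser;

fn main() {
    let chunks: [&[u8]; 2] = [
        b"<http://example.com/s> <http://example.com/p> ",
        b"<http://example.com/o> .",
    ];
    let mut reader = TriGParser::new().parse();
    for chunk in chunks {
        reader.extend_from_slice(chunk);
        while let Some(result) = reader.read_next() {
            println!("{:?}", result.unwrap());
        }
    }
    reader.end(); // signal end of input, then flush what remains
    while let Some(result) = reader.read_next() {
        println!("{:?}", result.unwrap());
    }
    assert!(reader.is_end());
}
```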

@ -0,0 +1 @@
pub mod result_format;

@ -0,0 +1,63 @@
use anyhow::Context;
use sparesults::{
FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser, QueryResultsSerializer,
};
pub fn fuzz_result_format(format: QueryResultsFormat, data: &[u8]) {
let parser = QueryResultsParser::from_format(format);
let serializer = QueryResultsSerializer::from_format(format);
let Ok(reader) = parser.parse_read(data) else {
return;
};
match reader {
FromReadQueryResultsReader::Solutions(solutions) => {
let Ok(solutions) = solutions.collect::<Result<Vec<_>, _>>() else {
return;
};
// We try to write again
let mut writer = serializer
.serialize_solutions_to_write(
Vec::new(),
solutions
.first()
.map_or_else(Vec::new, |s| s.variables().to_vec()),
)
.unwrap();
for solution in &solutions {
writer.write(solution).unwrap();
}
let serialized = String::from_utf8(writer.finish().unwrap()).unwrap();
// And to parse again
if let FromReadQueryResultsReader::Solutions(roundtrip_solutions) = parser
.parse_read(serialized.as_bytes())
.with_context(|| format!("Parsing {serialized:?}"))
.unwrap()
{
assert_eq!(
roundtrip_solutions
.collect::<Result<Vec<_>, _>>()
.with_context(|| format!("Parsing {serialized:?}"))
.unwrap(),
solutions
)
}
}
FromReadQueryResultsReader::Boolean(value) => {
// We try to write again
let mut serialized = Vec::new();
serializer
.serialize_boolean_to_write(&mut serialized, value)
.unwrap();
// And to parse again
if let FromReadQueryResultsReader::Boolean(roundtrip_value) =
parser.parse_read(serialized.as_slice()).unwrap()
{
assert_eq!(roundtrip_value, value)
}
}
}
}
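
This helper is the shared body that the earlier JSON/TSV/XML targets now delegate to; it asserts a parse → serialize → parse roundtrip. A fixed-input sketch of the parse step, using the same sparesults API as above (the JSON document is illustrative):

```rust
// Sketch of the parse step fuzzed above, on a fixed JSON result document
// instead of fuzzer-provided bytes.
use sparesults::{FromReadQueryResultsReader, QueryResultsFormat, QueryResultsParser};

fn main() {
    let data = br#"{"head":{"vars":["s"]},"results":{"bindings":[]}}"#;
    let parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
    match parser.parse_read(data.as_slice()) {
        Ok(FromReadQueryResultsReader::Solutions(solutions)) => {
            // An empty "bindings" array parses to zero solutions.
            assert_eq!(solutions.count(), 0);
        }
        Ok(FromReadQueryResultsReader::Boolean(_)) => unreachable!(),
        Err(e) => panic!("parse error: {e}"),
    }
}
```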

@ -1,23 +1,26 @@
 [package]
-name = "oxigraph_js"
+name = "oxigraph-js"
-version = "0.4.0-alpha"
+version.workspace = true
-authors = ["Tpt <thomas@pellissier-tanon.fr>"]
+authors.workspace = true
-license = "MIT OR Apache-2.0"
+license.workspace = true
 readme = "README.md"
-keywords = ["RDF", "N-Triples", "Turtle", "RDF/XML", "SPARQL"]
+keywords = ["RDF", "N-Triples", "Turtle", "XML", "SPARQL"]
 repository = "https://github.com/oxigraph/oxigraph/tree/main/js"
 description = "JavaScript bindings of Oxigraph"
-edition = "2021"
+edition.workspace = true
+rust-version.workspace = true
+publish = false

 [lib]
 crate-type = ["cdylib"]
 name = "oxigraph"
+doc = false

 [dependencies]
-oxigraph = { version = "0.4.0-alpha", path="../lib" }
-wasm-bindgen = "0.2"
-js-sys = "0.3"
-console_error_panic_hook = "0.1"
+console_error_panic_hook.workspace = true
+js-sys.workspace = true
+oxigraph = { workspace = true, features = ["js"] }
+wasm-bindgen.workspace = true

-[dev-dependencies]
-wasm-bindgen-test = "0.3"
+[lints]
+workspace = true

@ -3,7 +3,7 @@ Oxigraph for JavaScript
 [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
 [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
-[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
+[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)

 This package provides a JavaScript API on top of [Oxigraph](https://crates.io/crates/oxigraph), compiled with WebAssembly.

@ -13,7 +13,7 @@ Oxigraph for JavaScript is a work in progress and currently offers a simple in-m
 The store is also able to load RDF serialized in [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/) and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/).
-It is distributed using [a NPM package](https://www.npmjs.com/package/oxigraph) that should work with Node.JS 12+ and modern web browsers compatible with WebAssembly.
+It is distributed using [a NPM package](https://www.npmjs.com/package/oxigraph) that should work with Node.JS 18+ and [modern web browsers compatible with WebAssembly reference types and JavaScript `WeakRef`](https://caniuse.com/wasm-reference-types,mdn-javascript_builtins_weakref).

 To install:
 ```bash
@ -188,6 +188,18 @@ if (store.query("ASK { ?s ?s ?s }")) {
 }
 ```

+It is also possible to provide some options in an object given as second argument:
+
+```js
+if (store.query("ASK { <s> ?p ?o }", {
+    base_iri: "http://example.com/", // base IRI to resolve relative IRIs in the query
+    use_default_graph_as_union: true, // the default graph in the query is the union of all the dataset graphs
+})) {
+    console.log("there is a triple with same subject, predicate and object");
+}
+```
+
 #### `Store.prototype.update(String query)`

 Executes a [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/).
 The [`LOAD` operation](https://www.w3.org/TR/sparql11-update/#load) is not supported yet.

@ -197,40 +209,50 @@ Example of update:
 store.update("DELETE WHERE { <http://example.com/s> ?p ?o }")
 ```

+It is also possible to provide some options in an object given as second argument:
+
+```js
+store.update("DELETE WHERE { <s> ?p ?o }", {
+    base_iri: "http://example.com/" // base IRI to resolve relative IRIs in the update
+})
+```
+
-#### `Store.prototype.load(String data, String mimeType, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`
+#### `Store.prototype.load(String data, String format, NamedNode|String? baseIRI, NamedNode|BlankNode|DefaultGraph? toNamedGraph)`

 Loads serialized RDF triples or quads into the store.
 The method arguments are:
 1. `data`: the serialized RDF triples or quads.
-2. `mimeType`: the MIME type of the serialization. See below for the supported MIME types.
+2. `format`: the format of the serialization. See below for the supported formats.
 3. `baseIRI`: the base IRI to use to resolve the relative IRIs in the serialization.
 4. `toNamedGraph`: for triple serialization formats, the name of the named graph the triples should be loaded into.

 The available formats are:
-* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
-* [TriG](https://www.w3.org/TR/trig/): `application/trig`
-* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
-* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
-* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
+* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
+* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
+* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
+* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
+* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
+* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`

 Example of loading a Turtle file into the named graph `<http://example.com/graph>` with the base IRI `http://example.com`:
 ```js
 store.load("<http://example.com> <http://example.com> <> .", "text/turtle", "http://example.com", oxigraph.namedNode("http://example.com/graph"));
 ```

-#### `Store.prototype.dump(String mimeType, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`
+#### `Store.prototype.dump(String format, NamedNode|BlankNode|DefaultGraph? fromNamedGraph)`

 Returns serialized RDF triples or quads from the store.
 The method arguments are:
-1. `mimeType`: the MIME type of the serialization. See below for the supported MIME types.
+1. `format`: the format of the serialization. See below for the supported formats.
 2. `fromNamedGraph`: for triple serialization formats, the name of the named graph the triples should be dumped from.

 The available formats are:
-* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle`
-* [TriG](https://www.w3.org/TR/trig/): `application/trig`
-* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples`
-* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads`
-* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml`
+* [Turtle](https://www.w3.org/TR/turtle/): `text/turtle` or `ttl`
+* [TriG](https://www.w3.org/TR/trig/): `application/trig` or `trig`
+* [N-Triples](https://www.w3.org/TR/n-triples/): `application/n-triples` or `nt`
+* [N-Quads](https://www.w3.org/TR/n-quads/): `application/n-quads` or `nq`
+* [N3](https://w3c.github.io/N3/spec/): `text/n3` or `n3`
+* [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/): `application/rdf+xml` or `rdf`

 Example of building a Turtle file from the named graph `<http://example.com/graph>`:
 ```js

@ -242,7 +264,8 @@ store.dump("text/turtle", oxigraph.namedNode("http://example.com/graph"));
 ### From 0.2 to 0.3

 * The `MemoryStore` class is now called `Store` (there is no other kind of stores...).
 * RDF/JS datamodel functions (`namedNode`...) are now available at the root of the `oxigraph` package. You now need to call `oxigraph.namedNode` instead of `store.dataFactory.namedNode`.
-* [RDF-star](https://w3c.github.io/rdf-star/cg-spec) is now implemented. `Quad` is now a valid value for the `Quad` `subject` and `object` properties.
+* [RDF-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html) is now implemented. `Quad` is now a valid value for the `Quad` `subject` and `object` properties.

 ## How to contribute

@ -250,7 +273,17 @@ The Oxigraph bindings are written in Rust using [the Rust WASM toolkit](https://
 The [The Rust Wasm Book](https://rustwasm.github.io/docs/book/) is a great tutorial to get started.

-To run the tests of the JS bindings written in JS run `npm test`.
+To set up a dev environment:
+- ensure you have a Rust toolchain with `rustup` and `cargo` installed ([possible instructions](https://www.rust-lang.org/tools/install)).
+- install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/): `cargo install wasm-pack` (it is also in some Linux distribution repositories).
+- `npm install` to install pure JS dependencies.
+- you are good to go!
+
+Testing and linting:
+- Rust code is formatted with [rustfmt](https://github.com/rust-lang/rustfmt) and linted with [clippy](https://github.com/rust-lang/rust-clippy).
+  You can execute them with `cargo fmt` and `cargo clippy`.
+- JS code is formatted and linted with [Biome](https://biomejs.dev/): `npm run fmt` to auto-format, `npm test` to lint and test.
+- Tests are written in JavaScript using [Mocha](https://mochajs.org/) in the `test` directory. `npm test` runs them.

 ## License

@ -0,0 +1,14 @@
{
"$schema": "https://biomejs.dev/schemas/1.0.0/schema.json",
"formatter": {
"indentStyle": "space",
"indentWidth": 4,
"lineWidth": 100
},
"linter": {
"ignore": ["pkg"]
},
"organizeImports": {
"enabled": true
}
}

@ -1,31 +1,19 @@
 #! /usr/bin/env node

-const fs = require('fs')
-
-// We copy file to the new directory
-fs.mkdirSync('pkg')
-for (const file of fs.readdirSync('./pkg-web')) {
-  fs.copyFileSync('./pkg-web/' + file, './pkg/' + file)
-}
-for (const file of fs.readdirSync('./pkg-node')) {
-  fs.copyFileSync('./pkg-node/' + file, './pkg/' + file)
-}
-
-const pkg = JSON.parse(fs.readFileSync('./pkg/package.json'))
-pkg.name = 'oxigraph'
-pkg.main = 'node.js'
-pkg.browser = 'web.js'
-pkg.files = [
-  '*.{js,wasm,d.ts}'
-]
-pkg.homepage = 'https://github.com/oxigraph/oxigraph/tree/main/js'
-pkg.bugs = {
-  url: 'https://github.com/oxigraph/oxigraph/issues'
-}
-pkg.collaborators = undefined
-pkg.repository = {
-  type: 'git',
-  url: 'https://github.com/oxigraph/oxigraph.git',
-  directory: 'js'
-}
-fs.writeFileSync('./pkg/package.json', JSON.stringify(pkg, null, 2))
+const fs = require("node:fs");
+const pkg = JSON.parse(fs.readFileSync("./pkg/package.json"));
+pkg.name = "oxigraph";
+pkg.main = "node.js";
+pkg.browser = "web.js";
+pkg.files = ["*.{js,wasm,d.ts}"];
+pkg.homepage = "https://github.com/oxigraph/oxigraph/tree/main/js";
+pkg.bugs = {
+    url: "https://github.com/oxigraph/oxigraph/issues",
+};
+pkg.collaborators = undefined;
+pkg.repository = {
+    type: "git",
+    url: "https://github.com/oxigraph/oxigraph.git",
+    directory: "js",
+};
+fs.writeFileSync("./pkg/package.json", JSON.stringify(pkg, null, 2));

js/package-lock.json (generated, 1,027 changed lines): diff suppressed because it is too large.

@ -1,21 +1,20 @@
 {
     "name": "oxigraph_tests",
     "description": "Oxigraph JS build and tests",
     "private": true,
     "devDependencies": {
-        "mocha": "^10.0.0",
+        "@biomejs/biome": "^1.0.0",
         "@rdfjs/data-model": "^2.0.1",
-        "standard": "^17.0.0"
+        "mocha": "^10.0.0"
     },
     "scripts": {
-        "test": "standard && wasm-pack build --debug --target nodejs && mocha",
-        "build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node",
+        "fmt": "biome format . --write && biome check . --apply-unsafe && biome format . --write",
+        "test": "biome ci . && wasm-pack build --debug --target nodejs --weak-refs --reference-types && mocha",
+        "build": "wasm-pack build --release --target web --out-name web --weak-refs --reference-types && wasm-pack build --release --target nodejs --out-name node --weak-refs --reference-types && node build_package.js",
         "release": "npm run build && npm publish ./pkg",
         "pack": "npm run build && npm pack ./pkg"
     },
     "standard": {
-        "ignore": [
-            "pkg*"
-        ]
+        "ignore": ["pkg*"]
     }
 }

@ -1,4 +1,4 @@
-#![allow(clippy::unused_unit)]
+#![allow(clippy::mem_forget)]
 use wasm_bindgen::prelude::*;

 mod model;

@ -1,10 +1,9 @@
-#![allow(dead_code, clippy::inherent_to_string)]
+#![allow(dead_code, clippy::inherent_to_string, clippy::unused_self)]
 use crate::format_err;
 use crate::utils::to_err;
 use js_sys::{Reflect, UriError};
 use oxigraph::model::*;
-use oxigraph::sparql::Variable;
 use wasm_bindgen::prelude::*;

 thread_local! {

@ -14,7 +13,7 @@ thread_local! {
 #[wasm_bindgen(js_name = namedNode)]
 pub fn named_node(value: String) -> Result<JsNamedNode, JsValue> {
     NamedNode::new(value)
-        .map(|v| v.into())
+        .map(Into::into)
         .map_err(|v| UriError::new(&v.to_string()).into())
 }

@ -51,7 +50,7 @@ pub fn literal(
 #[wasm_bindgen(js_name = defaultGraph)]
 pub fn default_graph() -> JsDefaultGraph {
-    JsDefaultGraph {}
+    JsDefaultGraph
 }

 #[wasm_bindgen(js_name = variable)]

@ -297,7 +296,7 @@ impl From<JsLiteral> for Term {
 #[wasm_bindgen(js_name = DefaultGraph)]
 #[derive(Eq, PartialEq, Debug, Clone, Hash)]
-pub struct JsDefaultGraph {}
+pub struct JsDefaultGraph;

 #[wasm_bindgen(js_class = DefaultGraph)]
 impl JsDefaultGraph {

@ -308,12 +307,12 @@ impl JsDefaultGraph {
     #[wasm_bindgen(getter)]
     pub fn value(&self) -> String {
-        "".to_owned()
+        String::new()
     }

     #[wasm_bindgen(js_name = toString)]
     pub fn to_string(&self) -> String {
-        "DEFAULT".to_string()
+        "DEFAULT".to_owned()
     }

     pub fn equals(&self, other: &JsValue) -> bool {

@ -388,7 +387,7 @@ impl JsQuad {
     #[wasm_bindgen(getter)]
     pub fn value(&self) -> String {
-        "".to_owned()
+        String::new()
     }

     #[wasm_bindgen(getter = subject)]

@ -476,19 +475,19 @@ impl From<JsTerm> for JsValue {
 impl From<NamedNode> for JsTerm {
     fn from(node: NamedNode) -> Self {
-        JsTerm::NamedNode(node.into())
+        Self::NamedNode(node.into())
     }
 }

 impl From<BlankNode> for JsTerm {
     fn from(node: BlankNode) -> Self {
-        JsTerm::BlankNode(node.into())
+        Self::BlankNode(node.into())
     }
 }

 impl From<Literal> for JsTerm {
     fn from(literal: Literal) -> Self {
-        JsTerm::Literal(literal.into())
+        Self::Literal(literal.into())
     }
 }

@ -527,20 +526,20 @@ impl From<GraphName> for JsTerm {
         match name {
             GraphName::NamedNode(node) => node.into(),
             GraphName::BlankNode(node) => node.into(),
-            GraphName::DefaultGraph => JsTerm::DefaultGraph(JsDefaultGraph {}),
+            GraphName::DefaultGraph => Self::DefaultGraph(JsDefaultGraph),
         }
     }
 }

 impl From<Variable> for JsTerm {
     fn from(variable: Variable) -> Self {
-        JsTerm::Variable(variable.into())
+        Self::Variable(variable.into())
     }
 }

 impl From<Triple> for JsTerm {
     fn from(triple: Triple) -> Self {
-        JsTerm::Quad(triple.into())
+        Self::Quad(triple.into())
     }
 }

@ -552,14 +551,14 @@ impl From<Box<Triple>> for JsTerm {
 impl From<Quad> for JsTerm {
     fn from(quad: Quad) -> Self {
-        JsTerm::Quad(quad.into())
+        Self::Quad(quad.into())
     }
 }

 impl TryFrom<JsTerm> for NamedNode {
     type Error = JsValue;

-    fn try_from(value: JsTerm) -> Result<Self, JsValue> {
+    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
         match value {
             JsTerm::NamedNode(node) => Ok(node.into()),
             JsTerm::BlankNode(node) => Err(format_err!(

@ -583,7 +582,7 @@ impl TryFrom<JsTerm> for NamedNode {
 impl TryFrom<JsTerm> for NamedOrBlankNode {
     type Error = JsValue;

-    fn try_from(value: JsTerm) -> Result<Self, JsValue> {
+    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
         match value {
             JsTerm::NamedNode(node) => Ok(node.into()),
             JsTerm::BlankNode(node) => Ok(node.into()),

@ -609,7 +608,7 @@ impl TryFrom<JsTerm> for NamedOrBlankNode {
 impl TryFrom<JsTerm> for Subject {
     type Error = JsValue;

-    fn try_from(value: JsTerm) -> Result<Self, JsValue> {
+    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
         match value {
             JsTerm::NamedNode(node) => Ok(node.into()),
             JsTerm::BlankNode(node) => Ok(node.into()),

@ -632,7 +631,7 @@ impl TryFrom<JsTerm> for Subject {
 impl TryFrom<JsTerm> for Term {
     type Error = JsValue;

-    fn try_from(value: JsTerm) -> Result<Self, JsValue> {
+    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
         match value {
             JsTerm::NamedNode(node) => Ok(node.into()),
             JsTerm::BlankNode(node) => Ok(node.into()),

@ -652,7 +651,7 @@ impl TryFrom<JsTerm> for Term {
 impl TryFrom<JsTerm> for GraphName {
     type Error = JsValue;

-    fn try_from(value: JsTerm) -> Result<Self, JsValue> {
+    fn try_from(value: JsTerm) -> Result<Self, Self::Error> {
         match value {
             JsTerm::NamedNode(node) => Ok(node.into()),
             JsTerm::BlankNode(node) => Ok(node.into()),

@ -660,7 +659,7 @@ impl TryFrom<JsTerm> for GraphName {
                 "The literal {} is not a possible graph name",
                 literal.inner
             )),
-            JsTerm::DefaultGraph(_) => Ok(GraphName::DefaultGraph),
+            JsTerm::DefaultGraph(_) => Ok(Self::DefaultGraph),
             JsTerm::Variable(variable) => Err(format_err!(
                 "The variable {} is not a possible RDF term",
                 variable.inner

@ -712,7 +711,7 @@ impl FromJsConverter {
                 .map_err(|v| UriError::new(&v.to_string()))?
                 .into()),
             "BlankNode" => Ok(BlankNode::new(
-                &Reflect::get(value, &self.value)?
+                Reflect::get(value, &self.value)?
                     .as_string()
                     .ok_or_else(|| format_err!("BlankNode should have a string value"))?,
             )

@ -739,9 +738,9 @@
                     ))
                 }
             }
-            "DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph {})),
+            "DefaultGraph" => Ok(JsTerm::DefaultGraph(JsDefaultGraph)),
             "Variable" => Ok(Variable::new(
-                &Reflect::get(value, &self.value)?
+                Reflect::get(value, &self.value)?
                     .as_string()
                     .ok_or_else(|| format_err!("Variable should have a string value"))?,
             )

@ -749,8 +748,7 @@
                 .into()),
             "Quad" => Ok(self.to_quad(value)?.into()),
             _ => Err(format_err!(
-                "The termType {} is not supported by Oxigraph",
-                term_type
+                "The termType {term_type} is not supported by Oxigraph"
             )),
         }
     } else if term_type.is_undefined() {

@ -1,12 +1,11 @@
 use crate::format_err;
 use crate::model::*;
 use crate::utils::to_err;
-use js_sys::{Array, Map};
+use js_sys::{Array, Map, Reflect};
-use oxigraph::io::{DatasetFormat, GraphFormat};
+use oxigraph::io::{RdfFormat, RdfParser};
 use oxigraph::model::*;
-use oxigraph::sparql::QueryResults;
+use oxigraph::sparql::{Query, QueryResults, Update};
 use oxigraph::store::Store;
-use std::io::Cursor;
 use wasm_bindgen::prelude::*;

 #[wasm_bindgen(js_name = Store)]

@ -17,6 +16,7 @@ pub struct JsStore {
 #[wasm_bindgen(js_class = Store)]
 impl JsStore {
     #[wasm_bindgen(constructor)]
+    #[allow(clippy::use_self)]
     pub fn new(quads: Option<Box<[JsValue]>>) -> Result<JsStore, JsValue> {
         console_error_panic_hook::set_once();

@ -24,7 +24,7 @@ impl JsStore {
             store: Store::new().map_err(to_err)?,
         };
         if let Some(quads) = quads {
-            for quad in quads.iter() {
+            for quad in &*quads {
                 store.add(quad)?;
             }
         }

@ -73,28 +73,28 @@ impl JsStore {
                 None
             }
             .as_ref()
-            .map(|t: &NamedOrBlankNode| t.into()),
+            .map(<&Subject>::into),
             if let Some(predicate) = FROM_JS.with(|c| c.to_optional_term(predicate))? {
                 Some(NamedNode::try_from(predicate)?)
             } else {
                 None
             }
             .as_ref()
-            .map(|t: &NamedNode| t.into()),
+            .map(<&NamedNode>::into),
             if let Some(object) = FROM_JS.with(|c| c.to_optional_term(object))? {
                 Some(object.try_into()?)
             } else {
                 None
             }
             .as_ref()
-            .map(|t: &Term| t.into()),
+            .map(<&Term>::into),
             if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(graph_name))? {
                 Some(graph_name.try_into()?)
             } else {
                 None
             }
             .as_ref()
-            .map(|t: &GraphName| t.into()),
+            .map(<&GraphName>::into),
         )
         .map(|v| v.map(|v| JsQuad::from(v).into()))
         .collect::<Result<Vec<_>, _>>()

@ -102,7 +102,21 @@
             .into_boxed_slice())
     }

-    pub fn query(&self, query: &str) -> Result<JsValue, JsValue> {
+    pub fn query(&self, query: &str, options: &JsValue) -> Result<JsValue, JsValue> {
+        // Parsing options
+        let mut base_iri = None;
+        let mut use_default_graph_as_union = false;
+        if !options.is_undefined() {
+            base_iri = Reflect::get(options, &JsValue::from_str("base_iri"))?.as_string();
+            use_default_graph_as_union =
+                Reflect::get(options, &JsValue::from_str("use_default_graph_as_union"))?
+                    .is_truthy();
+        }
+        let mut query = Query::parse(query, base_iri.as_deref()).map_err(to_err)?;
+        if use_default_graph_as_union {
+            query.dataset_mut().set_default_graph_as_union();
+        }
         let results = self.store.query(query).map_err(to_err)?;
         let output = match results {
             QueryResults::Solutions(solutions) => {

@ -135,17 +149,25 @@
         Ok(output)
     }

-    pub fn update(&self, update: &str) -> Result<(), JsValue> {
+    pub fn update(&self, update: &str, options: &JsValue) -> Result<(), JsValue> {
+        // Parsing options
+        let mut base_iri = None;
+        if !options.is_undefined() {
+            base_iri = Reflect::get(options, &JsValue::from_str("base_iri"))?.as_string();
+        }
+        let update = Update::parse(update, base_iri.as_deref()).map_err(to_err)?;
         self.store.update(update).map_err(to_err)
     }

     pub fn load(
         &self,
         data: &str,
-        mime_type: &str,
+        format: &str,
         base_iri: &JsValue,
         to_graph_name: &JsValue,
     ) -> Result<(), JsValue> {
+        let format = rdf_format(format)?;
         let base_iri = if base_iri.is_null() || base_iri.is_undefined() {
             None
         } else if base_iri.is_string() {

@ -158,65 +180,41 @@
             ));
         };

-        let to_graph_name =
-            if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
-                Some(graph_name.try_into()?)
-            } else {
-                None
-            };
-        if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
-            self.store
-                .load_graph(
-                    Cursor::new(data),
-                    graph_format,
-                    &to_graph_name.unwrap_or(GraphName::DefaultGraph),
-                    base_iri.as_deref(),
-                )
-                .map_err(to_err)
-        } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
-            if to_graph_name.is_some() {
-                return Err(format_err!(
-                    "The target graph name parameter is not available for dataset formats"
-                ));
-            }
-            self.store
-                .load_dataset(Cursor::new(data), dataset_format, base_iri.as_deref())
-                .map_err(to_err)
-        } else {
-            Err(format_err!("Not supported MIME type: {}", mime_type))
-        }
+        let mut parser = RdfParser::from_format(format);
+        if let Some(to_graph_name) = FROM_JS.with(|c| c.to_optional_term(to_graph_name))? {
+            parser = parser.with_default_graph(GraphName::try_from(to_graph_name)?);
+        }
+        if let Some(base_iri) = base_iri {
+            parser = parser.with_base_iri(base_iri).map_err(to_err)?;
+        }
+        self.store
+            .load_from_read(parser, data.as_bytes())
+            .map_err(to_err)
     }

-    pub fn dump(&self, mime_type: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
-        let from_graph_name =
-            if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
-                Some(graph_name.try_into()?)
-            } else {
-                None
-            };
-
-        let mut buffer = Vec::new();
-        if let Some(graph_format) = GraphFormat::from_media_type(mime_type) {
-            self.store
-                .dump_graph(
-                    &mut buffer,
-                    graph_format,
-                    &from_graph_name.unwrap_or(GraphName::DefaultGraph),
-                )
-                .map_err(to_err)?;
-        } else if let Some(dataset_format) = DatasetFormat::from_media_type(mime_type) {
-            if from_graph_name.is_some() {
-                return Err(format_err!(
-                    "The target graph name parameter is not available for dataset formats"
-                ));
-            }
-            self.store
-                .dump_dataset(&mut buffer, dataset_format)
-                .map_err(to_err)?;
-        } else {
-            return Err(format_err!("Not supported MIME type: {}", mime_type));
-        }
+    pub fn dump(&self, format: &str, from_graph_name: &JsValue) -> Result<String, JsValue> {
+        let format = rdf_format(format)?;
+        let buffer =
+            if let Some(from_graph_name) = FROM_JS.with(|c| c.to_optional_term(from_graph_name))? {
+                self.store.dump_graph_to_write(
+                    &GraphName::try_from(from_graph_name)?,
+                    format,
+                    Vec::new(),
+                )
+            } else {
+                self.store.dump_to_write(format, Vec::new())
+            }
+            .map_err(to_err)?;
         String::from_utf8(buffer).map_err(to_err)
     }
 }

+fn rdf_format(format: &str) -> Result<RdfFormat, JsValue> {
+    if format.contains('/') {
+        RdfFormat::from_media_type(format)
+            .ok_or_else(|| format_err!("Not supported RDF format media type: {format}"))
+    } else {
+        RdfFormat::from_extension(format)
+            .ok_or_else(|| format_err!("Not supported RDF format extension: {format}"))
+    }
+}
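
`rdf_format` is what lets `load` and `dump` accept either a media type or a file extension, matching the README changes above. A quick sketch of the two lookup paths it dispatches between, using the same `RdfFormat` API that appears in the diff:

```rust
// Strings containing '/' are treated as media types,
// everything else as a file extension.
use oxigraph::io::RdfFormat;

fn main() {
    assert_eq!(RdfFormat::from_media_type("text/turtle"), Some(RdfFormat::Turtle));
    assert_eq!(RdfFormat::from_extension("ttl"), Some(RdfFormat::Turtle));
    assert_eq!(RdfFormat::from_extension("docx"), None);
}
```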

@ -11,6 +11,7 @@ macro_rules! format_err {
     };
 }

+#[allow(clippy::needless_pass_by_value)]
 pub fn to_err(e: impl ToString) -> JsValue {
     JsValue::from(Error::new(&e.to_string()))
 }

@ -1,38 +1,52 @@
 /* global describe, it */

-import oxigraph from '../pkg/oxigraph.js'
-import assert from 'assert'
-import runTests from '../node_modules/@rdfjs/data-model/test/index.js'
-
-runTests({ factory: oxigraph })
-
-describe('DataModel', function () {
-  describe('#toString()', function () {
-    it('namedNode().toString() should return SPARQL compatible syntax', function () {
-      assert.strictEqual('<http://example.com>', oxigraph.namedNode('http://example.com').toString())
-    })
-
-    it('blankNode().toString() should return SPARQL compatible syntax', function () {
-      assert.strictEqual('_:a', oxigraph.blankNode('a').toString())
-    })
-
-    it('literal().toString() should return SPARQL compatible syntax', function () {
-      assert.strictEqual('"a\\"b"@en', oxigraph.literal('a"b', 'en').toString())
-    })
-
-    it('defaultGraph().toString() should return SPARQL compatible syntax', function () {
-      assert.strictEqual('DEFAULT', oxigraph.defaultGraph().toString())
-    })
-
-    it('variable().toString() should return SPARQL compatible syntax', function () {
-      assert.strictEqual('?a', oxigraph.variable('a').toString())
-    })
-
-    it('quad().toString() should return SPARQL compatible syntax', function () {
-      assert.strictEqual(
-        '<http://example.com/s> <http://example.com/p> <<<http://example.com/s1> <http://example.com/p1> <http://example.com/o1>>> <http://example.com/g>',
-        oxigraph.quad(oxigraph.namedNode('http://example.com/s'), oxigraph.namedNode('http://example.com/p'), oxigraph.quad(oxigraph.namedNode('http://example.com/s1'), oxigraph.namedNode('http://example.com/p1'), oxigraph.namedNode('http://example.com/o1')), oxigraph.namedNode('http://example.com/g')).toString()
-      )
-    })
-  })
-})
+import assert from "node:assert";
+import runTests from "../node_modules/@rdfjs/data-model/test/index.js";
+import oxigraph from "../pkg/oxigraph.js";
+
+runTests({ factory: oxigraph });
+
+describe("DataModel", () => {
+    describe("#toString()", () => {
+        it("namedNode().toString() should return SPARQL compatible syntax", () => {
+            assert.strictEqual(
+                "<http://example.com>",
+                oxigraph.namedNode("http://example.com").toString(),
+            );
+        });
+
+        it("blankNode().toString() should return SPARQL compatible syntax", () => {
+            assert.strictEqual("_:a", oxigraph.blankNode("a").toString());
+        });
+
+        it("literal().toString() should return SPARQL compatible syntax", () => {
+            assert.strictEqual('"a\\"b"@en', oxigraph.literal('a"b', "en").toString());
+        });
+
+        it("defaultGraph().toString() should return SPARQL compatible syntax", () => {
+            assert.strictEqual("DEFAULT", oxigraph.defaultGraph().toString());
+        });
+
+        it("variable().toString() should return SPARQL compatible syntax", () => {
+            assert.strictEqual("?a", oxigraph.variable("a").toString());
+        });
+
+        it("quad().toString() should return SPARQL compatible syntax", () => {
+            assert.strictEqual(
+                "<http://example.com/s> <http://example.com/p> <<<http://example.com/s1> <http://example.com/p1> <http://example.com/o1>>> <http://example.com/g>",
+                oxigraph
+                    .quad(
+                        oxigraph.namedNode("http://example.com/s"),
+                        oxigraph.namedNode("http://example.com/p"),
+                        oxigraph.quad(
+                            oxigraph.namedNode("http://example.com/s1"),
+                            oxigraph.namedNode("http://example.com/p1"),
+                            oxigraph.namedNode("http://example.com/o1"),
+                        ),
+                        oxigraph.namedNode("http://example.com/g"),
+                    )
+                    .toString(),
+            );
+        });
+    });
+});

@ -1,161 +1,208 @@
 /* global describe, it */

-import { Store } from '../pkg/oxigraph.js'
-import assert from 'assert'
-import dataModel from '@rdfjs/data-model'
-
-const ex = dataModel.namedNode('http://example.com')
-const triple = dataModel.quad(
-  dataModel.blankNode('s'),
-  dataModel.namedNode('http://example.com/p'),
-  dataModel.literal('o')
-)
-
-describe('Store', function () {
-  describe('#add()', function () {
-    it('an added quad should be in the store', function () {
-      const store = new Store()
-      store.add(dataModel.quad(ex, ex, triple))
-      assert(store.has(dataModel.quad(ex, ex, triple)))
-    })
-  })
-
-  describe('#delete()', function () {
-    it('an removed quad should not be in the store anymore', function () {
-      const store = new Store([dataModel.quad(triple, ex, ex)])
-      assert(store.has(dataModel.quad(triple, ex, ex)))
-      store.delete(dataModel.quad(triple, ex, ex))
-      assert(!store.has(dataModel.quad(triple, ex, ex)))
-    })
-  })
-
-  describe('#has()', function () {
-    it('an added quad should be in the store', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      assert(store.has(dataModel.quad(ex, ex, ex)))
-    })
-  })
-
-  describe('#size()', function () {
-    it('A store with one quad should have 1 for size', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      assert.strictEqual(1, store.size)
-    })
-  })
-
-  describe('#match_quads()', function () {
-    it('blank pattern should return all quads', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      const results = store.match()
-      assert.strictEqual(1, results.length)
-      assert(dataModel.quad(ex, ex, ex).equals(results[0]))
-    })
-  })
-
-  describe('#query()', function () {
-    it('ASK true', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      assert.strictEqual(true, store.query('ASK { ?s ?s ?s }'))
-    })
-
-    it('ASK false', function () {
-      const store = new Store()
-      assert.strictEqual(false, store.query('ASK { FILTER(false)}'))
-    })
-
-    it('CONSTRUCT', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      const results = store.query('CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }')
-      assert.strictEqual(1, results.length)
-      assert(dataModel.quad(ex, ex, ex).equals(results[0]))
-    })
-
-    it('SELECT', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      const results = store.query('SELECT ?s WHERE { ?s ?p ?o }')
-      assert.strictEqual(1, results.length)
-      assert(ex.equals(results[0].get('s')))
-    })
-
-    it('SELECT with NOW()', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      const results = store.query('SELECT (YEAR(NOW()) AS ?y) WHERE {}')
-      assert.strictEqual(1, results.length)
-    })
-
-    it('SELECT with RAND()', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      const results = store.query('SELECT (RAND() AS ?y) WHERE {}')
-      assert.strictEqual(1, results.length)
-    })
-  })
-
-  describe('#update()', function () {
-    it('INSERT DATA', function () {
-      const store = new Store()
-      store.update('INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }')
-      assert.strictEqual(1, store.size)
-    })
-
-    it('DELETE DATA', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      store.update('DELETE DATA { <http://example.com> <http://example.com> <http://example.com> }')
-      assert.strictEqual(0, store.size)
-    })
-
-    it('DELETE WHERE', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex)])
-      store.update('DELETE WHERE { ?v ?v ?v }')
-      assert.strictEqual(0, store.size)
-    })
-  })
-
-  describe('#load()', function () {
-    it('load NTriples in the default graph', function () {
-      const store = new Store()
-      store.load('<http://example.com> <http://example.com> <http://example.com> .', 'application/n-triples')
-      assert(store.has(dataModel.quad(ex, ex, ex)))
-    })
-
-    it('load NTriples in an other graph', function () {
-      const store = new Store()
-      store.load('<http://example.com> <http://example.com> <http://example.com> .', 'application/n-triples', null, ex)
-      assert(store.has(dataModel.quad(ex, ex, ex, ex)))
-    })
-
-    it('load Turtle with a base IRI', function () {
-      const store = new Store()
-      store.load('<http://example.com> <http://example.com> <> .', 'text/turtle', 'http://example.com')
-      assert(store.has(dataModel.quad(ex, ex, ex)))
-    })
-
-    it('load NQuads', function () {
-      const store = new Store()
-      store.load('<http://example.com> <http://example.com> <http://example.com> <http://example.com> .', 'application/n-quads')
-      assert(store.has(dataModel.quad(ex, ex, ex, ex)))
-    })
-
-    it('load TriG with a base IRI', function () {
-      const store = new Store()
-      store.load('GRAPH <> { <http://example.com> <http://example.com> <> }', 'application/trig', 'http://example.com')
-      assert(store.has(dataModel.quad(ex, ex, ex, ex)))
-    })
-  })
-
-  describe('#dump()', function () {
-    it('dump dataset content', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex, ex)])
-      assert.strictEqual('<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n', store.dump('application/n-quads'))
-    })
-
-    it('dump named graph content', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex, ex)])
-      assert.strictEqual('<http://example.com> <http://example.com> <http://example.com> .\n', store.dump('application/n-triples', ex))
-    })
-
-    it('dump default graph content', function () {
-      const store = new Store([dataModel.quad(ex, ex, ex, ex)])
-      assert.strictEqual('', store.dump('application/n-triples'))
-    })
-  })
-})
+import assert from "node:assert";
+import dataModel from "@rdfjs/data-model";
+import { Store } from "../pkg/oxigraph.js";
+
+const ex = dataModel.namedNode("http://example.com");
+const triple = dataModel.quad(
+    dataModel.blankNode("s"),
+    dataModel.namedNode("http://example.com/p"),
+    dataModel.literal("o"),
+);
+
+describe("Store", () => {
+    describe("#add()", () => {
+        it("an added quad should be in the store", () => {
+            const store = new Store();
+            store.add(dataModel.quad(ex, ex, triple));
+            assert(store.has(dataModel.quad(ex, ex, triple)));
+        });
+    });
+
+    describe("#delete()", () => {
+        it("an removed quad should not be in the store anymore", () => {
+            const store = new Store([dataModel.quad(triple, ex, ex)]);
+            assert(store.has(dataModel.quad(triple, ex, ex)));
+            store.delete(dataModel.quad(triple, ex, ex));
+            assert(!store.has(dataModel.quad(triple, ex, ex)));
+        });
+    });
+
+    describe("#has()", () => {
+        it("an added quad should be in the store", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            assert(store.has(dataModel.quad(ex, ex, ex)));
+        });
+    });
+
+    describe("#size()", () => {
+        it("A store with one quad should have 1 for size", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            assert.strictEqual(1, store.size);
+        });
+    });
+
+    describe("#match_quads()", () => {
+        it("blank pattern should return all quads", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            const results = store.match();
+            assert.strictEqual(1, results.length);
+            assert(dataModel.quad(ex, ex, ex).equals(results[0]));
+        });
+    });
+
+    describe("#query()", () => {
+        it("ASK true", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            assert.strictEqual(true, store.query("ASK { ?s ?s ?s }"));
+        });
+
+        it("ASK false", () => {
+            const store = new Store();
+            assert.strictEqual(false, store.query("ASK { FILTER(false)}"));
+        });
+
+        it("CONSTRUCT", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            const results = store.query("CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }");
+            assert.strictEqual(1, results.length);
+            assert(dataModel.quad(ex, ex, ex).equals(results[0]));
+        });
+
+        it("SELECT", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            const results = store.query("SELECT ?s WHERE { ?s ?p ?o }");
+            assert.strictEqual(1, results.length);
+            assert(ex.equals(results[0].get("s")));
+        });
+
+        it("SELECT with NOW()", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            const results = store.query(
+                "SELECT * WHERE { FILTER(2022 <= YEAR(NOW()) && YEAR(NOW()) <= 2100) }",
+            );
+            assert.strictEqual(1, results.length);
+        });
+
+        it("SELECT with RAND()", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            const results = store.query("SELECT (RAND() AS ?y) WHERE {}");
+            assert.strictEqual(1, results.length);
+        });
+
+        it("SELECT with base IRI", () => {
+            const store = new Store();
+            const results = store.query("SELECT * WHERE { BIND(<t> AS ?t) }", {
+                base_iri: "http://example.com/",
+            });
+            assert.strictEqual(1, results.length);
+        });
+
+        it("SELECT with union graph", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
+            const results = store.query("SELECT * WHERE { ?s ?p ?o }", {
+                use_default_graph_as_union: true,
+            });
+            assert.strictEqual(1, results.length);
+        });
+    });
+
+    describe("#update()", () => {
+        it("INSERT DATA", () => {
+            const store = new Store();
+            store.update(
+                "INSERT DATA { <http://example.com> <http://example.com> <http://example.com> }",
+            );
+            assert.strictEqual(1, store.size);
+        });
+
+        it("DELETE DATA", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            store.update(
+                "DELETE DATA { <http://example.com> <http://example.com> <http://example.com> }",
+            );
+            assert.strictEqual(0, store.size);
+        });
+
+        it("DELETE WHERE", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex)]);
+            store.update("DELETE WHERE { ?v ?v ?v }");
+            assert.strictEqual(0, store.size);
+        });
+    });
+
+    describe("#load()", () => {
+        it("load NTriples in the default graph", () => {
+            const store = new Store();
+            store.load(
+                "<http://example.com> <http://example.com> <http://example.com> .",
+                "application/n-triples",
+            );
+            assert(store.has(dataModel.quad(ex, ex, ex)));
+        });
+
+        it("load NTriples in an other graph", () => {
+            const store = new Store();
+            store.load(
+                "<http://example.com> <http://example.com> <http://example.com> .",
+                "application/n-triples",
+                null,
+                ex,
+            );
+            assert(store.has(dataModel.quad(ex, ex, ex, ex)));
+        });
+
+        it("load Turtle with a base IRI", () => {
+            const store = new Store();
+            store.load(
+                "<http://example.com> <http://example.com> <> .",
+                "text/turtle",
+                "http://example.com",
+            );
+            assert(store.has(dataModel.quad(ex, ex, ex)));
+        });
+
+        it("load NQuads", () => {
+            const store = new Store();
+            store.load(
+                "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .",
+                "application/n-quads",
+            );
+            assert(store.has(dataModel.quad(ex, ex, ex, ex)));
+        });
+
+        it("load TriG with a base IRI", () => {
+            const store = new Store();
+            store.load(
+                "GRAPH <> { <http://example.com> <http://example.com> <> }",
+                "application/trig",
+                "http://example.com",
+            );
+            assert(store.has(dataModel.quad(ex, ex, ex, ex)));
+        });
+    });
+
+    describe("#dump()", () => {
+        it("dump dataset content", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
+            assert.strictEqual(
+                "<http://example.com> <http://example.com> <http://example.com> <http://example.com> .\n",
+                store.dump("application/n-quads"),
+            );
+        });
+
+        it("dump named graph content", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
+            assert.strictEqual(
+                "<http://example.com> <http://example.com> <http://example.com> .\n",
+                store.dump("application/n-triples", ex),
+            );
+        });
+
+        it("dump default graph content", () => {
+            const store = new Store([dataModel.quad(ex, ex, ex, ex)]);
+            assert.strictEqual("", store.dump("application/n-triples", dataModel.defaultGraph()));
+        });
+    });
+});

@ -1,63 +0,0 @@
[package]
name = "oxigraph"
version = "0.4.0-alpha"
authors = ["Tpt <thomas@pellissier-tanon.fr>"]
license = "MIT OR Apache-2.0"
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib"
homepage = "https://oxigraph.org/"
description = """
a SPARQL database and RDF toolkit
"""
edition = "2021"
[package.metadata.docs.rs]
all-features = true
[features]
default = []
http_client = ["oxhttp", "oxhttp/rustls"]
[dependencies]
rand = "0.8"
md-5 = "0.10"
sha-1 = "0.10"
sha2 = "0.10"
digest = "0.10"
regex = "1"
oxilangtag = "0.1"
oxiri = "0.2"
rio_api = "0.7"
rio_turtle = "0.7"
rio_xml = "0.7"
hex = "0.4"
nom = "7"
siphasher = "0.3"
lazy_static = "1"
sysinfo = "0.26"
oxrdf = { version = "0.1.0", path="oxrdf", features = ["rdf-star"] }
spargebra = { version = "0.3.0-alpha", path="spargebra", features = ["rdf-star", "ex-lateral"] }
sparesults = { version = "0.1.1", path="sparesults", features = ["rdf-star"] }
[target.'cfg(not(target_arch = "wasm32"))'.dependencies]
libc = "0.2"
oxrocksdb-sys = { version = "0.3.7", path="../oxrocksdb-sys" }
oxhttp = { version = "0.1", optional = true }
[target.'cfg(target_arch = "wasm32")'.dependencies]
js-sys = "0.3"
getrandom = {version="0.2", features=["js"]}
[dev-dependencies]
criterion = "0.4"
oxhttp = "0.1"
zstd = "0.11"
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen-test = "0.3"
[[bench]]
name = "store"
harness = false

@ -1,72 +1,13 @@
Oxigraph Oxigraph Rust crates
======== ====================
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph) Oxigraph is implemented in Rust.
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph) It is composed on a main library, [`oxigraph`](./oxigraph) and a set of smaller crates used by the `oxigraph` crate:
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph) * [`oxrdf`](./oxrdf), datastructures encoding RDF basic concepts (the `model` module of the `oxigraph` crate).
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) * [`oxrdfio`](./oxrdfio), a unified parser and serializer API for RDF formats (the `io` module of the `oxigraph` crate). It itself relies on:
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) * [`oxttl`](./oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](./oxrdfxml), RDF/XML parsing and serialization.
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. * [`spargebra`](./spargebra), a SPARQL parser.
* [`sparesults`](./sparesults), parsers and serializers for SPARQL result formats (the `sparql::results` module of the `oxigraph` crate).
Its goal is to provide a compliant, safe and fast on-disk graph database. * [`sparopt`](./sparesults), a SPARQL optimizer.
It also provides a set of utility functions for reading, writing, and processing RDF files. * [`oxsdatatypes`](./oxsdatatypes), an implementation of some XML Schema datatypes.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a standalone HTTP server](https://crates.io/crates/oxigraph_server) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval using the [Rio library](https://github.com/oxigraph/rio).
* [SPARQL Query Results XML Format](http://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
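The same `Store` can also evaluate SPARQL updates. A minimal sketch (not part of the original example, relying on the `Store::update` method):
```rust
use oxigraph::store::Store;

let store = Store::new().unwrap();
// SPARQL update: insert one triple into the default graph
store.update("INSERT DATA { <http://example.com/s> <http://example.com/p> <http://example.com/o> }").unwrap();
assert_eq!(store.len().unwrap(), 1);
```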
Some parts of this library are available as standalone crates:
* [`oxrdf`](https://crates.io/crates/oxrdf) provides datastructures encoding RDF basic concepts (the `oxigraph::model` module).
* [`spargebra`](https://crates.io/crates/spargebra) provides a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults) provides parsers and serializers for SPARQL result formats.
To build the library, don't forget to fetch the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including them, or run `git submodule update --init` in an already cloned repository.
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,208 +0,0 @@
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use oxhttp::model::{Method, Request, Status};
use oxigraph::io::GraphFormat;
use oxigraph::model::GraphNameRef;
use oxigraph::sparql::{Query, QueryResults, Update};
use oxigraph::store::Store;
use rand::random;
use std::env::temp_dir;
use std::fs::{remove_dir_all, File};
use std::io::{BufRead, BufReader, Cursor, Read};
use std::path::{Path, PathBuf};
fn store_load(c: &mut Criterion) {
{
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 1000 in memory", |b| {
b.iter(|| {
let store = Store::new().unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path.0).unwrap();
do_load(&store, &data);
})
});
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path.0).unwrap();
do_bulk_load(&store, &data);
})
});
}
{
let mut data = Vec::new();
read_data("explore-10000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let mut group = c.benchmark_group("store load large");
group.throughput(Throughput::Bytes(data.len() as u64));
group.sample_size(10);
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| {
b.iter(|| {
let path = TempDir::default();
let store = Store::open(&path.0).unwrap();
do_bulk_load(&store, &data);
})
});
}
}
fn do_load(store: &Store, data: &[u8]) {
store
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap();
store.optimize().unwrap();
}
fn do_bulk_load(store: &Store, data: &[u8]) {
store
.bulk_loader()
.load_graph(
Cursor::new(&data),
GraphFormat::NTriples,
GraphNameRef::DefaultGraph,
None,
)
.unwrap();
store.optimize().unwrap();
}
fn store_query_and_update(c: &mut Criterion) {
let mut data = Vec::new();
read_data("explore-1000.nt.zst")
.read_to_end(&mut data)
.unwrap();
let operations = read_data("mix-exploreAndUpdate-1000.tsv.zst")
.lines()
.map(|l| {
let l = l.unwrap();
let mut parts = l.trim().split('\t');
let kind = parts.next().unwrap();
let operation = parts.next().unwrap();
match kind {
"query" => Operation::Query(Query::parse(operation, None).unwrap()),
"update" => Operation::Update(Update::parse(operation, None).unwrap()),
_ => panic!("Unexpected operation kind {}", kind),
}
})
.collect::<Vec<_>>();
let query_operations = operations
.iter()
.filter(|o| matches!(o, Operation::Query(_)))
.cloned()
.collect::<Vec<_>>();
let mut group = c.benchmark_group("store operations");
group.throughput(Throughput::Elements(operations.len() as u64));
group.sample_size(10);
{
let memory_store = Store::new().unwrap();
do_bulk_load(&memory_store, &data);
group.bench_function("BSBM explore 1000 query in memory", |b| {
b.iter(|| run_operation(&memory_store, &query_operations))
});
group.bench_function("BSBM explore 1000 queryAndUpdate in memory", |b| {
b.iter(|| run_operation(&memory_store, &operations))
});
}
{
let path = TempDir::default();
let disk_store = Store::open(&path.0).unwrap();
do_bulk_load(&disk_store, &data);
group.bench_function("BSBM explore 1000 query on disk", |b| {
b.iter(|| run_operation(&disk_store, &query_operations))
});
group.bench_function("BSBM explore 1000 queryAndUpdate on disk", |b| {
b.iter(|| run_operation(&disk_store, &operations))
});
}
}
fn run_operation(store: &Store, operations: &[Operation]) {
for operation in operations {
match operation {
Operation::Query(q) => match store.query(q.clone()).unwrap() {
QueryResults::Boolean(_) => (),
QueryResults::Solutions(s) => {
for s in s {
s.unwrap();
}
}
QueryResults::Graph(g) => {
for t in g {
t.unwrap();
}
}
},
Operation::Update(u) => store.update(u.clone()).unwrap(),
}
}
}
criterion_group!(store, store_query_and_update, store_load);
criterion_main!(store);
fn read_data(file: &str) -> impl BufRead {
if !Path::new(file).exists() {
let mut client = oxhttp::Client::new();
client.set_redirection_limit(5);
let url = format!(
"https://github.com/Tpt/bsbm-tools/releases/download/v0.2/{}",
file
);
let request = Request::builder(Method::GET, url.parse().unwrap()).build();
let response = client.request(request).unwrap();
assert_eq!(
response.status(),
Status::OK,
"{}",
response.into_body().to_string().unwrap()
);
std::io::copy(&mut response.into_body(), &mut File::create(file).unwrap()).unwrap();
}
BufReader::new(zstd::Decoder::new(File::open(file).unwrap()).unwrap())
}
#[allow(clippy::large_enum_variant)]
#[derive(Clone)]
enum Operation {
Query(Query),
Update(Update),
}
struct TempDir(PathBuf);
impl Default for TempDir {
fn default() -> Self {
Self(temp_dir().join(format!("oxigraph-bench-{}", random::<u128>())))
}
}
impl Drop for TempDir {
fn drop(&mut self) {
remove_dir_all(&self.0).unwrap()
}
}

@ -0,0 +1,59 @@
[package]
name = "oxigraph"
version.workspace = true
authors.workspace = true
license.workspace = true
readme = "README.md"
keywords = ["RDF", "SPARQL", "graph-database", "database"]
categories = ["database-implementations"]
repository = "https://github.com/oxigraph/oxigraph/tree/main/lib/oxigraph"
homepage = "https://oxigraph.org/"
documentation = "https://docs.rs/oxigraph"
description = """
a SPARQL database and RDF toolkit
"""
edition.workspace = true
rust-version.workspace = true
[features]
js = ["getrandom/js", "oxsdatatypes/js", "js-sys"]
[dependencies]
digest.workspace = true
hex.workspace = true
json-event-parser.workspace = true
md-5.workspace = true
oxilangtag.workspace = true
oxiri.workspace = true
oxrdf = { workspace = true, features = ["rdf-star", "oxsdatatypes"] }
oxrdfio = { workspace = true, features = ["rdf-star"] }
oxsdatatypes.workspace = true
rand.workspace = true
regex.workspace = true
sha1.workspace = true
sha2.workspace = true
siphasher.workspace = true
sparesults = { workspace = true, features = ["rdf-star"] }
spargebra = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
sparopt = { workspace = true, features = ["rdf-star", "sep-0002", "sep-0006"] }
thiserror.workspace = true
[target.'cfg(not(target_family = "wasm"))'.dependencies]
libc = "0.2"
rocksdb.workspace = true
[target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies]
getrandom.workspace = true
js-sys = { workspace = true, optional = true }
[target.'cfg(not(target_family = "wasm"))'.dev-dependencies]
codspeed-criterion-compat.workspace = true
zstd.workspace = true
[lints]
workspace = true
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "docsrs"]

@ -0,0 +1,82 @@
Oxigraph
========
[![Latest Version](https://img.shields.io/crates/v/oxigraph.svg)](https://crates.io/crates/oxigraph)
[![Released API docs](https://docs.rs/oxigraph/badge.svg)](https://docs.rs/oxigraph)
[![Crates.io downloads](https://img.shields.io/crates/d/oxigraph)](https://crates.io/crates/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community)
Oxigraph is a graph database library implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.
Its goal is to provide a compliant, safe and fast on-disk graph database.
It also provides a set of utility functions for reading, writing, and processing RDF files.
Oxigraph is in heavy development and SPARQL query evaluation has not been optimized yet.
Oxigraph also provides [a CLI tool](https://crates.io/crates/oxigraph-cli) and [a Python library](https://pyoxigraph.readthedocs.io/) based on this library.
Oxigraph implements the following specifications:
* [SPARQL 1.1 Query](https://www.w3.org/TR/sparql11-query/), [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/), and [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/).
* [Turtle](https://www.w3.org/TR/turtle/), [TriG](https://www.w3.org/TR/trig/), [N-Triples](https://www.w3.org/TR/n-triples/), [N-Quads](https://www.w3.org/TR/n-quads/), and [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) RDF serialization formats for both data ingestion and retrieval.
* [SPARQL Query Results XML Format](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
A preliminary benchmark [is provided](../bench/README.md). Oxigraph internal design [is described on the wiki](https://github.com/oxigraph/oxigraph/wiki/Architecture).
The main entry point of Oxigraph is the [`Store`](store::Store) struct:
```rust
use oxigraph::store::Store;
use oxigraph::model::*;
use oxigraph::sparql::QueryResults;
let store = Store::new().unwrap();
// insertion
let ex = NamedNode::new("http://example.com").unwrap();
let quad = Quad::new(ex.clone(), ex.clone(), ex.clone(), GraphName::DefaultGraph);
store.insert(&quad).unwrap();
// quad filter
let results = store.quads_for_pattern(Some(ex.as_ref().into()), None, None, None).collect::<Result<Vec<Quad>,_>>().unwrap();
assert_eq!(vec![quad], results);
// SPARQL query
if let QueryResults::Solutions(mut solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }").unwrap() {
assert_eq!(solutions.next().unwrap().unwrap().get("s"), Some(&ex.into()));
}
```
It is based on these crates that can be used separately:
* [`oxrdf`](https://crates.io/crates/oxrdf), datastructures encoding RDF basic concepts (the [`oxigraph::model`](crate::model) module).
* [`oxrdfio`](https://crates.io/crates/oxrdfio), a unified parser and serializer API for RDF formats (the [`oxigraph::io`](crate::io) module). It itself relies on:
* [`oxttl`](https://crates.io/crates/oxttl), N-Triple, N-Quad, Turtle, TriG and N3 parsing and serialization.
* [`oxrdfxml`](https://crates.io/crates/oxrdfxml), RDF/XML parsing and serialization.
* [`spargebra`](https://crates.io/crates/spargebra), a SPARQL parser.
* [`sparesults`](https://crates.io/crates/sparesults), parsers and serializers for SPARQL result formats (the [`oxigraph::sparql::results`](crate::sparql::results) module).
* [`sparopt`](https://crates.io/crates/sparopt), a SPARQL optimizer.
* [`oxsdatatypes`](https://crates.io/crates/oxsdatatypes), an implementation of some XML Schema datatypes.
To build the library locally, don't forget to fetch the submodules: use `git clone --recursive https://github.com/oxigraph/oxigraph.git` to clone the repository including them, or run `git submodule update --init` in an already cloned repository.
It is possible to disable the RocksDB storage backend to only use the in-memory fallback by disabling the `rocksdb` default feature:
```toml
oxigraph = { version = "*", default-features = false }
```
This is the default behavior when compiling Oxigraph to WASM.
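The in-memory fallback keeps the same `Store` API; a minimal sketch (assuming only the on-disk `Store::open` paths disappear when the `rocksdb` feature is off):
```rust
use oxigraph::store::Store;

// Without the `rocksdb` feature this store lives purely in memory.
let store = Store::new().unwrap();
assert!(store.is_empty().unwrap());
```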
## License
This project is licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE](../LICENSE-APACHE) or
`<http://www.apache.org/licenses/LICENSE-2.0>`)
* MIT license ([LICENSE-MIT](../LICENSE-MIT) or
`<http://opensource.org/licenses/MIT>`)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in Oxigraph by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.

@ -1,8 +1,13 @@
#![allow(deprecated)]

use oxrdfio::{RdfFormat, RdfParser, RdfSerializer};

/// [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum GraphFormat {
    /// [N-Triples](https://www.w3.org/TR/n-triples/)
    NTriples,
@ -18,7 +23,10 @@ impl GraphFormat {
    /// ```
    /// use oxigraph::io::GraphFormat;
    ///
    /// assert_eq!(
    ///     GraphFormat::NTriples.iri(),
    ///     "http://www.w3.org/ns/formats/N-Triples"
    /// )
    /// ```
    #[inline]
    pub fn iri(self) -> &'static str {
@ -60,6 +68,7 @@ impl GraphFormat {
            Self::RdfXml => "rdf",
        }
    }

    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
@ -69,7 +78,10 @@ impl GraphFormat {
    /// ```
    /// use oxigraph::io::GraphFormat;
    ///
    /// assert_eq!(
    ///     GraphFormat::from_media_type("text/turtle; charset=utf-8"),
    ///     Some(GraphFormat::Turtle)
    /// )
    /// ```
    #[inline]
    pub fn from_media_type(media_type: &str) -> Option<Self> {
@ -89,7 +101,10 @@ impl GraphFormat {
    /// ```
    /// use oxigraph::io::GraphFormat;
    ///
    /// assert_eq!(
    ///     GraphFormat::from_extension("nt"),
    ///     Some(GraphFormat::NTriples)
    /// )
    /// ```
    #[inline]
    pub fn from_extension(extension: &str) -> Option<Self> {
@ -102,11 +117,37 @@ impl GraphFormat {
        }
    }
}

impl From<GraphFormat> for RdfFormat {
    #[inline]
    fn from(format: GraphFormat) -> Self {
        match format {
            GraphFormat::NTriples => Self::NTriples,
            GraphFormat::Turtle => Self::Turtle,
            GraphFormat::RdfXml => Self::RdfXml,
        }
    }
}

impl From<GraphFormat> for RdfParser {
    #[inline]
    fn from(format: GraphFormat) -> Self {
        RdfFormat::from(format).into()
    }
}

impl From<GraphFormat> for RdfSerializer {
    #[inline]
    fn from(format: GraphFormat) -> Self {
        RdfFormat::from(format).into()
    }
}

/// [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) serialization formats.
///
/// This enumeration is non exhaustive. New formats like JSON-LD will be added in the future.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
#[non_exhaustive]
#[deprecated(note = "use RdfFormat instead", since = "0.4.0")]
pub enum DatasetFormat {
    /// [N-Quads](https://www.w3.org/TR/n-quads/)
    NQuads,
@ -120,7 +161,10 @@ impl DatasetFormat {
    /// ```
    /// use oxigraph::io::DatasetFormat;
    ///
    /// assert_eq!(
    ///     DatasetFormat::NQuads.iri(),
    ///     "http://www.w3.org/ns/formats/N-Quads"
    /// )
    /// ```
    #[inline]
    pub fn iri(self) -> &'static str {
@ -159,6 +203,7 @@ impl DatasetFormat {
            Self::TriG => "trig",
        }
    }

    /// Looks for a known format from a media type.
    ///
    /// It supports some media type aliases.
@ -167,7 +212,10 @@ impl DatasetFormat {
    /// ```
    /// use oxigraph::io::DatasetFormat;
    ///
    /// assert_eq!(
    ///     DatasetFormat::from_media_type("application/n-quads; charset=utf-8"),
    ///     Some(DatasetFormat::NQuads)
    /// )
    /// ```
    #[inline]
    pub fn from_media_type(media_type: &str) -> Option<Self> {
@ -186,7 +234,10 @@ impl DatasetFormat {
    /// ```
    /// use oxigraph::io::DatasetFormat;
    ///
    /// assert_eq!(
    ///     DatasetFormat::from_extension("nq"),
    ///     Some(DatasetFormat::NQuads)
    /// )
    /// ```
    #[inline]
    pub fn from_extension(extension: &str) -> Option<Self> {
@ -198,12 +249,36 @@ impl DatasetFormat {
        }
    }
}

impl From<DatasetFormat> for RdfFormat {
    #[inline]
    fn from(format: DatasetFormat) -> Self {
        match format {
            DatasetFormat::NQuads => Self::NQuads,
            DatasetFormat::TriG => Self::TriG,
        }
    }
}

impl From<DatasetFormat> for RdfParser {
    #[inline]
    fn from(format: DatasetFormat) -> Self {
        RdfFormat::from(format).into()
    }
}

impl From<DatasetFormat> for RdfSerializer {
    #[inline]
    fn from(format: DatasetFormat) -> Self {
        RdfFormat::from(format).into()
    }
}

impl TryFrom<DatasetFormat> for GraphFormat {
    type Error = ();

    /// Attempts to find a graph format that is a subset of this [`DatasetFormat`].
    #[inline]
    fn try_from(value: DatasetFormat) -> Result<Self, Self::Error> {
        match value {
            DatasetFormat::NQuads => Ok(Self::NTriples),
            DatasetFormat::TriG => Ok(Self::Turtle),
@ -216,7 +291,7 @@ impl TryFrom<GraphFormat> for DatasetFormat {
    /// Attempts to find a dataset format that is a superset of this [`GraphFormat`].
    #[inline]
    fn try_from(value: GraphFormat) -> Result<Self, Self::Error> {
        match value {
            GraphFormat::NTriples => Ok(Self::NQuads),
            GraphFormat::Turtle => Ok(Self::TriG),
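The `From` impls added above make migrating off the deprecated enums mechanical. A minimal sketch of the conversion (hypothetical usage built only on the impls shown in this diff):
```rust
#![allow(deprecated)]
use oxigraph::io::{GraphFormat, RdfFormat};

// The deprecated format enums convert losslessly into the new RdfFormat.
let format: RdfFormat = GraphFormat::Turtle.into();
assert_eq!(format, RdfFormat::Turtle);
```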

@ -0,0 +1,39 @@
//! Utilities to read and write RDF graphs and datasets using [OxRDF I/O](https://crates.io/crates/oxrdfio).
//!
//! The entry points of this module are the two [`RdfParser`] and [`RdfSerializer`] structs.
//!
//! Usage example converting a Turtle file to a N-Triples file:
//! ```
//! use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer};
//!
//! let turtle_file = b"@base <http://example.com/> .
//! @prefix schema: <http://schema.org/> .
//! <foo> a schema:Person ;
//! schema:name \"Foo\" .
//! <bar> a schema:Person ;
//! schema:name \"Bar\" .";
//!
//! let ntriples_file = b"<http://example.com/foo> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/foo> <http://schema.org/name> \"Foo\" .
//! <http://example.com/bar> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person> .
//! <http://example.com/bar> <http://schema.org/name> \"Bar\" .
//! ";
//!
//! let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
//! for quad in RdfParser::from_format(RdfFormat::Turtle).parse_read(turtle_file.as_ref()) {
//! writer.write_quad(&quad.unwrap()).unwrap();
//! }
//! assert_eq!(writer.finish().unwrap(), ntriples_file);
//! ```
mod format;
pub mod read;
pub mod write;
#[allow(deprecated)]
pub use self::format::{DatasetFormat, GraphFormat};
#[allow(deprecated)]
pub use self::read::{DatasetParser, GraphParser};
#[allow(deprecated)]
pub use self::write::{DatasetSerializer, GraphSerializer};
pub use oxrdfio::*;

@ -0,0 +1,199 @@
#![allow(deprecated)]
//! Utilities to read RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{FromReadQuadReader, RdfParseError, RdfParser};
use std::io::Read;
/// Parsers for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct GraphParser {
inner: RdfParser,
}
impl GraphParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into())
.without_named_graphs()
.rename_blank_nodes(),
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "</s> </p> </o> .";
///
/// let parser =
/// GraphParser::from_format(GraphFormat::Turtle).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of triples.
pub fn read_triples<R: Read>(self, reader: R) -> TripleReader<R> {
TripleReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read triples.
/// Could be built using a [`GraphParser`].
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
///
/// let parser = GraphParser::from_format(GraphFormat::NTriples);
/// let triples = parser
/// .read_triples(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct TripleReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for TripleReader<R> {
type Item = Result<Triple, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map(Into::into).map_err(Into::into))
}
}
/// A parser for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[deprecated(note = "use RdfParser instead", since = "0.4.0")]
pub struct DatasetParser {
inner: RdfParser,
}
impl DatasetParser {
/// Builds a parser for the given format.
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfParser::from_format(format.into()).rename_blank_nodes(),
}
}
/// Provides an IRI that could be used to resolve the file relative IRIs.
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<g> { </s> </p> </o> }";
///
/// let parser =
/// DatasetParser::from_format(DatasetFormat::TriG).with_base_iri("http://example.com")?;
/// let triples = parser
/// .read_quads(file.as_bytes())
/// .collect::<Result<Vec<_>, _>>()?;
///
/// assert_eq!(triples.len(), 1);
/// assert_eq!(triples[0].subject.to_string(), "<http://example.com/s>");
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn with_base_iri(self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
Ok(Self {
inner: self.inner.with_base_iri(base_iri)?,
})
}
/// Executes the parsing itself on a [`Read`] implementation and returns an iterator of quads.
pub fn read_quads<R: Read>(self, reader: R) -> QuadReader<R> {
QuadReader {
parser: self.inner.parse_read(reader),
}
}
}
/// An iterator yielding read quads.
/// Could be built using a [`DatasetParser`].
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetParser};
///
/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
///
/// let parser = DatasetParser::from_format(DatasetFormat::NQuads);
/// let quads = parser.read_quads(file.as_bytes()).collect::<Result<Vec<_>,_>>()?;
///
/// assert_eq!(quads.len(), 1);
/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
/// # std::io::Result::Ok(())
/// ```
#[must_use]
pub struct QuadReader<R: Read> {
parser: FromReadQuadReader<R>,
}
impl<R: Read> Iterator for QuadReader<R> {
type Item = Result<Quad, RdfParseError>;
fn next(&mut self) -> Option<Self::Item> {
Some(self.parser.next()?.map_err(Into::into))
}
}
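For comparison with the deprecated wrappers above, a minimal migration sketch using `RdfParser` directly (assuming the same base-IRI semantics as `GraphParser::with_base_iri`, which simply forwards to it):
```rust
use oxigraph::io::{RdfFormat, RdfParser};

let parser = RdfParser::from_format(RdfFormat::Turtle)
    .with_base_iri("http://example.com")
    .unwrap();
// Relative IRIs in the input resolve against the base IRI
let quads = parser
    .parse_read("</s> </p> </o> .".as_bytes())
    .collect::<Result<Vec<_>, _>>()
    .unwrap();
assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
```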

@ -0,0 +1,185 @@
#![allow(deprecated)]
//! Utilities to write RDF graphs and datasets.
use crate::io::{DatasetFormat, GraphFormat};
use crate::model::*;
use oxrdfio::{RdfSerializer, ToWriteQuadWriter};
use std::io::{self, Write};
/// A serializer for RDF graph serialization formats.
///
/// It currently supports the following formats:
/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`GraphFormat::NTriples`])
/// * [Turtle](https://www.w3.org/TR/turtle/) ([`GraphFormat::Turtle`])
/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`GraphFormat::RdfXml`])
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct GraphSerializer {
inner: RdfSerializer,
}
impl GraphSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: GraphFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation
pub fn triple_writer<W: Write>(self, write: W) -> TripleWriter<W> {
TripleWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing triples.
/// Could be built using a [`GraphSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](TripleWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{GraphFormat, GraphSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = GraphSerializer::from_format(GraphFormat::NTriples).triple_writer(&mut buffer);
/// writer.write(&Triple {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(
/// buffer.as_slice(),
/// "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct TripleWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> TripleWriter<W> {
/// Writes a triple
pub fn write<'a>(&mut self, triple: impl Into<TripleRef<'a>>) -> io::Result<()> {
self.writer.write_triple(triple)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
/// A serializer for RDF dataset serialization formats.
///
/// It currently supports the following formats:
/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`DatasetFormat::NQuads`])
/// * [TriG](https://www.w3.org/TR/trig/) ([`DatasetFormat::TriG`])
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[deprecated(note = "use RdfSerializer instead", since = "0.4.0")]
pub struct DatasetSerializer {
inner: RdfSerializer,
}
impl DatasetSerializer {
/// Builds a serializer for the given format
#[inline]
pub fn from_format(format: DatasetFormat) -> Self {
Self {
inner: RdfSerializer::from_format(format.into()),
}
}
/// Returns a [`QuadWriter`] allowing writing quads into the given [`Write`] implementation
pub fn quad_writer<W: Write>(self, write: W) -> QuadWriter<W> {
QuadWriter {
writer: self.inner.serialize_to_write(write),
}
}
}
/// Allows writing quads.
/// Could be built using a [`DatasetSerializer`].
///
/// <div class="warning">
///
/// Do not forget to run the [`finish`](QuadWriter::finish()) method to properly write the last bytes of the file.</div>
///
/// ```
/// use oxigraph::io::{DatasetFormat, DatasetSerializer};
/// use oxigraph::model::*;
///
/// let mut buffer = Vec::new();
/// let mut writer = DatasetSerializer::from_format(DatasetFormat::NQuads).quad_writer(&mut buffer);
/// writer.write(&Quad {
/// subject: NamedNode::new("http://example.com/s")?.into(),
/// predicate: NamedNode::new("http://example.com/p")?,
/// object: NamedNode::new("http://example.com/o")?.into(),
/// graph_name: NamedNode::new("http://example.com/g")?.into(),
/// })?;
/// writer.finish()?;
///
/// assert_eq!(buffer.as_slice(), "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .\n".as_bytes());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct QuadWriter<W: Write> {
writer: ToWriteQuadWriter<W>,
}
impl<W: Write> QuadWriter<W> {
/// Writes a quad
pub fn write<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> io::Result<()> {
self.writer.write_quad(quad)
}
/// Writes the last bytes of the file
pub fn finish(self) -> io::Result<()> {
self.writer.finish()?.flush()
}
}
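The deprecated writers above are thin wrappers over `RdfSerializer`; a minimal migration sketch (output assumed identical for N-Triples):
```rust
use oxigraph::io::{RdfFormat, RdfSerializer};
use oxigraph::model::*;

let mut writer = RdfSerializer::from_format(RdfFormat::NTriples).serialize_to_write(Vec::new());
writer
    .write_quad(QuadRef::new(
        NamedNodeRef::new("http://example.com/s").unwrap(),
        NamedNodeRef::new("http://example.com/p").unwrap(),
        NamedNodeRef::new("http://example.com/o").unwrap(),
        GraphNameRef::DefaultGraph,
    ))
    .unwrap();
// finish() hands back the underlying Vec<u8> buffer
let buffer = writer.finish().unwrap();
assert_eq!(
    buffer.as_slice(),
    "<http://example.com/s> <http://example.com/p> <http://example.com/o> .\n".as_bytes()
);
```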

@ -0,0 +1,12 @@
#![doc = include_str!("../README.md")]
#![doc(test(attr(deny(warnings))))]
#![doc(test(attr(allow(deprecated))))]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
pub mod io;
pub mod model;
pub mod sparql;
mod storage;
pub mod store;

@ -0,0 +1,22 @@
//! Implements data structures for [RDF 1.1 Concepts](https://www.w3.org/TR/rdf11-concepts/) using [OxRDF](https://crates.io/crates/oxrdf).
//!
//! Usage example:
//!
//! ```
//! use oxigraph::model::*;
//!
//! let mut graph = Graph::default();
//!
//! // insertion
//! let ex = NamedNodeRef::new("http://example.com").unwrap();
//! let triple = TripleRef::new(ex, ex, ex);
//! graph.insert(triple);
//!
//! // simple filter
//! let results: Vec<_> = graph.triples_for_subject(ex).collect();
//! assert_eq!(vec![triple], results);
//! ```
pub use oxrdf::*;
pub use spargebra::term::GroundQuad;
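The re-exported OxRDF types also include a quad-level counterpart to the `Graph` example above; a minimal sketch (assuming the standard OxRDF `Dataset` API):
```rust
use oxigraph::model::*;

let mut dataset = Dataset::default();
let ex = NamedNodeRef::new("http://example.com").unwrap();
// A quad is a triple plus a graph name
dataset.insert(QuadRef::new(ex, ex, ex, ex));
assert_eq!(dataset.len(), 1);
```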

@ -1,10 +1,10 @@
//! [SPARQL 1.1 Query Algebra](https://www.w3.org/TR/sparql11-query/#sparqlQuery)
//!
//! The root type for SPARQL queries is [`Query`] and the root type for updates is [`Update`].

use crate::model::*;
use crate::sparql::eval::Timer;
use oxsdatatypes::DayTimeDuration;
use spargebra::GraphUpdateOperation;
use std::fmt;
use std::str::FromStr;
@ -23,27 +23,31 @@ use std::str::FromStr;
/// // We edit the query dataset specification
/// let default = vec![NamedNode::new("http://example.com")?.into()];
/// query.dataset_mut().set_default_graph(default.clone());
/// assert_eq!(
///     query.dataset().default_graph_graphs(),
///     Some(default.as_slice())
/// );
/// # Ok::<_, Box<dyn std::error::Error>>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Query {
    pub(super) inner: spargebra::Query,
    pub(super) dataset: QueryDataset,
    pub(super) parsing_duration: Option<DayTimeDuration>,
}

impl Query {
    /// Parses a SPARQL query with an optional base IRI to resolve relative IRIs in the query.
    pub fn parse(
        query: &str,
        base_iri: Option<&str>,
    ) -> Result<Self, spargebra::SparqlSyntaxError> {
        let start = Timer::now();
        let query = Self::from(spargebra::Query::parse(query, base_iri)?);
        Ok(Self {
            dataset: query.dataset,
            inner: query.inner,
            parsing_duration: start.elapsed(),
        })
    }
@ -60,34 +64,49 @@ impl Query {
impl fmt::Display for Query {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.inner.fmt(f) // TODO: override
    }
}

impl FromStr for Query {
    type Err = spargebra::SparqlSyntaxError;

    fn from_str(query: &str) -> Result<Self, Self::Err> {
        Self::parse(query, None)
    }
}

impl TryFrom<&str> for Query {
    type Error = spargebra::SparqlSyntaxError;

    fn try_from(query: &str) -> Result<Self, Self::Error> {
        Self::from_str(query)
    }
}

impl TryFrom<&String> for Query {
    type Error = spargebra::SparqlSyntaxError;

    fn try_from(query: &String) -> Result<Self, Self::Error> {
        Self::from_str(query)
    }
}

impl From<spargebra::Query> for Query {
    fn from(query: spargebra::Query) -> Self {
        Self {
            dataset: QueryDataset::from_algebra(match &query {
                spargebra::Query::Select { dataset, .. }
                | spargebra::Query::Construct { dataset, .. }
                | spargebra::Query::Describe { dataset, .. }
                | spargebra::Query::Ask { dataset, .. } => dataset,
            }),
            inner: query,
            parsing_duration: None,
        }
    }
}

/// A parsed [SPARQL update](https://www.w3.org/TR/sparql11-update/).
///
/// ```
@ -97,7 +116,7 @@ impl<'a> TryFrom<&'a String> for Query {
/// let update = Update::parse(update_str, None)?;
///
/// assert_eq!(update.to_string().trim(), update_str);
/// # Ok::<_, oxigraph::sparql::SparqlSyntaxError>(())
/// ```
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct Update {
@ -107,22 +126,11 @@ pub struct Update {
impl Update {
    /// Parses a SPARQL update with an optional base IRI to resolve relative IRIs in the query.
    pub fn parse(
        update: &str,
        base_iri: Option<&str>,
    ) -> Result<Self, spargebra::SparqlSyntaxError> {
        Ok(spargebra::Update::parse(update, base_iri)?.into())
    }

    /// Returns [the query dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset) in [DELETE/INSERT operations](https://www.w3.org/TR/sparql11-update/#deleteInsert).
@ -143,29 +151,48 @@ impl fmt::Display for Update {
}

impl FromStr for Update {
    type Err = spargebra::SparqlSyntaxError;

    fn from_str(update: &str) -> Result<Self, Self::Err> {
        Self::parse(update, None)
    }
}

impl TryFrom<&str> for Update {
    type Error = spargebra::SparqlSyntaxError;

    fn try_from(update: &str) -> Result<Self, Self::Error> {
        Self::from_str(update)
    }
}

impl TryFrom<&String> for Update {
    type Error = spargebra::SparqlSyntaxError;

    fn try_from(update: &String) -> Result<Self, Self::Error> {
        Self::from_str(update)
    }
}

impl From<spargebra::Update> for Update {
    fn from(update: spargebra::Update) -> Self {
        Self {
            using_datasets: update
                .operations
                .iter()
                .map(|operation| {
                    if let GraphUpdateOperation::DeleteInsert { using, .. } = operation {
                        Some(QueryDataset::from_algebra(using))
                    } else {
                        None
                    }
                })
                .collect(),
            inner: update,
        }
    }
}

/// A SPARQL query [dataset specification](https://www.w3.org/TR/sparql11-query/#specifyingDataset)
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct QueryDataset {
@ -204,8 +231,15 @@ impl QueryDataset {
    /// ```
    /// use oxigraph::sparql::Query;
    ///
    /// assert!(Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?
    ///     .dataset()
    ///     .is_default_dataset());
    /// assert!(!Query::parse(
    ///     "SELECT ?s ?p ?o FROM <http://example.com> WHERE { ?s ?p ?o . }",
    ///     None
    /// )?
    /// .dataset()
    /// .is_default_dataset());
    ///
    /// # Ok::<_, Box<dyn std::error::Error>>(())
    /// ```
@ -237,7 +271,10 @@ impl QueryDataset {
    /// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
    /// let default = vec![NamedNode::new("http://example.com")?.into()];
    /// query.dataset_mut().set_default_graph(default.clone());
    /// assert_eq!(
    ///     query.dataset().default_graph_graphs(),
    ///     Some(default.as_slice())
    /// );
    ///
    /// # Ok::<_, Box<dyn std::error::Error>>(())
    /// ```
@ -258,8 +295,13 @@ impl QueryDataset {
    ///
    /// let mut query = Query::parse("SELECT ?s ?p ?o WHERE { ?s ?p ?o . }", None)?;
    /// let named = vec![NamedNode::new("http://example.com")?.into()];
    /// query
    ///     .dataset_mut()
    ///     .set_available_named_graphs(named.clone());
    /// assert_eq!(
    ///     query.dataset().available_named_graphs(),
    ///     Some(named.as_slice())
    /// );
    ///
    /// # Ok::<_, Box<dyn std::error::Error>>(())
    /// ```
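A minimal usage sketch of the parsing entry points shown above (the `FromStr` impl delegates to `Query::parse(query, None)`):
```rust
use oxigraph::sparql::Query;
use std::str::FromStr;

let query = Query::from_str("SELECT * WHERE { ?s ?p ?o }").unwrap();
// No FROM / FROM NAMED clause, so the default dataset is used
assert!(query.dataset().is_default_dataset());
```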

@ -40,7 +40,7 @@ impl DatasetView {
    ) -> impl Iterator<Item = Result<EncodedQuad, EvaluationError>> + 'static {
        self.reader
            .quads_for_pattern(subject, predicate, object, graph_name)
            .map(|t| t.map_err(Into::into))
    }

    #[allow(clippy::needless_collect)]
@ -142,7 +142,7 @@ impl DatasetView {
                self.store_encoded_quads_for_pattern(subject, predicate, object, None)
                    .filter(|quad| match quad {
                        Err(_) => true,
                        Ok(quad) => !quad.graph_name.is_default_graph(),
                    }),
            )
        }
@ -176,10 +176,6 @@ impl StrLookup for DatasetView {
            self.reader.get_str(key)?
        })
    }
}

struct EncodedDatasetSpec {

@ -0,0 +1,84 @@
use crate::io::RdfParseError;
use crate::model::NamedNode;
use crate::sparql::results::QueryResultsParseError as ResultsParseError;
use crate::sparql::SparqlSyntaxError;
use crate::storage::StorageError;
use std::convert::Infallible;
use std::error::Error;
use std::io;
/// A SPARQL evaluation error.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum EvaluationError {
/// An error in SPARQL parsing.
#[error(transparent)]
Parsing(#[from] SparqlSyntaxError),
/// An error from the storage.
#[error(transparent)]
Storage(#[from] StorageError),
/// An error while parsing an external RDF file.
#[error(transparent)]
GraphParsing(#[from] RdfParseError),
/// An error while parsing an external result file (likely from a federated query).
#[error(transparent)]
ResultsParsing(#[from] ResultsParseError),
/// An error returned during results serialization.
#[error(transparent)]
ResultsSerialization(#[from] io::Error),
/// Error during `SERVICE` evaluation
#[error("{0}")]
Service(#[source] Box<dyn Error + Send + Sync + 'static>),
/// Error when `CREATE` tries to create an already existing graph
#[error("The graph {0} already exists")]
GraphAlreadyExists(NamedNode),
/// Error when `DROP` or `CLEAR` tries to remove a non-existing graph
#[error("The graph {0} does not exist")]
GraphDoesNotExist(NamedNode),
/// The variable storing the `SERVICE` name is unbound
#[error("The variable encoding the service name is unbound")]
UnboundService,
/// The given `SERVICE` is not supported
#[error("The service {0} is not supported")]
UnsupportedService(NamedNode),
/// The given content media type returned from an HTTP response is not supported (`SERVICE` and `LOAD`)
#[error("The content media type {0} is not supported")]
UnsupportedContentType(String),
/// The `SERVICE` call has not returned solutions
#[error("The service is not returning solutions but a boolean or a graph")]
ServiceDoesNotReturnSolutions,
/// The results are not an RDF graph
#[error("The query results are not a RDF graph")]
NotAGraph,
}
impl From<Infallible> for EvaluationError {
#[inline]
fn from(error: Infallible) -> Self {
match error {}
}
}
impl From<EvaluationError> for io::Error {
#[inline]
fn from(error: EvaluationError) -> Self {
match error {
EvaluationError::Parsing(error) => Self::new(io::ErrorKind::InvalidData, error),
EvaluationError::GraphParsing(error) => error.into(),
EvaluationError::ResultsParsing(error) => error.into(),
EvaluationError::ResultsSerialization(error) => error,
EvaluationError::Storage(error) => error.into(),
EvaluationError::Service(error) => match error.downcast() {
Ok(error) => *error,
Err(error) => Self::new(io::ErrorKind::Other, error),
},
EvaluationError::GraphAlreadyExists(_)
| EvaluationError::GraphDoesNotExist(_)
| EvaluationError::UnboundService
| EvaluationError::UnsupportedService(_)
| EvaluationError::UnsupportedContentType(_)
| EvaluationError::ServiceDoesNotReturnSolutions
| EvaluationError::NotAGraph => Self::new(io::ErrorKind::InvalidInput, error),
}
}
}
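A minimal sketch of how a caller might branch on the new variants (the function name is hypothetical; `Store::query` returns `Result<_, EvaluationError>`):
```rust
use oxigraph::sparql::EvaluationError;
use oxigraph::store::Store;

fn describe_failure(store: &Store) {
    match store.query("SELECT * WHERE { ?s ?p ?o }") {
        Ok(_results) => { /* consume the solutions */ }
        Err(EvaluationError::Parsing(e)) => eprintln!("invalid SPARQL: {e}"),
        Err(EvaluationError::Storage(e)) => eprintln!("storage failure: {e}"),
        Err(e) => eprintln!("evaluation failed: {e}"),
    }
}
```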

File diff suppressed because it is too large

@ -3,30 +3,32 @@
use std::io::{Empty, Error, ErrorKind, Result};
use std::time::Duration;

pub struct Client;

impl Client {
    pub fn new(_timeout: Option<Duration>, _redirection_limit: usize) -> Self {
        Self
    }

    #[allow(clippy::unused_self)]
    pub fn get(&self, _url: &str, _accept: &'static str) -> Result<(String, Empty)> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "HTTP client is not available. Enable the feature 'http-client'",
        ))
    }

    #[allow(clippy::unused_self, clippy::needless_pass_by_value)]
    pub fn post(
        &self,
        _url: &str,
        _payload: Vec<u8>,
        _content_type: &'static str,
        _accept: &'static str,
    ) -> Result<(String, Empty)> {
        Err(Error::new(
            ErrorKind::Unsupported,
            "HTTP client is not available. Enable the feature 'http-client'",
        ))
    }
}

@ -0,0 +1,9 @@
#[cfg(not(feature = "http-client"))]
mod dummy;
#[cfg(feature = "http-client")]
mod simple;
#[cfg(not(feature = "http-client"))]
pub use dummy::Client;
#[cfg(feature = "http-client")]
pub use simple::Client;

@ -8,18 +8,17 @@ pub struct Client {
impl Client {
    pub fn new(timeout: Option<Duration>, redirection_limit: usize) -> Self {
        let mut client = oxhttp::Client::new()
            .with_redirection_limit(redirection_limit)
            .with_user_agent(concat!("Oxigraph/", env!("CARGO_PKG_VERSION")))
            .unwrap();
        if let Some(timeout) = timeout {
            client = client.with_global_timeout(timeout);
        }
        Self { client }
    }

    pub fn get(&self, url: &str, accept: &'static str) -> Result<(String, Body)> {
        let request = Request::builder(Method::GET, url.parse().map_err(invalid_input_error)?)
            .with_header(HeaderName::ACCEPT, accept)
            .map_err(invalid_input_error)?
@ -39,7 +38,7 @@ impl Client {
        }
        let content_type = response
            .header(&HeaderName::CONTENT_TYPE)
            .ok_or_else(|| invalid_data_error(format!("No Content-Type returned by {url}")))?
            .to_str()
            .map_err(invalid_data_error)?
            .to_owned();
@ -50,8 +49,8 @@ impl Client {
        &self,
        url: &str,
        payload: Vec<u8>,
        content_type: &'static str,
        accept: &'static str,
    ) -> Result<(String, Body)> {
        let request = Request::builder(Method::POST, url.parse().map_err(invalid_input_error)?)
            .with_header(HeaderName::ACCEPT, accept)
@ -74,7 +73,7 @@ impl Client {
        }
        let content_type = response
            .header(&HeaderName::CONTENT_TYPE)
            .ok_or_else(|| invalid_data_error(format!("No Content-Type returned by {url}")))?
            .to_str()
            .map_err(invalid_data_error)?
            .to_owned();

@ -0,0 +1,328 @@
//! [SPARQL](https://www.w3.org/TR/sparql11-overview/) implementation.
//!
//! Stores execute SPARQL. See [`Store`](crate::store::Store::query()) for an example.
mod algebra;
mod dataset;
mod error;
mod eval;
mod http;
mod model;
pub mod results;
mod service;
mod update;
use crate::model::{NamedNode, Term};
pub use crate::sparql::algebra::{Query, QueryDataset, Update};
use crate::sparql::dataset::DatasetView;
pub use crate::sparql::error::EvaluationError;
use crate::sparql::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
pub use crate::sparql::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
pub use crate::sparql::service::ServiceHandler;
use crate::sparql::service::{EmptyServiceHandler, ErrorConversionServiceHandler};
pub(crate) use crate::sparql::update::evaluate_update;
use crate::storage::StorageReader;
use json_event_parser::{JsonEvent, ToWriteJsonWriter};
pub use oxrdf::{Variable, VariableNameParseError};
use oxsdatatypes::{DayTimeDuration, Float};
pub use spargebra::SparqlSyntaxError;
use sparopt::algebra::GraphPattern;
use sparopt::Optimizer;
use std::collections::HashMap;
use std::rc::Rc;
use std::sync::Arc;
use std::time::Duration;
use std::{fmt, io};
#[allow(clippy::needless_pass_by_value)]
pub(crate) fn evaluate_query(
reader: StorageReader,
query: impl TryInto<Query, Error = impl Into<EvaluationError>>,
options: QueryOptions,
run_stats: bool,
) -> Result<(Result<QueryResults, EvaluationError>, QueryExplanation), EvaluationError> {
let query = query.try_into().map_err(Into::into)?;
let dataset = DatasetView::new(reader, &query.dataset);
let start_planning = Timer::now();
let (results, plan_node_with_stats, planning_duration) = match query.inner {
spargebra::Query::Select {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(pattern);
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_select(&pattern);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Ask {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_ask(&pattern);
(results, explanation, planning_duration)
}
spargebra::Query::Construct {
template,
pattern,
base_iri,
..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_construct(&pattern, &template);
(Ok(results), explanation, planning_duration)
}
spargebra::Query::Describe {
pattern, base_iri, ..
} => {
let mut pattern = GraphPattern::from(&pattern);
if !options.without_optimizations {
pattern = Optimizer::optimize_graph_pattern(GraphPattern::Reduced {
inner: Box::new(pattern),
});
}
let planning_duration = start_planning.elapsed();
let (results, explanation) = SimpleEvaluator::new(
Rc::new(dataset),
base_iri.map(Rc::new),
options.service_handler(),
Arc::new(options.custom_functions),
run_stats,
)
.evaluate_describe(&pattern);
(Ok(results), explanation, planning_duration)
}
};
let explanation = QueryExplanation {
inner: plan_node_with_stats,
with_stats: run_stats,
parsing_duration: query.parsing_duration,
planning_duration,
};
Ok((results, explanation))
}
/// Options for SPARQL query evaluation.
///
/// If the `"http-client"` optional feature is enabled,
/// a simple HTTP 1.1 client is used to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
///
/// Usage example disabling the federated query support:
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// store.query_opt(
/// "SELECT * WHERE { SERVICE <https://query.wikidata.org/sparql> {} }",
/// QueryOptions::default().without_service_handler(),
/// )?;
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[derive(Clone, Default)]
pub struct QueryOptions {
service_handler: Option<Arc<dyn ServiceHandler<Error = EvaluationError>>>,
custom_functions: CustomFunctionRegistry,
http_timeout: Option<Duration>,
http_redirection_limit: usize,
without_optimizations: bool,
}
pub(crate) type CustomFunctionRegistry =
HashMap<NamedNode, Arc<dyn (Fn(&[Term]) -> Option<Term>) + Send + Sync>>;
impl QueryOptions {
/// Use a given [`ServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
#[inline]
#[must_use]
pub fn with_service_handler(mut self, service_handler: impl ServiceHandler + 'static) -> Self {
self.service_handler = Some(Arc::new(ErrorConversionServiceHandler::wrap(
service_handler,
)));
self
}
/// Disables the `SERVICE` calls.
#[inline]
#[must_use]
pub fn without_service_handler(mut self) -> Self {
self.service_handler = Some(Arc::new(EmptyServiceHandler));
self
}
/// Sets a timeout for HTTP requests done during SPARQL evaluation.
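///
/// A minimal sketch (the timeout value is illustrative only):
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use std::time::Duration;
///
/// let options = QueryOptions::default().with_http_timeout(Duration::from_secs(10));
/// ```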
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_timeout(mut self, timeout: Duration) -> Self {
self.http_timeout = Some(timeout);
self
}
/// Sets an upper bound on the number of HTTP redirections followed per HTTP request during SPARQL evaluation.
///
/// By default this value is `0`.
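///
/// A minimal sketch (the limit value is illustrative only):
/// ```
/// use oxigraph::sparql::QueryOptions;
///
/// let options = QueryOptions::default().with_http_redirection_limit(5);
/// ```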
#[cfg(feature = "http-client")]
#[inline]
#[must_use]
pub fn with_http_redirection_limit(mut self, redirection_limit: usize) -> Self {
self.http_redirection_limit = redirection_limit;
self
}
/// Adds a custom SPARQL evaluation function.
///
/// Example with a function serializing terms to N-Triples:
/// ```
/// use oxigraph::model::*;
/// use oxigraph::sparql::{QueryOptions, QueryResults};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
///
/// if let QueryResults::Solutions(mut solutions) = store.query_opt(
/// "SELECT (<http://www.w3.org/ns/formats/N-Triples>(1) AS ?nt) WHERE {}",
/// QueryOptions::default().with_custom_function(
/// NamedNode::new("http://www.w3.org/ns/formats/N-Triples")?,
/// |args| args.get(0).map(|t| Literal::from(t.to_string()).into()),
/// ),
/// )? {
/// assert_eq!(
/// solutions.next().unwrap()?.get("nt"),
/// Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into())
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
#[must_use]
pub fn with_custom_function(
mut self,
name: NamedNode,
evaluator: impl Fn(&[Term]) -> Option<Term> + Send + Sync + 'static,
) -> Self {
self.custom_functions.insert(name, Arc::new(evaluator));
self
}
fn service_handler(&self) -> Arc<dyn ServiceHandler<Error = EvaluationError>> {
self.service_handler.clone().unwrap_or_else(|| {
if cfg!(feature = "http-client") {
Arc::new(service::SimpleServiceHandler::new(
self.http_timeout,
self.http_redirection_limit,
))
} else {
Arc::new(EmptyServiceHandler)
}
})
}
#[doc(hidden)]
#[inline]
#[must_use]
pub fn without_optimizations(mut self) -> Self {
self.without_optimizations = true;
self
}
}
/// Options for SPARQL update evaluation.
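///
/// A minimal sketch of the conversion from [`QueryOptions`]:
/// ```
/// use oxigraph::sparql::{QueryOptions, UpdateOptions};
///
/// let update_options = UpdateOptions::from(QueryOptions::default());
/// ```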
#[derive(Clone, Default)]
pub struct UpdateOptions {
query_options: QueryOptions,
}
impl From<QueryOptions> for UpdateOptions {
#[inline]
fn from(query_options: QueryOptions) -> Self {
Self { query_options }
}
}
/// The explanation of a query.
#[derive(Clone)]
pub struct QueryExplanation {
inner: Rc<EvalNodeWithStats>,
with_stats: bool,
parsing_duration: Option<DayTimeDuration>,
planning_duration: Option<DayTimeDuration>,
}
impl QueryExplanation {
/// Writes the explanation as JSON.
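///
/// A sketch of the expected usage, assuming the explanation comes from
/// [`Store::explain_query_opt`](crate::store::Store::explain_query_opt) with its current signature:
/// ```
/// use oxigraph::sparql::QueryOptions;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// let (_results, explanation) = store.explain_query_opt("ASK {}", QueryOptions::default(), false)?;
/// let mut buffer = Vec::new();
/// explanation.write_in_json(&mut buffer)?;
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```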
pub fn write_in_json(&self, write: impl io::Write) -> io::Result<()> {
let mut writer = ToWriteJsonWriter::new(write);
writer.write_event(JsonEvent::StartObject)?;
if let Some(parsing_duration) = self.parsing_duration {
writer.write_event(JsonEvent::ObjectKey("parsing duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
parsing_duration.as_seconds().to_string().into(),
))?;
}
if let Some(planning_duration) = self.planning_duration {
writer.write_event(JsonEvent::ObjectKey("planning duration in seconds".into()))?;
writer.write_event(JsonEvent::Number(
planning_duration.as_seconds().to_string().into(),
))?;
}
writer.write_event(JsonEvent::ObjectKey("plan".into()))?;
self.inner.json_node(&mut writer, self.with_stats)?;
writer.write_event(JsonEvent::EndObject)
}
}
impl fmt::Debug for QueryExplanation {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut obj = f.debug_struct("QueryExplanation");
if let Some(parsing_duration) = self.parsing_duration {
obj.field(
"parsing duration in seconds",
&f32::from(Float::from(parsing_duration.as_seconds())),
);
}
if let Some(planning_duration) = self.planning_duration {
obj.field(
"planning duration in seconds",
&f32::from(Float::from(planning_duration.as_seconds())),
);
}
obj.field("tree", &self.inner);
obj.finish_non_exhaustive()
}
}

@ -0,0 +1,371 @@
use crate::io::{RdfFormat, RdfSerializer};
use crate::model::*;
use crate::sparql::error::EvaluationError;
use crate::sparql::results::{
FromReadQueryResultsReader, FromReadSolutionsReader, QueryResultsFormat,
QueryResultsParseError, QueryResultsParser, QueryResultsSerializer,
};
pub use sparesults::QuerySolution;
use std::io::{Read, Write};
use std::sync::Arc;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/).
pub enum QueryResults {
/// Results of a [SELECT](https://www.w3.org/TR/sparql11-query/#select) query.
Solutions(QuerySolutionIter),
/// Result of an [ASK](https://www.w3.org/TR/sparql11-query/#ask) query.
Boolean(bool),
/// Results of a [CONSTRUCT](https://www.w3.org/TR/sparql11-query/#construct) or [DESCRIBE](https://www.w3.org/TR/sparql11-query/#describe) query.
Graph(QueryTripleIter),
}
impl QueryResults {
/// Reads a SPARQL query results serialization.
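///
/// A minimal sketch parsing an in-memory JSON boolean result (the payload is illustrative only):
/// ```
/// use oxigraph::sparql::results::QueryResultsFormat;
/// use oxigraph::sparql::QueryResults;
///
/// if let QueryResults::Boolean(value) =
///     QueryResults::read(br#"{"boolean":true}"#.as_slice(), QueryResultsFormat::Json)?
/// {
///     assert!(value);
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```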
pub fn read(
read: impl Read + 'static,
format: QueryResultsFormat,
) -> Result<Self, QueryResultsParseError> {
Ok(QueryResultsParser::from_format(format)
.parse_read(read)?
.into())
}
/// Writes the query results (solutions or boolean).
///
/// This method fails if it is called on the `Graph` results.
///
/// ```
/// use oxigraph::store::Store;
/// use oxigraph::model::*;
/// use oxigraph::sparql::results::QueryResultsFormat;
///
/// let store = Store::new()?;
/// let ex = NamedNodeRef::new("http://example.com")?;
/// store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// let results = store.query("SELECT ?s WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write(Vec::new(), QueryResultsFormat::Json)?,
/// r#"{"head":{"vars":["s"]},"results":{"bindings":[{"s":{"type":"uri","value":"http://example.com"}}]}}"#.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write<W: Write>(
self,
write: W,
format: QueryResultsFormat,
) -> Result<W, EvaluationError> {
let serializer = QueryResultsSerializer::from_format(format);
match self {
Self::Boolean(value) => serializer.serialize_boolean_to_write(write, value),
Self::Solutions(solutions) => {
let mut writer = serializer
.serialize_solutions_to_write(write, solutions.variables().to_vec())
.map_err(EvaluationError::ResultsSerialization)?;
for solution in solutions {
writer
.write(&solution?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
Self::Graph(triples) => {
let s = VariableRef::new_unchecked("subject");
let p = VariableRef::new_unchecked("predicate");
let o = VariableRef::new_unchecked("object");
let mut writer = serializer
.serialize_solutions_to_write(
write,
vec![s.into_owned(), p.into_owned(), o.into_owned()],
)
.map_err(EvaluationError::ResultsSerialization)?;
for triple in triples {
let triple = triple?;
writer
.write([
(s, &triple.subject.into()),
(p, &triple.predicate.into()),
(o, &triple.object),
])
.map_err(EvaluationError::ResultsSerialization)?;
}
writer.finish()
}
}
.map_err(EvaluationError::ResultsSerialization)
}
/// Writes the graph query results.
///
/// This method fails if it is called on the `Solution` or `Boolean` results.
///
/// ```
/// use oxigraph::io::RdfFormat;
/// use oxigraph::model::*;
/// use oxigraph::store::Store;
///
/// let graph = "<http://example.com> <http://example.com> <http://example.com> .\n";
///
/// let store = Store::new()?;
/// store.load_graph(
/// graph.as_bytes(),
/// RdfFormat::NTriples,
/// GraphName::DefaultGraph,
/// None,
/// )?;
///
/// let results = store.query("CONSTRUCT WHERE { ?s ?p ?o }")?;
/// assert_eq!(
/// results.write_graph(Vec::new(), RdfFormat::NTriples)?,
/// graph.as_bytes()
/// );
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub fn write_graph<W: Write>(
self,
write: W,
format: impl Into<RdfFormat>,
) -> Result<W, EvaluationError> {
if let Self::Graph(triples) = self {
let mut writer = RdfSerializer::from_format(format.into()).serialize_to_write(write);
for triple in triples {
writer
.write_triple(&triple?)
.map_err(EvaluationError::ResultsSerialization)?;
}
writer
.finish()
.map_err(EvaluationError::ResultsSerialization)
} else {
Err(EvaluationError::NotAGraph)
}
}
}
impl From<QuerySolutionIter> for QueryResults {
#[inline]
fn from(value: QuerySolutionIter) -> Self {
Self::Solutions(value)
}
}
impl<R: Read + 'static> From<FromReadQueryResultsReader<R>> for QueryResults {
fn from(reader: FromReadQueryResultsReader<R>) -> Self {
match reader {
FromReadQueryResultsReader::Solutions(s) => Self::Solutions(s.into()),
FromReadQueryResultsReader::Boolean(v) => Self::Boolean(v),
}
}
}
/// An iterator over [`QuerySolution`]s.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s WHERE { ?s ?p ?o }")? {
/// for solution in solutions {
/// println!("{:?}", solution?.get("s"));
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QuerySolutionIter {
variables: Arc<[Variable]>,
iter: Box<dyn Iterator<Item = Result<QuerySolution, EvaluationError>>>,
}
impl QuerySolutionIter {
/// Constructs a new iterator of solutions from an ordered list of solution variables and an iterator of solution tuples
/// (each tuple uses the same ordering as the variable list, so tuple element 0 is the value of variable 0...).
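///
/// A minimal sketch with a single variable and a single empty solution (names are illustrative only):
/// ```
/// use oxigraph::model::Term;
/// use oxigraph::sparql::{EvaluationError, QuerySolutionIter, Variable};
/// use std::sync::Arc;
///
/// let variables: Arc<[Variable]> = Arc::new([Variable::new("s")?]);
/// let mut solutions = QuerySolutionIter::new(
///     Arc::clone(&variables),
///     std::iter::once(Ok::<_, EvaluationError>(vec![None::<Term>])),
/// );
/// assert!(solutions.next().is_some());
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```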
pub fn new(
variables: Arc<[Variable]>,
iter: impl Iterator<Item = Result<Vec<Option<Term>>, EvaluationError>> + 'static,
) -> Self {
Self {
variables: Arc::clone(&variables),
iter: Box::new(
iter.map(move |t| t.map(|values| (Arc::clone(&variables), values).into())),
),
}
}
/// The variables used in the solutions.
///
/// ```
/// use oxigraph::sparql::{QueryResults, Variable};
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }")? {
/// assert_eq!(
/// solutions.variables(),
/// &[Variable::new("s")?, Variable::new("o")?]
/// );
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
#[inline]
pub fn variables(&self) -> &[Variable] {
&self.variables
}
}
impl<R: Read + 'static> From<FromReadSolutionsReader<R>> for QuerySolutionIter {
fn from(reader: FromReadSolutionsReader<R>) -> Self {
Self {
variables: reader.variables().into(),
iter: Box::new(reader.map(|t| t.map_err(EvaluationError::from))),
}
}
}
impl Iterator for QuerySolutionIter {
type Item = Result<QuerySolution, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
}
/// An iterator over the triples that compose a graph solution.
///
/// ```
/// use oxigraph::sparql::QueryResults;
/// use oxigraph::store::Store;
///
/// let store = Store::new()?;
/// if let QueryResults::Graph(triples) = store.query("CONSTRUCT WHERE { ?s ?p ?o }")? {
/// for triple in triples {
/// println!("{}", triple?);
/// }
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct QueryTripleIter {
pub(crate) iter: Box<dyn Iterator<Item = Result<Triple, EvaluationError>>>,
}
impl Iterator for QueryTripleIter {
type Item = Result<Triple, EvaluationError>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
self.iter.next()
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
self.iter.size_hint()
}
#[inline]
fn fold<Acc, G>(self, init: Acc, g: G) -> Acc
where
G: FnMut(Acc, Self::Item) -> Acc,
{
self.iter.fold(init, g)
}
}
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
use super::*;
use std::io::Cursor;
#[test]
fn test_serialization_roundtrip() -> Result<(), EvaluationError> {
use std::str;
for format in [
QueryResultsFormat::Json,
QueryResultsFormat::Xml,
QueryResultsFormat::Tsv,
] {
let results = vec![
QueryResults::Boolean(true),
QueryResults::Boolean(false),
QueryResults::Solutions(QuerySolutionIter::new(
[
Variable::new_unchecked("foo"),
Variable::new_unchecked("bar"),
]
.as_ref()
.into(),
Box::new(
vec![
Ok(vec![None, None]),
Ok(vec![
Some(NamedNode::new_unchecked("http://example.com").into()),
None,
]),
Ok(vec![
None,
Some(NamedNode::new_unchecked("http://example.com").into()),
]),
Ok(vec![
Some(BlankNode::new_unchecked("foo").into()),
Some(BlankNode::new_unchecked("bar").into()),
]),
Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]),
Ok(vec![
Some(
Literal::new_language_tagged_literal_unchecked("foo", "fr")
.into(),
),
None,
]),
Ok(vec![
Some(Literal::from(1).into()),
Some(Literal::from(true).into()),
]),
Ok(vec![
Some(Literal::from(1.33).into()),
Some(Literal::from(false).into()),
]),
Ok(vec![
Some(
Triple::new(
NamedNode::new_unchecked("http://example.com/s"),
NamedNode::new_unchecked("http://example.com/p"),
Triple::new(
NamedNode::new_unchecked("http://example.com/os"),
NamedNode::new_unchecked("http://example.com/op"),
NamedNode::new_unchecked("http://example.com/oo"),
),
)
.into(),
),
None,
]),
]
.into_iter(),
),
)),
];
for ex in results {
let mut buffer = Vec::new();
ex.write(&mut buffer, format)?;
let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?;
let mut buffer2 = Vec::new();
ex2.write(&mut buffer2, format)?;
assert_eq!(
str::from_utf8(&buffer).unwrap(),
str::from_utf8(&buffer2).unwrap()
);
}
}
Ok(())
}
}

@ -0,0 +1,44 @@
//! Utilities to read and write RDF results formats using [sparesults](https://crates.io/crates/sparesults).
//!
//! It supports [SPARQL Query Results XML Format (Second Edition)](https://www.w3.org/TR/rdf-sparql-XMLres/), [SPARQL 1.1 Query Results JSON Format](https://www.w3.org/TR/sparql11-results-json/) and [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/).
//!
//! Usage example converting a JSON result file into a TSV result file:
//!
//! ```
//! use oxigraph::sparql::results::{QueryResultsFormat, QueryResultsParser, FromReadQueryResultsReader, QueryResultsSerializer};
//! use std::io::Result;
//!
//! fn convert_json_to_tsv(json_file: &[u8]) -> Result<Vec<u8>> {
//! let json_parser = QueryResultsParser::from_format(QueryResultsFormat::Json);
//! let tsv_serializer = QueryResultsSerializer::from_format(QueryResultsFormat::Tsv);
//! // We start to read the JSON file and see which kind of results it is
//! match json_parser.parse_read(json_file)? {
//! FromReadQueryResultsReader::Boolean(value) => {
//! // it's a boolean result, we copy it in TSV to the output buffer
//! tsv_serializer.serialize_boolean_to_write(Vec::new(), value)
//! }
//! FromReadQueryResultsReader::Solutions(solutions_reader) => {
//! // it's a set of solutions, we create a writer and we write to it while reading in streaming from the JSON file
//! let mut serialize_solutions_to_write = tsv_serializer.serialize_solutions_to_write(Vec::new(), solutions_reader.variables().to_vec())?;
//! for solution in solutions_reader {
//! serialize_solutions_to_write.write(&solution?)?;
//! }
//! serialize_solutions_to_write.finish()
//! }
//! }
//! }
//!
//! // Let's test with a boolean
//! assert_eq!(
//! convert_json_to_tsv(br#"{"boolean":true}"#.as_slice()).unwrap(),
//! b"true"
//! );
//!
//! // And with a set of solutions
//! assert_eq!(
//! convert_json_to_tsv(br#"{"head":{"vars":["foo","bar"]},"results":{"bindings":[{"foo":{"type":"literal","value":"test"}}]}}"#.as_slice()).unwrap(),
//! b"?foo\t?bar\n\"test\"\t\n"
//! );
//! ```
pub use sparesults::*;

@ -3,9 +3,8 @@ use crate::sparql::algebra::Query;
use crate::sparql::error::EvaluationError;
use crate::sparql::http::Client;
use crate::sparql::model::QueryResults;
-use crate::sparql::QueryResultsFormat;
+use crate::sparql::results::QueryResultsFormat;
use std::error::Error;
-use std::io::BufReader;
use std::time::Duration;
/// Handler for [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE.
@ -14,18 +13,22 @@ use std::time::Duration;
/// before evaluating a SPARQL query that uses SERVICE calls.
///
/// ```
-/// use oxigraph::store::Store;
/// use oxigraph::model::*;
-/// use oxigraph::sparql::{QueryOptions, QueryResults, ServiceHandler, Query, EvaluationError};
+/// use oxigraph::sparql::{EvaluationError, Query, QueryOptions, QueryResults, ServiceHandler};
+/// use oxigraph::store::Store;
///
/// struct TestServiceHandler {
-///     store: Store
+///     store: Store,
/// }
///
/// impl ServiceHandler for TestServiceHandler {
///     type Error = EvaluationError;
///
-///     fn handle(&self,service_name: NamedNode, query: Query) -> Result<QueryResults,EvaluationError> {
+///     fn handle(
+///         &self,
+///         service_name: NamedNode,
+///         query: Query,
+///     ) -> Result<QueryResults, Self::Error> {
///         if service_name == "http://example.com/service" {
///             self.store.query(query)
///         } else {
@ -36,23 +39,26 @@ use std::time::Duration;
///
/// let store = Store::new()?;
/// let service = TestServiceHandler {
-///     store: Store::new()?
+///     store: Store::new()?,
/// };
/// let ex = NamedNodeRef::new("http://example.com")?;
-/// service.store.insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
+/// service
+///     .store
+///     .insert(QuadRef::new(ex, ex, ex, GraphNameRef::DefaultGraph))?;
///
/// if let QueryResults::Solutions(mut solutions) = store.query_opt(
///     "SELECT ?s WHERE { SERVICE <http://example.com/service> { ?s ?p ?o } }",
-///     QueryOptions::default().with_service_handler(service)
+///     QueryOptions::default().with_service_handler(service),
/// )? {
///     assert_eq!(solutions.next().unwrap()?.get("s"), Some(&ex.into()));
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
-pub trait ServiceHandler {
+pub trait ServiceHandler: Send + Sync {
+    /// The service evaluation error.
    type Error: Error + Send + Sync + 'static;
-    /// Evaluates a [`Query`] against a given service identified by a [`NamedNode`](crate::model::NamedNode).
+    /// Evaluates a [`Query`] against a given service identified by a [`NamedNode`].
    fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error>;
}
@ -61,10 +67,8 @@ pub struct EmptyServiceHandler;
impl ServiceHandler for EmptyServiceHandler {
    type Error = EvaluationError;
-    fn handle(&self, _: NamedNode, _: Query) -> Result<QueryResults, EvaluationError> {
-        Err(EvaluationError::msg(
-            "The SERVICE feature is not implemented",
-        ))
+    fn handle(&self, name: NamedNode, _: Query) -> Result<QueryResults, Self::Error> {
+        Err(EvaluationError::UnsupportedService(name))
    }
}
@ -81,14 +85,10 @@ impl<S: ServiceHandler> ErrorConversionServiceHandler<S> {
impl<S: ServiceHandler> ServiceHandler for ErrorConversionServiceHandler<S> {
    type Error = EvaluationError;
-    fn handle(
-        &self,
-        service_name: NamedNode,
-        query: Query,
-    ) -> Result<QueryResults, EvaluationError> {
+    fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
        self.handler
            .handle(service_name, query)
-            .map_err(EvaluationError::wrap)
+            .map_err(|e| EvaluationError::Service(Box::new(e)))
    }
}
@ -107,23 +107,18 @@ impl SimpleServiceHandler {
impl ServiceHandler for SimpleServiceHandler {
    type Error = EvaluationError;
-    fn handle(
-        &self,
-        service_name: NamedNode,
-        query: Query,
-    ) -> Result<QueryResults, EvaluationError> {
-        let (content_type, body) = self.client.post(
-            service_name.as_str(),
-            query.to_string().into_bytes(),
-            "application/sparql-query",
-            "application/sparql-results+json, application/sparql-results+xml",
-        )?;
-        let format = QueryResultsFormat::from_media_type(&content_type).ok_or_else(|| {
-            EvaluationError::msg(format!(
-                "Unsupported Content-Type returned by {}: {}",
-                service_name, content_type
-            ))
-        })?;
-        Ok(QueryResults::read(BufReader::new(body), format)?)
+    fn handle(&self, service_name: NamedNode, query: Query) -> Result<QueryResults, Self::Error> {
+        let (content_type, body) = self
+            .client
+            .post(
+                service_name.as_str(),
+                query.to_string().into_bytes(),
+                "application/sparql-query",
+                "application/sparql-results+json, application/sparql-results+xml",
+            )
+            .map_err(|e| EvaluationError::Service(Box::new(e)))?;
+        let format = QueryResultsFormat::from_media_type(&content_type)
+            .ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
        Ok(QueryResults::read(body, format)?)
    }
}

@ -1,12 +1,9 @@
-use crate::io::read::ParseError;
-use crate::io::{GraphFormat, GraphParser};
+use crate::io::{RdfFormat, RdfParser};
use crate::model::{GraphName as OxGraphName, GraphNameRef, Quad as OxQuad};
use crate::sparql::algebra::QueryDataset;
use crate::sparql::dataset::DatasetView;
-use crate::sparql::eval::SimpleEvaluator;
+use crate::sparql::eval::{EncodedTuple, SimpleEvaluator};
use crate::sparql::http::Client;
-use crate::sparql::plan::EncodedTuple;
-use crate::sparql::plan_builder::PlanBuilder;
use crate::sparql::{EvaluationError, Update, UpdateOptions};
use crate::storage::numeric_encoder::{Decoder, EncodedTerm};
use crate::storage::StorageWriter;
@ -18,9 +15,11 @@ use spargebra::term::{
    Quad, QuadPattern, Subject, Term, TermPattern, Triple, TriplePattern, Variable,
};
use spargebra::GraphUpdateOperation;
+use sparopt::Optimizer;
use std::collections::HashMap;
-use std::io::BufReader;
+use std::io;
use std::rc::Rc;
+use std::sync::Arc;
pub fn evaluate_update<'a, 'b: 'a>(
    transaction: &'a mut StorageWriter<'b>,
@ -71,7 +70,12 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
            insert,
            pattern,
            ..
-            } => self.eval_delete_insert(delete, insert, using_dataset.as_ref().unwrap(), pattern),
+            } => self.eval_delete_insert(
+                delete,
+                insert,
+                using_dataset.as_ref().unwrap_or(&QueryDataset::new()),
+                pattern,
+            ),
            GraphUpdateOperation::Load {
                silent,
                source,
@ -118,21 +122,25 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
        algebra: &GraphPattern,
    ) -> Result<(), EvaluationError> {
        let dataset = Rc::new(DatasetView::new(self.transaction.reader(), using));
-        let (plan, variables) = PlanBuilder::build(
-            dataset.as_ref(),
-            algebra,
-            false,
-            &self.options.query_options.custom_functions,
-        )?;
+        let mut pattern = sparopt::algebra::GraphPattern::from(algebra);
+        if !self.options.query_options.without_optimizations {
+            pattern = Optimizer::optimize_graph_pattern(sparopt::algebra::GraphPattern::Reduced {
+                inner: Box::new(pattern),
+            });
+        }
        let evaluator = SimpleEvaluator::new(
-            dataset.clone(),
+            Rc::clone(&dataset),
            self.base_iri.clone(),
            self.options.query_options.service_handler(),
-            Rc::new(self.options.query_options.custom_functions.clone()),
+            Arc::new(self.options.query_options.custom_functions.clone()),
+            false,
        );
+        let mut variables = Vec::new();
        let mut bnodes = HashMap::new();
-        for tuple in evaluator.plan_evaluator(&plan)(EncodedTuple::with_capacity(variables.len())) {
-            let tuple = tuple?;
+        let (eval, _) = evaluator.graph_pattern_evaluator(&pattern, &mut variables);
+        let tuples =
+            eval(EncodedTuple::with_capacity(variables.len())).collect::<Result<Vec<_>, _>>()?; // TODO: would be much better to stream
+        for tuple in tuples {
            for quad in delete {
                if let Some(quad) =
                    Self::convert_ground_quad_pattern(quad, &variables, &tuple, &dataset)?
@ -153,29 +161,31 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
    }
    fn eval_load(&mut self, from: &NamedNode, to: &GraphName) -> Result<(), EvaluationError> {
-        let (content_type, body) = self.client.get(
-            from.as_str(),
-            "application/n-triples, text/turtle, application/rdf+xml",
-        )?;
-        let format = GraphFormat::from_media_type(&content_type).ok_or_else(|| {
-            EvaluationError::msg(format!(
-                "Unsupported Content-Type returned by {}: {}",
-                from, content_type
-            ))
-        })?;
+        let (content_type, body) = self
+            .client
+            .get(
+                from.as_str(),
+                "application/n-triples, text/turtle, application/rdf+xml",
+            )
+            .map_err(|e| EvaluationError::Service(Box::new(e)))?;
+        let format = RdfFormat::from_media_type(&content_type)
+            .ok_or_else(|| EvaluationError::UnsupportedContentType(content_type))?;
        let to_graph_name = match to {
            GraphName::NamedNode(graph_name) => graph_name.into(),
            GraphName::DefaultGraph => GraphNameRef::DefaultGraph,
        };
-        let mut parser = GraphParser::from_format(format);
-        if let Some(base_iri) = &self.base_iri {
-            parser = parser
-                .with_base_iri(base_iri.as_str())
-                .map_err(|e| ParseError::invalid_base_iri(base_iri, e))?;
-        }
-        for t in parser.read_triples(BufReader::new(body))? {
-            self.transaction
-                .insert(t?.as_ref().in_graph(to_graph_name))?;
+        let mut parser = RdfParser::from_format(format)
+            .rename_blank_nodes()
+            .without_named_graphs()
+            .with_default_graph(to_graph_name);
+        parser = parser.with_base_iri(from.as_str()).map_err(|e| {
+            EvaluationError::Service(Box::new(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                format!("Invalid URL: {from}: {e}"),
+            )))
+        })?;
+        for q in parser.parse_read(body) {
+            self.transaction.insert(q?.as_ref())?;
        }
        Ok(())
    }
@ -184,10 +194,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
        if self.transaction.insert_named_graph(graph_name.into())? || silent {
            Ok(())
        } else {
-            Err(EvaluationError::msg(format!(
-                "The graph {} already exists",
-                graph_name
-            )))
+            Err(EvaluationError::GraphAlreadyExists(graph_name.clone()))
        }
    }
@ -203,10 +210,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
            } else if silent {
                Ok(())
            } else {
-                Err(EvaluationError::msg(format!(
-                    "The graph {} does not exists",
-                    graph
-                )))
+                Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
            }
        }
        GraphTarget::DefaultGraph => {
@ -224,10 +228,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
        if self.transaction.remove_named_graph(graph_name.into())? || silent {
            Ok(())
        } else {
-            Err(EvaluationError::msg(format!(
-                "The graph {} does not exists",
-                graph_name
-            )))
+            Err(EvaluationError::GraphDoesNotExist(graph_name.clone()))
        }
    }
        GraphTarget::DefaultGraph => {
@ -374,7 +375,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
            TermPattern::Literal(term) => Some(term.clone().into()),
            TermPattern::Triple(triple) => {
                Self::convert_triple_pattern(triple, variables, values, dataset, bnodes)?
-                    .map(|t| t.into())
+                    .map(Into::into)
            }
            TermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
                .map(|node| dataset.decode_term(&node))
@ -507,7 +508,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> {
            GroundTermPattern::Literal(term) => Some(term.clone().into()),
            GroundTermPattern::Triple(triple) => {
                Self::convert_ground_triple_pattern(triple, variables, values, dataset)?
-                    .map(|t| t.into())
+                    .map(Into::into)
            }
            GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values)
                .map(|node| dataset.decode_term(&node))

@ -1,6 +1,7 @@
//! TODO: This storage is dramatically naive.
use crate::storage::StorageError;
+use crate::store::CorruptionError;
use std::cell::RefCell;
use std::collections::{BTreeMap, HashMap};
use std::error::Error;
@ -19,6 +20,7 @@ pub struct ColumnFamilyDefinition {
pub struct Db(Arc<RwLock<HashMap<ColumnFamily, BTreeMap<Vec<u8>, Vec<u8>>>>>);
impl Db {
+    #[allow(clippy::unnecessary_wraps)]
    pub fn new(column_families: Vec<ColumnFamilyDefinition>) -> Result<Self, StorageError> {
        let mut trees = HashMap::new();
        for cf in column_families {
@ -28,20 +30,22 @@ impl Db {
        Ok(Self(Arc::new(RwLock::new(trees))))
    }
-    pub fn column_family(&self, name: &'static str) -> Option<ColumnFamily> {
-        let name = ColumnFamily(name);
-        if self.0.read().unwrap().contains_key(&name) {
-            Some(name)
+    #[allow(clippy::unwrap_in_result)]
+    pub fn column_family(&self, name: &'static str) -> Result<ColumnFamily, StorageError> {
+        let column_family = ColumnFamily(name);
+        if self.0.read().unwrap().contains_key(&column_family) {
+            Ok(column_family)
        } else {
-            None
+            Err(CorruptionError::from_missing_column_family_name(name).into())
        }
    }
    #[must_use]
    pub fn snapshot(&self) -> Reader {
-        Reader(InnerReader::Simple(self.0.clone()))
+        Reader(InnerReader::Simple(Arc::clone(&self.0)))
    }
+    #[allow(clippy::unwrap_in_result)]
    pub fn transaction<'a, 'b: 'a, T, E: Error + 'static + From<StorageError>>(
        &'b self,
        f: impl Fn(Transaction<'a>) -> Result<T, E>,
@ -63,6 +67,7 @@ enum InnerReader {
}
impl Reader {
+    #[allow(clippy::unwrap_in_result)]
    pub fn get(
        &self,
        column_family: &ColumnFamily,
@ -89,6 +94,7 @@ impl Reader {
        }
    }
+    #[allow(clippy::unwrap_in_result)]
    pub fn contains_key(
        &self,
        column_family: &ColumnFamily,
@ -115,10 +121,12 @@ impl Reader {
        }
    }
+    #[allow(clippy::iter_not_returning_iterator)]
    pub fn iter(&self, column_family: &ColumnFamily) -> Result<Iter, StorageError> {
        self.scan_prefix(column_family, &[])
    }
+    #[allow(clippy::unwrap_in_result)]
    pub fn scan_prefix(
        &self,
        column_family: &ColumnFamily,
@ -127,9 +135,7 @@ impl Reader {
        let data: Vec<_> = match &self.0 {
            InnerReader::Simple(reader) => {
                let trees = reader.read().unwrap();
-                let tree = if let Some(tree) = trees.get(column_family) {
-                    tree
-                } else {
+                let Some(tree) = trees.get(column_family) else {
                    return Ok(Iter {
                        iter: Vec::new().into_iter(),
                        current: None,
@ -145,28 +151,25 @@ impl Reader {
                }
            }
            InnerReader::Transaction(reader) => {
-                if let Some(reader) = reader.upgrade() {
-                    let trees = (*reader).borrow();
-                    let tree = if let Some(tree) = trees.get(column_family) {
-                        tree
-                    } else {
-                        return Ok(Iter {
-                            iter: Vec::new().into_iter(),
-                            current: None,
-                        });
-                    };
-                    if prefix.is_empty() {
-                        tree.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
-                    } else {
-                        tree.range(prefix.to_vec()..)
-                            .take_while(|(k, _)| k.starts_with(prefix))
-                            .map(|(k, v)| (k.clone(), v.clone()))
-                            .collect()
-                    }
-                } else {
+                let Some(reader) = reader.upgrade() else {
                    return Err(StorageError::Other(
                        "The transaction is already ended".into(),
                    ));
+                };
+                let trees = (*reader).borrow();
+                let Some(tree) = trees.get(column_family) else {
+                    return Ok(Iter {
+                        iter: Vec::new().into_iter(),
+                        current: None,
+                    });
+                };
+                if prefix.is_empty() {
+                    tree.iter().map(|(k, v)| (k.clone(), v.clone())).collect()
+                } else {
+                    tree.range(prefix.to_vec()..)
+                        .take_while(|(k, _)| k.starts_with(prefix))
+                        .map(|(k, v)| (k.clone(), v.clone()))
+                        .collect()
                }
            }
        };
@ -175,19 +178,20 @@ impl Reader {
        Ok(Iter { iter, current })
    }
+    #[allow(clippy::unwrap_in_result)]
    pub fn len(&self, column_family: &ColumnFamily) -> Result<usize, StorageError> {
        match &self.0 {
            InnerReader::Simple(reader) => Ok(reader
                .read()
                .unwrap()
                .get(column_family)
-                .map_or(0, |tree| tree.len())),
+                .map_or(0, BTreeMap::len)),
            InnerReader::Transaction(reader) => {
                if let Some(reader) = reader.upgrade() {
                    Ok((*reader)
                        .borrow()
                        .get(column_family)
-                        .map_or(0, |tree| tree.len()))
+                        .map_or(0, BTreeMap::len))
                } else {
                    Err(StorageError::Other(
                        "The transaction is already ended".into(),
@ -197,19 +201,20 @@ impl Reader {
        }
    }
+    #[allow(clippy::unwrap_in_result)]
    pub fn is_empty(&self, column_family: &ColumnFamily) -> Result<bool, StorageError> {
        match &self.0 {
            InnerReader::Simple(reader) => Ok(reader
                .read()
                .unwrap()
                .get(column_family)
-                .map_or(true, |tree| tree.is_empty())),
+                .map_or(true, BTreeMap::is_empty)),
            InnerReader::Transaction(reader) => {
                if let Some(reader) = reader.upgrade() {
                    Ok((*reader)
                        .borrow()
                        .get(column_family)
-                        .map_or(true, |tree| tree.is_empty()))
+                        .map_or(true, BTreeMap::is_empty))
                } else {
                    Err(StorageError::Other(
                        "The transaction is already ended".into(),
@ -225,14 +230,15 @@ pub struct Transaction<'a>(
);
impl Transaction<'_> {
-    #[allow(unsafe_code)]
+    #[allow(unsafe_code, clippy::useless_transmute)]
    pub fn reader(&self) -> Reader {
-        // This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
+        // SAFETY: This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime.
        Reader(InnerReader::Transaction(Rc::downgrade(unsafe {
            transmute(&self.0)
        })))
    }
+    #[allow(clippy::unnecessary_wraps)]
    pub fn contains_key_for_update(
        &self,
        column_family: &ColumnFamily,
@ -244,6 +250,7 @@ impl Transaction<'_> {
            .map_or(false, |cf| cf.contains_key(key)))
    }
+    #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)]
    pub fn insert(
        &mut self,
        column_family: &ColumnFamily,
@ -266,6 +273,7 @@ impl Transaction<'_> {
        self.insert(column_family, key, &[])
    }
+    #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)]
    pub fn remove(&mut self, column_family: &ColumnFamily, key: &[u8]) -> Result<(), StorageError> {
        self.0
            .borrow_mut()
@ -286,6 +294,7 @@ impl Iter {
        Some(&self.current.as_ref()?.0)
    }
+    #[allow(dead_code)]
    pub fn value(&self) -> Option<&[u8]> {
        Some(&self.current.as_ref()?.1)
    }
@ -294,6 +303,7 @@ impl Iter {
        self.current = self.iter.next();
    }
+    #[allow(clippy::unnecessary_wraps, clippy::unused_self)]
    pub fn status(&self) -> Result<(), StorageError> {
        Ok(())
    }

@ -0,0 +1,12 @@
//! A storage backend:
//! RocksDB when available, otherwise an in-memory fallback.
#[cfg(any(target_family = "wasm"))]
pub use fallback::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(all(not(target_family = "wasm")))]
pub use oxi_rocksdb::{ColumnFamily, ColumnFamilyDefinition, Db, Iter, Reader, Transaction};
#[cfg(any(target_family = "wasm"))]
mod fallback;
#[cfg(all(not(target_family = "wasm")))]
mod oxi_rocksdb;

File diff suppressed because it is too large

@ -1,13 +1,11 @@
+use crate::storage::error::{CorruptionError, StorageError};
use crate::storage::numeric_encoder::{EncodedQuad, EncodedTerm, EncodedTriple, StrHash};
use crate::storage::small_string::SmallString;
-use crate::storage::StorageError;
-use crate::store::CorruptionError;
-use crate::xsd::*;
-use std::io::{Cursor, Read};
+use oxsdatatypes::*;
+use std::io::Read;
use std::mem::size_of;
-use std::rc::Rc;
-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
pub const LATEST_STORAGE_VERSION: u64 = 1;
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
@ -64,24 +62,23 @@ pub enum QuadEncoding {
}
impl QuadEncoding {
-    pub fn decode(self, buffer: &[u8]) -> Result<EncodedQuad, StorageError> {
-        let mut cursor = Cursor::new(&buffer);
+    pub fn decode(self, mut buffer: &[u8]) -> Result<EncodedQuad, StorageError> {
        match self {
-            Self::Spog => cursor.read_spog_quad(),
-            Self::Posg => cursor.read_posg_quad(),
-            Self::Ospg => cursor.read_ospg_quad(),
-            Self::Gspo => cursor.read_gspo_quad(),
-            Self::Gpos => cursor.read_gpos_quad(),
-            Self::Gosp => cursor.read_gosp_quad(),
-            Self::Dspo => cursor.read_dspo_quad(),
-            Self::Dpos => cursor.read_dpos_quad(),
-            Self::Dosp => cursor.read_dosp_quad(),
+            Self::Spog => buffer.read_spog_quad(),
+            Self::Posg => buffer.read_posg_quad(),
+            Self::Ospg => buffer.read_ospg_quad(),
+            Self::Gspo => buffer.read_gspo_quad(),
+            Self::Gpos => buffer.read_gpos_quad(),
+            Self::Gosp => buffer.read_gosp_quad(),
+            Self::Dspo => buffer.read_dspo_quad(),
+            Self::Dpos => buffer.read_dpos_quad(),
+            Self::Dosp => buffer.read_dosp_quad(),
        }
    }
}
-pub fn decode_term(buffer: &[u8]) -> Result<EncodedTerm, StorageError> {
-    Cursor::new(&buffer).read_term()
+pub fn decode_term(mut buffer: &[u8]) -> Result<EncodedTerm, StorageError> {
+    buffer.read_term()
}
pub trait TermReader {
@ -314,100 +311,89 @@ impl<R: Read> TermReader for R {
                value_id: StrHash::from_be_bytes(buffer),
            })
        }
-            TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)),
-            TYPE_BOOLEAN_LITERAL_FALSE => Ok(EncodedTerm::BooleanLiteral(false)),
+            TYPE_BOOLEAN_LITERAL_TRUE => Ok(true.into()),
+            TYPE_BOOLEAN_LITERAL_FALSE => Ok(false.into()),
            TYPE_FLOAT_LITERAL => {
                let mut buffer = [0; 4];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::FloatLiteral(Float::from_be_bytes(buffer)))
+                Ok(Float::from_be_bytes(buffer).into())
            }
            TYPE_DOUBLE_LITERAL => {
                let mut buffer = [0; 8];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::DoubleLiteral(Double::from_be_bytes(buffer)))
+                Ok(Double::from_be_bytes(buffer).into())
            }
            TYPE_INTEGER_LITERAL => {
                let mut buffer = [0; 8];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::IntegerLiteral(i64::from_be_bytes(buffer)))
+                Ok(Integer::from_be_bytes(buffer).into())
            }
            TYPE_DECIMAL_LITERAL => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::DecimalLiteral(Decimal::from_be_bytes(buffer)))
+                Ok(Decimal::from_be_bytes(buffer).into())
            }
            TYPE_DATE_TIME_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::DateTimeLiteral(DateTime::from_be_bytes(
-                    buffer,
-                )))
+                Ok(DateTime::from_be_bytes(buffer).into())
            }
            TYPE_TIME_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::TimeLiteral(Time::from_be_bytes(buffer)))
+                Ok(Time::from_be_bytes(buffer).into())
            }
            TYPE_DATE_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::DateLiteral(Date::from_be_bytes(buffer)))
+                Ok(Date::from_be_bytes(buffer).into())
            }
            TYPE_G_YEAR_MONTH_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::GYearMonthLiteral(GYearMonth::from_be_bytes(
-                    buffer,
-                )))
+                Ok(GYearMonth::from_be_bytes(buffer).into())
            }
            TYPE_G_YEAR_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::GYearLiteral(GYear::from_be_bytes(buffer)))
+                Ok(GYear::from_be_bytes(buffer).into())
            }
            TYPE_G_MONTH_DAY_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::GMonthDayLiteral(GMonthDay::from_be_bytes(
-                    buffer,
-                )))
+                Ok(GMonthDay::from_be_bytes(buffer).into())
            }
            TYPE_G_DAY_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::GDayLiteral(GDay::from_be_bytes(buffer)))
+                Ok(GDay::from_be_bytes(buffer).into())
            }
            TYPE_G_MONTH_LITERAL => {
                let mut buffer = [0; 18];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::GMonthLiteral(GMonth::from_be_bytes(buffer)))
+                Ok(GMonth::from_be_bytes(buffer).into())
            }
            TYPE_DURATION_LITERAL => {
                let mut buffer = [0; 24];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::DurationLiteral(Duration::from_be_bytes(
-                    buffer,
-                )))
+                Ok(Duration::from_be_bytes(buffer).into())
            }
            TYPE_YEAR_MONTH_DURATION_LITERAL => {
                let mut buffer = [0; 8];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::YearMonthDurationLiteral(
-                    YearMonthDuration::from_be_bytes(buffer),
-                ))
+                Ok(YearMonthDuration::from_be_bytes(buffer).into())
            }
            TYPE_DAY_TIME_DURATION_LITERAL => {
                let mut buffer = [0; 16];
                self.read_exact(&mut buffer)?;
-                Ok(EncodedTerm::DayTimeDurationLiteral(
-                    DayTimeDuration::from_be_bytes(buffer),
-                ))
+                Ok(DayTimeDuration::from_be_bytes(buffer).into())
            }
-            TYPE_TRIPLE => Ok(EncodedTerm::Triple(Rc::new(EncodedTriple {
+            TYPE_TRIPLE => Ok(EncodedTriple {
                subject: self.read_term()?,
                predicate: self.read_term()?,
                object: self.read_term()?,
-            }))),
+            }
+            .into()),
            _ => Err(CorruptionError::msg("the term buffer has an invalid type id").into()),
        }
    }
@ -571,8 +557,11 @@ pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
            sink.extend_from_slice(&datatype_id.to_be_bytes());
            sink.extend_from_slice(&value_id.to_be_bytes());
        }
-        EncodedTerm::BooleanLiteral(true) => sink.push(TYPE_BOOLEAN_LITERAL_TRUE),
-        EncodedTerm::BooleanLiteral(false) => sink.push(TYPE_BOOLEAN_LITERAL_FALSE),
+        EncodedTerm::BooleanLiteral(value) => sink.push(if bool::from(*value) {
+            TYPE_BOOLEAN_LITERAL_TRUE
+        } else {
+            TYPE_BOOLEAN_LITERAL_FALSE
+        }),
        EncodedTerm::FloatLiteral(value) => {
            sink.push(TYPE_FLOAT_LITERAL);
            sink.extend_from_slice(&value.to_be_bytes())
@ -643,6 +632,7 @@ pub fn write_term(sink: &mut Vec<u8>, term: &EncodedTerm) {
}
#[cfg(test)]
+#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use crate::model::TermRef;
@ -659,10 +649,6 @@ mod tests {
    fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
        Ok(self.id2str.borrow().get(key).cloned())
    }
-    fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
-        Ok(self.id2str.borrow().contains_key(key))
-    }
}
impl MemoryStrStore {
@ -750,7 +736,7 @@ mod tests {
            let mut buffer = Vec::new();
            write_term(&mut buffer, &encoded);
-            assert_eq!(encoded, Cursor::new(&buffer).read_term().unwrap());
+            assert_eq!(encoded, buffer.as_slice().read_term().unwrap());
        }
    }
}

@ -0,0 +1,139 @@
use crate::io::{RdfFormat, RdfParseError};
use crate::storage::numeric_encoder::EncodedTerm;
use oxiri::IriParseError;
use oxrdf::TermRef;
use std::error::Error;
use std::io;
/// An error related to storage operations (reads, writes...).
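///
/// A minimal sketch of the [`io::Error`] conversion (the error value is illustrative only):
/// ```
/// use oxigraph::store::StorageError;
/// use std::io;
///
/// let error: StorageError = io::Error::new(io::ErrorKind::Other, "disk failure").into();
/// let io_error = io::Error::from(error);
/// assert_eq!(io_error.kind(), io::ErrorKind::Other);
/// ```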
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum StorageError {
/// Error from the OS I/O layer.
#[error(transparent)]
Io(#[from] io::Error),
/// Error related to data corruption.
#[error(transparent)]
Corruption(#[from] CorruptionError),
#[doc(hidden)]
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl From<StorageError> for io::Error {
#[inline]
fn from(error: StorageError) -> Self {
match error {
StorageError::Io(error) => error,
StorageError::Corruption(error) => error.into(),
StorageError::Other(error) => Self::new(io::ErrorKind::Other, error),
}
}
}
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct CorruptionError(#[from] CorruptionErrorKind);
/// An error returned if some content in the database is corrupted.
#[derive(Debug, thiserror::Error)]
enum CorruptionErrorKind {
#[error("{0}")]
Msg(String),
#[error("{0}")]
Other(#[source] Box<dyn Error + Send + Sync + 'static>),
}
impl CorruptionError {
/// Builds an error by wrapping another error.
#[inline]
pub(crate) fn new(error: impl Into<Box<dyn Error + Send + Sync + 'static>>) -> Self {
Self(CorruptionErrorKind::Other(error.into()))
}
#[inline]
pub(crate) fn from_encoded_term(encoded: &EncodedTerm, term: &TermRef<'_>) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Invalid term encoding {encoded:?} for {term}"))
}
#[inline]
pub(crate) fn from_missing_column_family_name(name: &'static str) -> Self {
// TODO: eventually use a dedicated error enum value
Self::msg(format!("Column family {name} does not exist"))
}
/// Builds an error from a printable error message.
#[inline]
pub(crate) fn msg(msg: impl Into<String>) -> Self {
Self(CorruptionErrorKind::Msg(msg.into()))
}
}
impl From<CorruptionError> for io::Error {
#[inline]
fn from(error: CorruptionError) -> Self {
Self::new(io::ErrorKind::InvalidData, error)
}
}
/// An error raised while loading a file into a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum LoaderError {
/// An error raised while reading the file.
#[error(transparent)]
Parsing(#[from] RdfParseError),
/// An error raised during the insertion in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// The base IRI is invalid.
#[error("Invalid base IRI '{iri}': {error}")]
InvalidBaseIri {
/// The IRI itself.
iri: String,
/// The parsing error.
#[source]
error: IriParseError,
},
}
impl From<LoaderError> for io::Error {
#[inline]
fn from(error: LoaderError) -> Self {
match error {
LoaderError::Storage(error) => error.into(),
LoaderError::Parsing(error) => error.into(),
LoaderError::InvalidBaseIri { .. } => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}
/// An error raised while writing a file from a [`Store`](crate::store::Store).
#[derive(Debug, thiserror::Error)]
pub enum SerializerError {
/// An error raised while writing the content.
#[error(transparent)]
Io(#[from] io::Error),
/// An error raised during the lookup in the store.
#[error(transparent)]
Storage(#[from] StorageError),
/// A format compatible with [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset) is required.
#[error("A RDF format supporting datasets was expected, {0} found")]
DatasetFormatExpected(RdfFormat),
}
impl From<SerializerError> for io::Error {
#[inline]
fn from(error: SerializerError) -> Self {
match error {
SerializerError::Storage(error) => error.into(),
SerializerError::Io(error) => error,
SerializerError::DatasetFormatExpected(_) => {
Self::new(io::ErrorKind::InvalidInput, error.to_string())
}
}
}
}

@ -1,6 +1,9 @@
-use crate::model::{GraphNameRef, NamedOrBlankNodeRef, Quad, QuadRef, TermRef};
+#![allow(clippy::same_name_method)]
+#[cfg(all(not(target_family = "wasm")))]
+use crate::model::Quad;
+use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef};
 use crate::storage::backend::{Reader, Transaction};
-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
 use crate::storage::binary_encoder::LATEST_STORAGE_VERSION;
 use crate::storage::binary_encoder::{
     decode_term, encode_term, encode_term_pair, encode_term_quad, encode_term_triple,

@@ -9,25 +12,23 @@ use crate::storage::binary_encoder::{
     WRITTEN_TERM_MAX_SIZE,
 };
 pub use crate::storage::error::{CorruptionError, LoaderError, SerializerError, StorageError};
-use crate::storage::numeric_encoder::{
-    insert_term, Decoder, EncodedQuad, EncodedTerm, StrHash, StrLookup,
-};
+#[cfg(all(not(target_family = "wasm")))]
+use crate::storage::numeric_encoder::Decoder;
+use crate::storage::numeric_encoder::{insert_term, EncodedQuad, EncodedTerm, StrHash, StrLookup};
 use backend::{ColumnFamily, ColumnFamilyDefinition, Db, Iter};
-use std::cmp::{max, min};
+#[cfg(all(not(target_family = "wasm")))]
 use std::collections::VecDeque;
-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
 use std::collections::{HashMap, HashSet};
 use std::error::Error;
-#[cfg(not(target_arch = "wasm32"))]
-use std::mem::take;
-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
+use std::mem::{swap, take};
+#[cfg(all(not(target_family = "wasm")))]
 use std::path::{Path, PathBuf};
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::Arc;
-#[cfg(not(target_arch = "wasm32"))]
-use std::thread::spawn;
-use std::thread::JoinHandle;
-use sysinfo::{System, SystemExt};
+#[cfg(all(not(target_family = "wasm")))]
+use std::sync::Mutex;
+#[cfg(all(not(target_family = "wasm")))]
+use std::{io, thread};

 mod backend;
 mod binary_encoder;

@@ -46,15 +47,16 @@ const DSPO_CF: &str = "dspo";
 const DPOS_CF: &str = "dpos";
 const DOSP_CF: &str = "dosp";
 const GRAPHS_CF: &str = "graphs";
+#[cfg(all(not(target_family = "wasm")))]
 const DEFAULT_CF: &str = "default";
-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
 const DEFAULT_BULK_LOAD_BATCH_SIZE: usize = 1_000_000;
-const MAX_BULK_LOAD_BATCH_SIZE: usize = 100_000_000;

 /// Low level storage primitives
 #[derive(Clone)]
 pub struct Storage {
     db: Db,
+    #[cfg(all(not(target_family = "wasm")))]
     default_cf: ColumnFamily,
     id2str_cf: ColumnFamily,
     spog_cf: ColumnFamily,

@@ -74,9 +76,39 @@ impl Storage {
         Self::setup(Db::new(Self::column_families())?)
     }

-    #[cfg(not(target_arch = "wasm32"))]
-    pub fn open(path: &Path) -> Result<Self, StorageError> {
-        Self::setup(Db::open(path, Self::column_families())?)
+    #[cfg(all(not(target_family = "wasm")))]
+    pub fn open(path: &Path, key: Option<[u8; 32]>) -> Result<Self, StorageError> {
+        Self::setup(Db::open_read_write(
+            Some(path),
+            Self::column_families(),
+            key,
+        )?)
+    }
+
+    // #[cfg(all(not(target_family = "wasm")))]
+    // pub fn open_secondary(primary_path: &Path) -> Result<Self, StorageError> {
+    //     Self::setup(Db::open_secondary(
+    //         primary_path,
+    //         None,
+    //         Self::column_families(),
+    //     )?)
+    // }
+
+    // #[cfg(all(not(target_family = "wasm")))]
+    // pub fn open_persistent_secondary(
+    //     primary_path: &Path,
+    //     secondary_path: &Path,
+    // ) -> Result<Self, StorageError> {
+    //     Self::setup(Db::open_secondary(
+    //         primary_path,
+    //         Some(secondary_path),
+    //         Self::column_families(),
+    //     )?)
+    // }
+
+    #[cfg(all(not(target_family = "wasm")))]
+    pub fn open_read_only(path: &Path, key: Option<[u8; 32]>) -> Result<Self, StorageError> {
+        Self::setup(Db::open_read_only(path, Self::column_families(), key)?)
     }
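
Note on the hunk above: `Storage::open` now threads an optional 32-byte encryption key down to `Db::open_read_write`, and a separate `Db::open_read_only` entry point replaces the secondary-instance constructors, which are left commented out. A minimal sketch of how a caller inside the crate might drive the new signatures; `derive_key` is a hypothetical helper that is not part of this diff, and a real deployment would use a proper KDF:

    use crate::storage::{Storage, StorageError};
    use std::path::Path;

    // Hypothetical helper: any derivation producing a [u8; 32] works here.
    fn derive_key(secret: &[u8]) -> [u8; 32] {
        let mut key = [0u8; 32];
        for (i, b) in secret.iter().enumerate() {
            key[i % 32] ^= b; // placeholder mixing, not a real KDF
        }
        key
    }

    fn open_encrypted(path: &Path, secret: &[u8]) -> Result<Storage, StorageError> {
        // Pass None instead of Some(...) to open an unencrypted store.
        Storage::open(path, Some(derive_key(secret)))
    }

    fn open_for_inspection(path: &Path) -> Result<Storage, StorageError> {
        // Read-only handle over the same column families, without write access.
        Storage::open_read_only(path, None)
    }
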
 fn column_families() -> Vec<ColumnFamilyDefinition> {

@@ -152,26 +184,27 @@ impl Storage {
     fn setup(db: Db) -> Result<Self, StorageError> {
         let this = Self {
-            default_cf: db.column_family(DEFAULT_CF).unwrap(),
-            id2str_cf: db.column_family(ID2STR_CF).unwrap(),
-            spog_cf: db.column_family(SPOG_CF).unwrap(),
-            posg_cf: db.column_family(POSG_CF).unwrap(),
-            ospg_cf: db.column_family(OSPG_CF).unwrap(),
-            gspo_cf: db.column_family(GSPO_CF).unwrap(),
-            gpos_cf: db.column_family(GPOS_CF).unwrap(),
-            gosp_cf: db.column_family(GOSP_CF).unwrap(),
-            dspo_cf: db.column_family(DSPO_CF).unwrap(),
-            dpos_cf: db.column_family(DPOS_CF).unwrap(),
-            dosp_cf: db.column_family(DOSP_CF).unwrap(),
-            graphs_cf: db.column_family(GRAPHS_CF).unwrap(),
+            #[cfg(all(not(target_family = "wasm")))]
+            default_cf: db.column_family(DEFAULT_CF)?,
+            id2str_cf: db.column_family(ID2STR_CF)?,
+            spog_cf: db.column_family(SPOG_CF)?,
+            posg_cf: db.column_family(POSG_CF)?,
+            ospg_cf: db.column_family(OSPG_CF)?,
+            gspo_cf: db.column_family(GSPO_CF)?,
+            gpos_cf: db.column_family(GPOS_CF)?,
+            gosp_cf: db.column_family(GOSP_CF)?,
+            dspo_cf: db.column_family(DSPO_CF)?,
+            dpos_cf: db.column_family(DPOS_CF)?,
+            dosp_cf: db.column_family(DOSP_CF)?,
+            graphs_cf: db.column_family(GRAPHS_CF)?,
             db,
         };
-        #[cfg(not(target_arch = "wasm32"))]
+        #[cfg(all(not(target_family = "wasm")))]
         this.migrate()?;
         Ok(this)
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     fn migrate(&self) -> Result<(), StorageError> {
         let mut version = self.ensure_version()?;
         if version == 0 {

@@ -200,24 +233,24 @@ impl Storage {
         match version {
             _ if version < LATEST_STORAGE_VERSION => Err(CorruptionError::msg(format!(
-                "The RocksDB database is using the outdated encoding version {}. Automated migration is not supported, please dump the store dataset using a compatible Oxigraph version and load it again using the current version",
-                version
+                "The RocksDB database is using the outdated encoding version {version}. Automated migration is not supported, please dump the store dataset using a compatible Oxigraph version and load it again using the current version"
             )).into()),
             LATEST_STORAGE_VERSION => Ok(()),
             _ => Err(CorruptionError::msg(format!(
-                "The RocksDB database is using the too recent version {}. Upgrade to the latest Oxigraph version to load this database",
-                version
+                "The RocksDB database is using the too recent version {version}. Upgrade to the latest Oxigraph version to load this database"
             )).into())
         }
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     fn ensure_version(&self) -> Result<u64, StorageError> {
         Ok(
             if let Some(version) = self.db.get(&self.default_cf, b"oxversion")? {
-                let mut buffer = [0; 8];
-                buffer.copy_from_slice(&version);
-                u64::from_be_bytes(buffer)
+                u64::from_be_bytes(version.as_ref().try_into().map_err(|e| {
+                    CorruptionError::new(format!("Error while parsing the version key: {e}"))
+                })?)
             } else {
                 self.update_version(LATEST_STORAGE_VERSION)?;
                 LATEST_STORAGE_VERSION

@@ -225,11 +258,11 @@ impl Storage {
         )
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     fn update_version(&self, version: u64) -> Result<(), StorageError> {
         self.db
             .insert(&self.default_cf, b"oxversion", &version.to_be_bytes())?;
-        self.db.flush(&self.default_cf)
+        self.db.flush()
     }

     pub fn snapshot(&self) -> StorageReader {

@@ -252,25 +285,15 @@ impl Storage {
         })
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     pub fn flush(&self) -> Result<(), StorageError> {
-        self.db.flush(&self.default_cf)?;
-        self.db.flush(&self.gpos_cf)?;
-        self.db.flush(&self.gpos_cf)?;
-        self.db.flush(&self.gosp_cf)?;
-        self.db.flush(&self.spog_cf)?;
-        self.db.flush(&self.posg_cf)?;
-        self.db.flush(&self.ospg_cf)?;
-        self.db.flush(&self.dspo_cf)?;
-        self.db.flush(&self.dpos_cf)?;
-        self.db.flush(&self.dosp_cf)?;
-        self.db.flush(&self.id2str_cf)
+        self.db.flush()
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     pub fn compact(&self) -> Result<(), StorageError> {
         self.db.compact(&self.default_cf)?;
-        self.db.compact(&self.gpos_cf)?;
+        self.db.compact(&self.gspo_cf)?;
         self.db.compact(&self.gpos_cf)?;
         self.db.compact(&self.gosp_cf)?;
         self.db.compact(&self.spog_cf)?;

@@ -282,7 +305,7 @@ impl Storage {
         self.db.compact(&self.id2str_cf)
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     pub fn backup(&self, target_directory: &Path) -> Result<(), StorageError> {
         self.db.backup(target_directory)
     }

@@ -550,7 +573,7 @@ impl StorageReader {
     pub fn named_graphs(&self) -> DecodingGraphIterator {
         DecodingGraphIterator {
-            iter: self.reader.iter(&self.storage.graphs_cf).unwrap(), //TODO: propagate error?
+            iter: self.reader.iter(&self.storage.graphs_cf).unwrap(), // TODO: propagate error?
         }
     }

@@ -607,7 +630,7 @@ impl StorageReader {
         }
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     pub fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
         Ok(self
             .storage

@@ -618,31 +641,31 @@ impl StorageReader {
             .map_err(CorruptionError::new)?)
     }

-    #[cfg(target_arch = "wasm32")]
+    #[cfg(any(target_family = "wasm"))]
     pub fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
         Ok(self
             .reader
             .get(&self.storage.id2str_cf, &key.to_be_bytes())?
-            .map(|v| String::from_utf8(v.into()))
+            .map(String::from_utf8)
             .transpose()
             .map_err(CorruptionError::new)?)
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     pub fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
         self.storage
             .db
             .contains_key(&self.storage.id2str_cf, &key.to_be_bytes())
     }

-    #[cfg(target_arch = "wasm32")]
+    #[cfg(any(target_family = "wasm"))]
     pub fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
         self.reader
             .contains_key(&self.storage.id2str_cf, &key.to_be_bytes())
     }

     /// Validates that all the storage invariants held in the data
-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     pub fn validate(&self) -> Result<(), StorageError> {
         // triples
         let dspo_size = self.dspo_quads(&[]).count();

@@ -752,6 +775,13 @@ impl StorageReader {
         }
         Ok(())
     }
+
+    /// Validates that all the storage invariants held in the data
+    #[cfg(any(target_family = "wasm"))]
+    #[allow(clippy::unused_self, clippy::unnecessary_wraps)]
+    pub fn validate(&self) -> Result<(), StorageError> {
+        Ok(()) // TODO
+    }
 }

 pub struct ChainedDecodingQuadIterator {

@@ -778,7 +808,7 @@ impl ChainedDecodingQuadIterator {
 impl Iterator for ChainedDecodingQuadIterator {
     type Item = Result<EncodedQuad, StorageError>;

-    fn next(&mut self) -> Option<Result<EncodedQuad, StorageError>> {
+    fn next(&mut self) -> Option<Self::Item> {
         if let Some(result) = self.first.next() {
             Some(result)
         } else if let Some(second) = self.second.as_mut() {

@@ -797,7 +827,7 @@ pub struct DecodingQuadIterator {
 impl Iterator for DecodingQuadIterator {
     type Item = Result<EncodedQuad, StorageError>;

-    fn next(&mut self) -> Option<Result<EncodedQuad, StorageError>> {
+    fn next(&mut self) -> Option<Self::Item> {
         if let Err(e) = self.iter.status() {
             return Some(Err(e));
         }

@@ -814,7 +844,7 @@ pub struct DecodingGraphIterator {
 impl Iterator for DecodingGraphIterator {
     type Item = Result<EncodedTerm, StorageError>;

-    fn next(&mut self) -> Option<Result<EncodedTerm, StorageError>> {
+    fn next(&mut self) -> Option<Self::Item> {
         if let Err(e) = self.iter.status() {
             return Some(Err(e));
         }

@@ -828,10 +858,6 @@ impl StrLookup for StorageReader {
     fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
         self.get_str(key)
     }
-
-    fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError> {
-        self.contains_str(key)
-    }
 }

 pub struct StorageWriter<'a> {

@@ -975,7 +1001,7 @@ impl<'a> StorageWriter<'a> {
         }
     }

-    #[cfg(not(target_arch = "wasm32"))]
+    #[cfg(all(not(target_family = "wasm")))]
     fn insert_str(&mut self, key: &StrHash, value: &str) -> Result<(), StorageError> {
         if self
             .storage

@@ -991,7 +1017,7 @@ impl<'a> StorageWriter<'a> {
         )
     }

-    #[cfg(target_arch = "wasm32")]
+    #[cfg(any(target_family = "wasm"))]
     fn insert_str(&mut self, key: &StrHash, value: &str) -> Result<(), StorageError> {
         self.transaction.insert(
             &self.storage.id2str_cf,

@@ -1156,7 +1182,8 @@ impl<'a> StorageWriter<'a> {
     }
 }

-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
+#[must_use]
 pub struct StorageBulkLoader {
     storage: Storage,
     hooks: Vec<Box<dyn Fn(u64)>>,

@@ -1164,7 +1191,7 @@ pub struct StorageBulkLoader {
     max_memory_size: Option<usize>,
 }

-#[cfg(not(target_arch = "wasm32"))]
+#[cfg(all(not(target_family = "wasm")))]
 impl StorageBulkLoader {
     pub fn new(storage: Storage) -> Self {
         Self {

@@ -1175,12 +1202,12 @@ impl StorageBulkLoader {
         }
     }

-    pub fn set_num_threads(mut self, num_threads: usize) -> Self {
+    pub fn with_num_threads(mut self, num_threads: usize) -> Self {
         self.num_threads = Some(num_threads);
         self
     }

-    pub fn set_max_memory_size_in_megabytes(mut self, max_memory_size: usize) -> Self {
+    pub fn with_max_memory_size_in_megabytes(mut self, max_memory_size: usize) -> Self {
         self.max_memory_size = Some(max_memory_size);
         self
     }
@@ -1191,139 +1218,143 @@ impl StorageBulkLoader {
     }

     #[allow(clippy::trait_duplication_in_bounds)]
-    pub fn load<EI, EO: From<StorageError> + From<EI>, I: IntoIterator<Item = Result<Quad, EI>>>(
+    pub fn load<EI, EO: From<StorageError> + From<EI>>(
         &self,
-        quads: I,
+        quads: impl IntoIterator<Item = Result<Quad, EI>>,
     ) -> Result<(), EO> {
-        let system = System::new_all();
-        let cpu_count = min(4, system.physical_core_count().unwrap_or(2));
-        let num_threads = max(
-            if let Some(num_threads) = self.num_threads {
-                num_threads
-            } else if let Some(max_memory_size) = self.max_memory_size {
-                min(
-                    cpu_count,
-                    max_memory_size * 1000 / DEFAULT_BULK_LOAD_BATCH_SIZE,
-                )
-            } else {
-                cpu_count
-            },
-            2,
-        );
-        let batch_size = min(
-            if let Some(max_memory_size) = self.max_memory_size {
-                max(1000, max_memory_size * 1000 / num_threads)
-            } else {
-                max(
-                    usize::try_from(system.free_memory()).unwrap() / 1000 / num_threads,
-                    DEFAULT_BULK_LOAD_BATCH_SIZE,
-                )
-            },
-            MAX_BULK_LOAD_BATCH_SIZE,
-        );
-        let mut threads = VecDeque::with_capacity(num_threads - 1);
-        let mut buffer = Vec::with_capacity(batch_size);
-        let done_counter = Arc::new(AtomicU64::new(0));
-        let mut done_and_displayed_counter = 0;
-        for quad in quads {
-            let quad = quad?;
-            buffer.push(quad);
-            if buffer.len() >= batch_size {
-                self.spawn_load_thread(
-                    &mut buffer,
-                    &mut threads,
-                    &done_counter,
-                    &mut done_and_displayed_counter,
-                    num_threads,
-                )?;
-            }
-        }
-        self.spawn_load_thread(
-            &mut buffer,
-            &mut threads,
-            &done_counter,
-            &mut done_and_displayed_counter,
-            num_threads,
-        )?;
-        for thread in threads {
-            thread.join().unwrap()?;
-            self.on_possible_progress(&done_counter, &mut done_and_displayed_counter);
-        }
-        Ok(())
+        let num_threads = self.num_threads.unwrap_or(2);
+        if num_threads < 2 {
+            return Err(
+                StorageError::Other("The bulk loader needs at least 2 threads".into()).into(),
+            );
+        }
+        let batch_size = if let Some(max_memory_size) = self.max_memory_size {
+            max_memory_size * 1000 / num_threads
+        } else {
+            DEFAULT_BULK_LOAD_BATCH_SIZE
+        };
+        if batch_size < 10_000 {
+            return Err(StorageError::Other(
+                "The bulk loader memory bound is too low. It needs at least 100MB".into(),
+            )
+            .into());
+        }
+        let done_counter = Mutex::new(0);
+        let mut done_and_displayed_counter = 0;
+        thread::scope(|thread_scope| {
+            let mut threads = VecDeque::with_capacity(num_threads - 1);
+            let mut buffer = Vec::with_capacity(batch_size);
+            for quad in quads {
+                let quad = quad?;
+                buffer.push(quad);
+                if buffer.len() >= batch_size {
+                    self.spawn_load_thread(
+                        &mut buffer,
+                        &mut threads,
+                        thread_scope,
+                        &done_counter,
+                        &mut done_and_displayed_counter,
+                        num_threads,
+                        batch_size,
+                    )?;
+                }
+            }
+            self.spawn_load_thread(
+                &mut buffer,
+                &mut threads,
+                thread_scope,
+                &done_counter,
+                &mut done_and_displayed_counter,
+                num_threads,
+                batch_size,
+            )?;
+            for thread in threads {
+                map_thread_result(thread.join()).map_err(StorageError::Io)??;
+                self.on_possible_progress(&done_counter, &mut done_and_displayed_counter)?;
+            }
+            Ok(())
+        })
     }

-    fn spawn_load_thread(
-        &self,
+    fn spawn_load_thread<'scope>(
+        &'scope self,
         buffer: &mut Vec<Quad>,
-        threads: &mut VecDeque<JoinHandle<Result<(), StorageError>>>,
-        done_counter: &Arc<AtomicU64>,
+        threads: &mut VecDeque<thread::ScopedJoinHandle<'scope, Result<(), StorageError>>>,
+        thread_scope: &'scope thread::Scope<'scope, '_>,
+        done_counter: &'scope Mutex<u64>,
         done_and_displayed_counter: &mut u64,
         num_threads: usize,
+        batch_size: usize,
     ) -> Result<(), StorageError> {
-        self.on_possible_progress(done_counter, done_and_displayed_counter);
+        self.on_possible_progress(done_counter, done_and_displayed_counter)?;
         // We avoid to have too many threads
         if threads.len() >= num_threads {
             if let Some(thread) = threads.pop_front() {
-                thread.join().unwrap()?;
-                self.on_possible_progress(done_counter, done_and_displayed_counter);
+                map_thread_result(thread.join()).map_err(StorageError::Io)??;
+                self.on_possible_progress(done_counter, done_and_displayed_counter)?;
             }
         }
-        let buffer = take(buffer);
-        let storage = self.storage.clone();
-        let done_counter_clone = done_counter.clone();
-        threads.push_back(spawn(move || {
-            FileBulkLoader::new(storage).load(buffer, &done_counter_clone)
+        let mut buffer_to_load = Vec::with_capacity(batch_size);
+        swap(buffer, &mut buffer_to_load);
+        let storage = &self.storage;
+        threads.push_back(thread_scope.spawn(move || {
+            FileBulkLoader::new(storage, batch_size).load(buffer_to_load, done_counter)
         }));
-        self.on_possible_progress(done_counter, done_and_displayed_counter);
         Ok(())
     }

-    fn on_possible_progress(&self, done: &AtomicU64, done_and_displayed: &mut u64) {
-        let new_counter = done.fetch_max(*done_and_displayed, Ordering::Relaxed);
-        let display_step = u64::try_from(DEFAULT_BULK_LOAD_BATCH_SIZE).unwrap();
-        if new_counter % display_step > *done_and_displayed % display_step {
+    fn on_possible_progress(
+        &self,
+        done: &Mutex<u64>,
+        done_and_displayed: &mut u64,
+    ) -> Result<(), StorageError> {
+        let new_counter = *done
+            .lock()
+            .map_err(|_| io::Error::new(io::ErrorKind::Other, "Mutex poisoned"))?;
+        let display_step = DEFAULT_BULK_LOAD_BATCH_SIZE as u64;
+        if new_counter / display_step > *done_and_displayed / display_step {
             for hook in &self.hooks {
                 hook(new_counter);
             }
         }
         *done_and_displayed = new_counter;
+        Ok(())
     }
 }

-#[cfg(not(target_arch = "wasm32"))]
-struct FileBulkLoader {
-    storage: Storage,
+#[cfg(all(not(target_family = "wasm")))]
+struct FileBulkLoader<'a> {
+    storage: &'a Storage,
     id2str: HashMap<StrHash, Box<str>>,
     quads: HashSet<EncodedQuad>,
     triples: HashSet<EncodedQuad>,
     graphs: HashSet<EncodedTerm>,
 }

-#[cfg(not(target_arch = "wasm32"))]
-impl FileBulkLoader {
-    fn new(storage: Storage) -> Self {
+#[cfg(all(not(target_family = "wasm")))]
+impl<'a> FileBulkLoader<'a> {
+    fn new(storage: &'a Storage, batch_size: usize) -> Self {
         Self {
             storage,
-            id2str: HashMap::default(),
-            quads: HashSet::default(),
-            triples: HashSet::default(),
+            id2str: HashMap::with_capacity(3 * batch_size),
+            quads: HashSet::with_capacity(batch_size),
+            triples: HashSet::with_capacity(batch_size),
             graphs: HashSet::default(),
         }
     }

-    fn load(
-        &mut self,
-        quads: impl IntoIterator<Item = Quad>,
-        counter: &AtomicU64,
-    ) -> Result<(), StorageError> {
+    fn load(&mut self, quads: Vec<Quad>, counter: &Mutex<u64>) -> Result<(), StorageError> {
         self.encode(quads)?;
         let size = self.triples.len() + self.quads.len();
         self.save()?;
-        counter.fetch_add(size.try_into().unwrap(), Ordering::Relaxed);
+        *counter
+            .lock()
+            .map_err(|_| io::Error::new(io::ErrorKind::Other, "Mutex poisoned"))? +=
+            size.try_into().unwrap_or(u64::MAX);
         Ok(())
     }

-    fn encode(&mut self, quads: impl IntoIterator<Item = Quad>) -> Result<(), StorageError> {
+    fn encode(&mut self, quads: Vec<Quad>) -> Result<(), StorageError> {
         for quad in quads {
             let encoded = EncodedQuad::from(quad.as_ref());
             if quad.graph_name.is_default_graph() {

@@ -1342,7 +1373,12 @@ impl FileBulkLoader {
                     match quad.graph_name.as_ref() {
                         GraphNameRef::NamedNode(n) => n.into(),
                         GraphNameRef::BlankNode(n) => n.into(),
-                        GraphNameRef::DefaultGraph => unreachable!(),
+                        GraphNameRef::DefaultGraph => {
+                            return Err(CorruptionError::new(
+                                "Default graph this not the default graph",
+                            )
+                            .into())
+                        }
                     },
                     &encoded.graph_name,
                 )?;

@@ -1500,3 +1536,17 @@ impl FileBulkLoader {
         sst.finish()
     }
 }
+
+#[cfg(all(not(target_family = "wasm")))]
+fn map_thread_result<R>(result: thread::Result<R>) -> io::Result<R> {
+    result.map_err(|e| {
+        io::Error::new(
+            io::ErrorKind::Other,
+            if let Ok(e) = e.downcast::<&dyn std::fmt::Display>() {
+                format!("A loader processed crashed with {e}")
+            } else {
+                "A loader processed crashed with and unknown error".into()
+            },
+        )
+    })
+}
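
`map_thread_result` converts the `thread::Result` coming out of `join()` back into an `io::Error`: the `Err` side of a joined thread is the boxed panic payload (`Box<dyn Any + Send>`), so a message can only be recovered by downcasting. A standalone sketch of the same pattern, using the common `&str`/`String` payload types instead of `&dyn Display`:

    use std::any::Any;
    use std::{io, thread};

    fn join_to_io_error<R>(handle: thread::JoinHandle<R>) -> io::Result<R> {
        handle.join().map_err(|payload: Box<dyn Any + Send>| {
            // panic!("...") produces &str; format!-based panics produce String.
            let msg = payload
                .downcast_ref::<&str>()
                .map(|s| (*s).to_string())
                .or_else(|| payload.downcast_ref::<String>().cloned())
                .unwrap_or_else(|| "unknown panic payload".to_string());
            io::Error::new(io::ErrorKind::Other, msg)
        })
    }
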

@@ -1,15 +1,14 @@
 #![allow(clippy::unreadable_literal)]

 use crate::model::*;
+use crate::storage::error::{CorruptionError, StorageError};
 use crate::storage::small_string::SmallString;
-use crate::store::{CorruptionError, StorageError};
-use crate::xsd::*;
+use oxsdatatypes::*;
 use siphasher::sip128::{Hasher128, SipHasher24};
 use std::fmt::Debug;
-use std::hash::Hash;
-use std::hash::Hasher;
-use std::rc::Rc;
+use std::hash::{Hash, Hasher};
 use std::str;
+use std::sync::Arc;

 #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
 #[repr(transparent)]

@@ -80,10 +79,10 @@ pub enum EncodedTerm {
         value_id: StrHash,
         datatype_id: StrHash,
     },
-    BooleanLiteral(bool),
+    BooleanLiteral(Boolean),
     FloatLiteral(Float),
     DoubleLiteral(Double),
-    IntegerLiteral(i64),
+    IntegerLiteral(Integer),
     DecimalLiteral(Decimal),
     DateTimeLiteral(DateTime),
     TimeLiteral(Time),

@@ -96,7 +95,7 @@ pub enum EncodedTerm {
     DurationLiteral(Duration),
     YearMonthDurationLiteral(YearMonthDuration),
     DayTimeDurationLiteral(DayTimeDuration),
-    Triple(Rc<EncodedTriple>),
+    Triple(Arc<EncodedTriple>),
 }

 impl PartialEq for EncodedTerm {

@@ -183,21 +182,25 @@ impl PartialEq for EncodedTerm {
                 },
             ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b,
             (Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b,
-            (Self::FloatLiteral(a), Self::FloatLiteral(b)) => a == b,
-            (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a == b,
-            (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a == b,
-            (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a == b,
-            (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(b),
-            (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(b),
-            (Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(b),
-            (Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(b),
-            (Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(b),
-            (Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(b),
-            (Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(b),
-            (Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(b),
-            (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a == b,
-            (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => a == b,
-            (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => a == b,
+            (Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(*b),
+            (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(*b),
+            (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(*b),
+            (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(*b),
+            (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(*b),
+            (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(*b),
+            (Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(*b),
+            (Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(*b),
+            (Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(*b),
+            (Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(*b),
+            (Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(*b),
+            (Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(*b),
+            (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(*b),
+            (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => {
+                a.is_identical_with(*b)
+            }
+            (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => {
+                a.is_identical_with(*b)
+            }
             (Self::Triple(a), Self::Triple(b)) => a == b,
             (_, _) => false,
         }

@@ -247,8 +250,8 @@ impl Hash for EncodedTerm {
                 datatype_id.hash(state);
             }
             Self::BooleanLiteral(value) => value.hash(state),
-            Self::FloatLiteral(value) => value.hash(state),
-            Self::DoubleLiteral(value) => value.hash(state),
+            Self::FloatLiteral(value) => value.to_be_bytes().hash(state),
+            Self::DoubleLiteral(value) => value.to_be_bytes().hash(state),
             Self::IntegerLiteral(value) => value.hash(state),
             Self::DecimalLiteral(value) => value.hash(state),
             Self::DateTimeLiteral(value) => value.hash(state),
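
`Float` and `Double` wrap IEEE 754 values, and floats have no `Hash` impl in std (NaN != NaN under `==`, which would break the Hash/Eq contract). Hashing the big-endian byte representation, as the hunk above does via `to_be_bytes()`, yields a well-defined hash consistent with the bit-level `is_identical_with` equality adopted in `PartialEq`. The trick in isolation, on a plain `f64`:

    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    fn hash_f64_bits(value: f64) -> u64 {
        let mut hasher = DefaultHasher::new();
        value.to_be_bytes().hash(&mut hasher); // [u8; 8] implements Hash
        hasher.finish()
    }

    // 0.0 == -0.0 under ==, but the two differ bit-for-bit, so they hash
    // differently here: identity semantics, matching is_identical_with.
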
@@ -329,13 +332,13 @@ impl EncodedTerm {
 impl From<bool> for EncodedTerm {
     fn from(value: bool) -> Self {
-        Self::BooleanLiteral(value)
+        Self::BooleanLiteral(value.into())
     }
 }

 impl From<i64> for EncodedTerm {
     fn from(value: i64) -> Self {
-        Self::IntegerLiteral(value)
+        Self::IntegerLiteral(value.into())
     }
 }

@@ -375,12 +378,24 @@ impl From<f64> for EncodedTerm {
     }
 }

+impl From<Boolean> for EncodedTerm {
+    fn from(value: Boolean) -> Self {
+        Self::BooleanLiteral(value)
+    }
+}
+
 impl From<Double> for EncodedTerm {
     fn from(value: Double) -> Self {
         Self::DoubleLiteral(value)
     }
 }

+impl From<Integer> for EncodedTerm {
+    fn from(value: Integer) -> Self {
+        Self::IntegerLiteral(value)
+    }
+}
+
 impl From<Decimal> for EncodedTerm {
     fn from(value: Decimal) -> Self {
         Self::DecimalLiteral(value)

@@ -405,6 +420,36 @@ impl From<Date> for EncodedTerm {
     }
 }

+impl From<GMonthDay> for EncodedTerm {
+    fn from(value: GMonthDay) -> Self {
+        Self::GMonthDayLiteral(value)
+    }
+}
+
+impl From<GDay> for EncodedTerm {
+    fn from(value: GDay) -> Self {
+        Self::GDayLiteral(value)
+    }
+}
+
+impl From<GMonth> for EncodedTerm {
+    fn from(value: GMonth) -> Self {
+        Self::GMonthLiteral(value)
+    }
+}
+
+impl From<GYearMonth> for EncodedTerm {
+    fn from(value: GYearMonth) -> Self {
+        Self::GYearMonthLiteral(value)
+    }
+}
+
+impl From<GYear> for EncodedTerm {
+    fn from(value: GYear) -> Self {
+        Self::GYearLiteral(value)
+    }
+}
+
 impl From<Duration> for EncodedTerm {
     fn from(value: Duration) -> Self {
         Self::DurationLiteral(value)

@@ -425,7 +470,7 @@ impl From<DayTimeDuration> for EncodedTerm {
 impl From<EncodedTriple> for EncodedTerm {
     fn from(value: EncodedTriple) -> Self {
-        Self::Triple(Rc::new(value))
+        Self::Triple(Arc::new(value))
     }
 }

@@ -485,7 +530,6 @@ impl From<LiteralRef<'_>> for EncodedTerm {
             }
             "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
             "http://www.w3.org/2001/XMLSchema#string" => {
-                let value = value;
                 Some(if let Ok(value) = SmallString::try_from(value) {
                     Self::SmallStringLiteral(value)
                 } else {

@@ -589,7 +633,7 @@ impl From<GraphNameRef<'_>> for EncodedTerm {
 impl From<TripleRef<'_>> for EncodedTerm {
     fn from(triple: TripleRef<'_>) -> Self {
-        Self::Triple(Rc::new(triple.into()))
+        Self::Triple(Arc::new(triple.into()))
     }
 }

@@ -657,8 +701,6 @@ impl From<QuadRef<'_>> for EncodedQuad {
 pub trait StrLookup {
     fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError>;
-
-    fn contains_str(&self, key: &StrHash) -> Result<bool, StorageError>;
 }
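
`StrLookup` shrinks to a single required method now that `contains_str` is gone from both the trait and its `StorageReader` impl above. That makes ad-hoc lookups trivial to write; a sketch of an in-memory impl, assuming the `StrHash`, `StrLookup`, and `StorageError` types from this diff:

    use std::collections::HashMap;

    struct MemoryLookup {
        strings: HashMap<StrHash, String>,
    }

    impl StrLookup for MemoryLookup {
        fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
            Ok(self.strings.get(key).cloned())
        }
    }
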
 pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(

@@ -671,13 +713,13 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
             if let EncodedTerm::NamedNode { iri_id } = encoded {
                 insert_str(iri_id, node.as_str())
             } else {
-                unreachable!("Invalid term encoding {:?} for {}", encoded, term)
+                Err(CorruptionError::from_encoded_term(encoded, &term).into())
             }
         }
         TermRef::BlankNode(node) => match encoded {
             EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()),
             EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()),
-            _ => unreachable!("Invalid term encoding {:?} for {}", encoded, term),
+            _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
         },
         TermRef::Literal(literal) => match encoded {
             EncodedTerm::BigStringLiteral { value_id }

@@ -688,7 +730,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
                 if let Some(language) = literal.language() {
                     insert_str(language_id, language)
                 } else {
-                    unreachable!("Invalid term encoding {:?} for {}", encoded, term)
+                    Err(CorruptionError::from_encoded_term(encoded, &term).into())
                 }
             }
             EncodedTerm::BigBigLangStringLiteral {

@@ -699,7 +741,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
                 if let Some(language) = literal.language() {
                     insert_str(language_id, language)
                 } else {
-                    unreachable!("Invalid term encoding {:?} for {}", encoded, term)
+                    Err(CorruptionError::from_encoded_term(encoded, &term).into())
                 }
             }
             EncodedTerm::SmallTypedLiteral { datatype_id, .. } => {

@@ -730,7 +772,7 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
             | EncodedTerm::DurationLiteral(..)
             | EncodedTerm::YearMonthDurationLiteral(..)
             | EncodedTerm::DayTimeDurationLiteral(..) => Ok(()),
-            _ => unreachable!("Invalid term encoding {:?} for {}", encoded, term),
+            _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
         },
         TermRef::Triple(triple) => {
             if let EncodedTerm::Triple(encoded) = encoded {

@@ -742,18 +784,14 @@ pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
                 )?;
                 insert_term(triple.object.as_ref(), &encoded.object, insert_str)
             } else {
-                unreachable!("Invalid term encoding {:?} for {}", encoded, term)
+                Err(CorruptionError::from_encoded_term(encoded, &term).into())
             }
         }
     }
 }

 pub fn parse_boolean_str(value: &str) -> Option<EncodedTerm> {
-    match value {
-        "true" | "1" => Some(EncodedTerm::BooleanLiteral(true)),
-        "false" | "0" => Some(EncodedTerm::BooleanLiteral(false)),
-        _ => None,
-    }
+    value.parse().map(EncodedTerm::BooleanLiteral).ok()
 }
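
The rewritten `parse_boolean_str` delegates to `Boolean`'s `FromStr` impl and wraps the result. Behavior should be unchanged on the XSD lexical space, assuming the oxsdatatypes parser accepts exactly "true", "false", "1" and "0"; anything else becomes `None` through `.ok()`:

    // Equivalence sketch (hypothetical assertions, not part of the diff):
    assert_eq!(
        parse_boolean_str("1"),
        Some(EncodedTerm::BooleanLiteral(true.into()))
    );
    assert_eq!(parse_boolean_str("maybe"), None);
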
 pub fn parse_float_str(value: &str) -> Option<EncodedTerm> {

@@ -987,8 +1025,7 @@ impl<S: StrLookup> Decoder for S {
 fn get_required_str<L: StrLookup>(lookup: &L, id: &StrHash) -> Result<String, StorageError> {
     Ok(lookup.get_str(id)?.ok_or_else(|| {
         CorruptionError::new(format!(
-            "Not able to find the string with id {:?} in the string store",
-            id
+            "Not able to find the string with id {id:?} in the string store"
         ))
     })?)
 }

@@ -1,11 +1,9 @@
 use std::borrow::Borrow;
 use std::cmp::Ordering;
-use std::error::Error;
-use std::fmt;
 use std::hash::{Hash, Hasher};
 use std::ops::Deref;
-use std::str;
 use std::str::{FromStr, Utf8Error};
+use std::{fmt, str};

 /// A small inline string
 #[derive(Clone, Copy, Default)]

@@ -46,10 +44,8 @@ impl SmallString {
     #[inline]
     #[allow(unsafe_code)]
     pub fn as_str(&self) -> &str {
-        unsafe {
-            // safe because we ensured it in constructors
-            str::from_utf8_unchecked(self.as_bytes())
-        }
+        // SAFETY: safe because we ensured it in constructors
+        unsafe { str::from_utf8_unchecked(self.as_bytes()) }
     }

     #[inline]

@@ -67,7 +63,7 @@ impl Deref for SmallString {
     type Target = str;

     #[inline]
-    fn deref(&self) -> &str {
+    fn deref(&self) -> &Self::Target {
         self.as_str()
     }
 }

@@ -103,7 +99,7 @@ impl fmt::Display for SmallString {
 impl PartialEq for SmallString {
     #[inline]
     fn eq(&self, other: &Self) -> bool {
-        self.as_str().eq(&**other)
+        self.as_str() == other.as_str()
     }
 }

@@ -112,7 +108,7 @@ impl Eq for SmallString {}
 impl PartialOrd for SmallString {
     #[inline]
     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        self.as_str().partial_cmp(other.as_str())
+        Some(self.cmp(other))
     }
 }

@@ -148,17 +144,17 @@ impl FromStr for SmallString {
     type Err = BadSmallStringError;

     #[inline]
-    fn from_str(value: &str) -> Result<Self, BadSmallStringError> {
+    fn from_str(value: &str) -> Result<Self, Self::Err> {
         if value.len() <= 15 {
             let mut inner = [0; 16];
             inner[..value.len()].copy_from_slice(value.as_bytes());
             inner[15] = value
                 .len()
                 .try_into()
-                .map_err(|_| BadSmallStringError::TooLong(value.len()))?;
+                .map_err(|_| Self::Err::TooLong(value.len()))?;
             Ok(Self { inner })
         } else {
-            Err(BadSmallStringError::TooLong(value.len()))
+            Err(Self::Err::TooLong(value.len()))
         }
     }
 }

@@ -167,37 +163,15 @@ impl<'a> TryFrom<&'a str> for SmallString {
     type Error = BadSmallStringError;

     #[inline]
-    fn try_from(value: &'a str) -> Result<Self, BadSmallStringError> {
+    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
         Self::from_str(value)
     }
 }

-#[derive(Debug, Clone, Copy)]
+#[derive(Debug, Clone, Copy, thiserror::Error)]
 pub enum BadSmallStringError {
+    #[error("small strings could only contain at most 15 characters, found {0}")]
     TooLong(usize),
-    BadUtf8(Utf8Error),
-}
-
-impl fmt::Display for BadSmallStringError {
-    #[inline]
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Self::TooLong(v) => write!(
-                f,
-                "small strings could only contain at most 15 characters, found {}",
-                v
-            ),
-            Self::BadUtf8(e) => e.fmt(f),
-        }
-    }
-}
-
-impl Error for BadSmallStringError {
-    #[inline]
-    fn source(&self) -> Option<&(dyn Error + 'static)> {
-        match self {
-            Self::TooLong(_) => None,
-            Self::BadUtf8(e) => Some(e),
-        }
-    }
+    #[error(transparent)]
+    BadUtf8(#[from] Utf8Error),
 }
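
The hand-written `Display` and `Error` impls collapse into a `thiserror::Error` derive: `#[error("...")]` generates `Display`, `#[error(transparent)]` forwards `Display` and `source()` to the inner `Utf8Error`, and `#[from]` additionally generates a `From<Utf8Error>` conversion so `?` works on UTF-8 failures. The same pattern in a self-contained sketch:

    use std::str::Utf8Error;

    #[derive(Debug, thiserror::Error)]
    enum ExampleError {
        #[error("value too long: {0} bytes")]
        TooLong(usize),
        #[error(transparent)]
        BadUtf8(#[from] Utf8Error),
    }

    fn first_str(bytes: &[u8]) -> Result<&str, ExampleError> {
        Ok(std::str::from_utf8(bytes)?) // uses the generated From<Utf8Error>
    }
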

File diff suppressed because it is too large

@@ -1,16 +1,29 @@
-use oxigraph::io::{DatasetFormat, GraphFormat};
+#![cfg(test)]
+#![allow(clippy::panic_in_result_fn)]
+
+use oxigraph::io::RdfFormat;
 use oxigraph::model::vocab::{rdf, xsd};
 use oxigraph::model::*;
 use oxigraph::store::Store;
+#[cfg(all(not(target_family = "wasm")))]
 use rand::random;
+#[cfg(all(not(target_family = "wasm")))]
 use std::env::temp_dir;
 use std::error::Error;
-use std::fs::{create_dir, remove_dir_all, File};
-use std::io::{Cursor, Write};
+#[cfg(all(not(target_family = "wasm")))]
+use std::fs::{create_dir_all, remove_dir_all, File};
+#[cfg(all(not(target_family = "wasm")))]
+use std::io::Write;
+#[cfg(all(not(target_family = "wasm")))]
+use std::iter::empty;
+#[cfg(all(target_os = "linux"))]
 use std::iter::once;
-use std::path::PathBuf;
+#[cfg(all(not(target_family = "wasm")))]
+use std::path::{Path, PathBuf};
+#[cfg(all(target_os = "linux"))]
 use std::process::Command;

+#[allow(clippy::non_ascii_literal)]
 const DATA: &str = r#"
 @prefix schema: <http://schema.org/> .
 @prefix wd: <http://www.wikidata.org/entity/> .

@@ -24,6 +37,8 @@ wd:Q90 a schema:City ;
     schema:url "https://www.paris.fr/"^^xsd:anyURI ;
     schema:postalCode "75001" .
 "#;
+
+#[allow(clippy::non_ascii_literal)]
 const GRAPH_DATA: &str = r#"
 @prefix schema: <http://schema.org/> .
 @prefix wd: <http://www.wikidata.org/entity/> .

@@ -63,7 +78,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
         QuadRef::new(
             paris,
             name,
-            LiteralRef::new_language_tagged_literal_unchecked("la ville lumière", "fr"),
+            LiteralRef::new_language_tagged_literal_unchecked("la ville lumi\u{E8}re", "fr"),
             graph_name,
         ),
         QuadRef::new(paris, country, france, graph_name),

@@ -97,12 +112,7 @@ fn quads(graph_name: impl Into<GraphNameRef<'static>>) -> Vec<QuadRef<'static>>
 #[test]
 fn test_load_graph() -> Result<(), Box<dyn Error>> {
     let store = Store::new()?;
-    store.load_graph(
-        Cursor::new(DATA),
-        GraphFormat::Turtle,
-        GraphNameRef::DefaultGraph,
-        None,
-    )?;
+    store.load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
     for q in quads(GraphNameRef::DefaultGraph) {
         assert!(store.contains(q)?);
     }

@@ -111,14 +121,12 @@ fn test_load_graph() -> Result<(), Box<dyn Error>> {
 }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
     let store = Store::new()?;
-    store.bulk_loader().load_graph(
-        Cursor::new(DATA),
-        GraphFormat::Turtle,
-        GraphNameRef::DefaultGraph,
-        None,
-    )?;
+    store
+        .bulk_loader()
+        .load_from_read(RdfFormat::Turtle, DATA.as_bytes())?;
     for q in quads(GraphNameRef::DefaultGraph) {
         assert!(store.contains(q)?);
     }

@@ -127,13 +135,12 @@ fn test_bulk_load_graph() -> Result<(), Box<dyn Error>> {
 }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
     let store = Store::new()?;
-    store.bulk_loader().on_parse_error(|_| Ok(())).load_graph(
-        Cursor::new(b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> ."),
-        GraphFormat::NTriples,
-        GraphNameRef::DefaultGraph,
-        None,
+    store.bulk_loader().on_parse_error(|_| Ok(())).load_from_read(
+        RdfFormat::NTriples,
+        b"<http://example.com> <http://example.com> <http://example.com##> .\n<http://example.com> <http://example.com> <http://example.com> .".as_slice(),
     )?;
     assert_eq!(store.len()?, 1);
     assert!(store.contains(QuadRef::new(

@@ -146,10 +153,20 @@ fn test_bulk_load_graph_lenient() -> Result<(), Box<dyn Error>> {
     Ok(())
 }

+#[test]
+#[cfg(all(not(target_family = "wasm")))]
+fn test_bulk_load_empty() -> Result<(), Box<dyn Error>> {
+    let store = Store::new()?;
+    store.bulk_loader().load_quads(empty::<Quad>())?;
+    assert!(store.is_empty()?);
+    store.validate()?;
+    Ok(())
+}
+
 #[test]
 fn test_load_dataset() -> Result<(), Box<dyn Error>> {
     let store = Store::new()?;
-    store.load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?;
+    store.load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
     for q in quads(NamedNodeRef::new_unchecked(
         "http://www.wikidata.org/wiki/Special:EntityData/Q90",
     )) {

@@ -160,11 +177,12 @@ fn test_load_dataset() -> Result<(), Box<dyn Error>> {
 }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
-    let store = Store::new().unwrap();
+    let store = Store::new()?;
     store
         .bulk_loader()
-        .load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?;
+        .load_from_read(RdfFormat::TriG, GRAPH_DATA.as_bytes())?;
     let graph_name =
         NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90");
     for q in quads(graph_name) {

@@ -179,11 +197,9 @@ fn test_bulk_load_dataset() -> Result<(), Box<dyn Error>> {
 fn test_load_graph_generates_new_blank_nodes() -> Result<(), Box<dyn Error>> {
     let store = Store::new()?;
     for _ in 0..2 {
-        store.load_graph(
-            Cursor::new("_:a <http://example.com/p> <http://example.com/p> ."),
-            GraphFormat::NTriples,
-            GraphNameRef::DefaultGraph,
-            None,
+        store.load_from_read(
+            RdfFormat::NTriples,
+            "_:a <http://example.com/p> <http://example.com/p> .".as_bytes(),
         )?;
     }
     assert_eq!(store.len()?, 2);

@@ -198,11 +214,7 @@ fn test_dump_graph() -> Result<(), Box<dyn Error>> {
     }

     let mut buffer = Vec::new();
-    store.dump_graph(
-        &mut buffer,
-        GraphFormat::NTriples,
-        GraphNameRef::DefaultGraph,
-    )?;
+    store.dump_graph_to_write(GraphNameRef::DefaultGraph, RdfFormat::NTriples, &mut buffer)?;
     assert_eq!(
         buffer.into_iter().filter(|c| *c == b'\n').count(),
         NUMBER_OF_TRIPLES

@@ -217,8 +229,7 @@ fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
         store.insert(q)?;
     }

-    let mut buffer = Vec::new();
-    store.dump_dataset(&mut buffer, DatasetFormat::NQuads)?;
+    let buffer = store.dump_to_write(RdfFormat::NQuads, Vec::new())?;
     assert_eq!(
         buffer.into_iter().filter(|c| *c == b'\n').count(),
         NUMBER_OF_TRIPLES

@@ -229,24 +240,25 @@ fn test_dump_dataset() -> Result<(), Box<dyn Error>> {
 #[test]
 fn test_snapshot_isolation_iterator() -> Result<(), Box<dyn Error>> {
     let quad = QuadRef::new(
-        NamedNodeRef::new_unchecked("http://example.com/s"),
-        NamedNodeRef::new_unchecked("http://example.com/p"),
-        NamedNodeRef::new_unchecked("http://example.com/o"),
-        NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"),
+        NamedNodeRef::new("http://example.com/s")?,
+        NamedNodeRef::new("http://example.com/p")?,
+        NamedNodeRef::new("http://example.com/o")?,
+        NamedNodeRef::new("http://www.wikidata.org/wiki/Special:EntityData/Q90")?,
     );
     let store = Store::new()?;
     store.insert(quad)?;
     let iter = store.iter();
     store.remove(quad)?;
+    store.validate()?;
     assert_eq!(
         iter.collect::<Result<Vec<_>, _>>()?,
         vec![quad.into_owned()]
     );
-    store.validate()?;
     Ok(())
 }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<(), Box<dyn Error>> {
     let quad = QuadRef::new(
         NamedNodeRef::new_unchecked("http://example.com/s"),

@@ -262,9 +274,10 @@ fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<(), Box<dyn Error>> {
 }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_open_bad_dir() -> Result<(), Box<dyn Error>> {
     let dir = TempDir::default();
-    create_dir(&dir.0)?;
+    create_dir_all(&dir.0)?;
     {
         File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
     }

@@ -273,65 +286,84 @@ fn test_open_bad_dir() -> Result<(), Box<dyn Error>> {
 }

 #[test]
-#[cfg(target_os = "linux")]
+#[cfg(all(target_os = "linux"))]
 fn test_bad_stt_open() -> Result<(), Box<dyn Error>> {
     let dir = TempDir::default();
     let store = Store::open(&dir.0)?;
     remove_dir_all(&dir.0)?;
-    assert!(store
-        .bulk_loader()
-        .load_quads(once(Quad {
-            subject: NamedNode::new_unchecked("http://example.com/s").into(),
-            predicate: NamedNode::new_unchecked("http://example.com/p"),
-            object: NamedNode::new_unchecked("http://example.com/o").into(),
-            graph_name: GraphName::DefaultGraph
-        }))
-        .is_err());
+    store
+        .bulk_loader()
+        .load_quads(once(Quad::new(
+            NamedNode::new_unchecked("http://example.com/s"),
+            NamedNode::new_unchecked("http://example.com/p"),
+            NamedNode::new_unchecked("http://example.com/o"),
+            GraphName::DefaultGraph,
+        )))
+        .unwrap_err();
     Ok(())
 }

-#[test]
-fn test_backup() -> Result<(), Box<dyn Error>> {
-    let quad = QuadRef {
-        subject: NamedNodeRef::new_unchecked("http://example.com/s").into(),
-        predicate: NamedNodeRef::new_unchecked("http://example.com/p"),
-        object: NamedNodeRef::new_unchecked("http://example.com/o").into(),
-        graph_name: GraphNameRef::DefaultGraph,
-    };
-    let store_dir = TempDir::default();
-    let backup_dir = TempDir::default();
-
-    let store = Store::open(&store_dir.0)?;
-    store.insert(quad)?;
-    store.backup(&backup_dir.0)?;
-    store.remove(quad)?;
-    assert!(!store.contains(quad)?);
-    let backup = Store::open(&backup_dir.0)?;
-    backup.validate()?;
-    assert!(backup.contains(quad)?);
-    Ok(())
-}
+// #[test]
+// #[cfg(all(not(target_family = "wasm")))]
+// fn test_backup() -> Result<(), Box<dyn Error>> {
+//     let quad = QuadRef::new(
+//         NamedNodeRef::new_unchecked("http://example.com/s"),
+//         NamedNodeRef::new_unchecked("http://example.com/p"),
+//         NamedNodeRef::new_unchecked("http://example.com/o"),
+//         GraphNameRef::DefaultGraph,
+//     );
+//     let store_dir = TempDir::default();
+//     let backup_from_rw_dir = TempDir::default();
+//     let backup_from_ro_dir = TempDir::default();
+//     let backup_from_secondary_dir = TempDir::default();
+
+//     let store = Store::open(&store_dir)?;
+//     store.insert(quad)?;
+//     let secondary_store = Store::open_secondary(&store_dir)?;
+//     store.flush()?;
+
+//     store.backup(&backup_from_rw_dir)?;
+//     secondary_store.backup(&backup_from_secondary_dir)?;
+//     store.remove(quad)?;
+//     assert!(!store.contains(quad)?);
+
+//     let backup_from_rw = Store::open_read_only(&backup_from_rw_dir.0)?;
+//     backup_from_rw.validate()?;
+//     assert!(backup_from_rw.contains(quad)?);
+//     backup_from_rw.backup(&backup_from_ro_dir)?;
+
+//     let backup_from_ro = Store::open_read_only(&backup_from_ro_dir.0)?;
+//     backup_from_ro.validate()?;
+//     assert!(backup_from_ro.contains(quad)?);
+
+//     let backup_from_secondary = Store::open_read_only(&backup_from_secondary_dir.0)?;
+//     backup_from_secondary.validate()?;
+//     assert!(backup_from_secondary.contains(quad)?);
+
+//     Ok(())
+// }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_bad_backup() -> Result<(), Box<dyn Error>> {
     let store_dir = TempDir::default();
     let backup_dir = TempDir::default();

-    create_dir(&backup_dir.0)?;
-    assert!(Store::open(&store_dir.0)?.backup(&backup_dir.0).is_err());
+    create_dir_all(&backup_dir.0)?;
+    Store::open(&store_dir)?.backup(&backup_dir.0).unwrap_err();
     Ok(())
 }

 #[test]
+#[cfg(all(not(target_family = "wasm")))]
 fn test_backup_on_in_memory() -> Result<(), Box<dyn Error>> {
     let backup_dir = TempDir::default();
-    assert!(Store::new()?.backup(&backup_dir.0).is_err());
+    Store::new()?.backup(&backup_dir).unwrap_err();
     Ok(())
 }

 #[test]
-#[cfg(target_os = "linux")]
+#[cfg(all(target_os = "linux"))]
 fn test_backward_compatibility() -> Result<(), Box<dyn Error>> {
     // We run twice to check if data is properly saved and closed
     for _ in 0..2 {

@@ -354,6 +386,123 @@ fn test_backward_compatibility() -> Result<(), Box<dyn Error>> {
     Ok(())
 }

+// #[test]
+// #[cfg(all(not(target_family = "wasm")))]
+// fn test_secondary() -> Result<(), Box<dyn Error>> {
+//     let quad = QuadRef::new(
+//         NamedNodeRef::new_unchecked("http://example.com/s"),
+//         NamedNodeRef::new_unchecked("http://example.com/p"),
+//         NamedNodeRef::new_unchecked("http://example.com/o"),
+//         GraphNameRef::DefaultGraph,
+//     );
+//     let primary_dir = TempDir::default();
+
+//     // We open the store
+//     let primary = Store::open(&primary_dir)?;
+//     let secondary = Store::open_secondary(&primary_dir)?;
+
+//     // We insert a quad
+//     primary.insert(quad)?;
+//     primary.flush()?;
+
+//     // It is readable from both stores
+//     for store in &[&primary, &secondary] {
+//         assert!(store.contains(quad)?);
+//         assert_eq!(
+//             store.iter().collect::<Result<Vec<_>, _>>()?,
+//             vec![quad.into_owned()]
+//         );
+//     }
+
+//     // We validate the states
+//     primary.validate()?;
+//     secondary.validate()?;
+
+//     // We close the primary store and remove its content
+//     drop(primary);
+//     remove_dir_all(&primary_dir)?;
+
+//     // We secondary store is still readable
+//     assert!(secondary.contains(quad)?);
+//     secondary.validate()?;
+
+//     Ok(())
+// }
+
+// #[test]
+// #[cfg(all(not(target_family = "wasm")))]
+// fn test_open_secondary_bad_dir() -> Result<(), Box<dyn Error>> {
+//     let primary_dir = TempDir::default();
+//     create_dir_all(&primary_dir.0)?;
+//     {
+//         File::create(primary_dir.0.join("CURRENT"))?.write_all(b"foo")?;
+//     }
+//     assert!(Store::open_secondary(&primary_dir).is_err());
+//     Ok(())
+// }
+
+#[test]
+#[cfg(all(not(target_family = "wasm")))]
+fn test_read_only() -> Result<(), Box<dyn Error>> {
+    let s = NamedNodeRef::new_unchecked("http://example.com/s");
+    let p = NamedNodeRef::new_unchecked("http://example.com/p");
+    let first_quad = QuadRef::new(
+        s,
+        p,
+        NamedNodeRef::new_unchecked("http://example.com/o"),
+        GraphNameRef::DefaultGraph,
+    );
+    let second_quad = QuadRef::new(
+        s,
+        p,
+        NamedNodeRef::new_unchecked("http://example.com/o2"),
+        GraphNameRef::DefaultGraph,
+    );
+    let store_dir = TempDir::default();
+
+    // We write to the store and close it
+    {
+        let read_write = Store::open(&store_dir)?;
+        read_write.insert(first_quad)?;
+        read_write.flush()?;
+    }
+
+    // We open as read-only
+    let read_only = Store::open_read_only(&store_dir, None)?;
+    assert!(read_only.contains(first_quad)?);
+    assert_eq!(
+        read_only.iter().collect::<Result<Vec<_>, _>>()?,
+        vec![first_quad.into_owned()]
+    );
+    read_only.validate()?;
+
+    // We open as read-write again
+    let read_write = Store::open(&store_dir)?;
+    read_write.insert(second_quad)?;
+    read_write.flush()?;
+    read_write.optimize()?; // Makes sure it's well flushed
+
+    // The new quad is in the read-write instance but not the read-only instance
+    assert!(read_write.contains(second_quad)?);
+    assert!(!read_only.contains(second_quad)?);
+    read_only.validate()?;
+
+    Ok(())
+}
+
+#[test]
+#[cfg(all(not(target_family = "wasm")))]
+fn test_open_read_only_bad_dir() -> Result<(), Box<dyn Error>> {
+    let dir = TempDir::default();
+    create_dir_all(&dir.0)?;
+    {
+        File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?;
+    }
+    assert!(Store::open_read_only(&dir, None).is_err());
+    Ok(())
+}
+
+#[cfg(all(target_os = "linux"))]
 fn reset_dir(dir: &str) -> Result<(), Box<dyn Error>> {
     assert!(Command::new("git")
         .args(["clean", "-fX", dir])

@@ -366,16 +515,28 @@ fn reset_dir(dir: &str) -> Result<(), Box<dyn Error>> {
     Ok(())
 }

+#[cfg(all(not(target_family = "wasm")))]
 struct TempDir(PathBuf);

+#[cfg(all(not(target_family = "wasm")))]
 impl Default for TempDir {
     fn default() -> Self {
         Self(temp_dir().join(format!("oxigraph-test-{}", random::<u128>())))
     }
 }

+#[cfg(all(not(target_family = "wasm")))]
+impl AsRef<Path> for TempDir {
+    fn as_ref(&self) -> &Path {
+        &self.0
+    }
+}
+
+#[cfg(all(not(target_family = "wasm")))]
 impl Drop for TempDir {
     fn drop(&mut self) {
-        let _ = remove_dir_all(&self.0);
+        if self.0.is_dir() {
+            remove_dir_all(&self.0).unwrap();
+        }
     }
 }

Some files were not shown because too many files have changed in this diff
