From ff56a8c5eb690b9daf7a787e0142d911aac47d2b Mon Sep 17 00:00:00 2001 From: Thomas Tanon Date: Wed, 8 Dec 2021 20:05:14 +0100 Subject: [PATCH 01/10] Delete dependabot.yml Huge noise now that the Cargo.lock file is committed --- .github/dependabot.yml | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 417733e1..00000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,24 +0,0 @@ -version: 2 -updates: - - package-ecosystem: cargo - directory: "/" - schedule: - interval: daily - open-pull-requests-limit: 10 - - package-ecosystem: npm - directory: "/js" - schedule: - interval: daily - open-pull-requests-limit: 10 - - package-ecosystem: github-actions - directory: "/" - schedule: - interval: daily - - package-ecosystem: gitsubmodule - directory: "/" - schedule: - interval: daily - - package-ecosystem: docker - directory: "/server" - schedule: - interval: daily From 44d1a5f04c95cde233fd26876d09e461a0e2f3b4 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 8 Dec 2021 08:06:37 +0100 Subject: [PATCH 02/10] Makes bulk load partial insertions atomic Ensures that the store does not end up in an inconsistent state. 
--- Cargo.lock | 12 ++++----- lib/benches/store.rs | 18 +++++++------- lib/src/storage/backend/rocksdb.rs | 40 +++++++++++++++++++++--------- lib/src/storage/mod.rs | 4 +-- lib/src/store.rs | 22 ++++++++++------ lib/tests/store.rs | 4 +-- python/src/store.rs | 2 +- rocksdb-sys/api/c.cc | 16 ++++++++++++ rocksdb-sys/api/c.h | 11 ++++++++ server/src/main.rs | 2 +- 10 files changed, 90 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d387e3ac..6c49b686 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -513,9 +513,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.10.1" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" dependencies = [ "either", ] @@ -958,9 +958,9 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "pkg-config" -version = "0.3.22" +version = "0.3.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" +checksum = "d1a3ea4f0dd7f1f3e512cf97bf100819aa547f36a6eccac8dbaae839eb92363e" [[package]] name = "plotters" @@ -1004,9 +1004,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" [[package]] name = "proc-macro2" -version = "1.0.32" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" +checksum = "fb37d2df5df740e582f28f8560cf425f52bb267d872fe58358eadb554909f07a" dependencies = [ "unicode-xid", ] diff --git a/lib/benches/store.rs b/lib/benches/store.rs index e82ace72..e062583b 100644 --- a/lib/benches/store.rs +++ b/lib/benches/store.rs @@ -36,8 +36,8 @@ fn store_load(c: &mut Criterion) { group.bench_function("load BSBM explore 1000 in on disk with bulk 
load", |b| { b.iter(|| { let path = TempDir::default(); - let mut store = Store::open(&path.0).unwrap(); - do_bulk_load(&mut store, &data); + let store = Store::open(&path.0).unwrap(); + do_bulk_load(&store, &data); }) }); } @@ -54,8 +54,8 @@ fn store_load(c: &mut Criterion) { group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| { b.iter(|| { let path = TempDir::default(); - let mut store = Store::open(&path.0).unwrap(); - do_bulk_load(&mut store, &data); + let store = Store::open(&path.0).unwrap(); + do_bulk_load(&store, &data); }) }); } @@ -73,7 +73,7 @@ fn do_load(store: &Store, data: &[u8]) { store.optimize().unwrap(); } -fn do_bulk_load(store: &mut Store, data: &[u8]) { +fn do_bulk_load(store: &Store, data: &[u8]) { store .bulk_load_graph( Cursor::new(&data), @@ -116,8 +116,8 @@ fn store_query_and_update(c: &mut Criterion) { group.sample_size(10); { - let mut memory_store = Store::new().unwrap(); - do_bulk_load(&mut memory_store, &data); + let memory_store = Store::new().unwrap(); + do_bulk_load(&memory_store, &data); group.bench_function("BSBM explore 1000 query in memory", |b| { b.iter(|| run_operation(&memory_store, &query_operations)) }); @@ -128,8 +128,8 @@ fn store_query_and_update(c: &mut Criterion) { { let path = TempDir::default(); - let mut disk_store = Store::open(&path.0).unwrap(); - do_bulk_load(&mut disk_store, &data); + let disk_store = Store::open(&path.0).unwrap(); + do_bulk_load(&disk_store, &data); group.bench_function("BSBM explore 1000 query on disk", |b| { b.iter(|| run_operation(&disk_store, &query_operations)) }); diff --git a/lib/src/storage/backend/rocksdb.rs b/lib/src/storage/backend/rocksdb.rs index f3c9d874..ed992b68 100644 --- a/lib/src/storage/backend/rocksdb.rs +++ b/lib/src/storage/backend/rocksdb.rs @@ -10,6 +10,7 @@ use libc::{self, c_char, c_void, free}; use oxrocksdb_sys::*; use rand::random; use std::borrow::Borrow; +use std::collections::HashMap; use std::env::temp_dir; use std::ffi::{CStr, 
CString}; use std::fs::remove_dir_all; @@ -438,25 +439,40 @@ impl Db { } } - pub fn insert_stt_files(&self, ssts_for_cf: Vec<(&ColumnFamily, PathBuf)>) -> Result<()> { + pub fn insert_stt_files(&self, ssts_for_cf: &[(&ColumnFamily, PathBuf)]) -> Result<()> { + let mut paths_by_cf = HashMap::<_, Vec<_>>::new(); for (cf, path) in ssts_for_cf { - unsafe { - ffi_result!(rocksdb_transactiondb_ingest_external_file_cf( - self.0.db, - cf.0, - &path_to_cstring(&path)?.as_ptr(), - 1, - self.0.ingest_external_file_options - ))?; - } + paths_by_cf + .entry(*cf) + .or_default() + .push(path_to_cstring(path)?); + } + let cpaths_by_cf = paths_by_cf + .iter() + .map(|(cf, paths)| (*cf, paths.iter().map(|p| p.as_ptr()).collect::>())) + .collect::>(); + let args = cpaths_by_cf + .iter() + .map(|(cf, p)| rocksdb_ingestexternalfilearg_t { + column_family: cf.0, + external_files: p.as_ptr(), + external_files_len: p.len(), + options: self.0.ingest_external_file_options, + }) + .collect::>(); + unsafe { + ffi_result!(rocksdb_transactiondb_ingest_external_files( + self.0.db, + args.as_ptr(), + args.len() + )) } - Ok(()) } } // It is fine to not keep a lifetime: there is no way to use this type without the database being still in scope. // So, no use after free possible. 
-#[derive(Clone)] +#[derive(Clone, Eq, PartialEq, Hash)] pub struct ColumnFamily(*mut rocksdb_column_family_handle_t); unsafe impl Send for ColumnFamily {} diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index a7e780ac..3b7df5b7 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -172,7 +172,7 @@ impl Storage { stt_file.insert_empty(&k)?; } self.db - .insert_stt_files(vec![(&self.graphs_cf, stt_file.finish()?)])?; + .insert_stt_files(&[(&self.graphs_cf, stt_file.finish()?)])?; version = 1; self.update_version(version)?; } @@ -1172,7 +1172,7 @@ impl BulkLoader { self.quads.clear(); } - self.storage.db.insert_stt_files(to_load) + self.storage.db.insert_stt_files(&to_load) } fn insert_term(&mut self, term: TermRef<'_>, encoded: &EncodedTerm) -> Result<()> { diff --git a/lib/src/store.rs b/lib/src/store.rs index cc5bafce..882069f6 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -612,7 +612,9 @@ impl Store { /// /// This function is optimized for large dataset loading speed. For small files, [`load_dataset`](Store::load_dataset) might be more convenient. /// - /// Warning: This method is not atomic. If the parsing fails in the middle of the file, only a part of it may be written to the store. + /// Warning: This method is not atomic. + /// If the parsing fails in the middle of the file, only a part of it may be written to the store. + /// Results might get weird if you delete data during the loading process. /// /// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files. /// @@ -622,7 +624,7 @@ impl Store { /// use oxigraph::io::DatasetFormat; /// use oxigraph::model::*; /// - /// let mut store = Store::new()?; + /// let store = Store::new()?; /// /// // insertion /// let file = b" ."; @@ -639,7 +641,7 @@ impl Store { /// Errors related to data loading into the store use the other error kinds. 
#[cfg(not(target_arch = "wasm32"))] pub fn bulk_load_dataset( - &mut self, + &self, reader: impl BufRead, format: DatasetFormat, base_iri: Option<&str>, @@ -657,7 +659,9 @@ impl Store { /// /// This function is optimized for large dataset loading speed. For small files, [`load_graph`](Store::load_graph) might be more convenient. /// - /// Warning: This method is not atomic. If the parsing fails in the middle of the file, only a part of it may be written to the store. + /// Warning: This method is not atomic. + /// If the parsing fails in the middle of the file, only a part of it may be written to the store. + /// Results might get weird if you delete data during the loading process. /// /// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files. /// @@ -667,7 +671,7 @@ impl Store { /// use oxigraph::io::GraphFormat; /// use oxigraph::model::*; /// - /// let mut store = Store::new()?; + /// let store = Store::new()?; /// /// // insertion /// let file = b" ."; @@ -684,7 +688,7 @@ impl Store { /// Errors related to data loading into the store use the other error kinds. #[cfg(not(target_arch = "wasm32"))] pub fn bulk_load_graph<'a>( - &mut self, + &self, reader: impl BufRead, format: GraphFormat, to_graph_name: impl Into>, @@ -707,11 +711,13 @@ impl Store { /// Adds a set of triples to this store using bulk load. /// - /// Warning: This method is not atomic. If the parsing fails in the middle of the file, only a part of it may be written to the store. + /// Warning: This method is not atomic. + /// If the process fails in the middle of the file, only a part of the data may be written to the store. + /// Results might get weird if you delete data during the loading process. /// /// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files. 
#[cfg(not(target_arch = "wasm32"))] - pub fn bulk_extend(&mut self, quads: impl IntoIterator) -> io::Result<()> { + pub fn bulk_extend(&self, quads: impl IntoIterator) -> io::Result<()> { bulk_load(&self.storage, quads.into_iter().map(Ok)) } } diff --git a/lib/tests/store.rs b/lib/tests/store.rs index 55c5fe6d..1376bf83 100644 --- a/lib/tests/store.rs +++ b/lib/tests/store.rs @@ -100,7 +100,7 @@ fn test_load_dataset() -> Result<()> { #[test] fn test_bulk_load_dataset() -> Result<()> { - let mut store = Store::new().unwrap(); + let store = Store::new().unwrap(); store.bulk_load_dataset(Cursor::new(DATA), DatasetFormat::TriG, None)?; for q in quads(GraphNameRef::DefaultGraph) { assert!(store.contains(q)?); @@ -183,7 +183,7 @@ fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<()> { NamedNodeRef::new_unchecked("http://example.com/o"), NamedNodeRef::new_unchecked("http://example.com/g"), ); - let mut store = Store::new()?; + let store = Store::new()?; store.remove(quad)?; store.bulk_extend([quad.into_owned()])?; assert_eq!(store.len()?, 1); diff --git a/python/src/store.rs b/python/src/store.rs index 7511a4aa..b9fd62c8 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -349,7 +349,7 @@ impl PyStore { #[pyo3(text_signature = "($self, data, /, mime_type, *, base_iri = None, to_graph = None)")] #[args(input, mime_type, "*", base_iri = "None", to_graph = "None")] fn bulk_load( - &mut self, + &self, input: PyObject, mime_type: &str, base_iri: Option<&str>, diff --git a/rocksdb-sys/api/c.cc b/rocksdb-sys/api/c.cc index e1e8443f..8c316201 100644 --- a/rocksdb-sys/api/c.cc +++ b/rocksdb-sys/api/c.cc @@ -53,6 +53,22 @@ void rocksdb_transactiondb_ingest_external_file_cf( SaveError(errptr, db->rep->IngestExternalFile(handle->rep, files, opt->rep)); } +void rocksdb_transactiondb_ingest_external_files( + rocksdb_transactiondb_t* db, const rocksdb_ingestexternalfilearg_t* list, + const size_t list_len, char** errptr) { + std::vector args(list_len); + 
for (size_t i = 0; i < list_len; ++i) { + args[i].column_family = list[i].column_family->rep; + std::vector files(list[i].external_files_len); + for (size_t j = 0; j < list[i].external_files_len; ++j) { + files[j] = std::string(list[i].external_files[j]); + } + args[i].external_files = files; + args[i].options = list[i].options->rep; + } + SaveError(errptr, db->rep->IngestExternalFiles(args)); +} + rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, diff --git a/rocksdb-sys/api/c.h b/rocksdb-sys/api/c.h index 74c492a7..7d38290f 100644 --- a/rocksdb-sys/api/c.h +++ b/rocksdb-sys/api/c.h @@ -6,6 +6,13 @@ extern "C" { #endif +typedef struct rocksdb_ingestexternalfilearg_t { + rocksdb_column_family_handle_t* column_family; + char const* const* external_files; + size_t external_files_len; + rocksdb_ingestexternalfileoptions_t* options; +} rocksdb_ingestexternalfilearg_t; + extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_transactiondb_get_pinned_cf( rocksdb_transactiondb_t* db, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, @@ -25,6 +32,10 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_ingest_external_file_cf( const char* const* file_list, const size_t list_len, const rocksdb_ingestexternalfileoptions_t* opt, char** errptr); +extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_ingest_external_files( + rocksdb_transactiondb_t* db, const rocksdb_ingestexternalfilearg_t* list, + const size_t list_len, char** errptr); + extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf( rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_column_family_handle_t* column_family, const char* key, diff --git a/server/src/main.rs b/server/src/main.rs index 38f34b64..71282ce0 100644 --- a/server/src/main.rs +++ 
b/server/src/main.rs @@ -69,7 +69,7 @@ pub fn main() -> std::io::Result<()> { ) .get_matches(); - let mut store = if let Some(path) = matches.value_of_os("location") { + let store = if let Some(path) = matches.value_of_os("location") { Store::open(path) } else { Store::new() From 05d7370a69eeee3e66d9c1135c240af3300d3764 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 9 Dec 2021 17:28:32 +0100 Subject: [PATCH 03/10] Removes chrono for time Reduces the number of dependencies --- Cargo.lock | 39 ++++++++------------------------------ README.md | 1 + testsuite/Cargo.toml | 2 +- testsuite/src/evaluator.rs | 4 ++-- testsuite/src/report.rs | 9 +++++---- 5 files changed, 17 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c49b686..a146fc76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -149,19 +149,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "time", - "winapi 0.3.9", -] - [[package]] name = "clang-sys" version = "1.3.0" @@ -689,16 +676,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "num-integer" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" -dependencies = [ - "autocfg", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.14" @@ -845,11 +822,11 @@ name = "oxigraph_testsuite" version = "0.3.0-dev" dependencies = [ "anyhow", - "chrono", "clap", "criterion", "oxigraph", "text-diff", + "time", ] [[package]] @@ -1368,9 +1345,9 @@ checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" [[package]] name = "serde" 
-version = "1.0.130" +version = "1.0.131" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1" [[package]] name = "serde_cbor" @@ -1384,9 +1361,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.130" +version = "1.0.131" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +checksum = "b710a83c4e0dff6a3d511946b95274ad9ca9e5d3ae497b63fda866ac955358d2" dependencies = [ "proc-macro2", "quote", @@ -1581,12 +1558,12 @@ dependencies = [ [[package]] name = "time" -version = "0.1.43" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "41effe7cfa8af36f439fac33861b66b049edc6f9a32331e2312660529c1c24ad" dependencies = [ + "itoa", "libc", - "winapi 0.3.9", ] [[package]] diff --git a/README.md b/README.md index c5dcca0f..b15def33 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ Oxigraph [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/) [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph) [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) +[![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph) [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. 
diff --git a/testsuite/Cargo.toml b/testsuite/Cargo.toml index e745e307..a38ffd51 100644 --- a/testsuite/Cargo.toml +++ b/testsuite/Cargo.toml @@ -14,7 +14,7 @@ publish = false [dependencies] anyhow = "1" clap = "2" -chrono = "0.4" +time = { version = "0.3", features = ["formatting"] } oxigraph = { version = "0.3.0-dev", path="../lib" } text-diff = "0.4" diff --git a/testsuite/src/evaluator.rs b/testsuite/src/evaluator.rs index 7dbd8b80..ddde5cea 100644 --- a/testsuite/src/evaluator.rs +++ b/testsuite/src/evaluator.rs @@ -1,8 +1,8 @@ use crate::manifest::Test; use crate::report::TestResult; use anyhow::{anyhow, Result}; -use chrono::Utc; use std::collections::HashMap; +use time::OffsetDateTime; #[derive(Default)] pub struct TestEvaluator { @@ -33,7 +33,7 @@ impl TestEvaluator { Ok(TestResult { test: test.id, outcome, - date: Utc::now(), + date: OffsetDateTime::now_utc(), }) }) .collect() diff --git a/testsuite/src/report.rs b/testsuite/src/report.rs index e9fb2e13..524e3b6d 100644 --- a/testsuite/src/report.rs +++ b/testsuite/src/report.rs @@ -1,14 +1,15 @@ use anyhow::Result; -use chrono::{DateTime, Utc}; use oxigraph::model::{Dataset, NamedNode}; use std::fmt::Write; use text_diff::{diff, Difference}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; #[derive(Debug)] pub struct TestResult { pub test: NamedNode, pub outcome: Result<()>, - pub date: DateTime, + pub date: OffsetDateTime, } pub fn dataset_diff(expected: &Dataset, actual: &Dataset) -> String { @@ -71,7 +72,7 @@ pub fn build_report(results: impl IntoIterator) -> String { writeln!( &mut buffer, "\tdc:issued \"{}\"^^xsd:dateTime ;", - Utc::now().to_rfc3339() + OffsetDateTime::now_utc().format(&Rfc3339).unwrap() ); writeln!( &mut buffer, @@ -140,7 +141,7 @@ pub fn build_report(results: impl IntoIterator) -> String { writeln!( &mut buffer, "\t\tdc:date \"{}\"^^xsd:dateTime", - result.date.to_rfc3339() + result.date.format(&Rfc3339).unwrap() ); writeln!(&mut buffer, "\t] ;"); 
writeln!(&mut buffer, "\tearl:mode earl:automatic"); From 8c60844ae2fd896874cf1291554f4fd1d52ffcea Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 9 Dec 2021 17:31:51 +0100 Subject: [PATCH 04/10] Updates dependencies --- Cargo.lock | 43 +++++++++++++++++++++--------------------- lib/Cargo.toml | 8 ++++---- lib/src/sparql/eval.rs | 2 +- testsuite/rdf-star | 2 +- 4 files changed, 27 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a146fc76..b50d651a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,9 +91,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "block-buffer" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +checksum = "f1d36a02058e76b040de25a4464ba1c80935655595b661505c8b39b664828b95" dependencies = [ "generic-array", ] @@ -290,6 +290,15 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "crypto-common" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567569e659735adb39ff2d4c20600f7cd78be5471f8c58ab162bce3c03fdbc5f" +dependencies = [ + "generic-array", +] + [[package]] name = "csv" version = "1.1.6" @@ -324,10 +333,12 @@ dependencies = [ [[package]] name = "digest" -version = "0.9.0" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +checksum = "8549e6bfdecd113b7e221fe60b433087f6957387a20f8118ebca9b12af19143d" dependencies = [ + "block-buffer", + "crypto-common", "generic-array", ] @@ -611,13 +622,11 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" [[package]] name = "md-5" -version = "0.9.1" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" 
+checksum = "e6a38fc55c8bbc10058782919516f88826e70320db6d206aebc49611d24216ae" dependencies = [ - "block-buffer", "digest", - "opaque-debug", ] [[package]] @@ -707,12 +716,6 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" -[[package]] -name = "opaque-debug" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" - [[package]] name = "openssl" version = "0.10.38" @@ -1383,28 +1386,24 @@ dependencies = [ [[package]] name = "sha-1" -version = "0.9.8" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99cd6713db3cf16b6c84e06321e049a9b9f699826e16096d23bbcc44d15d51a6" +checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" dependencies = [ - "block-buffer", "cfg-if", "cpufeatures", "digest", - "opaque-debug", ] [[package]] name = "sha2" -version = "0.9.8" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" +checksum = "900d964dd36bb15bcf2f2b35694c072feab74969a54f2bbeec7a2d725d2bdcb6" dependencies = [ - "block-buffer", "cfg-if", "cpufeatures", "digest", - "opaque-debug", ] [[package]] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 1a35a3ef..168e11b0 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -24,10 +24,10 @@ http_client = ["oxhttp"] [dependencies] quick-xml = "0.22" rand = "0.8" -md-5 = "0.9" -sha-1 = "0.9" -sha2 = "0.9" -digest = "0.9" +md-5 = "0.10" +sha-1 = "0.10" +sha2 = "0.10" +digest = "0.10" regex = "1" oxilangtag = "0.1" oxiri = "0.1" diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 10a9a281..d6c6afb7 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -2048,7 +2048,7 @@ impl SimpleEvaluator { let dataset = 
self.dataset.clone(); Rc::new(move |tuple| { let input = to_simple_string(&dataset, &arg(tuple)?)?; - let hash = hex::encode(H::new().chain(input.as_str()).finalize()); + let hash = hex::encode(H::new().chain_update(input.as_str()).finalize()); Some(build_string_literal(&dataset, &hash)) }) } diff --git a/testsuite/rdf-star b/testsuite/rdf-star index 4481d7b5..5efe146c 160000 --- a/testsuite/rdf-star +++ b/testsuite/rdf-star @@ -1 +1 @@ -Subproject commit 4481d7b5b7dd473b443e4f986f77ce38655b74d4 +Subproject commit 5efe146c6edfc2e710e1e13eac409b7c7ae982c7 From a96956361738132f6891c6bd1b5220ce951fd2d4 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 9 Dec 2021 17:25:32 +0100 Subject: [PATCH 05/10] Uses Rustls everywhere Avoids linking problems with system libraries. Rustls uses the system certificates. --- .github/workflows/build.yml | 2 +- lib/Cargo.toml | 2 +- python/Cargo.toml | 4 ---- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 71091d60..78c60e9b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -94,7 +94,7 @@ jobs: manylinux: 2014 container: messense/manylinux2014-cross:x86_64 command: build - args: -m python/Cargo.toml --cargo-extra-args="--no-default-features --features vendored" + args: -m python/Cargo.toml - run: pip install pyoxigraph --no-index --find-links ../../target/wheels && python -m unittest working-directory: ./python/tests diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 168e11b0..3edd7b12 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -19,7 +19,7 @@ all-features = true [features] default = [] sophia = ["sophia_api"] -http_client = ["oxhttp"] +http_client = ["oxhttp", "oxhttp/rustls"] [dependencies] quick-xml = "0.22" diff --git a/python/Cargo.toml b/python/Cargo.toml index 1a0b1a29..bebcc321 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -19,7 +19,3 @@ doctest = false oxigraph = { version = "0.3.0-dev", path="../lib", 
features = ["http_client"] } pyo3 = { version = "0.15", features = ["extension-module", "abi3-py37"] } oxhttp = "0.1" - -[features] -default = ["oxhttp/native-tls"] -vendored = ["oxhttp/rustls"] From e95bf721dc83f4dca9fd8ee1b07528737307f4c9 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 9 Dec 2021 18:39:50 +0100 Subject: [PATCH 06/10] Adds Cargo Deny --- .github/workflows/build.yml | 6 ++++++ deny.toml | 43 +++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 deny.toml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 78c60e9b..7aec6edd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -27,6 +27,12 @@ jobs: - run: rustup update && rustup component add clippy - run: cargo clippy --all-targets --all-features + deny: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: EmbarkStudios/cargo-deny-action@v1 + test_linux: runs-on: ubuntu-latest steps: diff --git a/deny.toml b/deny.toml new file mode 100644 index 00000000..45cee58f --- /dev/null +++ b/deny.toml @@ -0,0 +1,43 @@ +[advisories] +vulnerability = "deny" +unmaintained = "warn" +yanked = "warn" +notice = "warn" +ignore = [] + +[licenses] +unlicensed = "deny" +allow = [ + "MIT", + "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", + "CECILL-B" +] +copyleft = "deny" +allow-osi-fsf-free = "either" +default = "deny" +confidence-threshold = 0.8 +exceptions = [ + # Optional dependency (from sophia_api) + { allow = ["MPL-2.0"], name = "resiter", version = "*" } + +] + +[[licenses.clarify]] +name = "ring" +version = "*" +expression = "MIT AND ISC AND OpenSSL" +license-files = [ + { path = "LICENSE", hash = 0xbd0eed23 } +] + +[bans] +multiple-versions = "warn" +wildcards = "deny" +highlight = "all" + +[sources] +unknown-registry = "deny" +unknown-git = "deny" +allow-registry = ["https://github.com/rust-lang/crates.io-index"] + From bada850284b39c27bc2b13491b6f3bfd4479b38f Mon Sep 17 00:00:00 2001 From: Tpt Date: 
Sun, 12 Dec 2021 19:00:44 +0100 Subject: [PATCH 07/10] Txn conflict: allows the OS to do something else before retrying --- lib/src/storage/backend/rocksdb.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/src/storage/backend/rocksdb.rs b/lib/src/storage/backend/rocksdb.rs index ed992b68..960ce740 100644 --- a/lib/src/storage/backend/rocksdb.rs +++ b/lib/src/storage/backend/rocksdb.rs @@ -20,6 +20,7 @@ use std::ops::Deref; use std::path::{Path, PathBuf}; use std::rc::{Rc, Weak}; use std::sync::Arc; +use std::thread::yield_now; use std::{ptr, slice}; macro_rules! ffi_result { @@ -393,7 +394,10 @@ impl Db { msg == "Resource busy: " || msg == "Operation timed out: Timeout waiting to lock key" }); - if !is_conflict_error { + if is_conflict_error { + // We give a chance to the OS to do something else before retrying in order to help avoiding an other conflict + yield_now(); + } else { // We raise the error return Err(e); } From 15072f21b5a5d602a2fe83905524890904954483 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 12 Dec 2021 20:49:22 +0100 Subject: [PATCH 08/10] Updates dependencies --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b50d651a..d7c2a327 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -582,9 +582,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.109" +version = "0.2.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" +checksum = "8e167738f1866a7ec625567bae89ca0d44477232a4f7c52b1c7f2adc2c98804f" [[package]] name = "libloading" @@ -738,9 +738,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" [[package]] name = "openssl-sys" -version = "0.9.71" +version = "0.9.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7df13d165e607909b363a4757a6f133f8a818a74e9d3a98d09c6128e15fa4c73" +checksum = "7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb" dependencies = [ "autocfg", "cc", @@ -938,9 +938,9 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" [[package]] name = "pkg-config" -version = "0.3.23" +version = "0.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1a3ea4f0dd7f1f3e512cf97bf100819aa547f36a6eccac8dbaae839eb92363e" +checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe" [[package]] name = "plotters" @@ -1272,9 +1272,9 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.6" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" +checksum = "b30e4c09749c107e83dd61baf9604198efc4542863c88af39dafcaca89c7c9f9" [[package]] name = "same-file" From bb0d4fb2c4e503d50546503cbfcc89bdaeadcd19 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 12 Dec 2021 20:50:40 +0100 Subject: [PATCH 09/10] Server: Allows loading multiple files in // --- server/src/main.rs | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index 71282ce0..3d3f51d3 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -24,6 +24,7 @@ use std::fs::File; use std::io::{BufReader, Error, ErrorKind, Read, Write}; use std::rc::Rc; use std::str::FromStr; +use std::thread::{spawn, JoinHandle}; use std::time::Duration; use url::form_urlencoded; @@ -64,6 +65,7 @@ pub fn main() -> std::io::Result<()> { .long("file") .help("The file to load") .takes_value(true) + .multiple(true) .required(true), ), ) @@ -77,20 +79,29 @@ pub fn main() -> std::io::Result<()> { match matches.subcommand() { ("load", Some(submatches)) => { - let file = submatches.value_of("file").unwrap(); - let format = file - .rsplit_once(".") - 
.and_then(|(_, extension)| { - DatasetFormat::from_extension(extension) - .or_else(|| GraphFormat::from_extension(extension)?.try_into().ok()) + let handles = submatches.values_of("file").unwrap().into_iter().map(|file| { + let store = store.clone(); + let file = file.to_string(); + spawn(move || { + let format = file + .rsplit_once(".") + .and_then(|(_, extension)| { + DatasetFormat::from_extension(extension) + .or_else(|| GraphFormat::from_extension(extension)?.try_into().ok()) + }) + .ok_or_else(|| { + Error::new( + ErrorKind::InvalidInput, + "The server is not able to guess the file format of {} from its extension", + ) + })?; + store.bulk_load_dataset(BufReader::new(File::open(file)?), format, None)?; + Ok(()) }) - .ok_or_else(|| { - Error::new( - ErrorKind::InvalidInput, - "The server is not able to guess the file format of {} from its extension", - ) - })?; - store.bulk_load_dataset(BufReader::new(File::open(file)?), format, None)?; + }).collect::>>>(); + for handle in handles { + handle.join().unwrap()?; + } store.optimize() } ("serve", Some(submatches)) => { From 95f30b83272e8a6017fa1a7924c1af88c31738e6 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 13 Dec 2021 14:26:51 +0100 Subject: [PATCH 10/10] Uses usual URLs for git submodules --- .gitmodules | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0900df5d..a1d89fcc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,7 +9,7 @@ url = https://github.com/Tpt/bsbm-tools.git [submodule "rocksdb-sys/rocksdb"] path = rocksdb-sys/rocksdb - url = https://github.com/facebook/rocksdb/ + url = https://github.com/facebook/rocksdb.git [submodule "rocksdb-sys/lz4"] path = rocksdb-sys/lz4 - url = https://github.com/lz4/lz4/ + url = https://github.com/lz4/lz4.git