Merge branch 'master' of https://github.com/oxigraph/oxigraph into fix-dockerfile

pull/178/head
Vincent Emonet 3 years ago
commit 48043fe57d
  1. 24
      .github/dependabot.yml
  2. 8
      .github/workflows/build.yml
  3. 4
      .gitmodules
  4. 106
      Cargo.lock
  5. 1
      README.md
  6. 43
      deny.toml
  7. 10
      lib/Cargo.toml
  8. 18
      lib/benches/store.rs
  9. 2
      lib/src/sparql/eval.rs
  10. 46
      lib/src/storage/backend/rocksdb.rs
  11. 4
      lib/src/storage/mod.rs
  12. 22
      lib/src/store.rs
  13. 4
      lib/tests/store.rs
  14. 4
      python/Cargo.toml
  15. 2
      python/src/store.rs
  16. 16
      rocksdb-sys/api/c.cc
  17. 11
      rocksdb-sys/api/c.h
  18. 39
      server/src/main.rs
  19. 2
      testsuite/Cargo.toml
  20. 2
      testsuite/rdf-star
  21. 4
      testsuite/src/evaluator.rs
  22. 9
      testsuite/src/report.rs

@ -1,24 +0,0 @@
version: 2
updates:
- package-ecosystem: cargo
directory: "/"
schedule:
interval: daily
open-pull-requests-limit: 10
- package-ecosystem: npm
directory: "/js"
schedule:
interval: daily
open-pull-requests-limit: 10
- package-ecosystem: github-actions
directory: "/"
schedule:
interval: daily
- package-ecosystem: gitsubmodule
directory: "/"
schedule:
interval: daily
- package-ecosystem: docker
directory: "/server"
schedule:
interval: daily

@ -27,6 +27,12 @@ jobs:
- run: rustup update && rustup component add clippy - run: rustup update && rustup component add clippy
- run: cargo clippy --all-targets --all-features - run: cargo clippy --all-targets --all-features
deny:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: EmbarkStudios/cargo-deny-action@v1
test_linux: test_linux:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
@ -94,7 +100,7 @@ jobs:
manylinux: 2014 manylinux: 2014
container: messense/manylinux2014-cross:x86_64 container: messense/manylinux2014-cross:x86_64
command: build command: build
args: -m python/Cargo.toml --cargo-extra-args="--no-default-features --features vendored" args: -m python/Cargo.toml
- run: pip install pyoxigraph --no-index --find-links ../../target/wheels && python -m unittest - run: pip install pyoxigraph --no-index --find-links ../../target/wheels && python -m unittest
working-directory: ./python/tests working-directory: ./python/tests

4
.gitmodules vendored

@ -9,7 +9,7 @@
url = https://github.com/Tpt/bsbm-tools.git url = https://github.com/Tpt/bsbm-tools.git
[submodule "rocksdb-sys/rocksdb"] [submodule "rocksdb-sys/rocksdb"]
path = rocksdb-sys/rocksdb path = rocksdb-sys/rocksdb
url = https://github.com/facebook/rocksdb/ url = https://github.com/facebook/rocksdb.git
[submodule "rocksdb-sys/lz4"] [submodule "rocksdb-sys/lz4"]
path = rocksdb-sys/lz4 path = rocksdb-sys/lz4
url = https://github.com/lz4/lz4/ url = https://github.com/lz4/lz4.git

106
Cargo.lock generated

@ -91,9 +91,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "block-buffer" name = "block-buffer"
version = "0.9.0" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" checksum = "f1d36a02058e76b040de25a4464ba1c80935655595b661505c8b39b664828b95"
dependencies = [ dependencies = [
"generic-array", "generic-array",
] ]
@ -149,19 +149,6 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time",
"winapi 0.3.9",
]
[[package]] [[package]]
name = "clang-sys" name = "clang-sys"
version = "1.3.0" version = "1.3.0"
@ -303,6 +290,15 @@ dependencies = [
"lazy_static", "lazy_static",
] ]
[[package]]
name = "crypto-common"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "567569e659735adb39ff2d4c20600f7cd78be5471f8c58ab162bce3c03fdbc5f"
dependencies = [
"generic-array",
]
[[package]] [[package]]
name = "csv" name = "csv"
version = "1.1.6" version = "1.1.6"
@ -337,10 +333,12 @@ dependencies = [
[[package]] [[package]]
name = "digest" name = "digest"
version = "0.9.0" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" checksum = "8549e6bfdecd113b7e221fe60b433087f6957387a20f8118ebca9b12af19143d"
dependencies = [ dependencies = [
"block-buffer",
"crypto-common",
"generic-array", "generic-array",
] ]
@ -513,9 +511,9 @@ dependencies = [
[[package]] [[package]]
name = "itertools" name = "itertools"
version = "0.10.1" version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69ddb889f9d0d08a67338271fa9b62996bc788c7796a5c18cf057420aaed5eaf" checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
dependencies = [ dependencies = [
"either", "either",
] ]
@ -584,9 +582,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]] [[package]]
name = "libc" name = "libc"
version = "0.2.109" version = "0.2.111"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f98a04dce437184842841303488f70d0188c5f51437d2a834dc097eafa909a01" checksum = "8e167738f1866a7ec625567bae89ca0d44477232a4f7c52b1c7f2adc2c98804f"
[[package]] [[package]]
name = "libloading" name = "libloading"
@ -624,13 +622,11 @@ checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f"
[[package]] [[package]]
name = "md-5" name = "md-5"
version = "0.9.1" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" checksum = "e6a38fc55c8bbc10058782919516f88826e70320db6d206aebc49611d24216ae"
dependencies = [ dependencies = [
"block-buffer",
"digest", "digest",
"opaque-debug",
] ]
[[package]] [[package]]
@ -689,16 +685,6 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]] [[package]]
name = "num-traits" name = "num-traits"
version = "0.2.14" version = "0.2.14"
@ -730,12 +716,6 @@ version = "11.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"
[[package]]
name = "opaque-debug"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
[[package]] [[package]]
name = "openssl" name = "openssl"
version = "0.10.38" version = "0.10.38"
@ -758,9 +738,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a"
[[package]] [[package]]
name = "openssl-sys" name = "openssl-sys"
version = "0.9.71" version = "0.9.72"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7df13d165e607909b363a4757a6f133f8a818a74e9d3a98d09c6128e15fa4c73" checksum = "7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb"
dependencies = [ dependencies = [
"autocfg", "autocfg",
"cc", "cc",
@ -845,11 +825,11 @@ name = "oxigraph_testsuite"
version = "0.3.0-dev" version = "0.3.0-dev"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"chrono",
"clap", "clap",
"criterion", "criterion",
"oxigraph", "oxigraph",
"text-diff", "text-diff",
"time",
] ]
[[package]] [[package]]
@ -958,9 +938,9 @@ checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
[[package]] [[package]]
name = "pkg-config" name = "pkg-config"
version = "0.3.22" version = "0.3.24"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
[[package]] [[package]]
name = "plotters" name = "plotters"
@ -1004,9 +984,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.32" version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" checksum = "fb37d2df5df740e582f28f8560cf425f52bb267d872fe58358eadb554909f07a"
dependencies = [ dependencies = [
"unicode-xid", "unicode-xid",
] ]
@ -1292,9 +1272,9 @@ dependencies = [
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.6" version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" checksum = "b30e4c09749c107e83dd61baf9604198efc4542863c88af39dafcaca89c7c9f9"
[[package]] [[package]]
name = "same-file" name = "same-file"
@ -1368,9 +1348,9 @@ checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.130" version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1"
[[package]] [[package]]
name = "serde_cbor" name = "serde_cbor"
@ -1384,9 +1364,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.130" version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" checksum = "b710a83c4e0dff6a3d511946b95274ad9ca9e5d3ae497b63fda866ac955358d2"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -1406,28 +1386,24 @@ dependencies = [
[[package]] [[package]]
name = "sha-1" name = "sha-1"
version = "0.9.8" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99cd6713db3cf16b6c84e06321e049a9b9f699826e16096d23bbcc44d15d51a6" checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f"
dependencies = [ dependencies = [
"block-buffer",
"cfg-if", "cfg-if",
"cpufeatures", "cpufeatures",
"digest", "digest",
"opaque-debug",
] ]
[[package]] [[package]]
name = "sha2" name = "sha2"
version = "0.9.8" version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" checksum = "900d964dd36bb15bcf2f2b35694c072feab74969a54f2bbeec7a2d725d2bdcb6"
dependencies = [ dependencies = [
"block-buffer",
"cfg-if", "cfg-if",
"cpufeatures", "cpufeatures",
"digest", "digest",
"opaque-debug",
] ]
[[package]] [[package]]
@ -1581,12 +1557,12 @@ dependencies = [
[[package]] [[package]]
name = "time" name = "time"
version = "0.1.43" version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" checksum = "41effe7cfa8af36f439fac33861b66b049edc6f9a32331e2312660529c1c24ad"
dependencies = [ dependencies = [
"itoa",
"libc", "libc",
"winapi 0.3.9",
] ]
[[package]] [[package]]

@ -6,6 +6,7 @@ Oxigraph
[![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/) [![PyPI](https://img.shields.io/pypi/v/pyoxigraph)](https://pypi.org/project/pyoxigraph/)
[![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph) [![npm](https://img.shields.io/npm/v/oxigraph)](https://www.npmjs.com/package/oxigraph)
[![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions) [![actions status](https://github.com/oxigraph/oxigraph/workflows/build/badge.svg)](https://github.com/oxigraph/oxigraph/actions)
[![dependency status](https://deps.rs/repo/github/oxigraph/oxigraph/status.svg)](https://deps.rs/repo/github/oxigraph/oxigraph)
[![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) [![Gitter](https://badges.gitter.im/oxigraph/community.svg)](https://gitter.im/oxigraph/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard. Oxigraph is a graph database implementing the [SPARQL](https://www.w3.org/TR/sparql11-overview/) standard.

@ -0,0 +1,43 @@
[advisories]
vulnerability = "deny"
unmaintained = "warn"
yanked = "warn"
notice = "warn"
ignore = []
[licenses]
unlicensed = "deny"
allow = [
"MIT",
"Apache-2.0",
"Apache-2.0 WITH LLVM-exception",
"CECILL-B"
]
copyleft = "deny"
allow-osi-fsf-free = "either"
default = "deny"
confidence-threshold = 0.8
exceptions = [
# Optional dependency (from sophia_api)
{ allow = ["MPL-2.0"], name = "resiter", version = "*" }
]
[[licenses.clarify]]
name = "ring"
version = "*"
expression = "MIT AND ISC AND OpenSSL"
license-files = [
{ path = "LICENSE", hash = 0xbd0eed23 }
]
[bans]
multiple-versions = "warn"
wildcards = "deny"
highlight = "all"
[sources]
unknown-registry = "deny"
unknown-git = "deny"
allow-registry = ["https://github.com/rust-lang/crates.io-index"]

@ -19,15 +19,15 @@ all-features = true
[features] [features]
default = [] default = []
sophia = ["sophia_api"] sophia = ["sophia_api"]
http_client = ["oxhttp"] http_client = ["oxhttp", "oxhttp/rustls"]
[dependencies] [dependencies]
quick-xml = "0.22" quick-xml = "0.22"
rand = "0.8" rand = "0.8"
md-5 = "0.9" md-5 = "0.10"
sha-1 = "0.9" sha-1 = "0.10"
sha2 = "0.9" sha2 = "0.10"
digest = "0.9" digest = "0.10"
regex = "1" regex = "1"
oxilangtag = "0.1" oxilangtag = "0.1"
oxiri = "0.1" oxiri = "0.1"

@ -36,8 +36,8 @@ fn store_load(c: &mut Criterion) {
group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| { group.bench_function("load BSBM explore 1000 in on disk with bulk load", |b| {
b.iter(|| { b.iter(|| {
let path = TempDir::default(); let path = TempDir::default();
let mut store = Store::open(&path.0).unwrap(); let store = Store::open(&path.0).unwrap();
do_bulk_load(&mut store, &data); do_bulk_load(&store, &data);
}) })
}); });
} }
@ -54,8 +54,8 @@ fn store_load(c: &mut Criterion) {
group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| { group.bench_function("load BSBM explore 10000 in on disk with bulk load", |b| {
b.iter(|| { b.iter(|| {
let path = TempDir::default(); let path = TempDir::default();
let mut store = Store::open(&path.0).unwrap(); let store = Store::open(&path.0).unwrap();
do_bulk_load(&mut store, &data); do_bulk_load(&store, &data);
}) })
}); });
} }
@ -73,7 +73,7 @@ fn do_load(store: &Store, data: &[u8]) {
store.optimize().unwrap(); store.optimize().unwrap();
} }
fn do_bulk_load(store: &mut Store, data: &[u8]) { fn do_bulk_load(store: &Store, data: &[u8]) {
store store
.bulk_load_graph( .bulk_load_graph(
Cursor::new(&data), Cursor::new(&data),
@ -116,8 +116,8 @@ fn store_query_and_update(c: &mut Criterion) {
group.sample_size(10); group.sample_size(10);
{ {
let mut memory_store = Store::new().unwrap(); let memory_store = Store::new().unwrap();
do_bulk_load(&mut memory_store, &data); do_bulk_load(&memory_store, &data);
group.bench_function("BSBM explore 1000 query in memory", |b| { group.bench_function("BSBM explore 1000 query in memory", |b| {
b.iter(|| run_operation(&memory_store, &query_operations)) b.iter(|| run_operation(&memory_store, &query_operations))
}); });
@ -128,8 +128,8 @@ fn store_query_and_update(c: &mut Criterion) {
{ {
let path = TempDir::default(); let path = TempDir::default();
let mut disk_store = Store::open(&path.0).unwrap(); let disk_store = Store::open(&path.0).unwrap();
do_bulk_load(&mut disk_store, &data); do_bulk_load(&disk_store, &data);
group.bench_function("BSBM explore 1000 query on disk", |b| { group.bench_function("BSBM explore 1000 query on disk", |b| {
b.iter(|| run_operation(&disk_store, &query_operations)) b.iter(|| run_operation(&disk_store, &query_operations))
}); });

@ -2048,7 +2048,7 @@ impl SimpleEvaluator {
let dataset = self.dataset.clone(); let dataset = self.dataset.clone();
Rc::new(move |tuple| { Rc::new(move |tuple| {
let input = to_simple_string(&dataset, &arg(tuple)?)?; let input = to_simple_string(&dataset, &arg(tuple)?)?;
let hash = hex::encode(H::new().chain(input.as_str()).finalize()); let hash = hex::encode(H::new().chain_update(input.as_str()).finalize());
Some(build_string_literal(&dataset, &hash)) Some(build_string_literal(&dataset, &hash))
}) })
} }

@ -10,6 +10,7 @@ use libc::{self, c_char, c_void, free};
use oxrocksdb_sys::*; use oxrocksdb_sys::*;
use rand::random; use rand::random;
use std::borrow::Borrow; use std::borrow::Borrow;
use std::collections::HashMap;
use std::env::temp_dir; use std::env::temp_dir;
use std::ffi::{CStr, CString}; use std::ffi::{CStr, CString};
use std::fs::remove_dir_all; use std::fs::remove_dir_all;
@ -19,6 +20,7 @@ use std::ops::Deref;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::rc::{Rc, Weak}; use std::rc::{Rc, Weak};
use std::sync::Arc; use std::sync::Arc;
use std::thread::yield_now;
use std::{ptr, slice}; use std::{ptr, slice};
macro_rules! ffi_result { macro_rules! ffi_result {
@ -392,7 +394,10 @@ impl Db {
msg == "Resource busy: " msg == "Resource busy: "
|| msg == "Operation timed out: Timeout waiting to lock key" || msg == "Operation timed out: Timeout waiting to lock key"
}); });
if !is_conflict_error { if is_conflict_error {
// We give the OS a chance to do something else before retrying, in order to help avoid another conflict
yield_now();
} else {
// We raise the error // We raise the error
return Err(e); return Err(e);
} }
@ -438,25 +443,40 @@ impl Db {
} }
} }
pub fn insert_stt_files(&self, ssts_for_cf: Vec<(&ColumnFamily, PathBuf)>) -> Result<()> { pub fn insert_stt_files(&self, ssts_for_cf: &[(&ColumnFamily, PathBuf)]) -> Result<()> {
let mut paths_by_cf = HashMap::<_, Vec<_>>::new();
for (cf, path) in ssts_for_cf { for (cf, path) in ssts_for_cf {
unsafe { paths_by_cf
ffi_result!(rocksdb_transactiondb_ingest_external_file_cf( .entry(*cf)
self.0.db, .or_default()
cf.0, .push(path_to_cstring(path)?);
&path_to_cstring(&path)?.as_ptr(), }
1, let cpaths_by_cf = paths_by_cf
self.0.ingest_external_file_options .iter()
))?; .map(|(cf, paths)| (*cf, paths.iter().map(|p| p.as_ptr()).collect::<Vec<_>>()))
} .collect::<Vec<_>>();
let args = cpaths_by_cf
.iter()
.map(|(cf, p)| rocksdb_ingestexternalfilearg_t {
column_family: cf.0,
external_files: p.as_ptr(),
external_files_len: p.len(),
options: self.0.ingest_external_file_options,
})
.collect::<Vec<_>>();
unsafe {
ffi_result!(rocksdb_transactiondb_ingest_external_files(
self.0.db,
args.as_ptr(),
args.len()
))
} }
Ok(())
} }
} }
// It is fine to not keep a lifetime: there is no way to use this type without the database being still in scope. // It is fine to not keep a lifetime: there is no way to use this type without the database being still in scope.
// So, no use after free possible. // So, no use after free possible.
#[derive(Clone)] #[derive(Clone, Eq, PartialEq, Hash)]
pub struct ColumnFamily(*mut rocksdb_column_family_handle_t); pub struct ColumnFamily(*mut rocksdb_column_family_handle_t);
unsafe impl Send for ColumnFamily {} unsafe impl Send for ColumnFamily {}

@ -172,7 +172,7 @@ impl Storage {
stt_file.insert_empty(&k)?; stt_file.insert_empty(&k)?;
} }
self.db self.db
.insert_stt_files(vec![(&self.graphs_cf, stt_file.finish()?)])?; .insert_stt_files(&[(&self.graphs_cf, stt_file.finish()?)])?;
version = 1; version = 1;
self.update_version(version)?; self.update_version(version)?;
} }
@ -1172,7 +1172,7 @@ impl BulkLoader {
self.quads.clear(); self.quads.clear();
} }
self.storage.db.insert_stt_files(to_load) self.storage.db.insert_stt_files(&to_load)
} }
fn insert_term(&mut self, term: TermRef<'_>, encoded: &EncodedTerm) -> Result<()> { fn insert_term(&mut self, term: TermRef<'_>, encoded: &EncodedTerm) -> Result<()> {

@ -612,7 +612,9 @@ impl Store {
/// ///
/// This function is optimized for large dataset loading speed. For small files, [`load_dataset`](Store::load_dataset) might be more convenient. /// This function is optimized for large dataset loading speed. For small files, [`load_dataset`](Store::load_dataset) might be more convenient.
/// ///
/// Warning: This method is not atomic. If the parsing fails in the middle of the file, only a part of it may be written to the store. /// Warning: This method is not atomic.
/// If the parsing fails in the middle of the file, only a part of it may be written to the store.
/// Results may be inconsistent if data is deleted from the store during the loading process.
/// ///
/// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files. /// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files.
/// ///
@ -622,7 +624,7 @@ impl Store {
/// use oxigraph::io::DatasetFormat; /// use oxigraph::io::DatasetFormat;
/// use oxigraph::model::*; /// use oxigraph::model::*;
/// ///
/// let mut store = Store::new()?; /// let store = Store::new()?;
/// ///
/// // insertion /// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> <http://example.com> ."; /// let file = b"<http://example.com> <http://example.com> <http://example.com> <http://example.com> .";
@ -639,7 +641,7 @@ impl Store {
/// Errors related to data loading into the store use the other error kinds. /// Errors related to data loading into the store use the other error kinds.
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
pub fn bulk_load_dataset( pub fn bulk_load_dataset(
&mut self, &self,
reader: impl BufRead, reader: impl BufRead,
format: DatasetFormat, format: DatasetFormat,
base_iri: Option<&str>, base_iri: Option<&str>,
@ -657,7 +659,9 @@ impl Store {
/// ///
/// This function is optimized for large dataset loading speed. For small files, [`load_graph`](Store::load_graph) might be more convenient. /// This function is optimized for large dataset loading speed. For small files, [`load_graph`](Store::load_graph) might be more convenient.
/// ///
/// Warning: This method is not atomic. If the parsing fails in the middle of the file, only a part of it may be written to the store. /// Warning: This method is not atomic.
/// If the parsing fails in the middle of the file, only a part of it may be written to the store.
/// Results may be inconsistent if data is deleted from the store during the loading process.
/// ///
/// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files. /// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files.
/// ///
@ -667,7 +671,7 @@ impl Store {
/// use oxigraph::io::GraphFormat; /// use oxigraph::io::GraphFormat;
/// use oxigraph::model::*; /// use oxigraph::model::*;
/// ///
/// let mut store = Store::new()?; /// let store = Store::new()?;
/// ///
/// // insertion /// // insertion
/// let file = b"<http://example.com> <http://example.com> <http://example.com> ."; /// let file = b"<http://example.com> <http://example.com> <http://example.com> .";
@ -684,7 +688,7 @@ impl Store {
/// Errors related to data loading into the store use the other error kinds. /// Errors related to data loading into the store use the other error kinds.
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
pub fn bulk_load_graph<'a>( pub fn bulk_load_graph<'a>(
&mut self, &self,
reader: impl BufRead, reader: impl BufRead,
format: GraphFormat, format: GraphFormat,
to_graph_name: impl Into<GraphNameRef<'a>>, to_graph_name: impl Into<GraphNameRef<'a>>,
@ -707,11 +711,13 @@ impl Store {
/// Adds a set of triples to this store using bulk load. /// Adds a set of triples to this store using bulk load.
/// ///
/// Warning: This method is not atomic. If the parsing fails in the middle of the file, only a part of it may be written to the store. /// Warning: This method is not atomic.
/// If the process fails in the middle of the file, only a part of the data may be written to the store.
/// Results may be inconsistent if data is deleted from the store during the loading process.
/// ///
/// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files. /// Warning: This method is optimized for speed. It uses multiple threads and multiple GBs of RAM on large files.
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
pub fn bulk_extend(&mut self, quads: impl IntoIterator<Item = Quad>) -> io::Result<()> { pub fn bulk_extend(&self, quads: impl IntoIterator<Item = Quad>) -> io::Result<()> {
bulk_load(&self.storage, quads.into_iter().map(Ok)) bulk_load(&self.storage, quads.into_iter().map(Ok))
} }
} }

@ -100,7 +100,7 @@ fn test_load_dataset() -> Result<()> {
#[test] #[test]
fn test_bulk_load_dataset() -> Result<()> { fn test_bulk_load_dataset() -> Result<()> {
let mut store = Store::new().unwrap(); let store = Store::new().unwrap();
store.bulk_load_dataset(Cursor::new(DATA), DatasetFormat::TriG, None)?; store.bulk_load_dataset(Cursor::new(DATA), DatasetFormat::TriG, None)?;
for q in quads(GraphNameRef::DefaultGraph) { for q in quads(GraphNameRef::DefaultGraph) {
assert!(store.contains(q)?); assert!(store.contains(q)?);
@ -183,7 +183,7 @@ fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<()> {
NamedNodeRef::new_unchecked("http://example.com/o"), NamedNodeRef::new_unchecked("http://example.com/o"),
NamedNodeRef::new_unchecked("http://example.com/g"), NamedNodeRef::new_unchecked("http://example.com/g"),
); );
let mut store = Store::new()?; let store = Store::new()?;
store.remove(quad)?; store.remove(quad)?;
store.bulk_extend([quad.into_owned()])?; store.bulk_extend([quad.into_owned()])?;
assert_eq!(store.len()?, 1); assert_eq!(store.len()?, 1);

@ -19,7 +19,3 @@ doctest = false
oxigraph = { version = "0.3.0-dev", path="../lib", features = ["http_client"] } oxigraph = { version = "0.3.0-dev", path="../lib", features = ["http_client"] }
pyo3 = { version = "0.15", features = ["extension-module", "abi3-py37"] } pyo3 = { version = "0.15", features = ["extension-module", "abi3-py37"] }
oxhttp = "0.1" oxhttp = "0.1"
[features]
default = ["oxhttp/native-tls"]
vendored = ["oxhttp/rustls"]

@ -349,7 +349,7 @@ impl PyStore {
#[pyo3(text_signature = "($self, data, /, mime_type, *, base_iri = None, to_graph = None)")] #[pyo3(text_signature = "($self, data, /, mime_type, *, base_iri = None, to_graph = None)")]
#[args(input, mime_type, "*", base_iri = "None", to_graph = "None")] #[args(input, mime_type, "*", base_iri = "None", to_graph = "None")]
fn bulk_load( fn bulk_load(
&mut self, &self,
input: PyObject, input: PyObject,
mime_type: &str, mime_type: &str,
base_iri: Option<&str>, base_iri: Option<&str>,

@ -53,6 +53,22 @@ void rocksdb_transactiondb_ingest_external_file_cf(
SaveError(errptr, db->rep->IngestExternalFile(handle->rep, files, opt->rep)); SaveError(errptr, db->rep->IngestExternalFile(handle->rep, files, opt->rep));
} }
// Ingests external files for several column families with a single call to
// `db->rep->IngestExternalFiles`.
//
// `list` points to `list_len` entries. Each entry supplies the target column
// family handle, `external_files_len` C-string paths in `external_files`, and
// the ingestion options to use for those files. The entries are converted to
// `rocksdb::IngestExternalFileArg` values before being forwarded.
//
// The resulting status is handed to `SaveError` together with `errptr`.
void rocksdb_transactiondb_ingest_external_files(
    rocksdb_transactiondb_t* db, const rocksdb_ingestexternalfilearg_t* list,
    const size_t list_len, char** errptr) {
  std::vector<rocksdb::IngestExternalFileArg> args(list_len);
  for (size_t i = 0; i < list_len; ++i) {
    const rocksdb_ingestexternalfilearg_t& in = list[i];
    rocksdb::IngestExternalFileArg& out = args[i];
    out.column_family = in.column_family->rep;
    // Build the paths directly in their final vector: this avoids creating
    // default-constructed strings, overwriting them, and then copying the
    // whole temporary vector into the argument.
    out.external_files.reserve(in.external_files_len);
    for (size_t j = 0; j < in.external_files_len; ++j) {
      out.external_files.emplace_back(in.external_files[j]);
    }
    out.options = in.options->rep;
  }
  SaveError(errptr, db->rep->IngestExternalFiles(args));
}
rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf( rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf(
rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options,
rocksdb_column_family_handle_t* column_family, const char* key, rocksdb_column_family_handle_t* column_family, const char* key,

@ -6,6 +6,13 @@
extern "C" { extern "C" {
#endif #endif
/* One ingestion request for a single column family, as consumed by
 * rocksdb_transactiondb_ingest_external_files. */
typedef struct rocksdb_ingestexternalfilearg_t {
/* Handle of the column family the files are ingested into. */
rocksdb_column_family_handle_t* column_family;
/* Array of `external_files_len` C-string file paths. */
char const* const* external_files;
/* Number of entries in `external_files`. */
size_t external_files_len;
/* Ingestion options applied to this entry's files. */
rocksdb_ingestexternalfileoptions_t* options;
} rocksdb_ingestexternalfilearg_t;
extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_transactiondb_get_pinned_cf( extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_transactiondb_get_pinned_cf(
rocksdb_transactiondb_t* db, const rocksdb_readoptions_t* options, rocksdb_transactiondb_t* db, const rocksdb_readoptions_t* options,
rocksdb_column_family_handle_t* column_family, const char* key, rocksdb_column_family_handle_t* column_family, const char* key,
@ -25,6 +32,10 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_ingest_external_file_cf(
const char* const* file_list, const size_t list_len, const char* const* file_list, const size_t list_len,
const rocksdb_ingestexternalfileoptions_t* opt, char** errptr); const rocksdb_ingestexternalfileoptions_t* opt, char** errptr);
/* Ingests the external files described by the `list_len` entries of `list`
 * into `db`. Each entry carries its own column family, file paths and
 * options. The outcome is reported through `errptr`. */
extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_ingest_external_files(
rocksdb_transactiondb_t* db, const rocksdb_ingestexternalfilearg_t* list,
const size_t list_len, char** errptr);
extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf( extern ROCKSDB_LIBRARY_API rocksdb_pinnableslice_t* rocksdb_transaction_get_pinned_cf(
rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options, rocksdb_transaction_t* txn, const rocksdb_readoptions_t* options,
rocksdb_column_family_handle_t* column_family, const char* key, rocksdb_column_family_handle_t* column_family, const char* key,

@ -24,6 +24,7 @@ use std::fs::File;
use std::io::{BufReader, Error, ErrorKind, Read, Write}; use std::io::{BufReader, Error, ErrorKind, Read, Write};
use std::rc::Rc; use std::rc::Rc;
use std::str::FromStr; use std::str::FromStr;
use std::thread::{spawn, JoinHandle};
use std::time::Duration; use std::time::Duration;
use url::form_urlencoded; use url::form_urlencoded;
@ -64,12 +65,13 @@ pub fn main() -> std::io::Result<()> {
.long("file") .long("file")
.help("The file to load") .help("The file to load")
.takes_value(true) .takes_value(true)
.multiple(true)
.required(true), .required(true),
), ),
) )
.get_matches(); .get_matches();
let mut store = if let Some(path) = matches.value_of_os("location") { let store = if let Some(path) = matches.value_of_os("location") {
Store::open(path) Store::open(path)
} else { } else {
Store::new() Store::new()
@ -77,20 +79,29 @@ pub fn main() -> std::io::Result<()> {
match matches.subcommand() { match matches.subcommand() {
("load", Some(submatches)) => { ("load", Some(submatches)) => {
let file = submatches.value_of("file").unwrap(); let handles = submatches.values_of("file").unwrap().into_iter().map(|file| {
let format = file let store = store.clone();
.rsplit_once(".") let file = file.to_string();
.and_then(|(_, extension)| { spawn(move || {
DatasetFormat::from_extension(extension) let format = file
.or_else(|| GraphFormat::from_extension(extension)?.try_into().ok()) .rsplit_once(".")
.and_then(|(_, extension)| {
DatasetFormat::from_extension(extension)
.or_else(|| GraphFormat::from_extension(extension)?.try_into().ok())
})
.ok_or_else(|| {
Error::new(
ErrorKind::InvalidInput,
"The server is not able to guess the file format of {} from its extension",
)
})?;
store.bulk_load_dataset(BufReader::new(File::open(file)?), format, None)?;
Ok(())
}) })
.ok_or_else(|| { }).collect::<Vec<JoinHandle<Result<(),Error>>>>();
Error::new( for handle in handles {
ErrorKind::InvalidInput, handle.join().unwrap()?;
"The server is not able to guess the file format of {} from its extension", }
)
})?;
store.bulk_load_dataset(BufReader::new(File::open(file)?), format, None)?;
store.optimize() store.optimize()
} }
("serve", Some(submatches)) => { ("serve", Some(submatches)) => {

@ -14,7 +14,7 @@ publish = false
[dependencies] [dependencies]
anyhow = "1" anyhow = "1"
clap = "2" clap = "2"
chrono = "0.4" time = { version = "0.3", features = ["formatting"] }
oxigraph = { version = "0.3.0-dev", path="../lib" } oxigraph = { version = "0.3.0-dev", path="../lib" }
text-diff = "0.4" text-diff = "0.4"

@ -1 +1 @@
Subproject commit 4481d7b5b7dd473b443e4f986f77ce38655b74d4 Subproject commit 5efe146c6edfc2e710e1e13eac409b7c7ae982c7

@ -1,8 +1,8 @@
use crate::manifest::Test; use crate::manifest::Test;
use crate::report::TestResult; use crate::report::TestResult;
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use chrono::Utc;
use std::collections::HashMap; use std::collections::HashMap;
use time::OffsetDateTime;
#[derive(Default)] #[derive(Default)]
pub struct TestEvaluator { pub struct TestEvaluator {
@ -33,7 +33,7 @@ impl TestEvaluator {
Ok(TestResult { Ok(TestResult {
test: test.id, test: test.id,
outcome, outcome,
date: Utc::now(), date: OffsetDateTime::now_utc(),
}) })
}) })
.collect() .collect()

@ -1,14 +1,15 @@
use anyhow::Result; use anyhow::Result;
use chrono::{DateTime, Utc};
use oxigraph::model::{Dataset, NamedNode}; use oxigraph::model::{Dataset, NamedNode};
use std::fmt::Write; use std::fmt::Write;
use text_diff::{diff, Difference}; use text_diff::{diff, Difference};
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
#[derive(Debug)] #[derive(Debug)]
pub struct TestResult { pub struct TestResult {
pub test: NamedNode, pub test: NamedNode,
pub outcome: Result<()>, pub outcome: Result<()>,
pub date: DateTime<Utc>, pub date: OffsetDateTime,
} }
pub fn dataset_diff(expected: &Dataset, actual: &Dataset) -> String { pub fn dataset_diff(expected: &Dataset, actual: &Dataset) -> String {
@ -71,7 +72,7 @@ pub fn build_report(results: impl IntoIterator<Item = TestResult>) -> String {
writeln!( writeln!(
&mut buffer, &mut buffer,
"\tdc:issued \"{}\"^^xsd:dateTime ;", "\tdc:issued \"{}\"^^xsd:dateTime ;",
Utc::now().to_rfc3339() OffsetDateTime::now_utc().format(&Rfc3339).unwrap()
); );
writeln!( writeln!(
&mut buffer, &mut buffer,
@ -140,7 +141,7 @@ pub fn build_report(results: impl IntoIterator<Item = TestResult>) -> String {
writeln!( writeln!(
&mut buffer, &mut buffer,
"\t\tdc:date \"{}\"^^xsd:dateTime", "\t\tdc:date \"{}\"^^xsd:dateTime",
result.date.to_rfc3339() result.date.format(&Rfc3339).unwrap()
); );
writeln!(&mut buffer, "\t] ;"); writeln!(&mut buffer, "\t] ;");
writeln!(&mut buffer, "\tearl:mode earl:automatic"); writeln!(&mut buffer, "\tearl:mode earl:automatic");

Loading…
Cancel
Save