diff --git a/.cargo/build_config.py b/.cargo/build_config.py
new file mode 100644
index 00000000..97dd9667
--- /dev/null
+++ b/.cargo/build_config.py
@@ -0,0 +1,100 @@
+"""Regenerate `.cargo/config.toml` with the Clippy lints enabled for this workspace.
+
+Downloads the lint list published for the Clippy release matching MSRV, turns
+every lint whose level is "allow" and whose group is not "nursery" into a `-W`
+rustflag, drops the flags listed in FLAGS_BLACKLIST and writes the sorted
+result to `config.toml` next to this script.
+"""
+
+import json
+from pathlib import Path
+from urllib.request import urlopen
+
+# Rust release whose Clippy lint list is downloaded.
+MSRV = "1.70.0"
+# Non-Clippy warning flags that are always part of the generated configuration.
+DEFAULT_BUILD_FLAGS = {
+    "-Wtrivial-casts",
+    "-Wtrivial-numeric-casts",
+    "-Wunsafe-code",
+    "-Wunused-lifetimes",
+    "-Wunused-qualifications",
+}
+# Clippy flags removed from the generated set even though the lint list offers them.
+FLAGS_BLACKLIST = {
+    "-Wclippy::alloc-instead-of-core",
+    "-Wclippy::arithmetic-side-effects",  # TODO: might be nice
+    "-Wclippy::as-conversions",
+    "-Wclippy::cargo-common-metadata",  # TODO: might be nice
+    "-Wclippy::doc-markdown",  # Too many false positives
+    "-Wclippy::default-numeric-fallback",
+    "-Wclippy::else-if-without-else",
+    "-Wclippy::exhaustive-enums",
+    "-Wclippy::exhaustive-structs",
+    "-Wclippy::float-arithmetic",
+    "-Wclippy::float-cmp",
+    "-Wclippy::float-cmp-const",
+    "-Wclippy::impl-trait-in-params",
+    "-Wclippy::implicit-return",
+    "-Wclippy::indexing-slicing",
+    "-Wclippy::integer-arithmetic",
+    "-Wclippy::integer-division",
+    "-Wclippy::map-err-ignore",
+    "-Wclippy::missing-docs-in-private-items",
+    "-Wclippy::missing-errors-doc",
+    "-Wclippy::missing-inline-in-public-items",
+    "-Wclippy::missing-panics-doc",
+    "-Wclippy::missing-trait-methods",
+    "-Wclippy::mixed-read-write-in-expression",
+    "-Wclippy::mod-module-files",
+    "-Wclippy::module-name-repetitions",
+    "-Wclippy::modulo-arithmetic",
+    "-Wclippy::multiple-crate-versions",
+    "-Wclippy::multiple-unsafe-ops-per-block",
+    "-Wclippy::must-use-candidate",  # TODO: might be nice
+    "-Wclippy::option-option",
+    "-Wclippy::pattern-type-mismatch",
+    "-Wclippy::pub-use",
+    "-Wclippy::question-mark-used",
+    "-Wclippy::self-named-module-files",  # TODO: might be nice
+    "-Wclippy::semicolon-if-nothing-returned",  # TODO: might be nice
+    "-Wclippy::semicolon-outside-block",
+    "-Wclippy::similar-names",
+    "-Wclippy::single-char-lifetime-names",
+    "-Wclippy::std-instead-of-alloc",
+    "-Wclippy::std-instead-of-core",
+    "-Wclippy::shadow-reuse",
+    "-Wclippy::shadow-unrelated",
+    "-Wclippy::string-slice",  # TODO: might be nice
+    "-Wclippy::too-many-lines",
+    "-Wclippy::separated-literal-suffix",
+    "-Wclippy::unreachable",  # TODO: might be nice
+    "-Wclippy::unwrap-used",  # TODO: might be nice to use expect instead
+    "-Wclippy::wildcard-enum-match-arm",  # TODO: might be nice
+    "-Wclippy::wildcard-imports",  # TODO: might be nice
+}
+# Anchored to this script's directory so the output does not depend on the caller's cwd.
+CONFIG_PATH = Path(__file__).resolve().parent / "config.toml"
+# Seconds to wait for the lint list; without a timeout a stalled download hangs forever.
+FETCH_TIMEOUT = 30
+
+build_flags = set(DEFAULT_BUILD_FLAGS)
+lints_url = f"https://rust-lang.github.io/rust-clippy/rust-{MSRV}/lints.json"
+with urlopen(lints_url, timeout=FETCH_TIMEOUT) as response:
+    for lint in json.load(response):
+        if lint["level"] == "allow" and lint["group"] != "nursery":
+            build_flags.add(f"-Wclippy::{lint['id'].replace('_', '-')}")
+
+# Iterate in sorted order: plain set iteration would print the warnings in a random order.
+for flag in sorted(FLAGS_BLACKLIST):
+    if flag in build_flags:
+        build_flags.remove(flag)
+    else:
+        print(f"Unused blacklisted flag: {flag}")
+
+with CONFIG_PATH.open("wt", encoding="utf-8") as fp:
+    fp.write("[build]\n")
+    fp.write("rustflags = [\n")
+    for flag in sorted(build_flags):
+        fp.write(f" \"{flag}\",\n")
+    fp.write("]\n")
diff --git a/.cargo/config.toml b/.cargo/config.toml index 09e6f6fe..3f1a93ac 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,11 +1,9 @@ [build] rustflags = [ - "-Wtrivial-casts", - "-Wtrivial-numeric-casts", - "-Wunsafe-code", - "-Wunused-lifetimes", - "-Wunused-qualifications", + "-Wclippy::allow-attributes", + "-Wclippy::allow-attributes-without-reason", "-Wclippy::as-underscore", + "-Wclippy::assertions-on-result-states", "-Wclippy::bool-to-int-with-if", "-Wclippy::borrow-as-ptr", "-Wclippy::case-sensitive-file-extension-comparisons", @@ -19,11 +17,13 @@ rustflags = [ "-Wclippy::clone-on-ref-ptr", "-Wclippy::cloned-instead-of-copied", "-Wclippy::copy-iterator", + "-Wclippy::create-dir", "-Wclippy::dbg-macro", "-Wclippy::decimal-literal-representation", "-Wclippy::default-trait-access", "-Wclippy::default-union-representation", "-Wclippy::deref-by-slicing", + "-Wclippy::disallowed-script-idents", "-Wclippy::doc-link-with-quotes", 
"-Wclippy::empty-drop", "-Wclippy::empty-enum", @@ -35,8 +35,10 @@ rustflags = [ "-Wclippy::explicit-deref-methods", "-Wclippy::explicit-into-iter-loop", "-Wclippy::explicit-iter-loop", + "-Wclippy::filetype-is-file", "-Wclippy::filter-map-next", "-Wclippy::flat-map-option", + "-Wclippy::fn-params-excessive-bools", "-Wclippy::fn-to-numeric-cast-any", "-Wclippy::format-push-string", "-Wclippy::from-iter-instead-of-collect", @@ -44,6 +46,7 @@ rustflags = [ "-Wclippy::if-not-else", "-Wclippy::if-then-some-else-none", "-Wclippy::implicit-clone", + "-Wclippy::implicit-hasher", "-Wclippy::inconsistent-struct-constructor", "-Wclippy::index-refutable-slice", "-Wclippy::inefficient-to-string", @@ -52,38 +55,49 @@ rustflags = [ "-Wclippy::inline-asm-x86-intel-syntax", "-Wclippy::invalid-upcast-comparisons", "-Wclippy::items-after-statements", + "-Wclippy::iter-not-returning-iterator", "-Wclippy::large-digit-groups", - # TODO: 1.68+ "-Wclippy::large-futures", + "-Wclippy::large-futures", + "-Wclippy::large-include-file", "-Wclippy::large-stack-arrays", "-Wclippy::large-types-passed-by-value", "-Wclippy::let-underscore-must-use", - "-Wclippy::let-unit-value", + "-Wclippy::let-underscore-untyped", "-Wclippy::linkedlist", "-Wclippy::lossy-float-literal", "-Wclippy::macro-use-imports", "-Wclippy::manual-assert", "-Wclippy::manual-instant-elapsed", - # TODO: 1.67+ "-Wclippy::manual-let-else", + "-Wclippy::manual-let-else", "-Wclippy::manual-ok-or", "-Wclippy::manual-string-new", "-Wclippy::many-single-char-names", "-Wclippy::map-unwrap-or", "-Wclippy::match-bool", + "-Wclippy::match-on-vec-items", "-Wclippy::match-same-arms", + "-Wclippy::match-wild-err-arm", "-Wclippy::match-wildcard-for-single-variants", "-Wclippy::maybe-infinite-iter", "-Wclippy::mem-forget", "-Wclippy::mismatching-type-param-order", + "-Wclippy::missing-assert-message", + "-Wclippy::missing-enforced-import-renames", "-Wclippy::multiple-inherent-impl", "-Wclippy::mut-mut", "-Wclippy::mutex-atomic", 
"-Wclippy::naive-bytecount", "-Wclippy::needless-bitwise-bool", "-Wclippy::needless-continue", + "-Wclippy::needless-for-each", "-Wclippy::needless-pass-by-value", + "-Wclippy::negative-feature-names", "-Wclippy::no-effect-underscore-binding", - # TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi", + "-Wclippy::no-mangle-with-rust-abi", "-Wclippy::non-ascii-literal", + "-Wclippy::panic", + "-Wclippy::panic-in-result-fn", + "-Wclippy::partial-pub-fields", "-Wclippy::print-stderr", "-Wclippy::print-stdout", "-Wclippy::ptr-as-ptr", @@ -100,38 +114,49 @@ rustflags = [ "-Wclippy::return-self-not-must-use", "-Wclippy::same-functions-in-if-condition", "-Wclippy::same-name-method", - # TODO: 1.68+ "-Wclippy::semicolon-outside-block", + "-Wclippy::semicolon-inside-block", + "-Wclippy::shadow-same", "-Wclippy::single-match-else", "-Wclippy::stable-sort-primitive", "-Wclippy::str-to-string", "-Wclippy::string-add", "-Wclippy::string-add-assign", - "-Wclippy::string-lit-as-bytes", "-Wclippy::string-to-string", - # TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow", + "-Wclippy::struct-excessive-bools", + "-Wclippy::suspicious-xor-used-as-pow", + "-Wclippy::tests-outside-test-module", "-Wclippy::todo", "-Wclippy::transmute-ptr-to-ptr", "-Wclippy::trivially-copy-pass-by-ref", "-Wclippy::try-err", + "-Wclippy::unchecked-duration-subtraction", + "-Wclippy::undocumented-unsafe-blocks", "-Wclippy::unicode-not-nfc", "-Wclippy::unimplemented", - # TODO: 1.66+ "-Wclippy::uninlined-format-args", - # TODO: 1.70+ "-Wclippy::unnecessary-box-returns", + "-Wclippy::uninlined-format-args", + "-Wclippy::unnecessary-box-returns", "-Wclippy::unnecessary-join", - # TODO: 1.67+ "-Wclippy::unnecessary-safety-comment", - # TODO: 1.67+ "-Wclippy::unnecessary-safety-doc", + "-Wclippy::unnecessary-safety-comment", + "-Wclippy::unnecessary-safety-doc", "-Wclippy::unnecessary-self-imports", "-Wclippy::unnecessary-wraps", "-Wclippy::unneeded-field-pattern", "-Wclippy::unnested-or-patterns", 
"-Wclippy::unreadable-literal", + "-Wclippy::unsafe-derive-deserialize", "-Wclippy::unseparated-literal-suffix", "-Wclippy::unused-async", "-Wclippy::unused-self", + "-Wclippy::unwrap-in-result", "-Wclippy::use-debug", "-Wclippy::used-underscore-binding", "-Wclippy::verbose-bit-mask", "-Wclippy::verbose-file-reads", "-Wclippy::wildcard-dependencies", "-Wclippy::zero-sized-map-values", -] \ No newline at end of file + "-Wtrivial-casts", + "-Wtrivial-numeric-casts", + "-Wunsafe-code", + "-Wunused-lifetimes", + "-Wunused-qualifications", +] diff --git a/Cargo.lock b/Cargo.lock index 22aa96e4..fa61e235 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -952,7 +952,6 @@ dependencies = [ "hex", "js-sys", "json-event-parser", - "lazy_static", "libc", "md-5", "oxhttp", diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index f1524903..4e41ea60 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -9,7 +9,6 @@ cargo-fuzz = true [dependencies] anyhow = "1" -lazy_static = "1" libfuzzer-sys = "0.4" oxrdf = { path = "../lib/oxrdf", features = ["rdf-star"] } oxttl = { path = "../lib/oxttl", features = ["rdf-star"] } diff --git a/fuzz/fuzz_targets/sparql_eval.rs b/fuzz/fuzz_targets/sparql_eval.rs index 5b52f4bd..24c8a176 100644 --- a/fuzz/fuzz_targets/sparql_eval.rs +++ b/fuzz/fuzz_targets/sparql_eval.rs @@ -1,27 +1,26 @@ #![no_main] -use lazy_static::lazy_static; use libfuzzer_sys::fuzz_target; use oxigraph::io::RdfFormat; use oxigraph::sparql::{Query, QueryOptions, QueryResults, QuerySolutionIter}; use oxigraph::store::Store; +use std::sync::OnceLock; -lazy_static! 
{ - static ref STORE: Store = { +fuzz_target!(|data: sparql_smith::Query| { + static STORE: OnceLock = OnceLock::new(); + let store = STORE.get_or_init(|| { let store = Store::new().unwrap(); store .load_dataset(sparql_smith::DATA_TRIG.as_bytes(), RdfFormat::TriG, None) .unwrap(); store - }; -} + }); -fuzz_target!(|data: sparql_smith::Query| { let query_str = data.to_string(); if let Ok(query) = Query::parse(&query_str, None) { let options = QueryOptions::default(); - let with_opt = STORE.query_opt(query.clone(), options.clone()).unwrap(); - let without_opt = STORE + let with_opt = store.query_opt(query.clone(), options.clone()).unwrap(); + let without_opt = store .query_opt(query, options.without_optimizations()) .unwrap(); match (with_opt, without_opt) { diff --git a/lib/Cargo.toml b/lib/Cargo.toml index dd5a7c4d..7b06b69a 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -32,11 +32,10 @@ oxilangtag = "0.1" oxiri = "0.2" hex = "0.4" siphasher = "1" -lazy_static = "1" json-event-parser = "0.1" oxrdf = { version = "0.2.0-alpha.1-dev", path = "oxrdf", features = ["rdf-star", "oxsdatatypes"] } oxsdatatypes = { version = "0.2.0-alpha.1-dev", path="oxsdatatypes" } -oxrdfio = { version = "0.1.0-alpha.1-dev" , path = "oxrdfio", features = ["rdf-star"] } +oxrdfio = { version = "0.1.0-alpha.1-dev", path = "oxrdfio", features = ["rdf-star"] } spargebra = { version = "0.3.0-alpha.1-dev", path = "spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } sparopt = { version = "0.1.0-alpha.1-dev", path="sparopt", features = ["rdf-star", "sep-0002", "sep-0006"] } sparesults = { version = "0.2.0-alpha.1-dev", path = "sparesults", features = ["rdf-star"] } diff --git a/lib/benches/store.rs b/lib/benches/store.rs index eaaf71e7..6f18f8d1 100644 --- a/lib/benches/store.rs +++ b/lib/benches/store.rs @@ -1,3 +1,5 @@ +#![allow(clippy::panic)] + use criterion::{criterion_group, criterion_main, Criterion, Throughput}; use oxhttp::model::{Method, Request, Status}; use 
oxigraph::io::RdfFormat; diff --git a/lib/oxrdf/src/literal.rs b/lib/oxrdf/src/literal.rs index de37adc8..0e0b6da8 100644 --- a/lib/oxrdf/src/literal.rs +++ b/lib/oxrdf/src/literal.rs @@ -628,7 +628,7 @@ pub fn print_quoted_str(string: &str, f: &mut impl Write) -> fmt::Result { '"' => f.write_str("\\\""), '\\' => f.write_str("\\\\"), '\0'..='\u{1f}' | '\u{7f}' => write!(f, "\\u{:04X}", u32::from(c)), - c => f.write_char(c), + _ => f.write_char(c), }?; } f.write_char('"') diff --git a/lib/oxrdf/src/parser.rs b/lib/oxrdf/src/parser.rs index e06128d3..4d95072f 100644 --- a/lib/oxrdf/src/parser.rs +++ b/lib/oxrdf/src/parser.rs @@ -232,7 +232,7 @@ fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> { return Err(TermParseError::msg("Unexpected literal end")); } } - c => value.push(c), + _ => value.push(c), } } Err(TermParseError::msg("Unexpected literal end")) diff --git a/lib/oxrdfio/Cargo.toml b/lib/oxrdfio/Cargo.toml index c04c16fa..1ca0ce17 100644 --- a/lib/oxrdfio/Cargo.toml +++ b/lib/oxrdfio/Cargo.toml @@ -22,7 +22,7 @@ rdf-star = ["oxrdf/rdf-star", "oxttl/rdf-star"] [dependencies] oxrdf = { version = "0.2.0-alpha.1-dev", path = "../oxrdf" } oxrdfxml = { version = "0.1.0-alpha.1-dev", path = "../oxrdfxml" } -oxttl = { version = "0.1.0-alpha.1-dev" , path = "../oxttl" } +oxttl = { version = "0.1.0-alpha.1-dev", path = "../oxttl" } tokio = { version = "1", optional = true, features = ["io-util"] } [dev-dependencies] diff --git a/lib/oxrdfxml/Cargo.toml b/lib/oxrdfxml/Cargo.toml index f4c4d67d..fa7ef2b1 100644 --- a/lib/oxrdfxml/Cargo.toml +++ b/lib/oxrdfxml/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0-alpha.1-dev" authors.workspace = true license.workspace = true readme = "README.md" -keywords = ["RDF/XML", "RDF"] +keywords = ["RDFXML", "XML", "RDF"] repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxrdfxml" homepage.workspace = true description = """ diff --git a/lib/oxrdfxml/src/error.rs b/lib/oxrdfxml/src/error.rs index 
1382844e..fd561be6 100644 --- a/lib/oxrdfxml/src/error.rs +++ b/lib/oxrdfxml/src/error.rs @@ -65,7 +65,7 @@ impl From for ParseError { Ok(error) => error, Err(error) => io::Error::new(error.kind(), error), }), - error => Self::Syntax(SyntaxError { + _ => Self::Syntax(SyntaxError { inner: SyntaxErrorKind::Xml(error), }), } @@ -121,10 +121,10 @@ impl fmt::Display for SyntaxError { SyntaxErrorKind::Xml(error) => error.fmt(f), SyntaxErrorKind::XmlAttribute(error) => error.fmt(f), SyntaxErrorKind::InvalidIri { iri, error } => { - write!(f, "error while parsing IRI '{}': {}", iri, error) + write!(f, "error while parsing IRI '{iri}': {error}") } SyntaxErrorKind::InvalidLanguageTag { tag, error } => { - write!(f, "error while parsing language tag '{}': {}", tag, error) + write!(f, "error while parsing language tag '{tag}': {error}") } SyntaxErrorKind::Msg { msg } => f.write_str(msg), } @@ -156,7 +156,7 @@ impl From for io::Error { quick_xml::Error::UnexpectedEof(error) => { Self::new(io::ErrorKind::UnexpectedEof, error) } - error => Self::new(io::ErrorKind::InvalidData, error), + _ => Self::new(io::ErrorKind::InvalidData, error), }, SyntaxErrorKind::Msg { msg } => Self::new(io::ErrorKind::InvalidData, msg), _ => Self::new(io::ErrorKind::InvalidData, error), diff --git a/lib/oxrdfxml/src/parser.rs b/lib/oxrdfxml/src/parser.rs index 747b2419..22983350 100644 --- a/lib/oxrdfxml/src/parser.rs +++ b/lib/oxrdfxml/src/parser.rs @@ -421,7 +421,9 @@ impl RdfXmlReader { match event { Event::Start(event) => self.parse_start_event(&event, results), Event::End(event) => self.parse_end_event(&event, results), - Event::Empty(_) => unreachable!("The expand_empty_elements option must be enabled"), + Event::Empty(_) => { + Err(SyntaxError::msg("The expand_empty_elements option must be enabled").into()) + } Event::Text(event) => self.parse_text_event(&event), Event::CData(event) => self.parse_text_event(&event.escape()?), Event::Comment(_) | Event::PI(_) => Ok(()), @@ -672,7 +674,9 @@ impl 
RdfXmlReader { subject: subject.clone(), }, Some(RdfXmlState::ParseTypeLiteralPropertyElt { .. }) => { - panic!("ParseTypeLiteralPropertyElt production children should never be considered as a RDF/XML content") + return Err( + SyntaxError::msg("ParseTypeLiteralPropertyElt production children should never be considered as a RDF/XML content").into() + ); } None => { return Err( @@ -747,8 +751,7 @@ impl RdfXmlReader { }; *li_counter += 1; NamedNode::new_unchecked(format!( - "http://www.w3.org/1999/02/22-rdf-syntax-ns#_{}", - li_counter + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_{li_counter}" )) } else if RESERVED_RDF_ELEMENTS.contains(&&*tag_name) || *tag_name == *RDF_DESCRIPTION @@ -881,7 +884,7 @@ impl RdfXmlReader { if event.iter().copied().all(is_whitespace) { Ok(()) } else { - Err(SyntaxError::msg(format!("Unexpected text event: '{}'", text)).into()) + Err(SyntaxError::msg(format!("Unexpected text event: '{text}'")).into()) } } } @@ -1057,8 +1060,7 @@ impl RdfXmlReader { let object = writer.into_inner(); if object.is_empty() { return Err(SyntaxError::msg(format!( - "No value found for rdf:XMLLiteral value of property {}", - iri + "No value found for rdf:XMLLiteral value of property {iri}" ))); } let triple = Triple::new( diff --git a/lib/oxsdatatypes/src/decimal.rs b/lib/oxsdatatypes/src/decimal.rs index c93ef56a..fca06e5a 100644 --- a/lib/oxsdatatypes/src/decimal.rs +++ b/lib/oxsdatatypes/src/decimal.rs @@ -121,7 +121,6 @@ impl Decimal { let mut shift_left = 0_u32; if left != 0 { while let Some(r) = left.checked_mul(10) { - assert_eq!(r / 10, left); left = r; shift_left += 1; } @@ -571,7 +570,7 @@ impl fmt::Display for Decimal { .find_map(|(i, v)| if v == b'0' { None } else { Some(i) }) .unwrap_or(40); - let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).unwrap(); + let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).map_err(|_| fmt::Error)?; if last_non_zero >= decimal_part_digits { let end = if let Some(mut width) = f.width() { if 
self.value.is_negative() { diff --git a/lib/oxsdatatypes/src/double.rs b/lib/oxsdatatypes/src/double.rs index b392d9cf..d0c77c8a 100644 --- a/lib/oxsdatatypes/src/double.rs +++ b/lib/oxsdatatypes/src/double.rs @@ -173,7 +173,7 @@ impl From for Double { impl From for Double { #[inline] fn from(value: Boolean) -> Self { - if bool::from(value) { 1. } else { 0. }.into() + f64::from(bool::from(value)).into() } } diff --git a/lib/oxsdatatypes/src/float.rs b/lib/oxsdatatypes/src/float.rs index 4de94913..1feac769 100644 --- a/lib/oxsdatatypes/src/float.rs +++ b/lib/oxsdatatypes/src/float.rs @@ -153,7 +153,7 @@ impl From for Float { impl From for Float { #[inline] fn from(value: Boolean) -> Self { - if bool::from(value) { 1. } else { 0. }.into() + f32::from(bool::from(value)).into() } } diff --git a/lib/oxttl/Cargo.toml b/lib/oxttl/Cargo.toml index f5dda6c6..0cc2741b 100644 --- a/lib/oxttl/Cargo.toml +++ b/lib/oxttl/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0-alpha.1-dev" authors.workspace = true license.workspace = true readme = "README.md" -keywords = ["N-Triples", "N-Quads", "Turtle", "TriG", "N3", "RDF"] +keywords = ["N-Triples", "N-Quads", "Turtle", "TriG", "N3"] repository = "https://github.com/oxigraph/oxigraph/tree/master/lib/oxttl" homepage.workspace = true description = """ diff --git a/lib/oxttl/src/lexer.rs b/lib/oxttl/src/lexer.rs index 7480908b..f12f3b25 100644 --- a/lib/oxttl/src/lexer.rs +++ b/lib/oxttl/src/lexer.rs @@ -4,6 +4,7 @@ use oxilangtag::LanguageTag; use oxiri::Iri; use oxrdf::NamedNode; use std::borrow::Cow; +use std::cmp::min; use std::collections::HashMap; use std::ops::{Range, RangeInclusive}; use std::str; @@ -261,7 +262,7 @@ impl N3Lexer { } return Some(( i, - Ok(N3Token::PlainKeyword(str::from_utf8(&data[..i]).unwrap())), + str_from_utf8(&data[..i], 0..i).map(N3Token::PlainKeyword), )); } } @@ -279,14 +280,17 @@ impl N3Lexer { } else { ( i, - Ok(N3Token::PlainKeyword(str::from_utf8(&data[..i]).unwrap())), + str_from_utf8(&data[..i], 
0..i).map(N3Token::PlainKeyword), ) }); } else { return None; } } - let pn_prefix = str::from_utf8(&data[..i - 1]).unwrap(); + let pn_prefix = match str_from_utf8(&data[..i - 1], 0..i - 1) { + Ok(pn_prefix) => pn_prefix, + Err(e) => return Some((i, Err(e))), + }; if pn_prefix.ends_with('.') { return Some(( i, @@ -387,10 +391,13 @@ impl N3Lexer { // We add the missing bytes if i - position_that_is_already_in_buffer > 1 { buffer.push_str( - str::from_utf8( + match str_from_utf8( &data[position_that_is_already_in_buffer..i - 1], - ) - .unwrap(), + position_that_is_already_in_buffer..i - 1, + ) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }, ) } buffer.push(a); @@ -411,8 +418,13 @@ impl N3Lexer { } else { let buffer = if let Some(mut buffer) = buffer { buffer.push_str( - str::from_utf8(&data[position_that_is_already_in_buffer..i]) - .unwrap(), + match str_from_utf8( + &data[position_that_is_already_in_buffer..i], + position_that_is_already_in_buffer..i, + ) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }, ); // We do not include the last dot while buffer.ends_with('.') { @@ -421,7 +433,10 @@ impl N3Lexer { } Cow::Owned(buffer) } else { - let mut data = str::from_utf8(&data[..i]).unwrap(); + let mut data = match str_from_utf8(&data[..i], 0..i) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }; // We do not include the last dot while let Some(d) = data.strip_suffix('.') { data = d; @@ -443,7 +458,10 @@ impl N3Lexer { } Cow::Owned(buffer) } else { - let mut data = str::from_utf8(&data[..i]).unwrap(); + let mut data = match str_from_utf8(&data[..i], 0..i) { + Ok(data) => data, + Err(e) => return Some((i, Err(e))), + }; // We do not include the last dot while let Some(d) = data.strip_suffix('.') { data = d; @@ -475,9 +493,7 @@ impl N3Lexer { i -= 1; return Some(( i, - Ok(N3Token::BlankNodeLabel( - str::from_utf8(&data[2..i]).unwrap(), - )), + str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), )); } } else if i == 0 
{ @@ -489,16 +505,12 @@ impl N3Lexer { i -= 1; return Some(( i, - Ok(N3Token::BlankNodeLabel( - str::from_utf8(&data[2..i]).unwrap(), - )), + str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), )); } else { return Some(( i, - Ok(N3Token::BlankNodeLabel( - str::from_utf8(&data[2..i]).unwrap(), - )), + str_from_utf8(&data[2..i], 2..i).map(N3Token::BlankNodeLabel), )); } i += consumed; @@ -537,7 +549,7 @@ impl N3Lexer { position: Range, ) -> Result, TokenRecognizerError> { Ok(N3Token::LangTag( - LanguageTag::parse(str::from_utf8(lang_tag).unwrap()) + LanguageTag::parse(str_from_utf8(lang_tag, position.clone())?) .map_err(|e| (position.clone(), e.to_string()))? .into_inner(), )) @@ -553,18 +565,9 @@ impl N3Lexer { let mut i = 1; loop { let end = memchr2(delimiter, b'\\', &data[i..])?; - match str::from_utf8(&data[i..i + end]) { - Ok(a) => string.push_str(a), - Err(e) => { - return Some(( - end, - Err(( - i..i + end, - format!("The string contains invalid UTF-8 characters: {e}"), - ) - .into()), - )) - } + match str_from_utf8(&data[i..i + end], i..i + end) { + Ok(s) => string.push_str(s), + Err(e) => return Some((end, Err(e))), }; i += end; match data[i] { @@ -600,18 +603,9 @@ impl N3Lexer { let mut i = 3; loop { let end = memchr2(delimiter, b'\\', &data[i..])?; - match str::from_utf8(&data[i..i + end]) { - Ok(a) => string.push_str(a), - Err(e) => { - return Some(( - end, - Err(( - i..i + end, - format!("The string contains invalid UTF-8 characters: {e}"), - ) - .into()), - )) - } + match str_from_utf8(&data[i..i + end], i..i + end) { + Ok(s) => string.push_str(s), + Err(e) => return Some((end, Err(e))), }; i += end; match data[i] { @@ -706,7 +700,7 @@ impl N3Lexer { } else if count_before == 0 && count_after.unwrap_or(0) == 0 { Err((0..i, "A double should not be empty").into()) } else { - Ok(N3Token::Double(str::from_utf8(&data[..i]).unwrap())) + str_from_utf8(&data[..i], 0..i).map(N3Token::Double) }, )) } else if let Some(count_after) = count_after { @@ 
-718,11 +712,11 @@ impl N3Lexer { if count_before == 0 { Err((0..i, "An integer should not be empty").into()) } else { - Ok(N3Token::Integer(str::from_utf8(&data[..i]).unwrap())) + str_from_utf8(&data[..i], 0..i).map(N3Token::Integer) }, )) } else { - Some((i, Ok(N3Token::Decimal(str::from_utf8(&data[..i]).unwrap())))) + Some((i, str_from_utf8(&data[..i], 0..i).map(N3Token::Decimal))) } } else { Some(( @@ -730,7 +724,7 @@ impl N3Lexer { if count_before == 0 { Err((0..i, "An integer should not be empty").into()) } else { - Ok(N3Token::Integer(str::from_utf8(&data[..i]).unwrap())) + str_from_utf8(&data[..i], 0..i).map(N3Token::Integer) }, )) } @@ -780,12 +774,7 @@ impl N3Lexer { if data.len() < len { return Ok(None); } - let val = str::from_utf8(&data[..len]).map_err(|e| { - ( - position..position + len + 2, - format!("The escape sequence contains invalid UTF-8 characters: {e}"), - ) - })?; + let val = str_from_utf8(&data[..len], position..position + len + 2)?; let codepoint = u32::from_str_radix(val, 16).map_err(|e| { ( position..position + len + 2, @@ -936,3 +925,13 @@ pub fn resolve_local_name( Err(format!("The prefix {prefix}: has not been declared")) } } + +fn str_from_utf8(data: &[u8], range: Range) -> Result<&str, TokenRecognizerError> { + str::from_utf8(data).map_err(|e| { + ( + range.start + e.valid_up_to()..min(range.end, range.start + e.valid_up_to() + 4), + format!("Invalid UTF-8: {e}"), + ) + .into() + }) +} diff --git a/lib/oxttl/src/line_formats.rs b/lib/oxttl/src/line_formats.rs index 5990c889..f95e56f3 100644 --- a/lib/oxttl/src/line_formats.rs +++ b/lib/oxttl/src/line_formats.rs @@ -74,7 +74,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.stack.push(NQuadsState::ExpectSubject); self } - token => self.error( + _ => self.error( errors, format!("The subject of a triple should be an IRI or a blank node, {token:?} found"), ), @@ -86,7 +86,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.stack.push(NQuadsState::ExpectedObject); self } - token => 
self.error( + _ => self.error( errors, format!("The predicate of a triple should be an IRI, {token:?} found"), ), @@ -116,7 +116,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.stack.push(NQuadsState::ExpectSubject); self } - token => self.error( + _ => self.error( errors, format!("The object of a triple should be an IRI, a blank node or a literal, {token:?} found"), ), @@ -139,7 +139,7 @@ impl RuleRecognizer for NQuadsRecognizer { .push(NQuadsState::ExpectLiteralDatatype { value }); self } - token => { + _ => { self.objects.push(Literal::new_simple_literal(value).into()); self.stack .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); @@ -159,7 +159,7 @@ impl RuleRecognizer for NQuadsRecognizer { .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple); self } - token => self.error(errors, format!("A literal datatype must be an IRI, found {token:?}")), + _ => self.error(errors, format!("A literal datatype must be an IRI, found {token:?}")), }, NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => { if self.stack.is_empty() { @@ -177,7 +177,7 @@ impl RuleRecognizer for NQuadsRecognizer { self.stack.push(NQuadsState::ExpectDot); self } - token => { + _ => { self.emit_quad(results, GraphName::DefaultGraph); self.stack.push(NQuadsState::ExpectDot); self.recognize_next(token, results, errors) @@ -189,16 +189,13 @@ impl RuleRecognizer for NQuadsRecognizer { self.error(errors, "Expecting the end of a quoted triple '>>'") } } - NQuadsState::ExpectDot => match token { - N3Token::Punctuation(".") => { - self.stack.push(NQuadsState::ExpectSubject); - self - } - token => { - errors.push("Quads should be followed by a dot".into()); - self.stack.push(NQuadsState::ExpectSubject); - self.recognize_next(token, results, errors) - } + NQuadsState::ExpectDot => if let N3Token::Punctuation(".") = token { + self.stack.push(NQuadsState::ExpectSubject); + self + } else { + errors.push("Quads should be followed by a dot".into()); + 
self.stack.push(NQuadsState::ExpectSubject); + self.recognize_next(token, results, errors) }, #[cfg(feature = "rdf-star")] NQuadsState::AfterQuotedSubject => { diff --git a/lib/oxttl/src/n3.rs b/lib/oxttl/src/n3.rs index 14c58413..46274ecf 100644 --- a/lib/oxttl/src/n3.rs +++ b/lib/oxttl/src/n3.rs @@ -567,7 +567,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::BaseExpectIri); self } - token => { + _ => { self.stack.push(N3State::N3DocExpectDot); self.stack.push(N3State::Triples); self.recognize_next(token, results, errors) @@ -712,7 +712,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::Path); self } - token => { + _ => { self.stack.push(N3State::AfterRegularVerb); self.stack.push(N3State::Path); self.recognize_next(token, results, errors) @@ -755,7 +755,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::PathItem); self } - token => self.recognize_next(token, results, errors) + _ => self.recognize_next(token, results, errors) }, N3State::PathAfterIndicator { is_inverse } => { let predicate = self.terms.pop().unwrap(); @@ -836,7 +836,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::FormulaContent); self } - token => self.error(errors, format!("This is not a valid RDF value: {token:?}")) + _ => self.error(errors, format!("This is not a valid RDF value: {token:?}")) } } N3State::PropertyListMiddle => match token { @@ -848,7 +848,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::IriPropertyList); self }, - token => { + _ => { self.terms.push(BlankNode::default().into()); self.stack.push(N3State::PropertyListEnd); self.stack.push(N3State::PredicateObjectList); @@ -881,47 +881,43 @@ impl RuleRecognizer for N3Recognizer { self.error(errors, "The '[ id' construction should be followed by an IRI") } } - N3State::CollectionBeginning => match token { - N3Token::Punctuation(")") => { - self.terms.push(rdf::NIL.into()); - self - } - token => { - let root = BlankNode::default(); - 
self.terms.push(root.clone().into()); - self.terms.push(root.into()); - self.stack.push(N3State::CollectionPossibleEnd); - self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) - } + N3State::CollectionBeginning => if let N3Token::Punctuation(")") = token { + self.terms.push(rdf::NIL.into()); + self + } else { + let root = BlankNode::default(); + self.terms.push(root.clone().into()); + self.terms.push(root.into()); + self.stack.push(N3State::CollectionPossibleEnd); + self.stack.push(N3State::Path); + self.recognize_next(token, results, errors) }, N3State::CollectionPossibleEnd => { let value = self.terms.pop().unwrap(); let old = self.terms.pop().unwrap(); results.push(self.quad( - old.clone(), - rdf::FIRST, - value, + old.clone(), + rdf::FIRST, + value, )); - match token { - N3Token::Punctuation(")") => { - results.push(self.quad(old, - rdf::REST, - rdf::NIL - )); - self - } - token => { - let new = BlankNode::default(); - results.push(self.quad( old, - rdf::REST, - new.clone() - )); - self.terms.push(new.into()); - self.stack.push(N3State::CollectionPossibleEnd); - self.stack.push(N3State::Path); - self.recognize_next(token, results, errors) - } + if let N3Token::Punctuation(")") = token { + results.push(self.quad( + old, + rdf::REST, + rdf::NIL + )); + self + } else { + let new = BlankNode::default(); + results.push(self.quad( + old, + rdf::REST, + new.clone() + )); + self.terms.push(new.into()); + self.stack.push(N3State::CollectionPossibleEnd); + self.stack.push(N3State::Path); + self.recognize_next(token, results, errors) } } N3State::LiteralPossibleSuffix { value } => { @@ -934,7 +930,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::LiteralExpectDatatype { value }); self } - token => { + _ => { self.terms.push(Literal::new_simple_literal(value).into()); self.recognize_next(token, results, errors) } @@ -953,7 +949,7 @@ impl RuleRecognizer for N3Recognizer { }, Err(e) => self.error(errors, e) } - token => { + _ => { 
self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors) } } @@ -985,7 +981,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::BaseExpectIri); self } - token => { + _ => { self.stack.push(N3State::FormulaContentExpectDot); self.stack.push(N3State::Triples); self.recognize_next(token, results, errors) @@ -1002,7 +998,7 @@ impl RuleRecognizer for N3Recognizer { self.stack.push(N3State::FormulaContent); self } - token => { + _ => { errors.push("A dot is expected at the end of N3 statements".into()); self.stack.push(N3State::FormulaContent); self.recognize_next(token, results, errors) diff --git a/lib/oxttl/src/terse.rs b/lib/oxttl/src/terse.rs index cedf089e..ecd24d4a 100644 --- a/lib/oxttl/src/terse.rs +++ b/lib/oxttl/src/terse.rs @@ -80,11 +80,11 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::GraphName); self } - token @ N3Token::Punctuation("{") if self.with_graph_name => { + N3Token::Punctuation("{") if self.with_graph_name => { self.stack.push(TriGState::WrappedGraph); self.recognize_next(token, results, errors) } - token => { + _ => { self.stack.push(TriGState::TriplesOrGraph); self.recognize_next(token, results, errors) } @@ -166,7 +166,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::QuotedSubject); self } - token => { + _ => { self.error(errors, format!("The token {token:?} is not a valid subject or graph name")) } } @@ -208,49 +208,43 @@ impl RuleRecognizer for TriGRecognizer { self.recognize_next(token, results, errors) } TriGState::SubjectCollectionBeginning => { - match token { - N3Token::Punctuation(")") => { - self.cur_subject.push(rdf::NIL.into()); - self - } - token => { - let root = BlankNode::default(); - self.cur_subject.push(root.clone().into()); - self.cur_subject.push(root.into()); - self.cur_predicate.push(rdf::FIRST.into()); - self.stack.push(TriGState::SubjectCollectionPossibleEnd); - 
self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) - } + if let N3Token::Punctuation(")") = token { + self.cur_subject.push(rdf::NIL.into()); + self + } else { + let root = BlankNode::default(); + self.cur_subject.push(root.clone().into()); + self.cur_subject.push(root.into()); + self.cur_predicate.push(rdf::FIRST.into()); + self.stack.push(TriGState::SubjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, results, errors) } }, TriGState::SubjectCollectionPossibleEnd => { let old = self.cur_subject.pop().unwrap(); self.cur_object.pop(); - match token { - N3Token::Punctuation(")") => { - self.cur_predicate.pop(); - results.push(Quad::new( - old, - rdf::REST, - rdf::NIL, - self.cur_graph.clone() - )); - self - } - token => { - let new = BlankNode::default(); - results.push(Quad::new( - old, - rdf::REST, - new.clone(), - self.cur_graph.clone() - )); - self.cur_subject.push(new.into()); - self.stack.push(TriGState::ObjectCollectionPossibleEnd); - self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) - } + if let N3Token::Punctuation(")") = token { + self.cur_predicate.pop(); + results.push(Quad::new( + old, + rdf::REST, + rdf::NIL, + self.cur_graph.clone() + )); + self + } else { + let new = BlankNode::default(); + results.push(Quad::new( + old, + rdf::REST, + new.clone(), + self.cur_graph.clone() + )); + self.cur_subject.push(new.into()); + self.stack.push(TriGState::ObjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, results, errors) } } // [5g] wrappedGraph ::= '{' triplesBlock? '}' @@ -273,7 +267,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::Triples); self } - token => { + _ => { errors.push("A '}' or a '.' 
is expected at the end of a graph block".into()); self.recognize_next(token, results, errors) } @@ -322,7 +316,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::QuotedSubject); self } - token => { + _ => { self.error(errors, format!("The token {token:?} is not a valid RDF subject")) } }, @@ -355,7 +349,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::GraphNameAnonEnd); self } - token => { + _ => { self.error(errors, format!("The token {token:?} is not a valid graph name")) } } @@ -419,7 +413,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::PredicateObjectList); self } - token => { + _ => { self.cur_object.pop(); self.recognize_next(token, results, errors) } @@ -461,7 +455,7 @@ impl RuleRecognizer for TriGRecognizer { }, Err(e) => self.error(errors, e) } - token => { + _ => { self.error(errors, format!("The token {token:?} is not a valid predicate")) } } @@ -541,7 +535,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::QuotedSubject); self } - token => { + _ => { self.error(errors, format!("This is not a valid RDF object: {token:?}")) } @@ -563,48 +557,42 @@ impl RuleRecognizer for TriGRecognizer { } else { self.error(errors, "blank node property lists should end with a ']'") } - TriGState::ObjectCollectionBeginning => match token { - N3Token::Punctuation(")") => { - self.cur_object.push(rdf::NIL.into()); - self.emit_quad(results); - self - } - token => { - let root = BlankNode::default(); - self.cur_object.push(root.clone().into()); - self.emit_quad(results); - self.cur_subject.push(root.into()); - self.cur_predicate.push(rdf::FIRST.into()); - self.stack.push(TriGState::ObjectCollectionPossibleEnd); - self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) - } + TriGState::ObjectCollectionBeginning => if let N3Token::Punctuation(")") = token { + self.cur_object.push(rdf::NIL.into()); + self.emit_quad(results); + self + } else { + let root = 
BlankNode::default(); + self.cur_object.push(root.clone().into()); + self.emit_quad(results); + self.cur_subject.push(root.into()); + self.cur_predicate.push(rdf::FIRST.into()); + self.stack.push(TriGState::ObjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, results, errors) }, TriGState::ObjectCollectionPossibleEnd => { let old = self.cur_subject.pop().unwrap(); self.cur_object.pop(); - match token { - N3Token::Punctuation(")") => { - self.cur_predicate.pop(); - results.push(Quad::new(old, - rdf::REST, - rdf::NIL, - self.cur_graph.clone() - )); - self - } - token => { - let new = BlankNode::default(); - results.push(Quad::new(old, - rdf::REST, - new.clone(), - self.cur_graph.clone() - )); - self.cur_subject.push(new.into()); - self.stack.push(TriGState::ObjectCollectionPossibleEnd); - self.stack.push(TriGState::Object); - self.recognize_next(token, results, errors) - } + if let N3Token::Punctuation(")") = token { + self.cur_predicate.pop(); + results.push(Quad::new(old, + rdf::REST, + rdf::NIL, + self.cur_graph.clone() + )); + self + }else { + let new = BlankNode::default(); + results.push(Quad::new(old, + rdf::REST, + new.clone(), + self.cur_graph.clone() + )); + self.cur_subject.push(new.into()); + self.stack.push(TriGState::ObjectCollectionPossibleEnd); + self.stack.push(TriGState::Object); + self.recognize_next(token, results, errors) } } TriGState::LiteralPossibleSuffix { value, emit } => { @@ -620,7 +608,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::LiteralExpectDatatype { value, emit }); self } - token => { + _ => { self.cur_object.push(Literal::new_simple_literal(value).into()); if emit { self.emit_quad(results); @@ -648,7 +636,7 @@ impl RuleRecognizer for TriGRecognizer { }, Err(e) => self.error(errors, e) } - token => { + _ => { self.error(errors, format!("Expecting a datatype IRI after '^^, found {token:?}")).recognize_next(token, results, errors) } } @@ -715,7 +703,7 @@ impl 
RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::QuotedSubject); self } - token => self.error(errors, format!("This is not a valid RDF quoted triple subject: {token:?}")) + _ => self.error(errors, format!("This is not a valid RDF quoted triple subject: {token:?}")) } // [29t] qtObject ::= iri | BlankNode | literal | quotedTriple #[cfg(feature = "rdf-star")] @@ -771,7 +759,7 @@ impl RuleRecognizer for TriGRecognizer { self.stack.push(TriGState::QuotedSubject); self } - token => self.error(errors, format!("This is not a valid RDF quoted triple object: {token:?}")) + _ => self.error(errors, format!("This is not a valid RDF quoted triple object: {token:?}")) } #[cfg(feature = "rdf-star")] TriGState::QuotedAnonEnd => if token == N3Token::Punctuation("]") { @@ -796,9 +784,18 @@ impl RuleRecognizer for TriGRecognizer { ) { match &*self.stack { [] | [TriGState::TriGDoc] => { - debug_assert!(self.cur_subject.is_empty()); - debug_assert!(self.cur_predicate.is_empty()); - debug_assert!(self.cur_object.is_empty()); + debug_assert!( + self.cur_subject.is_empty(), + "The cur_subject stack must be empty if the state stack is empty" + ); + debug_assert!( + self.cur_predicate.is_empty(), + "The cur_predicate stack must be empty if the state stack is empty" + ); + debug_assert!( + self.cur_object.is_empty(), + "The cur_object stack must be empty if the state stack is empty" + ); } [.., TriGState::LiteralPossibleSuffix { value, emit: true }] => { self.cur_object diff --git a/lib/oxttl/src/toolkit/lexer.rs b/lib/oxttl/src/toolkit/lexer.rs index 5b980f94..34c1c01e 100644 --- a/lib/oxttl/src/toolkit/lexer.rs +++ b/lib/oxttl/src/toolkit/lexer.rs @@ -153,12 +153,10 @@ impl Lexer { options: &R::Options, ) -> Option>, LexerError>> { self.skip_whitespaces_and_comments()?; - let (consumed, result) = if let Some(r) = + let Some((consumed, result)) = self.parser .recognize_next_token(&self.data[self.start..], self.is_ending, options) - { - r - } else { + else { return if 
self.is_ending { if self.start == self.data.len() { None // We have finished diff --git a/lib/sparesults/src/csv.rs b/lib/sparesults/src/csv.rs index 759aea26..ff33ce30 100644 --- a/lib/sparesults/src/csv.rs +++ b/lib/sparesults/src/csv.rs @@ -198,7 +198,7 @@ fn write_tsv_quoted_str(string: &str, f: &mut impl Write) -> io::Result<()> { b'\r' => f.write_all(b"\\r"), b'"' => f.write_all(b"\\\""), b'\\' => f.write_all(b"\\\\"), - c => f.write_all(&[c]), + _ => f.write_all(&[c]), }?; } f.write_all(b"\"") diff --git a/lib/sparesults/src/error.rs b/lib/sparesults/src/error.rs index fbb45728..d150e847 100644 --- a/lib/sparesults/src/error.rs +++ b/lib/sparesults/src/error.rs @@ -64,7 +64,7 @@ impl From for ParseError { Ok(error) => error, Err(error) => io::Error::new(error.kind(), error), }), - error => Self::Syntax(SyntaxError { + _ => Self::Syntax(SyntaxError { inner: SyntaxErrorKind::Xml(error), }), } @@ -128,7 +128,7 @@ impl From for io::Error { quick_xml::Error::UnexpectedEof(error) => { Self::new(io::ErrorKind::UnexpectedEof, error) } - error => Self::new(io::ErrorKind::InvalidData, error), + _ => Self::new(io::ErrorKind::InvalidData, error), }, SyntaxErrorKind::Term(error) => Self::new(io::ErrorKind::InvalidData, error), SyntaxErrorKind::Msg { msg } => Self::new(io::ErrorKind::InvalidData, msg), diff --git a/lib/sparesults/src/solution.rs b/lib/sparesults/src/solution.rs index a8059204..842bc7d3 100644 --- a/lib/sparesults/src/solution.rs +++ b/lib/sparesults/src/solution.rs @@ -141,6 +141,7 @@ impl<'a> IntoIterator for &'a QuerySolution { impl Index for QuerySolution { type Output = Term; + #[allow(clippy::panic)] #[inline] fn index(&self, index: usize) -> &Term { self.get(index) @@ -151,6 +152,7 @@ impl Index for QuerySolution { impl Index<&str> for QuerySolution { type Output = Term; + #[allow(clippy::panic)] #[inline] fn index(&self, index: &str) -> &Term { self.get(index) @@ -161,6 +163,7 @@ impl Index<&str> for QuerySolution { impl Index> for QuerySolution { 
type Output = Term; + #[allow(clippy::panic)] #[inline] fn index(&self, index: VariableRef<'_>) -> &Term { self.get(index) diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 32522c5c..60f8038c 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -1085,6 +1085,7 @@ parser! { let (delete, insert) = c; let mut delete = delete.unwrap_or_default(); let mut insert = insert.unwrap_or_default(); + #[allow(clippy::shadow_same)] let mut pattern = pattern; let mut using = if u.is_empty() { diff --git a/lib/sparopt/src/algebra.rs b/lib/sparopt/src/algebra.rs index c7d7c82a..fd7942d5 100644 --- a/lib/sparopt/src/algebra.rs +++ b/lib/sparopt/src/algebra.rs @@ -875,7 +875,7 @@ impl GraphPattern { inner, expression: expression & e2, }, - inner => Self::Filter { + _ => Self::Filter { inner: Box::new(inner), expression, }, diff --git a/lib/sparopt/src/optimizer.rs b/lib/sparopt/src/optimizer.rs index 344a4536..87902b59 100644 --- a/lib/sparopt/src/optimizer.rs +++ b/lib/sparopt/src/optimizer.rs @@ -272,13 +272,13 @@ impl Optimizer { fn push_filters( pattern: GraphPattern, - filters: Vec, + mut filters: Vec, input_types: &VariableTypes, ) -> GraphPattern { match pattern { - pattern @ (GraphPattern::QuadPattern { .. } + GraphPattern::QuadPattern { .. } | GraphPattern::Path { .. } - | GraphPattern::Values { .. }) => { + | GraphPattern::Values { .. } => { GraphPattern::filter(pattern, Expression::and_all(filters)) } GraphPattern::Join { @@ -416,7 +416,6 @@ impl Optimizer { ) } GraphPattern::Filter { inner, expression } => { - let mut filters = filters; if let Expression::And(expressions) = expression { filters.extend(expressions) } else { @@ -479,9 +478,9 @@ impl Optimizer { fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern { match pattern { - pattern @ (GraphPattern::QuadPattern { .. } + GraphPattern::QuadPattern { .. } | GraphPattern::Path { .. } - | GraphPattern::Values { .. 
}) => pattern, + | GraphPattern::Values { .. } => pattern, GraphPattern::Join { left, right, .. } => { // We flatten the join operation let mut to_reorder = Vec::new(); diff --git a/lib/sparopt/src/type_inference.rs b/lib/sparopt/src/type_inference.rs index d52476bd..161ba58a 100644 --- a/lib/sparopt/src/type_inference.rs +++ b/lib/sparopt/src/type_inference.rs @@ -340,6 +340,7 @@ impl VariableTypes { } } +#[allow(clippy::struct_excessive_bools)] #[derive(Clone, Copy, Eq, PartialEq, Debug, Default)] pub struct VariableType { pub undef: bool, diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index ad50cd2f..fb1737d6 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -326,11 +326,11 @@ impl SimpleEvaluator { inner, silent, } => { + #[allow(clippy::shadow_same)] let silent = *silent; let service_name = TupleSelector::from_named_node_pattern(name, encoded_variables, &self.dataset); - let _ = - self.build_graph_pattern_evaluator(inner, encoded_variables, &mut Vec::new()); // We call recursively to fill "encoded_variables" + self.build_graph_pattern_evaluator(inner, encoded_variables, &mut Vec::new()); // We call recursively to fill "encoded_variables" let graph_pattern = spargebra::algebra::GraphPattern::from(inner.as_ref()); let variables = Rc::from(encoded_variables.as_slice()); let eval = self.clone(); @@ -907,6 +907,7 @@ impl SimpleEvaluator { let (mut child, child_stats) = self.graph_pattern_evaluator(inner, encoded_variables); stat_children.push(child_stats); + #[allow(clippy::shadow_same)] let start = *start; if start > 0 { child = Rc::new(move |from| Box::new(child(from).skip(start))); @@ -3416,10 +3417,10 @@ fn cmp_terms(dataset: &DatasetView, a: Option<&EncodedTerm>, b: Option<&EncodedT } _ => Ordering::Greater, }, - a => match b { + _ => match b { _ if b.is_named_node() || b.is_blank_node() => Ordering::Greater, _ if b.is_triple() => Ordering::Less, - b => { + _ => { if let Some(ord) = partial_cmp_literals(dataset, a, b) { ord } 
else if let (Ok(Term::Literal(a)), Ok(Term::Literal(b))) = @@ -5872,14 +5873,18 @@ impl Timer { } } -#[test] -fn uuid() { - let mut buffer = String::default(); - generate_uuid(&mut buffer); - assert!( - Regex::new("^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$") - .unwrap() - .is_match(&buffer), - "{buffer} is not a valid UUID" - ); +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn uuid() { + let mut buffer = String::default(); + generate_uuid(&mut buffer); + assert!( + Regex::new("^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$") + .unwrap() + .is_match(&buffer), + "{buffer} is not a valid UUID" + ); + } } diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 1cd64158..d076baec 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -259,87 +259,95 @@ impl Iterator for QueryTripleIter { } } -#[test] -fn test_serialization_roundtrip() -> Result<(), EvaluationError> { - use std::io::Cursor; - use std::str; +#[cfg(test)] +mod tests { + #![allow(clippy::panic_in_result_fn)] - for format in [ - QueryResultsFormat::Json, - QueryResultsFormat::Xml, - QueryResultsFormat::Tsv, - ] { - let results = vec![ - QueryResults::Boolean(true), - QueryResults::Boolean(false), - QueryResults::Solutions(QuerySolutionIter::new( - Rc::new(vec![ - Variable::new_unchecked("foo"), - Variable::new_unchecked("bar"), - ]), - Box::new( - vec![ - Ok(vec![None, None]), - Ok(vec![ - Some(NamedNode::new_unchecked("http://example.com").into()), - None, - ]), - Ok(vec![ - None, - Some(NamedNode::new_unchecked("http://example.com").into()), - ]), - Ok(vec![ - Some(BlankNode::new_unchecked("foo").into()), - Some(BlankNode::new_unchecked("bar").into()), - ]), - Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]), - Ok(vec![ - Some( - Literal::new_language_tagged_literal_unchecked("foo", "fr").into(), - ), - None, - ]), - Ok(vec![ - Some(Literal::from(1).into()), - Some(Literal::from(true).into()), - ]), - 
Ok(vec![ - Some(Literal::from(1.33).into()), - Some(Literal::from(false).into()), - ]), - Ok(vec![ - Some( - Triple::new( - NamedNode::new_unchecked("http://example.com/s"), - NamedNode::new_unchecked("http://example.com/p"), + use super::*; + + #[test] + fn test_serialization_roundtrip() -> Result<(), EvaluationError> { + use std::io::Cursor; + use std::str; + + for format in [ + QueryResultsFormat::Json, + QueryResultsFormat::Xml, + QueryResultsFormat::Tsv, + ] { + let results = vec![ + QueryResults::Boolean(true), + QueryResults::Boolean(false), + QueryResults::Solutions(QuerySolutionIter::new( + Rc::new(vec![ + Variable::new_unchecked("foo"), + Variable::new_unchecked("bar"), + ]), + Box::new( + vec![ + Ok(vec![None, None]), + Ok(vec![ + Some(NamedNode::new_unchecked("http://example.com").into()), + None, + ]), + Ok(vec![ + None, + Some(NamedNode::new_unchecked("http://example.com").into()), + ]), + Ok(vec![ + Some(BlankNode::new_unchecked("foo").into()), + Some(BlankNode::new_unchecked("bar").into()), + ]), + Ok(vec![Some(Literal::new_simple_literal("foo").into()), None]), + Ok(vec![ + Some( + Literal::new_language_tagged_literal_unchecked("foo", "fr") + .into(), + ), + None, + ]), + Ok(vec![ + Some(Literal::from(1).into()), + Some(Literal::from(true).into()), + ]), + Ok(vec![ + Some(Literal::from(1.33).into()), + Some(Literal::from(false).into()), + ]), + Ok(vec![ + Some( Triple::new( - NamedNode::new_unchecked("http://example.com/os"), - NamedNode::new_unchecked("http://example.com/op"), - NamedNode::new_unchecked("http://example.com/oo"), - ), - ) - .into(), - ), - None, - ]), - ] - .into_iter(), - ), - )), - ]; + NamedNode::new_unchecked("http://example.com/s"), + NamedNode::new_unchecked("http://example.com/p"), + Triple::new( + NamedNode::new_unchecked("http://example.com/os"), + NamedNode::new_unchecked("http://example.com/op"), + NamedNode::new_unchecked("http://example.com/oo"), + ), + ) + .into(), + ), + None, + ]), + ] + .into_iter(), + ), + )), + 
]; - for ex in results { - let mut buffer = Vec::new(); - ex.write(&mut buffer, format)?; - let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?; - let mut buffer2 = Vec::new(); - ex2.write(&mut buffer2, format)?; - assert_eq!( - str::from_utf8(&buffer).unwrap(), - str::from_utf8(&buffer2).unwrap() - ); + for ex in results { + let mut buffer = Vec::new(); + ex.write(&mut buffer, format)?; + let ex2 = QueryResults::read(Cursor::new(buffer.clone()), format)?; + let mut buffer2 = Vec::new(); + ex2.write(&mut buffer2, format)?; + assert_eq!( + str::from_utf8(&buffer).unwrap(), + str::from_utf8(&buffer2).unwrap() + ); + } } - } - Ok(()) + Ok(()) + } } diff --git a/lib/src/storage/backend/fallback.rs b/lib/src/storage/backend/fallback.rs index 6bdd5673..c2eb17ec 100644 --- a/lib/src/storage/backend/fallback.rs +++ b/lib/src/storage/backend/fallback.rs @@ -1,6 +1,7 @@ //! TODO: This storage is dramatically naive. use crate::storage::StorageError; +use crate::store::CorruptionError; use std::cell::RefCell; use std::collections::{BTreeMap, HashMap}; use std::error::Error; @@ -30,9 +31,13 @@ impl Db { } #[allow(clippy::unwrap_in_result)] - pub fn column_family(&self, name: &'static str) -> Option { - let name = ColumnFamily(name); - self.0.read().unwrap().contains_key(&name).then_some(name) + pub fn column_family(&self, name: &'static str) -> Result { + let column_family = ColumnFamily(name); + if self.0.read().unwrap().contains_key(&column_family) { + Ok(column_family) + } else { + Err(CorruptionError::msg(format!("Column family {name} does not exist")).into()) + } } #[must_use] @@ -116,6 +121,7 @@ impl Reader { } } + #[allow(clippy::iter_not_returning_iterator)] pub fn iter(&self, column_family: &ColumnFamily) -> Result { self.scan_prefix(column_family, &[]) } @@ -226,7 +232,7 @@ pub struct Transaction<'a>( impl Transaction<'_> { #[allow(unsafe_code, clippy::useless_transmute)] pub fn reader(&self) -> Reader { - // This transmute is safe because we take a 
weak reference and the only Rc reference used is guarded by the lifetime. + // SAFETY: This transmute is safe because we take a weak reference and the only Rc reference used is guarded by the lifetime. Reader(InnerReader::Transaction(Rc::downgrade(unsafe { transmute(&self.0) }))) diff --git a/lib/src/storage/backend/rocksdb.rs b/lib/src/storage/backend/rocksdb.rs index 68e88744..6d065059 100644 --- a/lib/src/storage/backend/rocksdb.rs +++ b/lib/src/storage/backend/rocksdb.rs @@ -1,9 +1,14 @@ //! Code inspired by [Rust RocksDB](https://github.com/rust-rocksdb/rust-rocksdb) under Apache License 2.0. -#![allow(unsafe_code, trivial_casts)] +#![allow( + unsafe_code, + trivial_casts, + clippy::undocumented_unsafe_blocks, + clippy::panic_in_result_fn, + clippy::unwrap_in_result +)] use crate::storage::error::{CorruptionError, StorageError}; -use lazy_static::lazy_static; use libc::{self, c_void, free}; use oxrocksdb_sys::*; use rand::random; @@ -20,7 +25,7 @@ use std::marker::PhantomData; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::rc::{Rc, Weak}; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use std::thread::{available_parallelism, yield_now}; use std::{ptr, slice}; @@ -51,23 +56,6 @@ macro_rules! ffi_result_impl { }} } -lazy_static! 
{ - static ref ROCKSDB_ENV: UnsafeEnv = { - unsafe { - let env = rocksdb_create_default_env(); - assert!(!env.is_null(), "rocksdb_create_default_env returned null"); - UnsafeEnv(env) - } - }; - static ref ROCKSDB_MEM_ENV: UnsafeEnv = { - unsafe { - let env = rocksdb_create_mem_env(); - assert!(!env.is_null(), "rocksdb_create_mem_env returned null"); - UnsafeEnv(env) - } - }; -} - pub struct ColumnFamilyDefinition { pub name: &'static str, pub use_iter: bool, @@ -132,7 +120,7 @@ impl Drop for RwDbHandler { } if self.in_memory { #[allow(clippy::let_underscore_must_use)] - let _ = remove_dir_all(&self.path); + let _: io::Result<()> = remove_dir_all(&self.path); } } } @@ -167,7 +155,7 @@ impl Drop for RoDbHandler { } if let Some(path) = &self.path_to_remove { #[allow(clippy::let_underscore_must_use)] - let _ = remove_dir_all(path); + let _: io::Result<()> = remove_dir_all(path); } } } @@ -466,6 +454,9 @@ impl Db { limit_max_open_files: bool, in_memory: bool, ) -> Result<*mut rocksdb_options_t, StorageError> { + static ROCKSDB_ENV: OnceLock = OnceLock::new(); + static ROCKSDB_MEM_ENV: OnceLock = OnceLock::new(); + unsafe { let options = rocksdb_options_create(); assert!(!options.is_null(), "rocksdb_options_create returned null"); @@ -502,10 +493,19 @@ impl Db { rocksdb_options_set_env( options, if in_memory { - ROCKSDB_MEM_ENV.0 + ROCKSDB_MEM_ENV.get_or_init(|| { + let env = rocksdb_create_mem_env(); + assert!(!env.is_null(), "rocksdb_create_mem_env returned null"); + UnsafeEnv(env) + }) } else { - ROCKSDB_ENV.0 - }, + ROCKSDB_ENV.get_or_init(|| { + let env = rocksdb_create_default_env(); + assert!(!env.is_null(), "rocksdb_create_default_env returned null"); + UnsafeEnv(env) + }) + } + .0, ); Ok(options) } @@ -551,17 +551,17 @@ impl Db { (column_family_names, c_column_family_names, cf_options) } - pub fn column_family(&self, name: &'static str) -> Option { + pub fn column_family(&self, name: &'static str) -> Result { let (column_family_names, cf_handles) = match 
&self.inner { DbKind::ReadOnly(db) => (&db.column_family_names, &db.cf_handles), DbKind::ReadWrite(db) => (&db.column_family_names, &db.cf_handles), }; for (cf, cf_handle) in column_family_names.iter().zip(cf_handles) { if *cf == name { - return Some(ColumnFamily(*cf_handle)); + return Ok(ColumnFamily(*cf_handle)); } } - None + Err(CorruptionError::msg(format!("Column family {name} does not exist")).into()) } #[must_use] @@ -572,7 +572,8 @@ impl Db { if db.is_secondary { // We try to refresh (and ignore the errors) #[allow(clippy::let_underscore_must_use)] - let _ = ffi_result!(rocksdb_try_catch_up_with_primary_with_status(db.db)); + let _: Result<(), ErrorStatus> = + ffi_result!(rocksdb_try_catch_up_with_primary_with_status(db.db)); } let options = rocksdb_readoptions_create_copy(db.read_options); Reader { @@ -980,6 +981,7 @@ impl Reader { Ok(self.get(column_family, key)?.is_some()) //TODO: optimize } + #[allow(clippy::iter_not_returning_iterator)] pub fn iter(&self, column_family: &ColumnFamily) -> Result { self.scan_prefix(column_family, &[]) } @@ -1392,7 +1394,8 @@ impl From for StorageError { struct UnsafeEnv(*mut rocksdb_env_t); -// Hack for lazy_static. OK because only written in lazy static and used in a thread-safe way by RocksDB +// Hack for OnceCell. 
OK because only written in OnceCell and used in a thread-safe way by RocksDB +unsafe impl Send for UnsafeEnv {} unsafe impl Sync for UnsafeEnv {} fn path_to_cstring(path: &Path) -> Result { diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index 6591ac47..126f5fcc 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -28,7 +28,7 @@ use std::path::{Path, PathBuf}; #[cfg(not(target_family = "wasm"))] use std::sync::atomic::{AtomicU64, Ordering}; #[cfg(not(target_family = "wasm"))] -use std::thread; +use std::{io, thread}; mod backend; mod binary_encoder; @@ -178,22 +178,21 @@ impl Storage { ] } - #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] fn setup(db: Db) -> Result { let this = Self { #[cfg(not(target_family = "wasm"))] - default_cf: db.column_family(DEFAULT_CF).unwrap(), - id2str_cf: db.column_family(ID2STR_CF).unwrap(), - spog_cf: db.column_family(SPOG_CF).unwrap(), - posg_cf: db.column_family(POSG_CF).unwrap(), - ospg_cf: db.column_family(OSPG_CF).unwrap(), - gspo_cf: db.column_family(GSPO_CF).unwrap(), - gpos_cf: db.column_family(GPOS_CF).unwrap(), - gosp_cf: db.column_family(GOSP_CF).unwrap(), - dspo_cf: db.column_family(DSPO_CF).unwrap(), - dpos_cf: db.column_family(DPOS_CF).unwrap(), - dosp_cf: db.column_family(DOSP_CF).unwrap(), - graphs_cf: db.column_family(GRAPHS_CF).unwrap(), + default_cf: db.column_family(DEFAULT_CF)?, + id2str_cf: db.column_family(ID2STR_CF)?, + spog_cf: db.column_family(SPOG_CF)?, + posg_cf: db.column_family(POSG_CF)?, + ospg_cf: db.column_family(OSPG_CF)?, + gspo_cf: db.column_family(GSPO_CF)?, + gpos_cf: db.column_family(GPOS_CF)?, + gosp_cf: db.column_family(GOSP_CF)?, + dspo_cf: db.column_family(DSPO_CF)?, + dpos_cf: db.column_family(DPOS_CF)?, + dosp_cf: db.column_family(DOSP_CF)?, + graphs_cf: db.column_family(GRAPHS_CF)?, db, }; #[cfg(not(target_family = "wasm"))] @@ -1282,7 +1281,7 @@ impl StorageBulkLoader { batch_size, )?; for thread in threads { - thread.join().unwrap()?; + 
map_thread_result(thread.join()).map_err(StorageError::Io)??; self.on_possible_progress(&done_counter, &mut done_and_displayed_counter); } Ok(()) @@ -1303,7 +1302,7 @@ impl StorageBulkLoader { // We avoid to have too many threads if threads.len() >= num_threads { if let Some(thread) = threads.pop_front() { - thread.join().unwrap()?; + map_thread_result(thread.join()).map_err(StorageError::Io)??; self.on_possible_progress(done_counter, done_and_displayed_counter); } } @@ -1353,7 +1352,7 @@ impl<'a> FileBulkLoader<'a> { self.encode(quads)?; let size = self.triples.len() + self.quads.len(); self.save()?; - counter.fetch_add(size.try_into().unwrap(), Ordering::Relaxed); + counter.fetch_add(size.try_into().unwrap_or(u64::MAX), Ordering::Relaxed); Ok(()) } @@ -1376,7 +1375,12 @@ impl<'a> FileBulkLoader<'a> { match quad.graph_name.as_ref() { GraphNameRef::NamedNode(n) => n.into(), GraphNameRef::BlankNode(n) => n.into(), - GraphNameRef::DefaultGraph => unreachable!(), + GraphNameRef::DefaultGraph => { + return Err(CorruptionError::new( + "Default graph this not the default graph", + ) + .into()) + } }, &encoded.graph_name, )?; @@ -1534,3 +1538,22 @@ impl<'a> FileBulkLoader<'a> { sst.finish() } } + +/// Converts the outcome of joining a loader thread into an `io::Result`: a panic of the thread becomes an `io::Error` of kind `Other`. +/// +/// The payload of a `panic!` is a `&'static str` or a `String`, so these are the two types probed to recover the panic message; any other payload is reported as an unknown error. +#[cfg(not(target_family = "wasm"))] +fn map_thread_result(result: thread::Result) -> io::Result { + result.map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + if let Some(e) = e.downcast_ref::<&str>() { + format!("A loader processed crashed with {e}") + } else if let Some(e) = e.downcast_ref::<String>() { + format!("A loader processed crashed with {e}") + } else { + "A loader processed crashed with and unknown error".into() + }, + ) + }) +} diff --git a/lib/src/storage/numeric_encoder.rs b/lib/src/storage/numeric_encoder.rs index 94e237f6..59f7532f 100644 --- a/lib/src/storage/numeric_encoder.rs +++ b/lib/src/storage/numeric_encoder.rs @@ -716,13 +716,19 @@ pub fn insert_term Result<(), StorageError>>( if let EncodedTerm::NamedNode { iri_id } = encoded { insert_str(iri_id, node.as_str()) } else { - unreachable!("Invalid term encoding {encoded:?} for {term}") + 
Err( + CorruptionError::new(format!("Invalid term encoding {encoded:?} for {term}")) + .into(), + ) } } TermRef::BlankNode(node) => match encoded { EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()), EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()), - _ => unreachable!("Invalid term encoding {encoded:?} for {term}"), + _ => Err( + CorruptionError::new(format!("Invalid term encoding {encoded:?} for {term}")) + .into(), + ), }, TermRef::Literal(literal) => match encoded { EncodedTerm::BigStringLiteral { value_id } @@ -733,7 +739,10 @@ pub fn insert_term Result<(), StorageError>>( if let Some(language) = literal.language() { insert_str(language_id, language) } else { - unreachable!("Invalid term encoding {encoded:?} for {term}") + Err(CorruptionError::new(format!( + "Invalid term encoding {encoded:?} for {term}" + )) + .into()) } } EncodedTerm::BigBigLangStringLiteral { @@ -744,7 +753,10 @@ pub fn insert_term Result<(), StorageError>>( if let Some(language) = literal.language() { insert_str(language_id, language) } else { - unreachable!("Invalid term encoding {encoded:?} for {term}") + Err(CorruptionError::new(format!( + "Invalid term encoding {encoded:?} for {term}" + )) + .into()) } } EncodedTerm::SmallTypedLiteral { datatype_id, .. } => { @@ -775,7 +787,10 @@ pub fn insert_term Result<(), StorageError>>( | EncodedTerm::DurationLiteral(..) | EncodedTerm::YearMonthDurationLiteral(..) | EncodedTerm::DayTimeDurationLiteral(..) 
=> Ok(()), - _ => unreachable!("Invalid term encoding {encoded:?} for {term}"), + _ => Err( + CorruptionError::new(format!("Invalid term encoding {encoded:?} for {term}")) + .into(), + ), }, TermRef::Triple(triple) => { if let EncodedTerm::Triple(encoded) = encoded { @@ -787,7 +802,10 @@ pub fn insert_term Result<(), StorageError>>( )?; insert_term(triple.object.as_ref(), &encoded.object, insert_str) } else { - unreachable!("Invalid term encoding {encoded:?} for {term}") + Err( + CorruptionError::new(format!("Invalid term encoding {encoded:?} for {term}")) + .into(), + ) } } } diff --git a/lib/src/storage/small_string.rs b/lib/src/storage/small_string.rs index a7134bd6..d5d18987 100644 --- a/lib/src/storage/small_string.rs +++ b/lib/src/storage/small_string.rs @@ -46,10 +46,8 @@ impl SmallString { #[inline] #[allow(unsafe_code)] pub fn as_str(&self) -> &str { - unsafe { - // safe because we ensured it in constructors - str::from_utf8_unchecked(self.as_bytes()) - } + // SAFETY: safe because we ensured it in constructors + unsafe { str::from_utf8_unchecked(self.as_bytes()) } } #[inline] diff --git a/lib/src/store.rs b/lib/src/store.rs index f8ea0af5..9bba3c3b 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -1598,215 +1598,222 @@ impl BulkLoader { } } -#[test] -fn store() -> Result<(), StorageError> { - use crate::model::*; - - let main_s = Subject::from(BlankNode::default()); - let main_p = NamedNode::new("http://example.com").unwrap(); - let main_o = Term::from(Literal::from(1)); - let main_g = GraphName::from(BlankNode::default()); - - let default_quad = Quad::new( - main_s.clone(), - main_p.clone(), - main_o.clone(), - GraphName::DefaultGraph, - ); - let named_quad = Quad::new( - main_s.clone(), - main_p.clone(), - main_o.clone(), - main_g.clone(), - ); - let default_quads = vec![ - Quad::new( - main_s.clone(), - main_p.clone(), - Literal::from(0), - GraphName::DefaultGraph, - ), - default_quad.clone(), - Quad::new( - main_s.clone(), - main_p.clone(), - 
Literal::from(200_000_000), - GraphName::DefaultGraph, - ), - ]; - let all_quads = vec![ - Quad::new( +#[cfg(test)] +mod tests { + #![allow(clippy::panic_in_result_fn)] + + use super::*; + + #[test] + fn store() -> Result<(), StorageError> { + use crate::model::*; + + let main_s = Subject::from(BlankNode::default()); + let main_p = NamedNode::new("http://example.com").unwrap(); + let main_o = Term::from(Literal::from(1)); + let main_g = GraphName::from(BlankNode::default()); + + let default_quad = Quad::new( main_s.clone(), main_p.clone(), - Literal::from(0), + main_o.clone(), GraphName::DefaultGraph, - ), - default_quad.clone(), - Quad::new( + ); + let named_quad = Quad::new( main_s.clone(), main_p.clone(), - Literal::from(200_000_000), - GraphName::DefaultGraph, - ), - named_quad.clone(), - ]; - - let store = Store::new()?; - for t in &default_quads { - assert!(store.insert(t)?); - } - assert!(!store.insert(&default_quad)?); - - assert!(store.remove(&default_quad)?); - assert!(!store.remove(&default_quad)?); - assert!(store.insert(&named_quad)?); - assert!(!store.insert(&named_quad)?); - assert!(store.insert(&default_quad)?); - assert!(!store.insert(&default_quad)?); - - assert_eq!(store.len()?, 4); - assert_eq!(store.iter().collect::, _>>()?, all_quads); - assert_eq!( - store - .quads_for_pattern(Some(main_s.as_ref()), None, None, None) - .collect::, _>>()?, - all_quads - ); - assert_eq!( - store - .quads_for_pattern(Some(main_s.as_ref()), Some(main_p.as_ref()), None, None) - .collect::, _>>()?, - all_quads - ); - assert_eq!( - store - .quads_for_pattern( - Some(main_s.as_ref()), - Some(main_p.as_ref()), - Some(main_o.as_ref()), - None - ) - .collect::, _>>()?, - vec![default_quad.clone(), named_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern( - Some(main_s.as_ref()), - Some(main_p.as_ref()), - Some(main_o.as_ref()), - Some(GraphNameRef::DefaultGraph) - ) - .collect::, _>>()?, - vec![default_quad.clone()] - ); - assert_eq!( - store - 
.quads_for_pattern( - Some(main_s.as_ref()), - Some(main_p.as_ref()), - Some(main_o.as_ref()), - Some(main_g.as_ref()) - ) - .collect::, _>>()?, - vec![named_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern( - Some(main_s.as_ref()), - Some(main_p.as_ref()), - None, - Some(GraphNameRef::DefaultGraph) - ) - .collect::, _>>()?, - default_quads - ); - assert_eq!( - store - .quads_for_pattern(Some(main_s.as_ref()), None, Some(main_o.as_ref()), None) - .collect::, _>>()?, - vec![default_quad.clone(), named_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern( - Some(main_s.as_ref()), - None, - Some(main_o.as_ref()), - Some(GraphNameRef::DefaultGraph) - ) - .collect::, _>>()?, - vec![default_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern( - Some(main_s.as_ref()), - None, - Some(main_o.as_ref()), - Some(main_g.as_ref()) - ) - .collect::, _>>()?, - vec![named_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern( - Some(main_s.as_ref()), - None, - None, - Some(GraphNameRef::DefaultGraph) - ) - .collect::, _>>()?, - default_quads - ); - assert_eq!( - store - .quads_for_pattern(None, Some(main_p.as_ref()), None, None) - .collect::, _>>()?, - all_quads - ); - assert_eq!( - store - .quads_for_pattern(None, Some(main_p.as_ref()), Some(main_o.as_ref()), None) - .collect::, _>>()?, - vec![default_quad.clone(), named_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern(None, None, Some(main_o.as_ref()), None) - .collect::, _>>()?, - vec![default_quad.clone(), named_quad.clone()] - ); - assert_eq!( - store - .quads_for_pattern(None, None, None, Some(GraphNameRef::DefaultGraph)) - .collect::, _>>()?, - default_quads - ); - assert_eq!( - store - .quads_for_pattern( - None, - Some(main_p.as_ref()), - Some(main_o.as_ref()), - Some(GraphNameRef::DefaultGraph) - ) - .collect::, _>>()?, - vec![default_quad] - ); - assert_eq!( - store - .quads_for_pattern( - None, - Some(main_p.as_ref()), - Some(main_o.as_ref()), - Some(main_g.as_ref()) 
- ) - .collect::, _>>()?, - vec![named_quad] - ); - - Ok(()) + main_o.clone(), + main_g.clone(), + ); + let default_quads = vec![ + Quad::new( + main_s.clone(), + main_p.clone(), + Literal::from(0), + GraphName::DefaultGraph, + ), + default_quad.clone(), + Quad::new( + main_s.clone(), + main_p.clone(), + Literal::from(200_000_000), + GraphName::DefaultGraph, + ), + ]; + let all_quads = vec![ + Quad::new( + main_s.clone(), + main_p.clone(), + Literal::from(0), + GraphName::DefaultGraph, + ), + default_quad.clone(), + Quad::new( + main_s.clone(), + main_p.clone(), + Literal::from(200_000_000), + GraphName::DefaultGraph, + ), + named_quad.clone(), + ]; + + let store = Store::new()?; + for t in &default_quads { + assert!(store.insert(t)?); + } + assert!(!store.insert(&default_quad)?); + + assert!(store.remove(&default_quad)?); + assert!(!store.remove(&default_quad)?); + assert!(store.insert(&named_quad)?); + assert!(!store.insert(&named_quad)?); + assert!(store.insert(&default_quad)?); + assert!(!store.insert(&default_quad)?); + + assert_eq!(store.len()?, 4); + assert_eq!(store.iter().collect::, _>>()?, all_quads); + assert_eq!( + store + .quads_for_pattern(Some(main_s.as_ref()), None, None, None) + .collect::, _>>()?, + all_quads + ); + assert_eq!( + store + .quads_for_pattern(Some(main_s.as_ref()), Some(main_p.as_ref()), None, None) + .collect::, _>>()?, + all_quads + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + Some(main_p.as_ref()), + Some(main_o.as_ref()), + None + ) + .collect::, _>>()?, + vec![default_quad.clone(), named_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + Some(main_p.as_ref()), + Some(main_o.as_ref()), + Some(GraphNameRef::DefaultGraph) + ) + .collect::, _>>()?, + vec![default_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + Some(main_p.as_ref()), + Some(main_o.as_ref()), + Some(main_g.as_ref()) + ) + .collect::, _>>()?, + 
vec![named_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + Some(main_p.as_ref()), + None, + Some(GraphNameRef::DefaultGraph) + ) + .collect::, _>>()?, + default_quads + ); + assert_eq!( + store + .quads_for_pattern(Some(main_s.as_ref()), None, Some(main_o.as_ref()), None) + .collect::, _>>()?, + vec![default_quad.clone(), named_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + None, + Some(main_o.as_ref()), + Some(GraphNameRef::DefaultGraph) + ) + .collect::, _>>()?, + vec![default_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + None, + Some(main_o.as_ref()), + Some(main_g.as_ref()) + ) + .collect::, _>>()?, + vec![named_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern( + Some(main_s.as_ref()), + None, + None, + Some(GraphNameRef::DefaultGraph) + ) + .collect::, _>>()?, + default_quads + ); + assert_eq!( + store + .quads_for_pattern(None, Some(main_p.as_ref()), None, None) + .collect::, _>>()?, + all_quads + ); + assert_eq!( + store + .quads_for_pattern(None, Some(main_p.as_ref()), Some(main_o.as_ref()), None) + .collect::, _>>()?, + vec![default_quad.clone(), named_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern(None, None, Some(main_o.as_ref()), None) + .collect::, _>>()?, + vec![default_quad.clone(), named_quad.clone()] + ); + assert_eq!( + store + .quads_for_pattern(None, None, None, Some(GraphNameRef::DefaultGraph)) + .collect::, _>>()?, + default_quads + ); + assert_eq!( + store + .quads_for_pattern( + None, + Some(main_p.as_ref()), + Some(main_o.as_ref()), + Some(GraphNameRef::DefaultGraph) + ) + .collect::, _>>()?, + vec![default_quad] + ); + assert_eq!( + store + .quads_for_pattern( + None, + Some(main_p.as_ref()), + Some(main_o.as_ref()), + Some(main_g.as_ref()) + ) + .collect::, _>>()?, + vec![named_quad] + ); + + Ok(()) + } } diff --git a/lib/tests/store.rs b/lib/tests/store.rs index 3deeeaac..e17fb69c 
100644 --- a/lib/tests/store.rs +++ b/lib/tests/store.rs @@ -1,3 +1,6 @@ +#![cfg(test)] +#![allow(clippy::panic_in_result_fn)] + use oxigraph::io::RdfFormat; use oxigraph::model::vocab::{rdf, xsd}; use oxigraph::model::*; @@ -8,7 +11,7 @@ use rand::random; use std::env::temp_dir; use std::error::Error; #[cfg(not(target_family = "wasm"))] -use std::fs::{create_dir, remove_dir_all, File}; +use std::fs::{create_dir_all, remove_dir_all, File}; #[cfg(not(target_family = "wasm"))] use std::io::Write; #[cfg(target_os = "linux")] @@ -237,10 +240,10 @@ fn test_dump_dataset() -> Result<(), Box> { #[test] fn test_snapshot_isolation_iterator() -> Result<(), Box> { let quad = QuadRef::new( - NamedNodeRef::new_unchecked("http://example.com/s"), - NamedNodeRef::new_unchecked("http://example.com/p"), - NamedNodeRef::new_unchecked("http://example.com/o"), - NamedNodeRef::new_unchecked("http://www.wikidata.org/wiki/Special:EntityData/Q90"), + NamedNodeRef::new("http://example.com/s")?, + NamedNodeRef::new("http://example.com/p")?, + NamedNodeRef::new("http://example.com/o")?, + NamedNodeRef::new("http://www.wikidata.org/wiki/Special:EntityData/Q90")?, ); let store = Store::new()?; store.insert(quad)?; @@ -274,7 +277,7 @@ fn test_bulk_load_on_existing_delete_overrides_the_delete() -> Result<(), Box Result<(), Box> { let dir = TempDir::default(); - create_dir(&dir.0)?; + create_dir_all(&dir.0)?; { File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?; } @@ -346,7 +349,7 @@ fn test_bad_backup() -> Result<(), Box> { let store_dir = TempDir::default(); let backup_dir = TempDir::default(); - create_dir(&backup_dir.0)?; + create_dir_all(&backup_dir.0)?; assert!(Store::open(&store_dir)?.backup(&backup_dir.0).is_err()); Ok(()) } @@ -430,7 +433,7 @@ fn test_secondary() -> Result<(), Box> { #[cfg(not(target_family = "wasm"))] fn test_open_secondary_bad_dir() -> Result<(), Box> { let primary_dir = TempDir::default(); - create_dir(&primary_dir.0)?; + create_dir_all(&primary_dir.0)?; { 
File::create(primary_dir.0.join("CURRENT"))?.write_all(b"foo")?; } @@ -491,7 +494,7 @@ fn test_read_only() -> Result<(), Box> { #[cfg(not(target_family = "wasm"))] fn test_open_read_only_bad_dir() -> Result<(), Box> { let dir = TempDir::default(); - create_dir(&dir.0)?; + create_dir_all(&dir.0)?; { File::create(dir.0.join("CURRENT"))?.write_all(b"foo")?; } diff --git a/python/src/io.rs b/python/src/io.rs index f365af50..0b705c11 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -333,14 +333,15 @@ pub fn allow_threads_unsafe(f: impl FnOnce() -> T) -> T { impl Drop for RestoreGuard { fn drop(&mut self) { + // SAFETY: not cloned so called once unsafe { pyo3::ffi::PyEval_RestoreThread(self.tstate); } } } - let _guard = RestoreGuard { - tstate: unsafe { pyo3::ffi::PyEval_SaveThread() }, - }; + // SAFETY: we have the restore part in Drop to make sure it's properly executed + let tstate = unsafe { pyo3::ffi::PyEval_SaveThread() }; + let _guard = RestoreGuard { tstate }; f() } diff --git a/server/Cargo.toml b/server/Cargo.toml index 9e61e54d..01ed8a77 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -4,6 +4,8 @@ version.workspace = true authors.workspace = true license.workspace = true readme = "README.md" +keywords = ["RDF", "SPARQL", "graph-database", "database"] +categories = ["command-line-utilities", "database"] repository = "https://github.com/oxigraph/oxigraph/tree/main/server" homepage = "https://oxigraph.org/server/" description = """ @@ -15,7 +17,7 @@ rust-version.workspace = true [dependencies] anyhow = "1" oxhttp = { version = "0.1", features = ["rayon"] } -clap = { version = "4.0", features = ["derive"] } +clap = { version = "4", features = ["derive"] } oxigraph = { version = "0.4.0-alpha.1-dev", path = "../lib", features = ["http_client"] } sparesults = { version = "0.2.0-alpha.1-dev", path = "../lib/sparesults", features = ["rdf-star"] } rand = "0.8" diff --git a/server/src/main.rs b/server/src/main.rs index 4082ed52..038f5d05 100644 --- 
a/server/src/main.rs +++ b/server/src/main.rs @@ -642,13 +642,13 @@ pub fn main() -> anyhow::Result<()> { explanation.write_in_json(&mut file)?; }, Some("txt") => { - write!(file, "{:?}", explanation)?; + write!(file, "{explanation:?}")?; }, _ => bail!("The given explanation file {} must have an extension that is .json or .txt", explain_file.display()) } close_file_writer(file)?; } else if explain || stats { - eprintln!("{:#?}", explanation); + eprintln!("{explanation:#?}"); } print_result } @@ -1753,7 +1753,7 @@ impl io::Result>) + 'static> ReadForWrite Result<()> { let mut evaluator = TestEvaluator::default(); register_parser_tests(&mut evaluator); diff --git a/testsuite/src/manifest.rs b/testsuite/src/manifest.rs index fd450fe2..3a70442e 100644 --- a/testsuite/src/manifest.rs +++ b/testsuite/src/manifest.rs @@ -87,9 +87,7 @@ impl TestManifest { let Some(test_node) = self.tests_to_do.pop_front() else { return Ok(None); }; - let test_node = if let Term::NamedNode(test_node) = test_node { - test_node - } else { + let Term::NamedNode(test_node) = test_node else { bail!("Invalid test identifier. 
Got {test_node}"); }; diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index f440b6ad..318da172 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -505,11 +505,12 @@ impl StaticQueryResults { // Hack to normalize literals let store = Store::new()?; for t in graph { - store - .insert(t.in_graph(GraphNameRef::DefaultGraph)) - .unwrap(); + store.insert(t.in_graph(GraphNameRef::DefaultGraph))?; } - let mut graph: Graph = store.iter().map(|q| Triple::from(q.unwrap())).collect(); + let mut graph = store + .iter() + .map(|q| Ok(Triple::from(q?))) + .collect::>()?; if let Some(result_set) = graph.subject_for_predicate_object(rdf::TYPE, rs::RESULT_SET) { if let Some(bool) = graph.object_for_subject_predicate(result_set, rs::BOOLEAN) { @@ -737,11 +738,10 @@ fn evaluate_query_optimization_test(test: &Test) -> Result<()> { .result .as_ref() .ok_or_else(|| anyhow!("No tests result found"))?; - let expected = if let spargebra::Query::Select { pattern, .. } = - spargebra::Query::parse(&read_file_to_string(result)?, Some(result))? - { - pattern - } else { + let spargebra::Query::Select { + pattern: expected, .. + } = spargebra::Query::parse(&read_file_to_string(result)?, Some(result))? 
+ else { bail!("Only SELECT queries are supported in query sparql-optimization tests") }; if expected == actual { diff --git a/testsuite/tests/canonicalization.rs b/testsuite/tests/canonicalization.rs index c6e5f0e2..978902d2 100644 --- a/testsuite/tests/canonicalization.rs +++ b/testsuite/tests/canonicalization.rs @@ -1,3 +1,5 @@ +#![cfg(test)] + use anyhow::Result; use oxigraph_testsuite::check_testsuite; diff --git a/testsuite/tests/oxigraph.rs b/testsuite/tests/oxigraph.rs index a5bc7e0f..b76e5a2a 100644 --- a/testsuite/tests/oxigraph.rs +++ b/testsuite/tests/oxigraph.rs @@ -1,3 +1,5 @@ +#![cfg(test)] + use anyhow::Result; use oxigraph_testsuite::check_testsuite; diff --git a/testsuite/tests/parser.rs b/testsuite/tests/parser.rs index 2846b8f0..9e7141b5 100644 --- a/testsuite/tests/parser.rs +++ b/testsuite/tests/parser.rs @@ -1,3 +1,5 @@ +#![cfg(test)] + use anyhow::Result; use oxigraph_testsuite::check_testsuite; diff --git a/testsuite/tests/serd.rs b/testsuite/tests/serd.rs index ae40d752..eb4910ad 100644 --- a/testsuite/tests/serd.rs +++ b/testsuite/tests/serd.rs @@ -1,3 +1,5 @@ +#![cfg(test)] + use anyhow::Result; use oxigraph_testsuite::check_testsuite; diff --git a/testsuite/tests/sparql.rs b/testsuite/tests/sparql.rs index 74e8c437..eafb80fd 100644 --- a/testsuite/tests/sparql.rs +++ b/testsuite/tests/sparql.rs @@ -1,3 +1,5 @@ +#![cfg(test)] + use anyhow::Result; use oxigraph_testsuite::check_testsuite;