From cf03da0fab06c245c549a1e8ff87eb245b2b09ad Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 28 Apr 2023 21:07:10 +0200 Subject: [PATCH 01/45] CI: fixes linux cross-compilation v2 --- .github/workflows/artifacts.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/artifacts.yml b/.github/workflows/artifacts.yml index 256a5084..73bdc686 100644 --- a/.github/workflows/artifacts.yml +++ b/.github/workflows/artifacts.yml @@ -22,7 +22,7 @@ jobs: - run: rustup update && rustup target add aarch64-unknown-linux-gnu - run: | sudo apt install -y g++-aarch64-linux-gnu - echo -e "[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml + echo -e "\n\n[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml - uses: Swatinem/rust-cache@v2 - run: cargo build --release working-directory: ./server From bbe9bd03037bb64962663239805a9e379eb6f38c Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 28 Apr 2023 08:50:40 -0400 Subject: [PATCH 02/45] Make all Thomases into one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before ❯ git shortlog -sn | head 1211 Tpt 46 Thomas Tanon ... After ❯ git shortlog -sn | head 1259 Thomas Tanon ... 
--- .mailmap | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .mailmap diff --git a/.mailmap b/.mailmap new file mode 100644 index 00000000..7c85fb7b --- /dev/null +++ b/.mailmap @@ -0,0 +1,3 @@ +Thomas Tanon +Thomas Tanon +Thomas Tanon From 704440538db9fade7781d3051839a1f04128db70 Mon Sep 17 00:00:00 2001 From: Thomas Date: Thu, 27 Apr 2023 12:30:15 +0200 Subject: [PATCH 03/45] Adds EXISTS operation to the profiler output --- lib/src/sparql/eval.rs | 263 ++++++++++++++++++++++------------------- 1 file changed, 139 insertions(+), 124 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 7cce17d6..a71b95cb 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -495,7 +495,7 @@ impl SimpleEvaluator { stat_children.push(left_stats); let (right, right_stats) = self.plan_evaluator(right.clone()); stat_children.push(right_stats); - let expression = self.expression_evaluator(expression); + let expression = self.expression_evaluator(expression, &mut stat_children); // Real hash join Rc::new(move |from| { let mut errors = Vec::default(); @@ -547,7 +547,7 @@ impl SimpleEvaluator { PlanNode::Filter { child, expression } => { let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); - let expression = self.expression_evaluator(expression); + let expression = self.expression_evaluator(expression, &mut stat_children); Rc::new(move |from| { let expression = expression.clone(); Box::new(child(from).filter(move |tuple| { @@ -586,7 +586,7 @@ impl SimpleEvaluator { let (child, child_stats) = self.plan_evaluator(child.clone()); stat_children.push(child_stats); let position = variable.encoded; - let expression = self.expression_evaluator(expression); + let expression = self.expression_evaluator(expression, &mut stat_children); Rc::new(move |from| { let expression = expression.clone(); Box::new(child(from).map(move |tuple| { @@ -604,12 +604,12 @@ impl SimpleEvaluator { let by: Vec<_> = by .iter() 
.map(|comp| match comp { - Comparator::Asc(expression) => { - ComparatorFunction::Asc(self.expression_evaluator(expression)) - } - Comparator::Desc(expression) => { - ComparatorFunction::Desc(self.expression_evaluator(expression)) - } + Comparator::Asc(expression) => ComparatorFunction::Asc( + self.expression_evaluator(expression, &mut stat_children), + ), + Comparator::Desc(expression) => ComparatorFunction::Desc( + self.expression_evaluator(expression, &mut stat_children), + ), }) .collect(); let dataset = self.dataset.clone(); @@ -733,7 +733,7 @@ impl SimpleEvaluator { aggregate .parameter .as_ref() - .map(|p| self.expression_evaluator(p)) + .map(|p| self.expression_evaluator(p, &mut stat_children)) }) .collect(); let accumulator_builders: Vec<_> = aggregates @@ -933,6 +933,7 @@ impl SimpleEvaluator { fn expression_evaluator( &self, expression: &PlanExpression, + stat_children: &mut Vec>, ) -> Rc Option> { match expression { PlanExpression::NamedNode(t) => { @@ -948,12 +949,13 @@ impl SimpleEvaluator { Rc::new(move |tuple| tuple.get(v).cloned()) } PlanExpression::Exists(plan) => { - let (eval, _) = self.plan_evaluator(plan.clone()); //TODO: stats + let (eval, stats) = self.plan_evaluator(plan.clone()); + stat_children.push(stats); Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into())) } PlanExpression::Or(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| match a(tuple).and_then(|v| to_bool(&v)) { Some(true) => Some(true.into()), Some(false) => b(tuple), @@ -967,8 +969,8 @@ impl SimpleEvaluator { }) } PlanExpression::And(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| match a(tuple).and_then(|v| 
to_bool(&v)) { Some(true) => b(tuple), Some(false) => Some(false.into()), @@ -982,13 +984,13 @@ impl SimpleEvaluator { }) } PlanExpression::Equal(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| equals(&a(tuple)?, &b(tuple)?).map(|v| v.into())) } PlanExpression::Greater(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some( @@ -998,8 +1000,8 @@ impl SimpleEvaluator { }) } PlanExpression::GreaterOrEqual(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some( @@ -1012,16 +1014,16 @@ impl SimpleEvaluator { }) } PlanExpression::Less(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some((partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? 
== Ordering::Less).into()) }) } PlanExpression::LessOrEqual(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some( @@ -1034,8 +1036,8 @@ impl SimpleEvaluator { }) } PlanExpression::Add(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new( move |tuple| match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? { NumericBinaryOperands::Float(v1, v2) => Some((v1 + v2).into()), @@ -1078,8 +1080,8 @@ impl SimpleEvaluator { ) } PlanExpression::Subtract(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| { Some(match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? { NumericBinaryOperands::Float(v1, v2) => (v1 - v2).into(), @@ -1130,8 +1132,8 @@ impl SimpleEvaluator { }) } PlanExpression::Multiply(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new( move |tuple| match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? { NumericBinaryOperands::Float(v1, v2) => Some((v1 * v2).into()), @@ -1143,8 +1145,8 @@ impl SimpleEvaluator { ) } PlanExpression::Divide(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new( move |tuple| match NumericBinaryOperands::new(a(tuple)?, b(tuple)?)? 
{ NumericBinaryOperands::Float(v1, v2) => Some((v1 / v2).into()), @@ -1158,7 +1160,7 @@ impl SimpleEvaluator { ) } PlanExpression::UnaryPlus(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(value.into()), EncodedTerm::DoubleLiteral(value) => Some(value.into()), @@ -1171,7 +1173,7 @@ impl SimpleEvaluator { }) } PlanExpression::UnaryMinus(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some((-value).into()), EncodedTerm::DoubleLiteral(value) => Some((-value).into()), @@ -1184,11 +1186,11 @@ impl SimpleEvaluator { }) } PlanExpression::Not(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| to_bool(&e(tuple)?).map(|v| (!v).into())) } PlanExpression::Str(e) | PlanExpression::StringCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some(build_string_literal_from_id(to_string_id( @@ -1198,7 +1200,7 @@ impl SimpleEvaluator { }) } PlanExpression::Lang(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::SmallSmallLangStringLiteral { language, .. 
} @@ -1214,8 +1216,8 @@ impl SimpleEvaluator { }) } PlanExpression::LangMatches(language_tag, language_range) => { - let language_tag = self.expression_evaluator(language_tag); - let language_range = self.expression_evaluator(language_range); + let language_tag = self.expression_evaluator(language_tag, stat_children); + let language_range = self.expression_evaluator(language_range, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let mut language_tag = to_simple_string(&dataset, &language_tag(tuple)?)?; @@ -1240,7 +1242,7 @@ impl SimpleEvaluator { }) } PlanExpression::Datatype(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| datatype(&dataset, &e(tuple)?)) } @@ -1249,7 +1251,7 @@ impl SimpleEvaluator { Rc::new(move |tuple| Some(tuple.contains(v).into())) } PlanExpression::Iri(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); let base_iri = self.base_iri.clone(); Rc::new(move |tuple| { @@ -1273,7 +1275,7 @@ impl SimpleEvaluator { } PlanExpression::BNode(id) => match id { Some(id) => { - let id = self.expression_evaluator(id); + let id = self.expression_evaluator(id, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some( @@ -1293,7 +1295,7 @@ impl SimpleEvaluator { }, PlanExpression::Rand => Rc::new(|_| Some(random::().into())), PlanExpression::Abs(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::IntegerLiteral(value) => Some(value.abs().into()), EncodedTerm::DecimalLiteral(value) => Some(value.abs().into()), @@ -1303,7 +1305,7 @@ impl SimpleEvaluator { }) } PlanExpression::Ceil(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::DecimalLiteral(value) => Some(value.ceil().into()), @@ -1313,7 +1315,7 @@ impl SimpleEvaluator { }) } PlanExpression::Floor(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::DecimalLiteral(value) => Some(value.floor().into()), @@ -1323,7 +1325,7 @@ impl SimpleEvaluator { }) } PlanExpression::Round(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::IntegerLiteral(value) => Some(value.into()), EncodedTerm::DecimalLiteral(value) => Some(value.round().into()), @@ -1333,7 +1335,10 @@ impl SimpleEvaluator { }) } PlanExpression::Concat(l) => { - let l: Vec<_> = l.iter().map(|e| self.expression_evaluator(e)).collect(); + let l: Vec<_> = l + .iter() + .map(|e| self.expression_evaluator(e, stat_children)) + .collect(); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let mut result = String::default(); @@ -1357,9 +1362,11 @@ impl SimpleEvaluator { }) } PlanExpression::SubStr(source, starting_loc, length) => { - let source = self.expression_evaluator(source); - let starting_loc = self.expression_evaluator(starting_loc); - let length = length.as_ref().map(|l| self.expression_evaluator(l)); + let source = self.expression_evaluator(source, stat_children); + let starting_loc = self.expression_evaluator(starting_loc, stat_children); + let length = length + .as_ref() + .map(|l| self.expression_evaluator(l, stat_children)); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (source, language) = to_string_and_language(&dataset, &source(tuple)?)?; @@ -1403,16 +1410,16 @@ impl SimpleEvaluator { }) } PlanExpression::StrLen(arg) => { - let arg = self.expression_evaluator(arg); + let arg = self.expression_evaluator(arg, 
stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some((to_string(&dataset, &arg(tuple)?)?.chars().count() as i64).into()) }) } PlanExpression::StaticReplace(arg, regex, replacement) => { - let arg = self.expression_evaluator(arg); + let arg = self.expression_evaluator(arg, stat_children); let regex = regex.clone(); - let replacement = self.expression_evaluator(replacement); + let replacement = self.expression_evaluator(replacement, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (text, language) = to_string_and_language(&dataset, &arg(tuple)?)?; @@ -1425,10 +1432,12 @@ impl SimpleEvaluator { }) } PlanExpression::DynamicReplace(arg, pattern, replacement, flags) => { - let arg = self.expression_evaluator(arg); - let pattern = self.expression_evaluator(pattern); - let replacement = self.expression_evaluator(replacement); - let flags = flags.as_ref().map(|flags| self.expression_evaluator(flags)); + let arg = self.expression_evaluator(arg, stat_children); + let pattern = self.expression_evaluator(pattern, stat_children); + let replacement = self.expression_evaluator(replacement, stat_children); + let flags = flags + .as_ref() + .map(|flags| self.expression_evaluator(flags, stat_children)); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let pattern = to_simple_string(&dataset, &pattern(tuple)?)?; @@ -1448,7 +1457,7 @@ impl SimpleEvaluator { }) } PlanExpression::UCase(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (value, language) = to_string_and_language(&dataset, &e(tuple)?)?; @@ -1460,7 +1469,7 @@ impl SimpleEvaluator { }) } PlanExpression::LCase(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (value, language) = to_string_and_language(&dataset, &e(tuple)?)?; @@ 
-1472,8 +1481,8 @@ impl SimpleEvaluator { }) } PlanExpression::StrStarts(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (arg1, arg2, _) = @@ -1482,7 +1491,7 @@ impl SimpleEvaluator { }) } PlanExpression::EncodeForUri(ltrl) => { - let ltrl = self.expression_evaluator(ltrl); + let ltrl = self.expression_evaluator(ltrl, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let ltlr = to_string(&dataset, &ltrl(tuple)?)?; @@ -1516,8 +1525,8 @@ impl SimpleEvaluator { }) } PlanExpression::StrEnds(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (arg1, arg2, _) = @@ -1526,8 +1535,8 @@ impl SimpleEvaluator { }) } PlanExpression::Contains(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (arg1, arg2, _) = @@ -1536,8 +1545,8 @@ impl SimpleEvaluator { }) } PlanExpression::StrBefore(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let arg2 = self.expression_evaluator(arg2); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (arg1, arg2, language) = @@ -1550,8 +1559,8 @@ impl SimpleEvaluator { }) } PlanExpression::StrAfter(arg1, arg2) => { - let arg1 = self.expression_evaluator(arg1); - let 
arg2 = self.expression_evaluator(arg2); + let arg1 = self.expression_evaluator(arg1, stat_children); + let arg2 = self.expression_evaluator(arg2, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let (arg1, arg2, language) = @@ -1564,7 +1573,7 @@ impl SimpleEvaluator { }) } PlanExpression::Year(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.year().into()), EncodedTerm::DateLiteral(date) => Some(date.year().into()), @@ -1574,7 +1583,7 @@ impl SimpleEvaluator { }) } PlanExpression::Month(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.month().into()), EncodedTerm::DateLiteral(date) => Some(date.month().into()), @@ -1585,7 +1594,7 @@ impl SimpleEvaluator { }) } PlanExpression::Day(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.day().into()), EncodedTerm::DateLiteral(date) => Some(date.day().into()), @@ -1595,7 +1604,7 @@ impl SimpleEvaluator { }) } PlanExpression::Hours(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.hour().into()), EncodedTerm::TimeLiteral(time) => Some(time.hour().into()), @@ -1603,7 +1612,7 @@ impl SimpleEvaluator { }) } PlanExpression::Minutes(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.minute().into()), EncodedTerm::TimeLiteral(time) => Some(time.minute().into()), @@ -1611,7 +1620,7 @@ impl SimpleEvaluator { }) } PlanExpression::Seconds(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => Some(date_time.second().into()), EncodedTerm::TimeLiteral(time) => Some(time.second().into()), @@ -1619,7 +1628,7 @@ impl SimpleEvaluator { }) } PlanExpression::Timezone(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { Some( match e(tuple)? { @@ -1638,7 +1647,7 @@ impl SimpleEvaluator { }) } PlanExpression::Tz(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let timezone_offset = match e(tuple)? { @@ -1662,8 +1671,8 @@ impl SimpleEvaluator { } PlanExpression::Adjust(dt, tz) => { - let dt = self.expression_evaluator(dt); - let tz = self.expression_evaluator(tz); + let dt = self.expression_evaluator(dt, stat_children); + let tz = self.expression_evaluator(tz, stat_children); Rc::new(move |tuple| { let timezone_offset = Some( match tz(tuple)? 
{ @@ -1713,13 +1722,16 @@ impl SimpleEvaluator { Some(build_string_literal(&dataset, &buffer)) }) } - PlanExpression::Md5(arg) => self.hash::(arg), - PlanExpression::Sha1(arg) => self.hash::(arg), - PlanExpression::Sha256(arg) => self.hash::(arg), - PlanExpression::Sha384(arg) => self.hash::(arg), - PlanExpression::Sha512(arg) => self.hash::(arg), + PlanExpression::Md5(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha1(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha256(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha384(arg) => self.hash::(arg, stat_children), + PlanExpression::Sha512(arg) => self.hash::(arg, stat_children), PlanExpression::Coalesce(l) => { - let l: Vec<_> = l.iter().map(|e| self.expression_evaluator(e)).collect(); + let l: Vec<_> = l + .iter() + .map(|e| self.expression_evaluator(e, stat_children)) + .collect(); Rc::new(move |tuple| { for e in &l { if let Some(result) = e(tuple) { @@ -1730,9 +1742,9 @@ impl SimpleEvaluator { }) } PlanExpression::If(a, b, c) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); - let c = self.expression_evaluator(c); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); + let c = self.expression_evaluator(c, stat_children); Rc::new(move |tuple| { if to_bool(&a(tuple)?)? 
{ b(tuple) @@ -1742,8 +1754,8 @@ impl SimpleEvaluator { }) } PlanExpression::StrLang(lexical_form, lang_tag) => { - let lexical_form = self.expression_evaluator(lexical_form); - let lang_tag = self.expression_evaluator(lang_tag); + let lexical_form = self.expression_evaluator(lexical_form, stat_children); + let lang_tag = self.expression_evaluator(lang_tag, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { Some(build_lang_string_literal_from_id( @@ -1753,8 +1765,8 @@ impl SimpleEvaluator { }) } PlanExpression::StrDt(lexical_form, datatype) => { - let lexical_form = self.expression_evaluator(lexical_form); - let datatype = self.expression_evaluator(datatype); + let lexical_form = self.expression_evaluator(lexical_form, stat_children); + let datatype = self.expression_evaluator(datatype, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let value = to_simple_string(&dataset, &lexical_form(tuple)?)?; @@ -1770,24 +1782,24 @@ impl SimpleEvaluator { }) } PlanExpression::SameTerm(a, b) => { - let a = self.expression_evaluator(a); - let b = self.expression_evaluator(b); + let a = self.expression_evaluator(a, stat_children); + let b = self.expression_evaluator(b, stat_children); Rc::new(move |tuple| Some((a(tuple)? 
== b(tuple)?).into())) } PlanExpression::IsIri(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_named_node().into())) } PlanExpression::IsBlank(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_blank_node().into())) } PlanExpression::IsLiteral(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_literal().into())) } PlanExpression::IsNumeric(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { Some( matches!( @@ -1802,7 +1814,7 @@ impl SimpleEvaluator { }) } PlanExpression::StaticRegex(text, regex) => { - let text = self.expression_evaluator(text); + let text = self.expression_evaluator(text, stat_children); let dataset = self.dataset.clone(); let regex = regex.clone(); Rc::new(move |tuple| { @@ -1811,9 +1823,11 @@ impl SimpleEvaluator { }) } PlanExpression::DynamicRegex(text, pattern, flags) => { - let text = self.expression_evaluator(text); - let pattern = self.expression_evaluator(pattern); - let flags = flags.as_ref().map(|flags| self.expression_evaluator(flags)); + let text = self.expression_evaluator(text, stat_children); + let pattern = self.expression_evaluator(pattern, stat_children); + let flags = flags + .as_ref() + .map(|flags| self.expression_evaluator(flags, stat_children)); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let pattern = to_simple_string(&dataset, &pattern(tuple)?)?; @@ -1828,9 +1842,9 @@ impl SimpleEvaluator { }) } PlanExpression::Triple(s, p, o) => { - let s = self.expression_evaluator(s); - let p = self.expression_evaluator(p); - let o = self.expression_evaluator(o); + let s = self.expression_evaluator(s, stat_children); + let p = self.expression_evaluator(p, stat_children); + let o 
= self.expression_evaluator(o, stat_children); Rc::new(move |tuple| { let s = s(tuple)?; let p = p(tuple)?; @@ -1847,7 +1861,7 @@ impl SimpleEvaluator { }) } PlanExpression::Subject(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { if let EncodedTerm::Triple(t) = e(tuple)? { Some(t.subject.clone()) @@ -1857,7 +1871,7 @@ impl SimpleEvaluator { }) } PlanExpression::Predicate(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { if let EncodedTerm::Triple(t) = e(tuple)? { Some(t.predicate.clone()) @@ -1867,7 +1881,7 @@ impl SimpleEvaluator { }) } PlanExpression::Object(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| { if let EncodedTerm::Triple(t) = e(tuple)? { Some(t.object.clone()) @@ -1877,11 +1891,11 @@ impl SimpleEvaluator { }) } PlanExpression::IsTriple(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| Some(e(tuple)?.is_triple().into())) } PlanExpression::BooleanCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::BooleanLiteral(value) => Some(value.into()), EncodedTerm::FloatLiteral(value) => Some(Boolean::from(value).into()), @@ -1893,7 +1907,7 @@ impl SimpleEvaluator { }) } PlanExpression::DoubleCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::FloatLiteral(value) => Some(Double::from(value).into()), @@ -1909,7 +1923,7 @@ impl SimpleEvaluator { }) } PlanExpression::FloatCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(value.into()), @@ -1925,7 +1939,7 @@ impl SimpleEvaluator { }) } PlanExpression::IntegerCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Integer::try_from(value).ok()?.into()), @@ -1945,7 +1959,7 @@ impl SimpleEvaluator { }) } PlanExpression::DecimalCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Decimal::try_from(value).ok()?.into()), @@ -1965,7 +1979,7 @@ impl SimpleEvaluator { }) } PlanExpression::DateCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateLiteral(value) => Some(value.into()), @@ -1978,7 +1992,7 @@ impl SimpleEvaluator { }) } PlanExpression::TimeCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::TimeLiteral(value) => Some(value.into()), @@ -1991,7 +2005,7 @@ impl SimpleEvaluator { }) } PlanExpression::DateTimeCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DateTimeLiteral(value) => Some(value.into()), @@ -2004,7 +2018,7 @@ impl SimpleEvaluator { }) } PlanExpression::DurationCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => Some(value.into()), @@ -2022,7 +2036,7 @@ impl SimpleEvaluator { }) } PlanExpression::YearMonthDurationCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => { @@ -2037,7 +2051,7 @@ impl SimpleEvaluator { }) } PlanExpression::DayTimeDurationCast(e) => { - let e = self.expression_evaluator(e); + let e = self.expression_evaluator(e, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => { @@ -2055,7 +2069,7 @@ impl SimpleEvaluator { if let Some(function) = self.custom_functions.get(function_name).cloned() { let args = args .iter() - .map(|e| self.expression_evaluator(e)) + .map(|e| self.expression_evaluator(e, stat_children)) .collect::>(); let dataset = self.dataset.clone(); Rc::new(move |tuple| { @@ -2075,8 +2089,9 @@ impl SimpleEvaluator { fn hash( &self, arg: &PlanExpression, + stat_children: &mut Vec>, ) -> Rc Option> { - let arg = self.expression_evaluator(arg); + let arg = self.expression_evaluator(arg, stat_children); let dataset = self.dataset.clone(); Rc::new(move |tuple| { let input = to_simple_string(&dataset, &arg(tuple)?)?; From 38357dd9b57b2f2622622cb5a14aba1daad7fc3e Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 27 Apr 2023 16:18:19 -0400 Subject: [PATCH 04/45] Add github action to shellcheck main on push and PRs --- .github/workflows/shellcheck.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 
.github/workflows/shellcheck.yml diff --git a/.github/workflows/shellcheck.yml b/.github/workflows/shellcheck.yml new file mode 100644 index 00000000..ef7175de --- /dev/null +++ b/.github/workflows/shellcheck.yml @@ -0,0 +1,23 @@ +--- +name: Shellcheck + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + shellcheck: + name: Check shell scripts + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Install dependencies + run: | + sudo apt update && sudo apt install -y shellcheck + - name: shellcheck + run: | + git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck From 80ce67e6dd796d709f93af67eaee2b093944d005 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 27 Apr 2023 16:29:59 -0400 Subject: [PATCH 05/45] Run bench/bsbm_ with set -eu to exit right away if something abnormal happens MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit e.g. if a command fails or an undefined variable is used. In turn, this makes it possible to address shellcheck warnings such as In bench/bsbm_graphdb.sh line 7: cd bsbm-tools ^-----------^ SC2164 (warning): Use 'cd ... || exit' or 'cd ... || return' in case cd fails. Did you mean: cd bsbm-tools || exit but note that some uses remain, such as ❯ git grep 'cd bsbm-tools || exit' bench/bsbm_blazegraph.sh:cd bsbm-tools || exit bench/bsbm_jena.sh:cd bsbm-tools || exit bench/bsbm_rdf4j.sh:cd bsbm-tools || exit but maybe `|| exit` should be removed so that they exit with an error? Or should the same be added to the other 3? 
❯ git grep 'cd bsbm-tools$' bench/bsbm_graphdb.sh:cd bsbm-tools bench/bsbm_oxigraph.sh:cd bsbm-tools bench/bsbm_virtuoso.sh:cd bsbm-tools --- bench/bsbm_blazegraph.sh | 10 ++++++---- bench/bsbm_graphdb.sh | 10 ++++++---- bench/bsbm_jena.sh | 14 ++++++++------ bench/bsbm_oxigraph.sh | 10 ++++++---- bench/bsbm_rdf4j.sh | 12 +++++++----- bench/bsbm_virtuoso.sh | 10 ++++++---- 6 files changed, 39 insertions(+), 27 deletions(-) diff --git a/bench/bsbm_blazegraph.sh b/bench/bsbm_blazegraph.sh index 559334ff..41f2dd0c 100755 --- a/bench/bsbm_blazegraph.sh +++ b/bench/bsbm_blazegraph.sh @@ -2,6 +2,8 @@ DATASET_SIZE=100000 PARALLELISM=16 + +set -eu wget -nc https://github.com/blazegraph/database/releases/download/BLAZEGRAPH_RELEASE_2_1_5/blazegraph.jar cd bsbm-tools || exit ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" @@ -12,7 +14,7 @@ curl -f -X POST -H 'Content-Type:text/turtle' -T "explore-${DATASET_SIZE}.nt" ht ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql -u http://localhost:9999/blazegraph/sparql -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.blazegraph.2.1.5.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:9999/blazegraph/sparql kill $! 
-rm blazegraph.jnl -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -f blazegraph.jnl +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/bench/bsbm_graphdb.sh b/bench/bsbm_graphdb.sh index 21c83d7c..2c52dae2 100755 --- a/bench/bsbm_graphdb.sh +++ b/bench/bsbm_graphdb.sh @@ -4,6 +4,8 @@ DATASET_SIZE=100000 PARALLELISM=16 VERSION="9.3.3" JAVA_HOME=/usr/lib/jvm/java-11-openjdk + +set -eu cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" ../graphdb-free-9.3.3/bin/graphdb -s -Dgraphdb.logger.root.level=WARN & @@ -17,7 +19,7 @@ curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZ #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.graphdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:7200/repositories/bsbm kill $! sleep 5 -rm -r ../graphdb-free-9.3.3/data -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -rf ../graphdb-free-9.3.3/data +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/bench/bsbm_jena.sh b/bench/bsbm_jena.sh index 107fd47a..caa56d92 100755 --- a/bench/bsbm_jena.sh +++ b/bench/bsbm_jena.sh @@ -3,6 +3,8 @@ DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. 
PARALLELISM=16 VERSION="4.3.2" + +set -eu wget -nc https://downloads.apache.org/jena/binaries/apache-jena-${VERSION}.zip cd bsbm-tools || exit ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" @@ -18,9 +20,9 @@ sleep 10 ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query -u http://localhost:3030/bsbm/update -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.jena.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:3030/bsbm/query kill $! -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data -rm -r run -rm -r apache-jena-${VERSION} -rm -r apache-jena-fuseki-${VERSION} +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data +rm -rf run +rm -rf apache-jena-${VERSION} +rm -rf apache-jena-fuseki-${VERSION} diff --git a/bench/bsbm_oxigraph.sh b/bench/bsbm_oxigraph.sh index 844b662a..7ee691cb 100755 --- a/bench/bsbm_oxigraph.sh +++ b/bench/bsbm_oxigraph.sh @@ -2,6 +2,8 @@ DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. 
PARALLELISM=16 + +set -eu cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" cargo build --release --manifest-path="../../server/Cargo.toml" @@ -13,7 +15,7 @@ sleep 1 ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.oxigraph.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://127.0.0.1:7878/query -u http://127.0.0.1:7878/update -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" "http://127.0.0.1:7878/query" kill $! -rm -r oxigraph_data -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -rf oxigraph_data +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data diff --git a/bench/bsbm_rdf4j.sh b/bench/bsbm_rdf4j.sh index a88e785e..6592b65f 100755 --- a/bench/bsbm_rdf4j.sh +++ b/bench/bsbm_rdf4j.sh @@ -4,6 +4,8 @@ DATASET_SIZE=100000 PARALLELISM=16 VERSION="4.2.2" TOMCAT_VERSION="9.0.71" + +set -eu wget -nc -O "rdf4j-${VERSION}.zip" "https://www.eclipse.org/downloads/download.php?file=/rdf4j/eclipse-rdf4j-${VERSION}-sdk.zip&mirror_id=1" wget -nc -O "tomcat-${TOMCAT_VERSION}.zip" "https://dlcdn.apache.org/tomcat/tomcat-9/v${TOMCAT_VERSION}/bin/apache-tomcat-${TOMCAT_VERSION}.zip" cd bsbm-tools || exit @@ -40,8 +42,8 @@ curl -f -X PUT -H 'Content-Type:application/n-triples' -T "explore-${DATASET_SIZ ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm -u http://localhost:8080/rdf4j-server/repositories/bsbm/statements -udataset "explore-update-${DATASET_SIZE}.nt" #./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o 
"../bsbm.businessIntelligence.rdf4j-lmdb.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" http://localhost:8080/rdf4j-server/repositories/bsbm "${CATALINA_HOME}"/bin/shutdown.sh -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data -rm -r "eclipse-rdf4j-${VERSION}" -rm -r "apache-tomcat-${TOMCAT_VERSION}" +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data +rm -rf "eclipse-rdf4j-${VERSION}" +rm -rf "apache-tomcat-${TOMCAT_VERSION}" diff --git a/bench/bsbm_virtuoso.sh b/bench/bsbm_virtuoso.sh index 60ef533e..2ff9b405 100755 --- a/bench/bsbm_virtuoso.sh +++ b/bench/bsbm_virtuoso.sh @@ -3,6 +3,8 @@ DATASET_SIZE=100000 # number of products in the dataset. There is around 350 triples generated by product. PARALLELISM=16 VERSION="7.2.5" + +set -eu cd bsbm-tools ./generate -fc -pc ${DATASET_SIZE} -s nt -fn "explore-${DATASET_SIZE}" -ud -ufn "explore-update-${DATASET_SIZE}" cp ../virtuoso-opensource/database/virtuoso.ini.sample virtuoso.ini @@ -18,7 +20,7 @@ EOF # ./testdriver -mt ${PARALLELISM} -ucf usecases/exploreAndUpdate/sparql.txt -o "../bsbm.exploreAndUpdate.virtuoso.${DATASET_SIZE}.${PARALLELISM}.${PARALLELISM}.${VERSION}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' -u 'http://dba:dba@localhost:8890/sparql-auth?graph-uri=urn:graph:test' -udataset "explore-update-${DATASET_SIZE}.nt" # ./testdriver -mt ${PARALLELISM} -ucf usecases/businessIntelligence/sparql.txt -o "../bsbm.businessIntelligence.virtuoso.${VERSION}.${DATASET_SIZE}.${PARALLELISM}.xml" 'http://localhost:8890/sparql?graph-uri=urn:graph:test' kill $! 
-rm -r ../database -rm "explore-${DATASET_SIZE}.nt" -rm "explore-update-${DATASET_SIZE}.nt" -rm -r td_data +rm -rf ../database +rm -f "explore-${DATASET_SIZE}.nt" +rm -f "explore-update-${DATASET_SIZE}.nt" +rm -rf td_data From a3294a8abdbfaf9603bc628c5ed39ab8fb7e48af Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Thu, 27 Apr 2023 16:37:31 -0400 Subject: [PATCH 06/45] Remove unused JAVA_HOME. If to be used by child processes -- should have been exported --- bench/bsbm_graphdb.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/bench/bsbm_graphdb.sh b/bench/bsbm_graphdb.sh index 2c52dae2..1b865fc8 100755 --- a/bench/bsbm_graphdb.sh +++ b/bench/bsbm_graphdb.sh @@ -3,7 +3,6 @@ DATASET_SIZE=100000 PARALLELISM=16 VERSION="9.3.3" -JAVA_HOME=/usr/lib/jvm/java-11-openjdk set -eu cd bsbm-tools From 029fbf470ea1871b01ba5706393eb392c1036576 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 28 Apr 2023 08:38:34 -0400 Subject: [PATCH 07/45] Disable shellcheck warning about loop running once -- intended --- .clusterfuzzlite/build.sh | 2 ++ 1 file changed, 2 insertions(+) mode change 100644 => 100755 .clusterfuzzlite/build.sh diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh old mode 100644 new mode 100755 index 87af18df..b48e2c4e --- a/.clusterfuzzlite/build.sh +++ b/.clusterfuzzlite/build.sh @@ -15,6 +15,8 @@ function build_seed_corpus() { cd "$SRC"/oxigraph cargo fuzz build -O --debug-assertions +# shellcheck disable=SC2043 +# SC2043 (warning): This loop will only ever run once. 
for TARGET in sparql_eval # sparql_results_json sparql_results_tsv do cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/ From aeeabf5d1cd8790ed8b54f6ce455fe5751c7f4f1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 28 Apr 2023 08:40:11 -0400 Subject: [PATCH 08/45] Replace very cute (ab)use of array to get first element with explicit awk That is to overcome warnings from shellcheck In .clusterfuzzlite/build.sh line 8: hash=($(sha256sum "$file")) ^------------------^ SC2207 (warning): Prefer mapfile or read -a to split command output (or quote to avoid splitting). In .clusterfuzzlite/build.sh line 9: cp "$file" "/tmp/oxigraph_$1/$hash" ^---^ SC2128 (warning): Expanding an array without an index only gives the first element. although original code is making a cute use of an array, if we are to follow shellcheck and use mapfile or read -a - and then use explicit index -- makes it just too cumbersome. IMHO explicit awk would be easier to read etc. --- .clusterfuzzlite/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh index b48e2c4e..31f59bd2 100755 --- a/.clusterfuzzlite/build.sh +++ b/.clusterfuzzlite/build.sh @@ -5,7 +5,7 @@ function build_seed_corpus() { mkdir "/tmp/oxigraph_$1" for file in **/*."$2" do - hash=($(sha256sum "$file")) + hash=$(sha256sum "$file" | awk '{print $1;}') cp "$file" "/tmp/oxigraph_$1/$hash" done zip "$1_seed_corpus.zip" /tmp/"oxigraph_$1"/* From 6f37c4c9c989fe6ed9fa6f8296c64c667c54a000 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 28 Apr 2023 17:08:40 -0400 Subject: [PATCH 09/45] Move shellcheck into tests workflow collecting all kinds of testing --- .github/workflows/shellcheck.yml | 23 ----------------------- .github/workflows/tests.yml | 7 +++++++ 2 files changed, 7 insertions(+), 23 deletions(-) delete mode 100644 .github/workflows/shellcheck.yml diff --git a/.github/workflows/shellcheck.yml 
b/.github/workflows/shellcheck.yml deleted file mode 100644 index ef7175de..00000000 --- a/.github/workflows/shellcheck.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- -name: Shellcheck - -on: - push: - branches: [main] - pull_request: - branches: [main] - -jobs: - shellcheck: - name: Check shell scripts - runs-on: ubuntu-latest - - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Install dependencies - run: | - sudo apt update && sudo apt install -y shellcheck - - name: shellcheck - run: | - git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 097c191a..3eab4c72 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -383,3 +383,10 @@ jobs: mode: coverage sanitizer: coverage storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git + + shellcheck: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: sudo apt install -y shellcheck + - run: git grep -l '^#\( *shellcheck \|!\(/bin/\|/usr/bin/env \)\(sh\|bash\|dash\|ksh\)\)' | xargs shellcheck From 8e3ee3b6ddc519dde2c9e8824f58322e180117dc Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 28 Apr 2023 21:36:18 +0200 Subject: [PATCH 10/45] Upgrades RocksDB to 8.1.1 --- oxrocksdb-sys/rocksdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oxrocksdb-sys/rocksdb b/oxrocksdb-sys/rocksdb index 740854a7..443333d8 160000 --- a/oxrocksdb-sys/rocksdb +++ b/oxrocksdb-sys/rocksdb @@ -1 +1 @@ -Subproject commit 740854a7b0c09450e67e5e29d9979e743485aebf +Subproject commit 443333d8c059c87db408ec2d11685db00031b30a From f520de889341f95b6d846935d3651d2e6763b626 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 28 Apr 2023 21:32:56 +0200 Subject: [PATCH 11/45] Releases v0.3.16 --- CHANGELOG.md | 9 ++++++ Cargo.lock | 69 +++++++++++++++++++++++----------------- js/Cargo.toml | 4 +-- lib/Cargo.toml | 4 +-- 
oxrocksdb-sys/Cargo.toml | 2 +- python/Cargo.toml | 4 +-- server/Cargo.toml | 4 +-- 7 files changed, 57 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b26dc674..58ff2b6a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## [0.3.16] - 2023-04-29 + +### Changed +- Fixes flush and compaction on the GSPO index. It might improve Oxigraph performances and storage space. +- SPARQL: fixes some optimizations in presence quoted triples with nested variables. +- SPARQL profiler: adds EXISTS operation to the explanation and profiling tree. +- Upgrades RocksDB to 8.1.1. + + ## [0.3.15] - 2023-04-18 ### Added diff --git a/Cargo.lock b/Cargo.lock index 52f50962..3ce846c3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "aho-corasick" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" +dependencies = [ + "memchr", +] + [[package]] name = "anes" version = "0.1.6" @@ -141,9 +150,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "9b1ce199063694f33ffb7dd4e0ee620741495c32833cde5aa08f02a0bf96f0c8" [[package]] name = "cast" @@ -215,9 +224,9 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.23" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags", "clap_lex 0.2.4", @@ -299,9 +308,9 @@ checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" dependencies = [ "libc", ] @@ -325,7 +334,7 @@ dependencies = [ "atty", "cast", "ciborium", - "clap 3.2.23", + "clap 3.2.25", "criterion-plot", "itertools", "lazy_static", @@ -487,9 +496,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", "miniz_oxide", @@ -563,7 +572,7 @@ version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "029d74589adefde59de1a0c4f4732695c32805624aec7b68d91503d4dba79afc" dependencies = [ - "aho-corasick", + "aho-corasick 0.7.20", "bstr", "fnv", "log", @@ -774,9 +783,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.141" +version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" [[package]] name = "libloading" @@ -790,9 +799,9 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.3.3" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b085a4f2cde5781fc4b1717f2e86c62f5cda49de7ba99a7c2eae02b61c9064c" +checksum = "b64f40e5e03e0d54f03845c8197d0291253cdbedfb1cb46b13c2c117554a9f4c" [[package]] name = "lock_api" @@ -845,9 +854,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.6.2" +version = "0.7.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" dependencies = [ "adler", ] @@ -927,7 +936,7 @@ dependencies = [ [[package]] name = "oxigraph" -version = "0.3.16-dev" +version = "0.3.16" dependencies = [ "criterion", "digest", @@ -959,7 +968,7 @@ dependencies = [ [[package]] name = "oxigraph_js" -version = "0.3.16-dev" +version = "0.3.16" dependencies = [ "console_error_panic_hook", "js-sys", @@ -969,7 +978,7 @@ dependencies = [ [[package]] name = "oxigraph_server" -version = "0.3.16-dev" +version = "0.3.16" dependencies = [ "anyhow", "assert_cmd", @@ -1027,7 +1036,7 @@ dependencies = [ [[package]] name = "oxrocksdb-sys" -version = "0.3.16-dev" +version = "0.3.16" dependencies = [ "bindgen", "cc", @@ -1279,7 +1288,7 @@ dependencies = [ [[package]] name = "pyoxigraph" -version = "0.3.16-dev" +version = "0.3.16" dependencies = [ "oxigraph", "pyo3", @@ -1375,11 +1384,11 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" dependencies = [ - "aho-corasick", + "aho-corasick 1.0.1", "memchr", "regex-syntax", ] @@ -1392,9 +1401,9 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" [[package]] name = "ring" @@ -1448,9 +1457,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version 
= "0.37.13" +version = "0.37.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f79bef90eb6d984c72722595b5b1348ab39275a5e5123faca6863bf07d75a4e0" +checksum = "a0661814f891c57c930a610266415528da53c4933e6dea5fb350cbfe048a9ece" dependencies = [ "bitflags", "errno", @@ -1690,9 +1699,9 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.12.6" +version = "0.12.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" +checksum = "fd1ba337640d60c3e96bc6f0638a939b9c9a7f2c316a1598c279828b3d1dc8c5" [[package]] name = "tempfile" diff --git a/js/Cargo.toml b/js/Cargo.toml index fa008e7e..ad451d07 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph_js" -version = "0.3.16-dev" +version = "0.3.16" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -14,7 +14,7 @@ crate-type = ["cdylib"] name = "oxigraph" [dependencies] -oxigraph = { version = "0.3.16-dev", path="../lib" } +oxigraph = { version = "0.3.16", path="../lib" } wasm-bindgen = "0.2" js-sys = "0.3" console_error_panic_hook = "0.1" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index c425dd67..d34d8b80 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph" -version = "0.3.16-dev" +version = "0.3.16" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -45,7 +45,7 @@ sparesults = { version = "0.1.7", path="sparesults", features = ["rdf-star"] } [target.'cfg(not(target_family = "wasm"))'.dependencies] libc = "0.2" -oxrocksdb-sys = { version = "0.3.16-dev", path="../oxrocksdb-sys" } +oxrocksdb-sys = { version = "0.3.16", path="../oxrocksdb-sys" } oxhttp = { version = "0.1", optional = true } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] diff --git a/oxrocksdb-sys/Cargo.toml b/oxrocksdb-sys/Cargo.toml index 9ba2d405..2b4fc604 100644 --- 
a/oxrocksdb-sys/Cargo.toml +++ b/oxrocksdb-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrocksdb-sys" -version = "0.3.16-dev" +version = "0.3.16" authors = ["Tpt "] license = "GPL-2.0 OR Apache-2.0" repository = "https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys" diff --git a/python/Cargo.toml b/python/Cargo.toml index 414dc63d..ffd445a0 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyoxigraph" -version = "0.3.16-dev" +version = "0.3.16" authors = ["Tpt"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -19,5 +19,5 @@ doctest = false abi3 = ["pyo3/abi3-py37"] [dependencies] -oxigraph = { version = "0.3.16-dev", path="../lib", features = ["http_client"] } +oxigraph = { version = "0.3.16", path="../lib", features = ["http_client"] } pyo3 = { version = "0.18", features = ["extension-module"] } diff --git a/server/Cargo.toml b/server/Cargo.toml index 0dc7c5ec..040600c3 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph_server" -version = "0.3.16-dev" +version = "0.3.16" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -17,7 +17,7 @@ anyhow = "1" oxhttp = { version = "0.1", features = ["rayon"] } clap = { version = "=4.0", features = ["derive"] } clap_lex = "=0.3.0" -oxigraph = { version = "0.3.16-dev", path = "../lib", features = ["http_client"] } +oxigraph = { version = "0.3.16", path = "../lib", features = ["http_client"] } sparesults = { version = "0.1.7", path = "../lib/sparesults", features = ["rdf-star"] } rand = "0.8" url = "2" From a2d8bcaaa3dfe9a7ad183163d7f6b2f06888c29d Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 29 Apr 2023 21:26:11 +0200 Subject: [PATCH 12/45] Starts 0.3.17 dev --- Cargo.lock | 18 +++++++++--------- js/Cargo.toml | 4 ++-- lib/Cargo.toml | 12 ++++++------ lib/oxrdf/Cargo.toml | 4 ++-- lib/oxsdatatypes/Cargo.toml | 2 +- lib/sparesults/Cargo.toml | 4 ++-- lib/spargebra/Cargo.toml | 4 ++-- 
oxrocksdb-sys/Cargo.toml | 2 +- python/Cargo.toml | 4 ++-- server/Cargo.toml | 6 +++--- 10 files changed, 30 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3ce846c3..052688a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -936,7 +936,7 @@ dependencies = [ [[package]] name = "oxigraph" -version = "0.3.16" +version = "0.3.17-dev" dependencies = [ "criterion", "digest", @@ -968,7 +968,7 @@ dependencies = [ [[package]] name = "oxigraph_js" -version = "0.3.16" +version = "0.3.17-dev" dependencies = [ "console_error_panic_hook", "js-sys", @@ -978,7 +978,7 @@ dependencies = [ [[package]] name = "oxigraph_server" -version = "0.3.16" +version = "0.3.17-dev" dependencies = [ "anyhow", "assert_cmd", @@ -1026,7 +1026,7 @@ checksum = "bb175ec8981211357b7b379869c2f8d555881c55ea62311428ec0de46d89bd5c" [[package]] name = "oxrdf" -version = "0.1.5" +version = "0.1.6-dev" dependencies = [ "oxilangtag", "oxiri", @@ -1036,7 +1036,7 @@ dependencies = [ [[package]] name = "oxrocksdb-sys" -version = "0.3.16" +version = "0.3.17-dev" dependencies = [ "bindgen", "cc", @@ -1045,7 +1045,7 @@ dependencies = [ [[package]] name = "oxsdatatypes" -version = "0.1.1" +version = "0.1.2-dev" dependencies = [ "js-sys", "nom", @@ -1288,7 +1288,7 @@ dependencies = [ [[package]] name = "pyoxigraph" -version = "0.3.16" +version = "0.3.17-dev" dependencies = [ "oxigraph", "pyo3", @@ -1638,7 +1638,7 @@ checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "sparesults" -version = "0.1.7" +version = "0.1.8-dev" dependencies = [ "json-event-parser", "oxrdf", @@ -1647,7 +1647,7 @@ dependencies = [ [[package]] name = "spargebra" -version = "0.2.7" +version = "0.2.8-dev" dependencies = [ "oxilangtag", "oxiri", diff --git a/js/Cargo.toml b/js/Cargo.toml index ad451d07..897e37b3 100644 --- a/js/Cargo.toml +++ b/js/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph_js" -version = "0.3.16" +version = "0.3.17-dev" authors = ["Tpt "] license = "MIT OR 
Apache-2.0" readme = "README.md" @@ -14,7 +14,7 @@ crate-type = ["cdylib"] name = "oxigraph" [dependencies] -oxigraph = { version = "0.3.16", path="../lib" } +oxigraph = { version = "0.3.17-dev", path="../lib" } wasm-bindgen = "0.2" js-sys = "0.3" console_error_panic_hook = "0.1" diff --git a/lib/Cargo.toml b/lib/Cargo.toml index d34d8b80..8c7ecfc9 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph" -version = "0.3.16" +version = "0.3.17-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -38,14 +38,14 @@ hex = "0.4" siphasher = "0.3" lazy_static = "1" json-event-parser = "0.1" -oxrdf = { version = "0.1.5", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } -oxsdatatypes = { version = "0.1.1", path="oxsdatatypes" } -spargebra = { version = "0.2.7", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } -sparesults = { version = "0.1.7", path="sparesults", features = ["rdf-star"] } +oxrdf = { version = "0.1.6-dev", path="oxrdf", features = ["rdf-star", "oxsdatatypes"] } +oxsdatatypes = { version = "0.1.2-dev", path="oxsdatatypes" } +spargebra = { version = "0.2.8-dev", path="spargebra", features = ["rdf-star", "sep-0002", "sep-0006"] } +sparesults = { version = "0.1.8-dev", path="sparesults", features = ["rdf-star"] } [target.'cfg(not(target_family = "wasm"))'.dependencies] libc = "0.2" -oxrocksdb-sys = { version = "0.3.16", path="../oxrocksdb-sys" } +oxrocksdb-sys = { version = "0.3.17-dev", path="../oxrocksdb-sys" } oxhttp = { version = "0.1", optional = true } [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] diff --git a/lib/oxrdf/Cargo.toml b/lib/oxrdf/Cargo.toml index 9bb88fe3..df554630 100644 --- a/lib/oxrdf/Cargo.toml +++ b/lib/oxrdf/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrdf" -version = "0.1.5" +version = "0.1.6-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -21,7 +21,7 @@ rdf-star = [] rand = "0.8" 
oxilangtag = "0.1" oxiri = "0.2" -oxsdatatypes = { version = "0.1.1", path="../oxsdatatypes", optional = true } +oxsdatatypes = { version = "0.1.2-dev", path="../oxsdatatypes", optional = true } [package.metadata.docs.rs] all-features = true diff --git a/lib/oxsdatatypes/Cargo.toml b/lib/oxsdatatypes/Cargo.toml index 54841f88..9f622b28 100644 --- a/lib/oxsdatatypes/Cargo.toml +++ b/lib/oxsdatatypes/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxsdatatypes" -version = "0.1.1" +version = "0.1.2-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" diff --git a/lib/sparesults/Cargo.toml b/lib/sparesults/Cargo.toml index eff7da14..007004eb 100644 --- a/lib/sparesults/Cargo.toml +++ b/lib/sparesults/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sparesults" -version = "0.1.7" +version = "0.1.8-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -19,7 +19,7 @@ rdf-star = ["oxrdf/rdf-star"] [dependencies] json-event-parser = "0.1" -oxrdf = { version = "0.1.5", path="../oxrdf" } +oxrdf = { version = "0.1.6-dev", path="../oxrdf" } quick-xml = "0.28" [package.metadata.docs.rs] diff --git a/lib/spargebra/Cargo.toml b/lib/spargebra/Cargo.toml index 101ef8a7..88efe608 100644 --- a/lib/spargebra/Cargo.toml +++ b/lib/spargebra/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "spargebra" -version = "0.2.7" +version = "0.2.8-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -24,7 +24,7 @@ peg = "0.8" rand = "0.8" oxiri = "0.2" oxilangtag = "0.1" -oxrdf = { version = "0.1.5", path="../oxrdf" } +oxrdf = { version = "0.1.6-dev", path="../oxrdf" } [package.metadata.docs.rs] all-features = true diff --git a/oxrocksdb-sys/Cargo.toml b/oxrocksdb-sys/Cargo.toml index 2b4fc604..fea41139 100644 --- a/oxrocksdb-sys/Cargo.toml +++ b/oxrocksdb-sys/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxrocksdb-sys" -version = "0.3.16" +version = "0.3.17-dev" authors = ["Tpt "] license = "GPL-2.0 OR Apache-2.0" repository = 
"https://github.com/oxigraph/oxigraph/tree/main/oxrocksdb-sys" diff --git a/python/Cargo.toml b/python/Cargo.toml index ffd445a0..20488c8c 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pyoxigraph" -version = "0.3.16" +version = "0.3.17-dev" authors = ["Tpt"] license = "MIT OR Apache-2.0" readme = "README.md" @@ -19,5 +19,5 @@ doctest = false abi3 = ["pyo3/abi3-py37"] [dependencies] -oxigraph = { version = "0.3.16", path="../lib", features = ["http_client"] } +oxigraph = { version = "0.3.17-dev", path="../lib", features = ["http_client"] } pyo3 = { version = "0.18", features = ["extension-module"] } diff --git a/server/Cargo.toml b/server/Cargo.toml index 040600c3..82ba6ab6 100644 --- a/server/Cargo.toml +++ b/server/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "oxigraph_server" -version = "0.3.16" +version = "0.3.17-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -17,8 +17,8 @@ anyhow = "1" oxhttp = { version = "0.1", features = ["rayon"] } clap = { version = "=4.0", features = ["derive"] } clap_lex = "=0.3.0" -oxigraph = { version = "0.3.16", path = "../lib", features = ["http_client"] } -sparesults = { version = "0.1.7", path = "../lib/sparesults", features = ["rdf-star"] } +oxigraph = { version = "0.3.17-dev", path = "../lib", features = ["http_client"] } +sparesults = { version = "0.1.8-dev", path = "../lib/sparesults", features = ["rdf-star"] } rand = "0.8" url = "2" oxiri = "0.2" From 7175784356887da4a998d6fef01ad2edbfcb9025 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 29 Apr 2023 11:51:50 +0200 Subject: [PATCH 13/45] Improves Clippy lint list --- .cargo/config.toml | 68 ++++-- js/src/model.rs | 2 +- js/src/store.rs | 8 +- lib/oxrdf/src/dataset.rs | 4 +- lib/oxrdf/src/interning.rs | 9 +- lib/oxrdf/src/lib.rs | 1 - lib/oxsdatatypes/src/decimal.rs | 2 +- lib/oxsdatatypes/src/duration.rs | 6 +- lib/oxsdatatypes/src/integer.rs | 10 +- lib/oxsdatatypes/src/lib.rs | 2 +- 
lib/sparesults/src/csv.rs | 14 +- lib/sparesults/src/lib.rs | 4 +- lib/sparesults/src/solution.rs | 3 +- lib/sparesults/src/xml.rs | 22 +- lib/spargebra/src/lib.rs | 1 - lib/spargebra/src/parser.rs | 19 +- lib/src/io/error.rs | 12 +- lib/src/lib.rs | 2 +- lib/src/sparql/dataset.rs | 2 +- lib/src/sparql/eval.rs | 366 +++++++++++++--------------- lib/src/sparql/model.rs | 7 +- lib/src/sparql/plan.rs | 26 +- lib/src/sparql/plan_builder.rs | 165 ++++++------- lib/src/sparql/update.rs | 17 +- lib/src/storage/backend/fallback.rs | 27 +- lib/src/storage/backend/rocksdb.rs | 60 ++--- lib/src/storage/mod.rs | 5 +- lib/src/store.rs | 4 +- lib/tests/store.rs | 2 +- python/src/io.rs | 12 +- python/src/sparql.rs | 2 +- python/src/store.rs | 8 +- server/src/main.rs | 6 +- testsuite/src/manifest.rs | 2 +- testsuite/src/sparql_evaluator.rs | 26 +- 35 files changed, 457 insertions(+), 469 deletions(-) diff --git a/.cargo/config.toml b/.cargo/config.toml index be86f8ae..e248c473 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -2,94 +2,124 @@ rustflags = [ "-Wtrivial-casts", "-Wtrivial-numeric-casts", - "-Wunsafe_code", + "-Wunsafe-code", "-Wunused-lifetimes", "-Wunused-qualifications", + # TODO: 1.63+ "-Wclippy::as-underscore", + # TODO: 1.65+ ""-Wclippy::bool-to-int-with-if", + "-Wclippy::borrow-as-ptr", + "-Wclippy::case-sensitive-file-extension-comparisons", "-Wclippy::cast-lossless", "-Wclippy::cast-possible-truncation", "-Wclippy::cast-possible-wrap", "-Wclippy::cast-precision-loss", + "-Wclippy::cast-ptr-alignment", "-Wclippy::cast-sign-loss", "-Wclippy::checked-conversions", + "-Wclippy::clone-on-ref-ptr", "-Wclippy::cloned-instead-of-copied", "-Wclippy::copy-iterator", "-Wclippy::dbg-macro", - "-Wclippy::debug-assert-with-mut-call", "-Wclippy::decimal-literal-representation", - "-Wclippy::empty-line-after-outer-attr", + "-Wclippy::default-trait-access", + "-Wclippy::default-union-representation", + # TODO: 1.61+ "-Wclippy::deref-by-slicing", + # TODO: 1.63+ 
"-Wclippy::doc-link-with-quotes", + # TODO: 1.62+ "-Wclippy::empty-drop", "-Wclippy::empty-enum", + # TODO: on major version "-Wclippy::empty-structs-with-brackets", "-Wclippy::enum-glob-use", + "-Wclippy::exit", "-Wclippy::expect-used", "-Wclippy::expl-impl-clone-on-copy", "-Wclippy::explicit-deref-methods", "-Wclippy::explicit-into-iter-loop", "-Wclippy::explicit-iter-loop", - "-Wclippy::fallible-impl-from", "-Wclippy::filter-map-next", "-Wclippy::flat-map-option", + "-Wclippy::fn-to-numeric-cast-any", + # TODO: 1.62+ "-Wclippy::format-push-string", "-Wclippy::from-iter-instead-of-collect", "-Wclippy::get-unwrap", "-Wclippy::if-not-else", + "-Wclippy::if-then-some-else-none", "-Wclippy::implicit-clone", - "-Wclippy::implicit-saturating-sub", - "-Wclippy::imprecise-flops", "-Wclippy::inconsistent-struct-constructor", + "-Wclippy::index-refutable-slice", "-Wclippy::inefficient-to-string", "-Wclippy::inline-always", + "-Wclippy::inline-asm-x86-att-syntax", + "-Wclippy::inline-asm-x86-intel-syntax", "-Wclippy::invalid-upcast-comparisons", "-Wclippy::items-after-statements", "-Wclippy::large-digit-groups", + # TODO: 1.68+ "-Wclippy::large-futures", "-Wclippy::large-stack-arrays", "-Wclippy::large-types-passed-by-value", "-Wclippy::let-underscore-must-use", "-Wclippy::let-unit-value", "-Wclippy::linkedlist", + "-Wclippy::lossy-float-literal", "-Wclippy::macro-use-imports", + "-Wclippy::manual-assert", + # TODO: 1.65+ "-Wclippy::manual-instant-elapsed", + # TODO: 1.67+ "-Wclippy::manual-let-else", "-Wclippy::manual-ok-or", - "-Wclippy::map-flatten", + # TODO: 1.65+ "-Wclippy::manual-string-new", + "-Wclippy::many-single-char-names", "-Wclippy::map-unwrap-or", "-Wclippy::match-bool", "-Wclippy::match-same-arms", "-Wclippy::match-wildcard-for-single-variants", "-Wclippy::maybe-infinite-iter", "-Wclippy::mem-forget", + # TODO: 1.63+ "-Wclippy::mismatching-type-param-order", "-Wclippy::multiple-inherent-impl", "-Wclippy::mut-mut", - "-Wclippy::mutex-integer", + 
"-Wclippy::mutex-atomic", "-Wclippy::naive-bytecount", "-Wclippy::needless-bitwise-bool", "-Wclippy::needless-continue", "-Wclippy::needless-pass-by-value", + "-Wclippy::no-effect-underscore-binding", + # TODO: 1.69+ "-Wclippy::no-mangle-with-rust-abi", "-Wclippy::non-ascii-literal", - "-Wclippy::nonstandard-macro-braces", - "-Wclippy::path-buf-push-overwrite", "-Wclippy::print-stderr", "-Wclippy::print-stdout", + "-Wclippy::ptr-as-ptr", "-Wclippy::range-minus-one", "-Wclippy::range-plus-one", + "-Wclippy::rc-buffer", "-Wclippy::rc-mutex", - "-Wclippy::enum-variant-names", + "-Wclippy::redundant-closure-for-method-calls", "-Wclippy::redundant-else", - "-Wclippy::redundant-pub-crate", + "-Wclippy::redundant-feature-names", "-Wclippy::ref-binding-to-reference", "-Wclippy::ref-option-ref", "-Wclippy::rest-pat-in-fully-bound-structs", + "-Wclippy::return-self-not-must-use", "-Wclippy::same-functions-in-if-condition", + # TODO: strange failure on 1.60 "-Wclippy::same-name-method", + # TODO: 1.68+ "-Wclippy::semicolon-outside-block", + "-Wclippy::single-match-else", + "-Wclippy::stable-sort-primitive", "-Wclippy::str-to-string", "-Wclippy::string-add", "-Wclippy::string-add-assign", "-Wclippy::string-lit-as-bytes", "-Wclippy::string-to-string", - "-Wclippy::suboptimal-flops", - "-Wclippy::suspicious-operation-groupings", + # TODO: 1.67+ "-Wclippy::suspicious-xor-used-as-pow", "-Wclippy::todo", - "-Wclippy::trait-duplication-in-bounds", "-Wclippy::transmute-ptr-to-ptr", - "-Wclippy::trivial-regex", "-Wclippy::trivially-copy-pass-by-ref", - "-Wclippy::type-repetition-in-bounds", + "-Wclippy::try-err", "-Wclippy::unicode-not-nfc", "-Wclippy::unimplemented", + # TODO: 1.66+ "-Wclippy::uninlined-format-args", + # TODO: 1.70+ "-Wclippy::unnecessary-box-returns", + # TODO: 1.61+ "-Wclippy::unnecessary-join", + # TODO: 1.67+ "-Wclippy::unnecessary-safety-comment", + # TODO: 1.67+ "-Wclippy::unnecessary-safety-doc", "-Wclippy::unnecessary-self-imports", 
"-Wclippy::unnecessary-wraps", "-Wclippy::unneeded-field-pattern", @@ -99,13 +129,9 @@ rustflags = [ "-Wclippy::unused-async", "-Wclippy::unused-self", "-Wclippy::use-debug", - "-Wclippy::use-self", "-Wclippy::used-underscore-binding", - "-Wclippy::useless-let-if-seq", - "-Wclippy::useless-transmute", "-Wclippy::verbose-bit-mask", "-Wclippy::verbose-file-reads", "-Wclippy::wildcard-dependencies", "-Wclippy::zero-sized-map-values", - "-Wclippy::wrong-self-convention", ] \ No newline at end of file diff --git a/js/src/model.rs b/js/src/model.rs index 179dafce..4929068f 100644 --- a/js/src/model.rs +++ b/js/src/model.rs @@ -19,7 +19,7 @@ thread_local! { #[wasm_bindgen(js_name = namedNode)] pub fn named_node(value: String) -> Result { NamedNode::new(value) - .map(|v| v.into()) + .map(Into::into) .map_err(|v| UriError::new(&v.to_string()).into()) } diff --git a/js/src/store.rs b/js/src/store.rs index 13b3f6b3..adee6eef 100644 --- a/js/src/store.rs +++ b/js/src/store.rs @@ -76,28 +76,28 @@ impl JsStore { None } .as_ref() - .map(|t: &NamedOrBlankNode| t.into()), + .map(<&Subject>::into), if let Some(predicate) = FROM_JS.with(|c| c.to_optional_term(predicate))? { Some(NamedNode::try_from(predicate)?) } else { None } .as_ref() - .map(|t: &NamedNode| t.into()), + .map(<&NamedNode>::into), if let Some(object) = FROM_JS.with(|c| c.to_optional_term(object))? { Some(object.try_into()?) } else { None } .as_ref() - .map(|t: &Term| t.into()), + .map(<&Term>::into), if let Some(graph_name) = FROM_JS.with(|c| c.to_optional_term(graph_name))? { Some(graph_name.try_into()?) 
} else { None } .as_ref() - .map(|t: &GraphName| t.into()), + .map(<&GraphName>::into), ) .map(|v| v.map(|v| JsQuad::from(v).into())) .collect::, _>>() diff --git a/lib/oxrdf/src/dataset.rs b/lib/oxrdf/src/dataset.rs index 12d07880..9925f7ca 100644 --- a/lib/oxrdf/src/dataset.rs +++ b/lib/oxrdf/src/dataset.rs @@ -705,9 +705,7 @@ impl Dataset { InternedTerm, InternedGraphName, )> { - let b_prime = partition - .iter() - .find_map(|(_, b)| if b.len() > 1 { Some(b) } else { None }); + let b_prime = partition.iter().find_map(|(_, b)| (b.len() > 1).then(|| b)); if let Some(b_prime) = b_prime { b_prime .iter() diff --git a/lib/oxrdf/src/interning.rs b/lib/oxrdf/src/interning.rs index 41725dca..54c8acde 100644 --- a/lib/oxrdf/src/interning.rs +++ b/lib/oxrdf/src/interning.rs @@ -463,11 +463,10 @@ impl InternedTriple { predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?, object: InternedTerm::encoded_from(triple.object, interner)?, }; - if interner.triples.contains_key(&interned_triple) { - Some(interned_triple) - } else { - None - } + interner + .triples + .contains_key(&interned_triple) + .then(|| interned_triple) } pub fn next(&self) -> Self { diff --git a/lib/oxrdf/src/lib.rs b/lib/oxrdf/src/lib.rs index 9d40ead4..aa6f712b 100644 --- a/lib/oxrdf/src/lib.rs +++ b/lib/oxrdf/src/lib.rs @@ -1,5 +1,4 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] diff --git a/lib/oxsdatatypes/src/decimal.rs b/lib/oxsdatatypes/src/decimal.rs index 3b49e229..c7fa6ba9 100644 --- a/lib/oxsdatatypes/src/decimal.rs +++ b/lib/oxsdatatypes/src/decimal.rs @@ -380,7 +380,7 @@ impl FromStr for Decimal { }; let mut value = 0_i128; - let with_before_dot = input.first().map_or(false, |c| c.is_ascii_digit()); + let with_before_dot = input.first().map_or(false, u8::is_ascii_digit); while let 
Some(c) = input.first() { if c.is_ascii_digit() { value = value diff --git a/lib/oxsdatatypes/src/duration.rs b/lib/oxsdatatypes/src/duration.rs index 27f255eb..18d42912 100644 --- a/lib/oxsdatatypes/src/duration.rs +++ b/lib/oxsdatatypes/src/duration.rs @@ -170,8 +170,10 @@ impl fmt::Display for Duration { let h = (s_int % 86400) / 3600; let m = (s_int % 3600) / 60; let s = ss - .checked_sub(Decimal::try_from(d * 86400 + h * 3600 + m * 60).unwrap()) - .unwrap(); //could not fail + .checked_sub( + Decimal::try_from(d * 86400 + h * 3600 + m * 60).map_err(|_| fmt::Error)?, + ) + .ok_or(fmt::Error)?; if d != 0 { write!(f, "{d}D")?; diff --git a/lib/oxsdatatypes/src/integer.rs b/lib/oxsdatatypes/src/integer.rs index 50f2d002..016096b3 100644 --- a/lib/oxsdatatypes/src/integer.rs +++ b/lib/oxsdatatypes/src/integer.rs @@ -258,9 +258,9 @@ mod tests { assert!(Integer::try_from(Float::from(f32::MIN)).is_err()); assert!(Integer::try_from(Float::from(f32::MAX)).is_err()); assert!( - Integer::try_from(Float::from(1_672_507_302_466.)) + Integer::try_from(Float::from(1_672_507_300_000.)) .unwrap() - .checked_sub(Integer::from_str("1672507302466")?) + .checked_sub(Integer::from_str("1672507300000")?) .unwrap() .abs() < Integer::from(1_000_000) @@ -283,12 +283,12 @@ mod tests { Some(Integer::from_str("-123")?) 
); assert!( - Integer::try_from(Double::from(1_672_507_302_466.)) + Integer::try_from(Double::from(1_672_507_300_000.)) .unwrap() - .checked_sub(Integer::from_str("1672507302466").unwrap()) + .checked_sub(Integer::from_str("1672507300000").unwrap()) .unwrap() .abs() - < Integer::from(1) + < Integer::from(10) ); assert!(Integer::try_from(Double::from(f64::NAN)).is_err()); assert!(Integer::try_from(Double::from(f64::INFINITY)).is_err()); diff --git a/lib/oxsdatatypes/src/lib.rs b/lib/oxsdatatypes/src/lib.rs index 7c06ca9e..67737b13 100644 --- a/lib/oxsdatatypes/src/lib.rs +++ b/lib/oxsdatatypes/src/lib.rs @@ -1,9 +1,9 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] +#![allow(clippy::return_self_not_must_use)] mod boolean; mod date_time; diff --git a/lib/sparesults/src/csv.rs b/lib/sparesults/src/csv.rs index 14991fe6..b365c4ac 100644 --- a/lib/sparesults/src/csv.rs +++ b/lib/sparesults/src/csv.rs @@ -160,7 +160,7 @@ fn write_tsv_term<'a>(term: impl Into>, sink: &mut impl Write) -> io let value = literal.value(); if let Some(language) = literal.language() { write_tsv_quoted_str(value, sink)?; - write!(sink, "@{}", language) + write!(sink, "@{language}") } else { match literal.datatype() { xsd::BOOLEAN if is_turtle_boolean(value) => sink.write_all(value.as_bytes()), @@ -216,7 +216,7 @@ fn is_turtle_integer(value: &str) -> bool { } else if let Some(v) = value.strip_prefix(b"-") { value = v; } - !value.is_empty() && value.iter().all(|c| c.is_ascii_digit()) + !value.is_empty() && value.iter().all(u8::is_ascii_digit) } fn is_turtle_decimal(value: &str) -> bool { @@ -227,7 +227,7 @@ fn is_turtle_decimal(value: &str) -> bool { } else if let Some(v) = value.strip_prefix(b"-") { value = v; } - 
while value.first().map_or(false, |c| c.is_ascii_digit()) { + while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; } if let Some(v) = value.strip_prefix(b".") { @@ -235,7 +235,7 @@ fn is_turtle_decimal(value: &str) -> bool { } else { return false; } - !value.is_empty() && value.iter().all(|c| c.is_ascii_digit()) + !value.is_empty() && value.iter().all(u8::is_ascii_digit) } fn is_turtle_double(value: &str) -> bool { @@ -248,14 +248,14 @@ fn is_turtle_double(value: &str) -> bool { value = v; } let mut with_before = false; - while value.first().map_or(false, |c| c.is_ascii_digit()) { + while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; with_before = true; } let mut with_after = false; if let Some(v) = value.strip_prefix(b".") { value = v; - while value.first().map_or(false, |c| c.is_ascii_digit()) { + while value.first().map_or(false, u8::is_ascii_digit) { value = &value[1..]; with_after = true; } @@ -272,7 +272,7 @@ fn is_turtle_double(value: &str) -> bool { } else if let Some(v) = value.strip_prefix(b"-") { value = v; } - (with_before || with_after) && !value.is_empty() && value.iter().all(|c| c.is_ascii_digit()) + (with_before || with_after) && !value.is_empty() && value.iter().all(u8::is_ascii_digit) } pub enum TsvQueryResultsReader { diff --git a/lib/sparesults/src/lib.rs b/lib/sparesults/src/lib.rs index dc9a3075..b30c17f0 100644 --- a/lib/sparesults/src/lib.rs +++ b/lib/sparesults/src/lib.rs @@ -1,5 +1,4 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] @@ -276,6 +275,7 @@ pub enum QueryResultsReader { /// } /// # Result::<(),sparesults::ParseError>::Ok(()) /// ``` +#[allow(clippy::rc_buffer)] pub struct SolutionsReader { variables: Rc>, solutions: SolutionsReaderKind, @@ -318,7 +318,7 @@ impl Iterator for SolutionsReader { 
SolutionsReaderKind::Tsv(reader) => reader.read_next(), } .transpose()? - .map(|values| (self.variables.clone(), values).into()), + .map(|values| (Rc::clone(&self.variables), values).into()), ) } } diff --git a/lib/sparesults/src/solution.rs b/lib/sparesults/src/solution.rs index b1be7c7d..a8059204 100644 --- a/lib/sparesults/src/solution.rs +++ b/lib/sparesults/src/solution.rs @@ -18,6 +18,7 @@ use std::rc::Rc; /// assert_eq!(solution.get("foo"), Some(&Literal::from(1).into())); // Get the value of the variable ?foo if it exists (here yes). /// assert_eq!(solution.get(1), None); // Get the value of the second column if it exists (here no). /// ``` +#[allow(clippy::rc_buffer)] pub struct QuerySolution { variables: Rc>, values: Vec>, @@ -69,7 +70,7 @@ impl QuerySolution { /// ``` #[inline] pub fn is_empty(&self) -> bool { - self.values.iter().all(|v| v.is_none()) + self.values.iter().all(Option::is_none) } /// Returns an iterator over bound variables. diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs index fd0ed3b4..d493e7d9 100644 --- a/lib/sparesults/src/xml.rs +++ b/lib/sparesults/src/xml.rs @@ -482,20 +482,31 @@ impl XmlSolutionsReader { } state = State::Triple; } - State::Uri => state = self.stack.pop().unwrap(), + State::Uri => { + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))? + } State::BNode => { if term.is_none() { //We default to a random bnode term = Some(BlankNode::default().into()) } - state = self.stack.pop().unwrap() + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))? 
} State::Literal => { if term.is_none() { //We default to the empty literal term = Some(build_literal("", lang.take(), datatype.take())?.into()) } - state = self.stack.pop().unwrap(); + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))?; } State::Triple => { #[cfg(feature = "rdf-star")] @@ -530,7 +541,10 @@ impl XmlSolutionsReader { ) .into(), ); - state = self.stack.pop().unwrap(); + state = self + .stack + .pop() + .ok_or_else(|| SyntaxError::msg("Empty stack"))?; } else { return Err( SyntaxError::msg("A should contain a , a and an ").into() diff --git a/lib/spargebra/src/lib.rs b/lib/spargebra/src/lib.rs index dc0e2aa7..e2d093f3 100644 --- a/lib/spargebra/src/lib.rs +++ b/lib/spargebra/src/lib.rs @@ -1,5 +1,4 @@ #![doc = include_str!("../README.md")] -#![deny(unsafe_code)] #![doc(test(attr(deny(warnings))))] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 689b95ce..0f85b830 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -352,7 +352,7 @@ impl> From> for FocusedTripleOrPathPattern fn from(input: FocusedTriplePattern) -> Self { Self { focus: input.focus.into(), - patterns: input.patterns.into_iter().map(|p| p.into()).collect(), + patterns: input.patterns.into_iter().map(Into::into).collect(), } } } @@ -736,7 +736,7 @@ impl ParserState { let aggregates = self.aggregates.last_mut().ok_or("Unexpected aggregate")?; Ok(aggregates .iter() - .find_map(|(v, a)| if a == &agg { Some(v) } else { None }) + .find_map(|(v, a)| (a == &agg).then(|| v)) .cloned() .unwrap_or_else(|| { let new_var = variable(); @@ -884,13 +884,14 @@ impl<'a> Iterator for UnescapeCharsIterator<'a> { } match self.iter.next()? 
{ '\\' => match self.iter.next() { - Some(ch) => match self.replacement.get(ch) { - Some(replace) => Some(replace), - None => { + Some(ch) => { + if let Some(replace) = self.replacement.get(ch) { + Some(replace) + } else { self.buffer = Some(ch); Some('\\') } - }, + } None => Some('\\'), }, c => Some(c), @@ -1590,7 +1591,7 @@ parser! { //[74] rule ConstructTriples() -> Vec = p:ConstructTriples_item() ** ("." _) "."? { - p.into_iter().flat_map(|c| c.into_iter()).collect() + p.into_iter().flatten().collect() } rule ConstructTriples_item() -> Vec = t:TriplesSameSubject() _ { t } @@ -1701,7 +1702,7 @@ parser! { //[83] rule PropertyListPathNotEmpty() -> FocusedTripleOrPathPattern)>> = hp:(VerbPath() / VerbSimple()) _ ho:ObjectListPath() _ t:PropertyListPathNotEmpty_item()* { - t.into_iter().flat_map(|e| e.into_iter()).fold(FocusedTripleOrPathPattern { + t.into_iter().flatten().fold(FocusedTripleOrPathPattern { focus: vec![(hp, ho.focus)], patterns: ho.patterns }, |mut a, b| { @@ -2036,7 +2037,7 @@ parser! { //[121] rule BuiltInCall() -> Expression = - a:Aggregate() {? state.new_aggregation(a).map(|v| v.into()) } / + a:Aggregate() {? 
state.new_aggregation(a).map(Into::into) } / i("STR") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Str, vec![e]) } / i("LANG") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Lang, vec![e]) } / i("LANGMATCHES") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::LangMatches, vec![a, b]) } / diff --git a/lib/src/io/error.rs b/lib/src/io/error.rs index 5584169b..6a90404b 100644 --- a/lib/src/io/error.rs +++ b/lib/src/io/error.rs @@ -45,12 +45,14 @@ impl Error for ParseError { } } -#[allow(clippy::fallible_impl_from)] impl From for ParseError { #[inline] fn from(error: TurtleError) -> Self { let error = io::Error::from(error); - if error.get_ref().map_or(false, |e| e.is::()) { + if error.get_ref().map_or( + false, + <(dyn Error + Send + Sync + 'static)>::is::, + ) { Self::Syntax(SyntaxError { inner: SyntaxErrorKind::Turtle(*error.into_inner().unwrap().downcast().unwrap()), }) @@ -60,12 +62,14 @@ impl From for ParseError { } } -#[allow(clippy::fallible_impl_from)] impl From for ParseError { #[inline] fn from(error: RdfXmlError) -> Self { let error = io::Error::from(error); - if error.get_ref().map_or(false, |e| e.is::()) { + if error.get_ref().map_or( + false, + <(dyn Error + Send + Sync + 'static)>::is::, + ) { Self::Syntax(SyntaxError { inner: SyntaxErrorKind::RdfXml(*error.into_inner().unwrap().downcast().unwrap()), }) diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 2b67f0c9..29ef24ae 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -3,7 +3,7 @@ #![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![doc(test(attr(deny(warnings))))] -#![deny(unsafe_code)] +#![allow(clippy::return_self_not_must_use)] pub mod io; pub mod sparql; diff --git a/lib/src/sparql/dataset.rs b/lib/src/sparql/dataset.rs index 75191258..bf7e6195 100644 --- a/lib/src/sparql/dataset.rs +++ b/lib/src/sparql/dataset.rs @@ -40,7 +40,7 
@@ impl DatasetView { ) -> impl Iterator> + 'static { self.reader .quads_for_pattern(subject, predicate, object, graph_name) - .map(|t| t.map_err(|e| e.into())) + .map(|t| t.map_err(Into::into)) } #[allow(clippy::needless_collect)] diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index a71b95cb..3650e2d0 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -65,6 +65,7 @@ impl SimpleEvaluator { } } + #[allow(clippy::rc_buffer)] pub fn evaluate_select_plan( &self, plan: Rc, @@ -73,7 +74,7 @@ impl SimpleEvaluator { let (eval, stats) = self.plan_evaluator(plan); ( QueryResults::Solutions(decode_bindings( - self.dataset.clone(), + Rc::clone(&self.dataset), eval(EncodedTuple::with_capacity(variables.len())), variables, )), @@ -164,16 +165,16 @@ impl SimpleEvaluator { graph_pattern, .. } => { - let variables = variables.clone(); + let variables = Rc::clone(variables); let silent = *silent; let service_name = service_name.clone(); - let graph_pattern = graph_pattern.clone(); + let graph_pattern = Rc::clone(graph_pattern); let eval = self.clone(); Rc::new(move |from| { match eval.evaluate_service( &service_name, &graph_pattern, - variables.clone(), + Rc::clone(&variables), &from, ) { Ok(result) => Box::new(result.filter_map(move |binding| { @@ -201,7 +202,7 @@ impl SimpleEvaluator { let predicate = TupleSelector::from(predicate); let object = TupleSelector::from(object); let graph_name = TupleSelector::from(graph_name); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |from| { let iter = dataset.encoded_quads_for_pattern( get_pattern_value(&subject, &from).as_ref(), @@ -233,16 +234,16 @@ impl SimpleEvaluator { graph_name, } => { let subject = TupleSelector::from(subject); - let path = path.clone(); + let path = Rc::clone(path); let object = TupleSelector::from(object); let graph_name = TupleSelector::from(graph_name); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); 
Rc::new(move |from| { let input_subject = get_pattern_value(&subject, &from); let input_object = get_pattern_value(&object, &from); let input_graph_name = get_pattern_value(&graph_name, &from); let path_eval = PathEvaluator { - dataset: dataset.clone(), + dataset: Rc::clone(&dataset), }; match (input_subject, input_object, input_graph_name) { (Some(input_subject), Some(input_object), Some(input_graph_name)) => { @@ -384,9 +385,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); if join_keys.is_empty() { // Cartesian product @@ -430,12 +431,12 @@ impl SimpleEvaluator { } } PlanNode::ForLoopJoin { left, right } => { - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); Rc::new(move |from| { - let right = right.clone(); + let right = Rc::clone(&right); Box::new(left(from).flat_map(move |t| match t { Ok(t) => right(t), Err(e) => Box::new(once(Err(e))), @@ -448,9 +449,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); if join_keys.is_empty() { Rc::new(move |from| { @@ -491,9 
+492,9 @@ impl SimpleEvaluator { .intersection(&right.always_bound_variables()) .copied() .collect(); - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); let expression = self.expression_evaluator(expression, &mut stat_children); // Real hash join @@ -511,7 +512,7 @@ impl SimpleEvaluator { left_iter: left(from), right: right_values, buffered_results: errors, - expression: expression.clone(), + expression: Rc::clone(&expression), }) }) } @@ -520,36 +521,36 @@ impl SimpleEvaluator { right, possible_problem_vars, } => { - let (left, left_stats) = self.plan_evaluator(left.clone()); + let (left, left_stats) = self.plan_evaluator(Rc::clone(left)); stat_children.push(left_stats); - let (right, right_stats) = self.plan_evaluator(right.clone()); + let (right, right_stats) = self.plan_evaluator(Rc::clone(right)); stat_children.push(right_stats); - let possible_problem_vars = possible_problem_vars.clone(); + let possible_problem_vars = Rc::clone(possible_problem_vars); Rc::new(move |from| { if possible_problem_vars.is_empty() { Box::new(ForLoopLeftJoinIterator { - right_evaluator: right.clone(), + right_evaluator: Rc::clone(&right), left_iter: left(from), current_right: Box::new(empty()), }) } else { Box::new(BadForLoopLeftJoinIterator { from_tuple: from.clone(), - right_evaluator: right.clone(), + right_evaluator: Rc::clone(&right), left_iter: left(from), - current_left: None, + current_left: EncodedTuple::with_capacity(0), current_right: Box::new(empty()), - problem_vars: possible_problem_vars.clone(), + problem_vars: Rc::clone(&possible_problem_vars), }) } }) } PlanNode::Filter { child, expression } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = 
self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let expression = self.expression_evaluator(expression, &mut stat_children); Rc::new(move |from| { - let expression = expression.clone(); + let expression = Rc::clone(&expression); Box::new(child(from).filter(move |tuple| { match tuple { Ok(tuple) => expression(tuple) @@ -564,7 +565,7 @@ impl SimpleEvaluator { let children: Vec<_> = children .iter() .map(|child| { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); child }) @@ -583,12 +584,12 @@ impl SimpleEvaluator { variable, expression, } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let position = variable.encoded; let expression = self.expression_evaluator(expression, &mut stat_children); Rc::new(move |from| { - let expression = expression.clone(); + let expression = Rc::clone(&expression); Box::new(child(from).map(move |tuple| { let mut tuple = tuple?; if let Some(value) = expression(&tuple) { @@ -599,7 +600,7 @@ impl SimpleEvaluator { }) } PlanNode::Sort { child, by } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let by: Vec<_> = by .iter() @@ -612,7 +613,7 @@ impl SimpleEvaluator { ), }) .collect(); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |from| { let mut errors = Vec::default(); let mut values = child(from) @@ -657,12 +658,12 @@ impl SimpleEvaluator { }) } PlanNode::HashDeduplicate { child } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); Rc::new(move |from| Box::new(hash_deduplicate(child(from)))) } 
PlanNode::Reduced { child } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); Rc::new(move |from| { Box::new(ConsecutiveDeduplication { @@ -672,23 +673,23 @@ impl SimpleEvaluator { }) } PlanNode::Skip { child, count } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let count = *count; Rc::new(move |from| Box::new(child(from).skip(count))) } PlanNode::Limit { child, count } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); let count = *count; Rc::new(move |from| Box::new(child(from).take(count))) } PlanNode::Project { child, mapping } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); - let mapping = mapping.clone(); + let mapping = Rc::clone(mapping); Rc::new(move |from| { - let mapping = mapping.clone(); + let mapping = Rc::clone(&mapping); let mut input_tuple = EncodedTuple::with_capacity(mapping.len()); for (input_key, output_key) in mapping.iter() { if let Some(value) = from.get(output_key.encoded) { @@ -724,9 +725,9 @@ impl SimpleEvaluator { key_variables, aggregates, } => { - let (child, child_stats) = self.plan_evaluator(child.clone()); + let (child, child_stats) = self.plan_evaluator(Rc::clone(child)); stat_children.push(child_stats); - let key_variables = key_variables.clone(); + let key_variables = Rc::clone(key_variables); let aggregate_input_expressions: Vec<_> = aggregates .iter() .map(|(aggregate, _)| { @@ -750,7 +751,7 @@ impl SimpleEvaluator { aggregates.iter().map(|(_, var)| var.encoded).collect(); Rc::new(move |from| { let tuple_size = from.capacity(); - let key_variables 
= key_variables.clone(); + let key_variables = Rc::clone(&key_variables); let mut errors = Vec::default(); let mut accumulators_for_group = HashMap::>, Vec>>::default(); @@ -825,7 +826,7 @@ impl SimpleEvaluator { exec_duration: Cell::new(std::time::Duration::from_secs(0)), }); if self.run_stats { - let stats = stats.clone(); + let stats = Rc::clone(&stats); evaluator = Rc::new(move |tuple| { let start = Timer::now(); let inner = evaluator(tuple); @@ -834,7 +835,7 @@ impl SimpleEvaluator { .set(stats.exec_duration.get() + start.elapsed()); Box::new(StatsIterator { inner, - stats: stats.clone(), + stats: Rc::clone(&stats), }) }) } @@ -845,7 +846,7 @@ impl SimpleEvaluator { &self, service_name: &PatternValue, graph_pattern: &GraphPattern, - variables: Rc>, + variables: Rc<[Variable]>, from: &EncodedTuple, ) -> Result { let service_name = get_pattern_value(&service_name.into(), from) @@ -862,7 +863,7 @@ impl SimpleEvaluator { parsing_duration: None, }, )? { - Ok(encode_bindings(self.dataset.clone(), variables, iter)) + Ok(encode_bindings(Rc::clone(&self.dataset), variables, iter)) } else { Err(EvaluationError::msg( "The service call has not returned a set of solutions", @@ -892,12 +893,12 @@ impl SimpleEvaluator { } } PlanAggregationFunction::Min => { - let dataset = dataset.clone(); - Box::new(move || Box::new(MinAccumulator::new(dataset.clone()))) + let dataset = Rc::clone(dataset); + Box::new(move || Box::new(MinAccumulator::new(Rc::clone(&dataset)))) } // DISTINCT does not make sense with min PlanAggregationFunction::Max => { - let dataset = dataset.clone(); - Box::new(move || Box::new(MaxAccumulator::new(dataset.clone()))) + let dataset = Rc::clone(dataset); + Box::new(move || Box::new(MaxAccumulator::new(Rc::clone(&dataset)))) } // DISTINCT does not make sense with max PlanAggregationFunction::Avg => { if distinct { @@ -908,20 +909,20 @@ impl SimpleEvaluator { } PlanAggregationFunction::Sample => Box::new(|| Box::::default()), // DISTINCT does not make sense with 
sample PlanAggregationFunction::GroupConcat { separator } => { - let dataset = dataset.clone(); - let separator = separator.clone(); + let dataset = Rc::clone(dataset); + let separator = Rc::clone(separator); if distinct { Box::new(move || { Box::new(DistinctAccumulator::new(GroupConcatAccumulator::new( - dataset.clone(), - separator.clone(), + Rc::clone(&dataset), + Rc::clone(&separator), ))) }) } else { Box::new(move || { Box::new(GroupConcatAccumulator::new( - dataset.clone(), - separator.clone(), + Rc::clone(&dataset), + Rc::clone(&separator), )) }) } @@ -949,7 +950,7 @@ impl SimpleEvaluator { Rc::new(move |tuple| tuple.get(v).cloned()) } PlanExpression::Exists(plan) => { - let (eval, stats) = self.plan_evaluator(plan.clone()); + let (eval, stats) = self.plan_evaluator(Rc::clone(plan)); stat_children.push(stats); Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into())) } @@ -959,13 +960,7 @@ impl SimpleEvaluator { Rc::new(move |tuple| match a(tuple).and_then(|v| to_bool(&v)) { Some(true) => Some(true.into()), Some(false) => b(tuple), - None => { - if Some(true) == a(tuple).and_then(|v| to_bool(&v)) { - Some(true.into()) - } else { - None - } - } + None => (Some(true) == a(tuple).and_then(|v| to_bool(&v))).then(|| true.into()), }) } PlanExpression::And(a, b) => { @@ -975,23 +970,19 @@ impl SimpleEvaluator { Some(true) => b(tuple), Some(false) => Some(false.into()), None => { - if Some(false) == b(tuple).and_then(|v| to_bool(&v)) { - Some(false.into()) - } else { - None - } + (Some(false) == b(tuple).and_then(|v| to_bool(&v))).then(|| false.into()) } }) } PlanExpression::Equal(a, b) => { let a = self.expression_evaluator(a, stat_children); let b = self.expression_evaluator(b, stat_children); - Rc::new(move |tuple| equals(&a(tuple)?, &b(tuple)?).map(|v| v.into())) + Rc::new(move |tuple| equals(&a(tuple)?, &b(tuple)?).map(Into::into)) } PlanExpression::Greater(a, b) => { let a = self.expression_evaluator(a, stat_children); let b = 
self.expression_evaluator(b, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( (partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? == Ordering::Greater) @@ -1002,7 +993,7 @@ impl SimpleEvaluator { PlanExpression::GreaterOrEqual(a, b) => { let a = self.expression_evaluator(a, stat_children); let b = self.expression_evaluator(b, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( match partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? { @@ -1016,7 +1007,7 @@ impl SimpleEvaluator { PlanExpression::Less(a, b) => { let a = self.expression_evaluator(a, stat_children); let b = self.expression_evaluator(b, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some((partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? == Ordering::Less).into()) }) @@ -1024,7 +1015,7 @@ impl SimpleEvaluator { PlanExpression::LessOrEqual(a, b) => { let a = self.expression_evaluator(a, stat_children); let b = self.expression_evaluator(b, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( match partial_cmp(&dataset, &a(tuple)?, &b(tuple)?)? { @@ -1191,7 +1182,7 @@ impl SimpleEvaluator { } PlanExpression::Str(e) | PlanExpression::StringCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some(build_string_literal_from_id(to_string_id( &dataset, @@ -1201,7 +1192,7 @@ impl SimpleEvaluator { } PlanExpression::Lang(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::SmallSmallLangStringLiteral { language, .. } | EncodedTerm::BigSmallLangStringLiteral { language, .. 
} => { @@ -1218,7 +1209,7 @@ impl SimpleEvaluator { PlanExpression::LangMatches(language_tag, language_range) => { let language_tag = self.expression_evaluator(language_tag, stat_children); let language_range = self.expression_evaluator(language_range, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let mut language_tag = to_simple_string(&dataset, &language_tag(tuple)?)?; language_tag.make_ascii_lowercase(); @@ -1243,7 +1234,7 @@ impl SimpleEvaluator { } PlanExpression::Datatype(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| datatype(&dataset, &e(tuple)?)) } PlanExpression::Bound(v) => { @@ -1252,7 +1243,7 @@ impl SimpleEvaluator { } PlanExpression::Iri(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); let base_iri = self.base_iri.clone(); Rc::new(move |tuple| { let e = e(tuple)?; @@ -1276,7 +1267,7 @@ impl SimpleEvaluator { PlanExpression::BNode(id) => match id { Some(id) => { let id = self.expression_evaluator(id, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some( dataset.encode_term( @@ -1339,7 +1330,7 @@ impl SimpleEvaluator { .iter() .map(|e| self.expression_evaluator(e, stat_children)) .collect(); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let mut result = String::default(); let mut language = None; @@ -1367,7 +1358,7 @@ impl SimpleEvaluator { let length = length .as_ref() .map(|l| self.expression_evaluator(l, stat_children)); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (source, language) = to_string_and_language(&dataset, &source(tuple)?)?; @@ -1411,7 +1402,7 @@ impl SimpleEvaluator { } 
PlanExpression::StrLen(arg) => { let arg = self.expression_evaluator(arg, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some((to_string(&dataset, &arg(tuple)?)?.chars().count() as i64).into()) }) @@ -1420,7 +1411,7 @@ impl SimpleEvaluator { let arg = self.expression_evaluator(arg, stat_children); let regex = regex.clone(); let replacement = self.expression_evaluator(replacement, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (text, language) = to_string_and_language(&dataset, &arg(tuple)?)?; let replacement = to_simple_string(&dataset, &replacement(tuple)?)?; @@ -1438,7 +1429,7 @@ impl SimpleEvaluator { let flags = flags .as_ref() .map(|flags| self.expression_evaluator(flags, stat_children)); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let pattern = to_simple_string(&dataset, &pattern(tuple)?)?; let options = if let Some(flags) = &flags { @@ -1458,7 +1449,7 @@ impl SimpleEvaluator { } PlanExpression::UCase(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (value, language) = to_string_and_language(&dataset, &e(tuple)?)?; Some(build_plain_literal( @@ -1470,7 +1461,7 @@ impl SimpleEvaluator { } PlanExpression::LCase(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (value, language) = to_string_and_language(&dataset, &e(tuple)?)?; Some(build_plain_literal( @@ -1483,7 +1474,7 @@ impl SimpleEvaluator { PlanExpression::StrStarts(arg1, arg2) => { let arg1 = self.expression_evaluator(arg1, stat_children); let arg2 = self.expression_evaluator(arg2, stat_children); - let dataset = self.dataset.clone(); + let dataset = 
Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, _) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1492,7 +1483,7 @@ impl SimpleEvaluator { } PlanExpression::EncodeForUri(ltrl) => { let ltrl = self.expression_evaluator(ltrl, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let ltlr = to_string(&dataset, <rl(tuple)?)?; let mut result = Vec::with_capacity(ltlr.len()); @@ -1527,7 +1518,7 @@ impl SimpleEvaluator { PlanExpression::StrEnds(arg1, arg2) => { let arg1 = self.expression_evaluator(arg1, stat_children); let arg2 = self.expression_evaluator(arg2, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, _) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1537,7 +1528,7 @@ impl SimpleEvaluator { PlanExpression::Contains(arg1, arg2) => { let arg1 = self.expression_evaluator(arg1, stat_children); let arg2 = self.expression_evaluator(arg2, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, _) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1547,7 +1538,7 @@ impl SimpleEvaluator { PlanExpression::StrBefore(arg1, arg2) => { let arg1 = self.expression_evaluator(arg1, stat_children); let arg2 = self.expression_evaluator(arg2, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let (arg1, arg2, language) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1561,7 +1552,7 @@ impl SimpleEvaluator { PlanExpression::StrAfter(arg1, arg2) => { let arg1 = self.expression_evaluator(arg1, stat_children); let arg2 = self.expression_evaluator(arg2, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); 
Rc::new(move |tuple| { let (arg1, arg2, language) = to_argument_compatible_strings(&dataset, &arg1(tuple)?, &arg2(tuple)?)?; @@ -1648,7 +1639,7 @@ impl SimpleEvaluator { } PlanExpression::Tz(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let timezone_offset = match e(tuple)? { EncodedTerm::DateTimeLiteral(date_time) => date_time.timezone_offset(), @@ -1706,7 +1697,7 @@ impl SimpleEvaluator { Rc::new(move |_| Some(now.into())) } PlanExpression::Uuid => { - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |_| { let mut buffer = String::with_capacity(44); buffer.push_str("urn:uuid:"); @@ -1715,7 +1706,7 @@ impl SimpleEvaluator { }) } PlanExpression::StrUuid => { - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |_| { let mut buffer = String::with_capacity(36); generate_uuid(&mut buffer); @@ -1756,7 +1747,7 @@ impl SimpleEvaluator { PlanExpression::StrLang(lexical_form, lang_tag) => { let lexical_form = self.expression_evaluator(lexical_form, stat_children); let lang_tag = self.expression_evaluator(lang_tag, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { Some(build_lang_string_literal_from_id( to_simple_string_id(&lexical_form(tuple)?)?, @@ -1767,7 +1758,7 @@ impl SimpleEvaluator { PlanExpression::StrDt(lexical_form, datatype) => { let lexical_form = self.expression_evaluator(lexical_form, stat_children); let datatype = self.expression_evaluator(datatype, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let value = to_simple_string(&dataset, &lexical_form(tuple)?)?; let datatype = if let EncodedTerm::NamedNode { iri_id } = datatype(tuple)? 
{ @@ -1815,7 +1806,7 @@ impl SimpleEvaluator { } PlanExpression::StaticRegex(text, regex) => { let text = self.expression_evaluator(text, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); let regex = regex.clone(); Rc::new(move |tuple| { let text = to_string(&dataset, &text(tuple)?)?; @@ -1828,7 +1819,7 @@ impl SimpleEvaluator { let flags = flags .as_ref() .map(|flags| self.expression_evaluator(flags, stat_children)); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let pattern = to_simple_string(&dataset, &pattern(tuple)?)?; let options = if let Some(flags) = &flags { @@ -1849,15 +1840,11 @@ impl SimpleEvaluator { let s = s(tuple)?; let p = p(tuple)?; let o = o(tuple)?; - if !s.is_literal() + (!s.is_literal() && !s.is_default_graph() && p.is_named_node() - && !o.is_default_graph() - { - Some(EncodedTriple::new(s, p, o).into()) - } else { - None - } + && !o.is_default_graph()) + .then(|| EncodedTriple::new(s, p, o).into()) }) } PlanExpression::Subject(e) => { @@ -1908,7 +1895,7 @@ impl SimpleEvaluator { } PlanExpression::DoubleCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Double::from(value).into()), EncodedTerm::DoubleLiteral(value) => Some(value.into()), @@ -1924,7 +1911,7 @@ impl SimpleEvaluator { } PlanExpression::FloatCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::FloatLiteral(value) => Some(value.into()), EncodedTerm::DoubleLiteral(value) => Some(Float::from(value).into()), @@ -1940,7 +1927,7 @@ impl SimpleEvaluator { } PlanExpression::IntegerCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Integer::try_from(value).ok()?.into()), EncodedTerm::DoubleLiteral(value) => { @@ -1960,7 +1947,7 @@ impl SimpleEvaluator { } PlanExpression::DecimalCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::FloatLiteral(value) => Some(Decimal::try_from(value).ok()?.into()), EncodedTerm::DoubleLiteral(value) => { @@ -1980,7 +1967,7 @@ impl SimpleEvaluator { } PlanExpression::DateCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DateLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(Date::try_from(value).ok()?.into()), @@ -1993,7 +1980,7 @@ impl SimpleEvaluator { } PlanExpression::TimeCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::TimeLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(Time::try_from(value).ok()?.into()), @@ -2006,7 +1993,7 @@ impl SimpleEvaluator { } PlanExpression::DateTimeCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DateTimeLiteral(value) => Some(value.into()), EncodedTerm::DateLiteral(value) => Some(DateTime::try_from(value).ok()?.into()), @@ -2019,7 +2006,7 @@ impl SimpleEvaluator { } PlanExpression::DurationCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => Some(value.into()), EncodedTerm::YearMonthDurationLiteral(value) => { @@ -2037,7 +2024,7 @@ impl SimpleEvaluator { } PlanExpression::YearMonthDurationCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? { EncodedTerm::DurationLiteral(value) => { Some(YearMonthDuration::try_from(value).ok()?.into()) @@ -2052,7 +2039,7 @@ impl SimpleEvaluator { } PlanExpression::DayTimeDurationCast(e) => { let e = self.expression_evaluator(e, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::DurationLiteral(value) => { Some(DayTimeDuration::try_from(value).ok()?.into()) @@ -2071,7 +2058,7 @@ impl SimpleEvaluator { .iter() .map(|e| self.expression_evaluator(e, stat_children)) .collect::>(); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let args = args .iter() @@ -2092,7 +2079,7 @@ impl SimpleEvaluator { stat_children: &mut Vec>, ) -> Rc Option> { let arg = self.expression_evaluator(arg, stat_children); - let dataset = self.dataset.clone(); + let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { let input = to_simple_string(&dataset, &arg(tuple)?)?; let hash = hex::encode(H::new().chain_update(input.as_str()).finalize()); @@ -2305,11 +2292,7 @@ fn to_argument_compatible_strings( ) -> Option<(String, String, Option)> { let (value1, language1) = to_string_and_language(dataset, arg1)?; let (value2, language2) = to_string_and_language(dataset, arg2)?; - if language2.is_none() || language1 == language2 { - Some((value1, value2, language1)) - } else { - None - } + (language2.is_none() || language1 == language2).then(|| (value1, value2, language1)) } pub(super) fn compile_pattern(pattern: &str, flags: Option<&str>) -> Option { @@ -2337,6 +2320,7 @@ pub(super) fn compile_pattern(pattern: &str, flags: Option<&str>) -> Option, iter: EncodedTuplesIterator, @@ -2360,7 +2344,7 @@ fn decode_bindings( // this is used to encode results from a BindingIterator into an EncodedTuplesIterator. 
This happens when SERVICE clauses are evaluated fn encode_bindings( dataset: Rc, - variables: Rc>, + variables: Rc<[Variable]>, iter: QuerySolutionIter, ) -> EncodedTuplesIterator { Box::new(iter.map(move |solution| { @@ -3074,20 +3058,10 @@ fn put_pattern_value( tuple: &mut EncodedTuple, ) -> Option<()> { match selector { - TupleSelector::Constant(c) => { - if *c == value { - Some(()) - } else { - None - } - } + TupleSelector::Constant(c) => (*c == value).then(|| ()), TupleSelector::Variable(v) => { if let Some(old) = tuple.get(*v) { - if value == *old { - Some(()) - } else { - None - } + (value == *old).then(|| ()) } else { tuple.set(*v, value); Some(()) @@ -3163,11 +3137,9 @@ impl PathEvaluator { .find_map(|middle| { middle .and_then(|middle| { - Ok(if self.eval_closed_in_graph(b, &middle, end, graph_name)? { - Some(()) - } else { - None - }) + Ok(self + .eval_closed_in_graph(b, &middle, end, graph_name)? + .then(|| ())) }) .transpose() }) @@ -3233,12 +3205,12 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_closed_in_unknown_graph(p, end, start), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); let end = end.clone(); Box::new(self.eval_from_in_unknown_graph(a, start).flat_map_ok( move |(middle, graph_name)| { eval.eval_closed_in_graph(&b, &middle, &end, &graph_name) - .map(|is_found| if is_found { Some(graph_name) } else { None }) + .map(|is_found| is_found.then(|| graph_name)) .transpose() }, )) @@ -3251,21 +3223,21 @@ impl PathEvaluator { let eval = self.clone(); let start2 = start.clone(); let end = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { look_in_transitive_closure( Some(Ok(start2.clone())), |e| eval.eval_from_in_graph(&p, &e, &graph_name), &end, ) - .map(|is_found| if is_found { Some(graph_name) } else { None }) + .map(|is_found| is_found.then(|| graph_name)) .transpose() }) } 
PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); let end = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new( self.eval_from_in_unknown_graph(&p, start) .filter_map(move |r| { @@ -3275,13 +3247,7 @@ impl PathEvaluator { |e| eval.eval_from_in_graph(&p, &e, &graph_name), &end, ) - .map(|is_found| { - if is_found { - Some(graph_name) - } else { - None - } - }) + .map(|is_found| is_found.then(|| graph_name)) }) .transpose() }), @@ -3294,16 +3260,16 @@ impl PathEvaluator { let eval = self.clone(); let start2 = start.clone(); let end = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { eval.eval_closed_in_graph(&p, &start2, &end, &graph_name) - .map(|is_found| if is_found { Some(graph_name) } else { None }) + .map(|is_found| is_found.then(|| graph_name)) .transpose() }) } } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(start), None, Some(end), None) @@ -3342,7 +3308,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_to_in_graph(p, start, graph_name), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); let graph_name2 = graph_name.clone(); Box::new( self.eval_from_in_graph(a, start, graph_name) @@ -3358,7 +3324,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { self.run_if_term_is_a_graph_node(start, graph_name, || { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(start.clone())), move |e| { eval.eval_from_in_graph(&p, &e, &graph_name2) @@ -3367,7 +3333,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.eval_from_in_graph(&p, start, graph_name), @@ 
-3383,7 +3349,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(start), None, None, Some(graph_name)) @@ -3419,7 +3385,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_to_in_unknown_graph(p, start), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); Box::new(self.eval_from_in_unknown_graph(a, start).flat_map_ok( move |(middle, graph_name)| { eval.eval_from_in_graph(&b, &middle, &graph_name) @@ -3434,10 +3400,10 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { let start2 = start.clone(); let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { let eval = eval.clone(); - let p = p.clone(); + let p = Rc::clone(&p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(start2.clone())), move |e| { eval.eval_from_in_graph(&p, &e, &graph_name2) @@ -3447,7 +3413,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.eval_from_in_unknown_graph(&p, start), move |(e, graph_name)| { @@ -3459,7 +3425,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrOne(p) => { let eval = self.clone(); let start2 = start.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(start, move |graph_name| { hash_deduplicate(once(Ok(start2.clone())).chain(eval.eval_from_in_graph( &p, @@ -3470,7 +3436,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(start), None, None, None) @@ -3504,7 +3470,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_from_in_graph(p, end, graph_name), PlanPropertyPath::Sequence(a, b) => 
{ let eval = self.clone(); - let a = a.clone(); + let a = Rc::clone(a); let graph_name2 = graph_name.clone(); Box::new( self.eval_to_in_graph(b, end, graph_name) @@ -3520,7 +3486,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { self.run_if_term_is_a_graph_node(end, graph_name, || { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(end.clone())), move |e| { eval.eval_to_in_graph(&p, &e, &graph_name2) @@ -3529,7 +3495,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.eval_to_in_graph(&p, end, graph_name), @@ -3544,7 +3510,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(None, None, Some(end), Some(graph_name)) @@ -3579,7 +3545,7 @@ impl PathEvaluator { PlanPropertyPath::Reverse(p) => self.eval_from_in_unknown_graph(p, end), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let a = a.clone(); + let a = Rc::clone(a); Box::new(self.eval_to_in_unknown_graph(b, end).flat_map_ok( move |(middle, graph_name)| { eval.eval_from_in_graph(&a, &middle, &graph_name) @@ -3594,10 +3560,10 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrMore(p) => { let end2 = end.clone(); let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(end, move |graph_name| { let eval = eval.clone(); - let p = p.clone(); + let p = Rc::clone(&p); let graph_name2 = graph_name.clone(); transitive_closure(Some(Ok(end2.clone())), move |e| { eval.eval_to_in_graph(&p, &e, &graph_name2) @@ -3607,7 +3573,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( 
self.eval_to_in_unknown_graph(&p, end), move |(e, graph_name)| { @@ -3619,7 +3585,7 @@ impl PathEvaluator { PlanPropertyPath::ZeroOrOne(p) => { let eval = self.clone(); let end2 = end.clone(); - let p = p.clone(); + let p = Rc::clone(p); self.run_if_term_is_a_dataset_node(end, move |graph_name| { hash_deduplicate(once(Ok(end2.clone())).chain(eval.eval_to_in_graph( &p, @@ -3630,7 +3596,7 @@ impl PathEvaluator { }) } PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(Some(end), None, None, None) @@ -3666,7 +3632,7 @@ impl PathEvaluator { ), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); let graph_name2 = graph_name.clone(); Box::new(self.eval_open_in_graph(a, graph_name).flat_map_ok( move |(start, middle)| { @@ -3681,7 +3647,7 @@ impl PathEvaluator { )), PlanPropertyPath::ZeroOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.get_subject_or_object_identity_pairs_in_graph(graph_name), @@ -3693,7 +3659,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); let graph_name2 = graph_name.clone(); Box::new(transitive_closure( self.eval_open_in_graph(&p, graph_name), @@ -3708,7 +3674,7 @@ impl PathEvaluator { .chain(self.eval_open_in_graph(p, graph_name)), )), PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(None, None, None, Some(graph_name)) @@ -3744,7 +3710,7 @@ impl PathEvaluator { ), PlanPropertyPath::Sequence(a, b) => { let eval = self.clone(); - let b = b.clone(); + let b = Rc::clone(b); Box::new(self.eval_open_in_unknown_graph(a).flat_map_ok( move |(start, middle, graph_name)| { eval.eval_from_in_graph(&b, &middle, &graph_name) @@ -3758,7 
+3724,7 @@ impl PathEvaluator { )), PlanPropertyPath::ZeroOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.get_subject_or_object_identity_pairs_in_dataset(), move |(start, middle, graph_name)| { @@ -3769,7 +3735,7 @@ impl PathEvaluator { } PlanPropertyPath::OneOrMore(p) => { let eval = self.clone(); - let p = p.clone(); + let p = Rc::clone(p); Box::new(transitive_closure( self.eval_open_in_unknown_graph(&p), move |(start, middle, graph_name)| { @@ -3783,7 +3749,7 @@ impl PathEvaluator { .chain(self.eval_open_in_unknown_graph(p)), )), PlanPropertyPath::NegatedPropertySet(ps) => { - let ps = ps.clone(); + let ps = Rc::clone(ps); Box::new( self.dataset .encoded_quads_for_pattern(None, None, None, None) @@ -4050,9 +4016,9 @@ struct BadForLoopLeftJoinIterator { from_tuple: EncodedTuple, right_evaluator: Rc EncodedTuplesIterator>, left_iter: EncodedTuplesIterator, - current_left: Option, + current_left: EncodedTuple, current_right: EncodedTuplesIterator, - problem_vars: Rc>, + problem_vars: Rc<[usize]>, } impl Iterator for BadForLoopLeftJoinIterator { @@ -4062,9 +4028,7 @@ impl Iterator for BadForLoopLeftJoinIterator { for right_tuple in &mut self.current_right { match right_tuple { Ok(right_tuple) => { - if let Some(combined) = - right_tuple.combine_with(self.current_left.as_ref().unwrap()) - { + if let Some(combined) = right_tuple.combine_with(&self.current_left) { return Some(Ok(combined)); } } @@ -4086,7 +4050,7 @@ impl Iterator for BadForLoopLeftJoinIterator { match right_tuple { Ok(right_tuple) => { if let Some(combined) = right_tuple.combine_with(&left_tuple) { - self.current_left = Some(left_tuple); + self.current_left = left_tuple; return Some(Ok(combined)); } } @@ -4270,8 +4234,8 @@ impl Iterator for DescribeIterator { .eval .dataset .decode_quad(&quad) - .map(|q| q.into()) - .map_err(|e| e.into()), + .map(Into::into) + .map_err(Into::into), Err(error) => Err(error), }); } @@ -4522,9 +4486,9 
@@ impl Accumulator for SumAccumulator { self.sum = match operands { NumericBinaryOperands::Float(v1, v2) => Some((v1 + v2).into()), NumericBinaryOperands::Double(v1, v2) => Some((v1 + v2).into()), - NumericBinaryOperands::Integer(v1, v2) => v1.checked_add(v2).map(|v| v.into()), - NumericBinaryOperands::Decimal(v1, v2) => v1.checked_add(v2).map(|v| v.into()), - NumericBinaryOperands::Duration(v1, v2) => v1.checked_add(v2).map(|v| v.into()), + NumericBinaryOperands::Integer(v1, v2) => v1.checked_add(v2).map(Into::into), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_add(v2).map(Into::into), + NumericBinaryOperands::Duration(v1, v2) => v1.checked_add(v2).map(Into::into), _ => None, }; } else { @@ -4562,9 +4526,9 @@ impl Accumulator for AvgAccumulator { NumericBinaryOperands::Float(v1, v2) => Some((v1 / v2).into()), NumericBinaryOperands::Double(v1, v2) => Some((v1 / v2).into()), NumericBinaryOperands::Integer(v1, v2) => { - Decimal::from(v1).checked_div(v2).map(|v| v.into()) + Decimal::from(v1).checked_div(v2).map(Into::into) } - NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2).map(|v| v.into()), + NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2).map(Into::into), _ => None, } } @@ -4649,14 +4613,14 @@ struct GroupConcatAccumulator { dataset: Rc, concat: Option, language: Option>, - separator: Rc, + separator: Rc, } impl GroupConcatAccumulator { - fn new(dataset: Rc, separator: Rc) -> Self { + fn new(dataset: Rc, separator: Rc) -> Self { Self { dataset, - concat: Some("".to_owned()), + concat: Some(String::new()), language: None, separator, } diff --git a/lib/src/sparql/model.rs b/lib/src/sparql/model.rs index 70cad955..d7c69ca7 100644 --- a/lib/src/sparql/model.rs +++ b/lib/src/sparql/model.rs @@ -160,6 +160,7 @@ impl From> for QueryResults { /// } /// # Result::<_,Box>::Ok(()) /// ``` +#[allow(clippy::rc_buffer)] pub struct QuerySolutionIter { variables: Rc>, iter: Box>>, @@ -171,8 +172,10 @@ impl QuerySolutionIter { iter: impl 
Iterator>, EvaluationError>> + 'static, ) -> Self { Self { - variables: variables.clone(), - iter: Box::new(iter.map(move |t| t.map(|values| (variables.clone(), values).into()))), + variables: Rc::clone(&variables), + iter: Box::new( + iter.map(move |t| t.map(|values| (Rc::clone(&variables), values).into())), + ), } } diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index bdd4b9e2..c9af7f22 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -22,7 +22,7 @@ pub enum PlanNode { }, Service { service_name: PatternValue, - variables: Rc>, + variables: Rc<[Variable]>, child: Rc, graph_pattern: Rc, silent: bool, @@ -71,7 +71,7 @@ pub enum PlanNode { ForLoopLeftJoin { left: Rc, right: Rc, - possible_problem_vars: Rc>, //Variables that should not be part of the entry of the left join + possible_problem_vars: Rc<[usize]>, //Variables that should not be part of the entry of the left join }, Extend { child: Rc, @@ -99,13 +99,13 @@ pub enum PlanNode { }, Project { child: Rc, - mapping: Rc>, // pairs of (variable key in child, variable key in output) + mapping: Rc<[(PlanVariable, PlanVariable)]>, // pairs of (variable key in child, variable key in output) }, Aggregate { // By definition the group by key are the range 0..key_mapping.len() child: Rc, - key_variables: Rc>, - aggregates: Rc>, + key_variables: Rc<[PlanVariable]>, + aggregates: Rc<[(PlanAggregation, PlanVariable)]>, }, } @@ -236,7 +236,10 @@ impl PlanNode { match self { Self::StaticBindings { encoded_tuples, .. 
} => { let mut variables = BTreeMap::default(); // value true iff always bound - let max_tuple_length = encoded_tuples.iter().map(|t| t.capacity()).fold(0, max); + let max_tuple_length = encoded_tuples + .iter() + .map(EncodedTuple::capacity) + .fold(0, max); for tuple in encoded_tuples { for key in 0..max_tuple_length { match variables.entry(key) { @@ -649,6 +652,7 @@ impl PlanExpression { } impl fmt::Display for PlanExpression { + #[allow(clippy::many_single_char_names)] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Variable(v) => { @@ -838,7 +842,7 @@ pub enum PlanAggregationFunction { Max, Avg, Sample, - GroupConcat { separator: Rc }, + GroupConcat { separator: Rc }, } #[derive(Debug, Clone)] @@ -850,7 +854,7 @@ pub enum PlanPropertyPath { ZeroOrMore(Rc), OneOrMore(Rc), ZeroOrOne(Rc), - NegatedPropertySet(Rc>>), + NegatedPropertySet(Rc<[PlanTerm]>), } impl fmt::Display for PlanPropertyPath { @@ -1046,7 +1050,7 @@ impl PlanNodeWithStats { "Aggregate({})", key_variables .iter() - .map(|c| c.to_string()) + .map(ToString::to_string) .chain(aggregates.iter().map(|(agg, v)| format!("{agg} -> {v}"))) .collect::>() .join(", ") @@ -1107,7 +1111,7 @@ impl PlanNodeWithStats { format!( "Sort({})", by.iter() - .map(|c| c.to_string()) + .map(ToString::to_string) .collect::>() .join(", ") ) @@ -1117,7 +1121,7 @@ impl PlanNodeWithStats { "StaticBindings({})", variables .iter() - .map(|v| v.to_string()) + .map(ToString::to_string) .collect::>() .join(", ") ) diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 0b152db1..9e6472ec 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -122,7 +122,7 @@ impl<'a> PlanBuilder<'a> { PlanNode::ForLoopLeftJoin { left: Rc::new(left), right: Rc::new(right), - possible_problem_vars: Rc::new(possible_problem_vars.into_iter().collect()), + possible_problem_vars: possible_problem_vars.into_iter().collect(), } } else { PlanNode::HashLeftJoin { @@ -191,7 
+191,7 @@ impl<'a> PlanBuilder<'a> { let service_name = self.pattern_value_from_named_node_or_variable(name, variables); PlanNode::Service { service_name, - variables: Rc::new(variables.clone()), + variables: Rc::from(variables.as_slice()), child: Rc::new(child), graph_pattern: Rc::new(inner.as_ref().clone()), silent: *silent, @@ -203,22 +203,19 @@ impl<'a> PlanBuilder<'a> { aggregates, } => PlanNode::Aggregate { child: Rc::new(self.build_for_graph_pattern(inner, variables, graph_name)?), - key_variables: Rc::new( - by.iter() - .map(|k| build_plan_variable(variables, k)) - .collect(), - ), - aggregates: Rc::new( - aggregates - .iter() - .map(|(v, a)| { - Ok(( - self.build_for_aggregate(a, variables, graph_name)?, - build_plan_variable(variables, v), - )) - }) - .collect::, EvaluationError>>()?, - ), + key_variables: by + .iter() + .map(|k| build_plan_variable(variables, k)) + .collect(), + aggregates: aggregates + .iter() + .map(|(v, a)| { + Ok(( + self.build_for_aggregate(a, variables, graph_name)?, + build_plan_variable(variables, v), + )) + }) + .collect::>()?, }, GraphPattern::Values { variables: table_variables, @@ -283,21 +280,19 @@ impl<'a> PlanBuilder<'a> { &mut inner_variables, &inner_graph_name, )?), - mapping: Rc::new( - projection - .iter() - .enumerate() - .map(|(new_variable, variable)| { - ( - PlanVariable { - encoded: new_variable, - plain: variable.clone(), - }, - build_plan_variable(variables, variable), - ) - }) - .collect(), - ), + mapping: projection + .iter() + .enumerate() + .map(|(new_variable, variable)| { + ( + PlanVariable { + encoded: new_variable, + plain: variable.clone(), + }, + build_plan_variable(variables, variable), + ) + }) + .collect(), } } GraphPattern::Distinct { inner } => PlanNode::HashDeduplicate { @@ -378,16 +373,14 @@ impl<'a> PlanBuilder<'a> { PropertyPathExpression::ZeroOrOne(p) => { PlanPropertyPath::ZeroOrOne(Rc::new(self.build_for_path(p))) } - PropertyPathExpression::NegatedPropertySet(p) => { - 
PlanPropertyPath::NegatedPropertySet(Rc::new( - p.iter() - .map(|p| PlanTerm { - encoded: self.build_term(p), - plain: p.clone(), - }) - .collect(), - )) - } + PropertyPathExpression::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet( + p.iter() + .map(|p| PlanTerm { + encoded: self.build_term(p), + plain: p.clone(), + }) + .collect(), + ), } } @@ -1084,7 +1077,7 @@ impl<'a> PlanBuilder<'a> { separator, } => Ok(PlanAggregation { function: PlanAggregationFunction::GroupConcat { - separator: Rc::new(separator.clone().unwrap_or_else(|| " ".to_owned())), + separator: Rc::from(separator.as_deref().unwrap_or(" ")), }, parameter: Some(self.build_for_expression(expr, variables, graph_name)?), distinct: *distinct, @@ -1219,13 +1212,11 @@ impl<'a> PlanBuilder<'a> { } fn convert_plan_variable(from_variable: &PlanVariable, to: &mut Vec) -> PlanVariable { - let encoded = if let Some(to_id) = to.iter().enumerate().find_map(|(to_id, var)| { - if *var == from_variable.plain { - Some(to_id) - } else { - None - } - }) { + let encoded = if let Some(to_id) = to + .iter() + .enumerate() + .find_map(|(to_id, var)| (*var == from_variable.plain).then(|| to_id)) + { to_id } else { to.push(Variable::new_unchecked(format!("{:x}", random::()))); @@ -1423,25 +1414,25 @@ impl<'a> PlanBuilder<'a> { if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { PlanNode::HashJoin { - left: Rc::new(self.push_filter(left.clone(), filter.clone())), - right: Rc::new(self.push_filter(right.clone(), filter)), + left: Rc::new(self.push_filter(Rc::clone(left), filter.clone())), + right: Rc::new(self.push_filter(Rc::clone(right), filter)), } } else { PlanNode::HashJoin { - left: Rc::new(self.push_filter(left.clone(), filter)), - right: right.clone(), + left: Rc::new(self.push_filter(Rc::clone(left), filter)), + right: Rc::clone(right), } } } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { 
PlanNode::HashJoin { - left: left.clone(), - right: Rc::new(self.push_filter(right.clone(), filter)), + left: Rc::clone(left), + right: Rc::new(self.push_filter(Rc::clone(right), filter)), } } else { PlanNode::Filter { child: Rc::new(PlanNode::HashJoin { - left: left.clone(), - right: right.clone(), + left: Rc::clone(left), + right: Rc::clone(right), }), expression: filter, } @@ -1450,20 +1441,20 @@ impl<'a> PlanBuilder<'a> { PlanNode::ForLoopJoin { left, right } => { if filter_variables.iter().all(|v| left.is_variable_bound(*v)) { PlanNode::ForLoopJoin { - left: Rc::new(self.push_filter(left.clone(), filter)), - right: right.clone(), + left: Rc::new(self.push_filter(Rc::clone(left), filter)), + right: Rc::clone(right), } } else if filter_variables.iter().all(|v| right.is_variable_bound(*v)) { PlanNode::ForLoopJoin { //TODO: should we do that always? - left: left.clone(), - right: Rc::new(self.push_filter(right.clone(), filter)), + left: Rc::clone(left), + right: Rc::new(self.push_filter(Rc::clone(right), filter)), } } else { PlanNode::Filter { child: Rc::new(PlanNode::HashJoin { - left: left.clone(), - right: right.clone(), + left: Rc::clone(left), + right: Rc::clone(right), }), expression: filter, } @@ -1477,14 +1468,14 @@ impl<'a> PlanBuilder<'a> { //TODO: handle the case where the filter generates an expression variable if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { PlanNode::Extend { - child: Rc::new(self.push_filter(child.clone(), filter)), + child: Rc::new(self.push_filter(Rc::clone(child), filter)), expression: expression.clone(), variable: variable.clone(), } } else { PlanNode::Filter { child: Rc::new(PlanNode::Extend { - child: child.clone(), + child: Rc::clone(child), expression: expression.clone(), variable: variable.clone(), }), @@ -1495,12 +1486,12 @@ impl<'a> PlanBuilder<'a> { PlanNode::Filter { child, expression } => { if filter_variables.iter().all(|v| child.is_variable_bound(*v)) { PlanNode::Filter { - child: 
Rc::new(self.push_filter(child.clone(), filter)), + child: Rc::new(self.push_filter(Rc::clone(child), filter)), expression: expression.clone(), } } else { PlanNode::Filter { - child: child.clone(), + child: Rc::clone(child), expression: Box::new(PlanExpression::And(expression.clone(), filter)), } } @@ -1508,7 +1499,7 @@ impl<'a> PlanBuilder<'a> { PlanNode::Union { children } => PlanNode::Union { children: children .iter() - .map(|c| Rc::new(self.push_filter(c.clone(), filter.clone()))) + .map(|c| Rc::new(self.push_filter(Rc::clone(c), filter.clone()))) .collect(), }, _ => PlanNode::Filter { @@ -1541,12 +1532,11 @@ impl<'a> PlanBuilder<'a> { } fn build_plan_variable(variables: &mut Vec, variable: &Variable) -> PlanVariable { - let encoded = match slice_key(variables, variable) { - Some(key) => key, - None => { - variables.push(variable.clone()); - variables.len() - 1 - } + let encoded = if let Some(key) = slice_key(variables, variable) { + key + } else { + variables.push(variable.clone()); + variables.len() - 1 }; PlanVariable { plain: variable.clone(), @@ -1555,12 +1545,11 @@ fn build_plan_variable(variables: &mut Vec, variable: &Variable) -> Pl } fn bnode_key(blank_nodes: &mut Vec, blank_node: &BlankNode) -> usize { - match slice_key(blank_nodes, blank_node) { - Some(key) => key, - None => { - blank_nodes.push(blank_node.clone()); - blank_nodes.len() - 1 - } + if let Some(key) = slice_key(blank_nodes, blank_node) { + key + } else { + blank_nodes.push(blank_node.clone()); + blank_nodes.len() - 1 } } @@ -1673,21 +1662,13 @@ fn compile_static_pattern_if_exists( options: Option<&Expression>, ) -> Option { let static_pattern = if let Expression::Literal(pattern) = pattern { - if pattern.datatype() == xsd::STRING { - Some(pattern.value()) - } else { - None - } + (pattern.datatype() == xsd::STRING).then(|| pattern.value()) } else { None }; let static_options = if let Some(options) = options { if let Expression::Literal(options) = options { - if options.datatype() == 
xsd::STRING { - Some(Some(options.value())) - } else { - None - } + (options.datatype() == xsd::STRING).then(|| Some(options.value())) } else { None } diff --git a/lib/src/sparql/update.rs b/lib/src/sparql/update.rs index dff7d02c..2da5d08c 100644 --- a/lib/src/sparql/update.rs +++ b/lib/src/sparql/update.rs @@ -71,7 +71,14 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { insert, pattern, .. - } => self.eval_delete_insert(delete, insert, using_dataset.as_ref().unwrap(), pattern), + } => self.eval_delete_insert( + delete, + insert, + using_dataset + .as_ref() + .ok_or_else(|| EvaluationError::msg("No dataset"))?, + pattern, + ), GraphUpdateOperation::Load { silent, source, @@ -119,14 +126,14 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { ) -> Result<(), EvaluationError> { let dataset = Rc::new(DatasetView::new(self.transaction.reader(), using)); let (plan, variables) = PlanBuilder::build( - dataset.as_ref(), + &dataset, algebra, false, &self.options.query_options.custom_functions, !self.options.query_options.without_optimizations, )?; let evaluator = SimpleEvaluator::new( - dataset.clone(), + Rc::clone(&dataset), self.base_iri.clone(), self.options.query_options.service_handler(), Rc::new(self.options.query_options.custom_functions.clone()), @@ -374,7 +381,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { TermPattern::Literal(term) => Some(term.clone().into()), TermPattern::Triple(triple) => { Self::convert_triple_pattern(triple, variables, values, dataset, bnodes)? - .map(|t| t.into()) + .map(Into::into) } TermPattern::Variable(v) => Self::lookup_variable(v, variables, values) .map(|node| dataset.decode_term(&node)) @@ -507,7 +514,7 @@ impl<'a, 'b: 'a> SimpleUpdateEvaluator<'a, 'b> { GroundTermPattern::Literal(term) => Some(term.clone().into()), GroundTermPattern::Triple(triple) => { Self::convert_ground_triple_pattern(triple, variables, values, dataset)? 
- .map(|t| t.into()) + .map(Into::into) } GroundTermPattern::Variable(v) => Self::lookup_variable(v, variables, values) .map(|node| dataset.decode_term(&node)) diff --git a/lib/src/storage/backend/fallback.rs b/lib/src/storage/backend/fallback.rs index 4c4a1ded..6000863b 100644 --- a/lib/src/storage/backend/fallback.rs +++ b/lib/src/storage/backend/fallback.rs @@ -29,20 +29,18 @@ impl Db { Ok(Self(Arc::new(RwLock::new(trees)))) } + #[allow(clippy::unwrap_in_result)] pub fn column_family(&self, name: &'static str) -> Option { let name = ColumnFamily(name); - if self.0.read().unwrap().contains_key(&name) { - Some(name) - } else { - None - } + (self.0.read().unwrap().contains_key(&name)).then(|| name) } #[must_use] pub fn snapshot(&self) -> Reader { - Reader(InnerReader::Simple(self.0.clone())) + Reader(InnerReader::Simple(Arc::clone(&self.0))) } + #[allow(clippy::unwrap_in_result)] pub fn transaction<'a, 'b: 'a, T, E: Error + 'static + From>( &'b self, f: impl Fn(Transaction<'a>) -> Result, @@ -64,6 +62,7 @@ enum InnerReader { } impl Reader { + #[allow(clippy::unwrap_in_result)] pub fn get( &self, column_family: &ColumnFamily, @@ -90,6 +89,7 @@ impl Reader { } } + #[allow(clippy::unwrap_in_result)] pub fn contains_key( &self, column_family: &ColumnFamily, @@ -120,6 +120,7 @@ impl Reader { self.scan_prefix(column_family, &[]) } + #[allow(clippy::unwrap_in_result)] pub fn scan_prefix( &self, column_family: &ColumnFamily, @@ -176,19 +177,20 @@ impl Reader { Ok(Iter { iter, current }) } + #[allow(clippy::unwrap_in_result)] pub fn len(&self, column_family: &ColumnFamily) -> Result { match &self.0 { InnerReader::Simple(reader) => Ok(reader .read() .unwrap() .get(column_family) - .map_or(0, |tree| tree.len())), + .map_or(0, BTreeMap::len)), InnerReader::Transaction(reader) => { if let Some(reader) = reader.upgrade() { Ok((*reader) .borrow() .get(column_family) - .map_or(0, |tree| tree.len())) + .map_or(0, BTreeMap::len)) } else { Err(StorageError::Other( "The transaction is 
already ended".into(), @@ -198,19 +200,20 @@ impl Reader { } } + #[allow(clippy::unwrap_in_result)] pub fn is_empty(&self, column_family: &ColumnFamily) -> Result { match &self.0 { InnerReader::Simple(reader) => Ok(reader .read() .unwrap() .get(column_family) - .map_or(true, |tree| tree.is_empty())), + .map_or(true, BTreeMap::is_empty)), InnerReader::Transaction(reader) => { if let Some(reader) = reader.upgrade() { Ok((*reader) .borrow() .get(column_family) - .map_or(true, |tree| tree.is_empty())) + .map_or(true, BTreeMap::is_empty)) } else { Err(StorageError::Other( "The transaction is already ended".into(), @@ -246,7 +249,7 @@ impl Transaction<'_> { .map_or(false, |cf| cf.contains_key(key))) } - #[allow(clippy::unnecessary_wraps)] + #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] pub fn insert( &mut self, column_family: &ColumnFamily, @@ -269,7 +272,7 @@ impl Transaction<'_> { self.insert(column_family, key, &[]) } - #[allow(clippy::unnecessary_wraps)] + #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] pub fn remove(&mut self, column_family: &ColumnFamily, key: &[u8]) -> Result<(), StorageError> { self.0 .borrow_mut() diff --git a/lib/src/storage/backend/rocksdb.rs b/lib/src/storage/backend/rocksdb.rs index 6da4be07..7a1e22eb 100644 --- a/lib/src/storage/backend/rocksdb.rs +++ b/lib/src/storage/backend/rocksdb.rs @@ -4,7 +4,7 @@ use crate::storage::error::{CorruptionError, StorageError}; use lazy_static::lazy_static; -use libc::{self, c_char, c_void, free}; +use libc::{self, c_void, free}; use oxrocksdb_sys::*; use rand::random; use std::borrow::Borrow; @@ -241,7 +241,7 @@ impl Db { .map(|cf| cf.as_ptr()) .collect::>() .as_ptr(), - cf_options.as_ptr() as *const *const rocksdb_options_t, + cf_options.as_ptr().cast(), cf_handles.as_mut_ptr(), )) .map_err(|e| { @@ -359,7 +359,7 @@ impl Db { .map(|cf| cf.as_ptr()) .collect::>() .as_ptr(), - cf_options.as_ptr() as *const *const rocksdb_options_t, + cf_options.as_ptr().cast(), 
cf_handles.as_mut_ptr(), )) .map_err(|e| { @@ -393,11 +393,7 @@ impl Db { cf_handles, cf_options, is_secondary: true, - path_to_remove: if in_memory { - Some(secondary_path) - } else { - None - }, + path_to_remove: in_memory.then(|| secondary_path), })), }) } @@ -424,7 +420,7 @@ impl Db { .map(|cf| cf.as_ptr()) .collect::>() .as_ptr(), - cf_options.as_ptr() as *const *const rocksdb_options_t, + cf_options.as_ptr().cast(), cf_handles.as_mut_ptr(), 0, // false )) @@ -580,7 +576,7 @@ impl Db { } let options = rocksdb_readoptions_create_copy(db.read_options); Reader { - inner: InnerReader::PlainDb(db.clone()), + inner: InnerReader::PlainDb(Arc::clone(db)), options, } } @@ -594,7 +590,7 @@ impl Db { rocksdb_readoptions_set_snapshot(options, snapshot); Reader { inner: InnerReader::TransactionalSnapshot(Rc::new(TransactionalSnapshot { - db: db.clone(), + db: Arc::clone(db), snapshot, })), options, @@ -698,7 +694,7 @@ impl Db { db.db, db.read_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), )) } @@ -707,7 +703,7 @@ impl Db { db.db, db.read_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } @@ -740,9 +736,9 @@ impl Db { db.db, db.write_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), - value.as_ptr() as *const c_char, + value.as_ptr().cast(), value.len(), )) }?; @@ -940,7 +936,7 @@ impl Reader { inner.db.db, self.options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } @@ -950,7 +946,7 @@ impl Reader { *inner, self.options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } else { @@ -964,7 +960,7 @@ impl Reader { inner.db, self.options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() )) } @@ -1005,11 +1001,7 @@ impl Reader { break; } } - if found { - Some(bound) - } else { - None - } + found.then(|| bound) }; unsafe { @@ -1021,7 
+1013,7 @@ impl Reader { if let Some(upper_bound) = &upper_bound { rocksdb_readoptions_set_iterate_upper_bound( options, - upper_bound.as_ptr() as *const c_char, + upper_bound.as_ptr().cast(), upper_bound.len(), ); } @@ -1046,7 +1038,7 @@ impl Reader { if prefix.is_empty() { rocksdb_iter_seek_to_first(iter); } else { - rocksdb_iter_seek(iter, prefix.as_ptr() as *const c_char, prefix.len()); + rocksdb_iter_seek(iter, prefix.as_ptr().cast(), prefix.len()); } let is_currently_valid = rocksdb_iter_valid(iter) != 0; Ok(Iter { @@ -1101,7 +1093,7 @@ impl Transaction<'_> { *self.transaction, self.read_options, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len() ))?; Ok(if slice.is_null() { @@ -1130,9 +1122,9 @@ impl Transaction<'_> { ffi_result!(rocksdb_transaction_put_cf_with_status( *self.transaction, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), - value.as_ptr() as *const c_char, + value.as_ptr().cast(), value.len(), ))?; } @@ -1152,7 +1144,7 @@ impl Transaction<'_> { ffi_result!(rocksdb_transaction_delete_cf_with_status( *self.transaction, column_family.0, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), key.len(), ))?; } @@ -1177,7 +1169,7 @@ impl Deref for PinnableSlice { unsafe { let mut len = 0; let val = rocksdb_pinnableslice_value(self.0, &mut len); - slice::from_raw_parts(val as *const u8, len) + slice::from_raw_parts(val.cast(), len) } } } @@ -1208,7 +1200,7 @@ pub struct Buffer { impl Drop for Buffer { fn drop(&mut self) { unsafe { - free(self.base as *mut c_void); + free(self.base.cast()); } } } @@ -1285,7 +1277,7 @@ impl Iter { unsafe { let mut len = 0; let val = rocksdb_iter_key(self.iter, &mut len); - Some(slice::from_raw_parts(val as *const u8, len)) + Some(slice::from_raw_parts(val.cast(), len)) } } else { None @@ -1311,9 +1303,9 @@ impl SstFileWriter { unsafe { ffi_result!(rocksdb_sstfilewriter_put_with_status( self.writer, - key.as_ptr() as *const c_char, + key.as_ptr().cast(), 
key.len(), - value.as_ptr() as *const c_char, + value.as_ptr().cast(), value.len(), ))?; } diff --git a/lib/src/storage/mod.rs b/lib/src/storage/mod.rs index 1dbebcfa..8a92e77f 100644 --- a/lib/src/storage/mod.rs +++ b/lib/src/storage/mod.rs @@ -1,3 +1,4 @@ +#![allow(clippy::same_name_method)] #[cfg(not(target_family = "wasm"))] use crate::model::Quad; use crate::model::{GraphNameRef, NamedOrBlankNodeRef, QuadRef, TermRef}; @@ -181,7 +182,7 @@ impl Storage { ] } - #[allow(clippy::unnecessary_wraps)] + #[allow(clippy::unnecessary_wraps, clippy::unwrap_in_result)] fn setup(db: Db) -> Result { let this = Self { #[cfg(not(target_family = "wasm"))] @@ -1305,7 +1306,7 @@ impl StorageBulkLoader { let mut buffer_to_load = Vec::with_capacity(batch_size); swap(buffer, &mut buffer_to_load); let storage = self.storage.clone(); - let done_counter_clone = done_counter.clone(); + let done_counter_clone = Arc::clone(done_counter); threads.push_back(spawn(move || { FileBulkLoader::new(storage, batch_size).load(buffer_to_load, &done_counter_clone) })); diff --git a/lib/src/store.rs b/lib/src/store.rs index 95234498..3d854a4a 100644 --- a/lib/src/store.rs +++ b/lib/src/store.rs @@ -565,7 +565,7 @@ impl Store { &self, quads: impl IntoIterator>, ) -> Result<(), StorageError> { - let quads = quads.into_iter().map(|q| q.into()).collect::>(); + let quads = quads.into_iter().map(Into::into).collect::>(); self.transaction(move |mut t| t.extend(&quads)) } @@ -1569,7 +1569,7 @@ impl BulkLoader { quads: impl IntoIterator, EI>>, ) -> Result<(), EO> { self.storage - .load(quads.into_iter().map(|q| q.map(|q| q.into()))) + .load(quads.into_iter().map(|q| q.map(Into::into))) } } diff --git a/lib/tests/store.rs b/lib/tests/store.rs index 66d2fae0..5f8a6809 100644 --- a/lib/tests/store.rs +++ b/lib/tests/store.rs @@ -175,7 +175,7 @@ fn test_load_dataset() -> Result<(), Box> { #[test] #[cfg(not(target_family = "wasm"))] fn test_bulk_load_dataset() -> Result<(), Box> { - let store = 
Store::new().unwrap(); + let store = Store::new()?; store .bulk_loader() .load_dataset(Cursor::new(GRAPH_DATA), DatasetFormat::TriG, None)?; diff --git a/python/src/io.rs b/python/src/io.rs index b4fbf3c5..c3032990 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -9,6 +9,7 @@ use pyo3::exceptions::{PyIOError, PySyntaxError, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyBytes; use pyo3::wrap_pyfunction; +use std::error::Error; use std::fs::File; use std::io::{self, BufRead, BufReader, BufWriter, Cursor, Read, Write}; @@ -289,11 +290,7 @@ impl Read for PyIo { .map_err(to_io_err)?; let bytes = read .extract::<&[u8]>(py) - .or_else(|e| { - read.extract::<&str>(py) - .map(|s| s.as_bytes()) - .map_err(|_| e) - }) + .or_else(|e| read.extract::<&str>(py).map(str::as_bytes).map_err(|_| e)) .map_err(to_io_err)?; buf.write_all(bytes)?; Ok(bytes.len()) @@ -325,7 +322,10 @@ fn to_io_err(error: impl Into) -> io::Error { } pub fn map_io_err(error: io::Error) -> PyErr { - if error.get_ref().map_or(false, |s| s.is::()) { + if error + .get_ref() + .map_or(false, <(dyn Error + Send + Sync + 'static)>::is::) + { *error.into_inner().unwrap().downcast().unwrap() } else { PyIOError::new_err(error.to_string()) diff --git a/python/src/sparql.rs b/python/src/sparql.rs index 0fe9ceff..1cd0e1b2 100644 --- a/python/src/sparql.rs +++ b/python/src/sparql.rs @@ -225,7 +225,7 @@ impl PyQueryTriples { Ok(allow_threads_unsafe(|| self.inner.next()) .transpose() .map_err(map_evaluation_error)? 
- .map(|t| t.into())) + .map(Into::into)) } } diff --git a/python/src/store.rs b/python/src/store.rs index e0790aca..7dbb9a66 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -228,10 +228,10 @@ impl PyStore { extract_quads_pattern(subject, predicate, object, graph_name)?; Ok(QuadIter { inner: self.inner.quads_for_pattern( - subject.as_ref().map(|p| p.into()), - predicate.as_ref().map(|p| p.into()), - object.as_ref().map(|p| p.into()), - graph_name.as_ref().map(|p| p.into()), + subject.as_ref().map(Into::into), + predicate.as_ref().map(Into::into), + object.as_ref().map(Into::into), + graph_name.as_ref().map(Into::into), ), }) } diff --git a/server/src/main.rs b/server/src/main.rs index e095d495..1264f132 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -591,7 +591,7 @@ pub fn main() -> anyhow::Result<()> { let mut file = BufWriter::new(File::create(&explain_file)?); match explain_file .extension() - .and_then(|e| e.to_str()) { + .and_then(OsStr::to_str) { Some("json") => { explanation.write_in_json(file)?; }, @@ -734,7 +734,7 @@ fn format_from_path( path: &Path, from_extension: impl FnOnce(&str) -> anyhow::Result, ) -> anyhow::Result { - if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) { + if let Some(ext) = path.extension().and_then(OsStr::to_str) { from_extension(ext).map_err(|e| { e.context(format!( "Not able to guess the file format from file name extension '{ext}'" @@ -1636,7 +1636,7 @@ impl io::Result>) + 'static> ReadForWrite Result { let buffer = Rc::new(RefCell::new(Vec::new())); let state = initial_state_builder(ReadForWriteWriter { - buffer: buffer.clone(), + buffer: Rc::clone(&buffer), }) .map_err(internal_server_error)?; Ok(Response::builder(Status::OK) diff --git a/testsuite/src/manifest.rs b/testsuite/src/manifest.rs index fcf79b36..1b93f244 100644 --- a/testsuite/src/manifest.rs +++ b/testsuite/src/manifest.rs @@ -347,7 +347,7 @@ impl<'a> Iterator for RdfListIterator<'a> { let result = self .graph 
.object_for_subject_predicate(current, rdf::FIRST) - .map(|v| v.into_owned()); + .map(TermRef::into_owned); self.current_node = match self.graph.object_for_subject_predicate(current, rdf::REST) { Some(TermRef::NamedNode(n)) if n == rdf::NIL => None, diff --git a/testsuite/src/sparql_evaluator.rs b/testsuite/src/sparql_evaluator.rs index 44153c68..4790d175 100644 --- a/testsuite/src/sparql_evaluator.rs +++ b/testsuite/src/sparql_evaluator.rs @@ -287,21 +287,11 @@ fn evaluate_update_evaluation_test(test: &Test) -> Result<()> { } fn load_sparql_query_result(url: &str) -> Result { - if url.ends_with(".srx") { - StaticQueryResults::from_query_results( - QueryResults::read(read_file(url)?, QueryResultsFormat::Xml)?, - false, - ) - } else if url.ends_with(".srj") { - StaticQueryResults::from_query_results( - QueryResults::read(read_file(url)?, QueryResultsFormat::Json)?, - false, - ) - } else if url.ends_with(".tsv") { - StaticQueryResults::from_query_results( - QueryResults::read(read_file(url)?, QueryResultsFormat::Tsv)?, - false, - ) + if let Some(format) = url + .rsplit_once('.') + .and_then(|(_, extension)| QueryResultsFormat::from_extension(extension)) + { + StaticQueryResults::from_query_results(QueryResults::read(read_file(url)?, format)?, false) } else { StaticQueryResults::from_graph(&load_graph(url, guess_graph_format(url)?)?) 
} @@ -505,7 +495,7 @@ impl StaticQueryResults { fn from_graph(graph: &Graph) -> Result { // Hack to normalize literals - let store = Store::new().unwrap(); + let store = Store::new()?; for t in graph.iter() { store .insert(t.in_graph(GraphNameRef::DefaultGraph)) @@ -617,12 +607,12 @@ fn results_diff(expected: StaticQueryResults, actual: StaticQueryResults) -> Str format_diff( &expected_variables .iter() - .map(|v| v.to_string()) + .map(ToString::to_string) .collect::>() .join("\n"), &actual_variables .iter() - .map(|v| v.to_string()) + .map(ToString::to_string) .collect::>() .join("\n"), "variables", From cfe52db3a31f43432a6e5d7891fe225429c07a19 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 17 Apr 2023 12:21:55 +0200 Subject: [PATCH 14/45] Spargebra: Makes GroundTermPattern::Triple properly gated with the rdf-star feature It is only RDF-star specific --- lib/spargebra/src/term.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/spargebra/src/term.rs b/lib/spargebra/src/term.rs index e2d29dc8..3ef91f68 100644 --- a/lib/spargebra/src/term.rs +++ b/lib/spargebra/src/term.rs @@ -577,6 +577,7 @@ pub enum GroundTermPattern { NamedNode(NamedNode), Literal(Literal), Variable(Variable), + #[cfg(feature = "rdf-star")] Triple(Box), } @@ -587,6 +588,7 @@ impl GroundTermPattern { Self::NamedNode(term) => write!(f, "{term}"), Self::Literal(term) => write!(f, "{term}"), Self::Variable(var) => write!(f, "{var}"), + #[cfg(feature = "rdf-star")] Self::Triple(triple) => triple.fmt_sse(f), } } @@ -599,6 +601,7 @@ impl fmt::Display for GroundTermPattern { Self::NamedNode(term) => term.fmt(f), Self::Literal(term) => term.fmt(f), Self::Variable(var) => var.fmt(f), + #[cfg(feature = "rdf-star")] Self::Triple(triple) => write!(f, "<<{triple}>>"), } } @@ -618,6 +621,7 @@ impl From for GroundTermPattern { } } +#[cfg(feature = "rdf-star")] impl From for GroundTermPattern { #[inline] fn from(triple: GroundTriplePattern) -> Self { @@ -818,6 +822,7 @@ pub struct GroundTriplePattern { 
impl GroundTriplePattern { /// Formats using the [SPARQL S-Expression syntax](https://jena.apache.org/documentation/notes/sse.html). + #[allow(dead_code)] pub(crate) fn fmt_sse(&self, f: &mut impl Write) -> fmt::Result { write!(f, "(triple ")?; self.subject.fmt_sse(f)?; From e96672a2a88af9951d037b7ae40de423d705700e Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 3 May 2023 13:57:44 +0200 Subject: [PATCH 15/45] SPARQL plan: allows AND and OR to have more than 2 children Adds tests on VALUES cardinality validation --- lib/src/sparql/eval.rs | 52 ++++++++++++++++++++--------- lib/src/sparql/plan.rs | 38 ++++++++++++++++----- lib/src/sparql/plan_builder.rs | 61 +++++++++++++++++++--------------- 3 files changed, 100 insertions(+), 51 deletions(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 3650e2d0..e95d2092 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -954,23 +954,45 @@ impl SimpleEvaluator { stat_children.push(stats); Rc::new(move |tuple| Some(eval(tuple.clone()).next().is_some().into())) } - PlanExpression::Or(a, b) => { - let a = self.expression_evaluator(a, stat_children); - let b = self.expression_evaluator(b, stat_children); - Rc::new(move |tuple| match a(tuple).and_then(|v| to_bool(&v)) { - Some(true) => Some(true.into()), - Some(false) => b(tuple), - None => (Some(true) == a(tuple).and_then(|v| to_bool(&v))).then(|| true.into()), + PlanExpression::Or(inner) => { + let children = inner + .iter() + .map(|i| self.expression_evaluator(i, stat_children)) + .collect::>(); + Rc::new(move |tuple| { + let mut error = true; + for child in children.iter() { + match child(tuple).and_then(|v| to_bool(&v)) { + Some(true) => return Some(true.into()), + Some(false) => continue, + None => error = true, + } + } + if error { + None + } else { + Some(false.into()) + } }) } - PlanExpression::And(a, b) => { - let a = self.expression_evaluator(a, stat_children); - let b = self.expression_evaluator(b, stat_children); - Rc::new(move |tuple| 
match a(tuple).and_then(|v| to_bool(&v)) { - Some(true) => b(tuple), - Some(false) => Some(false.into()), - None => { - (Some(false) == b(tuple).and_then(|v| to_bool(&v))).then(|| false.into()) + PlanExpression::And(inner) => { + let children = inner + .iter() + .map(|i| self.expression_evaluator(i, stat_children)) + .collect::>(); + Rc::new(move |tuple| { + let mut error = false; + for child in children.iter() { + match child(tuple).and_then(|v| to_bool(&v)) { + Some(true) => continue, + Some(false) => return Some(false.into()), + None => error = true, + } + } + if error { + None + } else { + Some(true.into()) } }) } diff --git a/lib/src/sparql/plan.rs b/lib/src/sparql/plan.rs index c9af7f22..88469433 100644 --- a/lib/src/sparql/plan.rs +++ b/lib/src/sparql/plan.rs @@ -13,7 +13,7 @@ use std::rc::Rc; use std::time::Duration; use std::{fmt, io}; -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum PlanNode { StaticBindings { encoded_tuples: Vec, @@ -447,8 +447,8 @@ pub enum PlanExpression { Literal(PlanTerm), Variable(PlanVariable), Exists(Rc), - Or(Box, Box), - And(Box, Box), + Or(Vec), + And(Vec), Equal(Box, Box), Greater(Box, Box), GreaterOrEqual(Box, Box), @@ -597,9 +597,7 @@ impl PlanExpression { | Self::YearMonthDurationCast(e) | Self::DayTimeDurationCast(e) | Self::StringCast(e) => e.lookup_used_variables(callback), - Self::Or(a, b) - | Self::And(a, b) - | Self::Equal(a, b) + Self::Equal(a, b) | Self::Greater(a, b) | Self::GreaterOrEqual(a, b) | Self::Less(a, b) @@ -639,7 +637,11 @@ impl PlanExpression { c.lookup_used_variables(callback); d.lookup_used_variables(callback); } - Self::Concat(es) | Self::Coalesce(es) | Self::CustomFunction(_, es) => { + Self::Or(es) + | Self::And(es) + | Self::Concat(es) + | Self::Coalesce(es) + | Self::CustomFunction(_, es) => { for e in es { e.lookup_used_variables(callback); } @@ -723,8 +725,26 @@ impl fmt::Display for PlanExpression { Self::YearMonthDurationCast(e) => write!(f, "YearMonthDurationCast({e})"), 
Self::DayTimeDurationCast(e) => write!(f, "DayTimeDurationCast({e})"), Self::StringCast(e) => write!(f, "StringCast({e})"), - Self::Or(a, b) => write!(f, "Or({a}, {b})"), - Self::And(a, b) => write!(f, "And({a}, {b})"), + Self::Or(es) => { + write!(f, "Or(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } + Self::And(es) => { + write!(f, "And(")?; + for (i, e) in es.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{e}")?; + } + write!(f, ")") + } Self::Equal(a, b) => write!(f, "Equal({a}, {b})"), Self::Greater(a, b) => write!(f, "Greater({a}, {b})"), Self::GreaterOrEqual(a, b) => write!(f, "GreaterOrEqual({a}, {b})"), diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 9e6472ec..a0d6b603 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -400,14 +400,14 @@ impl<'a> PlanBuilder<'a> { plain: l.clone(), }), Expression::Variable(v) => PlanExpression::Variable(build_plan_variable(variables, v)), - Expression::Or(a, b) => PlanExpression::Or( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), - ), - Expression::And(a, b) => PlanExpression::And( - Box::new(self.build_for_expression(a, variables, graph_name)?), - Box::new(self.build_for_expression(b, variables, graph_name)?), - ), + Expression::Or(a, b) => PlanExpression::Or(vec![ + self.build_for_expression(a, variables, graph_name)?, + self.build_for_expression(b, variables, graph_name)?, + ]), + Expression::And(a, b) => PlanExpression::And(vec![ + self.build_for_expression(a, variables, graph_name)?, + self.build_for_expression(b, variables, graph_name)?, + ]), Expression::Equal(a, b) => PlanExpression::Equal( Box::new(self.build_for_expression(a, variables, graph_name)?), Box::new(self.build_for_expression(b, variables, graph_name)?), @@ -433,23 +433,23 @@ impl<'a> PlanBuilder<'a> 
{ Box::new(self.build_for_expression(b, variables, graph_name)?), ), Expression::In(e, l) => { + if l.is_empty() { + return Ok(PlanExpression::Literal(PlanTerm { + encoded: false.into(), + plain: false.into(), + })); + } let e = self.build_for_expression(e, variables, graph_name)?; - l.iter() - .map(|v| { - Ok(PlanExpression::Equal( - Box::new(e.clone()), - Box::new(self.build_for_expression(v, variables, graph_name)?), - )) - }) - .reduce(|a: Result<_, EvaluationError>, b| { - Ok(PlanExpression::Or(Box::new(a?), Box::new(b?))) - }) - .unwrap_or_else(|| { - Ok(PlanExpression::Literal(PlanTerm { - encoded: false.into(), - plain: false.into(), - })) - })? + PlanExpression::Or( + l.iter() + .map(|v| { + Ok(PlanExpression::Equal( + Box::new(e.clone()), + Box::new(self.build_for_expression(v, variables, graph_name)?), + )) + }) + .collect::>()?, + ) } Expression::Add(a, b) => PlanExpression::Add( Box::new(self.build_for_expression(a, variables, graph_name)?), @@ -1402,8 +1402,12 @@ impl<'a> PlanBuilder<'a> { expression: filter, }; } - if let PlanExpression::And(f1, f2) = *filter { - return self.push_filter(Rc::new(self.push_filter(node, f1)), f2); + if let PlanExpression::And(filters) = *filter { + return filters + .into_iter() + .fold((*node.as_ref()).clone(), |acc, f| { + self.push_filter(Rc::new(acc), Box::new(f)) + }); } let mut filter_variables = BTreeSet::new(); filter.lookup_used_variables(&mut |v| { @@ -1492,7 +1496,10 @@ impl<'a> PlanBuilder<'a> { } else { PlanNode::Filter { child: Rc::clone(child), - expression: Box::new(PlanExpression::And(expression.clone(), filter)), + expression: Box::new(PlanExpression::And(vec![ + *expression.clone(), + *filter, + ])), } } } From f9d7b93abfb91ebad5b8947ea892ed4906f8b5f0 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 11 May 2023 19:02:10 +0200 Subject: [PATCH 16/45] Python: drops compatibility with Musl 1.1 Rust is moving to 1.2+: https://blog.rust-lang.org/2023/05/09/Updating-musl-targets.html --- 
.github/workflows/artifacts.yml | 2 +- .github/workflows/musllinux_build.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/artifacts.yml b/.github/workflows/artifacts.yml index 73bdc686..ccb95760 100644 --- a/.github/workflows/artifacts.yml +++ b/.github/workflows/artifacts.yml @@ -149,7 +149,7 @@ jobs: platforms: linux/${{ matrix.architecture }} if: github.event_name == 'release' && matrix.architecture != 'x86_64' - run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/musllinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/musllinux_build_script.sh - - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_1_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh + - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_2_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh if: github.event_name == 'release' || matrix.architecture == 'x86_64' - uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/musllinux_build.sh b/.github/workflows/musllinux_build.sh index 7dd2fb4c..4cf0d9e8 100644 --- a/.github/workflows/musllinux_build.sh +++ b/.github/workflows/musllinux_build.sh @@ -11,9 +11,9 @@ source venv/bin/activate pip install -r requirements.dev.txt maturin develop --release -m Cargo.toml python generate_stubs.py pyoxigraph pyoxigraph.pyi --black -maturin build --release -m Cargo.toml --features abi3 --compatibility musllinux_1_1 +maturin build --release -m Cargo.toml --features abi3 --compatibility musllinux_1_2 if [ %for_each_version% ]; then for VERSION in 7 8 9 10 11; do - maturin build --release -m Cargo.toml --interpreter "python3.$VERSION" --compatibility musllinux_1_1 + maturin build --release -m Cargo.toml --interpreter "python3.$VERSION" --compatibility 
musllinux_1_2 done fi From a25bf5591953dcf75fb6ee241cfee3b8c3f583d5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 May 2023 10:16:40 +0000 Subject: [PATCH 17/45] Update maturin requirement from ~=0.14.0 to ~=0.15.1 in /python Updates the requirements on [maturin](https://github.com/pyo3/maturin) to permit the latest version. - [Release notes](https://github.com/pyo3/maturin/releases) - [Changelog](https://github.com/PyO3/maturin/blob/main/Changelog.md) - [Commits](https://github.com/pyo3/maturin/compare/v0.14.0...v0.15.1) --- updated-dependencies: - dependency-name: maturin dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- .github/workflows/tests.yml | 2 +- python/pyproject.toml | 2 +- python/requirements.dev.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3eab4c72..bde185c3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -281,7 +281,7 @@ jobs: python-version: "3.10" cache: pip cache-dependency-path: '**/requirements.dev.txt' - - run: pip install "maturin~=0.14.0" + - run: pip install "maturin~=0.15.0" - run: maturin build -m python/Cargo.toml - run: pip install --no-index --find-links=target/wheels/ pyoxigraph - run: rm -r target/wheels diff --git a/python/pyproject.toml b/python/pyproject.toml index 8b7cdb91..c272d3d2 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin~=0.14.0"] +requires = ["maturin~=0.15.0"] build-backend = "maturin" [project] diff --git a/python/requirements.dev.txt b/python/requirements.dev.txt index 6bc286d4..bebb6736 100644 --- a/python/requirements.dev.txt +++ b/python/requirements.dev.txt @@ -1,6 +1,6 @@ black~=23.1 furo -maturin~=0.14.0 +maturin~=0.15.1 mypy~=1.0 ruff~=0.0.255 sphinx~=5.3 From 8f3af5a7fce9277b8d2f304642f36a20c245a5af Mon Sep 17 00:00:00 2001 From: 
Thomas Tanon Date: Fri, 12 May 2023 09:06:27 +0200 Subject: [PATCH 18/45] Revert "Python: drops compatibility with Musl 1.1" This reverts commit f9d7b93abfb91ebad5b8947ea892ed4906f8b5f0. --- .github/workflows/artifacts.yml | 2 +- .github/workflows/musllinux_build.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/artifacts.yml b/.github/workflows/artifacts.yml index ccb95760..73bdc686 100644 --- a/.github/workflows/artifacts.yml +++ b/.github/workflows/artifacts.yml @@ -149,7 +149,7 @@ jobs: platforms: linux/${{ matrix.architecture }} if: github.event_name == 'release' && matrix.architecture != 'x86_64' - run: sed 's/%arch%/${{ matrix.architecture }}/g' .github/workflows/musllinux_build.sh | sed 's/%for_each_version%/${{ github.event_name == 'release' || '' }}/g' > .github/workflows/musllinux_build_script.sh - - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_2_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh + - run: docker run -v "$(pwd)":/workdir --platform linux/${{ matrix.architecture }} quay.io/pypa/musllinux_1_1_${{ matrix.architecture }} /bin/bash /workdir/.github/workflows/musllinux_build_script.sh if: github.event_name == 'release' || matrix.architecture == 'x86_64' - uses: actions/upload-artifact@v3 with: diff --git a/.github/workflows/musllinux_build.sh b/.github/workflows/musllinux_build.sh index 4cf0d9e8..7dd2fb4c 100644 --- a/.github/workflows/musllinux_build.sh +++ b/.github/workflows/musllinux_build.sh @@ -11,9 +11,9 @@ source venv/bin/activate pip install -r requirements.dev.txt maturin develop --release -m Cargo.toml python generate_stubs.py pyoxigraph pyoxigraph.pyi --black -maturin build --release -m Cargo.toml --features abi3 --compatibility musllinux_1_2 +maturin build --release -m Cargo.toml --features abi3 --compatibility musllinux_1_1 if [ %for_each_version% ]; then for VERSION in 7 8 9 10 11; do - 
maturin build --release -m Cargo.toml --interpreter "python3.$VERSION" --compatibility musllinux_1_2 + maturin build --release -m Cargo.toml --interpreter "python3.$VERSION" --compatibility musllinux_1_1 done fi From 05fbb0e07124066384b69d2b424c22f7e7c25d71 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 14 May 2023 16:37:11 +0200 Subject: [PATCH 19/45] JS: Upgrades Rome formatter --- js/package.json | 4 ++-- js/rome.json | 21 ++++++++++++--------- js/test/model.mjs | 2 +- js/test/store.mjs | 2 +- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/js/package.json b/js/package.json index e549c162..e8744994 100644 --- a/js/package.json +++ b/js/package.json @@ -5,10 +5,10 @@ "devDependencies": { "@rdfjs/data-model": "^2.0.1", "mocha": "^10.0.0", - "rome": "^11.0.0" + "rome": "^12.0.0" }, "scripts": { - "fmt": "rome format . --write && rome check . --apply-suggested", + "fmt": "rome format . --write && rome check . --apply-unsafe", "test": "rome ci . && wasm-pack build --debug --target nodejs && mocha", "build": "rm -rf pkg && wasm-pack build --release --target web --out-name web && mv pkg pkg-web && wasm-pack build --release --target nodejs --out-name node && mv pkg pkg-node && node build_package.js && rm -r pkg-web && rm -r pkg-node", "release": "npm run build && npm publish ./pkg", diff --git a/js/rome.json b/js/rome.json index d92e4ab6..272422a5 100644 --- a/js/rome.json +++ b/js/rome.json @@ -1,10 +1,13 @@ { - "formatter": { - "indentStyle": "space", - "indentSize": 4, - "lineWidth": 100 - }, - "linter": { - "ignore": ["pkg"] - } -} \ No newline at end of file + "formatter": { + "indentStyle": "space", + "indentSize": 4, + "lineWidth": 100 + }, + "linter": { + "ignore": ["pkg"] + }, + "organizeImports": { + "enabled": true + } +} diff --git a/js/test/model.mjs b/js/test/model.mjs index e001843b..37f83199 100644 --- a/js/test/model.mjs +++ b/js/test/model.mjs @@ -1,8 +1,8 @@ /* global describe, it */ +import runTests from 
"../node_modules/@rdfjs/data-model/test/index.js"; import oxigraph from "../pkg/oxigraph.js"; import assert from "assert"; -import runTests from "../node_modules/@rdfjs/data-model/test/index.js"; runTests({ factory: oxigraph }); diff --git a/js/test/store.mjs b/js/test/store.mjs index 55a53a66..2317c022 100644 --- a/js/test/store.mjs +++ b/js/test/store.mjs @@ -1,8 +1,8 @@ /* global describe, it */ import { Store } from "../pkg/oxigraph.js"; -import assert from "assert"; import dataModel from "@rdfjs/data-model"; +import assert from "assert"; const ex = dataModel.namedNode("http://example.com"); const triple = dataModel.quad( From 5ce24dda017663163288a91e1e6fdd2bf92d20e0 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 14 May 2023 16:31:33 +0200 Subject: [PATCH 20/45] Dependabot: Adds JS dependencies --- .github/{dependabot.yml => DEPENDABOT.yml} | 5 +++++ 1 file changed, 5 insertions(+) rename .github/{dependabot.yml => DEPENDABOT.yml} (65%) diff --git a/.github/dependabot.yml b/.github/DEPENDABOT.yml similarity index 65% rename from .github/dependabot.yml rename to .github/DEPENDABOT.yml index a02a15df..507639d0 100644 --- a/.github/dependabot.yml +++ b/.github/DEPENDABOT.yml @@ -9,3 +9,8 @@ updates: versioning-strategy: increase-if-necessary schedule: interval: weekly + - package-ecosystem: "npm" + directory: "/js/" + versioning-strategy: increase-if-necessary + schedule: + interval: weekly From cb89166380ad91b8ae239e1cb5d023a04420d028 Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 15 May 2023 19:45:30 +0200 Subject: [PATCH 21/45] RocksDB: Avoids building twice util/crc32c_arm64.cc on aarch64 --- oxrocksdb-sys/build.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/oxrocksdb-sys/build.rs b/oxrocksdb-sys/build.rs index f1884dbf..29f3fdda 100644 --- a/oxrocksdb-sys/build.rs +++ b/oxrocksdb-sys/build.rs @@ -88,8 +88,6 @@ fn build_rocksdb() { config.define("HAVE_LZCNT", Some("1")); config.flag_if_supported("-mlzcnt"); } - } else if target.contains("aarch64") { - 
lib_sources.push("util/crc32c_arm64.cc") } if target.contains("apple-ios") { From 38af275451f2664f83a048ca6c82ad1521f28026 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 17 May 2023 22:10:46 +0200 Subject: [PATCH 22/45] Server: Improves systemd support Closes #499 --- server/README.md | 19 +++++++++++++++++++ server/src/main.rs | 14 ++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/server/README.md b/server/README.md index 4d9eb2cc..6bcf92b4 100644 --- a/server/README.md +++ b/server/README.md @@ -220,6 +220,25 @@ brew install oxigraph It installs the `oxigraph_server` binary. [See the usage documentation to know how to use it](#usage). +## Systemd + +It is possible to run Oxigraph in the background using systemd. + +For that, you can use the following `oxigraph_server.service` file (it might be inserted into `/etc/systemd/system/` or `$HOME/.config/systemd/user`): +```ini +[Unit] +Description=Oxigraph database server +After=network-online.target +Wants=network-online.target + +[Service] +Type=notify +ExecStart=/PATH/TO/oxigraph_server serve --location /PATH/TO/OXIGRAPH/DATA + +[Install] +WantedBy=multi-user.target +``` + ## Migration guide ### From 0.2 to 0.3 diff --git a/server/src/main.rs b/server/src/main.rs index 1264f132..57257e1b 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -17,9 +17,13 @@ use sparesults::{QueryResultsFormat, QueryResultsSerializer}; use std::borrow::Cow; use std::cell::RefCell; use std::cmp::{max, min}; +#[cfg(target_os = "linux")] +use std::env; use std::ffi::OsStr; use std::fs::File; use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Read, Write}; +#[cfg(target_os = "linux")] +use std::os::unix::net::UnixDatagram; use std::path::{Path, PathBuf}; use std::rc::Rc; use std::str::FromStr; @@ -776,6 +780,8 @@ fn serve(store: Store, bind: String, read_only: bool, cors: bool) -> anyhow::Res }; server.set_global_timeout(HTTP_TIMEOUT); server.set_server_name(concat!("Oxigraph/", 
env!("CARGO_PKG_VERSION")))?; + #[cfg(target_os = "linux")] + systemd_notify_ready()?; eprintln!("Listening for requests at http://{}", &bind); server.listen(bind)?; Ok(()) @@ -1698,6 +1704,14 @@ impl Write for ReadForWriteWriter { } } +#[cfg(target_os = "linux")] +fn systemd_notify_ready() -> io::Result<()> { + if let Some(path) = env::var_os("NOTIFY_SOCKET") { + UnixDatagram::unbound()?.send_to(b"READY=1", path)?; + } + Ok(()) +} + #[cfg(test)] mod tests { use super::*; From d26731432cdf44f4855c7c7a367baeed96e4b54a Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 18 May 2023 16:17:24 +0200 Subject: [PATCH 23/45] Server: Use target graph name as base URI Task: #498 --- server/src/main.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/server/src/main.rs b/server/src/main.rs index 57257e1b..f57ada51 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -1537,19 +1537,24 @@ fn web_load_graph( format: GraphFormat, to_graph_name: GraphNameRef<'_>, ) -> Result<(), HttpError> { + let base_iri = if let GraphNameRef::NamedNode(graph_name) = to_graph_name { + Some(graph_name.as_str()) + } else { + None + }; if url_query_parameter(request, "no_transaction").is_some() { web_bulk_loader(store, request).load_graph( BufReader::new(request.body_mut()), format, to_graph_name, - None, + base_iri, ) } else { store.load_graph( BufReader::new(request.body_mut()), format, to_graph_name, - None, + base_iri, ) } .map_err(loader_to_http_error) @@ -2381,6 +2386,53 @@ mod tests { ) } + #[test] + fn graph_store_base_url() -> Result<()> { + let server = ServerTest::new()?; + + // POST + let request = Request::builder( + Method::POST, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::CONTENT_TYPE, "text/turtle")? 
+ .with_body("<> ."); + server.test_status(request, Status::NO_CONTENT)?; + + // GET + let request = Request::builder( + Method::GET, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::ACCEPT, "application/n-triples")? + .build(); + server.test_body( + request, + " .\n", + )?; + + // PUT + let request = Request::builder( + Method::PUT, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::CONTENT_TYPE, "text/turtle")? + .with_body("<> ."); + server.test_status(request, Status::NO_CONTENT)?; + + // GET + let request = Request::builder( + Method::GET, + "http://localhost/store?graph=http://example.com".parse()?, + ) + .with_header(HeaderName::ACCEPT, "application/n-triples")? + .build(); + server.test_body( + request, + " .\n", + ) + } + #[test] fn graph_store_protocol() -> Result<()> { // Tests from https://www.w3.org/2009/sparql/docs/tests/data-sparql11/http-rdf-update/ From 9a6233b51138d0093124a77c5f91d1d9ba2547e8 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 19 May 2023 18:35:48 +0200 Subject: [PATCH 24/45] SPARQL parser: removes reference to rule numbers They are not stable and are changing with SPARQL 1.2. Let's avoid the hassle of maintaining them. We got the rule name to quickly match the code with the grammar. --- lib/spargebra/src/parser.rs | 179 ------------------------------------ 1 file changed, 179 deletions(-) diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 0f85b830..51b07800 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -958,31 +958,24 @@ fn variable() -> Variable { parser! 
{ //See https://www.w3.org/TR/turtle/#sec-grammar grammar parser(state: &mut ParserState) for str { - //[1] pub rule QueryUnit() -> Query = Query() - //[2] rule Query() -> Query = _ Prologue() _ q:(SelectQuery() / ConstructQuery() / DescribeQuery() / AskQuery()) _ { q } - //[3] pub rule UpdateInit() -> Vec = Update() - //[4] rule Prologue() = (BaseDecl() _ / PrefixDecl() _)* {} - //[5] rule BaseDecl() = i("BASE") _ i:IRIREF() { state.base_iri = Some(i) } - //[6] rule PrefixDecl() = i("PREFIX") _ ns:PNAME_NS() _ i:IRIREF() { state.namespaces.insert(ns.into(), i.into_inner()); } - //[7] rule SelectQuery() -> Query = s:SelectClause() _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Select { dataset: d, @@ -991,12 +984,10 @@ parser! { }) } - //[8] rule SubSelect() -> GraphPattern = s:SelectClause() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? build_select(s, w, g, h, o, l, v, state) } - //[9] rule SelectClause() -> Selection = i("SELECT") _ Selection_init() o:SelectClause_option() _ v:SelectClause_variables() { Selection { option: o, @@ -1017,7 +1008,6 @@ parser! { v:Var() _ { SelectionMember::Variable(v) } / "(" _ e:Expression() _ i("AS") _ v:Var() _ ")" _ { SelectionMember::Expression(e, v) } - //[10] rule ConstructQuery() -> Query = i("CONSTRUCT") _ c:ConstructTemplate() _ d:DatasetClauses() _ w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Construct { @@ -1042,7 +1032,6 @@ parser! { rule ConstructQuery_optional_triple_template() -> Vec = TriplesTemplate() / { Vec::new() } - //[11] rule DescribeQuery() -> Query = i("DESCRIBE") _ "*" _ d:DatasetClauses() w:WhereClause()? _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? 
Ok(Query::Describe { @@ -1066,7 +1055,6 @@ parser! { } rule DescribeQuery_item() -> NamedNodePattern = i:VarOrIri() _ { i } - //[12] rule AskQuery() -> Query = i("ASK") _ d:DatasetClauses() w:WhereClause() _ g:GroupClause()? _ h:HavingClause()? _ o:OrderClause()? _ l:LimitOffsetClauses()? _ v:ValuesClause() {? Ok(Query::Ask { dataset: d, @@ -1075,7 +1063,6 @@ parser! { }) } - //[13] rule DatasetClause() -> (Option, Option) = i("FROM") _ d:(DefaultGraphClause() / NamedGraphClause()) { d } rule DatasetClauses() -> Option = d:DatasetClause() ** (_) { if d.is_empty() { @@ -1096,25 +1083,20 @@ parser! { }) } - //[14] rule DefaultGraphClause() -> (Option, Option) = s:SourceSelector() { (Some(s), None) } - //[15] rule NamedGraphClause() -> (Option, Option) = i("NAMED") _ s:SourceSelector() { (None, Some(s)) } - //[16] rule SourceSelector() -> NamedNode = iri() - //[17] rule WhereClause() -> GraphPattern = i("WHERE")? _ p:GroupGraphPattern() { p } - //[19] rule GroupClause() -> (Vec, Vec<(Expression,Variable)>) = i("GROUP") _ i("BY") _ c:GroupCondition_item()+ { let mut projections: Vec<(Expression,Variable)> = Vec::new(); let clauses = c.into_iter().map(|(e, vo)| { @@ -1130,7 +1112,6 @@ parser! { } rule GroupCondition_item() -> (Expression, Option) = c:GroupCondition() _ { c } - //[20] rule GroupCondition() -> (Expression, Option) = e:BuiltInCall() { (e, None) } / e:FunctionCall() { (e, None) } / @@ -1138,75 +1119,59 @@ parser! { e:Var() { (e.into(), None) } rule GroupCondition_as() -> Variable = i("AS") _ v:Var() _ { v } - //[21] rule HavingClause() -> Expression = i("HAVING") _ e:HavingCondition()+ {? 
not_empty_fold(e.into_iter(), |a, b| Expression::And(Box::new(a), Box::new(b))) } - //[22] rule HavingCondition() -> Expression = Constraint() - //[23] rule OrderClause() -> Vec = i("ORDER") _ i("BY") _ c:OrderClause_item()+ { c } rule OrderClause_item() -> OrderExpression = c:OrderCondition() _ { c } - //[24] rule OrderCondition() -> OrderExpression = i("ASC") _ e: BrackettedExpression() { OrderExpression::Asc(e) } / i("DESC") _ e: BrackettedExpression() { OrderExpression::Desc(e) } / e: Constraint() { OrderExpression::Asc(e) } / v: Var() { OrderExpression::Asc(Expression::from(v)) } - //[25] rule LimitOffsetClauses() -> (usize, Option) = l:LimitClause() _ o:OffsetClause()? { (o.unwrap_or(0), Some(l)) } / o:OffsetClause() _ l:LimitClause()? { (o, l) } - //[26] rule LimitClause() -> usize = i("LIMIT") _ l:$(INTEGER()) {? usize::from_str(l).map_err(|_| "The query limit should be a non negative integer") } - //[27] rule OffsetClause() -> usize = i("OFFSET") _ o:$(INTEGER()) {? usize::from_str(o).map_err(|_| "The query offset should be a non negative integer") } - //[28] rule ValuesClause() -> Option = i("VALUES") _ p:DataBlock() { Some(p) } / { None } - - //[29] rule Update() -> Vec = _ Prologue() _ u:(Update1() ** (_ ";" _)) _ ( ";" _)? { u.into_iter().flatten().collect() } - //[30] rule Update1() -> Vec = Load() / Clear() / Drop() / Add() / Move() / Copy() / Create() / InsertData() / DeleteData() / DeleteWhere() / Modify() rule Update1_silent() -> bool = i("SILENT") { true } / { false } - //[31] rule Load() -> Vec = i("LOAD") _ silent:Update1_silent() _ source:iri() _ destination:Load_to()? 
{ vec![GraphUpdateOperation::Load { silent, source, destination: destination.map_or(GraphName::DefaultGraph, GraphName::NamedNode) }] } rule Load_to() -> NamedNode = i("INTO") _ g: GraphRef() { g } - //[32] rule Clear() -> Vec = i("CLEAR") _ silent:Update1_silent() _ graph:GraphRefAll() { vec![GraphUpdateOperation::Clear { silent, graph }] } - //[33] rule Drop() -> Vec = i("DROP") _ silent:Update1_silent() _ graph:GraphRefAll() { vec![GraphUpdateOperation::Drop { silent, graph }] } - //[34] rule Create() -> Vec = i("CREATE") _ silent:Update1_silent() _ graph:GraphRef() { vec![GraphUpdateOperation::Create { silent, graph }] } - //[35] rule Add() -> Vec = i("ADD") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { // Rewriting defined by https://www.w3.org/TR/sparql11-update/#add if from == to { @@ -1217,7 +1182,6 @@ parser! { } } - //[36] rule Move() -> Vec = i("MOVE") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { // Rewriting defined by https://www.w3.org/TR/sparql11-update/#move if from == to { @@ -1228,7 +1192,6 @@ parser! { } } - //[37] rule Copy() -> Vec = i("COPY") _ silent:Update1_silent() _ from:GraphOrDefault() _ i("TO") _ to:GraphOrDefault() { // Rewriting defined by https://www.w3.org/TR/sparql11-update/#copy if from == to { @@ -1239,17 +1202,14 @@ parser! { } } - //[38] rule InsertData() -> Vec = i("INSERT") _ i("DATA") _ data:QuadData() { vec![GraphUpdateOperation::InsertData { data }] } - //[39] rule DeleteData() -> Vec = i("DELETE") _ i("DATA") _ data:GroundQuadData() { vec![GraphUpdateOperation::DeleteData { data }] } - //[40] rule DeleteWhere() -> Vec = i("DELETE") _ i("WHERE") _ d:QuadPattern() {? let pattern = d.iter().map(|q| { let bgp = GraphPattern::Bgp { patterns: vec![TriplePattern::new(q.subject.clone(), q.predicate.clone(), q.object.clone())] }; @@ -1268,7 +1228,6 @@ parser! { }]) } - //[41] rule Modify() -> Vec = with:Modify_with()? 
_ Modify_clear() c:Modify_clauses() _ u:(UsingClause() ** (_)) _ i("WHERE") _ pattern:GroupGraphPattern() { let (delete, insert) = c; let mut delete = delete.unwrap_or_default(); @@ -1336,15 +1295,12 @@ parser! { state.currently_used_bnodes.clear(); } - //[42] rule DeleteClause() -> Vec = i("DELETE") _ q:QuadPattern() {? q.into_iter().map(GroundQuadPattern::try_from).collect::,_>>().map_err(|_| "Blank nodes are not allowed in DELETE WHERE") } - //[43] rule InsertClause() -> Vec = i("INSERT") _ q:QuadPattern() { q } - //[44] rule UsingClause() -> (Option, Option) = i("USING") _ d:(UsingClause_default() / UsingClause_named()) { d } rule UsingClause_default() -> (Option, Option) = i:iri() { (Some(i), None) @@ -1353,26 +1309,21 @@ parser! { (None, Some(i)) } - //[45] rule GraphOrDefault() -> GraphName = i("DEFAULT") { GraphName::DefaultGraph } / (i("GRAPH") _)? g:iri() { GraphName::NamedNode(g) } - //[46] rule GraphRef() -> NamedNode = i("GRAPH") _ g:iri() { g } - //[47] rule GraphRefAll() -> GraphTarget = i: GraphRef() { i.into() } / i("DEFAULT") { GraphTarget::DefaultGraph } / i("NAMED") { GraphTarget::NamedGraphs } / i("ALL") { GraphTarget::AllGraphs } - //[48] rule QuadPattern() -> Vec = "{" _ q:Quads() _ "}" { q } - //[49] rule QuadData() -> Vec = "{" _ q:Quads() _ "}" {? q.into_iter().map(Quad::try_from).collect::, ()>>().map_err(|_| "Variables are not allowed in INSERT DATA") } @@ -1380,7 +1331,6 @@ parser! { q.into_iter().map(|q| GroundQuad::try_from(Quad::try_from(q)?)).collect::, ()>>().map_err(|_| "Variables and blank nodes are not allowed in DELETE DATA") } - //[50] rule Quads() -> Vec = q:(Quads_TriplesTemplate() / Quads_QuadsNotTriples()) ** (_) { q.into_iter().flatten().collect() } @@ -1389,18 +1339,15 @@ parser! { } //TODO: return iter? rule Quads_QuadsNotTriples() -> Vec = q:QuadsNotTriples() _ "."? { q } - //[51] rule QuadsNotTriples() -> Vec = i("GRAPH") _ g:VarOrIri() _ "{" _ t:TriplesTemplate()? 
_ "}" { t.unwrap_or_default().into_iter().map(|t| QuadPattern::new(t.subject, t.predicate, t.object, g.clone())).collect() } - //[52] rule TriplesTemplate() -> Vec = ts:TriplesTemplate_inner() ++ (".") ("." _)? { ts.into_iter().flatten().collect() } rule TriplesTemplate_inner() -> Vec = _ t:TriplesSameSubject() _ { t } - //[53] rule GroupGraphPattern() -> GraphPattern = "{" _ GroupGraphPattern_clear() p:GroupGraphPatternSub() GroupGraphPattern_clear() _ "}" { p } / "{" _ GroupGraphPattern_clear() p:SubSelect() GroupGraphPattern_clear() _ "}" { p } @@ -1410,7 +1357,6 @@ parser! { state.currently_used_bnodes.clear(); } - //[54] rule GroupGraphPatternSub() -> GraphPattern = a:TriplesBlock()? _ b:GroupGraphPatternSub_item()* {? let mut filter: Option = None; let mut g = a.map_or_else(GraphPattern::default, build_bgp); @@ -1472,16 +1418,13 @@ parser! { result } - //[55] rule TriplesBlock() -> Vec = hs:TriplesBlock_inner() ++ (".") ("." _)? { hs.into_iter().flatten().collect() } rule TriplesBlock_inner() -> Vec = _ h:TriplesSameSubjectPath() _ { h } - //[56] rule GraphPatternNotTriples() -> PartialGraphPattern = GroupOrUnionGraphPattern() / OptionalGraphPattern() / LateralGraphPattern() / MinusGraphPattern() / GraphGraphPattern() / ServiceGraphPattern() / Filter() / Bind() / InlineData() - //[57] rule OptionalGraphPattern() -> PartialGraphPattern = i("OPTIONAL") _ p:GroupGraphPattern() { if let GraphPattern::Filter { expr, inner } = p { PartialGraphPattern::Optional(*inner, Some(expr)) @@ -1495,36 +1438,29 @@ parser! 
{ #[cfg(not(feature = "sep-0006"))]{Err("The LATERAL modifier is not supported")} } - //[58] rule GraphGraphPattern() -> PartialGraphPattern = i("GRAPH") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Graph { name, inner: Box::new(p) }) } - //[59] rule ServiceGraphPattern() -> PartialGraphPattern = i("SERVICE") _ i("SILENT") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Service { name, inner: Box::new(p), silent: true }) } / i("SERVICE") _ name:VarOrIri() _ p:GroupGraphPattern() { PartialGraphPattern::Other(GraphPattern::Service{ name, inner: Box::new(p), silent: false }) } - //[60] rule Bind() -> PartialGraphPattern = i("BIND") _ "(" _ e:Expression() _ i("AS") _ v:Var() _ ")" { PartialGraphPattern::Bind(e, v) } - //[61] rule InlineData() -> PartialGraphPattern = i("VALUES") _ p:DataBlock() { PartialGraphPattern::Other(p) } - //[62] rule DataBlock() -> GraphPattern = l:(InlineDataOneVar() / InlineDataFull()) { GraphPattern::Values { variables: l.0, bindings: l.1 } } - //[63] rule InlineDataOneVar() -> (Vec, Vec>>) = var:Var() _ "{" _ d:InlineDataOneVar_value()* "}" { (vec![var], d) } rule InlineDataOneVar_value() -> Vec> = t:DataBlockValue() _ { vec![t] } - //[64] rule InlineDataFull() -> (Vec, Vec>>) = "(" _ vars:InlineDataFull_var()* _ ")" _ "{" _ vals:InlineDataFull_values()* "}" {? if vals.iter().all(|vs| vs.len() == vars.len()) { Ok((vars, vals)) @@ -1536,7 +1472,6 @@ parser! { rule InlineDataFull_values() -> Vec> = "(" _ v:InlineDataFull_value()* _ ")" _ { v } rule InlineDataFull_value() -> Option = v:DataBlockValue() _ { v } - //[65] rule DataBlockValue() -> Option = t:EmbTriple() {? #[cfg(feature = "rdf-star")]{Ok(Some(t.into()))} @@ -1548,12 +1483,10 @@ parser! 
{ l:BooleanLiteral() { Some(l.into()) } / i("UNDEF") { None } - //[66] rule MinusGraphPattern() -> PartialGraphPattern = i("MINUS") _ p: GroupGraphPattern() { PartialGraphPattern::Minus(p) } - //[67] rule GroupOrUnionGraphPattern() -> PartialGraphPattern = p:GroupOrUnionGraphPattern_item() **<1,> (i("UNION") _) {? not_empty_fold(p.into_iter(), |a, b| { GraphPattern::Union { left: Box::new(a), right: Box::new(b) } @@ -1561,41 +1494,33 @@ parser! { } rule GroupOrUnionGraphPattern_item() -> GraphPattern = p:GroupGraphPattern() _ { p } - //[68] rule Filter() -> PartialGraphPattern = i("FILTER") _ c:Constraint() { PartialGraphPattern::Filter(c) } - //[69] rule Constraint() -> Expression = BrackettedExpression() / FunctionCall() / BuiltInCall() - //[70] rule FunctionCall() -> Expression = f: iri() _ a: ArgList() { Expression::FunctionCall(Function::Custom(f), a) } - //[71] rule ArgList() -> Vec = "(" _ e:ArgList_item() **<1,> ("," _) _ ")" { e } / NIL() { Vec::new() } rule ArgList_item() -> Expression = e:Expression() _ { e } - //[72] rule ExpressionList() -> Vec = "(" _ e:ExpressionList_item() **<1,> ("," _) ")" { e } / NIL() { Vec::new() } rule ExpressionList_item() -> Expression = e:Expression() _ { e } - //[73] rule ConstructTemplate() -> Vec = "{" _ t:ConstructTriples() _ "}" { t } - //[74] rule ConstructTriples() -> Vec = p:ConstructTriples_item() ** ("." _) "."? { p.into_iter().flatten().collect() } rule ConstructTriples_item() -> Vec = t:TriplesSameSubject() _ { t } - //[75] rule TriplesSameSubject() -> Vec = s:VarOrTermOrEmbTP() _ po:PropertyListNotEmpty() {? let mut patterns = po.patterns; @@ -1617,12 +1542,10 @@ parser! 
{ Ok(patterns) } - //[76] rule PropertyList() -> FocusedTriplePattern)>> = PropertyListNotEmpty() / { FocusedTriplePattern::default() } - //[77] rule PropertyListNotEmpty() -> FocusedTriplePattern)>> = l:PropertyListNotEmpty_item() **<1,> (";" _) { l.into_iter().fold(FocusedTriplePattern::)>>::default(), |mut a, b| { a.focus.push(b.focus); @@ -1637,10 +1560,8 @@ parser! { } } - //[78] rule Verb() -> NamedNodePattern = VarOrIri() / "a" { rdf::TYPE.into_owned().into() } - //[79] rule ObjectList() -> FocusedTriplePattern> = o:ObjectList_item() **<1,> ("," _) { o.into_iter().fold(FocusedTriplePattern::>::default(), |mut a, b| { a.focus.push(b.focus); @@ -1650,7 +1571,6 @@ parser! { } rule ObjectList_item() -> FocusedTriplePattern = o:Object() _ { o } - //[80] rule Object() -> FocusedTriplePattern = g:GraphNode() _ a:AnnotationPattern()? { if let Some(a) = a { let mut patterns = g.patterns; @@ -1673,7 +1593,6 @@ parser! { } } - //[81] rule TriplesSameSubjectPath() -> Vec = s:VarOrTermOrEmbTP() _ po:PropertyListPathNotEmpty() {? let mut patterns = po.patterns; @@ -1695,12 +1614,10 @@ parser! { Ok(patterns) } - //[82] rule PropertyListPath() -> FocusedTripleOrPathPattern)>> = PropertyListPathNotEmpty() / { FocusedTripleOrPathPattern::default() } - //[83] rule PropertyListPathNotEmpty() -> FocusedTripleOrPathPattern)>> = hp:(VerbPath() / VerbSimple()) _ ho:ObjectListPath() _ t:PropertyListPathNotEmpty_item()* { t.into_iter().flatten().fold(FocusedTripleOrPathPattern { focus: vec![(hp, ho.focus)], @@ -1721,17 +1638,14 @@ parser! { } } - //[84] rule VerbPath() -> VariableOrPropertyPath = p:Path() { p.into() } - //[85] rule VerbSimple() -> VariableOrPropertyPath = v:Var() { v.into() } - //[86] rule ObjectListPath() -> FocusedTripleOrPathPattern> = o:ObjectListPath_item() **<1,> ("," _) { o.into_iter().fold(FocusedTripleOrPathPattern::>::default(), |mut a, b| { a.focus.push(b.focus); @@ -1741,7 +1655,6 @@ parser! 
{ } rule ObjectListPath_item() -> FocusedTripleOrPathPattern = o:ObjectPath() _ { o } - //[87] rule ObjectPath() -> FocusedTripleOrPathPattern = g:GraphNodePath() _ a:AnnotationPatternPath()? { if let Some(a) = a { let mut patterns = g.patterns; @@ -1764,10 +1677,8 @@ parser! { } } - //[88] rule Path() -> PropertyPathExpression = PathAlternative() - //[89] rule PathAlternative() -> PropertyPathExpression = p:PathAlternative_item() **<1,> ("|" _) {? not_empty_fold(p.into_iter(), |a, b| { PropertyPathExpression::Alternative(Box::new(a), Box::new(b)) @@ -1775,7 +1686,6 @@ parser! { } rule PathAlternative_item() -> PropertyPathExpression = p:PathSequence() _ { p } - //[90] rule PathSequence() -> PropertyPathExpression = p:PathSequence_item() **<1,> ("/" _) {? not_empty_fold(p.into_iter(), |a, b| { PropertyPathExpression::Sequence(Box::new(a), Box::new(b)) @@ -1783,7 +1693,6 @@ parser! { } rule PathSequence_item() -> PropertyPathExpression = p:PathEltOrInverse() _ { p } - //[91] rule PathElt() -> PropertyPathExpression = p:PathPrimary() _ o:PathElt_op()? { match o { Some('?') => PropertyPathExpression::ZeroOrOne(Box::new(p)), @@ -1798,19 +1707,16 @@ parser! { "+" { '+' } / "?" !(['0'..='9'] / PN_CHARS_U()) { '?' } // We mandate that this is not a variable - //[92] rule PathEltOrInverse() -> PropertyPathExpression = "^" _ p:PathElt() { PropertyPathExpression::Reverse(Box::new(p)) } / PathElt() - //[94] rule PathPrimary() -> PropertyPathExpression = v:iri() { v.into() } / "a" { rdf::TYPE.into_owned().into() } / "!" _ p:PathNegatedPropertySet() { p } / "(" _ p:Path() _ ")" { p } - //[95] rule PathNegatedPropertySet() -> PropertyPathExpression = "(" _ p:PathNegatedPropertySet_item() **<1,> ("|" _) ")" { let mut direct = Vec::new(); @@ -1840,17 +1746,14 @@ parser! 
{ } rule PathNegatedPropertySet_item() -> Either = p:PathOneInPropertySet() _ { p } - //[96] rule PathOneInPropertySet() -> Either = "^" _ v:iri() { Either::Right(v) } / "^" _ "a" { Either::Right(rdf::TYPE.into()) } / v:iri() { Either::Left(v) } / "a" { Either::Left(rdf::TYPE.into()) } - //[98] rule TriplesNode() -> FocusedTriplePattern = Collection() / BlankNodePropertyList() - //[99] rule BlankNodePropertyList() -> FocusedTriplePattern = "[" _ po:PropertyListNotEmpty() _ "]" {? let mut patterns = po.patterns; let mut bnode = TermPattern::from(BlankNode::default()); @@ -1865,10 +1768,8 @@ parser! { }) } - //[100] rule TriplesNodePath() -> FocusedTripleOrPathPattern = CollectionPath() / BlankNodePropertyListPath() - //[101] rule BlankNodePropertyListPath() -> FocusedTripleOrPathPattern = "[" _ po:PropertyListPathNotEmpty() _ "]" {? let mut patterns = po.patterns; let mut bnode = TermPattern::from(BlankNode::default()); @@ -1883,7 +1784,6 @@ parser! { }) } - //[102] rule Collection() -> FocusedTriplePattern = "(" _ o:Collection_item()+ ")" { let mut patterns: Vec = Vec::new(); let mut current_list_node = TermPattern::from(rdf::NIL.into_owned()); @@ -1901,7 +1801,6 @@ parser! { } rule Collection_item() -> FocusedTriplePattern = o:GraphNode() _ { o } - //[103] rule CollectionPath() -> FocusedTripleOrPathPattern = "(" _ o:CollectionPath_item()+ _ ")" { let mut patterns: Vec = Vec::new(); let mut current_list_node = TermPattern::from(rdf::NIL.into_owned()); @@ -1919,30 +1818,24 @@ parser! 
{ } rule CollectionPath_item() -> FocusedTripleOrPathPattern = p:GraphNodePath() _ { p } - //[104] rule GraphNode() -> FocusedTriplePattern = t:VarOrTermOrEmbTP() { FocusedTriplePattern::new(t) } / TriplesNode() - //[105] rule GraphNodePath() -> FocusedTripleOrPathPattern = t:VarOrTermOrEmbTP() { FocusedTripleOrPathPattern::new(t) } / TriplesNodePath() - //[106] rule VarOrTerm() -> TermPattern = v:Var() { v.into() } / t:GraphTerm() { t.into() } - //[107] rule VarOrIri() -> NamedNodePattern = v:Var() { v.into() } / i:iri() { i.into() } - //[108] rule Var() -> Variable = name:(VAR1() / VAR2()) { Variable::new_unchecked(name) } - //[109] rule GraphTerm() -> Term = i:iri() { i.into() } / l:RDFLiteral() { l.into() } / @@ -1951,25 +1844,20 @@ parser! { b:BlankNode() { b.into() } / NIL() { rdf::NIL.into_owned().into() } - //[110] rule Expression() -> Expression = e:ConditionalOrExpression() {e} - //[111] rule ConditionalOrExpression() -> Expression = e:ConditionalOrExpression_item() **<1,> ("||" _) {? not_empty_fold(e.into_iter(), |a, b| Expression::Or(Box::new(a), Box::new(b))) } rule ConditionalOrExpression_item() -> Expression = e:ConditionalAndExpression() _ { e } - //[112] rule ConditionalAndExpression() -> Expression = e:ConditionalAndExpression_item() **<1,> ("&&" _) {? not_empty_fold(e.into_iter(), |a, b| Expression::And(Box::new(a), Box::new(b))) } rule ConditionalAndExpression_item() -> Expression = e:ValueLogical() _ { e } - //[113] rule ValueLogical() -> Expression = RelationalExpression() - //[114] rule RelationalExpression() -> Expression = a:NumericExpression() _ o: RelationalExpression_inner()? { match o { Some(("=", Some(b), None)) => Expression::Equal(Box::new(a), Box::new(b)), Some(("!=", Some(b), None)) => Expression::Not(Box::new(Expression::Equal(Box::new(a), Box::new(b)))), @@ -1987,10 +1875,8 @@ parser! 
{ i("IN") _ l:ExpressionList() { ("IN", None, Some(l)) } / i("NOT") _ i("IN") _ l:ExpressionList() { ("NOT IN", None, Some(l)) } - //[115] rule NumericExpression() -> Expression = AdditiveExpression() - //[116] rule AdditiveExpression() -> Expression = a:MultiplicativeExpression() _ o:AdditiveExpression_inner()? { match o { Some(("+", b)) => Expression::Add(Box::new(a), Box::new(b)), Some(("-", b)) => Expression::Subtract(Box::new(a), Box::new(b)), @@ -2001,7 +1887,6 @@ parser! { (s, e) } - //[117] rule MultiplicativeExpression() -> Expression = a:UnaryExpression() _ o: MultiplicativeExpression_inner()? { match o { Some(("*", b)) => Expression::Multiply(Box::new(a), Box::new(b)), Some(("/", b)) => Expression::Divide(Box::new(a), Box::new(b)), @@ -2012,7 +1897,6 @@ parser! { (s, e) } - //[118] rule UnaryExpression() -> Expression = s: $("!" / "+" / "-")? _ e:PrimaryExpression() { match s { Some("!") => Expression::Not(Box::new(e)), Some("+") => Expression::UnaryPlus(Box::new(e)), @@ -2021,7 +1905,6 @@ parser! { None => e, } } - //[119] rule PrimaryExpression() -> Expression = BrackettedExpression() / ExprEmbTP() / @@ -2032,10 +1915,8 @@ parser! { l:BooleanLiteral() { l.into() } / BuiltInCall() - //[120] rule BrackettedExpression() -> Expression = "(" _ e:Expression() _ ")" { e } - //[121] rule BuiltInCall() -> Expression = a:Aggregate() {? state.new_aggregation(a).map(Into::into) } / i("STR") _ "(" _ e:Expression() _ ")" { Expression::FunctionCall(Function::Str, vec![e]) } / @@ -2116,7 +1997,6 @@ parser! { #[cfg(not(feature = "sep-0002"))]{Err("The ADJUST function is only available in SPARQL 1.2 SEP 0002")} } - //[122] rule RegexExpression() -> Expression = i("REGEX") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::Regex, vec![a, b, c]) } / i("REGEX") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::Regex, vec![a, b]) } @@ -2127,18 +2007,14 @@ parser! 
{ i("SUBSTR") _ "(" _ a:Expression() _ "," _ b:Expression() _ ")" { Expression::FunctionCall(Function::SubStr, vec![a, b]) } - //[124] rule StrReplaceExpression() -> Expression = i("REPLACE") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ "," _ d:Expression() _ ")" { Expression::FunctionCall(Function::Replace, vec![a, b, c, d]) } / i("REPLACE") _ "(" _ a:Expression() _ "," _ b:Expression() _ "," _ c:Expression() _ ")" { Expression::FunctionCall(Function::Replace, vec![a, b, c]) } - //[125] rule ExistsFunc() -> Expression = i("EXISTS") _ p:GroupGraphPattern() { Expression::Exists(Box::new(p)) } - //[126] rule NotExistsFunc() -> Expression = i("NOT") _ i("EXISTS") _ p:GroupGraphPattern() { Expression::Not(Box::new(Expression::Exists(Box::new(p)))) } - //[127] rule Aggregate() -> AggregateExpression = i("COUNT") _ "(" _ i("DISTINCT") _ "*" _ ")" { AggregateExpression::Count { expr: None, distinct: true } } / i("COUNT") _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Count { expr: Some(Box::new(e)), distinct: true } } / @@ -2161,7 +2037,6 @@ parser! { name:iri() _ "(" _ i("DISTINCT") _ e:Expression() _ ")" { AggregateExpression::Custom { name, expr: Box::new(e), distinct: true } } / name:iri() _ "(" _ e:Expression() _ ")" { AggregateExpression::Custom { name, expr: Box::new(e), distinct: false } } - //[128] rule iriOrFunction() -> Expression = i: iri() _ a: ArgList()? { match a { Some(a) => Expression::FunctionCall(Function::Custom(i), a), @@ -2169,48 +2044,39 @@ parser! 
{ } } - //[129] rule RDFLiteral() -> Literal = value:String() _ "^^" _ datatype:iri() { Literal::new_typed_literal(value, datatype) } / value:String() _ language:LANGTAG() { Literal::new_language_tagged_literal_unchecked(value, language.into_inner()) } / value:String() { Literal::new_simple_literal(value) } - //[130] rule NumericLiteral() -> Literal = NumericLiteralUnsigned() / NumericLiteralPositive() / NumericLiteralNegative() - //[131] rule NumericLiteralUnsigned() -> Literal = d:$(DOUBLE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / d:$(DECIMAL()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / i:$(INTEGER()) { Literal::new_typed_literal(i, xsd::INTEGER) } - //[132] rule NumericLiteralPositive() -> Literal = d:$(DOUBLE_POSITIVE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / d:$(DECIMAL_POSITIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / i:$(INTEGER_POSITIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } - //[133] rule NumericLiteralNegative() -> Literal = d:$(DOUBLE_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DOUBLE) } / d:$(DECIMAL_NEGATIVE()) { Literal::new_typed_literal(d, xsd::DECIMAL) } / i:$(INTEGER_NEGATIVE()) { Literal::new_typed_literal(i, xsd::INTEGER) } - //[134] rule BooleanLiteral() -> Literal = "true" { Literal::new_typed_literal("true", xsd::BOOLEAN) } / "false" { Literal::new_typed_literal("false", xsd::BOOLEAN) } - //[135] rule String() -> String = STRING_LITERAL_LONG1() / STRING_LITERAL_LONG2() / STRING_LITERAL1() / STRING_LITERAL2() - //[136] rule iri() -> NamedNode = i:(IRIREF() / PrefixedName()) { NamedNode::new_unchecked(i.into_inner()) } - //[137] rule PrefixedName() -> Iri = PNAME_LN() / ns:PNAME_NS() {? if let Some(iri) = state.namespaces.get(ns).cloned() { Iri::parse(iri).map_err(|_| "IRI parsing failed") @@ -2218,7 +2084,6 @@ parser! { Err("Prefix not found") } } - //[138] rule BlankNode() -> BlankNode = id:BLANK_NODE_LABEL() {? 
let node = BlankNode::new_unchecked(id); if state.used_bnodes.contains(&node) { @@ -2229,17 +2094,14 @@ parser! { } } / ANON() { BlankNode::default() } - //[139] rule IRIREF() -> Iri = "<" i:$((!['>'] [_])*) ">" {? state.parse_iri(i).map_err(|_| "IRI parsing failed") } - //[140] rule PNAME_NS() -> &'input str = ns:$(PN_PREFIX()?) ":" { ns } - //[141] rule PNAME_LN() -> Iri = ns:PNAME_NS() local:$(PN_LOCAL()) {? if let Some(base) = state.namespaces.get(ns) { let mut iri = base.clone(); @@ -2250,127 +2112,93 @@ parser! { } } - //[142] rule BLANK_NODE_LABEL() -> &'input str = "_:" b:$((['0'..='9'] / PN_CHARS_U()) PN_CHARS()* ("."+ PN_CHARS()+)*) { b } - //[143] rule VAR1() -> &'input str = "?" v:$(VARNAME()) { v } - //[144] rule VAR2() -> &'input str = "$" v:$(VARNAME()) { v } - //[145] rule LANGTAG() -> LanguageTag = "@" l:$(['a' ..= 'z' | 'A' ..= 'Z']+ ("-" ['a' ..= 'z' | 'A' ..= 'Z' | '0' ..= '9']+)*) {? LanguageTag::parse(l.to_ascii_lowercase()).map_err(|_| "language tag parsing failed") } - //[146] rule INTEGER() = ['0'..='9']+ - //[147] rule DECIMAL() = ['0'..='9']+ "." ['0'..='9']* / ['0'..='9']* "." ['0'..='9']+ - //[148] rule DOUBLE() = (['0'..='9']+ "." ['0'..='9']* / "." ['0'..='9']+ / ['0'..='9']+) EXPONENT() - //[149] rule INTEGER_POSITIVE() = "+" _ INTEGER() - //[150] rule DECIMAL_POSITIVE() = "+" _ DECIMAL() - //[151] rule DOUBLE_POSITIVE() = "+" _ DOUBLE() - //[152] rule INTEGER_NEGATIVE() = "-" _ INTEGER() - //[153] rule DECIMAL_NEGATIVE() = "-" _ DECIMAL() - //[154] rule DOUBLE_NEGATIVE() = "-" _ DOUBLE() - //[155] rule EXPONENT() = ['e' | 'E'] ['+' | '-']? 
['0'..='9']+ - //[156] rule STRING_LITERAL1() -> String = "'" l:$((STRING_LITERAL1_simple_char() / ECHAR())*) "'" { unescape_echars(l).to_string() } rule STRING_LITERAL1_simple_char() = !['\u{27}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] - //[157] rule STRING_LITERAL2() -> String = "\"" l:$((STRING_LITERAL2_simple_char() / ECHAR())*) "\"" { unescape_echars(l).to_string() } rule STRING_LITERAL2_simple_char() = !['\u{22}' | '\u{5C}' | '\u{A}' | '\u{D}'] [_] - //[158] rule STRING_LITERAL_LONG1() -> String = "'''" l:$(STRING_LITERAL_LONG1_inner()*) "'''" { unescape_echars(l).to_string() } rule STRING_LITERAL_LONG1_inner() = ("''" / "'")? (STRING_LITERAL_LONG1_simple_char() / ECHAR()) rule STRING_LITERAL_LONG1_simple_char() = !['\'' | '\\'] [_] - //[159] rule STRING_LITERAL_LONG2() -> String = "\"\"\"" l:$(STRING_LITERAL_LONG2_inner()*) "\"\"\"" { unescape_echars(l).to_string() } rule STRING_LITERAL_LONG2_inner() = ("\"\"" / "\"")? (STRING_LITERAL_LONG2_simple_char() / ECHAR()) rule STRING_LITERAL_LONG2_simple_char() = !['"' | '\\'] [_] - //[160] rule ECHAR() = "\\" ['t' | 'b' | 'n' | 'r' | 'f' | '"' |'\'' | '\\'] - //[161] rule NIL() = "(" WS()* ")" - //[162] rule WS() = quiet! 
{ ['\u{20}' | '\u{9}' | '\u{D}' | '\u{A}'] } - //[163] rule ANON() = "[" WS()* "]" - //[164] rule PN_CHARS_BASE() = ['A' ..= 'Z' | 'a' ..= 'z' | '\u{00C0}' ..='\u{00D6}' | '\u{00D8}'..='\u{00F6}' | '\u{00F8}'..='\u{02FF}' | '\u{0370}'..='\u{037D}' | '\u{037F}'..='\u{1FFF}' | '\u{200C}'..='\u{200D}' | '\u{2070}'..='\u{218F}' | '\u{2C00}'..='\u{2FEF}' | '\u{3001}'..='\u{D7FF}' | '\u{F900}'..='\u{FDCF}' | '\u{FDF0}'..='\u{FFFD}'] - //[165] rule PN_CHARS_U() = ['_'] / PN_CHARS_BASE() - //[166] rule VARNAME() = (['0'..='9'] / PN_CHARS_U()) (['0' ..= '9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'] / PN_CHARS_U())* - //[167] rule PN_CHARS() = ['-' | '0' ..= '9' | '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}'] / PN_CHARS_U() - //[168] rule PN_PREFIX() = PN_CHARS_BASE() PN_CHARS()* ("."+ PN_CHARS()+)* - //[169] rule PN_LOCAL() = (PN_CHARS_U() / [':' | '0'..='9'] / PLX()) (PN_CHARS() / [':'] / PLX())* (['.']+ (PN_CHARS() / [':'] / PLX())+)? - //[170] rule PLX() = PERCENT() / PN_LOCAL_ESC() - //[171] rule PERCENT() = ['%'] HEX() HEX() - //[172] rule HEX() = ['0' ..= '9' | 'A' ..= 'F' | 'a' ..= 'f'] - //[173] rule PN_LOCAL_ESC() = ['\\'] ['_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%'] //TODO: added '/' to make tests pass but is it valid? - //[174] rule EmbTP() -> TriplePattern = "<<" _ s:EmbSubjectOrObject() _ p:Verb() _ o:EmbSubjectOrObject() _ ">>" { TriplePattern { subject: s, predicate: p, object: o } } - //[175] rule EmbTriple() -> GroundTriple = "<<" _ s:DataValueTerm() _ p:EmbTriple_p() _ o:DataValueTerm() _ ">>" {? Ok(GroundTriple { subject: s.try_into().map_err(|_| "Literals are not allowed in subject position of nested patterns")?, @@ -2380,7 +2208,6 @@ parser! { } rule EmbTriple_p() -> NamedNode = i: iri() { i } / "a" { rdf::TYPE.into() } - //[176] rule EmbSubjectOrObject() -> TermPattern = t:EmbTP() {? 
#[cfg(feature = "rdf-star")]{Ok(t.into())} @@ -2393,7 +2220,6 @@ parser! { l:NumericLiteral() { l.into() } / l:BooleanLiteral() { l.into() } - //[177] rule DataValueTerm() -> GroundTerm = i:iri() { i.into() } / l:RDFLiteral() { l.into() } / l:NumericLiteral() { l.into() } / @@ -2403,7 +2229,6 @@ parser! { #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} } - //[178] rule VarOrTermOrEmbTP() -> TermPattern = t:EmbTP() {? #[cfg(feature = "rdf-star")]{Ok(t.into())} @@ -2412,19 +2237,15 @@ parser! { v:Var() { v.into() } / t:GraphTerm() { t.into() } - //[179] rule AnnotationPattern() -> FocusedTriplePattern)>> = "{|" _ a:PropertyListNotEmpty() _ "|}" { a } - //[180] rule AnnotationPatternPath() -> FocusedTripleOrPathPattern)>> = "{|" _ a: PropertyListPathNotEmpty() _ "|}" { a } - //[181] rule ExprEmbTP() -> Expression = "<<" _ s:ExprVarOrTerm() _ p:Verb() _ o:ExprVarOrTerm() _ ">>" {? #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p.into(), o]))} #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} } - //[182] rule ExprVarOrTerm() -> Expression = ExprEmbTP() / i:iri() { i.into() } / From 8bec2e2ff990d779331c8b06afe7f285bb7db1ca Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 19 May 2023 18:51:28 +0200 Subject: [PATCH 25/45] SPARQL-star parser: align with SPARQL 1.2 draft Relevant PR: https://github.com/w3c/sparql-query/pull/78 --- lib/spargebra/src/parser.rs | 123 ++++++++++++++++-------------------- 1 file changed, 56 insertions(+), 67 deletions(-) diff --git a/lib/spargebra/src/parser.rs b/lib/spargebra/src/parser.rs index 51b07800..7779e31c 100644 --- a/lib/spargebra/src/parser.rs +++ b/lib/spargebra/src/parser.rs @@ -1473,7 +1473,7 @@ parser! { rule InlineDataFull_value() -> Option = v:DataBlockValue() _ { v } rule DataBlockValue() -> Option = - t:EmbTriple() {? + t:QuotedTripleData() {? 
#[cfg(feature = "rdf-star")]{Ok(Some(t.into()))} #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} } / @@ -1522,7 +1522,7 @@ parser! { rule ConstructTriples_item() -> Vec = t:TriplesSameSubject() _ { t } rule TriplesSameSubject() -> Vec = - s:VarOrTermOrEmbTP() _ po:PropertyListNotEmpty() {? + s:VarOrTerm() _ po:PropertyListNotEmpty() {? let mut patterns = po.patterns; for (p, os) in po.focus { for o in os { @@ -1571,7 +1571,7 @@ parser! { } rule ObjectList_item() -> FocusedTriplePattern = o:Object() _ { o } - rule Object() -> FocusedTriplePattern = g:GraphNode() _ a:AnnotationPattern()? { + rule Object() -> FocusedTriplePattern = g:GraphNode() _ a:Annotation()? { if let Some(a) = a { let mut patterns = g.patterns; patterns.extend(a.patterns); @@ -1594,7 +1594,7 @@ parser! { } rule TriplesSameSubjectPath() -> Vec = - s:VarOrTermOrEmbTP() _ po:PropertyListPathNotEmpty() {? + s:VarOrTerm() _ po:PropertyListPathNotEmpty() {? let mut patterns = po.patterns; for (p, os) in po.focus { for o in os { @@ -1655,7 +1655,7 @@ parser! { } rule ObjectListPath_item() -> FocusedTripleOrPathPattern = o:ObjectPath() _ { o } - rule ObjectPath() -> FocusedTripleOrPathPattern = g:GraphNodePath() _ a:AnnotationPatternPath()? { + rule ObjectPath() -> FocusedTripleOrPathPattern = g:GraphNodePath() _ a:AnnotationPath()? { if let Some(a) = a { let mut patterns = g.patterns; patterns.extend(a.patterns); @@ -1818,18 +1818,53 @@ parser! 
{ } rule CollectionPath_item() -> FocusedTripleOrPathPattern = p:GraphNodePath() _ { p } + + rule Annotation() -> FocusedTriplePattern)>> = "{|" _ a:PropertyListNotEmpty() _ "|}" { a } + + rule AnnotationPath() -> FocusedTripleOrPathPattern)>> = "{|" _ a: PropertyListPathNotEmpty() _ "|}" { a } + rule GraphNode() -> FocusedTriplePattern = - t:VarOrTermOrEmbTP() { FocusedTriplePattern::new(t) } / + t:VarOrTerm() { FocusedTriplePattern::new(t) } / TriplesNode() rule GraphNodePath() -> FocusedTripleOrPathPattern = - t:VarOrTermOrEmbTP() { FocusedTripleOrPathPattern::new(t) } / + t:VarOrTerm() { FocusedTripleOrPathPattern::new(t) } / TriplesNodePath() rule VarOrTerm() -> TermPattern = v:Var() { v.into() } / + t:QuotedTriple() {? + #[cfg(feature = "rdf-star")]{Ok(t.into())} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } / t:GraphTerm() { t.into() } + rule QuotedTriple() -> TriplePattern = "<<" _ s:VarOrTerm() _ p:Verb() _ o:VarOrTerm() _ ">>" {? + Ok(TriplePattern { + subject: s, + predicate: p, + object: o + }) + } + + rule QuotedTripleData() -> GroundTriple = "<<" _ s:DataValueTerm() _ p:QuotedTripleData_p() _ o:DataValueTerm() _ ">>" {? + Ok(GroundTriple { + subject: s.try_into().map_err(|_| "Literals are not allowed in subject position of nested patterns")?, + predicate: p, + object: o + }) + } + rule QuotedTripleData_p() -> NamedNode = i: iri() { i } / "a" { rdf::TYPE.into() } + + rule DataValueTerm() -> GroundTerm = i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + t:QuotedTripleData() {? + #[cfg(feature = "rdf-star")]{Ok(t.into())} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } + rule VarOrIri() -> NamedNodePattern = v:Var() { v.into() } / i:iri() { i.into() } @@ -1907,7 +1942,7 @@ parser! 
{ rule PrimaryExpression() -> Expression = BrackettedExpression() / - ExprEmbTP() / + ExprQuotedTriple() / iriOrFunction() / v:Var() { v.into() } / l:RDFLiteral() { l.into() } / @@ -1915,6 +1950,19 @@ parser! { l:BooleanLiteral() { l.into() } / BuiltInCall() + rule ExprVarOrTerm() -> Expression = + ExprQuotedTriple() / + i:iri() { i.into() } / + l:RDFLiteral() { l.into() } / + l:NumericLiteral() { l.into() } / + l:BooleanLiteral() { l.into() } / + v:Var() { v.into() } + + rule ExprQuotedTriple() -> Expression = "<<" _ s:ExprVarOrTerm() _ p:Verb() _ o:ExprVarOrTerm() _ ">>" {? + #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p.into(), o]))} + #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} + } + rule BrackettedExpression() -> Expression = "(" _ e:Expression() _ ")" { e } rule BuiltInCall() -> Expression = @@ -2195,65 +2243,6 @@ parser! { rule PN_LOCAL_ESC() = ['\\'] ['_' | '~' | '.' | '-' | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%'] //TODO: added '/' to make tests pass but is it valid? - rule EmbTP() -> TriplePattern = "<<" _ s:EmbSubjectOrObject() _ p:Verb() _ o:EmbSubjectOrObject() _ ">>" { - TriplePattern { subject: s, predicate: p, object: o } - } - - rule EmbTriple() -> GroundTriple = "<<" _ s:DataValueTerm() _ p:EmbTriple_p() _ o:DataValueTerm() _ ">>" {? - Ok(GroundTriple { - subject: s.try_into().map_err(|_| "Literals are not allowed in subject position of nested patterns")?, - predicate: p, - object: o - }) - } - rule EmbTriple_p() -> NamedNode = i: iri() { i } / "a" { rdf::TYPE.into() } - - rule EmbSubjectOrObject() -> TermPattern = - t:EmbTP() {? 
- #[cfg(feature = "rdf-star")]{Ok(t.into())} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triple patterns are only available in SPARQL-star")} - } / - v:Var() { v.into() } / - b:BlankNode() { b.into() } / - i:iri() { i.into() } / - l:RDFLiteral() { l.into() } / - l:NumericLiteral() { l.into() } / - l:BooleanLiteral() { l.into() } - - rule DataValueTerm() -> GroundTerm = i:iri() { i.into() } / - l:RDFLiteral() { l.into() } / - l:NumericLiteral() { l.into() } / - l:BooleanLiteral() { l.into() } / - t:EmbTriple() {? - #[cfg(feature = "rdf-star")]{Ok(t.into())} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} - } - - rule VarOrTermOrEmbTP() -> TermPattern = - t:EmbTP() {? - #[cfg(feature = "rdf-star")]{Ok(t.into())} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triple patterns are only available in SPARQL-star")} - } / - v:Var() { v.into() } / - t:GraphTerm() { t.into() } - - rule AnnotationPattern() -> FocusedTriplePattern)>> = "{|" _ a:PropertyListNotEmpty() _ "|}" { a } - - rule AnnotationPatternPath() -> FocusedTripleOrPathPattern)>> = "{|" _ a: PropertyListPathNotEmpty() _ "|}" { a } - - rule ExprEmbTP() -> Expression = "<<" _ s:ExprVarOrTerm() _ p:Verb() _ o:ExprVarOrTerm() _ ">>" {? - #[cfg(feature = "rdf-star")]{Ok(Expression::FunctionCall(Function::Triple, vec![s, p.into(), o]))} - #[cfg(not(feature = "rdf-star"))]{Err("Embedded triples are only available in SPARQL-star")} - } - - rule ExprVarOrTerm() -> Expression = - ExprEmbTP() / - i:iri() { i.into() } / - l:RDFLiteral() { l.into() } / - l:NumericLiteral() { l.into() } / - l:BooleanLiteral() { l.into() } / - v:Var() { v.into() } - //space rule _() = quiet! 
{ ([' ' | '\t' | '\n' | '\r'] / comment())* } From d24461fc4245a2f3778adbd2ddb53fbc7494c474 Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 19 May 2023 22:22:20 +0200 Subject: [PATCH 26/45] XSD: Improves arithmetic computations Avoids internal overflow inside some decimal operations --- lib/oxsdatatypes/src/boolean.rs | 4 +- lib/oxsdatatypes/src/date_time.rs | 13 ++ lib/oxsdatatypes/src/decimal.rs | 273 ++++++++++++++++++++---------- lib/oxsdatatypes/src/double.rs | 39 +++-- lib/oxsdatatypes/src/duration.rs | 2 +- lib/oxsdatatypes/src/float.rs | 45 +++-- lib/oxsdatatypes/src/integer.rs | 41 +++++ lib/src/sparql/eval.rs | 14 +- 8 files changed, 305 insertions(+), 126 deletions(-) diff --git a/lib/oxsdatatypes/src/boolean.rs b/lib/oxsdatatypes/src/boolean.rs index 9544cac9..fd213a90 100644 --- a/lib/oxsdatatypes/src/boolean.rs +++ b/lib/oxsdatatypes/src/boolean.rs @@ -43,14 +43,14 @@ impl From for Boolean { impl From for Boolean { #[inline] fn from(value: Float) -> Self { - (value != Float::from(0.) && !value.is_naan()).into() + (value != Float::from(0.) && !value.is_nan()).into() } } impl From for Boolean { #[inline] fn from(value: Double) -> Self { - (value != Double::from(0.) && !value.is_naan()).into() + (value != Double::from(0.) 
&& !value.is_nan()).into() } } diff --git a/lib/oxsdatatypes/src/date_time.rs b/lib/oxsdatatypes/src/date_time.rs index c7fe48e8..63c45f0b 100644 --- a/lib/oxsdatatypes/src/date_time.rs +++ b/lib/oxsdatatypes/src/date_time.rs @@ -44,6 +44,7 @@ impl DateTime { }) } + /// [fn:current-dateTime](https://www.w3.org/TR/xpath-functions/#func-current-dateTime) #[inline] pub fn now() -> Result { Ok(Self { @@ -303,6 +304,12 @@ impl Time { } } + /// [fn:current-time](https://www.w3.org/TR/xpath-functions/#func-current-time) + #[inline] + pub fn now() -> Result { + DateTime::now()?.try_into() + } + /// [fn:hour-from-time](https://www.w3.org/TR/xpath-functions/#func-hour-from-time) #[inline] pub fn hour(&self) -> u8 { @@ -498,6 +505,12 @@ impl Date { } } + /// [fn:current-date](https://www.w3.org/TR/xpath-functions/#func-current-date) + #[inline] + pub fn now() -> Result { + DateTime::now()?.try_into() + } + /// [fn:year-from-date](https://www.w3.org/TR/xpath-functions/#func-year-from-date) #[inline] pub fn year(&self) -> i64 { diff --git a/lib/oxsdatatypes/src/decimal.rs b/lib/oxsdatatypes/src/decimal.rs index c7fa6ba9..1b6fa6cc 100644 --- a/lib/oxsdatatypes/src/decimal.rs +++ b/lib/oxsdatatypes/src/decimal.rs @@ -5,10 +5,9 @@ use std::fmt::Write; use std::ops::Neg; use std::str::FromStr; -const DECIMAL_PART_DIGITS: usize = 18; +const DECIMAL_PART_DIGITS: u32 = 18; const DECIMAL_PART_POW: i128 = 1_000_000_000_000_000_000; const DECIMAL_PART_POW_MINUS_ONE: i128 = 100_000_000_000_000_000; -const DECIMAL_PART_HALF_POW: i128 = 1_000_000_000; /// [XML Schema `decimal` datatype](https://www.w3.org/TR/xmlschema11-2/#decimal) /// @@ -22,10 +21,9 @@ pub struct Decimal { impl Decimal { /// Constructs the decimal i / 10^n - #[allow(clippy::cast_possible_truncation)] #[inline] pub fn new(i: i128, n: u32) -> Result { - let shift = (DECIMAL_PART_DIGITS as u32) + let shift = DECIMAL_PART_DIGITS .checked_sub(n) .ok_or(DecimalOverflowError)?; Ok(Self { @@ -66,29 +64,69 @@ impl Decimal { /// 
[op:numeric-multiply](https://www.w3.org/TR/xpath-functions/#func-numeric-multiply) #[inline] pub fn checked_mul(&self, rhs: impl Into) -> Option { - //TODO: better algorithm to keep precision + // Idea: we shift right as much as possible to keep as much precision as possible + // Do the multiplication and do the required left shift + let mut left = self.value; + let mut shift_left = 0_u32; + if left != 0 { + while left % 10 == 0 { + left /= 10; + shift_left += 1; + } + } + + let mut right = rhs.into().value; + let mut shift_right = 0_u32; + if right != 0 { + while right % 10 == 0 { + right /= 10; + shift_right += 1; + } + } + + // We do multiplication + shift + let shift = (shift_left + shift_right).checked_sub(DECIMAL_PART_DIGITS)?; Some(Self { - value: self - .value - .checked_div(DECIMAL_PART_HALF_POW)? - .checked_mul(rhs.into().value.checked_div(DECIMAL_PART_HALF_POW)?)?, + value: left + .checked_mul(right)? + .checked_mul(10_i128.checked_pow(shift)?)?, }) } /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions/#func-numeric-divide) #[inline] pub fn checked_div(&self, rhs: impl Into) -> Option { - //TODO: better algorithm to keep precision + // Idea: we shift the dividend left as much as possible to keep as much precision as possible + // And we shift right the divisor as much as possible + // Do the multiplication and do the required shift + let mut left = self.value; + let mut shift_left = 0_u32; + if left != 0 { + while let Some(r) = left.checked_mul(10) { + assert_eq!(r / 10, left); + left = r; + shift_left += 1; + } + } + let mut right = rhs.into().value; + let mut shift_right = 0_u32; + if right != 0 { + while right % 10 == 0 { + right /= 10; + shift_right += 1; + } + } + + // We do division + shift + let shift = (shift_left + shift_right).checked_sub(DECIMAL_PART_DIGITS)?; Some(Self { - value: self - .value - .checked_mul(DECIMAL_PART_HALF_POW)? - .checked_div(rhs.into().value)? 
- .checked_mul(DECIMAL_PART_HALF_POW)?, + value: left + .checked_div(right)? + .checked_div(10_i128.checked_pow(shift)?)?, }) } - /// TODO: XSD? is well defined for not integer + /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions/#func-numeric-mod) #[inline] pub fn checked_rem(&self, rhs: impl Into) -> Option { Some(Self { @@ -174,9 +212,7 @@ impl Decimal { pub const MAX: Self = Self { value: i128::MAX }; #[cfg(test)] - pub(super) const fn step() -> Self { - Self { value: 1 } - } + pub const STEP: Self = Self { value: 1 }; } impl From for Decimal { @@ -316,13 +352,10 @@ impl TryFrom for Decimal { #[inline] #[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)] fn try_from(value: Double) -> Result { - let shifted = value * Double::from(DECIMAL_PART_POW as f64); - if shifted.is_finite() - && Double::from(i128::MIN as f64) <= shifted - && shifted <= Double::from(i128::MAX as f64) - { + let shifted = f64::from(value) * (DECIMAL_PART_POW as f64); + if shifted.is_finite() && (i128::MIN as f64) <= shifted && shifted <= (i128::MAX as f64) { Ok(Self { - value: f64::from(shifted) as i128, + value: shifted as i128, }) } else { Err(DecimalOverflowError) @@ -334,7 +367,7 @@ impl From for Float { #[inline] #[allow(clippy::cast_precision_loss)] fn from(value: Decimal) -> Self { - ((value.value as f32) / (DECIMAL_PART_POW as f32)).into() + Double::from(value).into() } } @@ -342,7 +375,18 @@ impl From for Double { #[inline] #[allow(clippy::cast_precision_loss)] fn from(value: Decimal) -> Self { - ((value.value as f64) / (DECIMAL_PART_POW as f64)).into() + let mut value = value.value; + let mut shift = DECIMAL_PART_POW; + + // Hack to improve precision + if value != 0 { + while shift != 1 && value % 10 == 0 { + value /= 10; + shift /= 10; + } + } + + ((value as f64) / (shift as f64)).into() } } @@ -374,8 +418,8 @@ impl FromStr for Decimal { } let (sign, mut input) = match input.first() { - Some(b'+') => (1, &input[1..]), - Some(b'-') => (-1, &input[1..]), + 
Some(b'+') => (1_i128, &input[1..]), + Some(b'-') => (-1_i128, &input[1..]), _ => (1, input), }; @@ -386,7 +430,7 @@ impl FromStr for Decimal { value = value .checked_mul(10) .ok_or(PARSE_OVERFLOW)? - .checked_add((*c - b'0').into()) + .checked_add(sign * i128::from(*c - b'0')) .ok_or(PARSE_OVERFLOW)?; input = &input[1..]; } else { @@ -414,7 +458,7 @@ impl FromStr for Decimal { value = value .checked_mul(10) .ok_or(PARSE_OVERFLOW)? - .checked_add((*c - b'0').into()) + .checked_add(sign * i128::from(*c - b'0')) .ok_or(PARSE_OVERFLOW)?; input = &input[1..]; } else { @@ -431,11 +475,7 @@ impl FromStr for Decimal { } Ok(Self { - value: value - .checked_mul(sign) - .ok_or(PARSE_OVERFLOW)? - .checked_mul(exp) - .ok_or(PARSE_OVERFLOW)?, + value: value.checked_mul(exp).ok_or(PARSE_OVERFLOW)?, }) } } @@ -476,37 +516,38 @@ impl fmt::Display for Decimal { .find_map(|(i, v)| if v == b'0' { None } else { Some(i) }) .unwrap_or(40); - if last_non_zero >= DECIMAL_PART_DIGITS { + let decimal_part_digits = usize::try_from(DECIMAL_PART_DIGITS).unwrap(); + if last_non_zero >= decimal_part_digits { let end = if let Some(mut width) = f.width() { if self.value.is_negative() { width -= 1; } - if last_non_zero - DECIMAL_PART_DIGITS + 1 < width { - DECIMAL_PART_DIGITS + width + if last_non_zero - decimal_part_digits + 1 < width { + decimal_part_digits + width } else { last_non_zero + 1 } } else { last_non_zero + 1 }; - for c in digits[DECIMAL_PART_DIGITS..end].iter().rev() { + for c in digits[decimal_part_digits..end].iter().rev() { f.write_char(char::from(*c))?; } } else { f.write_char('0')? 
} - if DECIMAL_PART_DIGITS > first_non_zero { + if decimal_part_digits > first_non_zero { f.write_char('.')?; let start = if let Some(precision) = f.precision() { - if DECIMAL_PART_DIGITS - first_non_zero > precision { - DECIMAL_PART_DIGITS - precision + if decimal_part_digits - first_non_zero > precision { + decimal_part_digits - precision } else { first_non_zero } } else { first_non_zero }; - for c in digits[start..DECIMAL_PART_DIGITS].iter().rev() { + for c in digits[start..decimal_part_digits].iter().rev() { f.write_char(char::from(*c))?; } } @@ -626,15 +667,7 @@ mod tests { assert_eq!(Decimal::from_str("0")?.to_string(), "0"); assert_eq!(Decimal::from_str("-0")?.to_string(), "0"); assert_eq!(Decimal::from_str(&Decimal::MAX.to_string())?, Decimal::MAX); - assert_eq!( - Decimal::from_str( - &Decimal::MIN - .checked_add(Decimal::step()) - .unwrap() - .to_string() - )?, - Decimal::MIN.checked_add(Decimal::step()).unwrap() - ); + assert_eq!(Decimal::from_str(&Decimal::MIN.to_string())?, Decimal::MIN); assert!(Decimal::from_str("0.0000000000000000001").is_err()); assert!(Decimal::from_str("1000000000000000000000").is_err()); assert_eq!( @@ -663,58 +696,98 @@ mod tests { #[test] fn add() { - assert!(Decimal::MIN.checked_add(Decimal::step()).is_some()); - assert!(Decimal::MAX.checked_add(Decimal::step()).is_none()); - assert_eq!( - Decimal::MAX.checked_add(Decimal::MIN), - Some(-Decimal::step()) - ); + assert!(Decimal::MIN.checked_add(Decimal::STEP).is_some()); + assert!(Decimal::MAX.checked_add(Decimal::STEP).is_none()); + assert_eq!(Decimal::MAX.checked_add(Decimal::MIN), Some(-Decimal::STEP)); } #[test] fn sub() { - assert!(Decimal::MIN.checked_sub(Decimal::step()).is_none()); - assert!(Decimal::MAX.checked_sub(Decimal::step()).is_some()); + assert!(Decimal::MIN.checked_sub(Decimal::STEP).is_none()); + assert!(Decimal::MAX.checked_sub(Decimal::STEP).is_some()); } #[test] fn mul() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(1).checked_mul(-1), 
Some(Decimal::from(-1))); assert_eq!( - Decimal::from_str("1")?.checked_mul(Decimal::from_str("-1")?), - Some(Decimal::from_str("-1")?) - ); - assert_eq!( - Decimal::from_str("1000")?.checked_mul(Decimal::from_str("1000")?), - Some(Decimal::from_str("1000000")?) + Decimal::from(1000).checked_mul(1000), + Some(Decimal::from(1000000)) ); assert_eq!( Decimal::from_str("0.1")?.checked_mul(Decimal::from_str("0.01")?), Some(Decimal::from_str("0.001")?) ); + assert_eq!(Decimal::from(0).checked_mul(1), Some(Decimal::from(0))); + assert_eq!(Decimal::from(1).checked_mul(0), Some(Decimal::from(0))); + assert_eq!(Decimal::MAX.checked_mul(1), Some(Decimal::MAX)); + assert_eq!(Decimal::MIN.checked_mul(1), Some(Decimal::MIN)); + assert_eq!( + Decimal::from(1).checked_mul(Decimal::MAX), + Some(Decimal::MAX) + ); + assert_eq!( + Decimal::from(1).checked_mul(Decimal::MIN), + Some(Decimal::MIN) + ); + assert_eq!( + Decimal::MAX.checked_mul(-1), + Some(Decimal::MIN.checked_add(Decimal::STEP).unwrap()) + ); + assert_eq!(Decimal::MIN.checked_mul(-1), None); + assert_eq!( + Decimal::MIN + .checked_add(Decimal::STEP) + .unwrap() + .checked_mul(-1), + Some(Decimal::MAX) + ); Ok(()) } #[test] fn div() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(1).checked_div(1), Some(Decimal::from(1))); + assert_eq!(Decimal::from(100).checked_div(10), Some(Decimal::from(10))); assert_eq!( - Decimal::from_str("1")?.checked_div(Decimal::from_str("1")?), - Some(Decimal::from_str("1")?) + Decimal::from(10).checked_div(100), + Some(Decimal::from_str("0.1")?) ); + assert_eq!(Decimal::from(1).checked_div(0), None); + assert_eq!(Decimal::from(0).checked_div(1), Some(Decimal::from(0))); + assert_eq!(Decimal::MAX.checked_div(1), Some(Decimal::MAX)); + assert_eq!(Decimal::MIN.checked_div(1), Some(Decimal::MIN)); assert_eq!( - Decimal::from_str("100")?.checked_div(Decimal::from_str("10")?), - Some(Decimal::from_str("10")?) 
+ Decimal::MAX.checked_div(-1), + Some(Decimal::MIN.checked_add(Decimal::STEP).unwrap()) ); + assert_eq!(Decimal::MIN.checked_div(-1), None); assert_eq!( - Decimal::from_str("10")?.checked_div(Decimal::from_str("100")?), - Some(Decimal::from_str("0.1")?) + Decimal::MIN + .checked_add(Decimal::STEP) + .unwrap() + .checked_div(-1), + Some(Decimal::MAX) + ); + Ok(()) + } + + #[test] + fn rem() -> Result<(), ParseDecimalError> { + assert_eq!(Decimal::from(10).checked_rem(3), Some(Decimal::from(1))); + assert_eq!(Decimal::from(6).checked_rem(-2), Some(Decimal::from(0))); + assert_eq!( + Decimal::from_str("4.5")?.checked_rem(Decimal::from_str("1.2")?), + Some(Decimal::from_str("0.9")?) ); + assert_eq!(Decimal::from(1).checked_rem(0), None); Ok(()) } #[test] fn round() -> Result<(), ParseDecimalError> { - assert_eq!(Decimal::from_str("10")?.round(), Decimal::from(10)); - assert_eq!(Decimal::from_str("-10")?.round(), Decimal::from(-10)); + assert_eq!(Decimal::from(10).round(), Decimal::from(10)); + assert_eq!(Decimal::from(-10).round(), Decimal::from(-10)); assert_eq!(Decimal::from_str("2.5")?.round(), Decimal::from(3)); assert_eq!(Decimal::from_str("2.4999")?.round(), Decimal::from(2)); assert_eq!(Decimal::from_str("-2.5")?.round(), Decimal::from(-2)); @@ -725,8 +798,8 @@ mod tests { #[test] fn ceil() -> Result<(), ParseDecimalError> { - assert_eq!(Decimal::from_str("10")?.ceil(), Decimal::from(10)); - assert_eq!(Decimal::from_str("-10")?.ceil(), Decimal::from(-10)); + assert_eq!(Decimal::from(10).ceil(), Decimal::from(10)); + assert_eq!(Decimal::from(-10).ceil(), Decimal::from(-10)); assert_eq!(Decimal::from_str("10.5")?.ceil(), Decimal::from(11)); assert_eq!(Decimal::from_str("-10.5")?.ceil(), Decimal::from(-10)); assert_eq!(Decimal::from(i64::MIN).ceil(), Decimal::from(i64::MIN)); @@ -736,8 +809,8 @@ mod tests { #[test] fn floor() -> Result<(), ParseDecimalError> { - assert_eq!(Decimal::from_str("10")?.ceil(), Decimal::from(10)); - 
assert_eq!(Decimal::from_str("-10")?.ceil(), Decimal::from(-10)); + assert_eq!(Decimal::from(10).ceil(), Decimal::from(10)); + assert_eq!(Decimal::from(-10).ceil(), Decimal::from(-10)); assert_eq!(Decimal::from_str("10.5")?.floor(), Decimal::from(10)); assert_eq!(Decimal::from_str("-10.5")?.floor(), Decimal::from(-11)); assert_eq!(Decimal::from(i64::MIN).floor(), Decimal::from(i64::MIN)); @@ -780,11 +853,11 @@ mod tests { fn from_float() -> Result<(), ParseDecimalError> { assert_eq!( Decimal::try_from(Float::from(0.)).ok(), - Some(Decimal::from_str("0")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Float::from(-0.)).ok(), - Some(Decimal::from_str("0.")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Float::from(-123.5)).ok(), @@ -798,10 +871,10 @@ mod tests { assert!( Decimal::try_from(Float::from(1_672_507_302_466.)) .unwrap() - .checked_sub(Decimal::from_str("1672507302466")?) + .checked_sub(Decimal::from(1_672_507_293_696_i64)) .unwrap() .abs() - < Decimal::from(1_000_000) + < Decimal::from(1) ); Ok(()) } @@ -810,11 +883,11 @@ mod tests { fn from_double() -> Result<(), ParseDecimalError> { assert_eq!( Decimal::try_from(Double::from(0.)).ok(), - Some(Decimal::from_str("0")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Double::from(-0.)).ok(), - Some(Decimal::from_str("0")?) + Some(Decimal::from(0)) ); assert_eq!( Decimal::try_from(Double::from(-123.1)).ok(), @@ -823,7 +896,7 @@ mod tests { assert!( Decimal::try_from(Double::from(1_672_507_302_466.)) .unwrap() - .checked_sub(Decimal::from_str("1672507302466")?) 
+ .checked_sub(Decimal::from(1_672_507_302_466_i64)) .unwrap() .abs() < Decimal::from(1) @@ -836,6 +909,34 @@ mod tests { Ok(()) } + #[test] + fn to_float() -> Result<(), ParseDecimalError> { + assert_eq!(Float::from(Decimal::from(0)), Float::from(0.)); + assert_eq!(Float::from(Decimal::from(1)), Float::from(1.)); + assert_eq!(Float::from(Decimal::from(10)), Float::from(10.)); + assert_eq!(Float::from(Decimal::from_str("0.1")?), Float::from(0.1)); + assert!((Float::from(Decimal::MAX) - Float::from(1.701412e20)).abs() < Float::from(1.)); + assert!((Float::from(Decimal::MIN) - Float::from(-1.701412e20)).abs() < Float::from(1.)); + Ok(()) + } + + #[test] + fn to_double() -> Result<(), ParseDecimalError> { + assert_eq!(Double::from(Decimal::from(0)), Double::from(0.)); + assert_eq!(Double::from(Decimal::from(1)), Double::from(1.)); + assert_eq!(Double::from(Decimal::from(10)), Double::from(10.)); + assert_eq!(Double::from(Decimal::from_str("0.1")?), Double::from(0.1)); + assert!( + (Double::from(Decimal::MAX) - Double::from(1.7014118346046924e20)).abs() + < Double::from(1.) + ); + assert!( + (Double::from(Decimal::MIN) - Double::from(-1.7014118346046924e20)).abs() + < Double::from(1.) 
+ ); + Ok(()) + } + #[test] fn minimally_conformant() -> Result<(), ParseDecimalError> { // All minimally conforming processors must support decimal values whose absolute value can be expressed as i / 10^k, diff --git a/lib/oxsdatatypes/src/double.rs b/lib/oxsdatatypes/src/double.rs index de3b78c4..e9b26ba5 100644 --- a/lib/oxsdatatypes/src/double.rs +++ b/lib/oxsdatatypes/src/double.rs @@ -53,6 +53,12 @@ impl Double { self.value.round().into() } + #[inline] + pub fn is_nan(self) -> bool { + self.value.is_nan() + } + + #[deprecated(note = "Use .is_nan()")] #[inline] pub fn is_naan(self) -> bool { self.value.is_nan() @@ -68,6 +74,20 @@ impl Double { pub fn is_identical_with(&self, other: &Self) -> bool { self.value.to_ne_bytes() == other.value.to_ne_bytes() } + + pub const MIN: Self = Self { value: f64::MIN }; + + pub const MAX: Self = Self { value: f64::MAX }; + + pub const INFINITY: Self = Self { + value: f64::INFINITY, + }; + + pub const NEG_INFINITY: Self = Self { + value: f64::NEG_INFINITY, + }; + + pub const NAN: Self = Self { value: f64::NAN }; } impl From for f64 { @@ -243,7 +263,7 @@ mod tests { #[test] fn eq() { assert_eq!(Double::from(0_f64), Double::from(0_f64)); - assert_ne!(Double::from(f64::NAN), Double::from(f64::NAN)); + assert_ne!(Double::NAN, Double::NAN); assert_eq!(Double::from(-0.), Double::from(0.)); } @@ -254,18 +274,15 @@ mod tests { Some(Ordering::Equal) ); assert_eq!( - Double::from(f64::INFINITY).partial_cmp(&Double::from(f64::MAX)), + Double::INFINITY.partial_cmp(&Double::MAX), Some(Ordering::Greater) ); assert_eq!( - Double::from(f64::NEG_INFINITY).partial_cmp(&Double::from(f64::MIN)), + Double::NEG_INFINITY.partial_cmp(&Double::MIN), Some(Ordering::Less) ); - assert_eq!(Double::from(f64::NAN).partial_cmp(&Double::from(0.)), None); - assert_eq!( - Double::from(f64::NAN).partial_cmp(&Double::from(f64::NAN)), - None - ); + assert_eq!(Double::NAN.partial_cmp(&Double::from(0.)), None); + assert_eq!(Double::NAN.partial_cmp(&Double::NAN), 
None); assert_eq!( Double::from(0.).partial_cmp(&Double::from(-0.)), Some(Ordering::Equal) @@ -275,7 +292,7 @@ mod tests { #[test] fn is_identical_with() { assert!(Double::from(0.).is_identical_with(&Double::from(0.))); - assert!(Double::from(f64::NAN).is_identical_with(&Double::from(f64::NAN))); + assert!(Double::NAN.is_identical_with(&Double::NAN)); assert!(!Double::from(-0.).is_identical_with(&Double::from(0.))); } @@ -297,11 +314,11 @@ mod tests { assert_eq!(Double::from_str("-1.")?.to_string(), "-1"); assert_eq!( Double::from_str(&f64::MIN.to_string()).unwrap(), - Double::from(f64::MIN) + Double::MIN ); assert_eq!( Double::from_str(&f64::MAX.to_string()).unwrap(), - Double::from(f64::MAX) + Double::MAX ); Ok(()) } diff --git a/lib/oxsdatatypes/src/duration.rs b/lib/oxsdatatypes/src/duration.rs index 18d42912..0bc9d2bf 100644 --- a/lib/oxsdatatypes/src/duration.rs +++ b/lib/oxsdatatypes/src/duration.rs @@ -601,7 +601,7 @@ mod tests { fn from_str() -> Result<(), XsdParseError> { let min = Duration::new( i64::MIN + 1, - Decimal::MIN.checked_add(Decimal::step()).unwrap(), + Decimal::MIN.checked_add(Decimal::STEP).unwrap(), ); let max = Duration::new(i64::MAX, Decimal::MAX); diff --git a/lib/oxsdatatypes/src/float.rs b/lib/oxsdatatypes/src/float.rs index 001b5006..29ebde30 100644 --- a/lib/oxsdatatypes/src/float.rs +++ b/lib/oxsdatatypes/src/float.rs @@ -53,11 +53,17 @@ impl Float { self.value.round().into() } + #[deprecated(note = "Use .is_nan()")] #[inline] pub fn is_naan(self) -> bool { self.value.is_nan() } + #[inline] + pub fn is_nan(self) -> bool { + self.value.is_nan() + } + #[inline] pub fn is_finite(self) -> bool { self.value.is_finite() @@ -68,6 +74,20 @@ impl Float { pub fn is_identical_with(&self, other: &Self) -> bool { self.value.to_ne_bytes() == other.value.to_ne_bytes() } + + pub const MIN: Self = Self { value: f32::MIN }; + + pub const MAX: Self = Self { value: f32::MAX }; + + pub const INFINITY: Self = Self { + value: f32::INFINITY, + }; + + pub 
const NEG_INFINITY: Self = Self { + value: f32::NEG_INFINITY, + }; + + pub const NAN: Self = Self { value: f32::NAN }; } impl From for f32 { @@ -233,7 +253,7 @@ mod tests { #[test] fn eq() { assert_eq!(Float::from(0.), Float::from(0.)); - assert_ne!(Float::from(f32::NAN), Float::from(f32::NAN)); + assert_ne!(Float::NAN, Float::NAN); assert_eq!(Float::from(-0.), Float::from(0.)); } @@ -244,18 +264,15 @@ mod tests { Some(Ordering::Equal) ); assert_eq!( - Float::from(f32::INFINITY).partial_cmp(&Float::from(f32::MAX)), + Float::INFINITY.partial_cmp(&Float::MAX), Some(Ordering::Greater) ); assert_eq!( - Float::from(f32::NEG_INFINITY).partial_cmp(&Float::from(f32::MIN)), + Float::NEG_INFINITY.partial_cmp(&Float::MIN), Some(Ordering::Less) ); - assert_eq!(Float::from(f32::NAN).partial_cmp(&Float::from(0.)), None); - assert_eq!( - Float::from(f32::NAN).partial_cmp(&Float::from(f32::NAN)), - None - ); + assert_eq!(Float::NAN.partial_cmp(&Float::from(0.)), None); + assert_eq!(Float::NAN.partial_cmp(&Float::NAN), None); assert_eq!( Float::from(0.).partial_cmp(&Float::from(-0.)), Some(Ordering::Equal) @@ -265,7 +282,7 @@ mod tests { #[test] fn is_identical_with() { assert!(Float::from(0.).is_identical_with(&Float::from(0.))); - assert!(Float::from(f32::NAN).is_identical_with(&Float::from(f32::NAN))); + assert!(Float::NAN.is_identical_with(&Float::NAN)); assert!(!Float::from(-0.).is_identical_with(&Float::from(0.))); } @@ -285,14 +302,8 @@ mod tests { assert_eq!(Float::from_str("-1")?.to_string(), "-1"); assert_eq!(Float::from_str("1.")?.to_string(), "1"); assert_eq!(Float::from_str("-1.")?.to_string(), "-1"); - assert_eq!( - Float::from_str(&f32::MIN.to_string())?, - Float::from(f32::MIN) - ); - assert_eq!( - Float::from_str(&f32::MAX.to_string())?, - Float::from(f32::MAX) - ); + assert_eq!(Float::from_str(&f32::MIN.to_string())?, Float::MIN); + assert_eq!(Float::from_str(&f32::MAX.to_string())?, Float::MAX); Ok(()) } } diff --git a/lib/oxsdatatypes/src/integer.rs 
b/lib/oxsdatatypes/src/integer.rs index 016096b3..46175fc3 100644 --- a/lib/oxsdatatypes/src/integer.rs +++ b/lib/oxsdatatypes/src/integer.rs @@ -58,6 +58,7 @@ impl Integer { }) } + /// [op:numeric-mod](https://www.w3.org/TR/xpath-functions/#func-numeric-mod) #[inline] pub fn checked_rem(&self, rhs: impl Into) -> Option { Some(Self { @@ -95,6 +96,10 @@ impl Integer { pub fn is_identical_with(&self, other: &Self) -> bool { self == other } + + pub const MIN: Self = Self { value: i64::MIN }; + + pub const MAX: Self = Self { value: i64::MAX }; } impl From for Integer { @@ -312,4 +317,40 @@ mod tests { assert!(Integer::try_from(Decimal::MAX).is_err()); Ok(()) } + + #[test] + fn add() { + assert_eq!( + Integer::MIN.checked_add(1), + Some(Integer::from(i64::MIN + 1)) + ); + assert_eq!(Integer::MAX.checked_add(1), None); + } + + #[test] + fn sub() { + assert_eq!(Integer::MIN.checked_sub(1), None); + assert_eq!( + Integer::MAX.checked_sub(1), + Some(Integer::from(i64::MAX - 1)) + ); + } + + #[test] + fn mul() { + assert_eq!(Integer::MIN.checked_mul(2), None); + assert_eq!(Integer::MAX.checked_mul(2), None); + } + + #[test] + fn div() { + assert_eq!(Integer::from(1).checked_div(0), None); + } + + #[test] + fn rem() { + assert_eq!(Integer::from(10).checked_rem(3), Some(Integer::from(1))); + assert_eq!(Integer::from(6).checked_rem(-2), Some(Integer::from(0))); + assert_eq!(Integer::from(1).checked_rem(0), None); + } } diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index e95d2092..4d2d738f 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -930,7 +930,6 @@ impl SimpleEvaluator { } } - #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)] fn expression_evaluator( &self, expression: &PlanExpression, @@ -1426,7 +1425,11 @@ impl SimpleEvaluator { let arg = self.expression_evaluator(arg, stat_children); let dataset = Rc::clone(&self.dataset); Rc::new(move |tuple| { - Some((to_string(&dataset, &arg(tuple)?)?.chars().count() as 
i64).into()) + Some( + i64::try_from(to_string(&dataset, &arg(tuple)?)?.chars().count()) + .ok()? + .into(), + ) }) } PlanExpression::StaticReplace(arg, regex, replacement) => { @@ -2383,11 +2386,6 @@ fn encode_bindings( })) } -#[allow( - clippy::float_cmp, - clippy::cast_possible_truncation, - clippy::cast_precision_loss -)] fn equals(a: &EncodedTerm, b: &EncodedTerm) -> Option { match a { EncodedTerm::DefaultGraph @@ -2655,7 +2653,6 @@ fn partial_cmp(dataset: &DatasetView, a: &EncodedTerm, b: &EncodedTerm) -> Optio } } -#[allow(clippy::cast_precision_loss)] fn partial_cmp_literals( dataset: &DatasetView, a: &EncodedTerm, @@ -2913,7 +2910,6 @@ enum NumericBinaryOperands { } impl NumericBinaryOperands { - #[allow(clippy::cast_precision_loss)] fn new(a: EncodedTerm, b: EncodedTerm) -> Option { match (a, b) { (EncodedTerm::FloatLiteral(v1), EncodedTerm::FloatLiteral(v2)) => { From eb40457d5cc9b989e022cd77ce5618fa163995b9 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 20 May 2023 11:14:55 +0200 Subject: [PATCH 27/45] Upgrades RDF-tests --- .../sparql/halloween_problem.ru | 3 --- .../sparql/halloween_problem_result.ttl | 6 ----- testsuite/oxigraph-tests/sparql/manifest.ttl | 23 ------------------- .../sparql/values_property_path_all.rq | 4 ---- .../sparql/values_property_path_all.srx | 8 ------- .../oxigraph-tests/sparql/values_too_few.rq | 1 - .../oxigraph-tests/sparql/values_too_many.rq | 1 - testsuite/rdf-tests | 2 +- 8 files changed, 1 insertion(+), 47 deletions(-) delete mode 100644 testsuite/oxigraph-tests/sparql/halloween_problem.ru delete mode 100644 testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl delete mode 100644 testsuite/oxigraph-tests/sparql/values_property_path_all.rq delete mode 100644 testsuite/oxigraph-tests/sparql/values_property_path_all.srx delete mode 100644 testsuite/oxigraph-tests/sparql/values_too_few.rq delete mode 100644 testsuite/oxigraph-tests/sparql/values_too_many.rq diff --git 
a/testsuite/oxigraph-tests/sparql/halloween_problem.ru b/testsuite/oxigraph-tests/sparql/halloween_problem.ru deleted file mode 100644 index d62147d2..00000000 --- a/testsuite/oxigraph-tests/sparql/halloween_problem.ru +++ /dev/null @@ -1,3 +0,0 @@ -PREFIX ex: -INSERT DATA { ex:s ex:salary 1200 . ex:s2 ex:salary 1250 . ex:s3 ex:salary 1280 . ex:boss ex:salary 1600 . }; -DELETE { ?s ex:salary ?o } INSERT { ?s ex:salary ?v } WHERE { ?s ex:salary ?o FILTER(?o < 1500) BIND(?o + 100 AS ?v) } diff --git a/testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl b/testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl deleted file mode 100644 index 5e394780..00000000 --- a/testsuite/oxigraph-tests/sparql/halloween_problem_result.ttl +++ /dev/null @@ -1,6 +0,0 @@ -@prefix ex: . - -ex:s ex:salary 1300 . -ex:s2 ex:salary 1350 . -ex:s3 ex:salary 1380 . -ex:boss ex:salary 1600 . diff --git a/testsuite/oxigraph-tests/sparql/manifest.ttl b/testsuite/oxigraph-tests/sparql/manifest.ttl index 98701fcb..92684953 100644 --- a/testsuite/oxigraph-tests/sparql/manifest.ttl +++ b/testsuite/oxigraph-tests/sparql/manifest.ttl @@ -23,15 +23,11 @@ :values_in_filter_not_exists :subquery_in_filter_not_exists :cmp_langString - :halloween_problem :nested_path :nested_expression :order_terms :nested_anonymous :unbound_variable_in_subquery - :values_too_many - :values_too_few - :values_property_path_all :one_or_more_shared :one_or_more_star ) . @@ -105,11 +101,6 @@ mf:action [ qt:query ] ; mf:result . -:halloween_problem rdf:type mf:UpdateEvaluationTest ; - mf:name "Halloween Problem: An update operation should not be able to read its own writes" ; - mf:action [ ut:request ] ; - mf:result [ ut:data ] . - :nested_path rdf:type mf:PositiveSyntaxTest11 ; mf:name "A very nested property path" ; mf:action . @@ -136,20 +127,6 @@ [ qt:query ] ; mf:result . 
-:values_too_many rdf:type mf:NegativeSyntaxTest11 ; - mf:name "Too many values in a VALUE clause compared to the number of variable" ; - mf:action . - -:values_too_few rdf:type mf:NegativeSyntaxTest11 ; - mf:name "Too few values in a VALUE clause compared to the number of variable" ; - mf:action . - -:values_property_path_all rdf:type mf:QueryEvaluationTest ; - mf:name "ZeroOrX property paths should only return terms in the graph and not also terms defined in the query" ; - mf:action - [ qt:query ] ; - mf:result . - :one_or_more_shared rdf:type mf:QueryEvaluationTest ; mf:name "SPARQL one or more with shared variable" ; mf:action diff --git a/testsuite/oxigraph-tests/sparql/values_property_path_all.rq b/testsuite/oxigraph-tests/sparql/values_property_path_all.rq deleted file mode 100644 index ef4d6c8d..00000000 --- a/testsuite/oxigraph-tests/sparql/values_property_path_all.rq +++ /dev/null @@ -1,4 +0,0 @@ -SELECT * WHERE { - VALUES ?v { 1 } - ?v ? ?v -} \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/values_property_path_all.srx b/testsuite/oxigraph-tests/sparql/values_property_path_all.srx deleted file mode 100644 index 0632c2aa..00000000 --- a/testsuite/oxigraph-tests/sparql/values_property_path_all.srx +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/values_too_few.rq b/testsuite/oxigraph-tests/sparql/values_too_few.rq deleted file mode 100644 index ceac3c3c..00000000 --- a/testsuite/oxigraph-tests/sparql/values_too_few.rq +++ /dev/null @@ -1 +0,0 @@ -SELECT * WHERE { VALUES (?a ?b) { (1) } } \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/values_too_many.rq b/testsuite/oxigraph-tests/sparql/values_too_many.rq deleted file mode 100644 index e4e3c7ab..00000000 --- a/testsuite/oxigraph-tests/sparql/values_too_many.rq +++ /dev/null @@ -1 +0,0 @@ -SELECT * WHERE { VALUES (?a ?b) { (1 2 3) } } \ No newline at end of file diff --git 
a/testsuite/rdf-tests b/testsuite/rdf-tests index 52be3f1b..9d70ac92 160000 --- a/testsuite/rdf-tests +++ b/testsuite/rdf-tests @@ -1 +1 @@ -Subproject commit 52be3f1b99a7890ec1266bac7b52be19a85a720c +Subproject commit 9d70ac9298f494bfc3a2becabc8fa8bc3d169685 From 5bfbbdbd3fdfb703388d951824e9902b6ec26058 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 20 May 2023 13:38:59 +0200 Subject: [PATCH 28/45] Python: Adds Store.contains_named_graph --- python/src/store.rs | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/python/src/store.rs b/python/src/store.rs index 7dbb9a66..49749c5b 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -597,10 +597,31 @@ impl PyStore { } } + /// Returns if the store contains the given named graph. + /// + /// :param graph_name: the name of the named graph. + /// :type graph_name: NamedNode or BlankNode or DefaultGraph + /// :rtype: None + /// :raises IOError: if an I/O error happens during the named graph lookup. + /// + /// >>> store = Store() + /// >>> store.add_graph(NamedNode('http://example.com/g')) + /// >>> store.contains_named_graph(NamedNode('http://example.com/g')) + /// True + fn contains_named_graph(&self, graph_name: &PyAny) -> PyResult { + let graph_name = GraphName::from(&PyGraphNameRef::try_from(graph_name)?); + match graph_name { + GraphName::DefaultGraph => Ok(true), + GraphName::NamedNode(graph_name) => self.inner.contains_named_graph(&graph_name), + GraphName::BlankNode(graph_name) => self.inner.contains_named_graph(&graph_name), + } + .map_err(map_storage_error) + } + /// Adds a named graph to the store. /// /// :param graph_name: the name of the name graph to add. - /// :type graph_name: NamedNode or BlankNode + /// :type graph_name: NamedNode or BlankNode or DefaultGraph /// :rtype: None /// :raises IOError: if an I/O error happens during the named graph insertion. 
/// From 63945638ea82ac5108c8b8e26af75ad1131704e7 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 21 May 2023 19:04:22 +0200 Subject: [PATCH 29/45] XSD: Adds checked_neg operation --- lib/oxsdatatypes/src/date_time.rs | 7 +++++-- lib/oxsdatatypes/src/decimal.rs | 13 ++++++++++++- lib/oxsdatatypes/src/duration.rs | 22 ++++++++++++++++++++++ lib/oxsdatatypes/src/integer.rs | 8 ++++++++ lib/src/sparql/eval.rs | 12 +++++++----- 5 files changed, 54 insertions(+), 8 deletions(-) diff --git a/lib/oxsdatatypes/src/date_time.rs b/lib/oxsdatatypes/src/date_time.rs index 63c45f0b..38508f66 100644 --- a/lib/oxsdatatypes/src/date_time.rs +++ b/lib/oxsdatatypes/src/date_time.rs @@ -188,8 +188,11 @@ impl DateTime { self.checked_sub_day_time_duration(rhs) } else { Some(Self { - timestamp: Timestamp::new(&date_time_plus_duration(-rhs, &self.properties())?) - .ok()?, + timestamp: Timestamp::new(&date_time_plus_duration( + rhs.checked_neg()?, + &self.properties(), + )?) + .ok()?, }) } } diff --git a/lib/oxsdatatypes/src/decimal.rs b/lib/oxsdatatypes/src/decimal.rs index 1b6fa6cc..73335f96 100644 --- a/lib/oxsdatatypes/src/decimal.rs +++ b/lib/oxsdatatypes/src/decimal.rs @@ -141,6 +141,14 @@ impl Decimal { }) } + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-minus) + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + value: self.value.checked_neg()?, + }) + } + /// [fn:abs](https://www.w3.org/TR/xpath-functions/#func-abs) #[inline] pub const fn abs(&self) -> Self { @@ -698,7 +706,10 @@ mod tests { fn add() { assert!(Decimal::MIN.checked_add(Decimal::STEP).is_some()); assert!(Decimal::MAX.checked_add(Decimal::STEP).is_none()); - assert_eq!(Decimal::MAX.checked_add(Decimal::MIN), Some(-Decimal::STEP)); + assert_eq!( + Decimal::MAX.checked_add(Decimal::MIN), + Decimal::STEP.checked_neg() + ); } #[test] diff --git a/lib/oxsdatatypes/src/duration.rs b/lib/oxsdatatypes/src/duration.rs index 0bc9d2bf..55fd6001 100644 --- 
a/lib/oxsdatatypes/src/duration.rs +++ b/lib/oxsdatatypes/src/duration.rs @@ -107,6 +107,14 @@ impl Duration { }) } + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + year_month: self.year_month.checked_neg()?, + day_time: self.day_time.checked_neg()?, + }) + } + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). #[inline] pub fn is_identical_with(&self, other: &Self) -> bool { @@ -301,6 +309,13 @@ impl YearMonthDuration { }) } + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + months: self.months.checked_neg()?, + }) + } + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). #[inline] pub fn is_identical_with(&self, other: &Self) -> bool { @@ -467,6 +482,13 @@ impl DayTimeDuration { }) } + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + seconds: self.seconds.checked_neg()?, + }) + } + /// Checks if the two values are [identical](https://www.w3.org/TR/xmlschema11-2/#identity). #[inline] pub fn is_identical_with(&self, other: &Self) -> bool { diff --git a/lib/oxsdatatypes/src/integer.rs b/lib/oxsdatatypes/src/integer.rs index 46175fc3..0c9d90e5 100644 --- a/lib/oxsdatatypes/src/integer.rs +++ b/lib/oxsdatatypes/src/integer.rs @@ -73,6 +73,14 @@ impl Integer { }) } + /// [op:numeric-unary-minus](https://www.w3.org/TR/xpath-functions/#func-numeric-unary-minus) + #[inline] + pub fn checked_neg(&self) -> Option { + Some(Self { + value: self.value.checked_neg()?, + }) + } + /// [fn:abs](https://www.w3.org/TR/xpath-functions/#func-abs) #[inline] pub const fn abs(&self) -> Self { diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index 4d2d738f..e92c66cf 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -1189,11 +1189,13 @@ impl SimpleEvaluator { Rc::new(move |tuple| match e(tuple)? 
{ EncodedTerm::FloatLiteral(value) => Some((-value).into()), EncodedTerm::DoubleLiteral(value) => Some((-value).into()), - EncodedTerm::IntegerLiteral(value) => Some((-value).into()), - EncodedTerm::DecimalLiteral(value) => Some((-value).into()), - EncodedTerm::DurationLiteral(value) => Some((-value).into()), - EncodedTerm::YearMonthDurationLiteral(value) => Some((-value).into()), - EncodedTerm::DayTimeDurationLiteral(value) => Some((-value).into()), + EncodedTerm::IntegerLiteral(value) => Some(value.checked_neg()?.into()), + EncodedTerm::DecimalLiteral(value) => Some(value.checked_neg()?.into()), + EncodedTerm::DurationLiteral(value) => Some(value.checked_neg()?.into()), + EncodedTerm::YearMonthDurationLiteral(value) => { + Some(value.checked_neg()?.into()) + } + EncodedTerm::DayTimeDurationLiteral(value) => Some(value.checked_neg()?.into()), _ => None, }) } From b2d625e10ea51bb627ed596ed45055b800fc69a1 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 21 May 2023 18:53:27 +0200 Subject: [PATCH 30/45] XSD: Drops nom dependency --- Cargo.lock | 1 - lib/oxsdatatypes/Cargo.toml | 3 - lib/oxsdatatypes/src/date_time.rs | 21 +- lib/oxsdatatypes/src/duration.rs | 6 +- lib/oxsdatatypes/src/parser.rs | 880 ++++++++++++++++-------------- 5 files changed, 495 insertions(+), 416 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 052688a8..f2ee4cb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1048,7 +1048,6 @@ name = "oxsdatatypes" version = "0.1.2-dev" dependencies = [ "js-sys", - "nom", ] [[package]] diff --git a/lib/oxsdatatypes/Cargo.toml b/lib/oxsdatatypes/Cargo.toml index 9f622b28..e8488b1d 100644 --- a/lib/oxsdatatypes/Cargo.toml +++ b/lib/oxsdatatypes/Cargo.toml @@ -13,9 +13,6 @@ An implementation of some XSD datatypes for SPARQL implementations edition = "2021" rust-version = "1.60" -[dependencies] -nom = "7" - [target.'cfg(all(target_family = "wasm", target_os = "unknown"))'.dependencies] js-sys = "0.3" diff --git a/lib/oxsdatatypes/src/date_time.rs 
b/lib/oxsdatatypes/src/date_time.rs index 38508f66..734ebab5 100644 --- a/lib/oxsdatatypes/src/date_time.rs +++ b/lib/oxsdatatypes/src/date_time.rs @@ -1,8 +1,7 @@ -use super::parser::{date_lexical_rep, date_time_lexical_rep, parse_value, time_lexical_rep}; use super::{DayTimeDuration, Decimal, Duration, XsdParseError, YearMonthDuration}; use crate::parser::{ - g_day_lexical_rep, g_month_day_lexical_rep, g_month_lexical_rep, g_year_lexical_rep, - g_year_month_lexical_rep, + parse_date, parse_date_time, parse_g_day, parse_g_month, parse_g_month_day, parse_g_year, + parse_g_year_month, parse_time, }; use std::cmp::{min, Ordering}; use std::error::Error; @@ -234,7 +233,7 @@ impl FromStr for DateTime { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(date_time_lexical_rep, input) + parse_date_time(input) } } @@ -445,7 +444,7 @@ impl FromStr for Time { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(time_lexical_rep, input) + parse_time(input) } } @@ -648,7 +647,7 @@ impl FromStr for Date { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(date_lexical_rep, input) + parse_date(input) } } @@ -770,7 +769,7 @@ impl FromStr for GYearMonth { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_year_month_lexical_rep, input) + parse_g_year_month(input) } } @@ -891,7 +890,7 @@ impl FromStr for GYear { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_year_lexical_rep, input) + parse_g_year(input) } } @@ -1013,7 +1012,7 @@ impl FromStr for GMonthDay { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_month_day_lexical_rep, input) + parse_g_month_day(input) } } @@ -1139,7 +1138,7 @@ impl FromStr for GMonth { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(g_month_lexical_rep, input) + parse_g_month(input) } } @@ -1256,7 +1255,7 @@ impl FromStr for GDay { type Err = XsdParseError; fn 
from_str(input: &str) -> Result { - parse_value(g_day_lexical_rep, input) + parse_g_day(input) } } diff --git a/lib/oxsdatatypes/src/duration.rs b/lib/oxsdatatypes/src/duration.rs index 55fd6001..a2d6ac47 100644 --- a/lib/oxsdatatypes/src/duration.rs +++ b/lib/oxsdatatypes/src/duration.rs @@ -135,7 +135,7 @@ impl FromStr for Duration { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(duration_lexical_rep, input) + parse_duration(input) } } @@ -350,7 +350,7 @@ impl FromStr for YearMonthDuration { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(year_month_duration_lexical_rep, input) + parse_year_month_duration(input) } } @@ -537,7 +537,7 @@ impl FromStr for DayTimeDuration { type Err = XsdParseError; fn from_str(input: &str) -> Result { - parse_value(day_time_duration_lexical_rep, input) + parse_day_time_duration(input) } } diff --git a/lib/oxsdatatypes/src/parser.rs b/lib/oxsdatatypes/src/parser.rs index 22d652ea..66fb260d 100644 --- a/lib/oxsdatatypes/src/parser.rs +++ b/lib/oxsdatatypes/src/parser.rs @@ -2,15 +2,6 @@ use super::date_time::{DateTimeError, GDay, GMonth, GMonthDay, GYear, GYearMonth use super::decimal::ParseDecimalError; use super::duration::{DayTimeDuration, YearMonthDuration}; use super::*; -use nom::branch::alt; -use nom::bytes::complete::{tag, take_while, take_while_m_n}; -use nom::character::complete::{char, digit0, digit1}; -use nom::combinator::{map, opt, recognize}; -use nom::error::{ErrorKind, ParseError}; -use nom::multi::many1; -use nom::sequence::{preceded, terminated, tuple}; -use nom::Err; -use nom::{IResult, Needed}; use std::error::Error; use std::fmt; use std::num::ParseIntError; @@ -24,46 +15,35 @@ pub struct XsdParseError { #[derive(Debug, Clone)] enum XsdParseErrorKind { - NomKind(ErrorKind), - NomChar(char), - MissingData(Needed), - TooMuchData { count: usize }, - Overflow, ParseInt(ParseIntError), ParseDecimal(ParseDecimalError), - OutOfIntegerRange { value: u8, min: u8, 
max: u8 }, DateTime(DateTimeError), + Message(&'static str), } +const OVERFLOW_ERROR: XsdParseError = XsdParseError { + kind: XsdParseErrorKind::Message("Overflow error"), +}; + impl fmt::Display for XsdParseError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match &self.kind { - XsdParseErrorKind::NomKind(kind) => { - write!(f, "Invalid XML Schema value: {}", kind.description()) - } - XsdParseErrorKind::NomChar(c) => { - write!(f, "Unexpected character in XML Schema value: '{c}'") - } - XsdParseErrorKind::MissingData(Needed::Unknown) => { - write!(f, "Too small XML Schema value") - } - XsdParseErrorKind::MissingData(Needed::Size(size)) => { - write!(f, "Too small XML Schema value: missing {size} chars") - } - XsdParseErrorKind::TooMuchData { count } => { - write!(f, "Too long XML Schema value: {count} extra chars") - } - XsdParseErrorKind::Overflow => write!(f, "Computation overflow or underflow"), XsdParseErrorKind::ParseInt(error) => { write!(f, "Error while parsing integer: {error}") } XsdParseErrorKind::ParseDecimal(error) => { write!(f, "Error while parsing decimal: {error}") } - XsdParseErrorKind::OutOfIntegerRange { value, min, max } => { - write!(f, "The integer {value} is not between {min} and {max}") - } XsdParseErrorKind::DateTime(error) => error.fmt(f), + XsdParseErrorKind::Message(msg) => write!(f, "{msg}"), + } + } +} + +impl XsdParseError { + const fn msg(message: &'static str) -> Self { + Self { + kind: XsdParseErrorKind::Message(message), } } } @@ -74,33 +54,11 @@ impl Error for XsdParseError { XsdParseErrorKind::ParseInt(error) => Some(error), XsdParseErrorKind::ParseDecimal(error) => Some(error), XsdParseErrorKind::DateTime(error) => Some(error), - _ => None, + XsdParseErrorKind::Message(_) => None, } } } -impl ParseError<&str> for XsdParseError { - fn from_error_kind(_input: &str, kind: ErrorKind) -> Self { - Self { - kind: XsdParseErrorKind::NomKind(kind), - } - } - - fn append(_input: &str, _kind: ErrorKind, other: Self) -> 
Self { - other - } - - fn from_char(_input: &str, c: char) -> Self { - Self { - kind: XsdParseErrorKind::NomChar(c), - } - } - - fn or(self, other: Self) -> Self { - other - } -} - impl From for XsdParseError { fn from(error: ParseIntError) -> Self { Self { @@ -125,412 +83,538 @@ impl From for XsdParseError { } } -impl From> for XsdParseError { - fn from(err: Err) -> Self { - match err { - Err::Incomplete(needed) => Self { - kind: XsdParseErrorKind::MissingData(needed), - }, - Err::Error(e) | Err::Failure(e) => e, - } - } -} - -type XsdResult<'a, T> = IResult<&'a str, T, XsdParseError>; - -const OVERFLOW_ERROR: XsdParseError = XsdParseError { - kind: XsdParseErrorKind::Overflow, -}; - -pub fn parse_value<'a, T>( - mut f: impl FnMut(&'a str) -> XsdResult<'a, T>, - input: &'a str, -) -> Result { - let (left, result) = f(input)?; - if left.is_empty() { - Ok(result) +// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y' +// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M' +// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D' +// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H' +// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M' +// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S' +// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag +// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag) +// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag +// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) 
| duDayTimeFrag) +struct DurationParts { + year_month: Option, + day_time: Option, +} + +fn duration_parts(input: &str) -> Result<(DurationParts, &str), XsdParseError> { + // States + const START: u32 = 0; + const AFTER_YEAR: u32 = 1; + const AFTER_MONTH: u32 = 2; + const AFTER_DAY: u32 = 3; + const AFTER_T: u32 = 4; + const AFTER_HOUR: u32 = 5; + const AFTER_MINUTE: u32 = 6; + const AFTER_SECOND: u32 = 7; + + let (negative, input) = if let Some(left) = input.strip_prefix('-') { + (true, left) } else { - Err(XsdParseError { - kind: XsdParseErrorKind::TooMuchData { count: left.len() }, - }) + (false, input) + }; + let mut input = expect_char(input, 'P', "Durations must start with 'P'")?; + let mut state = START; + let mut year_month: Option = None; + let mut day_time: Option = None; + while !input.is_empty() { + if let Some(left) = input.strip_prefix('T') { + if state >= AFTER_T { + return Err(XsdParseError::msg("Duplicated time separator 'T'")); + } + state = AFTER_T; + input = left; + } else { + let (number_str, left) = decimal_prefix(input); + match left.chars().next() { + Some('Y') if state < AFTER_YEAR => { + year_month = Some( + year_month + .unwrap_or_default() + .checked_add( + i64::from_str(number_str)? + .checked_mul(12) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_YEAR; + } + Some('M') if state < AFTER_MONTH => { + year_month = Some( + year_month + .unwrap_or_default() + .checked_add(i64::from_str(number_str)?) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_MONTH; + } + Some('D') if state < AFTER_DAY => { + if number_str.contains('.') { + return Err(XsdParseError::msg( + "Decimal numbers are not allowed for days", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + Decimal::from_str(number_str)? 
+ .checked_mul(86400) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_DAY; + } + Some('H') if state == AFTER_T => { + if number_str.contains('.') { + return Err(XsdParseError::msg( + "Decimal numbers are not allowed for hours", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + Decimal::from_str(number_str)? + .checked_mul(3600) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_HOUR; + } + Some('M') if (AFTER_T..AFTER_MINUTE).contains(&state) => { + if number_str.contains('.') { + return Err(XsdParseError::msg( + "Decimal numbers are not allowed for minutes", + )); + } + day_time = Some( + day_time + .unwrap_or_default() + .checked_add( + Decimal::from_str(number_str)? + .checked_mul(60) + .ok_or(OVERFLOW_ERROR)?, + ) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_MINUTE; + } + Some('S') if (AFTER_T..AFTER_SECOND).contains(&state) => { + day_time = Some( + day_time + .unwrap_or_default() + .checked_add(Decimal::from_str(number_str)?) + .ok_or(OVERFLOW_ERROR)?, + ); + state = AFTER_SECOND; + } + Some(_) => return Err(XsdParseError::msg("Unexpected type character")), + None => { + return Err(XsdParseError::msg( + "Numbers in durations must be followed by a type character", + )) + } + } + input = &left[1..]; + } } -} - -//TODO: check every computation - -// [6] duYearFrag ::= unsignedNoDecimalPtNumeral 'Y' -fn du_year_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('Y'))(input) -} -// [7] duMonthFrag ::= unsignedNoDecimalPtNumeral 'M' -fn du_month_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('M'))(input) + Ok(( + DurationParts { + year_month: if let Some(v) = year_month { + Some(if negative { + v.checked_neg().ok_or(OVERFLOW_ERROR)? + } else { + v + }) + } else { + None + }, + day_time: if let Some(v) = day_time { + Some(if negative { + v.checked_neg().ok_or(OVERFLOW_ERROR)? 
+ } else { + v + }) + } else { + None + }, + }, + input, + )) } -// [8] duDayFrag ::= unsignedNoDecimalPtNumeral 'D' -fn du_day_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('D'))(input) +pub fn parse_duration(input: &str) -> Result { + let parts = ensure_complete(input, duration_parts)?; + if parts.year_month.is_none() && parts.day_time.is_none() { + return Err(XsdParseError::msg("Empty duration")); + } + Ok(Duration::new( + parts.year_month.unwrap_or(0), + parts.day_time.unwrap_or_default(), + )) +} + +pub fn parse_year_month_duration(input: &str) -> Result { + let parts = ensure_complete(input, duration_parts)?; + if parts.day_time.is_some() { + return Err(XsdParseError::msg( + "There must not be any day or time component in a yearMonthDuration", + )); + } + Ok(YearMonthDuration::new(parts.year_month.ok_or( + XsdParseError::msg("No year and month values found"), + )?)) } -// [9] duHourFrag ::= unsignedNoDecimalPtNumeral 'H' -fn du_hour_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('H'))(input) +pub fn parse_day_time_duration(input: &str) -> Result { + let parts = ensure_complete(input, duration_parts)?; + if parts.year_month.is_some() { + return Err(XsdParseError::msg( + "There must not be any year or month component in a dayTimeDuration", + )); + } + Ok(DayTimeDuration::new(parts.day_time.ok_or( + XsdParseError::msg("No day or time values found"), + )?)) } -// [10] duMinuteFrag ::= unsignedNoDecimalPtNumeral 'M' -fn du_minute_frag(input: &str) -> XsdResult<'_, i64> { - terminated(unsigned_no_decimal_pt_numeral, char('M'))(input) +// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? 
+fn date_time_lexical_rep(input: &str) -> Result<(DateTime, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let input = expect_char(input, '-', "The year and month must be separated by '-'")?; + let (month, input) = month_frag(input)?; + let input = expect_char(input, '-', "The month and day must be separated by '-'")?; + let (day, input) = day_frag(input)?; + let input = expect_char(input, 'T', "The date and time must be separated by 'T'")?; + let (hour, input) = hour_frag(input)?; + let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; + let (minute, input) = minute_frag(input)?; + let input = expect_char( + input, + ':', + "The minutes and seconds must be separated by ':'", + )?; + let (second, input) = second_frag(input)?; + // We validate 24:00:00 + if hour == 24 && minute != 0 && second != Decimal::from(0) { + return Err(XsdParseError::msg( + "Times are not allowed to be after 24:00:00", + )); + } + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok(( + DateTime::new(year, month, day, hour, minute, second, timezone_offset)?, + input, + )) } -// [11] duSecondFrag ::= (unsignedNoDecimalPtNumeral | unsignedDecimalPtNumeral) 'S' -fn du_second_frag(input: &str) -> XsdResult<'_, Decimal> { - terminated( - map_res( - recognize(tuple((digit0, opt(preceded(char('.'), digit0))))), - Decimal::from_str, - ), - char('S'), - )(input) +pub fn parse_date_time(input: &str) -> Result { + ensure_complete(input, date_time_lexical_rep) } -// [12] duYearMonthFrag ::= (duYearFrag duMonthFrag?) | duMonthFrag -fn du_year_month_frag(input: &str) -> XsdResult<'_, i64> { - alt(( - map(tuple((du_year_frag, opt(du_month_frag))), |(y, m)| { - 12 * y + m.unwrap_or(0) - }), - du_month_frag, - ))(input) +// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? 
+fn time_lexical_rep(input: &str) -> Result<(Time, &str), XsdParseError> { + let (hour, input) = hour_frag(input)?; + let input = expect_char(input, ':', "The hours and minutes must be separated by ':'")?; + let (minute, input) = minute_frag(input)?; + let input = expect_char( + input, + ':', + "The minutes and seconds must be separated by ':'", + )?; + let (second, input) = second_frag(input)?; + // We validate 24:00:00 + if hour == 24 && minute != 0 && second != Decimal::from(0) { + return Err(XsdParseError::msg( + "Times are not allowed to be after 24:00:00", + )); + } + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((Time::new(hour, minute, second, timezone_offset)?, input)) } -// [13] duTimeFrag ::= 'T' ((duHourFrag duMinuteFrag? duSecondFrag?) | (duMinuteFrag duSecondFrag?) | duSecondFrag) -fn du_time_frag(input: &str) -> XsdResult<'_, Decimal> { - preceded( - char('T'), - alt(( - map_res( - tuple((du_hour_frag, opt(du_minute_frag), opt(du_second_frag))), - |(h, m, s)| { - Decimal::from(3600 * h + 60 * m.unwrap_or(0)) - .checked_add(s.unwrap_or_default()) - .ok_or(OVERFLOW_ERROR) - }, - ), - map_res(tuple((du_minute_frag, opt(du_second_frag))), |(m, s)| { - Decimal::from(m * 60) - .checked_add(s.unwrap_or_default()) - .ok_or(OVERFLOW_ERROR) - }), - du_second_frag, - )), - )(input) +pub fn parse_time(input: &str) -> Result { + ensure_complete(input, time_lexical_rep) } -// [14] duDayTimeFrag ::= (duDayFrag duTimeFrag?) | duTimeFrag -fn du_day_time_frag(input: &str) -> XsdResult<'_, Decimal> { - alt(( - map_res(tuple((du_day_frag, opt(du_time_frag))), |(d, t)| { - Decimal::from(d) - .checked_mul(Decimal::from(86400)) - .ok_or(OVERFLOW_ERROR)? - .checked_add(t.unwrap_or_default()) - .ok_or(OVERFLOW_ERROR) - }), - du_time_frag, - ))(input) +// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? 
Constraint: Day-of-month Representations +fn date_lexical_rep(input: &str) -> Result<(Date, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let input = expect_char(input, '-', "The year and month must be separated by '-'")?; + let (month, input) = month_frag(input)?; + let input = expect_char(input, '-', "The month and day must be separated by '-'")?; + let (day, input) = day_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((Date::new(year, month, day, timezone_offset)?, input)) } -// [15] durationLexicalRep ::= '-'? 'P' ((duYearMonthFrag duDayTimeFrag?) | duDayTimeFrag) -pub fn duration_lexical_rep(input: &str) -> XsdResult<'_, Duration> { - map( - tuple(( - opt(char('-')), - preceded( - char('P'), - alt(( - map( - tuple((du_year_month_frag, opt(du_day_time_frag))), - |(y, d)| Duration::new(y, d.unwrap_or_default()), - ), - map(du_day_time_frag, |d| Duration::new(0, d)), - )), - ), - )), - |(sign, duration)| { - if sign == Some('-') { - -duration - } else { - duration - } - }, - )(input) +pub fn parse_date(input: &str) -> Result { + ensure_complete(input, date_lexical_rep) } -// [16] dateTimeLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag 'T' ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? -pub fn date_time_lexical_rep(input: &str) -> XsdResult<'_, DateTime> { - map_res( - tuple(( - year_frag, - char('-'), - month_frag, - char('-'), - day_frag, - char('T'), - alt(( - map( - tuple((hour_frag, char(':'), minute_frag, char(':'), second_frag)), - |(h, _, m, _, s)| (h, m, s), - ), - end_of_day_frag, - )), - opt(timezone_frag), - )), - |(year, _, month, _, day, _, (hours, minutes, seconds), timezone)| { - DateTime::new(year, month, day, hours, minutes, seconds, timezone) - }, - )(input) +// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag? 
+fn g_year_month_lexical_rep(input: &str) -> Result<(GYearMonth, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let input = expect_char(input, '-', "The year and month must be separated by '-'")?; + let (month, input) = month_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GYearMonth::new(year, month, timezone_offset)?, input)) } -// [17] timeLexicalRep ::= ((hourFrag ':' minuteFrag ':' secondFrag) | endOfDayFrag) timezoneFrag? -pub fn time_lexical_rep(input: &str) -> XsdResult<'_, Time> { - map_res( - tuple(( - alt(( - map( - tuple((hour_frag, char(':'), minute_frag, char(':'), second_frag)), - |(h, _, m, _, s)| (h, m, s), - ), - end_of_day_frag, - )), - opt(timezone_frag), - )), - |((hours, minutes, seconds), timezone)| Time::new(hours, minutes, seconds, timezone), - )(input) +pub fn parse_g_year_month(input: &str) -> Result { + ensure_complete(input, g_year_month_lexical_rep) } -// [18] dateLexicalRep ::= yearFrag '-' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations -pub fn date_lexical_rep(input: &str) -> XsdResult<'_, Date> { - map_res( - tuple(( - year_frag, - char('-'), - month_frag, - char('-'), - day_frag, - opt(timezone_frag), - )), - |(year, _, month, _, day, timezone)| Date::new(year, month, day, timezone), - )(input) +// [20] gYearLexicalRep ::= yearFrag timezoneFrag? +fn g_year_lexical_rep(input: &str) -> Result<(GYear, &str), XsdParseError> { + let (year, input) = year_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GYear::new(year, timezone_offset)?, input)) } -// [19] gYearMonthLexicalRep ::= yearFrag '-' monthFrag timezoneFrag? 
-pub fn g_year_month_lexical_rep(input: &str) -> XsdResult<'_, GYearMonth> { - map_res( - tuple((year_frag, char('-'), month_frag, opt(timezone_frag))), - |(year, _, month, timezone)| GYearMonth::new(year, month, timezone), - )(input) +pub fn parse_g_year(input: &str) -> Result { + ensure_complete(input, g_year_lexical_rep) } -// [20] gYearLexicalRep ::= yearFrag timezoneFrag? -pub fn g_year_lexical_rep(input: &str) -> XsdResult<'_, GYear> { - map_res( - tuple((year_frag, opt(timezone_frag))), - |(year, timezone)| GYear::new(year, timezone), - )(input) +// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations +fn g_month_day_lexical_rep(input: &str) -> Result<(GMonthDay, &str), XsdParseError> { + let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; + let input = expect_char(input, '-', "gMonthDay values must start with '--'")?; + let (month, input) = month_frag(input)?; + let input = expect_char(input, '-', "The month and day must be separated by '-'")?; + let (day, input) = day_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GMonthDay::new(month, day, timezone_offset)?, input)) } -// [21] gMonthDayLexicalRep ::= '--' monthFrag '-' dayFrag timezoneFrag? Constraint: Day-of-month Representations -pub fn g_month_day_lexical_rep(input: &str) -> XsdResult<'_, GMonthDay> { - map_res( - tuple(( - char('-'), - char('-'), - month_frag, - char('-'), - day_frag, - opt(timezone_frag), - )), - |(_, _, month, _, day, timezone)| GMonthDay::new(month, day, timezone), - )(input) +pub fn parse_g_month_day(input: &str) -> Result { + ensure_complete(input, g_month_day_lexical_rep) } // [22] gDayLexicalRep ::= '---' dayFrag timezoneFrag? 
-pub fn g_day_lexical_rep(input: &str) -> XsdResult<'_, GDay> { - map_res( - tuple(( - char('-'), - char('-'), - char('-'), - day_frag, - opt(timezone_frag), - )), - |(_, _, _, day, timezone)| GDay::new(day, timezone), - )(input) +fn g_day_lexical_rep(input: &str) -> Result<(GDay, &str), XsdParseError> { + let input = expect_char(input, '-', "gDay values must start with '---'")?; + let input = expect_char(input, '-', "gDay values must start with '---'")?; + let input = expect_char(input, '-', "gDay values must start with '---'")?; + let (day, input) = day_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GDay::new(day, timezone_offset)?, input)) } -// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag? -pub fn g_month_lexical_rep(input: &str) -> XsdResult<'_, GMonth> { - map_res( - tuple((char('-'), char('-'), month_frag, opt(timezone_frag))), - |(_, _, month, timezone)| GMonth::new(month, timezone), - )(input) -} - -// [42] yearMonthDurationLexicalRep ::= '-'? 'P' duYearMonthFrag -pub fn year_month_duration_lexical_rep(input: &str) -> XsdResult<'_, YearMonthDuration> { - map( - tuple((opt(char('-')), preceded(char('P'), du_year_month_frag))), - |(sign, duration)| { - YearMonthDuration::new(if sign == Some('-') { - -duration - } else { - duration - }) - }, - )(input) +pub fn parse_g_day(input: &str) -> Result { + ensure_complete(input, g_day_lexical_rep) } -// [43] dayTimeDurationLexicalRep ::= '-'? 'P' duDayTimeFrag -pub fn day_time_duration_lexical_rep(input: &str) -> XsdResult<'_, DayTimeDuration> { - map( - tuple((opt(char('-')), preceded(char('P'), du_day_time_frag))), - |(sign, duration)| { - DayTimeDuration::new(if sign == Some('-') { - -duration - } else { - duration - }) - }, - )(input) +// [23] gMonthLexicalRep ::= '--' monthFrag timezoneFrag? 
+fn g_month_lexical_rep(input: &str) -> Result<(GMonth, &str), XsdParseError> { + let input = expect_char(input, '-', "gMonth values must start with '--'")?; + let input = expect_char(input, '-', "gMonth values must start with '--'")?; + let (month, input) = month_frag(input)?; + let (timezone_offset, input) = optional_end(input, timezone_frag)?; + Ok((GMonth::new(month, timezone_offset)?, input)) } -// [46] unsignedNoDecimalPtNumeral ::= digit+ -fn unsigned_no_decimal_pt_numeral(input: &str) -> XsdResult<'_, i64> { - map_res(digit1, i64::from_str)(input) +pub fn parse_g_month(input: &str) -> Result { + ensure_complete(input, g_month_lexical_rep) } // [56] yearFrag ::= '-'? (([1-9] digit digit digit+)) | ('0' digit digit digit)) -fn year_frag(input: &str) -> XsdResult<'_, i64> { - map_res( - recognize(tuple(( - opt(char('-')), - take_while_m_n(4, usize::MAX, |c: char| c.is_ascii_digit()), - ))), - i64::from_str, - )(input) +fn year_frag(input: &str) -> Result<(i64, &str), XsdParseError> { + let (sign, input) = if let Some(left) = input.strip_prefix('-') { + (-1, left) + } else { + (1, input) + }; + let (number_str, input) = integer_prefix(input); + let number = i64::from_str(number_str)?; + if number < 1000 && number_str.len() != 4 { + return Err(XsdParseError::msg( + "The years below 1000 must be encoded on exactly 4 digits", + )); + } + Ok((sign * number, input)) } // [57] monthFrag ::= ('0' [1-9]) | ('1' [0-2]) -fn month_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 1, 12) - })(input) +fn month_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg("Month must be encoded with two digits")); + } + let number = u8::from_str(number_str)?; + if !(1..=12).contains(&number) { + return Err(XsdParseError::msg("Month must be between 01 and 12")); + } + Ok((number, input)) } // 
[58] dayFrag ::= ('0' [1-9]) | ([12] digit) | ('3' [01]) -fn day_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 1, 31) - })(input) +fn day_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg("Day must be encoded with two digits")); + } + let number = u8::from_str(number_str)?; + if !(1..=31).contains(&number) { + return Err(XsdParseError::msg("Day must be between 01 and 31")); + } + Ok((number, input)) } // [59] hourFrag ::= ([01] digit) | ('2' [0-3]) -fn hour_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 0, 23) - })(input) +// We also allow 24 for ease of parsing +fn hour_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg("Hours must be encoded with two digits")); + } + let number = u8::from_str(number_str)?; + if !(0..=24).contains(&number) { + return Err(XsdParseError::msg("Hours must be between 00 and 24")); + } + Ok((number, input)) } // [60] minuteFrag ::= [0-5] digit -fn minute_frag(input: &str) -> XsdResult<'_, u8> { - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 0, 59) - })(input) +fn minute_frag(input: &str) -> Result<(u8, &str), XsdParseError> { + let (number_str, input) = integer_prefix(input); + if number_str.len() != 2 { + return Err(XsdParseError::msg( + "Minutes must be encoded with two digits", + )); + } + let number = u8::from_str(number_str)?; + if !(0..=59).contains(&number) { + return Err(XsdParseError::msg("Minutes must be between 00 and 59")); + } + Ok((number, input)) } // [61] secondFrag ::= ([0-5] digit) ('.' digit+)? 
-#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] -fn second_frag(input: &str) -> XsdResult<'_, Decimal> { - map_res( - recognize(tuple(( - take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), - opt(preceded( - char('.'), - take_while(|c: char| c.is_ascii_digit()), - )), - ))), - |v| { - let value = Decimal::from_str(v)?; - if Decimal::from(0) <= value && value < Decimal::from(60) { - Ok(value) - } else { - Err(XsdParseError { - kind: XsdParseErrorKind::OutOfIntegerRange { - value: value.as_i128() as u8, - min: 0, - max: 60, - }, - }) - } - }, - )(input) +fn second_frag(input: &str) -> Result<(Decimal, &str), XsdParseError> { + let (number_str, input) = decimal_prefix(input); + let (before_dot_str, _) = number_str.split_once('.').unwrap_or((number_str, "")); + if before_dot_str.len() != 2 { + return Err(XsdParseError::msg( + "Seconds must be encoded with two digits", + )); + } + let number = Decimal::from_str(number_str)?; + if number < Decimal::from(0) || number >= Decimal::from(60) { + return Err(XsdParseError::msg("Seconds must be between 00 and 60")); + } + if number_str.ends_with('.') { + return Err(XsdParseError::msg( + "Seconds are not allowed to end with a dot", + )); + } + Ok((number, input)) } -// [62] endOfDayFrag ::= '24:00:00' ('.' '0'+)? 
-fn end_of_day_frag(input: &str) -> XsdResult<'_, (u8, u8, Decimal)> { - map( - recognize(tuple(( - tag("24:00:00"), - opt(preceded(char('.'), many1(char('0')))), - ))), - |_| (24, 0, 0.into()), - )(input) +// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00') +fn timezone_frag(input: &str) -> Result<(TimezoneOffset, &str), XsdParseError> { + if let Some(left) = input.strip_prefix('Z') { + return Ok((TimezoneOffset::UTC, left)); + } + let (sign, input) = if let Some(left) = input.strip_prefix('-') { + (-1, left) + } else if let Some(left) = input.strip_prefix('+') { + (1, left) + } else { + (1, input) + }; + + let (hour_str, input) = integer_prefix(input); + if hour_str.len() != 2 { + return Err(XsdParseError::msg( + "The timezone hours must be encoded with two digits", + )); + } + let hours = i16::from_str(hour_str)?; + + let input = expect_char( + input, + ':', + "The timezone hours and minutes must be separated by ':'", + )?; + let (minutes, input) = minute_frag(input)?; + + if hours > 13 && !(hours == 14 && minutes == 0) { + return Err(XsdParseError::msg( + "The timezone hours must be between 00 and 13", + )); + } + + Ok(( + TimezoneOffset::new(sign * (hours * 60 + i16::from(minutes)))?, + input, + )) } -// [63] timezoneFrag ::= 'Z' | ('+' | '-') (('0' digit | '1' [0-3]) ':' minuteFrag | '14:00') -fn timezone_frag(input: &str) -> XsdResult<'_, TimezoneOffset> { - alt(( - map(char('Z'), |_| TimezoneOffset::UTC), - map_res( - tuple(( - alt((map(char('+'), |_| 1), map(char('-'), |_| -1))), - alt(( - map( - tuple(( - map_res(take_while_m_n(2, 2, |c: char| c.is_ascii_digit()), |v| { - parsed_u8_range(v, 0, 13) - }), - char(':'), - minute_frag, - )), - |(hours, _, minutes)| i16::from(hours) * 60 + i16::from(minutes), - ), - map(tag("14:00"), |_| 14 * 60), - )), - )), - |(sign, value)| TimezoneOffset::new(sign * value), - ), - ))(input) -} - -fn parsed_u8_range(input: &str, min: u8, max: u8) -> Result { - let value = 
u8::from_str(input)?; - if min <= value && value <= max { - Ok(value) +fn ensure_complete( + input: &str, + parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, +) -> Result { + let (result, left) = parse(input)?; + if !left.is_empty() { + return Err(XsdParseError::msg("Unrecognized value suffix")); + } + Ok(result) +} + +fn expect_char<'a>( + input: &'a str, + constant: char, + error_message: &'static str, +) -> Result<&'a str, XsdParseError> { + if let Some(left) = input.strip_prefix(constant) { + Ok(left) } else { - Err(XsdParseError { - kind: XsdParseErrorKind::OutOfIntegerRange { value, min, max }, - }) + Err(XsdParseError::msg(error_message)) } } -fn map_res<'a, O1, O2, E2: Into>( - mut first: impl FnMut(&'a str) -> XsdResult<'a, O1>, - mut second: impl FnMut(O1) -> Result, -) -> impl FnMut(&'a str) -> XsdResult<'a, O2> { - move |input| { - let (input, o1) = first(input)?; - Ok((input, second(o1).map_err(|e| Err::Error(e.into()))?)) +fn integer_prefix(input: &str) -> (&str, &str) { + let mut end = input.len(); + for (i, c) in input.char_indices() { + if !c.is_ascii_digit() { + end = i; + break; + } + } + input.split_at(end) +} + +fn decimal_prefix(input: &str) -> (&str, &str) { + let mut end = input.len(); + let mut dot_seen = false; + for (i, c) in input.char_indices() { + if c.is_ascii_digit() { + // Ok + } else if c == '.' 
&& !dot_seen { + dot_seen = true; + } else { + end = i; + break; + } } + input.split_at(end) +} + +fn optional_end( + input: &str, + parse: impl FnOnce(&str) -> Result<(T, &str), XsdParseError>, +) -> Result<(Option, &str), XsdParseError> { + Ok(if input.is_empty() { + (None, input) + } else { + let (result, input) = parse(input)?; + (Some(result), input) + }) } From d992fb7545773717baecf542e65c5d920bfffff9 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 23 May 2023 12:33:54 +0200 Subject: [PATCH 31/45] Fixes cross compilation of RocksDB with zig targeting macOS --- oxrocksdb-sys/build.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oxrocksdb-sys/build.rs b/oxrocksdb-sys/build.rs index 29f3fdda..ff8a633b 100644 --- a/oxrocksdb-sys/build.rs +++ b/oxrocksdb-sys/build.rs @@ -1,7 +1,7 @@ // Code from https://github.com/rust-rocksdb/rust-rocksdb/blob/eb2d302682418b361a80ad8f4dcf335ade60dcf5/librocksdb-sys/build.rs // License: https://github.com/rust-rocksdb/rust-rocksdb/blob/master/LICENSE -use std::env::{set_var, var}; +use std::env::{remove_var, set_var, var}; use std::path::PathBuf; fn link(name: &str, bundled: bool) { @@ -98,11 +98,13 @@ fn build_rocksdb() { config.define("NPERF_CONTEXT", None); config.define("ROCKSDB_PLATFORM_POSIX", None); config.define("ROCKSDB_LIB_IO_POSIX", None); + remove_var("SDKROOT"); // We override SDKROOT for cross-compilation set_var("IPHONEOS_DEPLOYMENT_TARGET", "11.0"); } else if target.contains("darwin") { config.define("OS_MACOSX", None); config.define("ROCKSDB_PLATFORM_POSIX", None); config.define("ROCKSDB_LIB_IO_POSIX", None); + remove_var("SDKROOT"); // We override SDKROOT for cross-compilation } else if target.contains("android") { config.define("OS_ANDROID", None); config.define("ROCKSDB_PLATFORM_POSIX", None); @@ -177,6 +179,7 @@ fn build_rocksdb() { } config.file(&format!("rocksdb/{file}")); } + config.compile("rocksdb"); } From 7b9e9f9694e615a5026aec733ef223b83861f916 Mon Sep 17 00:00:00 2001 From: Tpt 
Date: Wed, 24 May 2023 22:02:13 +0200 Subject: [PATCH 32/45] SPARQL: Makes average function works with yearMonthDuration and dayTimeDuration --- lib/src/sparql/eval.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index e92c66cf..c3a9b033 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -1087,7 +1087,9 @@ impl SimpleEvaluator { NumericBinaryOperands::TimeDayTimeDuration(v1, v2) => { Some(v1.checked_add_day_time_duration(v2)?.into()) } - _ => None, + NumericBinaryOperands::DateTime(_, _) + | NumericBinaryOperands::Time(_, _) + | NumericBinaryOperands::Date(_, _) => None, }, ) } @@ -4509,6 +4511,12 @@ impl Accumulator for SumAccumulator { NumericBinaryOperands::Integer(v1, v2) => v1.checked_add(v2).map(Into::into), NumericBinaryOperands::Decimal(v1, v2) => v1.checked_add(v2).map(Into::into), NumericBinaryOperands::Duration(v1, v2) => v1.checked_add(v2).map(Into::into), + NumericBinaryOperands::YearMonthDuration(v1, v2) => { + v1.checked_add(v2).map(Into::into) + } + NumericBinaryOperands::DayTimeDuration(v1, v2) => { + v1.checked_add(v2).map(Into::into) + } _ => None, }; } else { From 2650c5ed1338157cfcba414b44b740428c7d4fbe Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 24 May 2023 11:14:15 +0200 Subject: [PATCH 33/45] Speeds up sparql_eval fuzzer by hardcoding size_hint For some reasons size_hint() computation takes a lot of time. 
Hardcoding allows to increase the iter speed from 0.2iter/s to 1250iter/s --- lib/sparql-smith/src/lib.rs | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/lib/sparql-smith/src/lib.rs b/lib/sparql-smith/src/lib.rs index 6b554ec7..6005d827 100644 --- a/lib/sparql-smith/src/lib.rs +++ b/lib/sparql-smith/src/lib.rs @@ -1,6 +1,5 @@ use arbitrary::{Arbitrary, Result, Unstructured}; use std::fmt; -use std::fmt::Debug; use std::iter::once; use std::ops::ControlFlow; @@ -30,8 +29,12 @@ const LITERALS: [&str; 11] = [ "1e0", ]; -#[derive(Arbitrary)] pub struct Query { + inner: QueryContent, +} + +#[derive(Arbitrary)] +struct QueryContent { // [1] QueryUnit ::= Query // [2] Query ::= Prologue ( SelectQuery | ConstructQuery | DescribeQuery | AskQuery ) ValuesClause variant: QueryVariant, @@ -44,16 +47,34 @@ enum QueryVariant { //TODO: Other variants! } +impl<'a> Arbitrary<'a> for Query { + fn arbitrary(u: &mut Unstructured<'a>) -> Result { + Ok(Self { + inner: QueryContent::arbitrary(u)?, + }) + } + + fn arbitrary_take_rest(u: Unstructured<'a>) -> Result { + Ok(Self { + inner: QueryContent::arbitrary_take_rest(u)?, + }) + } + + fn size_hint(_depth: usize) -> (usize, Option) { + (20, None) + } +} + impl fmt::Display for Query { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match &self.variant { + match &self.inner.variant { QueryVariant::Select(s) => write!(f, "{s}"), }?; - write!(f, "{}", self.values_clause) + write!(f, "{}", self.inner.values_clause) } } -impl Debug for Query { +impl fmt::Debug for Query { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(self, f) } From d500614fcc6b6e9ddd724a121431e260d7d8a185 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 27 May 2023 15:59:12 +0200 Subject: [PATCH 34/45] Fuzzer: do not consider OFFSET and LIMIT for now Result order is not kept by the optimizer. We can't assume it is unchanged. 
--- Cargo.lock | 2 +- lib/sparql-smith/Cargo.toml | 4 +++- lib/sparql-smith/src/lib.rs | 14 ++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f2ee4cb2..191f9136 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1657,7 +1657,7 @@ dependencies = [ [[package]] name = "sparql-smith" -version = "0.1.0-alpha.3" +version = "0.1.0-alpha.4-dev" dependencies = [ "arbitrary", ] diff --git a/lib/sparql-smith/Cargo.toml b/lib/sparql-smith/Cargo.toml index 64bd375b..2755b225 100644 --- a/lib/sparql-smith/Cargo.toml +++ b/lib/sparql-smith/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sparql-smith" -version = "0.1.0-alpha.3" +version = "0.1.0-alpha.4-dev" authors = ["Tpt "] license = "MIT OR Apache-2.0" readme = "README.md" @@ -14,6 +14,8 @@ edition = "2021" [features] default = [] +limit-offset = ["order"] +order = [] sep-0006 = [] [dependencies] diff --git a/lib/sparql-smith/src/lib.rs b/lib/sparql-smith/src/lib.rs index 6005d827..01ca45e5 100644 --- a/lib/sparql-smith/src/lib.rs +++ b/lib/sparql-smith/src/lib.rs @@ -190,7 +190,9 @@ struct SolutionModifier { // [18] SolutionModifier ::= GroupClause? HavingClause? OrderClause? LimitOffsetClauses? 
group: Option, having: Option, + #[cfg(feature = "order")] order: Option, + #[cfg(feature = "limit-offset")] limit_offset: Option, } @@ -202,9 +204,11 @@ impl fmt::Display for SolutionModifier { if let Some(having) = &self.having { write!(f, " {having}")?; } + #[cfg(feature = "order")] if let Some(order) = &self.order { write!(f, " {order}")?; } + #[cfg(feature = "limit-offset")] if let Some(limit_offset) = &self.limit_offset { write!(f, " {limit_offset}")?; } @@ -275,6 +279,7 @@ impl fmt::Display for HavingClause { // [22] HavingCondition ::= Constraint type HavingCondition = Constraint; +#[cfg(feature = "order")] #[derive(Arbitrary)] struct OrderClause { // [23] OrderClause ::= 'ORDER' 'BY' OrderCondition+ @@ -282,6 +287,7 @@ struct OrderClause { others: Vec, } +#[cfg(feature = "order")] impl fmt::Display for OrderClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "ORDER BY {}", self.start)?; @@ -292,6 +298,7 @@ impl fmt::Display for OrderClause { } } +#[cfg(feature = "order")] #[derive(Arbitrary)] enum OrderCondition { // [24] OrderCondition ::= ( ( 'ASC' | 'DESC' ) BrackettedExpression ) | ( Constraint | Var ) @@ -303,6 +310,7 @@ enum OrderCondition { Var(Var), } +#[cfg(feature = "order")] impl fmt::Display for OrderCondition { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -319,6 +327,7 @@ impl fmt::Display for OrderCondition { } } +#[cfg(feature = "limit-offset")] #[derive(Arbitrary)] enum LimitOffsetClauses { // [25] LimitOffsetClauses ::= LimitClause OffsetClause? | OffsetClause LimitClause? 
@@ -326,6 +335,7 @@ enum LimitOffsetClauses { OffsetLimit(OffsetClause, Option), } +#[cfg(feature = "limit-offset")] impl fmt::Display for LimitOffsetClauses { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -337,24 +347,28 @@ impl fmt::Display for LimitOffsetClauses { } } +#[cfg(feature = "limit-offset")] #[derive(Arbitrary)] struct LimitClause { // [26] LimitClause ::= 'LIMIT' INTEGER value: u8, } +#[cfg(feature = "limit-offset")] impl fmt::Display for LimitClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "LIMIT {}", self.value) } } +#[cfg(feature = "limit-offset")] #[derive(Arbitrary)] struct OffsetClause { // [27] OffsetClause ::= 'OFFSET' INTEGER value: u8, } +#[cfg(feature = "limit-offset")] impl fmt::Display for OffsetClause { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "OFFSET {}", self.value) From d2804d8a8d74020e1ffc592da872dc37dee226ff Mon Sep 17 00:00:00 2001 From: Thomas Date: Wed, 31 May 2023 12:56:42 +0200 Subject: [PATCH 35/45] Python: fixes parsing from text I/O with not-ASCII char Python text IO is counting by number of character and not by number of bytes. However, we were asking to read a number of bytes and not a number of character, leading to strange I/O exceptions being raised. 
--- python/src/io.rs | 25 ++++++++++++++++--------- python/tests/test_io.py | 29 ++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/python/src/io.rs b/python/src/io.rs index c3032990..681a25fa 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -8,7 +8,8 @@ use oxigraph::io::{ use pyo3::exceptions::{PyIOError, PySyntaxError, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyBytes; -use pyo3::wrap_pyfunction; +use pyo3::{intern, wrap_pyfunction}; +use std::cmp::max; use std::error::Error; use std::fs::File; use std::io::{self, BufRead, BufReader, BufWriter, Cursor, Read, Write}; @@ -282,17 +283,22 @@ impl Write for PyWritable { pub struct PyIo(PyObject); impl Read for PyIo { - fn read(&mut self, mut buf: &mut [u8]) -> io::Result { + fn read(&mut self, buf: &mut [u8]) -> io::Result { Python::with_gil(|py| { + if buf.is_empty() { + return Ok(0); + } + let to_read = max(1, buf.len() / 4); // We divide by 4 because TextIO works with number of characters and not with number of bytes let read = self .0 - .call_method(py, "read", (buf.len(),), None) + .as_ref(py) + .call_method1(intern!(py, "read"), (to_read,)) .map_err(to_io_err)?; let bytes = read - .extract::<&[u8]>(py) - .or_else(|e| read.extract::<&str>(py).map(str::as_bytes).map_err(|_| e)) + .extract::<&[u8]>() + .or_else(|e| read.extract::<&str>().map(str::as_bytes).map_err(|_| e)) .map_err(to_io_err)?; - buf.write_all(bytes)?; + buf[..bytes.len()].copy_from_slice(bytes); Ok(bytes.len()) }) } @@ -302,16 +308,17 @@ impl Write for PyIo { fn write(&mut self, buf: &[u8]) -> io::Result { Python::with_gil(|py| { self.0 - .call_method(py, "write", (PyBytes::new(py, buf),), None) + .as_ref(py) + .call_method1(intern!(py, "write"), (PyBytes::new(py, buf),)) .map_err(to_io_err)? 
- .extract::(py) + .extract::() .map_err(to_io_err) }) } fn flush(&mut self) -> io::Result<()> { Python::with_gil(|py| { - self.0.call_method(py, "flush", (), None)?; + self.0.as_ref(py).call_method0(intern!(py, "flush"))?; Ok(()) }) } diff --git a/python/tests/test_io.py b/python/tests/test_io.py index 5dda57ca..e7519f5d 100644 --- a/python/tests/test_io.py +++ b/python/tests/test_io.py @@ -5,7 +5,9 @@ from tempfile import NamedTemporaryFile, TemporaryFile from pyoxigraph import Literal, NamedNode, Quad, Triple, parse, serialize EXAMPLE_TRIPLE = Triple( - NamedNode("http://example.com/foo"), NamedNode("http://example.com/p"), Literal("1") + NamedNode("http://example.com/foo"), + NamedNode("http://example.com/p"), + Literal("éù"), ) EXAMPLE_QUAD = Quad( NamedNode("http://example.com/foo"), @@ -18,7 +20,7 @@ EXAMPLE_QUAD = Quad( class TestParse(unittest.TestCase): def test_parse_file(self) -> None: with NamedTemporaryFile() as fp: - fp.write(b'

"1" .') + fp.write('

"éù" .'.encode()) fp.flush() self.assertEqual( list(parse(fp.name, "text/turtle", base_iri="http://example.com/")), @@ -33,7 +35,7 @@ class TestParse(unittest.TestCase): self.assertEqual( list( parse( - StringIO('

"1" .'), + StringIO('

"éù" .'), "text/turtle", base_iri="http://example.com/", ) @@ -41,11 +43,23 @@ class TestParse(unittest.TestCase): [EXAMPLE_TRIPLE], ) + def test_parse_long_str_io(self) -> None: + self.assertEqual( + list( + parse( + StringIO('

"éù" .\n' * 1024), + "text/turtle", + base_iri="http://example.com/", + ) + ), + [EXAMPLE_TRIPLE] * 1024, + ) + def test_parse_bytes_io(self) -> None: self.assertEqual( list( parse( - BytesIO(b'

"1" .'), + BytesIO('

"éù" .'.encode()), "text/turtle", base_iri="http://example.com/", ) @@ -75,15 +89,16 @@ class TestSerialize(unittest.TestCase): output = BytesIO() serialize([EXAMPLE_TRIPLE], output, "text/turtle") self.assertEqual( - output.getvalue(), - b' "1" .\n', + output.getvalue().decode(), + ' "éù" .\n', ) def test_serialize_to_file(self) -> None: with NamedTemporaryFile() as fp: serialize([EXAMPLE_TRIPLE], fp.name, "text/turtle") self.assertEqual( - fp.read(), b' "1" .\n' + fp.read().decode(), + ' "éù" .\n', ) def test_serialize_io_error(self) -> None: From 0a064a8704597e226de22d9b28d8069409936062 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 31 May 2023 17:58:21 +0200 Subject: [PATCH 36/45] SPARQL: Prevents ForLoopLeftJoin with MINUS It can flip the MINUS operation out of the "disjoint" case --- lib/src/sparql/plan_builder.rs | 35 ++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index a0d6b603..657d6f91 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -105,10 +105,9 @@ impl<'a> PlanBuilder<'a> { let left = self.build_for_graph_pattern(left, variables, graph_name)?; let right = self.build_for_graph_pattern(right, variables, graph_name)?; - let mut possible_problem_vars = BTreeSet::new(); - Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars); - if self.with_optimizations { - // TODO: don't use if SERVICE is inside of for loop + if self.with_optimizations && Self::can_use_for_loop_left_join(&right) { + let mut possible_problem_vars = BTreeSet::new(); + Self::add_left_join_problematic_variables(&right, &mut possible_problem_vars); //We add the extra filter if needed let right = if let Some(expr) = expression { @@ -1228,6 +1227,34 @@ impl<'a> PlanBuilder<'a> { } } + fn can_use_for_loop_left_join(node: &PlanNode) -> bool { + // We forbid MINUS and SERVICE in for loop left joins + match node { + 
PlanNode::StaticBindings { .. } + | PlanNode::QuadPattern { .. } + | PlanNode::PathPattern { .. } => true, + PlanNode::Filter { child, .. } + | PlanNode::Extend { child, .. } + | PlanNode::Sort { child, .. } + | PlanNode::HashDeduplicate { child } + | PlanNode::Reduced { child } + | PlanNode::Skip { child, .. } + | PlanNode::Limit { child, .. } + | PlanNode::Project { child, .. } + | PlanNode::Aggregate { child, .. } => Self::can_use_for_loop_left_join(child), + PlanNode::Union { children } => { + children.iter().all(|c| Self::can_use_for_loop_left_join(c)) + } + PlanNode::HashJoin { left, right } + | PlanNode::ForLoopJoin { left, right } + | PlanNode::ForLoopLeftJoin { left, right, .. } + | PlanNode::HashLeftJoin { left, right, .. } => { + Self::can_use_for_loop_left_join(left) && Self::can_use_for_loop_left_join(right) + } + PlanNode::AntiJoin { .. } | PlanNode::Service { .. } => false, + } + } + fn add_left_join_problematic_variables(node: &PlanNode, set: &mut BTreeSet) { match node { PlanNode::StaticBindings { .. 
} From 4cc9e4008b77fe120e5bbc8494e5e6fcf03c052a Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 31 May 2023 20:34:16 +0200 Subject: [PATCH 37/45] Fixes empty IN expression optimization --- lib/src/sparql/plan_builder.rs | 39 ++++++++++++------- .../oxigraph-tests/sparql/in_empty_error.rq | 1 + .../oxigraph-tests/sparql/in_empty_error.srx | 9 +++++ testsuite/oxigraph-tests/sparql/manifest.ttl | 6 +++ 4 files changed, 40 insertions(+), 15 deletions(-) create mode 100644 testsuite/oxigraph-tests/sparql/in_empty_error.rq create mode 100644 testsuite/oxigraph-tests/sparql/in_empty_error.srx diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 657d6f91..6592d56c 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -432,23 +432,32 @@ impl<'a> PlanBuilder<'a> { Box::new(self.build_for_expression(b, variables, graph_name)?), ), Expression::In(e, l) => { + let e = self.build_for_expression(e, variables, graph_name)?; if l.is_empty() { - return Ok(PlanExpression::Literal(PlanTerm { - encoded: false.into(), - plain: false.into(), - })); + // False except on error + PlanExpression::If( + Box::new(e), + Box::new(PlanExpression::Literal(PlanTerm { + encoded: false.into(), + plain: false.into(), + })), + Box::new(PlanExpression::Literal(PlanTerm { + encoded: false.into(), + plain: false.into(), + })), + ) + } else { + PlanExpression::Or( + l.iter() + .map(|v| { + Ok(PlanExpression::Equal( + Box::new(e.clone()), + Box::new(self.build_for_expression(v, variables, graph_name)?), + )) + }) + .collect::>()?, + ) } - let e = self.build_for_expression(e, variables, graph_name)?; - PlanExpression::Or( - l.iter() - .map(|v| { - Ok(PlanExpression::Equal( - Box::new(e.clone()), - Box::new(self.build_for_expression(v, variables, graph_name)?), - )) - }) - .collect::>()?, - ) } Expression::Add(a, b) => PlanExpression::Add( Box::new(self.build_for_expression(a, variables, graph_name)?), diff --git 
a/testsuite/oxigraph-tests/sparql/in_empty_error.rq b/testsuite/oxigraph-tests/sparql/in_empty_error.rq new file mode 100644 index 00000000..96060b9f --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/in_empty_error.rq @@ -0,0 +1 @@ +SELECT ?r WHERE { BIND((?foo IN ()) AS ?r) } \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/in_empty_error.srx b/testsuite/oxigraph-tests/sparql/in_empty_error.srx new file mode 100644 index 00000000..762d6c88 --- /dev/null +++ b/testsuite/oxigraph-tests/sparql/in_empty_error.srx @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/testsuite/oxigraph-tests/sparql/manifest.ttl b/testsuite/oxigraph-tests/sparql/manifest.ttl index 92684953..21ef9e76 100644 --- a/testsuite/oxigraph-tests/sparql/manifest.ttl +++ b/testsuite/oxigraph-tests/sparql/manifest.ttl @@ -30,6 +30,7 @@ :unbound_variable_in_subquery :one_or_more_shared :one_or_more_star + :in_empty_error ) . :small_unicode_escape_with_multibytes_char rdf:type mf:NegativeSyntaxTest ; @@ -140,3 +141,8 @@ [ qt:query ; qt:data ] ; mf:result . + +:in_empty_error rdf:type mf:QueryEvaluationTest ; + mf:name "IN should propagate errors on the left side, even on the empty input" ; + mf:action [ qt:query ] ; + mf:result . 
From 22f990344f44a5bef9b1cac3e58680f807853e37 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 31 May 2023 22:07:11 +0200 Subject: [PATCH 38/45] Fixes hash left join into for loop left join optimization It can inject variables into projections --- lib/src/sparql/plan_builder.rs | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 6592d56c..10d2ffe4 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -1237,7 +1237,7 @@ impl<'a> PlanBuilder<'a> { } fn can_use_for_loop_left_join(node: &PlanNode) -> bool { - // We forbid MINUS and SERVICE in for loop left joins + // We forbid MINUS, SERVICE and everything that affects cardinality in for loop left joins match node { PlanNode::StaticBindings { .. } | PlanNode::QuadPattern { .. } @@ -1245,10 +1245,6 @@ impl<'a> PlanBuilder<'a> { PlanNode::Filter { child, .. } | PlanNode::Extend { child, .. } | PlanNode::Sort { child, .. } - | PlanNode::HashDeduplicate { child } - | PlanNode::Reduced { child } - | PlanNode::Skip { child, .. } - | PlanNode::Limit { child, .. } | PlanNode::Project { child, .. } | PlanNode::Aggregate { child, .. } => Self::can_use_for_loop_left_join(child), PlanNode::Union { children } => { @@ -1260,7 +1256,12 @@ impl<'a> PlanBuilder<'a> { | PlanNode::HashLeftJoin { left, right, .. } => { Self::can_use_for_loop_left_join(left) && Self::can_use_for_loop_left_join(right) } - PlanNode::AntiJoin { .. } | PlanNode::Service { .. } => false, + PlanNode::AntiJoin { .. } + | PlanNode::Service { .. } + | PlanNode::HashDeduplicate { .. } + | PlanNode::Reduced { .. } + | PlanNode::Skip { .. } + | PlanNode::Limit { .. } => false, } } @@ -1326,7 +1327,8 @@ impl<'a> PlanBuilder<'a> { } PlanNode::Sort { child, .. } | PlanNode::HashDeduplicate { child } - | PlanNode::Reduced { child } => { + | PlanNode::Reduced { child } + | PlanNode::Project { child, .. 
} => { Self::add_left_join_problematic_variables(child, set); } PlanNode::Skip { child, .. } | PlanNode::Limit { child, .. } => { @@ -1344,15 +1346,6 @@ impl<'a> PlanBuilder<'a> { Self::add_left_join_problematic_variables(child, set) } } - PlanNode::Project { mapping, child } => { - let mut child_bound = BTreeSet::new(); - Self::add_left_join_problematic_variables(child, &mut child_bound); - for (child_i, output_i) in mapping.iter() { - if child_bound.contains(&child_i.encoded) { - set.insert(output_i.encoded); - } - } - } PlanNode::Aggregate { key_variables, aggregates, From adda2d2d7e4192a403543438d68df826478541e0 Mon Sep 17 00:00:00 2001 From: Tpt Date: Wed, 31 May 2023 22:04:04 +0200 Subject: [PATCH 39/45] Makes hash join into for loop join optimization more aggressive --- lib/src/sparql/plan_builder.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/src/sparql/plan_builder.rs b/lib/src/sparql/plan_builder.rs index 10d2ffe4..5e7370c1 100644 --- a/lib/src/sparql/plan_builder.rs +++ b/lib/src/sparql/plan_builder.rs @@ -1361,15 +1361,15 @@ impl<'a> PlanBuilder<'a> { } fn new_join(&self, mut left: PlanNode, mut right: PlanNode) -> PlanNode { + // We first use VALUES to filter the following patterns evaluation + if matches!(right, PlanNode::StaticBindings { .. }) { + swap(&mut left, &mut right); + } + if self.with_optimizations - && Self::is_fit_for_for_loop_join(&left) && Self::is_fit_for_for_loop_join(&right) && Self::has_some_common_variables(&left, &right) { - // We first use VALUES to filter the following patterns evaluation - if matches!(right, PlanNode::StaticBindings { .. }) { - swap(&mut left, &mut right); - } PlanNode::ForLoopJoin { left: Rc::new(left), right: Rc::new(right), @@ -1399,9 +1399,8 @@ impl<'a> PlanBuilder<'a> { match node { PlanNode::StaticBindings { .. } | PlanNode::QuadPattern { .. } - | PlanNode::PathPattern { .. } - | PlanNode::ForLoopJoin { .. 
} => true, - PlanNode::HashJoin { left, right } => { + | PlanNode::PathPattern { .. } => true, + PlanNode::ForLoopJoin { left, right } | PlanNode::HashJoin { left, right } => { Self::is_fit_for_for_loop_join(left) && Self::is_fit_for_for_loop_join(right) } PlanNode::Filter { child, .. } | PlanNode::Extend { child, .. } => { From 3e51020222dc26ac6026d28331b2db3a728572fc Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 1 Jun 2023 08:50:49 +0200 Subject: [PATCH 40/45] CI: Fuzz SPARQL query results I/O --- .clusterfuzzlite/build.sh | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.clusterfuzzlite/build.sh b/.clusterfuzzlite/build.sh index 31f59bd2..ff3d6712 100755 --- a/.clusterfuzzlite/build.sh +++ b/.clusterfuzzlite/build.sh @@ -15,11 +15,10 @@ function build_seed_corpus() { cd "$SRC"/oxigraph cargo fuzz build -O --debug-assertions -# shellcheck disable=SC2043 -# SC2043 (warning): This loop will only ever run once. -for TARGET in sparql_eval # sparql_results_json sparql_results_tsv +for TARGET in sparql_eval sparql_results_json sparql_results_tsv sparql_results_xml do cp fuzz/target/x86_64-unknown-linux-gnu/release/$TARGET "$OUT"/ done -# build_seed_corpus sparql_results_json json -# build_seed_corpus sparql_results_tsv tsv +build_seed_corpus sparql_results_json srj +build_seed_corpus sparql_results_tsv tsv +build_seed_corpus sparql_results_xml srx From 5d253c6afb996621d30b0e5d7b2840b25a917c13 Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 1 Jun 2023 19:02:34 +0200 Subject: [PATCH 41/45] SPARQL XML results: circumvent quick-xml crash --- lib/sparesults/src/xml.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/sparesults/src/xml.rs b/lib/sparesults/src/xml.rs index d493e7d9..d4973fdf 100644 --- a/lib/sparesults/src/xml.rs +++ b/lib/sparesults/src/xml.rs @@ -186,6 +186,7 @@ impl XmlQueryResultsReader { //Read header loop { + buffer.clear(); let event = reader.read_event_into(&mut buffer)?; match event { 
Event::Start(event) => match state { @@ -275,7 +276,6 @@ impl XmlQueryResultsReader { Event::Eof => return Err(SyntaxError::msg("Unexpected early file end. All results file should have a and a or tag").into()), _ => (), } - buffer.clear(); } } } @@ -315,6 +315,7 @@ impl XmlSolutionsReader { let mut lang = None; let mut datatype = None; loop { + self.buffer.clear(); let event = self.reader.read_event_into(&mut self.buffer)?; match event { Event::Start(event) => match state { @@ -563,7 +564,6 @@ impl XmlSolutionsReader { Event::Eof => return Ok(None), _ => (), } - self.buffer.clear(); } } } From a7758484a59f86f60d31be7a05da07251723722b Mon Sep 17 00:00:00 2001 From: Tpt Date: Thu, 1 Jun 2023 18:07:53 +0200 Subject: [PATCH 42/45] Python: Annotates immutable classes as frozen --- python/src/model.rs | 14 +++++++------- python/src/sparql.rs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/src/model.rs b/python/src/model.rs index 8fbeecbe..f92b862d 100644 --- a/python/src/model.rs +++ b/python/src/model.rs @@ -20,7 +20,7 @@ use std::vec::IntoIter; /// /// >>> str(NamedNode('http://example.com')) /// '' -#[pyclass(name = "NamedNode", module = "pyoxigraph")] +#[pyclass(frozen, name = "NamedNode", module = "pyoxigraph")] #[pyo3(text_signature = "(value)")] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct PyNamedNode { @@ -144,7 +144,7 @@ impl PyNamedNode { /// /// >>> str(BlankNode('ex')) /// '_:ex' -#[pyclass(name = "BlankNode", module = "pyoxigraph")] +#[pyclass(frozen, name = "BlankNode", module = "pyoxigraph")] #[pyo3(text_signature = "(value = None)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyBlankNode { @@ -280,7 +280,7 @@ impl PyBlankNode { /// '"example"@en' /// >>> str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) /// '"11"^^' -#[pyclass(name = "Literal", module = "pyoxigraph")] +#[pyclass(frozen, name = "Literal", module = "pyoxigraph")] #[pyo3(text_signature = 
"(value, *, datatype = None, language = None)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyLiteral { @@ -427,7 +427,7 @@ impl PyLiteral { } /// The RDF `default graph name `_. -#[pyclass(name = "DefaultGraph", module = "pyoxigraph")] +#[pyclass(frozen, name = "DefaultGraph", module = "pyoxigraph")] #[pyo3(text_signature = "()")] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub struct PyDefaultGraph {} @@ -625,7 +625,7 @@ impl IntoPy for PyTerm { /// A triple could also be easily destructed into its components: /// /// >>> (s, p, o) = Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')) -#[pyclass(name = "Triple", module = "pyoxigraph")] +#[pyclass(frozen, name = "Triple", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] #[pyo3(text_signature = "(subject, predicate, object)")] pub struct PyTriple { @@ -824,7 +824,7 @@ impl IntoPy for PyGraphName { /// A quad could also be easily destructed into its components: /// /// >>> (s, p, o, g) = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) -#[pyclass(name = "Quad", module = "pyoxigraph")] +#[pyclass(frozen, name = "Quad", module = "pyoxigraph")] #[pyo3(text_signature = "(subject, predicate, object, graph_name = None)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyQuad { @@ -1012,7 +1012,7 @@ impl PyQuad { /// /// >>> str(Variable('foo')) /// '?foo' -#[pyclass(name = "Variable", module = "pyoxigraph")] +#[pyclass(frozen, name = "Variable", module = "pyoxigraph")] #[pyo3(text_signature = "(value)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyVariable { diff --git a/python/src/sparql.rs b/python/src/sparql.rs index 1cd0e1b2..01298fa6 100644 --- a/python/src/sparql.rs +++ b/python/src/sparql.rs @@ -88,7 +88,7 @@ pub fn query_results_to_python(py: Python<'_>, results: QueryResults) -> PyObjec /// >>> s, p, o = solution /// >>> s /// 
-#[pyclass(unsendable, name = "QuerySolution", module = "pyoxigraph")] +#[pyclass(frozen, unsendable, name = "QuerySolution", module = "pyoxigraph")] pub struct PyQuerySolution { inner: QuerySolution, } From 57d39cad248526adc7cb09bf10cb1d1e89ebf221 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sun, 4 Jun 2023 11:46:22 +0200 Subject: [PATCH 43/45] Makes Clippy happy --- Cargo.toml | 1 + lib/oxsdatatypes/src/decimal.rs | 12 ++++++------ lib/src/storage/backend/rocksdb.rs | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 648ca11d..75a171d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "server", "testsuite" ] +resolver = "2" [profile.release] lto = true diff --git a/lib/oxsdatatypes/src/decimal.rs b/lib/oxsdatatypes/src/decimal.rs index 73335f96..11065901 100644 --- a/lib/oxsdatatypes/src/decimal.rs +++ b/lib/oxsdatatypes/src/decimal.rs @@ -723,7 +723,7 @@ mod tests { assert_eq!(Decimal::from(1).checked_mul(-1), Some(Decimal::from(-1))); assert_eq!( Decimal::from(1000).checked_mul(1000), - Some(Decimal::from(1000000)) + Some(Decimal::from(1_000_000)) ); assert_eq!( Decimal::from_str("0.1")?.checked_mul(Decimal::from_str("0.01")?), @@ -880,7 +880,7 @@ mod tests { assert!(Decimal::try_from(Float::from(f32::MIN)).is_err()); assert!(Decimal::try_from(Float::from(f32::MAX)).is_err()); assert!( - Decimal::try_from(Float::from(1_672_507_302_466.)) + Decimal::try_from(Float::from(1_672_507_300_000.)) .unwrap() .checked_sub(Decimal::from(1_672_507_293_696_i64)) .unwrap() @@ -926,8 +926,8 @@ mod tests { assert_eq!(Float::from(Decimal::from(1)), Float::from(1.)); assert_eq!(Float::from(Decimal::from(10)), Float::from(10.)); assert_eq!(Float::from(Decimal::from_str("0.1")?), Float::from(0.1)); - assert!((Float::from(Decimal::MAX) - Float::from(1.701412e20)).abs() < Float::from(1.)); - assert!((Float::from(Decimal::MIN) - Float::from(-1.701412e20)).abs() < Float::from(1.)); + assert!((Float::from(Decimal::MAX) - 
Float::from(1.701_412e20)).abs() < Float::from(1.)); + assert!((Float::from(Decimal::MIN) - Float::from(-1.701_412e20)).abs() < Float::from(1.)); Ok(()) } @@ -938,11 +938,11 @@ mod tests { assert_eq!(Double::from(Decimal::from(10)), Double::from(10.)); assert_eq!(Double::from(Decimal::from_str("0.1")?), Double::from(0.1)); assert!( - (Double::from(Decimal::MAX) - Double::from(1.7014118346046924e20)).abs() + (Double::from(Decimal::MAX) - Double::from(1.701_411_834_604_692_4e20)).abs() < Double::from(1.) ); assert!( - (Double::from(Decimal::MIN) - Double::from(-1.7014118346046924e20)).abs() + (Double::from(Decimal::MIN) - Double::from(-1.701_411_834_604_692_4e20)).abs() < Double::from(1.) ); Ok(()) diff --git a/lib/src/storage/backend/rocksdb.rs b/lib/src/storage/backend/rocksdb.rs index 7a1e22eb..fc8f4da2 100644 --- a/lib/src/storage/backend/rocksdb.rs +++ b/lib/src/storage/backend/rocksdb.rs @@ -628,7 +628,7 @@ impl Db { let result = f(Transaction { transaction: Rc::new(transaction), read_options, - _lifetime: PhantomData::default(), + _lifetime: PhantomData, }); match result { Ok(result) => { From f72a9600aed45c588b114456495a01a040ee2946 Mon Sep 17 00:00:00 2001 From: Tpt Date: Sat, 3 Jun 2023 14:54:02 +0200 Subject: [PATCH 44/45] CI: Makes sure to update APT cache before installing packages --- .github/workflows/artifacts.yml | 2 +- .github/workflows/tests.yml | 21 +++++---------------- 2 files changed, 6 insertions(+), 17 deletions(-) diff --git a/.github/workflows/artifacts.yml b/.github/workflows/artifacts.yml index 73bdc686..430a1c6e 100644 --- a/.github/workflows/artifacts.yml +++ b/.github/workflows/artifacts.yml @@ -21,7 +21,7 @@ jobs: submodules: true - run: rustup update && rustup target add aarch64-unknown-linux-gnu - run: | - sudo apt install -y g++-aarch64-linux-gnu + sudo apt update && sudo apt install -y g++-aarch64-linux-gnu echo -e "\n\n[target.aarch64-unknown-linux-gnu]\nlinker = \"aarch64-linux-gnu-gcc\"" >> .cargo/config.toml - uses: 
Swatinem/rust-cache@v2 - run: cargo build --release diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bde185c3..be981cc7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -347,29 +347,18 @@ jobs: minimize-crashes: true parallel-fuzzing: true storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git - - fuzz_prune: - if: github.event_name != 'pull_request' - needs: fuzz_repo - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - uses: google/clusterfuzzlite/actions/build_fuzzers@v1 - with: - language: rust - github-token: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true - uses: google/clusterfuzzlite/actions/run_fuzzers@v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 14400 + fuzz-seconds: 3600 mode: prune storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git + continue-on-error: true fuzz_coverage: if: github.event_name != 'pull_request' - needs: fuzz_prune + needs: fuzz_repo runs-on: ubuntu-latest steps: - uses: google/clusterfuzzlite/actions/build_fuzzers@v1 @@ -379,7 +368,7 @@ jobs: - uses: google/clusterfuzzlite/actions/run_fuzzers@v1 with: github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 600 + fuzz-seconds: 3600 mode: coverage sanitizer: coverage storage-repo: https://${{ secrets.FULL_ACCESS_TOKEN }}@github.com/oxigraph/clusterfuzzlite-oxigraph.git From 8c62137a0109ca537999801e5bbb6bed67b455fc Mon Sep 17 00:00:00 2001 From: Tpt Date: Fri, 2 Jun 2023 08:49:01 +0200 Subject: [PATCH 45/45] Upgrades PyO3 to 0.19 --- Cargo.lock | 33 +++++++++++++++++++++------------ python/Cargo.toml | 2 +- python/src/io.rs | 2 +- python/src/lib.rs | 6 ------ python/src/model.rs | 7 ------- python/src/store.rs | 20 ++++++++------------ 6 files changed, 31 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 191f9136..07f2bc46 100644 --- 
a/Cargo.lock +++ b/Cargo.lock @@ -390,7 +390,7 @@ dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", - "memoffset", + "memoffset 0.8.0", "scopeguard", ] @@ -846,6 +846,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -1227,14 +1236,14 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" +checksum = "cffef52f74ec3b1a1baf295d9b8fcc3070327aefc39a6d00656b13c1d0b8885c" dependencies = [ "cfg-if", "indoc", "libc", - "memoffset", + "memoffset 0.9.0", "parking_lot", "pyo3-build-config", "pyo3-ffi", @@ -1244,9 +1253,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" +checksum = "713eccf888fb05f1a96eb78c0dbc51907fee42b3377272dc902eb38985f418d5" dependencies = [ "once_cell", "target-lexicon", @@ -1254,9 +1263,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" +checksum = "5b2ecbdcfb01cbbf56e179ce969a048fd7305a66d4cdf3303e0da09d69afe4c3" dependencies = [ "libc", "pyo3-build-config", @@ -1264,9 +1273,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" +checksum = 
"b78fdc0899f2ea781c463679b20cb08af9247febc8d052de941951024cd8aea0" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -1276,9 +1285,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.18.3" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" +checksum = "60da7b84f1227c3e2fe7593505de274dcf4c8928b4e0a1c23d551a14e4e80a0f" dependencies = [ "proc-macro2", "quote", diff --git a/python/Cargo.toml b/python/Cargo.toml index 20488c8c..32bf13f4 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -20,4 +20,4 @@ abi3 = ["pyo3/abi3-py37"] [dependencies] oxigraph = { version = "0.3.17-dev", path="../lib", features = ["http_client"] } -pyo3 = { version = "0.18", features = ["extension-module"] } +pyo3 = { version = "0.19", features = ["extension-module"] } diff --git a/python/src/io.rs b/python/src/io.rs index 681a25fa..53e53af4 100644 --- a/python/src/io.rs +++ b/python/src/io.rs @@ -48,7 +48,7 @@ pub fn add_to_module(module: &PyModule) -> PyResult<()> { /// >>> list(parse(input, "text/turtle", base_iri="http://example.com/")) /// [ predicate= object=>>] #[pyfunction] -#[pyo3(text_signature = "(input, mime_type, *, base_iri = None)")] +#[pyo3(signature = (input, mime_type, *, base_iri = None))] pub fn parse( input: PyObject, mime_type: &str, diff --git a/python/src/lib.rs b/python/src/lib.rs index d20f80be..170d78b8 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,9 +1,3 @@ -#![allow( - clippy::redundant_pub_crate, - clippy::used_underscore_binding, - clippy::unused_self, - clippy::trivially_copy_pass_by_ref -)] mod io; mod model; mod sparql; diff --git a/python/src/model.rs b/python/src/model.rs index f92b862d..26791351 100644 --- a/python/src/model.rs +++ b/python/src/model.rs @@ -21,7 +21,6 @@ use std::vec::IntoIter; /// >>> str(NamedNode('http://example.com')) /// '' #[pyclass(frozen, name = "NamedNode", module = 
"pyoxigraph")] -#[pyo3(text_signature = "(value)")] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] pub struct PyNamedNode { inner: NamedNode, @@ -145,7 +144,6 @@ impl PyNamedNode { /// >>> str(BlankNode('ex')) /// '_:ex' #[pyclass(frozen, name = "BlankNode", module = "pyoxigraph")] -#[pyo3(text_signature = "(value = None)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyBlankNode { inner: BlankNode, @@ -281,7 +279,6 @@ impl PyBlankNode { /// >>> str(Literal('11', datatype=NamedNode('http://www.w3.org/2001/XMLSchema#integer'))) /// '"11"^^' #[pyclass(frozen, name = "Literal", module = "pyoxigraph")] -#[pyo3(text_signature = "(value, *, datatype = None, language = None)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyLiteral { inner: Literal, @@ -428,7 +425,6 @@ impl PyLiteral { /// The RDF `default graph name `_. #[pyclass(frozen, name = "DefaultGraph", module = "pyoxigraph")] -#[pyo3(text_signature = "()")] #[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)] pub struct PyDefaultGraph {} @@ -627,7 +623,6 @@ impl IntoPy for PyTerm { /// >>> (s, p, o) = Triple(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1')) #[pyclass(frozen, name = "Triple", module = "pyoxigraph")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] -#[pyo3(text_signature = "(subject, predicate, object)")] pub struct PyTriple { inner: Triple, } @@ -825,7 +820,6 @@ impl IntoPy for PyGraphName { /// /// >>> (s, p, o, g) = Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g')) #[pyclass(frozen, name = "Quad", module = "pyoxigraph")] -#[pyo3(text_signature = "(subject, predicate, object, graph_name = None)")] #[derive(Eq, PartialEq, Debug, Clone, Hash)] pub struct PyQuad { inner: Quad, @@ -1013,7 +1007,6 @@ impl PyQuad { /// >>> str(Variable('foo')) /// '?foo' #[pyclass(frozen, name = "Variable", module = "pyoxigraph")] -#[pyo3(text_signature = "(value)")] #[derive(Eq, 
PartialEq, Debug, Clone, Hash)] pub struct PyVariable { inner: Variable, diff --git a/python/src/store.rs b/python/src/store.rs index 49749c5b..9410aeed 100644 --- a/python/src/store.rs +++ b/python/src/store.rs @@ -35,8 +35,7 @@ use pyo3::prelude::*; /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) /// >>> str(store) /// ' "1" .\n' -#[pyclass(name = "Store", module = "pyoxigraph")] -#[pyo3(text_signature = "(path = None)")] +#[pyclass(frozen, name = "Store", module = "pyoxigraph")] #[derive(Clone)] pub struct PyStore { inner: Store, @@ -94,7 +93,7 @@ impl PyStore { /// :rtype: Store /// :raises IOError: if the target directories contain invalid data or could not be accessed. #[staticmethod] - #[pyo3(signature = (primary_path, secondary_path = None), text_signature = "(primary_path, secondary_path = None)")] + #[pyo3(signature = (primary_path, secondary_path = None))] fn secondary( primary_path: &str, secondary_path: Option<&str>, @@ -216,7 +215,7 @@ impl PyStore { /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'), NamedNode('http://example.com/g'))) /// >>> list(store.quads_for_pattern(NamedNode('http://example.com'), None, None, None)) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (subject, predicate, object, graph_name = None), text_signature = "($self, subject, predicate, object, graph_name = None)")] + #[pyo3(signature = (subject, predicate, object, graph_name = None))] fn quads_for_pattern( &self, subject: &PyAny, @@ -273,10 +272,7 @@ impl PyStore { /// >>> store.add(Quad(NamedNode('http://example.com'), NamedNode('http://example.com/p'), Literal('1'))) /// >>> store.query('ASK { ?s ?p ?o }') /// True - #[pyo3( - signature = (query, *, base_iri = None, use_default_graph_as_union = false, default_graph = None, named_graphs = None), - text_signature = "($self, query, *, base_iri = None, 
use_default_graph_as_union = False, default_graph = None, named_graphs = None)" - )] + #[pyo3(signature = (query, *, base_iri = None, use_default_graph_as_union = false, default_graph = None, named_graphs = None))] fn query( &self, query: &str, @@ -332,7 +328,7 @@ impl PyStore { /// >>> store.update('DELETE WHERE { ?p ?o }') /// >>> list(store) /// [] - #[pyo3(signature = (update, *, base_iri = None), text_signature = "($self, update, *, base_iri = None)")] + #[pyo3(signature = (update, *, base_iri = None))] fn update(&self, update: &str, base_iri: Option<&str>, py: Python<'_>) -> PyResult<()> { py.allow_threads(|| { let update = @@ -377,7 +373,7 @@ impl PyStore { /// >>> store.load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None), text_signature = "($self, input, mime_type, *, base_iri = None, to_graph = None)")] + #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None))] fn load( &self, input: PyObject, @@ -459,7 +455,7 @@ impl PyStore { /// >>> store.bulk_load(io.BytesIO(b'

"1" .'), "text/turtle", base_iri="http://example.com/", to_graph=NamedNode("http://example.com/g")) /// >>> list(store) /// [ predicate= object=> graph_name=>] - #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None), text_signature = "($self, input, mime_type, *, base_iri = None, to_graph = None)")] + #[pyo3(signature = (input, mime_type, *, base_iri = None, to_graph = None))] fn bulk_load( &self, input: PyObject, @@ -537,7 +533,7 @@ impl PyStore { /// >>> store.dump(output, "text/turtle", from_graph=NamedNode("http://example.com/g")) /// >>> output.getvalue() /// b' "1" .\n' - #[pyo3(signature = (output, mime_type, *, from_graph = None), text_signature = "($self, output, mime_type, *, from_graph = None)")] + #[pyo3(signature = (output, mime_type, *, from_graph = None))] fn dump( &self, output: PyObject,