From e0bbe29dc2377fc1277d4c232c9c26c90bd13551 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 26 Nov 2019 21:39:59 +0100 Subject: [PATCH] Use our own xsd:decimal implementation and restrict xsd:integer precision to 64bits --- lib/Cargo.toml | 1 - lib/src/model/literal.rs | 2 +- lib/src/model/mod.rs | 1 + lib/src/model/xsd/decimal.rs | 452 +++++++++++++++++++++++++++++++ lib/src/model/xsd/mod.rs | 3 + lib/src/sparql/eval.rs | 92 +++---- lib/src/store/numeric_encoder.rs | 64 ++--- lib/tests/sparql_test_cases.rs | 5 - 8 files changed, 525 insertions(+), 95 deletions(-) create mode 100644 lib/src/model/xsd/decimal.rs create mode 100644 lib/src/model/xsd/mod.rs diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 297923c9..ed356cf3 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -19,7 +19,6 @@ byteorder = { version = "1", features = ["i128"] } quick-xml = "0.17" ordered-float = "1" num-traits = "0.2" -rust_decimal = "1" chrono = "0.4" rand = "0.7" md-5 = "0.8" diff --git a/lib/src/model/literal.rs b/lib/src/model/literal.rs index 760a4439..bd52cefc 100644 --- a/lib/src/model/literal.rs +++ b/lib/src/model/literal.rs @@ -1,9 +1,9 @@ use crate::model::named_node::NamedNode; use crate::model::vocab::rdf; use crate::model::vocab::xsd; +use crate::model::xsd::Decimal; use chrono::prelude::*; use rio_api::model as rio; -use rust_decimal::Decimal; use std::borrow::Cow; use std::fmt; use std::option::Option; diff --git a/lib/src/model/mod.rs b/lib/src/model/mod.rs index 73c14182..76effb32 100644 --- a/lib/src/model/mod.rs +++ b/lib/src/model/mod.rs @@ -9,6 +9,7 @@ mod literal; mod named_node; mod triple; pub mod vocab; +pub(crate) mod xsd; pub use crate::model::blank_node::BlankNode; pub use crate::model::graph::SimpleGraph; diff --git a/lib/src/model/xsd/decimal.rs b/lib/src/model/xsd/decimal.rs new file mode 100644 index 00000000..cc4eb1cf --- /dev/null +++ b/lib/src/model/xsd/decimal.rs @@ -0,0 +1,452 @@ +use std::convert::{TryFrom, TryInto}; +use std::error::Error; +use 
/// Number of digits stored after the decimal point.
const DECIMAL_PART_DIGITS: usize = 18;
/// Scaling factor of the fixed-point encoding (`10^DECIMAL_PART_DIGITS`).
const DECIMAL_PART_POW: i128 = 1_000_000_000_000_000_000;
/// Square root of `DECIMAL_PART_POW`, used by the reduced-precision division fallback.
const DECIMAL_PART_HALF_POW: i128 = 1_000_000_000;

/// [XML Schema `decimal` datatype](https://www.w3.org/TR/xmlschema11-2/#decimal) implementation.
///
/// The number is stored in a fixed-point encoding: an `i128` holding the value multiplied by
/// `10^18`. This gives 18 digits after "." and roughly 20 digits before it.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash, Default)]
pub struct Decimal {
    value: i128, // value * 10^18
}

impl<I: Into<i64>> From<I> for Decimal {
    /// Converts any integer that losslessly fits into an `i64` to the matching decimal.
    fn from(value: I) -> Self {
        let value: i64 = value.into();
        Self {
            // |i64| * 10^18 always fits into an i128, so this multiplication cannot overflow.
            value: i128::from(value) * DECIMAL_PART_POW,
        }
    }
}

impl FromStr for Decimal {
    type Err = ParseDecimalError;

    /// Parses the decimal lexical mapping `(\+|-)?([0-9]+(\.[0-9]*)?|\.[0-9]+)`.
    ///
    /// # Errors
    ///
    /// * "unexpected end" if the input holds no digit at all,
    /// * "unexpected character" if anything other than the grammar above is found,
    /// * "overflow" if the number is too large for the encoding,
    /// * "underflow" if a non-zero digit appears after the 18th fractional position.
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let input = input.as_bytes();
        if input.is_empty() {
            return Err(PARSE_UNEXPECTED_END);
        }

        let (sign, mut cursor): (i128, usize) = match input.first() {
            Some(b'+') => (1, 1),
            Some(b'-') => (-1, 1),
            _ => (1, 0),
        };

        // Digits are accumulated with the sign already applied so that the smallest
        // representable value (whose magnitude exceeds i128::MAX) can be parsed too.
        let mut value = 0i128;
        let mut with_before_dot = false;
        while let Some(digit) = digit_at(input, cursor) {
            value = push_digit(value, sign, digit)?;
            cursor += 1;
            with_before_dot = true;
        }

        // `exp` is the factor still to apply to `value`; it shrinks with each fractional digit.
        let mut exp = DECIMAL_PART_POW;
        if let Some(c) = input.get(cursor) {
            if *c != b'.' {
                return Err(PARSE_UNEXPECTED_CHAR);
            }
            cursor += 1;

            let mut with_after_dot = false;
            while let Some(digit) = digit_at(input, cursor) {
                if exp > 1 {
                    exp /= 10;
                    value = push_digit(value, sign, digit)?;
                } else if digit != 0 {
                    // More precision than the encoding can hold: refuse instead of
                    // silently returning a wrong number.
                    return Err(PARSE_UNDERFLOW);
                }
                cursor += 1;
                with_after_dot = true;
            }

            if !with_before_dot && !with_after_dot {
                // We only have a dot
                return Err(PARSE_UNEXPECTED_END);
            }
            if cursor < input.len() {
                return Err(PARSE_UNEXPECTED_CHAR);
            }
        } else if !with_before_dot {
            // Only a sign, no digit
            return Err(PARSE_UNEXPECTED_END);
        }

        value
            .checked_mul(exp)
            .map(|value| Self { value })
            .ok_or(PARSE_OVERFLOW)
    }
}

/// Returns the numeric value of the ASCII digit at `index`, if there is one.
fn digit_at(input: &[u8], index: usize) -> Option<u8> {
    input
        .get(index)
        .filter(|c| c.is_ascii_digit())
        .map(|c| c - b'0')
}

/// Appends `digit` (carrying `sign`) to the right of `value`, reporting overflows.
fn push_digit(value: i128, sign: i128, digit: u8) -> Result<i128, ParseDecimalError> {
    value
        .checked_mul(10)
        .and_then(|v| v.checked_add(sign * i128::from(digit)))
        .ok_or(PARSE_OVERFLOW)
}

/// Error returned when parsing a [`Decimal`] from a string fails.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseDecimalError {
    kind: ParseDecimalErrorKind,
}

#[derive(Debug, Clone, PartialEq, Eq)]
enum ParseDecimalErrorKind {
    Overflow,
    Underflow,
    UnexpectedChar,
    UnexpectedEnd,
}

const PARSE_OVERFLOW: ParseDecimalError = ParseDecimalError {
    kind: ParseDecimalErrorKind::Overflow,
};
const PARSE_UNDERFLOW: ParseDecimalError = ParseDecimalError {
    kind: ParseDecimalErrorKind::Underflow,
};
const PARSE_UNEXPECTED_CHAR: ParseDecimalError = ParseDecimalError {
    kind: ParseDecimalErrorKind::UnexpectedChar,
};
const PARSE_UNEXPECTED_END: ParseDecimalError = ParseDecimalError {
    kind: ParseDecimalErrorKind::UnexpectedEnd,
};

impl fmt::Display for ParseDecimalError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self.kind {
            ParseDecimalErrorKind::Overflow => "Value overflow",
            ParseDecimalErrorKind::Underflow => "Value underflow",
            ParseDecimalErrorKind::UnexpectedChar => "Unexpected character",
            ParseDecimalErrorKind::UnexpectedEnd => "Unexpected end of string",
        })
    }
}

impl Error for ParseDecimalError {}

impl fmt::Display for Decimal {
    /// Formats the decimal following its canonical representation:
    /// an optional "-", the integer part, and the fractional part without trailing zeros
    /// (omitted together with the "." when it is zero).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Reinterpreting a negative i128 as u128 and negating it (two's complement) yields
        // its magnitude without overflowing, even for i128::MIN.
        let magnitude = if self.value < 0 {
            f.write_char('-')?;
            (self.value as u128).wrapping_neg()
        } else {
            self.value as u128
        };
        let scale = DECIMAL_PART_POW as u128;

        write!(f, "{}", magnitude / scale)?;

        let mut fraction = magnitude % scale;
        if fraction != 0 {
            // Drop trailing zeros while remembering how many leading zeros must be kept.
            let mut width = DECIMAL_PART_DIGITS;
            while fraction % 10 == 0 {
                fraction /= 10;
                width -= 1;
            }
            write!(f, ".{:0width$}", fraction, width = width)?;
        }
        Ok(())
    }
}

impl Neg for Decimal {
    type Output = Self;

    /// Negates the value. Negating the smallest representable value overflows.
    fn neg(self) -> Self {
        Self { value: -self.value }
    }
}

impl Decimal {
    /// Rebuilds a decimal from the little-endian bytes produced by [`Decimal::to_le_bytes`].
    pub fn from_le_bytes(bytes: [u8; 16]) -> Self {
        Self {
            value: i128::from_le_bytes(bytes),
        }
    }

    /// Returns the little-endian bytes of the internal fixed-point representation.
    pub fn to_le_bytes(&self) -> [u8; 16] {
        self.value.to_le_bytes()
    }

    /// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add)
    ///
    /// Returns `None` on overflow.
    pub fn checked_add(&self, rhs: Self) -> Option<Self> {
        Some(Self {
            value: self.value.checked_add(rhs.value)?,
        })
    }

    /// [op:numeric-subtract](https://www.w3.org/TR/xpath-functions/#func-numeric-subtract)
    ///
    /// Returns `None` on overflow.
    pub fn checked_sub(&self, rhs: Self) -> Option<Self> {
        Some(Self {
            value: self.value.checked_sub(rhs.value)?,
        })
    }

    /// [op:numeric-multiply](https://www.w3.org/TR/xpath-functions/#func-numeric-multiply)
    ///
    /// The product is truncated toward zero to 18 fractional digits.
    /// Returns `None` on overflow.
    pub fn checked_mul(&self, rhs: Self) -> Option<Self> {
        // Split each operand into an integer part and a fractional part (both carrying the
        // sign of the operand). With a = ai + af and b = bi + bf:
        //   a * b = ai*bi + ai*bf + af*bi + af*bf
        // The four terms share the same sign, so an overflow in any partial result means the
        // final product overflows too.
        let (a_int, a_frac) = (
            self.value / DECIMAL_PART_POW,
            self.value % DECIMAL_PART_POW,
        );
        let (b_int, b_frac) = (rhs.value / DECIMAL_PART_POW, rhs.value % DECIMAL_PART_POW);

        let value = a_int
            .checked_mul(b_int)?
            .checked_mul(DECIMAL_PART_POW)?
            .checked_add(a_int.checked_mul(b_frac)?)?
            .checked_add(a_frac.checked_mul(b_int)?)?
            // |a_frac| and |b_frac| are both below 10^18: their product fits into an i128.
            .checked_add(a_frac * b_frac / DECIMAL_PART_POW)?;
        Some(Self { value })
    }

    /// [op:numeric-divide](https://www.w3.org/TR/xpath-functions/#func-numeric-divide)
    ///
    /// The quotient is truncated toward zero. It carries 18 fractional digits whenever the
    /// scaled remainder fits into an `i128` and 9 fractional digits otherwise.
    /// Returns `None` on division by zero or overflow.
    pub fn checked_div(&self, rhs: Self) -> Option<Self> {
        // `checked_div` rejects both a zero divisor and the i128::MIN / -1 overflow, which
        // makes the plain `%` and `/` below safe.
        let quotient = self.value.checked_div(rhs.value)?;
        let remainder = self.value % rhs.value;

        let fraction = match remainder.checked_mul(DECIMAL_PART_POW) {
            Some(scaled) => scaled / rhs.value,
            // Very large divisor: fall back to a 9 digits approximation of the fraction.
            None => remainder
                .checked_mul(DECIMAL_PART_HALF_POW)?
                .checked_div(rhs.value)?
                .checked_mul(DECIMAL_PART_HALF_POW)?,
        };

        // `quotient` and `fraction` have the same sign: no cancellation can hide an overflow.
        let value = quotient
            .checked_mul(DECIMAL_PART_POW)?
            .checked_add(fraction)?;
        Some(Self { value })
    }

    /// [fn:abs](https://www.w3.org/TR/xpath-functions/#func-abs)
    ///
    /// The absolute value of the smallest representable value overflows.
    pub fn abs(&self) -> Decimal {
        Self {
            value: self.value.abs(),
        }
    }

    /// [fn:round](https://www.w3.org/TR/xpath-functions/#func-round)
    ///
    /// Rounds to the nearest integer, ties being rounded toward positive infinity
    /// (`2.5` gives `3`, `-2.5` gives `-2`, `-2.51` gives `-3`).
    pub fn round(&self) -> Decimal {
        const HALF: i128 = DECIMAL_PART_POW / 2;
        // `remainder` has the sign of the value; removing it truncates toward zero.
        let remainder = self.value % DECIMAL_PART_POW;
        let truncated = self.value - remainder;
        Self {
            value: if remainder >= HALF {
                truncated + DECIMAL_PART_POW
            } else if remainder < -HALF {
                truncated - DECIMAL_PART_POW
            } else {
                truncated
            },
        }
    }

    /// [fn:ceiling](https://www.w3.org/TR/xpath-functions/#func-ceiling)
    pub fn ceil(&self) -> Decimal {
        let remainder = self.value % DECIMAL_PART_POW;
        let truncated = self.value - remainder;
        Self {
            // Truncation already rounds negative values up.
            value: if remainder > 0 {
                truncated + DECIMAL_PART_POW
            } else {
                truncated
            },
        }
    }

    /// [fn:floor](https://www.w3.org/TR/xpath-functions/#func-floor)
    pub fn floor(&self) -> Decimal {
        let remainder = self.value % DECIMAL_PART_POW;
        let truncated = self.value - remainder;
        Self {
            // Truncation already rounds positive values down.
            value: if remainder < 0 {
                truncated - DECIMAL_PART_POW
            } else {
                truncated
            },
        }
    }

    /// Converts to the nearest `f32`. Currently always returns `Some`.
    pub fn to_f32(&self) -> Option<f32> {
        //TODO: precision?
        Some((self.value as f32) / (DECIMAL_PART_POW as f32))
    }

    /// Converts to the nearest `f64`. Currently always returns `Some`.
    pub fn to_f64(&self) -> Option<f64> {
        //TODO: precision?
        Some((self.value as f64) / (DECIMAL_PART_POW as f64))
    }
}

impl TryFrom<Decimal> for i64 {
    type Error = ();

    /// Truncates the decimal toward zero and fails if the result does not fit into an `i64`.
    fn try_from(value: Decimal) -> Result<i64, ()> {
        (value.value / DECIMAL_PART_POW)
            .try_into()
            .map_err(|_| ())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    const MIN: Decimal = Decimal { value: i128::MIN };
    const MAX: Decimal = Decimal { value: i128::MAX };
    const STEP: Decimal = Decimal { value: 1 };

    #[test]
    fn from_str() {
        assert_eq!(Decimal::from_str("210").unwrap().to_string(), "210");
        assert_eq!(Decimal::from_str("1000").unwrap().to_string(), "1000");
        assert_eq!(Decimal::from_str("-1.23").unwrap().to_string(), "-1.23");
        assert_eq!(
            Decimal::from_str("12678967.543233").unwrap().to_string(),
            "12678967.543233"
        );
        assert_eq!(
            Decimal::from_str("+100000.00").unwrap().to_string(),
            "100000"
        );
        assert_eq!(Decimal::from_str("0.1220").unwrap().to_string(), "0.122");
        assert_eq!(Decimal::from_str(".12200").unwrap().to_string(), "0.122");
        assert_eq!(Decimal::from_str(&MAX.to_string()).unwrap(), MAX);
        assert_eq!(Decimal::from_str(&MIN.to_string()).unwrap(), MIN);
        assert_eq!(
            Decimal::from_str(&MIN.checked_add(STEP).unwrap().to_string()).unwrap(),
            MIN.checked_add(STEP).unwrap()
        );
    }

    #[test]
    fn from_str_errors() {
        assert_eq!(Decimal::from_str(""), Err(PARSE_UNEXPECTED_END));
        assert_eq!(Decimal::from_str("+"), Err(PARSE_UNEXPECTED_END));
        assert_eq!(Decimal::from_str("."), Err(PARSE_UNEXPECTED_END));
        assert_eq!(Decimal::from_str("1a"), Err(PARSE_UNEXPECTED_CHAR));
        assert_eq!(Decimal::from_str("1.2a"), Err(PARSE_UNEXPECTED_CHAR));
        assert_eq!(
            Decimal::from_str("0.0000000000000000001"),
            Err(PARSE_UNDERFLOW)
        );
        assert_eq!(
            Decimal::from_str("1.0000000000000000000"),
            Ok(Decimal::from(1))
        );
        assert_eq!(
            Decimal::from_str("1000000000000000000000000000000000000000"),
            Err(PARSE_OVERFLOW)
        );
    }

    #[test]
    fn add() {
        assert!(MIN.checked_add(STEP).is_some());
        assert!(MAX.checked_add(STEP).is_none());
        assert_eq!(MAX.checked_add(MIN), Some(-STEP));
    }

    #[test]
    fn sub() {
        assert!(MIN.checked_sub(STEP).is_none());
        assert!(MAX.checked_sub(STEP).is_some());
    }

    #[test]
    fn mul() {
        assert_eq!(
            Decimal::from_str("1")
                .unwrap()
                .checked_mul(Decimal::from_str("-1").unwrap()),
            Some(Decimal::from_str("-1").unwrap())
        );
        assert_eq!(
            Decimal::from_str("1000")
                .unwrap()
                .checked_mul(Decimal::from_str("1000").unwrap()),
            Some(Decimal::from_str("1000000").unwrap())
        );
        assert_eq!(
            Decimal::from_str("0.1")
                .unwrap()
                .checked_mul(Decimal::from_str("0.01").unwrap()),
            Some(Decimal::from_str("0.001").unwrap())
        );
        assert_eq!(
            Decimal::from_str("-1.5")
                .unwrap()
                .checked_mul(Decimal::from_str("2.5").unwrap()),
            Some(Decimal::from_str("-3.75").unwrap())
        );
        assert_eq!(MAX.checked_mul(Decimal::from(1)), Some(MAX));
        assert_eq!(MIN.checked_mul(Decimal::from(1)), Some(MIN));
        assert!(MAX.checked_mul(Decimal::from(2)).is_none());
        assert!(MIN.checked_mul(Decimal::from(-1)).is_none());
    }

    #[test]
    fn div() {
        assert_eq!(
            Decimal::from_str("1")
                .unwrap()
                .checked_div(Decimal::from_str("1").unwrap()),
            Some(Decimal::from_str("1").unwrap())
        );
        assert_eq!(
            Decimal::from_str("100")
                .unwrap()
                .checked_div(Decimal::from_str("10").unwrap()),
            Some(Decimal::from_str("10").unwrap())
        );
        assert_eq!(
            Decimal::from_str("10")
                .unwrap()
                .checked_div(Decimal::from_str("100").unwrap()),
            Some(Decimal::from_str("0.1").unwrap())
        );
        assert_eq!(
            Decimal::from(123_456_789)
                .checked_div(Decimal::from(1_000_000_000))
                .unwrap()
                .to_string(),
            "0.123456789"
        );
        assert!(Decimal::from(1).checked_div(Decimal::from(0)).is_none());
    }

    #[test]
    fn round() {
        assert_eq!(Decimal::from_str("10").unwrap().round(), Decimal::from(10));
        assert_eq!(
            Decimal::from_str("-10").unwrap().round(),
            Decimal::from(-10)
        );
        assert_eq!(Decimal::from_str("2.5").unwrap().round(), Decimal::from(3));
        assert_eq!(
            Decimal::from_str("2.4999").unwrap().round(),
            Decimal::from(2)
        );
        assert_eq!(
            Decimal::from_str("-2.5").unwrap().round(),
            Decimal::from(-2)
        );
        assert_eq!(
            Decimal::from_str("-2.51").unwrap().round(),
            Decimal::from(-3)
        );
        assert_eq!(Decimal::from(i64::MIN).round(), Decimal::from(i64::MIN));
        assert_eq!(Decimal::from(i64::MAX).round(), Decimal::from(i64::MAX));
    }

    #[test]
    fn ceil() {
        assert_eq!(Decimal::from_str("10").unwrap().ceil(), Decimal::from(10));
        assert_eq!(Decimal::from_str("-10").unwrap().ceil(), Decimal::from(-10));
        assert_eq!(Decimal::from_str("10.5").unwrap().ceil(), Decimal::from(11));
        assert_eq!(
            Decimal::from_str("-10.5").unwrap().ceil(),
            Decimal::from(-10)
        );
        assert_eq!(Decimal::from(i64::MIN).ceil(), Decimal::from(i64::MIN));
        assert_eq!(Decimal::from(i64::MAX).ceil(), Decimal::from(i64::MAX));
    }

    #[test]
    fn floor() {
        assert_eq!(Decimal::from_str("10").unwrap().floor(), Decimal::from(10));
        assert_eq!(
            Decimal::from_str("-10").unwrap().floor(),
            Decimal::from(-10)
        );
        assert_eq!(
            Decimal::from_str("10.5").unwrap().floor(),
            Decimal::from(10)
        );
        assert_eq!(
            Decimal::from_str("-10.5").unwrap().floor(),
            Decimal::from(-11)
        );
        assert_eq!(Decimal::from(i64::MIN).floor(), Decimal::from(i64::MIN));
        assert_eq!(Decimal::from(i64::MAX).floor(), Decimal::from(i64::MAX));
    }
}
b/lib/src/model/xsd/mod.rs @@ -0,0 +1,3 @@ +mod decimal; + +pub use self::decimal::Decimal; diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs index eccd165c..a5e196d0 100644 --- a/lib/src/sparql/eval.rs +++ b/lib/src/sparql/eval.rs @@ -1,3 +1,4 @@ +use crate::model::xsd::Decimal; use crate::model::BlankNode; use crate::model::Triple; use crate::sparql::algebra::GraphPattern; @@ -11,20 +12,16 @@ use chrono::prelude::*; use digest::Digest; use failure::format_err; use md5::Md5; -use num_traits::identities::Zero; -use num_traits::FromPrimitive; -use num_traits::One; use num_traits::ToPrimitive; use rand::random; use regex::{Regex, RegexBuilder}; use rio_api::iri::Iri; use rio_api::model as rio; -use rust_decimal::{Decimal, RoundingStrategy}; use sha1::Sha1; use sha2::{Sha256, Sha384, Sha512}; use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::fmt::Write; use std::hash::Hash; use std::iter::Iterator; @@ -837,9 +834,9 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { PlanExpression::Div(a, b) => Some(match self.parse_numeric_operands(a, b, tuple)? { NumericBinaryOperands::Float(v1, v2) => (v1 / v2).into(), NumericBinaryOperands::Double(v1, v2) => (v1 / v2).into(), - NumericBinaryOperands::Integer(v1, v2) => Decimal::from_i128(v1)? - .checked_div(Decimal::from_i128(v2)?)? - .into(), + NumericBinaryOperands::Integer(v1, v2) => { + Decimal::from(v1).checked_div(Decimal::from(v2))?.into() + } NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2)?.into(), }), PlanExpression::UnaryPlus(e) => match self.eval_expression(e, tuple)? { @@ -954,11 +951,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { }, PlanExpression::Round(e) => match self.eval_expression(e, tuple)? 
{ EncodedTerm::IntegerLiteral(value) => Some(value.into()), - EncodedTerm::DecimalLiteral(value) => Some( - value - .round_dp_with_strategy(0, RoundingStrategy::RoundHalfUp) - .into(), - ), + EncodedTerm::DecimalLiteral(value) => Some(value.round().into()), EncodedTerm::FloatLiteral(value) => Some(value.round().into()), EncodedTerm::DoubleLiteral(value) => Some(value.round().into()), _ => None, @@ -1026,7 +1019,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { (self .to_string(self.eval_expression(arg, tuple)?)? .chars() - .count() as i128) + .count() as i64) .into(), ), PlanExpression::Replace(arg, pattern, replacement, flags) => { @@ -1159,18 +1152,22 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { }, PlanExpression::Seconds(e) => match self.eval_expression(e, tuple)? { EncodedTerm::NaiveTimeLiteral(time) => Some( - (Decimal::new(time.nanosecond().into(), 9) + Decimal::from(time.second())) + Decimal::from(time.nanosecond()) + .checked_div(Decimal::from(1_000_000_000))? + .checked_add(Decimal::from(time.second()))? .into(), ), EncodedTerm::DateTimeLiteral(date_time) => Some( - (Decimal::new(date_time.nanosecond().into(), 9) - + Decimal::from(date_time.second())) - .into(), + Decimal::from(date_time.nanosecond()) + .checked_div(Decimal::from(1_000_000_000))? + .checked_add(Decimal::from(date_time.second()))? + .into(), ), EncodedTerm::NaiveDateTimeLiteral(date_time) => Some( - (Decimal::new(date_time.nanosecond().into(), 9) - + Decimal::from(date_time.second())) - .into(), + Decimal::from(date_time.nanosecond()) + .checked_div(Decimal::from(1_000_000_000))? + .checked_add(Decimal::from(date_time.second()))? + .into(), ), _ => None, }, @@ -1354,10 +1351,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { _ => None, }, PlanExpression::IntegerCast(e) => match self.eval_expression(e, tuple)? 
{ - EncodedTerm::FloatLiteral(value) => Some(value.to_i128()?.into()), - EncodedTerm::DoubleLiteral(value) => Some(value.to_i128()?.into()), - EncodedTerm::IntegerLiteral(value) => Some(value.to_i128()?.into()), - EncodedTerm::DecimalLiteral(value) => Some(value.to_i128()?.into()), + EncodedTerm::FloatLiteral(value) => Some(value.to_i64()?.into()), + EncodedTerm::DoubleLiteral(value) => Some(value.to_i64()?.into()), + EncodedTerm::IntegerLiteral(value) => Some(value.to_i64()?.into()), + EncodedTerm::DecimalLiteral(value) => Some(i64::try_from(value).ok()?.into()), EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()), EncodedTerm::StringLiteral { value_id } => { parse_integer_str(&*self.dataset.get_str(value_id).ok()??) @@ -1365,18 +1362,13 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { _ => None, }, PlanExpression::DecimalCast(e) => match self.eval_expression(e, tuple)? { - EncodedTerm::FloatLiteral(value) => Some(Decimal::from_f32(*value)?.into()), - EncodedTerm::DoubleLiteral(value) => Some(Decimal::from_f64(*value)?.into()), - EncodedTerm::IntegerLiteral(value) => Some(Decimal::from_i128(value)?.into()), + //TODO: code EncodedTerm::FloatLiteral(value) => Some(Decimal::from_f32(*value)?.into()), + //TODO: code EncodedTerm::DoubleLiteral(value) => Some(Decimal::from_f64(*value)?.into()), + EncodedTerm::IntegerLiteral(value) => Some(Decimal::from(value).into()), EncodedTerm::DecimalLiteral(value) => Some(value.into()), - EncodedTerm::BooleanLiteral(value) => Some( - if value { - Decimal::one() - } else { - Decimal::zero() - } - .into(), - ), + EncodedTerm::BooleanLiteral(value) => { + Some(Decimal::from(if value { 1 } else { 0 }).into()) + } EncodedTerm::StringLiteral { value_id } => { parse_decimal_str(&*self.dataset.get_str(value_id).ok()??) } @@ -1419,10 +1411,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { match term { EncodedTerm::BooleanLiteral(value) => Some(value), EncodedTerm::StringLiteral { .. 
} => Some(term != ENCODED_EMPTY_STRING_LITERAL), - EncodedTerm::FloatLiteral(value) => Some(!value.is_zero()), - EncodedTerm::DoubleLiteral(value) => Some(!value.is_zero()), - EncodedTerm::IntegerLiteral(value) => Some(!value.is_zero()), - EncodedTerm::DecimalLiteral(value) => Some(!value.is_zero()), + EncodedTerm::FloatLiteral(value) => Some(*value != 0f32), + EncodedTerm::DoubleLiteral(value) => Some(*value != 0f64), + EncodedTerm::IntegerLiteral(value) => Some(value != 0), + EncodedTerm::DecimalLiteral(value) => Some(value != Decimal::from(0)), _ => None, } } @@ -1677,14 +1669,14 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::FloatLiteral(b) => Some(a.to_f32()? == *b), EncodedTerm::DoubleLiteral(b) => Some(a.to_f64()? == *b), EncodedTerm::IntegerLiteral(b) => Some(a == b), - EncodedTerm::DecimalLiteral(b) => Some(Decimal::from_i128(a)? == b), + EncodedTerm::DecimalLiteral(b) => Some(Decimal::from(a) == b), EncodedTerm::TypedLiteral { .. } => None, _ => Some(false), }, EncodedTerm::DecimalLiteral(a) => match b { EncodedTerm::FloatLiteral(b) => Some(a.to_f32()? == *b), EncodedTerm::DoubleLiteral(b) => Some(a.to_f64()? == *b), - EncodedTerm::IntegerLiteral(b) => Some(a == Decimal::from_i128(b)?), + EncodedTerm::IntegerLiteral(b) => Some(a == Decimal::from(b)), EncodedTerm::DecimalLiteral(b) => Some(a == b), EncodedTerm::TypedLiteral { .. 
} => None, _ => Some(false), @@ -1821,13 +1813,13 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { EncodedTerm::FloatLiteral(b) => a.to_f32()?.partial_cmp(&*b), EncodedTerm::DoubleLiteral(b) => a.to_f64()?.partial_cmp(&*b), EncodedTerm::IntegerLiteral(b) => a.partial_cmp(&b), - EncodedTerm::DecimalLiteral(b) => Decimal::from_i128(a)?.partial_cmp(&b), + EncodedTerm::DecimalLiteral(b) => Decimal::from(a).partial_cmp(&b), _ => None, }, EncodedTerm::DecimalLiteral(a) => match b { EncodedTerm::FloatLiteral(b) => a.to_f32()?.partial_cmp(&*b), EncodedTerm::DoubleLiteral(b) => a.to_f64()?.partial_cmp(&*b), - EncodedTerm::IntegerLiteral(b) => a.partial_cmp(&Decimal::from_i128(b)?), + EncodedTerm::IntegerLiteral(b) => a.partial_cmp(&Decimal::from(b)), EncodedTerm::DecimalLiteral(b) => a.partial_cmp(&b), _ => None, }, @@ -1885,7 +1877,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator { enum NumericBinaryOperands { Float(f32, f32), Double(f64, f64), - Integer(i128, i128), + Integer(i64, i64), Decimal(Decimal, Decimal), } @@ -1926,7 +1918,7 @@ impl NumericBinaryOperands { Some(NumericBinaryOperands::Integer(v1, v2)) } (EncodedTerm::IntegerLiteral(v1), EncodedTerm::DecimalLiteral(v2)) => { - Some(NumericBinaryOperands::Decimal(Decimal::from_i128(v1)?, v2)) + Some(NumericBinaryOperands::Decimal(Decimal::from(v1), v2)) } (EncodedTerm::DecimalLiteral(v1), EncodedTerm::FloatLiteral(v2)) => { Some(NumericBinaryOperands::Float(v1.to_f32()?, *v2)) @@ -1935,7 +1927,7 @@ impl NumericBinaryOperands { Some(NumericBinaryOperands::Double(v1.to_f64()?, *v2)) } (EncodedTerm::DecimalLiteral(v1), EncodedTerm::IntegerLiteral(v2)) => { - Some(NumericBinaryOperands::Decimal(v1, Decimal::from_i128(v2)?)) + Some(NumericBinaryOperands::Decimal(v1, Decimal::from(v2))) } (EncodedTerm::DecimalLiteral(v1), EncodedTerm::DecimalLiteral(v2)) => { Some(NumericBinaryOperands::Decimal(v1, v2)) @@ -2424,7 +2416,7 @@ impl Accumulator for DistinctAccumulator { #[derive(Default, Debug)] struct 
CountAccumulator { - count: u64, + count: i64, } impl Accumulator for CountAccumulator { @@ -2494,8 +2486,8 @@ impl Accumulator for AvgAccumulator { match NumericBinaryOperands::new(sum, count)? { NumericBinaryOperands::Float(v1, v2) => Some((v1 / v2).into()), NumericBinaryOperands::Double(v1, v2) => Some((v1 / v2).into()), - NumericBinaryOperands::Integer(v1, v2) => Decimal::from_i128(v1)? - .checked_div(Decimal::from_i128(v2)?) + NumericBinaryOperands::Integer(v1, v2) => Decimal::from(v1) + .checked_div(Decimal::from(v2)) .map(|v| v.into()), NumericBinaryOperands::Decimal(v1, v2) => v1.checked_div(v2).map(|v| v.into()), } diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index 0f574a75..ce6776ca 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -1,5 +1,6 @@ use crate::model::vocab::rdf; use crate::model::vocab::xsd; +use crate::model::xsd::Decimal; use crate::model::*; use crate::Result; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; @@ -11,7 +12,6 @@ use md5::Md5; use ordered_float::OrderedFloat; use rand::random; use rio_api::model as rio; -use rust_decimal::Decimal; use std::collections::HashMap; use std::io::Read; use std::io::Write; @@ -100,7 +100,7 @@ pub enum EncodedTerm { BooleanLiteral(bool), FloatLiteral(OrderedFloat), DoubleLiteral(OrderedFloat), - IntegerLiteral(i128), + IntegerLiteral(i64), DecimalLiteral(Decimal), DateLiteral(Date), NaiveDateLiteral(NaiveDate), @@ -194,21 +194,9 @@ impl From for EncodedTerm { } } -impl From for EncodedTerm { - fn from(value: i128) -> Self { - EncodedTerm::IntegerLiteral(value) - } -} - impl From for EncodedTerm { fn from(value: i64) -> Self { - EncodedTerm::IntegerLiteral(value.into()) - } -} - -impl From for EncodedTerm { - fn from(value: u64) -> Self { - EncodedTerm::IntegerLiteral(value.into()) + EncodedTerm::IntegerLiteral(value) } } @@ -454,12 +442,12 @@ impl TermReader for R { self.read_f64::()?, ))), TYPE_INTEGER_LITERAL => 
Ok(EncodedTerm::IntegerLiteral( - self.read_i128::()?, + self.read_i64::()?, )), TYPE_DECIMAL_LITERAL => { let mut buffer = [0 as u8; 16]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::DecimalLiteral(Decimal::deserialize(buffer))) + Ok(EncodedTerm::DecimalLiteral(Decimal::from_le_bytes(buffer))) } TYPE_DATE_LITERAL => Ok(EncodedTerm::DateLiteral(Date::from_utc( NaiveDate::from_num_days_from_ce_opt(self.read_i32::()?) @@ -594,49 +582,47 @@ impl TermWriter for W { self.write_u8(term.type_id())?; match term { EncodedTerm::DefaultGraph => {} - EncodedTerm::NamedNode { iri_id } => self.write_u128::(iri_id)?, - EncodedTerm::BlankNode { id } => self.write_u128::(id)?, - EncodedTerm::StringLiteral { value_id } => { - self.write_u128::(value_id)?; - } + EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_le_bytes())?, + EncodedTerm::BlankNode { id } => self.write_all(&id.to_le_bytes())?, + EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_le_bytes())?, EncodedTerm::LangStringLiteral { value_id, language_id, } => { - self.write_u128::(language_id)?; - self.write_u128::(value_id)?; + self.write_all(&language_id.to_le_bytes())?; + self.write_all(&value_id.to_le_bytes())?; } EncodedTerm::TypedLiteral { value_id, datatype_id, } => { - self.write_u128::(datatype_id)?; - self.write_u128::(value_id)?; + self.write_all(&datatype_id.to_le_bytes())?; + self.write_all(&value_id.to_le_bytes())?; } EncodedTerm::BooleanLiteral(_) => {} EncodedTerm::FloatLiteral(value) => self.write_f32::(*value)?, EncodedTerm::DoubleLiteral(value) => self.write_f64::(*value)?, - EncodedTerm::IntegerLiteral(value) => self.write_i128::(value)?, - EncodedTerm::DecimalLiteral(value) => self.write_all(&value.serialize())?, + EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_le_bytes())?, + EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::DateLiteral(value) => { - self.write_i32::(value.num_days_from_ce())?; - 
self.write_i32::(value.timezone().local_minus_utc())?; + self.write_all(&value.num_days_from_ce().to_le_bytes())?; + self.write_all(&value.timezone().local_minus_utc().to_le_bytes())?; } EncodedTerm::NaiveDateLiteral(value) => { - self.write_i32::(value.num_days_from_ce())?; + self.write_all(&value.num_days_from_ce().to_le_bytes())? } EncodedTerm::NaiveTimeLiteral(value) => { - self.write_u32::(value.num_seconds_from_midnight())?; - self.write_u32::(value.nanosecond())?; + self.write_all(&value.num_seconds_from_midnight().to_le_bytes())?; + self.write_all(&value.nanosecond().to_le_bytes())?; } EncodedTerm::DateTimeLiteral(value) => { - self.write_i64::(value.timestamp())?; - self.write_u32::(value.timestamp_subsec_nanos())?; - self.write_i32::(value.timezone().local_minus_utc())?; + self.write_all(&value.timestamp().to_le_bytes())?; + self.write_all(&value.timestamp_subsec_nanos().to_le_bytes())?; + self.write_all(&value.timezone().local_minus_utc().to_le_bytes())?; } EncodedTerm::NaiveDateTimeLiteral(value) => { - self.write_i64::(value.timestamp())?; - self.write_u32::(value.timestamp_subsec_nanos())?; + self.write_all(&value.timestamp().to_le_bytes())?; + self.write_all(&value.timestamp_subsec_nanos().to_le_bytes())?; } } Ok(()) @@ -1140,6 +1126,8 @@ fn test_encoding() { Literal::from("foo").into(), Literal::new_language_tagged_literal("foo", "fr").into(), Literal::new_language_tagged_literal("foo", "FR").into(), + Literal::new_typed_literal("-1.32", xsd::DECIMAL.clone()).into(), + Literal::new_typed_literal("-foo", NamedNode::new_from_string("http://foo.com")).into(), ]; for term in terms { let encoded = store.encode_term(&term).unwrap(); diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs index 751d837f..4c71a7a6 100644 --- a/lib/tests/sparql_test_cases.rs +++ b/lib/tests/sparql_test_cases.rs @@ -129,13 +129,8 @@ fn sparql_w3c_query_evaluation_testsuite() -> Result<()> { 
NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/construct/manifest#constructwhere04").unwrap(), //BNODE() scope is currently wrong NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#bnode01").unwrap(), - //Decimal precision problem - NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/functions/manifest#coalesce01").unwrap(), //Property path with unbound graph name are not supported yet NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/property-path/manifest#pp35").unwrap(), - //We write "2"^^xsd:decimal instead of "2.0"^^xsd:decimal - NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/aggregates/manifest#agg-err-02").unwrap(), - NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/aggregates/manifest#agg-avg-02").unwrap(), //SERVICE name from a BGP NamedNode::parse("http://www.w3.org/2009/sparql/docs/tests/data-sparql11/service/manifest#service5").unwrap(),