From 8c4c273edf2be9f73524c0151ef42347f49354cf Mon Sep 17 00:00:00 2001 From: Tpt Date: Mon, 20 Jan 2020 14:48:20 +0100 Subject: [PATCH] [Breaking] Uses big endian encoding Allows to do range search based on byte representation --- lib/src/model/xsd/date_time.rs | 44 ++++++++++---------- lib/src/model/xsd/decimal.rs | 8 ++-- lib/src/model/xsd/duration.rs | 12 +++--- lib/src/store/numeric_encoder.rs | 70 ++++++++++++++++---------------- 4 files changed, 68 insertions(+), 66 deletions(-) diff --git a/lib/src/model/xsd/date_time.rs b/lib/src/model/xsd/date_time.rs index a4b71790..38b11e9f 100644 --- a/lib/src/model/xsd/date_time.rs +++ b/lib/src/model/xsd/date_time.rs @@ -44,9 +44,9 @@ impl DateTime { }) } - pub fn from_le_bytes(bytes: [u8; 18]) -> Self { + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { Self { - timestamp: Timestamp::from_le_bytes(bytes), + timestamp: Timestamp::from_be_bytes(bytes), } } @@ -101,8 +101,8 @@ impl DateTime { } } - pub fn to_le_bytes(&self) -> [u8; 18] { - self.timestamp.to_le_bytes() + pub fn to_be_bytes(&self) -> [u8; 18] { + self.timestamp.to_be_bytes() } /// [op:subtract-dateTimes](https://www.w3.org/TR/xpath-functions/#func-subtract-dateTimes) @@ -217,9 +217,9 @@ impl Time { }) } - pub fn from_le_bytes(bytes: [u8; 18]) -> Self { + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { Self { - timestamp: Timestamp::from_le_bytes(bytes), + timestamp: Timestamp::from_be_bytes(bytes), } } @@ -247,8 +247,8 @@ impl Time { self.timestamp.timezone_offset() } - pub fn to_le_bytes(&self) -> [u8; 18] { - self.timestamp.to_le_bytes() + pub fn to_be_bytes(&self) -> [u8; 18] { + self.timestamp.to_be_bytes() } /// [op:subtract-times](https://www.w3.org/TR/xpath-functions/#func-subtract-times) @@ -358,9 +358,9 @@ impl Date { }) } - pub fn from_le_bytes(bytes: [u8; 18]) -> Self { + pub fn from_be_bytes(bytes: [u8; 18]) -> Self { Self { - timestamp: Timestamp::from_le_bytes(bytes), + timestamp: Timestamp::from_be_bytes(bytes), } } @@ -388,8 +388,8 @@ impl Date { self.timestamp.timezone_offset() } - pub fn to_le_bytes(&self) -> [u8; 18] { - self.timestamp.to_le_bytes() + pub fn to_be_bytes(&self) -> [u8; 18] { + self.timestamp.to_be_bytes() } /// [op:subtract-dates](https://www.w3.org/TR/xpath-functions/#func-subtract-dates) @@ -458,14 +458,14 @@ impl TimezoneOffset { Self { offset } } - pub fn from_le_bytes(bytes: [u8; 2]) -> Self { + pub fn from_be_bytes(bytes: [u8; 2]) -> Self { TimezoneOffset { - offset: i16::from_le_bytes(bytes), + offset: i16::from_be_bytes(bytes), } } - pub fn to_le_bytes(self) -> [u8; 2] { - self.offset.to_le_bytes() + pub fn to_be_bytes(self) -> [u8; 2] { + self.offset.to_be_bytes() } } @@ -601,18 +601,18 @@ impl Timestamp { ) } - fn from_le_bytes(bytes: [u8; 18]) -> Self { + fn from_be_bytes(bytes: [u8; 18]) -> Self { let mut value = [0; 16]; value.copy_from_slice(&bytes[0..16]); let mut timezone_offset = [0; 2]; timezone_offset.copy_from_slice(&bytes[16..18]); Self { - value: Decimal::from_le_bytes(value), + value: Decimal::from_be_bytes(value), timezone_offset: if timezone_offset == [u8::MAX; 2] { None } else { - Some(TimezoneOffset::from_le_bytes(timezone_offset)) + Some(TimezoneOffset::from_be_bytes(timezone_offset)) }, } } @@ -746,11 +746,11 @@ impl Timestamp { }) } - fn to_le_bytes(&self) -> [u8; 18] { + fn to_be_bytes(&self) -> [u8; 18] { let mut bytes = [0; 18]; - bytes[0..16].copy_from_slice(&self.value.to_le_bytes()); + bytes[0..16].copy_from_slice(&self.value.to_be_bytes()); bytes[16..18].copy_from_slice(&match &self.timezone_offset { - Some(timezone_offset) => timezone_offset.to_le_bytes(), + Some(timezone_offset) => timezone_offset.to_be_bytes(), None => [u8::MAX; 2], }); bytes diff --git a/lib/src/model/xsd/decimal.rs b/lib/src/model/xsd/decimal.rs index 29560217..7877f2fa 100644 --- a/lib/src/model/xsd/decimal.rs +++ b/lib/src/model/xsd/decimal.rs @@ -40,14 +40,14 @@ impl Decimal { } #[inline] - pub fn from_le_bytes(bytes: [u8; 16]) -> Self { + pub fn from_be_bytes(bytes: [u8; 16]) -> Self { Self { - value: i128::from_le_bytes(bytes), + value: i128::from_be_bytes(bytes), } } - pub fn to_le_bytes(&self) -> [u8; 16] { - self.value.to_le_bytes() + pub fn to_be_bytes(&self) -> [u8; 16] { + self.value.to_be_bytes() } /// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add) diff --git a/lib/src/model/xsd/duration.rs b/lib/src/model/xsd/duration.rs index 7a8aaf8b..88fbb2f2 100644 --- a/lib/src/model/xsd/duration.rs +++ b/lib/src/model/xsd/duration.rs @@ -29,14 +29,14 @@ impl Duration { } } - pub fn from_le_bytes(bytes: [u8; 24]) -> Self { + pub fn from_be_bytes(bytes: [u8; 24]) -> Self { let mut months = [0; 8]; months.copy_from_slice(&bytes[0..8]); let mut seconds = [8; 16]; seconds.copy_from_slice(&bytes[8..24]); Self { - months: i64::from_le_bytes(months), - seconds: Decimal::from_le_bytes(seconds), + months: i64::from_be_bytes(months), + seconds: Decimal::from_be_bytes(seconds), } } @@ -81,10 +81,10 @@ impl Duration { self.seconds } - pub fn to_le_bytes(&self) -> [u8; 24] { + pub fn to_be_bytes(&self) -> [u8; 24] { let mut bytes = [0; 24]; - bytes[0..8].copy_from_slice(&self.months.to_le_bytes()); - bytes[8..24].copy_from_slice(&self.seconds.to_le_bytes()); + bytes[0..8].copy_from_slice(&self.months.to_be_bytes()); + bytes[8..24].copy_from_slice(&self.seconds.to_be_bytes()); bytes } diff --git a/lib/src/store/numeric_encoder.rs b/lib/src/store/numeric_encoder.rs index b45b2c35..cf5ea1ce 100644 --- a/lib/src/store/numeric_encoder.rs +++ b/lib/src/store/numeric_encoder.rs @@ -30,9 +30,7 @@ const XSD_TIME_ID: u128 = 0x7af4_6a16_1b02_35d7_9a79_07ba_3da9_48bb; const XSD_DURATION_ID: u128 = 0x78ab_8431_984b_6b06_c42d_6271_b82e_487d; pub fn get_str_id(value: &str) -> u128 { - let mut id = [0; 16]; - id.copy_from_slice(&Md5::new().chain(value).result()); - u128::from_le_bytes(id) + u128::from_le_bytes(Md5::new().chain(value).result().into()) } const TYPE_DEFAULT_GRAPH_ID: u8 = 0; @@ -523,14 +521,14 @@ impl TermReader for R { let mut buffer = [0; 16]; self.read_exact(&mut buffer)?; Ok(EncodedTerm::NamedNode { - iri_id: u128::from_le_bytes(buffer), + iri_id: u128::from_be_bytes(buffer), }) } TYPE_BLANK_NODE_ID => { let mut buffer = [0; 16]; self.read_exact(&mut buffer)?; Ok(EncodedTerm::BlankNode { - id: u128::from_le_bytes(buffer), + id: u128::from_be_bytes(buffer), }) } TYPE_LANG_STRING_LITERAL_ID => { @@ -539,8 +537,8 @@ impl TermReader for R { let mut value_buffer = [0; 16]; self.read_exact(&mut value_buffer)?; Ok(EncodedTerm::LangStringLiteral { - language_id: u128::from_le_bytes(language_buffer), - value_id: u128::from_le_bytes(value_buffer), + language_id: u128::from_be_bytes(language_buffer), + value_id: u128::from_be_bytes(value_buffer), }) } TYPE_TYPED_LITERAL_ID => { @@ -549,15 +547,15 @@ impl TermReader for R { let mut value_buffer = [0; 16]; self.read_exact(&mut value_buffer)?; Ok(EncodedTerm::TypedLiteral { - datatype_id: u128::from_le_bytes(datatype_buffer), - value_id: u128::from_le_bytes(value_buffer), + datatype_id: u128::from_be_bytes(datatype_buffer), + value_id: u128::from_be_bytes(value_buffer), }) } TYPE_STRING_LITERAL => { let mut buffer = [0; 16]; self.read_exact(&mut buffer)?; Ok(EncodedTerm::StringLiteral { - value_id: u128::from_le_bytes(buffer), + value_id: u128::from_be_bytes(buffer), }) } TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)), @@ -565,44 +563,44 @@ impl TermReader for R { TYPE_FLOAT_LITERAL => { let mut buffer = [0; 4]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::FloatLiteral(f32::from_le_bytes(buffer))) + Ok(EncodedTerm::FloatLiteral(f32::from_be_bytes(buffer))) } TYPE_DOUBLE_LITERAL => { let mut buffer = [0; 8]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::DoubleLiteral(f64::from_le_bytes(buffer))) + Ok(EncodedTerm::DoubleLiteral(f64::from_be_bytes(buffer))) } TYPE_INTEGER_LITERAL => { let mut buffer = [0; 8]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::IntegerLiteral(i64::from_le_bytes(buffer))) + Ok(EncodedTerm::IntegerLiteral(i64::from_be_bytes(buffer))) } TYPE_DECIMAL_LITERAL => { let mut buffer = [0; 16]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::DecimalLiteral(Decimal::from_le_bytes(buffer))) + Ok(EncodedTerm::DecimalLiteral(Decimal::from_be_bytes(buffer))) } TYPE_DATE_LITERAL => { let mut buffer = [0; 18]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::DateLiteral(Date::from_le_bytes(buffer))) + Ok(EncodedTerm::DateLiteral(Date::from_be_bytes(buffer))) } TYPE_TIME_LITERAL => { let mut buffer = [0; 18]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::TimeLiteral(Time::from_le_bytes(buffer))) + Ok(EncodedTerm::TimeLiteral(Time::from_be_bytes(buffer))) } TYPE_DATE_TIME_LITERAL => { let mut buffer = [0; 18]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::DateTimeLiteral(DateTime::from_le_bytes( + Ok(EncodedTerm::DateTimeLiteral(DateTime::from_be_bytes( buffer, ))) } TYPE_DURATION_LITERAL => { let mut buffer = [0; 24]; self.read_exact(&mut buffer)?; - Ok(EncodedTerm::DurationLiteral(Duration::from_le_bytes( + Ok(EncodedTerm::DurationLiteral(Duration::from_be_bytes( buffer, ))) } @@ -703,35 +701,35 @@ pub trait TermWriter { impl TermWriter for W { fn write_term(&mut self, term: EncodedTerm) -> Result<()> { - self.write_all(&term.type_id().to_le_bytes())?; + self.write_all(&term.type_id().to_be_bytes())?; match term { EncodedTerm::DefaultGraph => {} - EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_le_bytes())?, - EncodedTerm::BlankNode { id } => self.write_all(&id.to_le_bytes())?, - EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_le_bytes())?, + EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_be_bytes())?, + EncodedTerm::BlankNode { id } => self.write_all(&id.to_be_bytes())?, + EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_be_bytes())?, EncodedTerm::LangStringLiteral { value_id, language_id, } => { - self.write_all(&language_id.to_le_bytes())?; - self.write_all(&value_id.to_le_bytes())?; + self.write_all(&language_id.to_be_bytes())?; + self.write_all(&value_id.to_be_bytes())?; } EncodedTerm::TypedLiteral { value_id, datatype_id, } => { - self.write_all(&datatype_id.to_le_bytes())?; - self.write_all(&value_id.to_le_bytes())?; + self.write_all(&datatype_id.to_be_bytes())?; + self.write_all(&value_id.to_be_bytes())?; } EncodedTerm::BooleanLiteral(_) => {} - EncodedTerm::FloatLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::DoubleLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::DateLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::TimeLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::DateTimeLiteral(value) => self.write_all(&value.to_le_bytes())?, - EncodedTerm::DurationLiteral(value) => self.write_all(&value.to_le_bytes())?, + EncodedTerm::FloatLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::DoubleLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::DateLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::TimeLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::DateTimeLiteral(value) => self.write_all(&value.to_be_bytes())?, + EncodedTerm::DurationLiteral(value) => self.write_all(&value.to_be_bytes())?, } Ok(()) } @@ -1214,6 +1212,10 @@ fn test_encoding() { Literal::new_language_tagged_literal("foo", "fr").into(), Literal::new_language_tagged_literal("foo", "FR").into(), Literal::new_typed_literal("-1.32", xsd::DECIMAL.clone()).into(), + Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME.clone()).into(), + Literal::new_typed_literal("2020-01-01", xsd::DATE.clone()).into(), + Literal::new_typed_literal("01:01:01Z", xsd::TIME.clone()).into(), + Literal::new_typed_literal("PT1S", xsd::DURATION.clone()).into(), Literal::new_typed_literal("-foo", NamedNode::new_from_string("http://foo.com")).into(), ]; for term in terms {