[Breaking] Use big-endian encoding

Allows range searches based on the byte representation
pull/22/head
Tpt 5 years ago
parent 41c026877d
commit 8c4c273edf
  1. 44
      lib/src/model/xsd/date_time.rs
  2. 8
      lib/src/model/xsd/decimal.rs
  3. 12
      lib/src/model/xsd/duration.rs
  4. 70
      lib/src/store/numeric_encoder.rs

@ -44,9 +44,9 @@ impl DateTime {
}) })
} }
pub fn from_le_bytes(bytes: [u8; 18]) -> Self { pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
Self { Self {
timestamp: Timestamp::from_le_bytes(bytes), timestamp: Timestamp::from_be_bytes(bytes),
} }
} }
@ -101,8 +101,8 @@ impl DateTime {
} }
} }
pub fn to_le_bytes(&self) -> [u8; 18] { pub fn to_be_bytes(&self) -> [u8; 18] {
self.timestamp.to_le_bytes() self.timestamp.to_be_bytes()
} }
/// [op:subtract-dateTimes](https://www.w3.org/TR/xpath-functions/#func-subtract-dateTimes) /// [op:subtract-dateTimes](https://www.w3.org/TR/xpath-functions/#func-subtract-dateTimes)
@ -217,9 +217,9 @@ impl Time {
}) })
} }
pub fn from_le_bytes(bytes: [u8; 18]) -> Self { pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
Self { Self {
timestamp: Timestamp::from_le_bytes(bytes), timestamp: Timestamp::from_be_bytes(bytes),
} }
} }
@ -247,8 +247,8 @@ impl Time {
self.timestamp.timezone_offset() self.timestamp.timezone_offset()
} }
pub fn to_le_bytes(&self) -> [u8; 18] { pub fn to_be_bytes(&self) -> [u8; 18] {
self.timestamp.to_le_bytes() self.timestamp.to_be_bytes()
} }
/// [op:subtract-times](https://www.w3.org/TR/xpath-functions/#func-subtract-times) /// [op:subtract-times](https://www.w3.org/TR/xpath-functions/#func-subtract-times)
@ -358,9 +358,9 @@ impl Date {
}) })
} }
pub fn from_le_bytes(bytes: [u8; 18]) -> Self { pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
Self { Self {
timestamp: Timestamp::from_le_bytes(bytes), timestamp: Timestamp::from_be_bytes(bytes),
} }
} }
@ -388,8 +388,8 @@ impl Date {
self.timestamp.timezone_offset() self.timestamp.timezone_offset()
} }
pub fn to_le_bytes(&self) -> [u8; 18] { pub fn to_be_bytes(&self) -> [u8; 18] {
self.timestamp.to_le_bytes() self.timestamp.to_be_bytes()
} }
/// [op:subtract-dates](https://www.w3.org/TR/xpath-functions/#func-subtract-dates) /// [op:subtract-dates](https://www.w3.org/TR/xpath-functions/#func-subtract-dates)
@ -458,14 +458,14 @@ impl TimezoneOffset {
Self { offset } Self { offset }
} }
pub fn from_le_bytes(bytes: [u8; 2]) -> Self { pub fn from_be_bytes(bytes: [u8; 2]) -> Self {
TimezoneOffset { TimezoneOffset {
offset: i16::from_le_bytes(bytes), offset: i16::from_be_bytes(bytes),
} }
} }
pub fn to_le_bytes(self) -> [u8; 2] { pub fn to_be_bytes(self) -> [u8; 2] {
self.offset.to_le_bytes() self.offset.to_be_bytes()
} }
} }
@ -601,18 +601,18 @@ impl Timestamp {
) )
} }
fn from_le_bytes(bytes: [u8; 18]) -> Self { fn from_be_bytes(bytes: [u8; 18]) -> Self {
let mut value = [0; 16]; let mut value = [0; 16];
value.copy_from_slice(&bytes[0..16]); value.copy_from_slice(&bytes[0..16]);
let mut timezone_offset = [0; 2]; let mut timezone_offset = [0; 2];
timezone_offset.copy_from_slice(&bytes[16..18]); timezone_offset.copy_from_slice(&bytes[16..18]);
Self { Self {
value: Decimal::from_le_bytes(value), value: Decimal::from_be_bytes(value),
timezone_offset: if timezone_offset == [u8::MAX; 2] { timezone_offset: if timezone_offset == [u8::MAX; 2] {
None None
} else { } else {
Some(TimezoneOffset::from_le_bytes(timezone_offset)) Some(TimezoneOffset::from_be_bytes(timezone_offset))
}, },
} }
} }
@ -746,11 +746,11 @@ impl Timestamp {
}) })
} }
fn to_le_bytes(&self) -> [u8; 18] { fn to_be_bytes(&self) -> [u8; 18] {
let mut bytes = [0; 18]; let mut bytes = [0; 18];
bytes[0..16].copy_from_slice(&self.value.to_le_bytes()); bytes[0..16].copy_from_slice(&self.value.to_be_bytes());
bytes[16..18].copy_from_slice(&match &self.timezone_offset { bytes[16..18].copy_from_slice(&match &self.timezone_offset {
Some(timezone_offset) => timezone_offset.to_le_bytes(), Some(timezone_offset) => timezone_offset.to_be_bytes(),
None => [u8::MAX; 2], None => [u8::MAX; 2],
}); });
bytes bytes

@ -40,14 +40,14 @@ impl Decimal {
} }
#[inline] #[inline]
pub fn from_le_bytes(bytes: [u8; 16]) -> Self { pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
Self { Self {
value: i128::from_le_bytes(bytes), value: i128::from_be_bytes(bytes),
} }
} }
pub fn to_le_bytes(&self) -> [u8; 16] { pub fn to_be_bytes(&self) -> [u8; 16] {
self.value.to_le_bytes() self.value.to_be_bytes()
} }
/// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add) /// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add)

@ -29,14 +29,14 @@ impl Duration {
} }
} }
pub fn from_le_bytes(bytes: [u8; 24]) -> Self { pub fn from_be_bytes(bytes: [u8; 24]) -> Self {
let mut months = [0; 8]; let mut months = [0; 8];
months.copy_from_slice(&bytes[0..8]); months.copy_from_slice(&bytes[0..8]);
let mut seconds = [8; 16]; let mut seconds = [8; 16];
seconds.copy_from_slice(&bytes[8..24]); seconds.copy_from_slice(&bytes[8..24]);
Self { Self {
months: i64::from_le_bytes(months), months: i64::from_be_bytes(months),
seconds: Decimal::from_le_bytes(seconds), seconds: Decimal::from_be_bytes(seconds),
} }
} }
@ -81,10 +81,10 @@ impl Duration {
self.seconds self.seconds
} }
pub fn to_le_bytes(&self) -> [u8; 24] { pub fn to_be_bytes(&self) -> [u8; 24] {
let mut bytes = [0; 24]; let mut bytes = [0; 24];
bytes[0..8].copy_from_slice(&self.months.to_le_bytes()); bytes[0..8].copy_from_slice(&self.months.to_be_bytes());
bytes[8..24].copy_from_slice(&self.seconds.to_le_bytes()); bytes[8..24].copy_from_slice(&self.seconds.to_be_bytes());
bytes bytes
} }

@ -30,9 +30,7 @@ const XSD_TIME_ID: u128 = 0x7af4_6a16_1b02_35d7_9a79_07ba_3da9_48bb;
const XSD_DURATION_ID: u128 = 0x78ab_8431_984b_6b06_c42d_6271_b82e_487d; const XSD_DURATION_ID: u128 = 0x78ab_8431_984b_6b06_c42d_6271_b82e_487d;
pub fn get_str_id(value: &str) -> u128 { pub fn get_str_id(value: &str) -> u128 {
let mut id = [0; 16]; u128::from_le_bytes(Md5::new().chain(value).result().into())
id.copy_from_slice(&Md5::new().chain(value).result());
u128::from_le_bytes(id)
} }
const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
@ -523,14 +521,14 @@ impl<R: Read> TermReader for R {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedNode { Ok(EncodedTerm::NamedNode {
iri_id: u128::from_le_bytes(buffer), iri_id: u128::from_be_bytes(buffer),
}) })
} }
TYPE_BLANK_NODE_ID => { TYPE_BLANK_NODE_ID => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::BlankNode { Ok(EncodedTerm::BlankNode {
id: u128::from_le_bytes(buffer), id: u128::from_be_bytes(buffer),
}) })
} }
TYPE_LANG_STRING_LITERAL_ID => { TYPE_LANG_STRING_LITERAL_ID => {
@ -539,8 +537,8 @@ impl<R: Read> TermReader for R {
let mut value_buffer = [0; 16]; let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?; self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::LangStringLiteral { Ok(EncodedTerm::LangStringLiteral {
language_id: u128::from_le_bytes(language_buffer), language_id: u128::from_be_bytes(language_buffer),
value_id: u128::from_le_bytes(value_buffer), value_id: u128::from_be_bytes(value_buffer),
}) })
} }
TYPE_TYPED_LITERAL_ID => { TYPE_TYPED_LITERAL_ID => {
@ -549,15 +547,15 @@ impl<R: Read> TermReader for R {
let mut value_buffer = [0; 16]; let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?; self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::TypedLiteral { Ok(EncodedTerm::TypedLiteral {
datatype_id: u128::from_le_bytes(datatype_buffer), datatype_id: u128::from_be_bytes(datatype_buffer),
value_id: u128::from_le_bytes(value_buffer), value_id: u128::from_be_bytes(value_buffer),
}) })
} }
TYPE_STRING_LITERAL => { TYPE_STRING_LITERAL => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::StringLiteral { Ok(EncodedTerm::StringLiteral {
value_id: u128::from_le_bytes(buffer), value_id: u128::from_be_bytes(buffer),
}) })
} }
TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)), TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)),
@ -565,44 +563,44 @@ impl<R: Read> TermReader for R {
TYPE_FLOAT_LITERAL => { TYPE_FLOAT_LITERAL => {
let mut buffer = [0; 4]; let mut buffer = [0; 4];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::FloatLiteral(f32::from_le_bytes(buffer))) Ok(EncodedTerm::FloatLiteral(f32::from_be_bytes(buffer)))
} }
TYPE_DOUBLE_LITERAL => { TYPE_DOUBLE_LITERAL => {
let mut buffer = [0; 8]; let mut buffer = [0; 8];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DoubleLiteral(f64::from_le_bytes(buffer))) Ok(EncodedTerm::DoubleLiteral(f64::from_be_bytes(buffer)))
} }
TYPE_INTEGER_LITERAL => { TYPE_INTEGER_LITERAL => {
let mut buffer = [0; 8]; let mut buffer = [0; 8];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::IntegerLiteral(i64::from_le_bytes(buffer))) Ok(EncodedTerm::IntegerLiteral(i64::from_be_bytes(buffer)))
} }
TYPE_DECIMAL_LITERAL => { TYPE_DECIMAL_LITERAL => {
let mut buffer = [0; 16]; let mut buffer = [0; 16];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DecimalLiteral(Decimal::from_le_bytes(buffer))) Ok(EncodedTerm::DecimalLiteral(Decimal::from_be_bytes(buffer)))
} }
TYPE_DATE_LITERAL => { TYPE_DATE_LITERAL => {
let mut buffer = [0; 18]; let mut buffer = [0; 18];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DateLiteral(Date::from_le_bytes(buffer))) Ok(EncodedTerm::DateLiteral(Date::from_be_bytes(buffer)))
} }
TYPE_TIME_LITERAL => { TYPE_TIME_LITERAL => {
let mut buffer = [0; 18]; let mut buffer = [0; 18];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::TimeLiteral(Time::from_le_bytes(buffer))) Ok(EncodedTerm::TimeLiteral(Time::from_be_bytes(buffer)))
} }
TYPE_DATE_TIME_LITERAL => { TYPE_DATE_TIME_LITERAL => {
let mut buffer = [0; 18]; let mut buffer = [0; 18];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DateTimeLiteral(DateTime::from_le_bytes( Ok(EncodedTerm::DateTimeLiteral(DateTime::from_be_bytes(
buffer, buffer,
))) )))
} }
TYPE_DURATION_LITERAL => { TYPE_DURATION_LITERAL => {
let mut buffer = [0; 24]; let mut buffer = [0; 24];
self.read_exact(&mut buffer)?; self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DurationLiteral(Duration::from_le_bytes( Ok(EncodedTerm::DurationLiteral(Duration::from_be_bytes(
buffer, buffer,
))) )))
} }
@ -703,35 +701,35 @@ pub trait TermWriter {
impl<W: Write> TermWriter for W { impl<W: Write> TermWriter for W {
fn write_term(&mut self, term: EncodedTerm) -> Result<()> { fn write_term(&mut self, term: EncodedTerm) -> Result<()> {
self.write_all(&term.type_id().to_le_bytes())?; self.write_all(&term.type_id().to_be_bytes())?;
match term { match term {
EncodedTerm::DefaultGraph => {} EncodedTerm::DefaultGraph => {}
EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_le_bytes())?, EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_be_bytes())?,
EncodedTerm::BlankNode { id } => self.write_all(&id.to_le_bytes())?, EncodedTerm::BlankNode { id } => self.write_all(&id.to_be_bytes())?,
EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_le_bytes())?, EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_be_bytes())?,
EncodedTerm::LangStringLiteral { EncodedTerm::LangStringLiteral {
value_id, value_id,
language_id, language_id,
} => { } => {
self.write_all(&language_id.to_le_bytes())?; self.write_all(&language_id.to_be_bytes())?;
self.write_all(&value_id.to_le_bytes())?; self.write_all(&value_id.to_be_bytes())?;
} }
EncodedTerm::TypedLiteral { EncodedTerm::TypedLiteral {
value_id, value_id,
datatype_id, datatype_id,
} => { } => {
self.write_all(&datatype_id.to_le_bytes())?; self.write_all(&datatype_id.to_be_bytes())?;
self.write_all(&value_id.to_le_bytes())?; self.write_all(&value_id.to_be_bytes())?;
} }
EncodedTerm::BooleanLiteral(_) => {} EncodedTerm::BooleanLiteral(_) => {}
EncodedTerm::FloatLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::FloatLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DoubleLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::DoubleLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DateLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::DateLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::TimeLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::TimeLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DateTimeLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::DateTimeLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DurationLiteral(value) => self.write_all(&value.to_le_bytes())?, EncodedTerm::DurationLiteral(value) => self.write_all(&value.to_be_bytes())?,
} }
Ok(()) Ok(())
} }
@ -1214,6 +1212,10 @@ fn test_encoding() {
Literal::new_language_tagged_literal("foo", "fr").into(), Literal::new_language_tagged_literal("foo", "fr").into(),
Literal::new_language_tagged_literal("foo", "FR").into(), Literal::new_language_tagged_literal("foo", "FR").into(),
Literal::new_typed_literal("-1.32", xsd::DECIMAL.clone()).into(), Literal::new_typed_literal("-1.32", xsd::DECIMAL.clone()).into(),
Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME.clone()).into(),
Literal::new_typed_literal("2020-01-01", xsd::DATE.clone()).into(),
Literal::new_typed_literal("01:01:01Z", xsd::TIME.clone()).into(),
Literal::new_typed_literal("PT1S", xsd::DURATION.clone()).into(),
Literal::new_typed_literal("-foo", NamedNode::new_from_string("http://foo.com")).into(), Literal::new_typed_literal("-foo", NamedNode::new_from_string("http://foo.com")).into(),
]; ];
for term in terms { for term in terms {

Loading…
Cancel
Save