[Breaking] Uses big-endian encoding

Allows range searches based on the byte representation
pull/22/head
Tpt 5 years ago
parent 41c026877d
commit 8c4c273edf
  1. 44
      lib/src/model/xsd/date_time.rs
  2. 8
      lib/src/model/xsd/decimal.rs
  3. 12
      lib/src/model/xsd/duration.rs
  4. 70
      lib/src/store/numeric_encoder.rs

@ -44,9 +44,9 @@ impl DateTime {
})
}
pub fn from_le_bytes(bytes: [u8; 18]) -> Self {
pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
Self {
timestamp: Timestamp::from_le_bytes(bytes),
timestamp: Timestamp::from_be_bytes(bytes),
}
}
@ -101,8 +101,8 @@ impl DateTime {
}
}
pub fn to_le_bytes(&self) -> [u8; 18] {
self.timestamp.to_le_bytes()
pub fn to_be_bytes(&self) -> [u8; 18] {
self.timestamp.to_be_bytes()
}
/// [op:subtract-dateTimes](https://www.w3.org/TR/xpath-functions/#func-subtract-dateTimes)
@ -217,9 +217,9 @@ impl Time {
})
}
pub fn from_le_bytes(bytes: [u8; 18]) -> Self {
pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
Self {
timestamp: Timestamp::from_le_bytes(bytes),
timestamp: Timestamp::from_be_bytes(bytes),
}
}
@ -247,8 +247,8 @@ impl Time {
self.timestamp.timezone_offset()
}
pub fn to_le_bytes(&self) -> [u8; 18] {
self.timestamp.to_le_bytes()
pub fn to_be_bytes(&self) -> [u8; 18] {
self.timestamp.to_be_bytes()
}
/// [op:subtract-times](https://www.w3.org/TR/xpath-functions/#func-subtract-times)
@ -358,9 +358,9 @@ impl Date {
})
}
pub fn from_le_bytes(bytes: [u8; 18]) -> Self {
pub fn from_be_bytes(bytes: [u8; 18]) -> Self {
Self {
timestamp: Timestamp::from_le_bytes(bytes),
timestamp: Timestamp::from_be_bytes(bytes),
}
}
@ -388,8 +388,8 @@ impl Date {
self.timestamp.timezone_offset()
}
pub fn to_le_bytes(&self) -> [u8; 18] {
self.timestamp.to_le_bytes()
pub fn to_be_bytes(&self) -> [u8; 18] {
self.timestamp.to_be_bytes()
}
/// [op:subtract-dates](https://www.w3.org/TR/xpath-functions/#func-subtract-dates)
@ -458,14 +458,14 @@ impl TimezoneOffset {
Self { offset }
}
pub fn from_le_bytes(bytes: [u8; 2]) -> Self {
pub fn from_be_bytes(bytes: [u8; 2]) -> Self {
TimezoneOffset {
offset: i16::from_le_bytes(bytes),
offset: i16::from_be_bytes(bytes),
}
}
pub fn to_le_bytes(self) -> [u8; 2] {
self.offset.to_le_bytes()
pub fn to_be_bytes(self) -> [u8; 2] {
self.offset.to_be_bytes()
}
}
@ -601,18 +601,18 @@ impl Timestamp {
)
}
fn from_le_bytes(bytes: [u8; 18]) -> Self {
fn from_be_bytes(bytes: [u8; 18]) -> Self {
let mut value = [0; 16];
value.copy_from_slice(&bytes[0..16]);
let mut timezone_offset = [0; 2];
timezone_offset.copy_from_slice(&bytes[16..18]);
Self {
value: Decimal::from_le_bytes(value),
value: Decimal::from_be_bytes(value),
timezone_offset: if timezone_offset == [u8::MAX; 2] {
None
} else {
Some(TimezoneOffset::from_le_bytes(timezone_offset))
Some(TimezoneOffset::from_be_bytes(timezone_offset))
},
}
}
@ -746,11 +746,11 @@ impl Timestamp {
})
}
fn to_le_bytes(&self) -> [u8; 18] {
fn to_be_bytes(&self) -> [u8; 18] {
let mut bytes = [0; 18];
bytes[0..16].copy_from_slice(&self.value.to_le_bytes());
bytes[0..16].copy_from_slice(&self.value.to_be_bytes());
bytes[16..18].copy_from_slice(&match &self.timezone_offset {
Some(timezone_offset) => timezone_offset.to_le_bytes(),
Some(timezone_offset) => timezone_offset.to_be_bytes(),
None => [u8::MAX; 2],
});
bytes

@ -40,14 +40,14 @@ impl Decimal {
}
#[inline]
pub fn from_le_bytes(bytes: [u8; 16]) -> Self {
pub fn from_be_bytes(bytes: [u8; 16]) -> Self {
Self {
value: i128::from_le_bytes(bytes),
value: i128::from_be_bytes(bytes),
}
}
pub fn to_le_bytes(&self) -> [u8; 16] {
self.value.to_le_bytes()
pub fn to_be_bytes(&self) -> [u8; 16] {
self.value.to_be_bytes()
}
/// [op:numeric-add](https://www.w3.org/TR/xpath-functions/#func-numeric-add)

@ -29,14 +29,14 @@ impl Duration {
}
}
pub fn from_le_bytes(bytes: [u8; 24]) -> Self {
pub fn from_be_bytes(bytes: [u8; 24]) -> Self {
let mut months = [0; 8];
months.copy_from_slice(&bytes[0..8]);
let mut seconds = [8; 16];
seconds.copy_from_slice(&bytes[8..24]);
Self {
months: i64::from_le_bytes(months),
seconds: Decimal::from_le_bytes(seconds),
months: i64::from_be_bytes(months),
seconds: Decimal::from_be_bytes(seconds),
}
}
@ -81,10 +81,10 @@ impl Duration {
self.seconds
}
pub fn to_le_bytes(&self) -> [u8; 24] {
pub fn to_be_bytes(&self) -> [u8; 24] {
let mut bytes = [0; 24];
bytes[0..8].copy_from_slice(&self.months.to_le_bytes());
bytes[8..24].copy_from_slice(&self.seconds.to_le_bytes());
bytes[0..8].copy_from_slice(&self.months.to_be_bytes());
bytes[8..24].copy_from_slice(&self.seconds.to_be_bytes());
bytes
}

@ -30,9 +30,7 @@ const XSD_TIME_ID: u128 = 0x7af4_6a16_1b02_35d7_9a79_07ba_3da9_48bb;
const XSD_DURATION_ID: u128 = 0x78ab_8431_984b_6b06_c42d_6271_b82e_487d;
pub fn get_str_id(value: &str) -> u128 {
let mut id = [0; 16];
id.copy_from_slice(&Md5::new().chain(value).result());
u128::from_le_bytes(id)
u128::from_le_bytes(Md5::new().chain(value).result().into())
}
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
@ -523,14 +521,14 @@ impl<R: Read> TermReader for R {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::NamedNode {
iri_id: u128::from_le_bytes(buffer),
iri_id: u128::from_be_bytes(buffer),
})
}
TYPE_BLANK_NODE_ID => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::BlankNode {
id: u128::from_le_bytes(buffer),
id: u128::from_be_bytes(buffer),
})
}
TYPE_LANG_STRING_LITERAL_ID => {
@ -539,8 +537,8 @@ impl<R: Read> TermReader for R {
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::LangStringLiteral {
language_id: u128::from_le_bytes(language_buffer),
value_id: u128::from_le_bytes(value_buffer),
language_id: u128::from_be_bytes(language_buffer),
value_id: u128::from_be_bytes(value_buffer),
})
}
TYPE_TYPED_LITERAL_ID => {
@ -549,15 +547,15 @@ impl<R: Read> TermReader for R {
let mut value_buffer = [0; 16];
self.read_exact(&mut value_buffer)?;
Ok(EncodedTerm::TypedLiteral {
datatype_id: u128::from_le_bytes(datatype_buffer),
value_id: u128::from_le_bytes(value_buffer),
datatype_id: u128::from_be_bytes(datatype_buffer),
value_id: u128::from_be_bytes(value_buffer),
})
}
TYPE_STRING_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::StringLiteral {
value_id: u128::from_le_bytes(buffer),
value_id: u128::from_be_bytes(buffer),
})
}
TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)),
@ -565,44 +563,44 @@ impl<R: Read> TermReader for R {
TYPE_FLOAT_LITERAL => {
let mut buffer = [0; 4];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::FloatLiteral(f32::from_le_bytes(buffer)))
Ok(EncodedTerm::FloatLiteral(f32::from_be_bytes(buffer)))
}
TYPE_DOUBLE_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DoubleLiteral(f64::from_le_bytes(buffer)))
Ok(EncodedTerm::DoubleLiteral(f64::from_be_bytes(buffer)))
}
TYPE_INTEGER_LITERAL => {
let mut buffer = [0; 8];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::IntegerLiteral(i64::from_le_bytes(buffer)))
Ok(EncodedTerm::IntegerLiteral(i64::from_be_bytes(buffer)))
}
TYPE_DECIMAL_LITERAL => {
let mut buffer = [0; 16];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DecimalLiteral(Decimal::from_le_bytes(buffer)))
Ok(EncodedTerm::DecimalLiteral(Decimal::from_be_bytes(buffer)))
}
TYPE_DATE_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DateLiteral(Date::from_le_bytes(buffer)))
Ok(EncodedTerm::DateLiteral(Date::from_be_bytes(buffer)))
}
TYPE_TIME_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::TimeLiteral(Time::from_le_bytes(buffer)))
Ok(EncodedTerm::TimeLiteral(Time::from_be_bytes(buffer)))
}
TYPE_DATE_TIME_LITERAL => {
let mut buffer = [0; 18];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DateTimeLiteral(DateTime::from_le_bytes(
Ok(EncodedTerm::DateTimeLiteral(DateTime::from_be_bytes(
buffer,
)))
}
TYPE_DURATION_LITERAL => {
let mut buffer = [0; 24];
self.read_exact(&mut buffer)?;
Ok(EncodedTerm::DurationLiteral(Duration::from_le_bytes(
Ok(EncodedTerm::DurationLiteral(Duration::from_be_bytes(
buffer,
)))
}
@ -703,35 +701,35 @@ pub trait TermWriter {
impl<W: Write> TermWriter for W {
fn write_term(&mut self, term: EncodedTerm) -> Result<()> {
self.write_all(&term.type_id().to_le_bytes())?;
self.write_all(&term.type_id().to_be_bytes())?;
match term {
EncodedTerm::DefaultGraph => {}
EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_le_bytes())?,
EncodedTerm::BlankNode { id } => self.write_all(&id.to_le_bytes())?,
EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_le_bytes())?,
EncodedTerm::NamedNode { iri_id } => self.write_all(&iri_id.to_be_bytes())?,
EncodedTerm::BlankNode { id } => self.write_all(&id.to_be_bytes())?,
EncodedTerm::StringLiteral { value_id } => self.write_all(&value_id.to_be_bytes())?,
EncodedTerm::LangStringLiteral {
value_id,
language_id,
} => {
self.write_all(&language_id.to_le_bytes())?;
self.write_all(&value_id.to_le_bytes())?;
self.write_all(&language_id.to_be_bytes())?;
self.write_all(&value_id.to_be_bytes())?;
}
EncodedTerm::TypedLiteral {
value_id,
datatype_id,
} => {
self.write_all(&datatype_id.to_le_bytes())?;
self.write_all(&value_id.to_le_bytes())?;
self.write_all(&datatype_id.to_be_bytes())?;
self.write_all(&value_id.to_be_bytes())?;
}
EncodedTerm::BooleanLiteral(_) => {}
EncodedTerm::FloatLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::DoubleLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::DateLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::TimeLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::DateTimeLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::DurationLiteral(value) => self.write_all(&value.to_le_bytes())?,
EncodedTerm::FloatLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DoubleLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::IntegerLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DecimalLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DateLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::TimeLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DateTimeLiteral(value) => self.write_all(&value.to_be_bytes())?,
EncodedTerm::DurationLiteral(value) => self.write_all(&value.to_be_bytes())?,
}
Ok(())
}
@ -1214,6 +1212,10 @@ fn test_encoding() {
Literal::new_language_tagged_literal("foo", "fr").into(),
Literal::new_language_tagged_literal("foo", "FR").into(),
Literal::new_typed_literal("-1.32", xsd::DECIMAL.clone()).into(),
Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME.clone()).into(),
Literal::new_typed_literal("2020-01-01", xsd::DATE.clone()).into(),
Literal::new_typed_literal("01:01:01Z", xsd::TIME.clone()).into(),
Literal::new_typed_literal("PT1S", xsd::DURATION.clone()).into(),
Literal::new_typed_literal("-foo", NamedNode::new_from_string("http://foo.com")).into(),
];
for term in terms {

Loading…
Cancel
Save