Avoids code duplication of xsd: datatypes parsing

pull/10/head
Tpt 5 years ago
parent 25b5e20c2b
commit f2f3b8785d
  1. 77
      lib/src/sparql/eval.rs
  2. 2
      lib/src/store/mod.rs
  3. 202
      lib/src/store/numeric_encoder.rs

@ -11,7 +11,6 @@ use num_traits::identities::Zero;
use num_traits::FromPrimitive;
use num_traits::One;
use num_traits::ToPrimitive;
use ordered_float::OrderedFloat;
use regex::RegexBuilder;
use rust_decimal::Decimal;
use std::cmp::Ordering;
@ -639,13 +638,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
}
PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => {
match &*self.dataset.get_str(value_id).ok()?? {
"true" | "1" => Some(true.into()),
"false" | "0" => Some(false.into()),
_ => None,
}
}
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_boolean_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::DoubleCast(e) => match self.eval_expression(e, tuple)? {
@ -656,9 +652,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1. as f64 } else { 0. }.into())
}
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DoubleLiteral(
OrderedFloat(self.dataset.get_str(value_id).ok()??.parse().ok()?),
)),
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_double_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::FloatCast(e) => match self.eval_expression(e, tuple)? {
@ -669,9 +666,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1. as f32 } else { 0. }.into())
}
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::FloatLiteral(
OrderedFloat(self.dataset.get_str(value_id).ok()??.parse().ok()?),
)),
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_float_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::IntegerCast(e) => match self.eval_expression(e, tuple)? {
@ -680,9 +678,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::IntegerLiteral(value) => Some(value.to_i128()?.into()),
EncodedTerm::DecimalLiteral(value) => Some(value.to_i128()?.into()),
EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()),
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::IntegerLiteral(
self.dataset.get_str(value_id).ok()??.parse().ok()?,
)),
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_integer_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::DecimalCast(e) => match self.eval_expression(e, tuple)? {
@ -698,43 +697,40 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
}
.into(),
),
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DecimalLiteral(
self.dataset.get_str(value_id).ok()??.parse().ok()?,
)),
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_decimal_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateLiteral(value) => Some(value.into()),
EncodedTerm::NaiveDateLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(value.date().naive_utc().into()), //TODO: use date with timezone
EncodedTerm::DateTimeLiteral(value) => Some(value.date().into()),
EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.date().into()),
EncodedTerm::StringLiteral { value_id } => {
let value = self.dataset.get_str(value_id).ok()??;
Some(NaiveDate::parse_from_str(&value, "%Y-%m-%d").ok()?.into())
}
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_date_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::NaiveTimeLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(value.time().into()),
EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.time().into()),
EncodedTerm::StringLiteral { value_id } => {
let value = self.dataset.get_str(value_id).ok()??;
Some(NaiveTime::parse_from_str(&value, "%H:%M:%S").ok()?.into())
}
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_time_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateTimeLiteral(value) => Some(value.into()),
EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => {
let value = self.dataset.get_str(value_id).ok()??;
Some(match DateTime::parse_from_rfc3339(&value) {
Ok(value) => value.into(),
Err(_) => NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S")
.ok()?
.into(),
})
}
EncodedTerm::StringLiteral { value_id } => self
.dataset
.encoder()
.encode_date_time_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
},
PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral {
@ -1514,9 +1510,8 @@ fn get_triple_template_value(
TripleTemplateValue::Constant(term) => Some(*term),
TripleTemplateValue::Variable(v) => get_tuple_value(*v, tuple),
TripleTemplateValue::BlankNode(id) => {
//TODO use resize_with
while *id >= tuple.len() {
bnodes.push(BlankNode::default())
if *id >= tuple.len() {
bnodes.resize_with(*id, BlankNode::default)
}
tuple[*id]
}

@ -175,7 +175,6 @@ impl<S: StoreConnection> StoreRepositoryConnection<S> {
where
P::Error: Send + Sync + 'static,
{
//TODO: handle errors
let mut bnode_map = HashMap::default();
let encoder = self.inner.encoder();
let graph_name = if let Some(graph_name) = to_graph_name {
@ -194,7 +193,6 @@ impl<S: StoreConnection> StoreRepositoryConnection<S> {
where
P::Error: Send + Sync + 'static,
{
//TODO: handle errors
let mut bnode_map = HashMap::default();
let encoder = self.inner.encoder();
parser.parse_all(&mut move |q| {

@ -684,113 +684,45 @@ impl<S: StringStore> Encoder<S> {
}?,
}
}
rio::Literal::Typed { value, datatype } => match datatype.iri {
"http://www.w3.org/2001/XMLSchema#boolean" => match value {
"true" | "1" => EncodedTerm::BooleanLiteral(true),
"false" | "0" => EncodedTerm::BooleanLiteral(false),
_ => EncodedTerm::TypedLiteral {
rio::Literal::Typed { value, datatype } => {
match match datatype.iri {
"http://www.w3.org/2001/XMLSchema#boolean" => self.encode_boolean_str(value),
"http://www.w3.org/2001/XMLSchema#string" => Some(EncodedTerm::StringLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_BOOLEAN_ID,
},
},
"http://www.w3.org/2001/XMLSchema#string" => EncodedTerm::StringLiteral {
value_id: self.string_store.insert_str(value)?,
},
"http://www.w3.org/2001/XMLSchema#float" => match value.parse() {
Ok(value) => EncodedTerm::FloatLiteral(OrderedFloat(value)),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_FLOAT_ID,
},
},
"http://www.w3.org/2001/XMLSchema#double" => match value.parse() {
Ok(value) => EncodedTerm::DoubleLiteral(OrderedFloat(value)),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DOUBLE_ID,
},
},
"http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short"
| "http://www.w3.org/2001/XMLSchema#int"
| "http://www.w3.org/2001/XMLSchema#long"
| "http://www.w3.org/2001/XMLSchema#unsignedByte"
| "http://www.w3.org/2001/XMLSchema#unsignedShort"
| "http://www.w3.org/2001/XMLSchema#unsignedInt"
| "http://www.w3.org/2001/XMLSchema#unsignedLong"
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => match value.parse() {
Ok(value) => EncodedTerm::IntegerLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
}),
"http://www.w3.org/2001/XMLSchema#float" => self.encode_float_str(value),
"http://www.w3.org/2001/XMLSchema#double" => self.encode_double_str(value),
"http://www.w3.org/2001/XMLSchema#integer"
| "http://www.w3.org/2001/XMLSchema#byte"
| "http://www.w3.org/2001/XMLSchema#short"
| "http://www.w3.org/2001/XMLSchema#int"
| "http://www.w3.org/2001/XMLSchema#long"
| "http://www.w3.org/2001/XMLSchema#unsignedByte"
| "http://www.w3.org/2001/XMLSchema#unsignedShort"
| "http://www.w3.org/2001/XMLSchema#unsignedInt"
| "http://www.w3.org/2001/XMLSchema#unsignedLong"
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
self.encode_integer_str(value)
}
"http://www.w3.org/2001/XMLSchema#decimal" => self.encode_decimal_str(value),
"http://www.w3.org/2001/XMLSchema#date" => self.encode_date_str(value),
"http://www.w3.org/2001/XMLSchema#time" => self.encode_time_str(value),
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
self.encode_date_time_str(value)
}
_ => None,
} {
Some(v) => v,
None => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: self.string_store.insert_str(datatype.iri)?,
},
},
"http://www.w3.org/2001/XMLSchema#decimal" => match value.parse() {
Ok(value) => EncodedTerm::DecimalLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DECIMAL_ID,
},
},
"http://www.w3.org/2001/XMLSchema#date" => {
let mut parsed = Parsed::new();
match parse(&mut parsed, &value, StrftimeItems::new("%Y-%m-%d%:z")).and_then(
|_| {
Ok(Date::from_utc(
parsed.to_naive_date()?,
parsed.to_fixed_offset()?,
))
},
) {
Ok(value) => EncodedTerm::DateLiteral(value),
Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%dZ") {
Ok(value) => EncodedTerm::DateLiteral(Date::from_utc(
value,
FixedOffset::east(0),
)),
Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%d") {
Ok(value) => EncodedTerm::NaiveDateLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DATE_ID,
},
},
},
}
}
"http://www.w3.org/2001/XMLSchema#time" => {
match NaiveTime::parse_from_str(&value, "%H:%M:%S") {
Ok(value) => EncodedTerm::NaiveTimeLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_TIME_ID,
},
}
}
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
match DateTime::parse_from_rfc3339(&value) {
Ok(value) => EncodedTerm::DateTimeLiteral(value),
Err(_) => {
match NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S") {
Ok(value) => EncodedTerm::NaiveDateTimeLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DATE_TIME_ID,
},
}
}
}
}
_ => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: self.string_store.insert_str(datatype.iri)?,
},
},
}
})
}
@ -849,6 +781,72 @@ impl<S: StringStore> Encoder<S> {
})
}
/// Encodes the lexical form of a boolean.
///
/// `"true"` and `"1"` give `EncodedTerm::BooleanLiteral(true)`, `"false"` and `"0"` give
/// `EncodedTerm::BooleanLiteral(false)`. Any other string gives `None`; the comparison is
/// exact (no trimming, no case folding).
pub fn encode_boolean_str(&self, value: &str) -> Option<EncodedTerm> {
    let flag = match value {
        "true" | "1" => true,
        "false" | "0" => false,
        // Not one of the four recognised spellings.
        _ => return None,
    };
    Some(EncodedTerm::BooleanLiteral(flag))
}
/// Encodes the lexical form of a float.
///
/// The string is handed to `str::parse`; the target type is whatever
/// `EncodedTerm::FloatLiteral(OrderedFloat(_))` wraps. Returns `None` when parsing fails.
///
/// NOTE(review): the accepted spellings are those of the standard library parser, which may
/// not coincide exactly with the XSD lexical space (e.g. infinity/NaN spellings) — confirm.
pub fn encode_float_str(&self, value: &str) -> Option<EncodedTerm> {
    match value.parse() {
        Ok(number) => Some(EncodedTerm::FloatLiteral(OrderedFloat(number))),
        Err(_) => None,
    }
}
/// Encodes the lexical form of a double.
///
/// The string is handed to `str::parse`; the target type is whatever
/// `EncodedTerm::DoubleLiteral(OrderedFloat(_))` wraps. Returns `None` when parsing fails.
///
/// NOTE(review): the accepted spellings are those of the standard library parser, which may
/// not coincide exactly with the XSD lexical space (e.g. infinity/NaN spellings) — confirm.
pub fn encode_double_str(&self, value: &str) -> Option<EncodedTerm> {
    match value.parse() {
        Ok(number) => Some(EncodedTerm::DoubleLiteral(OrderedFloat(number))),
        Err(_) => None,
    }
}
/// Encodes the lexical form of an integer.
///
/// The string is parsed with `str::parse` into the payload type of
/// `EncodedTerm::IntegerLiteral`. Returns `None` when the string does not parse.
pub fn encode_integer_str(&self, value: &str) -> Option<EncodedTerm> {
    let number = value.parse().ok()?;
    Some(EncodedTerm::IntegerLiteral(number))
}
/// Encodes the lexical form of a decimal.
///
/// The string is parsed with `str::parse` into the payload type of
/// `EncodedTerm::DecimalLiteral`. Returns `None` when the string does not parse.
pub fn encode_decimal_str(&self, value: &str) -> Option<EncodedTerm> {
    let number = value.parse().ok()?;
    Some(EncodedTerm::DecimalLiteral(number))
}
/// Encodes the lexical form of a date, trying three layouts in order:
///
/// 1. `%Y-%m-%d%:z` (date followed by a numeric offset) gives a `DateLiteral` built from the
///    parsed date and the parsed offset;
/// 2. `%Y-%m-%dZ` gives a `DateLiteral` with a zero offset;
/// 3. `%Y-%m-%d` gives a `NaiveDateLiteral`.
///
/// Returns `None` when none of the layouts matches.
pub fn encode_date_str(&self, value: &str) -> Option<EncodedTerm> {
    // First attempt: date with an explicit numeric offset. Both the date and the offset
    // must be recoverable from the parsed fields for this attempt to succeed.
    let mut fields = Parsed::new();
    let with_offset = parse(&mut fields, value, StrftimeItems::new("%Y-%m-%d%:z")).and_then(|_| {
        let day = fields.to_naive_date()?;
        let offset = fields.to_fixed_offset()?;
        Ok(Date::from_utc(day, offset))
    });
    if let Ok(date) = with_offset {
        return Some(EncodedTerm::DateLiteral(date));
    }

    // Second attempt: date with a literal trailing `Z`, encoded with a zero offset.
    if let Ok(day) = NaiveDate::parse_from_str(value, "%Y-%m-%dZ") {
        let utc = FixedOffset::east(0);
        return Some(EncodedTerm::DateLiteral(Date::from_utc(day, utc)));
    }

    // Last attempt: date without any offset information.
    match NaiveDate::parse_from_str(value, "%Y-%m-%d") {
        Ok(day) => Some(EncodedTerm::NaiveDateLiteral(day)),
        Err(_) => None,
    }
}
/// Encodes the lexical form of a time.
///
/// Only the layout `%H:%M:%S` is attempted; on success the result is a
/// `NaiveTimeLiteral`, otherwise `None`.
pub fn encode_time_str(&self, value: &str) -> Option<EncodedTerm> {
    match NaiveTime::parse_from_str(value, "%H:%M:%S") {
        Ok(time) => Some(EncodedTerm::NaiveTimeLiteral(time)),
        Err(_) => None,
    }
}
/// Encodes the lexical form of a date-time.
///
/// The string is first parsed with `DateTime::parse_from_rfc3339`, giving a
/// `DateTimeLiteral`. If that fails, the layout `%Y-%m-%dT%H:%M:%S` is tried, giving a
/// `NaiveDateTimeLiteral`. Returns `None` when both attempts fail.
pub fn encode_date_time_str(&self, value: &str) -> Option<EncodedTerm> {
    if let Ok(stamp) = DateTime::parse_from_rfc3339(value) {
        return Some(EncodedTerm::DateTimeLiteral(stamp));
    }
    // Fallback for values that do not parse as RFC 3339.
    let naive = NaiveDateTime::parse_from_str(value, "%Y-%m-%dT%H:%M:%S").ok()?;
    Some(EncodedTerm::NaiveDateTimeLiteral(naive))
}
pub fn decode_term(&self, encoded: EncodedTerm) -> Result<Term> {
match encoded {
EncodedTerm::DefaultGraph => {

Loading…
Cancel
Save