Avoids code duplication when parsing xsd: datatypes

pull/10/head
Tpt 5 years ago
parent 25b5e20c2b
commit f2f3b8785d
  1. 77
      lib/src/sparql/eval.rs
  2. 2
      lib/src/store/mod.rs
  3. 202
      lib/src/store/numeric_encoder.rs

@ -11,7 +11,6 @@ use num_traits::identities::Zero;
use num_traits::FromPrimitive; use num_traits::FromPrimitive;
use num_traits::One; use num_traits::One;
use num_traits::ToPrimitive; use num_traits::ToPrimitive;
use ordered_float::OrderedFloat;
use regex::RegexBuilder; use regex::RegexBuilder;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use std::cmp::Ordering; use std::cmp::Ordering;
@ -639,13 +638,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
} }
PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::BooleanLiteral(value) => Some(value.into()), EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => self
match &*self.dataset.get_str(value_id).ok()?? { .dataset
"true" | "1" => Some(true.into()), .encoder()
"false" | "0" => Some(false.into()), .encode_boolean_str(&*self.dataset.get_str(value_id).ok()??),
_ => None,
}
}
_ => None, _ => None,
}, },
PlanExpression::DoubleCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::DoubleCast(e) => match self.eval_expression(e, tuple)? {
@ -656,9 +652,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1. as f64 } else { 0. }.into()) Some(if value { 1. as f64 } else { 0. }.into())
} }
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DoubleLiteral( EncodedTerm::StringLiteral { value_id } => self
OrderedFloat(self.dataset.get_str(value_id).ok()??.parse().ok()?), .dataset
)), .encoder()
.encode_double_str(&*self.dataset.get_str(value_id).ok()??),
_ => None, _ => None,
}, },
PlanExpression::FloatCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::FloatCast(e) => match self.eval_expression(e, tuple)? {
@ -669,9 +666,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::BooleanLiteral(value) => { EncodedTerm::BooleanLiteral(value) => {
Some(if value { 1. as f32 } else { 0. }.into()) Some(if value { 1. as f32 } else { 0. }.into())
} }
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::FloatLiteral( EncodedTerm::StringLiteral { value_id } => self
OrderedFloat(self.dataset.get_str(value_id).ok()??.parse().ok()?), .dataset
)), .encoder()
.encode_float_str(&*self.dataset.get_str(value_id).ok()??),
_ => None, _ => None,
}, },
PlanExpression::IntegerCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::IntegerCast(e) => match self.eval_expression(e, tuple)? {
@ -680,9 +678,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::IntegerLiteral(value) => Some(value.to_i128()?.into()), EncodedTerm::IntegerLiteral(value) => Some(value.to_i128()?.into()),
EncodedTerm::DecimalLiteral(value) => Some(value.to_i128()?.into()), EncodedTerm::DecimalLiteral(value) => Some(value.to_i128()?.into()),
EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()), EncodedTerm::BooleanLiteral(value) => Some(if value { 1 } else { 0 }.into()),
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::IntegerLiteral( EncodedTerm::StringLiteral { value_id } => self
self.dataset.get_str(value_id).ok()??.parse().ok()?, .dataset
)), .encoder()
.encode_integer_str(&*self.dataset.get_str(value_id).ok()??),
_ => None, _ => None,
}, },
PlanExpression::DecimalCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::DecimalCast(e) => match self.eval_expression(e, tuple)? {
@ -698,43 +697,40 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
} }
.into(), .into(),
), ),
EncodedTerm::StringLiteral { value_id } => Some(EncodedTerm::DecimalLiteral( EncodedTerm::StringLiteral { value_id } => self
self.dataset.get_str(value_id).ok()??.parse().ok()?, .dataset
)), .encoder()
.encode_decimal_str(&*self.dataset.get_str(value_id).ok()??),
_ => None, _ => None,
}, },
PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::DateCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateLiteral(value) => Some(value.into()),
EncodedTerm::NaiveDateLiteral(value) => Some(value.into()), EncodedTerm::NaiveDateLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(value.date().naive_utc().into()), //TODO: use date with timezone EncodedTerm::DateTimeLiteral(value) => Some(value.date().into()),
EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.date().into()), EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.date().into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => self
let value = self.dataset.get_str(value_id).ok()??; .dataset
Some(NaiveDate::parse_from_str(&value, "%Y-%m-%d").ok()?.into()) .encoder()
} .encode_date_str(&*self.dataset.get_str(value_id).ok()??),
_ => None, _ => None,
}, },
PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::TimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::NaiveTimeLiteral(value) => Some(value.into()), EncodedTerm::NaiveTimeLiteral(value) => Some(value.into()),
EncodedTerm::DateTimeLiteral(value) => Some(value.time().into()), EncodedTerm::DateTimeLiteral(value) => Some(value.time().into()),
EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.time().into()), EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.time().into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => self
let value = self.dataset.get_str(value_id).ok()??; .dataset
Some(NaiveTime::parse_from_str(&value, "%H:%M:%S").ok()?.into()) .encoder()
} .encode_time_str(&*self.dataset.get_str(value_id).ok()??),
_ => None, _ => None,
}, },
PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? { PlanExpression::DateTimeCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::DateTimeLiteral(value) => Some(value.into()), EncodedTerm::DateTimeLiteral(value) => Some(value.into()),
EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.into()), EncodedTerm::NaiveDateTimeLiteral(value) => Some(value.into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => self
let value = self.dataset.get_str(value_id).ok()??; .dataset
Some(match DateTime::parse_from_rfc3339(&value) { .encoder()
Ok(value) => value.into(), .encode_date_time_str(&*self.dataset.get_str(value_id).ok()??),
Err(_) => NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S")
.ok()?
.into(),
})
}
_ => None, _ => None,
}, },
PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral { PlanExpression::StringCast(e) => Some(EncodedTerm::StringLiteral {
@ -1514,9 +1510,8 @@ fn get_triple_template_value(
TripleTemplateValue::Constant(term) => Some(*term), TripleTemplateValue::Constant(term) => Some(*term),
TripleTemplateValue::Variable(v) => get_tuple_value(*v, tuple), TripleTemplateValue::Variable(v) => get_tuple_value(*v, tuple),
TripleTemplateValue::BlankNode(id) => { TripleTemplateValue::BlankNode(id) => {
//TODO use resize_with if *id >= tuple.len() {
while *id >= tuple.len() { bnodes.resize_with(*id, BlankNode::default)
bnodes.push(BlankNode::default())
} }
tuple[*id] tuple[*id]
} }

@ -175,7 +175,6 @@ impl<S: StoreConnection> StoreRepositoryConnection<S> {
where where
P::Error: Send + Sync + 'static, P::Error: Send + Sync + 'static,
{ {
//TODO: handle errors
let mut bnode_map = HashMap::default(); let mut bnode_map = HashMap::default();
let encoder = self.inner.encoder(); let encoder = self.inner.encoder();
let graph_name = if let Some(graph_name) = to_graph_name { let graph_name = if let Some(graph_name) = to_graph_name {
@ -194,7 +193,6 @@ impl<S: StoreConnection> StoreRepositoryConnection<S> {
where where
P::Error: Send + Sync + 'static, P::Error: Send + Sync + 'static,
{ {
//TODO: handle errors
let mut bnode_map = HashMap::default(); let mut bnode_map = HashMap::default();
let encoder = self.inner.encoder(); let encoder = self.inner.encoder();
parser.parse_all(&mut move |q| { parser.parse_all(&mut move |q| {

@ -684,113 +684,45 @@ impl<S: StringStore> Encoder<S> {
}?, }?,
} }
} }
rio::Literal::Typed { value, datatype } => match datatype.iri { rio::Literal::Typed { value, datatype } => {
"http://www.w3.org/2001/XMLSchema#boolean" => match value { match match datatype.iri {
"true" | "1" => EncodedTerm::BooleanLiteral(true), "http://www.w3.org/2001/XMLSchema#boolean" => self.encode_boolean_str(value),
"false" | "0" => EncodedTerm::BooleanLiteral(false), "http://www.w3.org/2001/XMLSchema#string" => Some(EncodedTerm::StringLiteral {
_ => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?, value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_BOOLEAN_ID, }),
}, "http://www.w3.org/2001/XMLSchema#float" => self.encode_float_str(value),
}, "http://www.w3.org/2001/XMLSchema#double" => self.encode_double_str(value),
"http://www.w3.org/2001/XMLSchema#string" => EncodedTerm::StringLiteral { "http://www.w3.org/2001/XMLSchema#integer"
value_id: self.string_store.insert_str(value)?, | "http://www.w3.org/2001/XMLSchema#byte"
}, | "http://www.w3.org/2001/XMLSchema#short"
"http://www.w3.org/2001/XMLSchema#float" => match value.parse() { | "http://www.w3.org/2001/XMLSchema#int"
Ok(value) => EncodedTerm::FloatLiteral(OrderedFloat(value)), | "http://www.w3.org/2001/XMLSchema#long"
Err(_) => EncodedTerm::TypedLiteral { | "http://www.w3.org/2001/XMLSchema#unsignedByte"
value_id: self.string_store.insert_str(value)?, | "http://www.w3.org/2001/XMLSchema#unsignedShort"
datatype_id: XSD_FLOAT_ID, | "http://www.w3.org/2001/XMLSchema#unsignedInt"
}, | "http://www.w3.org/2001/XMLSchema#unsignedLong"
}, | "http://www.w3.org/2001/XMLSchema#positiveInteger"
"http://www.w3.org/2001/XMLSchema#double" => match value.parse() { | "http://www.w3.org/2001/XMLSchema#negativeInteger"
Ok(value) => EncodedTerm::DoubleLiteral(OrderedFloat(value)), | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
Err(_) => EncodedTerm::TypedLiteral { | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => {
value_id: self.string_store.insert_str(value)?, self.encode_integer_str(value)
datatype_id: XSD_DOUBLE_ID, }
}, "http://www.w3.org/2001/XMLSchema#decimal" => self.encode_decimal_str(value),
}, "http://www.w3.org/2001/XMLSchema#date" => self.encode_date_str(value),
"http://www.w3.org/2001/XMLSchema#integer" "http://www.w3.org/2001/XMLSchema#time" => self.encode_time_str(value),
| "http://www.w3.org/2001/XMLSchema#byte" "http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#short" | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
| "http://www.w3.org/2001/XMLSchema#int" self.encode_date_time_str(value)
| "http://www.w3.org/2001/XMLSchema#long" }
| "http://www.w3.org/2001/XMLSchema#unsignedByte" _ => None,
| "http://www.w3.org/2001/XMLSchema#unsignedShort" } {
| "http://www.w3.org/2001/XMLSchema#unsignedInt" Some(v) => v,
| "http://www.w3.org/2001/XMLSchema#unsignedLong" None => EncodedTerm::TypedLiteral {
| "http://www.w3.org/2001/XMLSchema#positiveInteger"
| "http://www.w3.org/2001/XMLSchema#negativeInteger"
| "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
| "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => match value.parse() {
Ok(value) => EncodedTerm::IntegerLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?, value_id: self.string_store.insert_str(value)?,
datatype_id: self.string_store.insert_str(datatype.iri)?, datatype_id: self.string_store.insert_str(datatype.iri)?,
}, },
},
"http://www.w3.org/2001/XMLSchema#decimal" => match value.parse() {
Ok(value) => EncodedTerm::DecimalLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DECIMAL_ID,
},
},
"http://www.w3.org/2001/XMLSchema#date" => {
let mut parsed = Parsed::new();
match parse(&mut parsed, &value, StrftimeItems::new("%Y-%m-%d%:z")).and_then(
|_| {
Ok(Date::from_utc(
parsed.to_naive_date()?,
parsed.to_fixed_offset()?,
))
},
) {
Ok(value) => EncodedTerm::DateLiteral(value),
Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%dZ") {
Ok(value) => EncodedTerm::DateLiteral(Date::from_utc(
value,
FixedOffset::east(0),
)),
Err(_) => match NaiveDate::parse_from_str(&value, "%Y-%m-%d") {
Ok(value) => EncodedTerm::NaiveDateLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DATE_ID,
},
},
},
}
}
"http://www.w3.org/2001/XMLSchema#time" => {
match NaiveTime::parse_from_str(&value, "%H:%M:%S") {
Ok(value) => EncodedTerm::NaiveTimeLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_TIME_ID,
},
}
}
"http://www.w3.org/2001/XMLSchema#dateTime"
| "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => {
match DateTime::parse_from_rfc3339(&value) {
Ok(value) => EncodedTerm::DateTimeLiteral(value),
Err(_) => {
match NaiveDateTime::parse_from_str(&value, "%Y-%m-%dT%H:%M:%S") {
Ok(value) => EncodedTerm::NaiveDateTimeLiteral(value),
Err(_) => EncodedTerm::TypedLiteral {
value_id: self.string_store.insert_str(value)?,
datatype_id: XSD_DATE_TIME_ID,
},
}
}
}
} }
_ => EncodedTerm::TypedLiteral { }
value_id: self.string_store.insert_str(value)?,
datatype_id: self.string_store.insert_str(datatype.iri)?,
},
},
}) })
} }
@ -849,6 +781,72 @@ impl<S: StringStore> Encoder<S> {
}) })
} }
/// Encodes the textual form of a boolean as an `EncodedTerm::BooleanLiteral`.
///
/// `"true"` and `"1"` map to `true`, `"false"` and `"0"` map to `false`.
/// Any other input yields `None`.
pub fn encode_boolean_str(&self, value: &str) -> Option<EncodedTerm> {
    let flag = match value {
        "true" | "1" => true,
        "false" | "0" => false,
        // Not one of the four accepted spellings.
        _ => return None,
    };
    Some(EncodedTerm::BooleanLiteral(flag))
}
/// Encodes the textual form of a float as an `EncodedTerm::FloatLiteral`.
///
/// The string is parsed with `str::parse`; the parsed number is wrapped in
/// `OrderedFloat`. Returns `None` when parsing fails.
pub fn encode_float_str(&self, value: &str) -> Option<EncodedTerm> {
    match value.parse() {
        Ok(number) => Some(EncodedTerm::FloatLiteral(OrderedFloat(number))),
        Err(_) => None,
    }
}
/// Encodes the textual form of a double as an `EncodedTerm::DoubleLiteral`.
///
/// The string is parsed with `str::parse`; the parsed number is wrapped in
/// `OrderedFloat`. Returns `None` when parsing fails.
pub fn encode_double_str(&self, value: &str) -> Option<EncodedTerm> {
    match value.parse() {
        Ok(number) => Some(EncodedTerm::DoubleLiteral(OrderedFloat(number))),
        Err(_) => None,
    }
}
/// Encodes the textual form of an integer as an `EncodedTerm::IntegerLiteral`.
///
/// The string is parsed with `str::parse` into the type carried by
/// `IntegerLiteral`. Returns `None` when parsing fails.
pub fn encode_integer_str(&self, value: &str) -> Option<EncodedTerm> {
    match value.parse() {
        Ok(number) => Some(EncodedTerm::IntegerLiteral(number)),
        Err(_) => None,
    }
}
/// Encodes the textual form of a decimal as an `EncodedTerm::DecimalLiteral`.
///
/// The string is parsed with `str::parse` into the type carried by
/// `DecimalLiteral`. Returns `None` when parsing fails.
pub fn encode_decimal_str(&self, value: &str) -> Option<EncodedTerm> {
    match value.parse() {
        Ok(number) => Some(EncodedTerm::DecimalLiteral(number)),
        Err(_) => None,
    }
}
/// Encodes the textual form of a date as an `EncodedTerm`.
///
/// Three formats are attempted, in this order; the first that parses wins:
///
/// 1. `%Y-%m-%d%:z` - produces a `DateLiteral` built from the parsed naive
///    date and the parsed fixed offset.
/// 2. `%Y-%m-%dZ` - produces a `DateLiteral` with a zero offset.
/// 3. `%Y-%m-%d` - produces a `NaiveDateLiteral`.
///
/// Returns `None` when none of the formats matches.
pub fn encode_date_str(&self, value: &str) -> Option<EncodedTerm> {
    // Attempt 1: date followed by an offset.
    let mut fields = Parsed::new();
    let with_offset = parse(&mut fields, value, StrftimeItems::new("%Y-%m-%d%:z"))
        .and_then(|_| {
            Ok(Date::from_utc(
                fields.to_naive_date()?,
                fields.to_fixed_offset()?,
            ))
        });
    if let Ok(date) = with_offset {
        return Some(EncodedTerm::DateLiteral(date));
    }

    // Attempt 2: date followed by a literal `Z`.
    if let Ok(day) = NaiveDate::parse_from_str(value, "%Y-%m-%dZ") {
        return Some(EncodedTerm::DateLiteral(Date::from_utc(
            day,
            FixedOffset::east(0),
        )));
    }

    // Attempt 3: bare date.
    match NaiveDate::parse_from_str(value, "%Y-%m-%d") {
        Ok(day) => Some(EncodedTerm::NaiveDateLiteral(day)),
        Err(_) => None,
    }
}
/// Encodes the textual form of a time as an `EncodedTerm::NaiveTimeLiteral`.
///
/// The only format attempted is `%H:%M:%S`. Returns `None` when the string
/// does not parse with it.
pub fn encode_time_str(&self, value: &str) -> Option<EncodedTerm> {
    match NaiveTime::parse_from_str(value, "%H:%M:%S") {
        Ok(time) => Some(EncodedTerm::NaiveTimeLiteral(time)),
        Err(_) => None,
    }
}
/// Encodes the textual form of a date-time as an `EncodedTerm`.
///
/// The string is first parsed as RFC 3339, which gives a `DateTimeLiteral`.
/// If that fails, the format `%Y-%m-%dT%H:%M:%S` is tried, which gives a
/// `NaiveDateTimeLiteral`. Returns `None` when both attempts fail.
pub fn encode_date_time_str(&self, value: &str) -> Option<EncodedTerm> {
    if let Ok(stamp) = DateTime::parse_from_rfc3339(value) {
        return Some(EncodedTerm::DateTimeLiteral(stamp));
    }
    // Fall back to the naive format without any offset part.
    match NaiveDateTime::parse_from_str(value, "%Y-%m-%dT%H:%M:%S") {
        Ok(stamp) => Some(EncodedTerm::NaiveDateTimeLiteral(stamp)),
        Err(_) => None,
    }
}
pub fn decode_term(&self, encoded: EncodedTerm) -> Result<Term> { pub fn decode_term(&self, encoded: EncodedTerm) -> Result<Term> {
match encoded { match encoded {
EncodedTerm::DefaultGraph => { EncodedTerm::DefaultGraph => {

Loading…
Cancel
Save