Introduces the EncodedTuple struct

Ensures that all accesses to it are safe and won't panic
pull/22/head
Tpt 5 years ago
parent 5481262bf9
commit 1cd0691520
  1. 182
      lib/src/sparql/eval.rs
  2. 78
      lib/src/sparql/plan.rs
  3. 8
      lib/src/sparql/plan_builder.rs

@ -22,7 +22,6 @@ use rio_api::model as rio;
use rust_decimal::{Decimal, RoundingStrategy}; use rust_decimal::{Decimal, RoundingStrategy};
use sha1::Sha1; use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512}; use sha2::{Sha256, Sha384, Sha512};
use std::cmp::min;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{BTreeMap, HashMap, HashSet};
use std::convert::TryInto; use std::convert::TryInto;
@ -69,7 +68,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
where where
'a: 'b, 'a: 'b,
{ {
let iter = self.eval_plan(plan, vec![None; variables.len()]); let iter = self.eval_plan(plan, EncodedTuple::with_capacity(variables.len()));
Ok(QueryResult::Bindings( Ok(QueryResult::Bindings(
self.decode_bindings(iter, variables.to_vec()), self.decode_bindings(iter, variables.to_vec()),
)) ))
@ -79,7 +78,13 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
where where
'a: 'b, 'a: 'b,
{ {
match self.eval_plan(plan, vec![]).next() { match self
.eval_plan(
plan,
EncodedTuple::with_capacity(plan.maybe_bound_variables().len()),
)
.next()
{
Some(Ok(_)) => Ok(QueryResult::Boolean(true)), Some(Ok(_)) => Ok(QueryResult::Boolean(true)),
Some(Err(error)) => Err(error), Some(Err(error)) => Err(error),
None => Ok(QueryResult::Boolean(false)), None => Ok(QueryResult::Boolean(false)),
@ -96,7 +101,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
{ {
Ok(QueryResult::Graph(Box::new(ConstructIterator { Ok(QueryResult::Graph(Box::new(ConstructIterator {
eval: self, eval: self,
iter: self.eval_plan(plan, vec![]), iter: self.eval_plan(
plan,
EncodedTuple::with_capacity(plan.maybe_bound_variables().len()),
),
template: construct, template: construct,
buffered_results: Vec::default(), buffered_results: Vec::default(),
bnodes: Vec::default(), bnodes: Vec::default(),
@ -109,7 +117,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
{ {
Ok(QueryResult::Graph(Box::new(DescribeIterator { Ok(QueryResult::Graph(Box::new(DescribeIterator {
eval: self, eval: self,
iter: self.eval_plan(plan, vec![]), iter: self.eval_plan(
plan,
EncodedTuple::with_capacity(plan.maybe_bound_variables().len()),
),
quads: Box::new(empty()), quads: Box::new(empty()),
}))) })))
} }
@ -127,10 +138,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
service_name, service_name,
graph_pattern, graph_pattern,
.. ..
} => match self.evaluate_service(service_name, graph_pattern, variables) { } => match self.evaluate_service(service_name, graph_pattern, variables, &from) {
Ok(result) => Box::new(result.flat_map(move |binding| { Ok(result) => Box::new(result.flat_map(move |binding| {
binding binding
.map(|binding| combine_tuples(&binding, &from)) .map(|binding| binding.combine_with(&from))
.transpose() .transpose()
})), })),
Err(e) => { Err(e) => {
@ -345,7 +356,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
Box::new(self.eval_plan(&*child, from).map(move |tuple| { Box::new(self.eval_plan(&*child, from).map(move |tuple| {
let mut tuple = tuple?; let mut tuple = tuple?;
if let Some(value) = eval.eval_expression(&expression, &tuple) { if let Some(value) = eval.eval_expression(&expression, &tuple) {
put_value(*position, value, &mut tuple) tuple.set(*position, value)
} }
Ok(tuple) Ok(tuple)
})) }))
@ -395,13 +406,13 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
PlanNode::Project { child, mapping } => { PlanNode::Project { child, mapping } => {
//TODO: use from somewhere? //TODO: use from somewhere?
Box::new( Box::new(
self.eval_plan(&*child, vec![None; mapping.len()]) self.eval_plan(&*child, EncodedTuple::with_capacity(mapping.len()))
.map(move |tuple| { .map(move |tuple| {
let tuple = tuple?; let tuple = tuple?;
let mut output_tuple = vec![None; from.len()]; let mut output_tuple = EncodedTuple::with_capacity(from.capacity());
for (input_key, output_key) in mapping.iter() { for (input_key, output_key) in mapping.iter() {
if let Some(value) = tuple[*input_key] { if let Some(value) = tuple.get(*input_key) {
put_value(*output_key, value, &mut output_tuple) output_tuple.set(*output_key, value)
} }
} }
Ok(output_tuple) Ok(output_tuple)
@ -413,7 +424,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
key_mapping, key_mapping,
aggregates, aggregates,
} => { } => {
let tuple_size = from.len(); //TODO: not nice let tuple_size = from.capacity(); //TODO: not nice
let mut errors = Vec::default(); let mut errors = Vec::default();
let mut accumulators_for_group = let mut accumulators_for_group =
HashMap::<Vec<Option<EncodedTerm>>, Vec<Box<dyn Accumulator>>>::default(); HashMap::<Vec<Option<EncodedTerm>>, Vec<Box<dyn Accumulator>>>::default();
@ -427,10 +438,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
}) })
.for_each(|tuple| { .for_each(|tuple| {
//TODO avoid copy for key? //TODO avoid copy for key?
let key = key_mapping let key = key_mapping.iter().map(|(v, _)| tuple.get(*v)).collect();
.iter()
.map(|(v, _)| get_tuple_value(*v, &tuple))
.collect();
let key_accumulators = let key_accumulators =
accumulators_for_group.entry(key).or_insert_with(|| { accumulators_for_group.entry(key).or_insert_with(|| {
@ -464,15 +472,15 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
.map(Err) .map(Err)
.chain(accumulators_for_group.into_iter().map( .chain(accumulators_for_group.into_iter().map(
move |(key, accumulators)| { move |(key, accumulators)| {
let mut result = vec![None; tuple_size]; let mut result = EncodedTuple::with_capacity(tuple_size);
for (from_position, to_position) in key_mapping.iter() { for (from_position, to_position) in key_mapping.iter() {
if let Some(value) = key[*from_position] { if let Some(value) = key[*from_position] {
put_value(*to_position, value, &mut result); result.set(*to_position, value);
} }
} }
for (i, accumulator) in accumulators.into_iter().enumerate() { for (i, accumulator) in accumulators.into_iter().enumerate() {
if let Some(value) = accumulator.state() { if let Some(value) = accumulator.state() {
put_value(aggregates[i].1, value, &mut result); result.set(aggregates[i].1, value);
} }
} }
Ok(result) Ok(result)
@ -488,9 +496,10 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
service_name: &PatternValue, service_name: &PatternValue,
graph_pattern: &'b GraphPattern, graph_pattern: &'b GraphPattern,
variables: &'b [Variable], variables: &'b [Variable],
from: &EncodedTuple,
) -> Result<EncodedTuplesIterator<'b>> { ) -> Result<EncodedTuplesIterator<'b>> {
let service_name = self.dataset.decode_named_node( let service_name = self.dataset.decode_named_node(
get_pattern_value(service_name, &[]) get_pattern_value(service_name, &from)
.ok_or_else(|| format_err!("The SERVICE name is not bound"))?, .ok_or_else(|| format_err!("The SERVICE name is not bound"))?,
)?; )?;
Ok(self.encode_bindings( Ok(self.encode_bindings(
@ -709,13 +718,13 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
fn eval_expression<'b>( fn eval_expression<'b>(
&'b self, &'b self,
expression: &PlanExpression, expression: &PlanExpression,
tuple: &[Option<EncodedTerm>], tuple: &EncodedTuple,
) -> Option<EncodedTerm> { ) -> Option<EncodedTerm> {
match expression { match expression {
PlanExpression::Constant(t) => Some(*t), PlanExpression::Constant(t) => Some(*t),
PlanExpression::Variable(v) => get_tuple_value(*v, tuple), PlanExpression::Variable(v) => tuple.get(*v),
PlanExpression::Exists(node) => { PlanExpression::Exists(node) => {
Some(self.eval_plan(node, tuple.to_vec()).next().is_some().into()) Some(self.eval_plan(node, tuple.clone()).next().is_some().into())
} }
PlanExpression::Or(a, b) => { PlanExpression::Or(a, b) => {
match self.eval_expression(a, tuple).and_then(|v| self.to_bool(v)) { match self.eval_expression(a, tuple).and_then(|v| self.to_bool(v)) {
@ -886,7 +895,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
) )
} }
PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(), PlanExpression::Datatype(e) => self.eval_expression(e, tuple)?.datatype(),
PlanExpression::Bound(v) => Some(has_tuple_value(*v, tuple).into()), PlanExpression::Bound(v) => Some(tuple.contains(*v).into()),
PlanExpression::IRI(e) => { PlanExpression::IRI(e) => {
let iri_id = match self.eval_expression(e, tuple)? { let iri_id = match self.eval_expression(e, tuple)? {
EncodedTerm::NamedNode { iri_id } => Some(iri_id), EncodedTerm::NamedNode { iri_id } => Some(iri_id),
@ -1571,7 +1580,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
&'b self, &'b self,
e1: &PlanExpression, e1: &PlanExpression,
e2: &PlanExpression, e2: &PlanExpression,
tuple: &[Option<EncodedTerm>], tuple: &EncodedTuple,
) -> Option<NumericBinaryOperands> { ) -> Option<NumericBinaryOperands> {
NumericBinaryOperands::new( NumericBinaryOperands::new(
self.eval_expression(&e1, tuple)?, self.eval_expression(&e1, tuple)?,
@ -1593,7 +1602,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
variables, variables,
Box::new(iter.map(move |values| { Box::new(iter.map(move |values| {
let mut result = vec![None; tuple_size]; let mut result = vec![None; tuple_size];
for (i, value) in values?.into_iter().enumerate() { for (i, value) in values?.iter().enumerate() {
if let Some(term) = value { if let Some(term) = value {
result[i] = Some(eval.dataset.decode_term(term)?) result[i] = Some(eval.dataset.decode_term(term)?)
} }
@ -1621,7 +1630,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
} }
Box::new(iter.map(move |terms| { Box::new(iter.map(move |terms| {
let mut encoder = self.dataset.encoder(); let mut encoder = self.dataset.encoder();
let mut encoded_terms = vec![None; combined_variables.len()]; let mut encoded_terms = EncodedTuple::with_capacity(combined_variables.len());
for (i, term_option) in terms?.into_iter().enumerate() { for (i, term_option) in terms?.into_iter().enumerate() {
match term_option { match term_option {
None => (), None => (),
@ -1756,8 +1765,8 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
fn cmp_according_to_expression<'b>( fn cmp_according_to_expression<'b>(
&'b self, &'b self,
tuple_a: &[Option<EncodedTerm>], tuple_a: &EncodedTuple,
tuple_b: &[Option<EncodedTerm>], tuple_b: &EncodedTuple,
expression: &PlanExpression, expression: &PlanExpression,
) -> Ordering { ) -> Ordering {
self.cmp_terms( self.cmp_terms(
@ -1876,7 +1885,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
fn hash<'b, H: Digest>( fn hash<'b, H: Digest>(
&'b self, &'b self,
arg: &PlanExpression, arg: &PlanExpression,
tuple: &[Option<EncodedTerm>], tuple: &EncodedTuple,
) -> Option<EncodedTerm> { ) -> Option<EncodedTerm> {
let input = self.to_simple_string(self.eval_expression(arg, tuple)?)?; let input = self.to_simple_string(self.eval_expression(arg, tuple)?)?;
let hash = hex::encode(H::new().chain(&input as &str).result()); let hash = hex::encode(H::new().chain(&input as &str).result());
@ -1981,36 +1990,17 @@ impl NumericBinaryOperands {
} }
} }
fn get_tuple_value(variable: usize, tuple: &[Option<EncodedTerm>]) -> Option<EncodedTerm> { fn get_pattern_value(selector: &PatternValue, tuple: &EncodedTuple) -> Option<EncodedTerm> {
if variable < tuple.len() {
tuple[variable]
} else {
None
}
}
fn has_tuple_value(variable: usize, tuple: &[Option<EncodedTerm>]) -> bool {
if variable < tuple.len() {
tuple[variable].is_some()
} else {
false
}
}
fn get_pattern_value(
selector: &PatternValue,
tuple: &[Option<EncodedTerm>],
) -> Option<EncodedTerm> {
match selector { match selector {
PatternValue::Constant(term) => Some(*term), PatternValue::Constant(term) => Some(*term),
PatternValue::Variable(v) => get_tuple_value(*v, tuple), PatternValue::Variable(v) => tuple.get(*v),
} }
} }
fn put_pattern_value(selector: &PatternValue, value: EncodedTerm, tuple: &mut EncodedTuple) { fn put_pattern_value(selector: &PatternValue, value: EncodedTerm, tuple: &mut EncodedTuple) {
match selector { match selector {
PatternValue::Constant(_) => (), PatternValue::Constant(_) => (),
PatternValue::Variable(v) => put_value(*v, value, tuple), PatternValue::Variable(v) => tuple.set(*v, value),
} }
} }
@ -2022,79 +2012,29 @@ fn put_variable_value(
) { ) {
for (i, v) in variables.iter().enumerate() { for (i, v) in variables.iter().enumerate() {
if selector == v { if selector == v {
put_value(i, value, tuple); tuple.set(i, value);
break; break;
} }
} }
} }
fn put_value(position: usize, value: EncodedTerm, tuple: &mut EncodedTuple) { fn bind_variables_in_set(binding: &EncodedTuple, set: &[usize]) -> Vec<usize> {
if position < tuple.len() {
tuple[position] = Some(value)
} else {
if position > tuple.len() {
tuple.resize(position, None);
}
tuple.push(Some(value))
}
}
fn bind_variables_in_set(binding: &[Option<EncodedTerm>], set: &[usize]) -> Vec<usize> {
set.iter() set.iter()
.cloned() .cloned()
.filter(|key| *key < binding.len() && binding[*key].is_some()) .filter(|key| binding.contains(*key))
.collect() .collect()
} }
fn unbind_variables(binding: &mut [Option<EncodedTerm>], variables: &[usize]) { fn unbind_variables(binding: &mut EncodedTuple, variables: &[usize]) {
for var in variables { for var in variables {
if *var < binding.len() { binding.unset(*var)
binding[*var] = None
}
}
}
fn combine_tuples(a: &[Option<EncodedTerm>], b: &[Option<EncodedTerm>]) -> Option<EncodedTuple> {
if a.len() < b.len() {
let mut result = b.to_owned();
for (key, a_value) in a.iter().enumerate() {
if let Some(a_value) = a_value {
match b[key] {
Some(ref b_value) => {
if a_value != b_value {
return None;
}
}
None => result[key] = Some(*a_value),
}
}
}
Some(result)
} else {
let mut result = a.to_owned();
for (key, b_value) in b.iter().enumerate() {
if let Some(b_value) = b_value {
match a[key] {
Some(ref a_value) => {
if a_value != b_value {
return None;
}
}
None => result[key] = Some(*b_value),
}
}
}
Some(result)
} }
} }
fn are_tuples_compatible_and_not_disjointed( pub fn are_compatible_and_not_disjointed(a: &EncodedTuple, b: &EncodedTuple) -> bool {
a: &[Option<EncodedTerm>],
b: &[Option<EncodedTerm>],
) -> bool {
let mut found_intersection = false; let mut found_intersection = false;
for i in 0..min(a.len(), b.len()) { for (a_value, b_value) in a.iter().zip(b.iter()) {
if let (Some(a_value), Some(b_value)) = (a[i], b[i]) { if let (Some(a_value), Some(b_value)) = (a_value, b_value) {
if a_value != b_value { if a_value != b_value {
return false; return false;
} }
@ -2123,7 +2063,7 @@ impl<'a> Iterator for JoinIterator<'a> {
Err(error) => return Some(Err(error)), Err(error) => return Some(Err(error)),
}; };
for left_tuple in &self.left { for left_tuple in &self.left {
if let Some(result_tuple) = combine_tuples(left_tuple, &right_tuple) { if let Some(result_tuple) = left_tuple.combine_with(&right_tuple) {
self.buffered_results.push(Ok(result_tuple)) self.buffered_results.push(Ok(result_tuple))
} }
} }
@ -2144,7 +2084,7 @@ impl<'a> Iterator for AntiJoinIterator<'a> {
match self.left_iter.next()? { match self.left_iter.next()? {
Ok(left_tuple) => { Ok(left_tuple) => {
let exists_compatible_right = self.right.iter().any(|right_tuple| { let exists_compatible_right = self.right.iter().any(|right_tuple| {
are_tuples_compatible_and_not_disjointed(&left_tuple, right_tuple) are_compatible_and_not_disjointed(&left_tuple, right_tuple)
}); });
if !exists_compatible_right { if !exists_compatible_right {
return Some(Ok(left_tuple)); return Some(Ok(left_tuple));
@ -2199,14 +2139,14 @@ impl<'a, S: StoreConnection> Iterator for BadLeftJoinIterator<'a, S> {
Ok(mut tuple) => { Ok(mut tuple) => {
let mut conflict = false; let mut conflict = false;
for problem_var in &self.problem_vars { for problem_var in &self.problem_vars {
if let Some(input_value) = self.input[*problem_var] { if let Some(input_value) = self.input.get(*problem_var) {
if let Some(result_value) = get_tuple_value(*problem_var, &tuple) { if let Some(result_value) = tuple.get(*problem_var) {
if input_value != result_value { if input_value != result_value {
conflict = true; conflict = true;
continue; //Binding conflict continue; //Binding conflict
} }
} else { } else {
put_value(*problem_var, input_value, &mut tuple); tuple.set(*problem_var, input_value);
} }
} }
} }
@ -2290,17 +2230,17 @@ impl<'a, S: StoreConnection + 'a> Iterator for ConstructIterator<'a, S> {
fn get_triple_template_value( fn get_triple_template_value(
selector: &TripleTemplateValue, selector: &TripleTemplateValue,
tuple: &[Option<EncodedTerm>], tuple: &EncodedTuple,
bnodes: &mut Vec<BlankNode>, bnodes: &mut Vec<BlankNode>,
) -> Option<EncodedTerm> { ) -> Option<EncodedTerm> {
match selector { match selector {
TripleTemplateValue::Constant(term) => Some(*term), TripleTemplateValue::Constant(term) => Some(*term),
TripleTemplateValue::Variable(v) => get_tuple_value(*v, tuple), TripleTemplateValue::Variable(v) => tuple.get(*v),
TripleTemplateValue::BlankNode(id) => { TripleTemplateValue::BlankNode(id) => {
if *id >= tuple.len() { if *id >= bnodes.len() {
bnodes.resize_with(*id, BlankNode::default) bnodes.resize_with(*id, BlankNode::default)
} }
tuple[*id] Some((&bnodes[*id]).into())
} }
} }
} }
@ -2343,7 +2283,7 @@ impl<'a, S: StoreConnection + 'a> Iterator for DescribeIterator<'a, S> {
Ok(tuple) => tuple, Ok(tuple) => tuple,
Err(error) => return Some(Err(error)), Err(error) => return Some(Err(error)),
}; };
for subject in tuple { for subject in tuple.iter() {
if let Some(subject) = subject { if let Some(subject) = subject {
self.quads = self.quads =
self.eval self.eval

@ -10,8 +10,6 @@ use crate::Result;
use std::cell::{RefCell, RefMut}; use std::cell::{RefCell, RefMut};
use std::collections::BTreeSet; use std::collections::BTreeSet;
pub type EncodedTuple = Vec<Option<EncodedTerm>>;
#[derive(Eq, PartialEq, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum PlanNode { pub enum PlanNode {
Init, Init,
@ -359,6 +357,82 @@ pub enum TripleTemplateValue {
Variable(usize), Variable(usize),
} }
/// A row of optional encoded terms addressed by position.
///
/// The backing vector is private, so every read and write goes through the
/// methods below. Reads past the end of the vector report "no value" instead
/// of panicking, and writes past the end grow the vector first.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct EncodedTuple {
    inner: Vec<Option<EncodedTerm>>,
}

impl EncodedTuple {
    /// Creates an empty tuple whose backing vector is pre-allocated for
    /// `capacity` positions.
    pub fn with_capacity(capacity: usize) -> Self {
        let inner = Vec::with_capacity(capacity);
        Self { inner }
    }

    /// Returns the capacity reported by the backing vector.
    pub fn capacity(&self) -> usize {
        self.inner.capacity()
    }

    /// Returns `true` when position `index` exists and holds a value.
    pub fn contains(&self, index: usize) -> bool {
        match self.inner.get(index) {
            Some(slot) => slot.is_some(),
            None => false,
        }
    }

    /// Returns the value at position `index`, or `None` when the position is
    /// empty or lies past the end of the tuple.
    pub fn get(&self, index: usize) -> Option<EncodedTerm> {
        self.inner.get(index).and_then(|slot| *slot)
    }

    /// Iterates over every stored position in order, yielding a copy of each slot.
    pub fn iter<'a>(&'a self) -> impl Iterator<Item = Option<EncodedTerm>> + 'a {
        self.inner.iter().map(|slot| *slot)
    }

    /// Stores `value` at position `index`, padding the tuple with empty
    /// positions when `index` lies past the current end.
    pub fn set(&mut self, index: usize, value: EncodedTerm) {
        match self.inner.get_mut(index) {
            Some(slot) => *slot = Some(value),
            None => {
                self.inner.resize(index + 1, None);
                self.inner[index] = Some(value);
            }
        }
    }

    /// Clears position `index`; does nothing when the position does not exist.
    pub fn unset(&mut self, index: usize) {
        if index < self.inner.len() {
            self.inner[index] = None;
        }
    }

    /// Merges two tuples position by position.
    ///
    /// Returns `None` as soon as both tuples hold different values at the same
    /// position. Otherwise the result has the length of the longer tuple and
    /// holds, at each position, whichever value is present.
    pub fn combine_with(&self, other: &EncodedTuple) -> Option<Self> {
        // Start from a copy of the longer side (`self` on ties) and fold the
        // shorter side into it.
        let (base, extra) = if self.inner.len() < other.inner.len() {
            (&other.inner, &self.inner)
        } else {
            (&self.inner, &other.inner)
        };
        let mut merged = base.clone();
        for (slot, extra_value) in merged.iter_mut().zip(extra.iter()) {
            if let Some(extra_value) = extra_value {
                match *slot {
                    Some(ref base_value) => {
                        if base_value != extra_value {
                            return None;
                        }
                    }
                    None => *slot = Some(*extra_value),
                }
            }
        }
        Some(EncodedTuple { inner: merged })
    }
}
pub struct DatasetView<S: StoreConnection> { pub struct DatasetView<S: StoreConnection> {
store: S, store: S,
extra: RefCell<MemoryStrStore>, extra: RefCell<MemoryStrStore>,

@ -735,11 +735,13 @@ impl<E: Encoder> PlanBuilder<E> {
bindings bindings
.values_iter() .values_iter()
.map(move |values| { .map(move |values| {
let mut result = vec![None; variables.len()]; let mut result = EncodedTuple::with_capacity(variables.len());
for (key, value) in values.iter().enumerate() { for (key, value) in values.iter().enumerate() {
if let Some(term) = value { if let Some(term) = value {
result[bindings_variables_keys[key]] = result.set(
Some(self.encoder.encode_term(term)?); bindings_variables_keys[key],
self.encoder.encode_term(term)?,
);
} }
} }
Ok(result) Ok(result)

Loading…
Cancel
Save