|
|
@ -20,9 +20,10 @@ use sha1::Sha1; |
|
|
|
use sha2::{Sha256, Sha384, Sha512}; |
|
|
|
use sha2::{Sha256, Sha384, Sha512}; |
|
|
|
use spargebra::algebra::GraphPattern; |
|
|
|
use spargebra::algebra::GraphPattern; |
|
|
|
use std::cmp::Ordering; |
|
|
|
use std::cmp::Ordering; |
|
|
|
|
|
|
|
use std::collections::hash_map::DefaultHasher; |
|
|
|
use std::collections::{HashMap, HashSet}; |
|
|
|
use std::collections::{HashMap, HashSet}; |
|
|
|
use std::convert::{TryFrom, TryInto}; |
|
|
|
use std::convert::{TryFrom, TryInto}; |
|
|
|
use std::hash::Hash; |
|
|
|
use std::hash::{Hash, Hasher}; |
|
|
|
use std::iter::Iterator; |
|
|
|
use std::iter::Iterator; |
|
|
|
use std::iter::{empty, once}; |
|
|
|
use std::iter::{empty, once}; |
|
|
|
use std::rc::Rc; |
|
|
|
use std::rc::Rc; |
|
|
@ -301,27 +302,17 @@ impl SimpleEvaluator { |
|
|
|
// Real hash join
|
|
|
|
// Real hash join
|
|
|
|
Rc::new(move |from| { |
|
|
|
Rc::new(move |from| { |
|
|
|
let mut errors = Vec::default(); |
|
|
|
let mut errors = Vec::default(); |
|
|
|
let mut right_values = HashMap::new(); |
|
|
|
let mut right_values = EncodedTupleSet::new(join_keys.clone()); |
|
|
|
for result in right(from.clone()) { |
|
|
|
for result in right(from.clone()) { |
|
|
|
match result { |
|
|
|
match result { |
|
|
|
Ok(result) => { |
|
|
|
Ok(result) => right_values.insert(result), |
|
|
|
let key = |
|
|
|
Err(error) => errors.push(Err(error)), |
|
|
|
join_keys.iter().map(|k| result.get(*k).cloned()).collect(); |
|
|
|
|
|
|
|
right_values |
|
|
|
|
|
|
|
.entry(key) |
|
|
|
|
|
|
|
.or_insert_with(Vec::new) |
|
|
|
|
|
|
|
.push(result); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
Err(error) => { |
|
|
|
|
|
|
|
errors.push(Err(error)); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
Box::new(HashJoinIterator { |
|
|
|
Box::new(HashJoinIterator { |
|
|
|
left_iter: left(from), |
|
|
|
left_iter: left(from), |
|
|
|
right: right_values, |
|
|
|
right: right_values, |
|
|
|
buffered_results: errors, |
|
|
|
buffered_results: errors, |
|
|
|
join_keys: join_keys.clone(), |
|
|
|
|
|
|
|
}) |
|
|
|
}) |
|
|
|
}) |
|
|
|
}) |
|
|
|
} |
|
|
|
} |
|
|
@ -357,18 +348,13 @@ impl SimpleEvaluator { |
|
|
|
}) |
|
|
|
}) |
|
|
|
} else { |
|
|
|
} else { |
|
|
|
Rc::new(move |from| { |
|
|
|
Rc::new(move |from| { |
|
|
|
let mut right_values = HashMap::new(); |
|
|
|
let mut right_values = EncodedTupleSet::new(join_keys.clone()); |
|
|
|
for result in right(from.clone()).filter_map(std::result::Result::ok) { |
|
|
|
for result in right(from.clone()).filter_map(std::result::Result::ok) { |
|
|
|
let key = join_keys.iter().map(|k| result.get(*k).cloned()).collect(); |
|
|
|
right_values.insert(result); |
|
|
|
right_values |
|
|
|
|
|
|
|
.entry(key) |
|
|
|
|
|
|
|
.or_insert_with(Vec::new) |
|
|
|
|
|
|
|
.push(result); |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
Box::new(HashAntiJoinIterator { |
|
|
|
Box::new(HashAntiJoinIterator { |
|
|
|
left_iter: left(from), |
|
|
|
left_iter: left(from), |
|
|
|
right: right_values, |
|
|
|
right: right_values, |
|
|
|
join_keys: join_keys.clone(), |
|
|
|
|
|
|
|
}) |
|
|
|
}) |
|
|
|
}) |
|
|
|
}) |
|
|
|
} |
|
|
|
} |
|
|
@ -2983,9 +2969,8 @@ impl Iterator for CartesianProductJoinIterator { |
|
|
|
|
|
|
|
|
|
|
|
struct HashJoinIterator { |
|
|
|
struct HashJoinIterator { |
|
|
|
left_iter: EncodedTuplesIterator, |
|
|
|
left_iter: EncodedTuplesIterator, |
|
|
|
right: HashMap<Vec<Option<EncodedTerm>>, Vec<EncodedTuple>>, |
|
|
|
right: EncodedTupleSet, |
|
|
|
buffered_results: Vec<Result<EncodedTuple, EvaluationError>>, |
|
|
|
buffered_results: Vec<Result<EncodedTuple, EvaluationError>>, |
|
|
|
join_keys: Vec<usize>, |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
impl Iterator for HashJoinIterator { |
|
|
|
impl Iterator for HashJoinIterator { |
|
|
@ -3000,20 +2985,13 @@ impl Iterator for HashJoinIterator { |
|
|
|
Ok(left_tuple) => left_tuple, |
|
|
|
Ok(left_tuple) => left_tuple, |
|
|
|
Err(error) => return Some(Err(error)), |
|
|
|
Err(error) => return Some(Err(error)), |
|
|
|
}; |
|
|
|
}; |
|
|
|
let key: Vec<_> = self |
|
|
|
for right_tuple in self.right.get(&left_tuple) { |
|
|
|
.join_keys |
|
|
|
|
|
|
|
.iter() |
|
|
|
|
|
|
|
.map(|k| left_tuple.get(*k).cloned()) |
|
|
|
|
|
|
|
.collect(); |
|
|
|
|
|
|
|
if let Some(right_tuples) = self.right.get(&key) { |
|
|
|
|
|
|
|
for right_tuple in right_tuples { |
|
|
|
|
|
|
|
if let Some(result_tuple) = left_tuple.combine_with(right_tuple) { |
|
|
|
if let Some(result_tuple) = left_tuple.combine_with(right_tuple) { |
|
|
|
self.buffered_results.push(Ok(result_tuple)) |
|
|
|
self.buffered_results.push(Ok(result_tuple)) |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
struct CartesianProductAntiJoinIterator { |
|
|
|
struct CartesianProductAntiJoinIterator { |
|
|
@ -3043,8 +3021,7 @@ impl Iterator for CartesianProductAntiJoinIterator { |
|
|
|
|
|
|
|
|
|
|
|
struct HashAntiJoinIterator { |
|
|
|
struct HashAntiJoinIterator { |
|
|
|
left_iter: EncodedTuplesIterator, |
|
|
|
left_iter: EncodedTuplesIterator, |
|
|
|
right: HashMap<Vec<Option<EncodedTerm>>, Vec<EncodedTuple>>, |
|
|
|
right: EncodedTupleSet, |
|
|
|
join_keys: Vec<usize>, |
|
|
|
|
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
impl Iterator for HashAntiJoinIterator { |
|
|
|
impl Iterator for HashAntiJoinIterator { |
|
|
@ -3054,15 +3031,9 @@ impl Iterator for HashAntiJoinIterator { |
|
|
|
loop { |
|
|
|
loop { |
|
|
|
match self.left_iter.next()? { |
|
|
|
match self.left_iter.next()? { |
|
|
|
Ok(left_tuple) => { |
|
|
|
Ok(left_tuple) => { |
|
|
|
let key: Vec<_> = self |
|
|
|
let exists_compatible_right = |
|
|
|
.join_keys |
|
|
|
self.right.get(&left_tuple).iter().any(|right_tuple| { |
|
|
|
.iter() |
|
|
|
|
|
|
|
.map(|k| left_tuple.get(*k).cloned()) |
|
|
|
|
|
|
|
.collect(); |
|
|
|
|
|
|
|
let exists_compatible_right = self.right.get(&key).map_or(false, |r| { |
|
|
|
|
|
|
|
r.iter().any(|right_tuple| { |
|
|
|
|
|
|
|
are_compatible_and_not_disjointed(&left_tuple, right_tuple) |
|
|
|
are_compatible_and_not_disjointed(&left_tuple, right_tuple) |
|
|
|
}) |
|
|
|
|
|
|
|
}); |
|
|
|
}); |
|
|
|
if !exists_compatible_right { |
|
|
|
if !exists_compatible_right { |
|
|
|
return Some(Ok(left_tuple)); |
|
|
|
return Some(Ok(left_tuple)); |
|
|
@ -3755,6 +3726,40 @@ pub enum ComparatorFunction { |
|
|
|
Desc(Rc<dyn Fn(&EncodedTuple) -> Option<EncodedTerm>>), |
|
|
|
Desc(Rc<dyn Fn(&EncodedTuple) -> Option<EncodedTerm>>), |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct EncodedTupleSet { |
|
|
|
|
|
|
|
key: Vec<usize>, |
|
|
|
|
|
|
|
map: HashMap<u64, Vec<EncodedTuple>>, |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
impl EncodedTupleSet { |
|
|
|
|
|
|
|
fn new(key: Vec<usize>) -> Self { |
|
|
|
|
|
|
|
Self { |
|
|
|
|
|
|
|
key, |
|
|
|
|
|
|
|
map: HashMap::new(), |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
fn insert(&mut self, tuple: EncodedTuple) { |
|
|
|
|
|
|
|
self.map |
|
|
|
|
|
|
|
.entry(self.tuple_key(&tuple)) |
|
|
|
|
|
|
|
.or_default() |
|
|
|
|
|
|
|
.push(tuple); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn get(&self, tuple: &EncodedTuple) -> &[EncodedTuple] { |
|
|
|
|
|
|
|
self.map.get(&self.tuple_key(tuple)).map_or(&[], |v| v) |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fn tuple_key(&self, tuple: &EncodedTuple) -> u64 { |
|
|
|
|
|
|
|
let mut hasher = DefaultHasher::default(); |
|
|
|
|
|
|
|
for v in &self.key { |
|
|
|
|
|
|
|
if let Some(val) = tuple.get(*v) { |
|
|
|
|
|
|
|
val.hash(&mut hasher); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
hasher.finish() |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#[test] |
|
|
|
#[test] |
|
|
|
fn uuid() { |
|
|
|
fn uuid() { |
|
|
|
let mut buffer = String::default(); |
|
|
|
let mut buffer = String::default(); |
|
|
|