Avoid inserting strings into the store while building the query plan

pull/10/head
Tpt 5 years ago
parent a017d6e1c3
commit b0988aa4b9
  1. 64
      lib/src/sparql/eval.rs
  2. 21
      lib/src/sparql/mod.rs
  3. 67
      lib/src/sparql/plan.rs
  4. 39
      lib/src/sparql/plan_builder.rs

@ -3,7 +3,6 @@ use crate::model::Triple;
use crate::sparql::model::*; use crate::sparql::model::*;
use crate::sparql::plan::*; use crate::sparql::plan::*;
use crate::store::numeric_encoder::*; use crate::store::numeric_encoder::*;
use crate::store::numeric_encoder::{MemoryStringStore, ENCODED_EMPTY_STRING_LITERAL};
use crate::store::StoreConnection; use crate::store::StoreConnection;
use crate::Result; use crate::Result;
use chrono::prelude::*; use chrono::prelude::*;
@ -47,9 +46,9 @@ pub struct SimpleEvaluator<S: StoreConnection> {
} }
impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> { impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
pub fn new(dataset: S, base_iri: Option<Iri<String>>) -> Self { pub fn new(dataset: DatasetView<S>, base_iri: Option<Iri<String>>) -> Self {
Self { Self {
dataset: DatasetView::new(dataset), dataset,
bnodes_map: Mutex::new(BTreeMap::default()), bnodes_map: Mutex::new(BTreeMap::default()),
base_iri, base_iri,
now: Utc::now().with_timezone(&FixedOffset::east(0)), now: Utc::now().with_timezone(&FixedOffset::east(0)),
@ -1822,65 +1821,6 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
} }
} }
/// View over a store connection that pairs the wrapped store with an
/// in-memory string store.
///
/// Quad lookups are forwarded to `store`. The `StringStore` implementation for
/// this type consults `store` first and falls back to `extra`.
struct DatasetView<S: StoreConnection> {
    /// The wrapped store connection.
    store: S,
    /// In-memory string store used for strings that `store` does not know.
    extra: MemoryStringStore,
}

impl<S: StoreConnection> DatasetView<S> {
    /// Wraps `store` together with a default-constructed in-memory string store.
    fn new(store: S) -> Self {
        Self {
            store,
            extra: MemoryStringStore::default(),
        }
    }

    /// Forwards the quad pattern lookup to the wrapped store unchanged.
    ///
    /// The four arguments are passed through as-is; the returned iterator
    /// borrows from `self`.
    fn quads_for_pattern<'a>(
        &'a self,
        subject: Option<EncodedTerm>,
        predicate: Option<EncodedTerm>,
        object: Option<EncodedTerm>,
        graph_name: Option<EncodedTerm>,
    ) -> Box<dyn Iterator<Item = Result<EncodedQuad>> + 'a> {
        self.store
            .quads_for_pattern(subject, predicate, object, graph_name)
    }

    /// Returns an encoder that uses a reference to this view as its string store.
    fn encoder(&self) -> Encoder<&Self> {
        // `self` is already `&Self`; borrowing it again would only produce a
        // `&&Self` that has to be coerced back.
        Encoder::new(self)
    }
}
/// String lookups that try the wrapped store first and the in-memory `extra`
/// store second.
///
/// Ids handed out for strings held in `extra` are exposed as
/// `u64::MAX - id_in_extra`.
impl<S: StoreConnection> StringStore for DatasetView<S> {
    type StringType = StringOrStoreString<S::StringType>;

    /// Resolves `id` in the wrapped store, then in `extra` (after undoing the
    /// `u64::MAX - id` mapping). Yields `None` when neither knows the id.
    fn get_str(&self, id: u64) -> Result<Option<Self::StringType>> {
        if let Some(from_store) = self.store.get_str(id)? {
            return Ok(Some(StringOrStoreString::Store(from_store)));
        }
        let from_extra = self.extra.get_str(u64::MAX - id)?;
        Ok(from_extra.map(StringOrStoreString::String))
    }

    /// Looks up the id of `value`, preferring the wrapped store's id and
    /// otherwise mapping the id found in `extra`, if any.
    fn get_str_id(&self, value: &str) -> Result<Option<u64>> {
        let store_id = self.store.get_str_id(value)?;
        if store_id.is_some() {
            return Ok(store_id);
        }
        let extra_id = self.extra.get_str_id(value)?;
        Ok(extra_id.map(|local_id| u64::MAX - local_id))
    }

    /// Returns the wrapped store's id for `value` when it has one; otherwise
    /// inserts `value` into `extra` only and returns the mapped id.
    fn insert_str(&self, value: &str) -> Result<u64> {
        match self.store.get_str_id(value)? {
            Some(store_id) => Ok(store_id),
            None => Ok(u64::MAX - self.extra.insert_str(value)?),
        }
    }
}
pub enum StringOrStoreString<S: Deref<Target = str> + ToString + Into<String>> { pub enum StringOrStoreString<S: Deref<Target = str> + ToString + Into<String>> {
String(String), String(String),
Store(S), Store(S),

@ -12,8 +12,8 @@ mod xml_results;
use crate::sparql::algebra::QueryVariants; use crate::sparql::algebra::QueryVariants;
use crate::sparql::eval::SimpleEvaluator; use crate::sparql::eval::SimpleEvaluator;
use crate::sparql::parser::read_sparql_query; use crate::sparql::parser::read_sparql_query;
use crate::sparql::plan::PlanNode;
use crate::sparql::plan::TripleTemplate; use crate::sparql::plan::TripleTemplate;
use crate::sparql::plan::{DatasetView, PlanNode};
use crate::sparql::plan_builder::PlanBuilder; use crate::sparql::plan_builder::PlanBuilder;
use crate::store::StoreConnection; use crate::store::StoreConnection;
use crate::Result; use crate::Result;
@ -56,6 +56,7 @@ enum SimplePreparedQueryOptions<S: StoreConnection> {
impl<S: StoreConnection> SimplePreparedQuery<S> { impl<S: StoreConnection> SimplePreparedQuery<S> {
pub(crate) fn new(connection: S, query: &str, base_iri: Option<&str>) -> Result<Self> { pub(crate) fn new(connection: S, query: &str, base_iri: Option<&str>) -> Result<Self> {
let dataset = DatasetView::new(connection);
//TODO avoid inserting terms in the Repository StringStore //TODO avoid inserting terms in the Repository StringStore
Ok(Self(match read_sparql_query(query, base_iri)? { Ok(Self(match read_sparql_query(query, base_iri)? {
QueryVariants::Select { QueryVariants::Select {
@ -63,11 +64,11 @@ impl<S: StoreConnection> SimplePreparedQuery<S> {
dataset: _, dataset: _,
base_iri, base_iri,
} => { } => {
let (plan, variables) = PlanBuilder::build(&connection, &algebra)?; let (plan, variables) = PlanBuilder::build(dataset.encoder(), &algebra)?;
SimplePreparedQueryOptions::Select { SimplePreparedQueryOptions::Select {
plan, plan,
variables, variables,
evaluator: SimpleEvaluator::new(connection, base_iri), evaluator: SimpleEvaluator::new(dataset, base_iri),
} }
} }
QueryVariants::Ask { QueryVariants::Ask {
@ -75,10 +76,10 @@ impl<S: StoreConnection> SimplePreparedQuery<S> {
dataset: _, dataset: _,
base_iri, base_iri,
} => { } => {
let (plan, _) = PlanBuilder::build(&connection, &algebra)?; let (plan, _) = PlanBuilder::build(dataset.encoder(), &algebra)?;
SimplePreparedQueryOptions::Ask { SimplePreparedQueryOptions::Ask {
plan, plan,
evaluator: SimpleEvaluator::new(connection, base_iri), evaluator: SimpleEvaluator::new(dataset, base_iri),
} }
} }
QueryVariants::Construct { QueryVariants::Construct {
@ -87,15 +88,15 @@ impl<S: StoreConnection> SimplePreparedQuery<S> {
dataset: _, dataset: _,
base_iri, base_iri,
} => { } => {
let (plan, variables) = PlanBuilder::build(&connection, &algebra)?; let (plan, variables) = PlanBuilder::build(dataset.encoder(), &algebra)?;
SimplePreparedQueryOptions::Construct { SimplePreparedQueryOptions::Construct {
plan, plan,
construct: PlanBuilder::build_graph_template( construct: PlanBuilder::build_graph_template(
&connection, dataset.encoder(),
&construct, &construct,
variables, variables,
)?, )?,
evaluator: SimpleEvaluator::new(connection, base_iri), evaluator: SimpleEvaluator::new(dataset, base_iri),
} }
} }
QueryVariants::Describe { QueryVariants::Describe {
@ -103,10 +104,10 @@ impl<S: StoreConnection> SimplePreparedQuery<S> {
dataset: _, dataset: _,
base_iri, base_iri,
} => { } => {
let (plan, _) = PlanBuilder::build(&connection, &algebra)?; let (plan, _) = PlanBuilder::build(dataset.encoder(), &algebra)?;
SimplePreparedQueryOptions::Describe { SimplePreparedQueryOptions::Describe {
plan, plan,
evaluator: SimpleEvaluator::new(connection, base_iri), evaluator: SimpleEvaluator::new(dataset, base_iri),
} }
} }
})) }))

@ -1,5 +1,11 @@
use crate::store::numeric_encoder::EncodedTerm; use crate::sparql::eval::StringOrStoreString;
use crate::store::numeric_encoder::{
EncodedQuad, EncodedTerm, Encoder, MemoryStringStore, StringStore,
};
use crate::store::StoreConnection;
use crate::Result;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::u64;
pub type EncodedTuple = Vec<Option<EncodedTerm>>; pub type EncodedTuple = Vec<Option<EncodedTerm>>;
@ -452,3 +458,62 @@ pub enum TripleTemplateValue {
BlankNode(usize), BlankNode(usize),
Variable(usize), Variable(usize),
} }
/// View over a store connection that pairs the wrapped store with an
/// in-memory string store.
///
/// Quad lookups are forwarded to `store`. The `StringStore` implementation for
/// this type consults `store` first and falls back to `extra`.
pub struct DatasetView<S: StoreConnection> {
    /// The wrapped store connection.
    store: S,
    /// In-memory string store used for strings that `store` does not know.
    extra: MemoryStringStore,
}

impl<S: StoreConnection> DatasetView<S> {
    /// Wraps `store` together with a default-constructed in-memory string store.
    pub fn new(store: S) -> Self {
        Self {
            store,
            extra: MemoryStringStore::default(),
        }
    }

    /// Forwards the quad pattern lookup to the wrapped store unchanged.
    ///
    /// The four arguments are passed through as-is; the returned iterator
    /// borrows from `self`.
    pub fn quads_for_pattern<'a>(
        &'a self,
        subject: Option<EncodedTerm>,
        predicate: Option<EncodedTerm>,
        object: Option<EncodedTerm>,
        graph_name: Option<EncodedTerm>,
    ) -> Box<dyn Iterator<Item = Result<EncodedQuad>> + 'a> {
        self.store
            .quads_for_pattern(subject, predicate, object, graph_name)
    }

    /// Returns an encoder that uses a reference to this view as its string store.
    pub fn encoder(&self) -> Encoder<&Self> {
        // `self` is already `&Self`; borrowing it again would only produce a
        // `&&Self` that has to be coerced back.
        Encoder::new(self)
    }
}
/// String lookups that try the wrapped store first and the in-memory `extra`
/// store second.
///
/// Ids handed out for strings held in `extra` are exposed as
/// `u64::MAX - id_in_extra`.
impl<S: StoreConnection> StringStore for DatasetView<S> {
    type StringType = StringOrStoreString<S::StringType>;

    /// Resolves `id` in the wrapped store, then in `extra` (after undoing the
    /// `u64::MAX - id` mapping). Yields `None` when neither knows the id.
    fn get_str(&self, id: u64) -> Result<Option<Self::StringType>> {
        if let Some(from_store) = self.store.get_str(id)? {
            return Ok(Some(StringOrStoreString::Store(from_store)));
        }
        let from_extra = self.extra.get_str(u64::MAX - id)?;
        Ok(from_extra.map(StringOrStoreString::String))
    }

    /// Looks up the id of `value`, preferring the wrapped store's id and
    /// otherwise mapping the id found in `extra`, if any.
    fn get_str_id(&self, value: &str) -> Result<Option<u64>> {
        let store_id = self.store.get_str_id(value)?;
        if store_id.is_some() {
            return Ok(store_id);
        }
        let extra_id = self.extra.get_str_id(value)?;
        Ok(extra_id.map(|local_id| u64::MAX - local_id))
    }

    /// Returns the wrapped store's id for `value` when it has one; otherwise
    /// inserts `value` into `extra` only and returns the mapped id.
    fn insert_str(&self, value: &str) -> Result<u64> {
        match self.store.get_str_id(value)? {
            Some(store_id) => Ok(store_id),
            None => Ok(u64::MAX - self.extra.insert_str(value)?),
        }
    }
}

@ -4,20 +4,19 @@ use crate::sparql::algebra::*;
use crate::sparql::model::*; use crate::sparql::model::*;
use crate::sparql::plan::PlanPropertyPath; use crate::sparql::plan::PlanPropertyPath;
use crate::sparql::plan::*; use crate::sparql::plan::*;
use crate::store::numeric_encoder::ENCODED_DEFAULT_GRAPH; use crate::store::numeric_encoder::{Encoder, StringStore, ENCODED_DEFAULT_GRAPH};
use crate::store::StoreConnection;
use crate::Result; use crate::Result;
use failure::format_err; use failure::format_err;
use std::collections::HashSet; use std::collections::HashSet;
pub struct PlanBuilder<'a, S: StoreConnection> { pub struct PlanBuilder<S: StringStore> {
store: &'a S, encoder: Encoder<S>,
} }
impl<'a, S: StoreConnection> PlanBuilder<'a, S> { impl<S: StringStore> PlanBuilder<S> {
pub fn build(store: &'a S, pattern: &GraphPattern) -> Result<(PlanNode, Vec<Variable>)> { pub fn build(encoder: Encoder<S>, pattern: &GraphPattern) -> Result<(PlanNode, Vec<Variable>)> {
let mut variables = Vec::default(); let mut variables = Vec::default();
let plan = PlanBuilder { store }.build_for_graph_pattern( let plan = PlanBuilder { encoder }.build_for_graph_pattern(
pattern, pattern,
&mut variables, &mut variables,
PatternValue::Constant(ENCODED_DEFAULT_GRAPH), PatternValue::Constant(ENCODED_DEFAULT_GRAPH),
@ -26,11 +25,11 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
} }
pub fn build_graph_template( pub fn build_graph_template(
store: &S, encoder: Encoder<S>,
template: &[TriplePattern], template: &[TriplePattern],
mut variables: Vec<Variable>, mut variables: Vec<Variable>,
) -> Result<Vec<TripleTemplate>> { ) -> Result<Vec<TripleTemplate>> {
PlanBuilder { store }.build_for_graph_template(template, &mut variables) PlanBuilder { encoder }.build_for_graph_template(template, &mut variables)
} }
fn build_for_graph_pattern( fn build_for_graph_pattern(
@ -231,7 +230,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
fn build_for_path(&self, path: &PropertyPath) -> Result<PlanPropertyPath> { fn build_for_path(&self, path: &PropertyPath) -> Result<PlanPropertyPath> {
Ok(match path { Ok(match path {
PropertyPath::PredicatePath(p) => { PropertyPath::PredicatePath(p) => {
PlanPropertyPath::PredicatePath(self.store.encoder().encode_named_node(p)?) PlanPropertyPath::PredicatePath(self.encoder.encode_named_node(p)?)
} }
PropertyPath::InversePath(p) => { PropertyPath::InversePath(p) => {
PlanPropertyPath::InversePath(Box::new(self.build_for_path(p)?)) PlanPropertyPath::InversePath(Box::new(self.build_for_path(p)?))
@ -255,7 +254,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
} }
PropertyPath::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet( PropertyPath::NegatedPropertySet(p) => PlanPropertyPath::NegatedPropertySet(
p.iter() p.iter()
.map(|p| self.store.encoder().encode_named_node(p)) .map(|p| self.encoder.encode_named_node(p))
.collect::<Result<Vec<_>>>()?, .collect::<Result<Vec<_>>>()?,
), ),
}) })
@ -269,9 +268,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
) -> Result<PlanExpression> { ) -> Result<PlanExpression> {
Ok(match expression { Ok(match expression {
Expression::Constant(t) => match t { Expression::Constant(t) => match t {
TermOrVariable::Term(t) => { TermOrVariable::Term(t) => PlanExpression::Constant(self.encoder.encode_term(t)?),
PlanExpression::Constant(self.store.encoder().encode_term(t)?)
}
TermOrVariable::Variable(v) => PlanExpression::Variable(variable_key(variables, v)), TermOrVariable::Variable(v) => PlanExpression::Variable(variable_key(variables, v)),
}, },
Expression::Or(a, b) => PlanExpression::Or( Expression::Or(a, b) => PlanExpression::Or(
@ -691,9 +688,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<PatternValue> { ) -> Result<PatternValue> {
Ok(match term_or_variable { Ok(match term_or_variable {
TermOrVariable::Term(term) => { TermOrVariable::Term(term) => PatternValue::Constant(self.encoder.encode_term(term)?),
PatternValue::Constant(self.store.encoder().encode_term(term)?)
}
TermOrVariable::Variable(variable) => { TermOrVariable::Variable(variable) => {
PatternValue::Variable(variable_key(variables, variable)) PatternValue::Variable(variable_key(variables, variable))
} }
@ -707,7 +702,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
) -> Result<PatternValue> { ) -> Result<PatternValue> {
Ok(match named_node_or_variable { Ok(match named_node_or_variable {
NamedNodeOrVariable::NamedNode(named_node) => { NamedNodeOrVariable::NamedNode(named_node) => {
PatternValue::Constant(self.store.encoder().encode_named_node(named_node)?) PatternValue::Constant(self.encoder.encode_named_node(named_node)?)
} }
NamedNodeOrVariable::Variable(variable) => { NamedNodeOrVariable::Variable(variable) => {
PatternValue::Variable(variable_key(variables, variable)) PatternValue::Variable(variable_key(variables, variable))
@ -720,7 +715,6 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
bindings: &StaticBindings, bindings: &StaticBindings,
variables: &mut Vec<Variable>, variables: &mut Vec<Variable>,
) -> Result<Vec<EncodedTuple>> { ) -> Result<Vec<EncodedTuple>> {
let encoder = self.store.encoder();
let bindings_variables_keys = bindings let bindings_variables_keys = bindings
.variables() .variables()
.iter() .iter()
@ -732,7 +726,8 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
let mut result = vec![None; variables.len()]; let mut result = vec![None; variables.len()];
for (key, value) in values.iter().enumerate() { for (key, value) in values.iter().enumerate() {
if let Some(term) = value { if let Some(term) = value {
result[bindings_variables_keys[key]] = Some(encoder.encode_term(term)?); result[bindings_variables_keys[key]] =
Some(self.encoder.encode_term(term)?);
} }
} }
Ok(result) Ok(result)
@ -828,7 +823,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
) -> Result<TripleTemplateValue> { ) -> Result<TripleTemplateValue> {
Ok(match term_or_variable { Ok(match term_or_variable {
TermOrVariable::Term(term) => { TermOrVariable::Term(term) => {
TripleTemplateValue::Constant(self.store.encoder().encode_term(term)?) TripleTemplateValue::Constant(self.encoder.encode_term(term)?)
} }
TermOrVariable::Variable(variable) => { TermOrVariable::Variable(variable) => {
if variable.has_name() { if variable.has_name() {
@ -848,7 +843,7 @@ impl<'a, S: StoreConnection> PlanBuilder<'a, S> {
) -> Result<TripleTemplateValue> { ) -> Result<TripleTemplateValue> {
Ok(match named_node_or_variable { Ok(match named_node_or_variable {
NamedNodeOrVariable::NamedNode(term) => { NamedNodeOrVariable::NamedNode(term) => {
TripleTemplateValue::Constant(self.store.encoder().encode_named_node(term)?) TripleTemplateValue::Constant(self.encoder.encode_named_node(term)?)
} }
NamedNodeOrVariable::Variable(variable) => { NamedNodeOrVariable::Variable(variable) => {
if variable.has_name() { if variable.has_name() {

Loading…
Cancel
Save