Adds beginning of SPARQL FILTER evaluation

pull/10/head
Tpt 6 years ago
parent aa7cd02629
commit 793734f2a8
  1. 28
      src/model/literal.rs
  2. 8
      src/model/triple.rs
  3. 4
      src/sparql/algebra.rs
  4. 39
      src/store/numeric_encoder.rs
  5. 74
      src/store/sparql.rs

@ -119,6 +119,34 @@ impl Literal {
_ => false, _ => false,
} }
} }
/// Tells whether this literal is stored as an [xsd:string](http://www.w3.org/2001/XMLSchema#string) value
pub fn is_string(&self) -> bool {
    if let LiteralContent::String(_) = self.0 {
        true
    } else {
        false
    }
}
/// Tells whether this literal is stored as an [xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) value
pub fn is_boolean(&self) -> bool {
    if let LiteralContent::Boolean(_) = self.0 {
        true
    } else {
        false
    }
}
/// Computes the [effective boolean value](https://www.w3.org/TR/sparql11-query/#ebv) of the literal.
///
/// Booleans yield their own value, simple and string literals are `true` when non-empty,
/// and language-tagged or other typed literals yield `None`.
pub fn to_bool(&self) -> Option<bool> {
    //TODO: numeric literals
    match &self.0 {
        LiteralContent::Boolean(flag) => Some(*flag),
        LiteralContent::SimpleLiteral(text) => Some(!text.is_empty()),
        LiteralContent::String(text) => Some(!text.is_empty()),
        LiteralContent::LanguageTaggedString { .. } | LiteralContent::TypedLiteral { .. } => None,
    }
}
} }
impl fmt::Display for Literal { impl fmt::Display for Literal {

@ -80,6 +80,14 @@ impl Term {
Term::Literal(_) => true, Term::Literal(_) => true,
} }
} }
/// Computes the [effective boolean value](https://www.w3.org/TR/sparql11-query/#ebv) of the term.
///
/// Only literals can have one; every other kind of term yields `None`.
pub fn to_bool(&self) -> Option<bool> {
    if let Term::Literal(literal) = self {
        literal.to_bool()
    } else {
        None
    }
}
} }
impl fmt::Display for Term { impl fmt::Display for Term {

@ -522,8 +522,8 @@ impl fmt::Display for Expression {
Expression::IRIFunctionCall(e) => write!(f, "IRI({})", e), Expression::IRIFunctionCall(e) => write!(f, "IRI({})", e),
Expression::BNodeFunctionCall(v) => v Expression::BNodeFunctionCall(v) => v
.as_ref() .as_ref()
.map(|id| write!(f, "BOUND({})", id)) .map(|id| write!(f, "BNODE({})", id))
.unwrap_or_else(|| write!(f, "BOUND()")), .unwrap_or_else(|| write!(f, "BNODE()")),
Expression::RandFunctionCall() => write!(f, "RAND()"), Expression::RandFunctionCall() => write!(f, "RAND()"),
Expression::AbsFunctionCall(e) => write!(f, "ABS({})", e), Expression::AbsFunctionCall(e) => write!(f, "ABS({})", e),
Expression::CeilFunctionCall(e) => write!(f, "CEIL({})", e), Expression::CeilFunctionCall(e) => write!(f, "CEIL({})", e),

@ -7,6 +7,7 @@ use std::str;
use std::str::FromStr; use std::str::FromStr;
use url::Url; use url::Url;
use uuid::Uuid; use uuid::Uuid;
use Error;
use Result; use Result;
pub trait BytesStore { pub trait BytesStore {
@ -22,6 +23,9 @@ const TYPE_BLANK_NODE_ID: u8 = 2;
const TYPE_SIMPLE_LITERAL_ID: u8 = 3; const TYPE_SIMPLE_LITERAL_ID: u8 = 3;
const TYPE_LANG_STRING_LITERAL_ID: u8 = 4; const TYPE_LANG_STRING_LITERAL_ID: u8 = 4;
const TYPE_TYPED_LITERAL_ID: u8 = 5; const TYPE_TYPED_LITERAL_ID: u8 = 5;
// Type tags for the literal kinds that get a dedicated encoding.
// The term reader dispatches on this tag, so every value must be unique:
// the string tag previously shared `6` with the `true` boolean tag, which
// made the boolean arm unreachable when reading a term back.
const TYPE_STRING_LITERAL: u8 = 6;
const TYPE_BOOLEAN_LITERAL_TRUE: u8 = 7;
const TYPE_BOOLEAN_LITERAL_FALSE: u8 = 8;
pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {}; pub static ENCODED_DEFAULT_GRAPH: EncodedTerm = EncodedTerm::DefaultGraph {};
@ -33,6 +37,8 @@ pub enum EncodedTerm {
SimpleLiteral { value_id: u64 }, SimpleLiteral { value_id: u64 },
LangStringLiteral { value_id: u64, language_id: u64 }, LangStringLiteral { value_id: u64, language_id: u64 },
TypedLiteral { value_id: u64, datatype_id: u64 }, TypedLiteral { value_id: u64, datatype_id: u64 },
StringLiteral { value_id: u64 },
BooleanLiteral(bool),
} }
impl EncodedTerm { impl EncodedTerm {
@ -44,10 +50,19 @@ impl EncodedTerm {
EncodedTerm::SimpleLiteral { .. } => TYPE_SIMPLE_LITERAL_ID, EncodedTerm::SimpleLiteral { .. } => TYPE_SIMPLE_LITERAL_ID,
EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID,
EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID,
EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL,
EncodedTerm::BooleanLiteral(true) => TYPE_BOOLEAN_LITERAL_TRUE,
EncodedTerm::BooleanLiteral(false) => TYPE_BOOLEAN_LITERAL_FALSE,
} }
} }
} }
/// Lets a Rust `bool` be turned directly into the matching boolean literal term.
impl From<bool> for EncodedTerm {
    fn from(flag: bool) -> EncodedTerm {
        EncodedTerm::BooleanLiteral(flag)
    }
}
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct EncodedQuad { pub struct EncodedQuad {
pub subject: EncodedTerm, pub subject: EncodedTerm,
@ -102,6 +117,11 @@ impl<R: Read> TermReader for R {
datatype_id: self.read_u64::<NetworkEndian>()?, datatype_id: self.read_u64::<NetworkEndian>()?,
value_id: self.read_u64::<NetworkEndian>()?, value_id: self.read_u64::<NetworkEndian>()?,
}), }),
// A term tagged as xsd:string must come back as `StringLiteral`, the variant
// that carries this tag; decoding it as `SimpleLiteral` would change the
// term's kind on a write/read round trip.
TYPE_STRING_LITERAL => Ok(EncodedTerm::StringLiteral {
    value_id: self.read_u64::<NetworkEndian>()?,
}),
TYPE_BOOLEAN_LITERAL_TRUE => Ok(EncodedTerm::BooleanLiteral(true)),
TYPE_BOOLEAN_LITERAL_FALSE => Ok(EncodedTerm::BooleanLiteral(false)),
_ => Err("the term buffer has an invalid type id".into()), _ => Err("the term buffer has an invalid type id".into()),
} }
} }
@ -177,6 +197,11 @@ impl<R: Write> TermWriter for R {
self.write_u64::<NetworkEndian>(datatype_id)?; self.write_u64::<NetworkEndian>(datatype_id)?;
self.write_u64::<NetworkEndian>(value_id)?; self.write_u64::<NetworkEndian>(value_id)?;
} }
EncodedTerm::StringLiteral { value_id } => {
self.write_u64::<NetworkEndian>(value_id)?;
}
EncodedTerm::BooleanLiteral(_) => {}
} }
Ok(()) Ok(())
} }
@ -237,6 +262,16 @@ impl<S: BytesStore> Encoder<S> {
value_id: self.encode_str_value(&literal.value())?, value_id: self.encode_str_value(&literal.value())?,
} }
} }
} else if literal.is_string() {
EncodedTerm::StringLiteral {
value_id: self.encode_str_value(&literal.value())?,
}
} else if literal.is_boolean() {
EncodedTerm::BooleanLiteral(
literal
.to_bool()
.ok_or_else(|| Error::from("boolean literal without boolean value"))?,
)
} else { } else {
EncodedTerm::TypedLiteral { EncodedTerm::TypedLiteral {
value_id: self.encode_str_value(&literal.value())?, value_id: self.encode_str_value(&literal.value())?,
@ -309,6 +344,10 @@ impl<S: BytesStore> Encoder<S> {
self.decode_str_value(value_id)?, self.decode_str_value(value_id)?,
NamedNode::from(self.decode_url_value(datatype_id)?), NamedNode::from(self.decode_url_value(datatype_id)?),
).into()), ).into()),
EncodedTerm::StringLiteral { value_id } => {
Ok(Literal::from(self.decode_str_value(value_id)?).into())
}
EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(value).into()),
} }
} }

@ -124,7 +124,7 @@ impl EncodedBindingsIterator {
iter: Box::new(oks.clone().into_iter().map(Ok)), iter: Box::new(oks.clone().into_iter().map(Ok)),
}, },
EncodedBindingsIterator { EncodedBindingsIterator {
variables: variables, variables,
iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))), iter: Box::new(errors.into_iter().map(Err).chain(oks.into_iter().map(Ok))),
}, },
) )
@ -149,6 +149,7 @@ fn slice_key<T: Eq>(slice: &[T], element: &T) -> Option<usize> {
None None
} }
#[derive(Clone)]
pub struct SparqlEvaluator<S: EncodedQuadsStore> { pub struct SparqlEvaluator<S: EncodedQuadsStore> {
store: Arc<S>, store: Arc<S>,
} }
@ -218,7 +219,26 @@ impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
self.eval_multi_set_pattern(b, self.eval_multi_set_pattern(a, from)?) self.eval_multi_set_pattern(b, self.eval_multi_set_pattern(a, from)?)
} }
MultiSetPattern::LeftJoin(a, b, e) => unimplemented!(), MultiSetPattern::LeftJoin(a, b, e) => unimplemented!(),
MultiSetPattern::Filter(e, p) => unimplemented!(), MultiSetPattern::Filter(e, p) => {
let EncodedBindingsIterator { variables, iter } =
self.eval_multi_set_pattern(p, from)?;
let expression = e.clone();
let evaluator = Self {
store: self.store.clone(),
};
Ok(EncodedBindingsIterator {
variables: variables.clone(),
iter: Box::new(iter.filter(move |val| match val {
Ok(binding) => {
    // A solution passes the filter only when the expression's effective
    // boolean value is `true`. Producing *some* term is not enough
    // (`FILTER(false)` must reject), and evaluation errors or results
    // without a boolean value drop the solution.
    match evaluator
        .eval_expression(&expression, binding, &variables)
        .and_then(|term| evaluator.to_bool(term))
    {
        Ok(Some(true)) => true,
        _ => false,
    }
}
Err(_) => true,
})),
})
}
MultiSetPattern::Union(a, b) => { MultiSetPattern::Union(a, b) => {
let (from1, from2) = from.duplicate(); let (from1, from2) = from.duplicate();
Ok(self Ok(self
@ -314,6 +334,56 @@ impl<S: EncodedQuadsStore> SparqlEvaluator<S> {
unimplemented!() unimplemented!()
} }
/// Evaluates `expr` against one solution row.
///
/// `binding` holds the row's values and `variables` the variable each slot
/// belongs to; a variable is looked up through its position in `variables`.
///
/// Returns `Ok(None)` when the expression has no value for this row (unbound
/// variable, operand without an effective boolean value) and `Err` when the
/// store fails or the expression kind is not supported yet.
fn eval_expression(
    &self,
    expr: &Expression,
    binding: &[Option<EncodedTerm>],
    variables: &[Variable],
) -> Result<Option<EncodedTerm>> {
    match expr {
        Expression::ConstantExpression(TermOrVariable::Term(t)) => {
            Ok(Some(self.store.encoder().encode_term(t)?))
        }
        Expression::ConstantExpression(TermOrVariable::Variable(v)) => {
            // A slot missing from the row is treated like an unbound variable
            // instead of panicking on an out-of-range index.
            Ok(slice_key(variables, v)
                .and_then(|key| binding.get(key))
                .and_then(|term| *term))
        }
        Expression::OrExpression(a, b) => {
            let left = self.to_bool(self.eval_expression(a, binding, variables)?)?;
            if left == Some(true) {
                // `true || anything` is true: the right operand is not needed.
                return Ok(Some(true.into()));
            }
            let right = self.to_bool(self.eval_expression(b, binding, variables)?)?;
            // The result is always a boolean (or nothing), never the raw
            // right-hand term.
            Ok(match (left, right) {
                (_, Some(true)) => Some(true.into()),
                (Some(false), Some(false)) => Some(false.into()),
                _ => None,
            })
        }
        Expression::AndExpression(a, b) => {
            let left = self.to_bool(self.eval_expression(a, binding, variables)?)?;
            if left == Some(false) {
                // `false && anything` is false: the right operand is not needed.
                return Ok(Some(false.into()));
            }
            let right = self.to_bool(self.eval_expression(b, binding, variables)?)?;
            // The result is always a boolean (or nothing), never the raw
            // right-hand term.
            Ok(match (left, right) {
                (_, Some(false)) => Some(false.into()),
                (Some(true), Some(true)) => Some(true.into()),
                _ => None,
            })
        }
        Expression::UnaryNotExpression(e) => Ok(self
            .to_bool(self.eval_expression(e, binding, variables)?)?
            .map(|v| (!v).into())),
        e => Err(format!("Evaluation of expression {} is not implemented yet", e).into()),
    }
}
/// Computes the effective boolean value of an optional encoded term.
///
/// Boolean literals answer directly; named and blank nodes, as well as a
/// missing term, have no boolean value. Any other term is decoded through
/// the store's encoder and asked for its own boolean value.
fn to_bool(&self, term: Option<EncodedTerm>) -> Result<Option<bool>> {
    let encoded = match term {
        Some(encoded) => encoded,
        None => return Ok(None),
    };
    match encoded {
        EncodedTerm::BooleanLiteral(flag) => Ok(Some(flag)),
        EncodedTerm::NamedNode { .. } | EncodedTerm::BlankNode(_) => Ok(None),
        other => {
            let decoded = self.store.encoder().decode_term(other)?;
            Ok(decoded.to_bool())
        }
    }
}
fn binding_value_lookup_from_term_or_variable( fn binding_value_lookup_from_term_or_variable(
&self, &self,
term_or_variable: &TermOrVariable, term_or_variable: &TermOrVariable,

Loading…
Cancel
Save