SPARQL: adds support for REGEX

pull/10/head
Tpt 6 years ago
parent 4ac6828311
commit ee0a8574ba
  1. 1
      lib/Cargo.toml
  2. 9
      lib/src/lib.rs
  3. 47
      lib/src/sparql/eval.rs
  4. 4
      lib/tests/sparql_test_cases.rs

@ -28,6 +28,7 @@ rust_decimal = "0.10"
chrono = "0.4"
language-tags = "0.2"
failure = "0.1"
regex = "1"
[build-dependencies]
peg = "0.5"

@ -32,20 +32,21 @@
)]
extern crate byteorder;
#[macro_use]
extern crate lazy_static;
extern crate chrono;
#[macro_use]
extern crate failure;
extern crate language_tags;
#[macro_use]
extern crate lazy_static;
extern crate num_traits;
extern crate ordered_float;
extern crate quick_xml;
extern crate regex;
#[cfg(feature = "rocksdb")]
extern crate rocksdb;
extern crate rust_decimal;
extern crate url;
extern crate uuid;
#[macro_use]
extern crate failure;
pub mod model;
pub mod rio;

@ -8,6 +8,7 @@ use num_traits::FromPrimitive;
use num_traits::One;
use num_traits::ToPrimitive;
use ordered_float::OrderedFloat;
use regex::RegexBuilder;
use rust_decimal::Decimal;
use sparql::algebra::*;
use sparql::plan::*;
@ -22,6 +23,8 @@ use store::encoded::EncodedQuadsStore;
use store::numeric_encoder::*;
use Result;
/// Size limit handed to `RegexBuilder::size_limit` when compiling the pattern
/// of a SPARQL `REGEX` expression.
const REGEX_SIZE_LIMIT: usize = 1_000_000;
/// Boxed iterator yielding `Result<EncodedTuple>` items, valid for lifetime `'a`.
type EncodedTuplesIterator<'a> = Box<dyn Iterator<Item = Result<EncodedTuple>> + 'a>;
pub struct SimpleEvaluator<S: EncodedQuadsStore> {
@ -484,7 +487,36 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
}.into(),
)
}
PlanExpression::Regex(text, pattern, flags) => unimplemented!(),
PlanExpression::Regex(text, pattern, flags) => {
    // Evaluates REGEX(text, pattern [, flags]).
    // Any operand that fails to evaluate or to convert to a string, or a
    // pattern that cannot be compiled, makes the whole expression yield `None`.
    // TODO: avoid recompiling the regex on every evaluation
    let pattern = self.to_simple_string(self.eval_expression(pattern, tuple)?)?;
    let mut regex_builder = RegexBuilder::new(&pattern);
    regex_builder.size_limit(REGEX_SIZE_LIMIT);
    if let Some(flags) = flags {
        let flags = self.to_simple_string(self.eval_expression(flags, tuple)?)?;
        // Map each flag character onto the matching builder option;
        // unrecognised characters are ignored.
        for flag in flags.chars() {
            match flag {
                's' => {
                    regex_builder.dot_matches_new_line(true);
                }
                'm' => {
                    regex_builder.multi_line(true);
                }
                'i' => {
                    regex_builder.case_insensitive(true);
                }
                'x' => {
                    regex_builder.ignore_whitespace(true);
                }
                'q' => (), //TODO: implement
                _ => (),
            }
        }
    }
    // Compile exactly once and reuse the compiled regex for the match below.
    let regex = regex_builder.build().ok()?;
    let text = self.to_string(self.eval_expression(text, tuple)?)?;
    Some(regex.is_match(&text).into())
}
PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
EncodedTerm::BooleanLiteral(value) => Some(value.into()),
EncodedTerm::SimpleLiteral { value_id }
@ -652,6 +684,19 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
}
}
/// Returns the string stored under the `value_id` of a simple, string or
/// language-tagged literal.
///
/// Yields `None` for any other kind of term, when the store lookup fails or
/// finds nothing, or when the stored bytes are not valid UTF-8.
fn to_string(&self, term: EncodedTerm) -> Option<String> {
    // Only the three string-like literal variants carry a usable value id.
    let id = match term {
        EncodedTerm::SimpleLiteral { value_id }
        | EncodedTerm::StringLiteral { value_id }
        | EncodedTerm::LangStringLiteral { value_id, .. } => value_id,
        _ => return None,
    };
    let bytes = self.store.get_bytes(id).ok()??;
    let decoded = str::from_utf8(&bytes).ok()?;
    Some(decoded.to_owned())
}
fn parse_numeric_operands(
&self,
e1: &PlanExpression,

@ -82,7 +82,7 @@ fn sparql_w3c_syntax_testsuite() {
#[test]
fn sparql_w3c_query_evaluation_testsuite() {
//TODO: dataset graph open-world regex
//TODO: dataset graph open-world
let manifest_10_urls = vec![
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest.ttl")
.unwrap(),
@ -118,6 +118,8 @@ fn sparql_w3c_query_evaluation_testsuite() {
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest.ttl")
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/regex/manifest.ttl")
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/solution-seq/manifest.ttl")
.unwrap(),
Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest.ttl").unwrap(),

Loading…
Cancel
Save