From ee0a8574ba6da12ad1610a47525c3cef1deeebf4 Mon Sep 17 00:00:00 2001
From: Tpt
Date: Thu, 15 Nov 2018 14:54:53 +0100
Subject: [PATCH] SPARQL: adds support of REGEX

Implements the evaluation of PlanExpression::Regex on top of the `regex`
crate:

* the pattern (and the optional flags) must evaluate to simple strings;
  the flags 's', 'm', 'i' and 'x' are mapped to the matching RegexBuilder
  options, 'q' is not implemented yet and any other flag is ignored;
* the compiled size of the regex is capped by REGEX_SIZE_LIMIT;
* the regex is compiled once per evaluation and the compiled value is
  reused for the match; a pattern that fails to compile makes the
  expression evaluate to None;
* the new `to_string` helper returns the lexical value of simple, string
  and language-tagged literals, and None for any other term.

The W3C data-r2 "regex" manifest is added to the evaluation test suite.
---
 lib/Cargo.toml                 |  1 +
 lib/src/lib.rs                 |  9 ++++---
 lib/src/sparql/eval.rs         | 47 +++++++++++++++++++++++++++++++++-
 lib/tests/sparql_test_cases.rs |  4 ++-
 4 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index cc4af126..ab811567 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -28,6 +28,7 @@ rust_decimal = "0.10"
 chrono = "0.4"
 language-tags = "0.2"
 failure = "0.1"
+regex = "1"
 
 [build-dependencies]
 peg = "0.5"
diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index 80a577ec..8cfa3214 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -32,20 +32,21 @@
 )]
 
 extern crate byteorder;
-#[macro_use]
-extern crate lazy_static;
 extern crate chrono;
+#[macro_use]
+extern crate failure;
 extern crate language_tags;
+#[macro_use]
+extern crate lazy_static;
 extern crate num_traits;
 extern crate ordered_float;
 extern crate quick_xml;
+extern crate regex;
 #[cfg(feature = "rocksdb")]
 extern crate rocksdb;
 extern crate rust_decimal;
 extern crate url;
 extern crate uuid;
-#[macro_use]
-extern crate failure;
 
 pub mod model;
 pub mod rio;
diff --git a/lib/src/sparql/eval.rs b/lib/src/sparql/eval.rs
index 4e7036d8..2dca7822 100644
--- a/lib/src/sparql/eval.rs
+++ b/lib/src/sparql/eval.rs
@@ -8,6 +8,7 @@ use num_traits::FromPrimitive;
 use num_traits::One;
 use num_traits::ToPrimitive;
 use ordered_float::OrderedFloat;
+use regex::RegexBuilder;
 use rust_decimal::Decimal;
 use sparql::algebra::*;
 use sparql::plan::*;
@@ -22,6 +23,8 @@ use store::encoded::EncodedQuadsStore;
 use store::numeric_encoder::*;
 use Result;
 
+const REGEX_SIZE_LIMIT: usize = 1_000_000;
+
 type EncodedTuplesIterator<'a> = Box<Iterator<Item = Result<EncodedTuple>> + 'a>;
 
 pub struct SimpleEvaluator<S: EncodedQuadsStore> {
@@ -484,7 +487,36 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
                     }.into(),
                 )
             }
-            PlanExpression::Regex(text, pattern, flags) => unimplemented!(),
+            PlanExpression::Regex(text, pattern, flags) => {
+                // TODO: avoid recompiling the regex for every evaluated tuple
+                let pattern = self.to_simple_string(self.eval_expression(pattern, tuple)?)?;
+                let mut regex_builder = RegexBuilder::new(&pattern);
+                regex_builder.size_limit(REGEX_SIZE_LIMIT);
+                if let Some(flags) = flags {
+                    let flags = self.to_simple_string(self.eval_expression(flags, tuple)?)?;
+                    for flag in flags.chars() {
+                        match flag {
+                            's' => {
+                                regex_builder.dot_matches_new_line(true);
+                            }
+                            'm' => {
+                                regex_builder.multi_line(true);
+                            }
+                            'i' => {
+                                regex_builder.case_insensitive(true);
+                            }
+                            'x' => {
+                                regex_builder.ignore_whitespace(true);
+                            }
+                            'q' => (), //TODO: implement
+                            _ => (),
+                        }
+                    }
+                }
+                let regex = regex_builder.build().ok()?;
+                let text = self.to_string(self.eval_expression(text, tuple)?)?;
+                Some(regex.is_match(&text).into())
+            }
             PlanExpression::BooleanCast(e) => match self.eval_expression(e, tuple)? {
                 EncodedTerm::BooleanLiteral(value) => Some(value.into()),
                 EncodedTerm::SimpleLiteral { value_id }
@@ -652,6 +684,19 @@ impl<S: EncodedQuadsStore> SimpleEvaluator<S> {
         }
     }
 
+    fn to_string(&self, term: EncodedTerm) -> Option<String> {
+        match term {
+            EncodedTerm::SimpleLiteral { value_id }
+            | EncodedTerm::StringLiteral { value_id }
+            | EncodedTerm::LangStringLiteral { value_id, .. } => Some(
+                str::from_utf8(&self.store.get_bytes(value_id).ok()??)
+                    .ok()?
+                    .to_owned(),
+            ),
+            _ => None,
+        }
+    }
+
     fn parse_numeric_operands(
         &self,
         e1: &PlanExpression,
diff --git a/lib/tests/sparql_test_cases.rs b/lib/tests/sparql_test_cases.rs
index 5b4ccb3a..68e4300b 100644
--- a/lib/tests/sparql_test_cases.rs
+++ b/lib/tests/sparql_test_cases.rs
@@ -82,7 +82,7 @@ fn sparql_w3c_syntax_testsuite() {
 
 #[test]
 fn sparql_w3c_query_evaluation_testsuite() {
-    //TODO: dataset graph open-world regex
+    //TODO: dataset graph open-world
     let manifest_10_urls = vec![
         Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/algebra/manifest.ttl")
             .unwrap(),
@@ -118,6 +118,8 @@ fn sparql_w3c_query_evaluation_testsuite() {
             .unwrap(),
         Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/reduced/manifest.ttl")
             .unwrap(),
+        Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/regex/manifest.ttl")
+            .unwrap(),
         Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/solution-seq/manifest.ttl")
             .unwrap(),
         Url::parse("http://www.w3.org/2001/sw/DataAccess/tests/data-r2/sort/manifest.ttl").unwrap(),