Makes Variable::new validate the variable name

Ensures that a variable's name is always valid and easily serializable.
pull/58/head
Tpt 4 years ago
parent d93a05ed4d
commit 359da8d9c3
  1. 1
      CHANGELOG.md
  2. 5
      lib/src/sparql/csv_results.rs
  3. 2
      lib/src/sparql/mod.rs
  4. 91
      lib/src/sparql/model.rs
  5. 20
      lib/src/sparql/parser.rs
  6. 5
      lib/src/sparql/plan_builder.rs
  7. 4
      lib/src/sparql/xml_results.rs
  8. 8
      python/src/model.rs
  9. 7
      testsuite/src/sparql_evaluator.rs

@ -5,6 +5,7 @@
## Changed
- Fixes evaluation of `MONTH()` and `DAY()` functions on the `xsd:date` values.
- `Variable::new` now validates the variable name.
## [0.1.1] - 2020-08-14

@ -172,7 +172,10 @@ mod tests {
fn build_example() -> QueryResults {
QuerySolutionIter::new(
Rc::new(vec![Variable::new("x"), Variable::new("literal")]),
Rc::new(vec![
Variable::new_unchecked("x"),
Variable::new_unchecked("literal"),
]),
Box::new(
vec![
Ok(vec![

@ -27,7 +27,7 @@ pub use crate::sparql::model::QueryResultsFormat;
pub use crate::sparql::model::QuerySolution;
pub use crate::sparql::model::QuerySolutionIter;
pub use crate::sparql::model::QueryTripleIter;
pub use crate::sparql::model::Variable;
pub use crate::sparql::model::{Variable, VariableNameParseError};
pub use crate::sparql::parser::ParseError;
pub use crate::sparql::parser::{Query, Update};
use crate::sparql::plan::{PlanNode, TripleTemplate};

@ -7,6 +7,7 @@ use crate::sparql::error::EvaluationError;
use crate::sparql::json_results::write_json_results;
use crate::sparql::xml_results::{read_xml_results, write_xml_results};
use rand::random;
use std::error::Error;
use std::io::{BufRead, Write};
use std::rc::Rc;
use std::{fmt, io};
@ -248,7 +249,7 @@ impl QuerySolutionIter {
///
/// let store = MemoryStore::new();
/// if let QueryResults::Solutions(solutions) = store.query("SELECT ?s ?o WHERE { ?s ?p ?o }", QueryOptions::default())? {
/// assert_eq!(solutions.variables(), &[Variable::new("s"), Variable::new("o")]);
/// assert_eq!(solutions.variables(), &[Variable::new("s")?, Variable::new("o")?]);
/// }
/// # Result::<_,Box<dyn std::error::Error>>::Ok(())
/// ```
@ -409,8 +410,9 @@ impl Iterator for QueryTripleIter {
///
/// assert_eq!(
/// "?foo",
/// Variable::new("foo").to_string()
/// )
/// Variable::new("foo")?.to_string()
/// );
/// # Result::<_,oxigraph::sparql::VariableNameParseError>::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct Variable {
@ -418,8 +420,23 @@ pub struct Variable {
}
impl Variable {
/// Builds a [`Variable`](struct.Variable.html) from its name, checking the name first.
///
/// The name is given without the leading `?` or `$` sigil and must be a valid
/// variable name according to the SPARQL grammar.
///
/// # Errors
///
/// Returns a `VariableNameParseError` if the name fails validation.
pub fn new(name: impl Into<String>) -> Result<Self, VariableNameParseError> {
    let candidate: String = name.into();
    // Only wrap the string once it has passed validation.
    validate_variable_identifier(&candidate).map(|()| Self::new_unchecked(candidate))
}
/// Creates a variable name from a unique identifier without validation.
///
/// It is the caller's responsibility to ensure that `name` is a valid variable name
/// according to the SPARQL grammar.
///
/// [`new`](#method.new) is a safe version of this constructor and should be used for untrusted data.
#[inline]
pub fn new(name: impl Into<String>) -> Self {
pub fn new_unchecked(name: impl Into<String>) -> Self {
Variable { name: name.into() }
}
@ -435,7 +452,7 @@ impl Variable {
#[inline]
pub(crate) fn new_random() -> Self {
Self::new(format!("{:x}", random::<u128>()))
Self::new_unchecked(format!("{:x}", random::<u128>()))
}
}
@ -445,3 +462,67 @@ impl fmt::Display for Variable {
write!(f, "?{}", self.name)
}
}
/// Checks that `id` is a valid SPARQL variable name (without the `?`/`$` sigil).
///
/// Implements the `VARNAME` production of the SPARQL grammar:
///
/// ```text
/// VARNAME ::= ( PN_CHARS_U | [0-9] )
///             ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
/// ```
///
/// Returns `Ok(())` when `id` matches the production and a
/// `VariableNameParseError` when `id` is empty or contains a character that is
/// not allowed at its position.
fn validate_variable_identifier(id: &str) -> Result<(), VariableNameParseError> {
    let mut chars = id.chars();
    // An empty string is never a valid variable name.
    let front = chars.next().ok_or(VariableNameParseError {})?;
    if !is_variable_name_start(front) {
        return Err(VariableNameParseError {});
    }
    // `chars` has already been advanced past the first character.
    if chars.all(is_variable_name_continuation) {
        Ok(())
    } else {
        Err(VariableNameParseError {})
    }
}

/// Returns `true` if `c` may start a variable name: `PN_CHARS_U | [0-9]`.
///
/// Note that `':'` is deliberately not accepted: it belongs to the N-Triples
/// `PN_CHARS_U` production but not to the SPARQL one, so `?:x` is not valid SPARQL.
fn is_variable_name_start(c: char) -> bool {
    match c {
        '0'..='9'
        | '_'
        | 'A'..='Z'
        | 'a'..='z'
        | '\u{00C0}'..='\u{00D6}'
        | '\u{00D8}'..='\u{00F6}'
        | '\u{00F8}'..='\u{02FF}'
        | '\u{0370}'..='\u{037D}'
        | '\u{037F}'..='\u{1FFF}'
        | '\u{200C}'..='\u{200D}'
        | '\u{2070}'..='\u{218F}'
        | '\u{2C00}'..='\u{2FEF}'
        | '\u{3001}'..='\u{D7FF}'
        | '\u{F900}'..='\u{FDCF}'
        | '\u{FDF0}'..='\u{FFFD}'
        | '\u{10000}'..='\u{EFFFF}' => true,
        _ => false,
    }
}

/// Returns `true` if `c` may appear after the first character of a variable name:
/// any start character, plus `#x00B7`, `[#x0300-#x036F]` and `[#x203F-#x2040]`.
fn is_variable_name_continuation(c: char) -> bool {
    match c {
        '\u{00B7}' | '\u{0300}'..='\u{036F}' | '\u{203F}'..='\u{2040}' => true,
        _ => is_variable_name_start(c),
    }
}
/// The error produced when a string is rejected as a [`Variable`](struct.Variable.html) name.
///
/// It carries no payload; its `Display` output is a fixed message.
#[allow(missing_copy_implementations)]
#[derive(Debug)]
pub struct VariableNameParseError {}

impl fmt::Display for VariableNameParseError {
    /// Writes the fixed, human-readable description of the failure.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("The variable name is invalid")
    }
}

impl Error for VariableNameParseError {}

@ -475,17 +475,17 @@ fn copy_graph(
to: impl Into<Option<NamedNodeOrVariable>>,
) -> GraphUpdateOperation {
let bgp = GraphPattern::BGP(vec![TriplePattern::new(
Variable::new("s"),
Variable::new("p"),
Variable::new("o"),
Variable::new_unchecked("s"),
Variable::new_unchecked("p"),
Variable::new_unchecked("o"),
)
.into()]);
GraphUpdateOperation::DeleteInsert {
delete: Vec::new(),
insert: vec![QuadPattern::new(
Variable::new("s"),
Variable::new("p"),
Variable::new("o"),
Variable::new_unchecked("s"),
Variable::new_unchecked("p"),
Variable::new_unchecked("o"),
to.into(),
)],
using: DatasetSpec::default(),
@ -984,7 +984,7 @@ parser! {
if from == to {
Vec::new() // identity case
} else {
let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new("s"), Variable::new("p"), Variable::new("o")).into()]);
let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new_unchecked("s"), Variable::new_unchecked("p"), Variable::new_unchecked("o")).into()]);
vec![copy_graph(from, to)]
}
}
@ -995,7 +995,7 @@ parser! {
if from == to {
Vec::new() // identity case
} else {
let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new("s"), Variable::new("p"), Variable::new("o")).into()]);
let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new_unchecked("s"), Variable::new_unchecked("p"), Variable::new_unchecked("o")).into()]);
vec![GraphUpdateOperation::Drop { silent, graph: to.clone().into() }, copy_graph(from.clone(), to), GraphUpdateOperation::Drop { silent, graph: from.into() }]
}
}
@ -1006,7 +1006,7 @@ parser! {
if from == to {
Vec::new() // identity case
} else {
let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new("s"), Variable::new("p"), Variable::new("o")).into()]);
let bgp = GraphPattern::BGP(vec![TriplePattern::new(Variable::new_unchecked("s"), Variable::new_unchecked("p"), Variable::new_unchecked("o")).into()]);
vec![GraphUpdateOperation::Drop { silent, graph: to.clone().into() }, copy_graph(from, to)]
}
}
@ -1606,7 +1606,7 @@ parser! {
i:iri() { i.into() }
//[108]
rule Var() -> Variable = v:(VAR1() / VAR2()) { Variable::new(v) }
rule Var() -> Variable = v:(VAR1() / VAR2()) { Variable::new_unchecked(v) }
//[109]
rule GraphTerm() -> Term =

@ -734,7 +734,10 @@ impl<E: WriteEncoder<Error = EvaluationError>> PlanBuilder<E> {
PatternValue::Variable(variable_key(variables, variable))
}
TermOrVariable::Term(Term::BlankNode(bnode)) => {
PatternValue::Variable(variable_key(variables, &Variable::new(bnode.as_str())))
PatternValue::Variable(variable_key(
variables,
&Variable::new_unchecked(bnode.as_str()),
))
//TODO: very bad hack to convert bnode to variable
}
TermOrVariable::Term(term) => PatternValue::Constant(self.build_term(term)?),

@ -217,7 +217,7 @@ pub fn read_xml_results(source: impl BufRead + 'static) -> Result<QueryResults,
mapping.insert(var.as_bytes().to_vec(), i);
}
return Ok(QueryResults::Solutions(QuerySolutionIter::new(
Rc::new(variables.into_iter().map(Variable::new).collect()),
Rc::new(variables.into_iter().map(Variable::new).collect::<Result<Vec<_>,_>>().map_err(invalid_data_error)?),
Box::new(ResultsIterator {
reader,
buffer: Vec::default(),
@ -255,7 +255,7 @@ pub fn read_xml_results(source: impl BufRead + 'static) -> Result<QueryResults,
State::AfterHead => {
return if event.name() == b"results" {
Ok(QueryResults::Solutions(QuerySolutionIter::new(
Rc::new(variables.into_iter().map(Variable::new).collect()),
Rc::new(variables.into_iter().map(Variable::new).collect::<Result<Vec<_>,_>>().map_err(invalid_data_error)?),
Box::new(empty()),
)))
} else {

@ -113,6 +113,7 @@ impl PyObjectProtocol for PyNamedNode {
///
/// :param value: the `blank node ID <https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node-identifier>`_ (if not present, a random blank node ID is automatically generated).
/// :type value: str, optional
/// :raises ValueError: if the blank node ID is invalid according to NTriples, Turtle and SPARQL grammars.
///
/// The :py:func:`str` function provides a serialization compatible with NTriples, Turtle and SPARQL:
///
@ -746,6 +747,7 @@ impl PyIterProtocol for PyQuad {
///
/// :param value: the variable name as a string
/// :type value: str
/// :raises ValueError: if the variable name is invalid according to the SPARQL grammar.
///
/// The :py:func:`str` function provides a serialization compatible with SPARQL:
///
@ -779,8 +781,10 @@ impl<'a> From<&'a PyVariable> for &'a Variable {
#[pymethods]
impl PyVariable {
#[new]
fn new(value: String) -> Self {
Variable::new(value).into()
fn new(value: String) -> PyResult<Self> {
Ok(Variable::new(value)
.map_err(|e| ValueError::py_err(e.to_string()))?
.into())
}
/// :return: the variable name

@ -466,7 +466,7 @@ impl StaticQueryResults {
objects_for_subject_predicate(&dataset, &result_set, rs::RESULT_VARIABLE)
.filter_map(|object| {
if let Term::Literal(l) = object {
Some(Variable::new(l.value()))
Some(Variable::new_unchecked(l.value()))
} else {
None
}
@ -497,7 +497,10 @@ impl StaticQueryResults {
rs::VALUE,
),
) {
Some((Variable::new(variable.value()), value))
Some((
Variable::new_unchecked(variable.value()),
value,
))
} else {
None
}

Loading…
Cancel
Save