Easy parsing of terms from their serialization and SPARQL TSV parser

pull/58/head
Tpt 4 years ago
parent 359da8d9c3
commit c6d26a2a37
  1. 2
      CHANGELOG.md
  2. 2
      lib/src/model/mod.rs
  3. 340
      lib/src/model/parser.rs
  4. 62
      lib/src/sparql/csv_results.rs
  5. 5
      lib/src/sparql/model.rs
  6. 5
      testsuite/src/sparql_evaluator.rs
  7. 13
      testsuite/tests/sparql.rs

@ -2,6 +2,8 @@
### Added ### Added
- [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/) support for Rust, Python and JavaScript. - [SPARQL 1.1 Update](https://www.w3.org/TR/sparql11-update/) support for Rust, Python and JavaScript.
- [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/) serializers and TSV format parser.
- `std::str::FromStr` implementations for `NamedNode`, `BlankNode`, `Literal`, `Term` and `Variable`, making it easy to parse the Turtle/SPARQL serialization of these terms.
## Changed ## Changed
- Fixes evaluation of `MONTH()` and `DAY()` functions on the `xsd:date` values. - Fixes evaluation of `MONTH()` and `DAY()` functions on the `xsd:date` values.

@ -5,6 +5,7 @@
mod blank_node; mod blank_node;
mod literal; mod literal;
mod named_node; mod named_node;
mod parser;
#[cfg(feature = "sophia")] #[cfg(feature = "sophia")]
mod sophia; mod sophia;
mod triple; mod triple;
@ -14,6 +15,7 @@ pub(crate) mod xsd;
pub use crate::model::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef}; pub use crate::model::blank_node::{BlankNode, BlankNodeIdParseError, BlankNodeRef};
pub use crate::model::literal::{Literal, LiteralRef}; pub use crate::model::literal::{Literal, LiteralRef};
pub use crate::model::named_node::{NamedNode, NamedNodeRef}; pub use crate::model::named_node::{NamedNode, NamedNodeRef};
pub use crate::model::parser::TermParseError;
pub use crate::model::triple::{ pub use crate::model::triple::{
GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Term, TermRef, GraphName, GraphNameRef, NamedOrBlankNode, NamedOrBlankNodeRef, Quad, QuadRef, Term, TermRef,
Triple, TripleRef, Triple, TripleRef,

@ -0,0 +1,340 @@
use crate::model::blank_node::{BlankNode, BlankNodeIdParseError};
use crate::model::named_node::NamedNode;
use crate::model::vocab::xsd;
use crate::model::{Literal, Term};
use crate::sparql::{Variable, VariableNameParseError};
use oxilangtag::LanguageTagParseError;
use oxiri::IriParseError;
use std::char;
use std::error::Error;
use std::fmt;
use std::str::{Chars, FromStr};
impl FromStr for NamedNode {
type Err = TermParseError;
/// Parses a named node from its NTriples and Turtle serialization
///
/// ```
/// use oxigraph::model::NamedNode;
/// use std::str::FromStr;
///
/// assert_eq!(NamedNode::from_str("<http://example.com>").unwrap(), NamedNode::new("http://example.com").unwrap())
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
if !s.starts_with('<') || !s.ends_with('>') {
return Err(TermParseError::msg(
"Named node serialization should be enclosed between < and >",
));
}
NamedNode::new(&s[1..s.len() - 1]).map_err(|error| TermParseError {
kind: TermParseErrorKind::Iri {
value: s.to_owned(),
error,
},
})
}
}
impl FromStr for BlankNode {
type Err = TermParseError;
/// Parses a blank node from its NTriples and Turtle serialization
///
/// ```
/// use oxigraph::model::BlankNode;
/// use std::str::FromStr;
///
/// assert_eq!(BlankNode::from_str("_:ex").unwrap(), BlankNode::new("ex").unwrap())
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
if !s.starts_with("_:") {
return Err(TermParseError::msg(
"Blank node serialization should start with '_:'",
));
}
BlankNode::new(&s[2..]).map_err(|error| TermParseError {
kind: TermParseErrorKind::BlankNode {
value: s.to_owned(),
error,
},
})
}
}
impl FromStr for Literal {
type Err = TermParseError;
/// Parses a literal from its NTriples or Turtle serialization
///
/// ```
/// use oxigraph::model::{Literal, NamedNode, vocab::xsd};
/// use std::str::FromStr;
///
/// assert_eq!(Literal::from_str("\"ex\\n\"").unwrap(), Literal::new_simple_literal("ex\n"));
/// assert_eq!(Literal::from_str("\"ex\"@en").unwrap(), Literal::new_language_tagged_literal("ex", "en").unwrap());
/// assert_eq!(Literal::from_str("\"2020\"^^<http://www.w3.org/2001/XMLSchema#gYear>").unwrap(), Literal::new_typed_literal("2020", NamedNode::new("http://www.w3.org/2001/XMLSchema#gYear").unwrap()));
/// assert_eq!(Literal::from_str("true").unwrap(), Literal::new_typed_literal("true", xsd::BOOLEAN));
/// assert_eq!(Literal::from_str("+122").unwrap(), Literal::new_typed_literal("+122", xsd::INTEGER));
/// assert_eq!(Literal::from_str("-122.23").unwrap(), Literal::new_typed_literal("-122.23", xsd::DECIMAL));
/// assert_eq!(Literal::from_str("-122e+1").unwrap(), Literal::new_typed_literal("-122e+1", xsd::DOUBLE));
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
if s.starts_with('"') {
let mut value = String::with_capacity(s.len() - 2);
let mut chars = s[1..].chars();
while let Some(c) = chars.next() {
match c {
'"' => {
let remain = chars.as_str();
return if remain.is_empty() {
Ok(Literal::new_simple_literal(value))
} else if remain.starts_with('@') {
Literal::new_language_tagged_literal(value, &remain[1..]).map_err(
|error| TermParseError {
kind: TermParseErrorKind::LanguageTag {
value: remain[1..].to_owned(),
error,
},
},
)
} else if remain.starts_with("^^") {
Ok(Literal::new_typed_literal(
value,
NamedNode::from_str(&remain[2..])?,
))
} else {
Err(TermParseError::msg("Unexpected characters after a literal"))
};
}
'\\' => {
if let Some(c) = chars.next() {
value.push(match c {
't' => '\t',
'b' => '\u{8}',
'n' => '\n',
'r' => '\r',
'f' => '\u{C}',
'"' => '"',
'\'' => '\'',
'\\' => '\\',
'u' => read_hexa_char(&mut chars, 4)?,
'U' => read_hexa_char(&mut chars, 8)?,
_ => return Err(TermParseError::msg("Unexpected escaped char")),
})
} else {
return Err(TermParseError::msg("Unexpected literal end"));
}
}
c => value.push(c),
}
}
Err(TermParseError::msg("Unexpected literal end"))
} else if s == "true" {
Ok(Literal::new_typed_literal("true", xsd::BOOLEAN))
} else if s == "false" {
Ok(Literal::new_typed_literal("false", xsd::BOOLEAN))
} else {
let input = s.as_bytes();
if input.is_empty() {
return Err(TermParseError::msg("Empty term serialization"));
}
let mut cursor = match input.get(0) {
Some(b'+') | Some(b'-') => 1,
_ => 0,
};
let mut count_before: usize = 0;
while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
count_before += 1;
cursor += 1;
}
if cursor == input.len() {
return if count_before > 0 {
Ok(Literal::new_typed_literal(s, xsd::INTEGER))
} else {
Err(TermParseError::msg("Empty integer serialization"))
};
}
let mut count_after: usize = 0;
if input[cursor] == b'.' {
cursor += 1;
while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
count_after += 1;
cursor += 1;
}
}
if cursor == input.len() {
return if count_after > 0 {
Ok(Literal::new_typed_literal(s, xsd::DECIMAL))
} else {
Err(TermParseError::msg(
"Decimal serialization without floating part",
))
};
}
if input[cursor] != b'e' && input[cursor] != b'E' {
return Err(TermParseError::msg("Double serialization without exponent"));
}
cursor += 1;
cursor += match input.get(cursor) {
Some(b'+') | Some(b'-') => 1,
_ => 0,
};
let mut count_exponent = 0;
while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
count_exponent += 1;
cursor += 1;
}
if cursor == input.len() && count_exponent > 0 {
Ok(Literal::new_typed_literal(s, xsd::DOUBLE))
} else {
Err(TermParseError::msg(
"Double serialization with an invalid exponent",
))
}
}
}
}
/// Consumes exactly `len` hexadecimal digits from `input` and returns the character
/// whose code point they encode.
///
/// Fails if the input ends early, if a non-hexadecimal character is met, or if the
/// decoded number is not a valid Unicode scalar value.
fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> {
    let mut code_point: u32 = 0;
    for _ in 0..len {
        let c = match input.next() {
            Some(c) => c,
            None => return Err(TermParseError::msg("Unexpected literal string end")),
        };
        // Shift the accumulator one hex digit to the left before adding the new digit.
        let shifted = code_point * 16;
        let digit = match c.to_digit(16) {
            Some(digit) => digit,
            None => {
                return Err(TermParseError::msg(
                    "Unexpected character in a unicode escape",
                ))
            }
        };
        code_point = shifted + digit;
    }
    match char::from_u32(code_point) {
        Some(decoded) => Ok(decoded),
        None => Err(TermParseError::msg("Invalid encoded unicode code point")),
    }
}
impl FromStr for Term {
    type Err = TermParseError;

    /// Reads any term from its NTriples or Turtle serialization.
    ///
    /// The first character selects the parser: `<` delegates to [`NamedNode`],
    /// `_` to [`BlankNode`], and everything else to [`Literal`].
    ///
    /// ```
    /// use oxigraph::model::{Literal, Term};
    /// use std::str::FromStr;
    ///
    /// assert_eq!(Term::from_str("\"ex\"").unwrap(), Literal::new_simple_literal("ex").into())
    /// ```
    fn from_str(s: &str) -> Result<Self, TermParseError> {
        let term: Term = match s.chars().next() {
            Some('<') => NamedNode::from_str(s)?.into(),
            Some('_') => BlankNode::from_str(s)?.into(),
            _ => Literal::from_str(s)?.into(),
        };
        Ok(term)
    }
}
impl FromStr for Variable {
type Err = TermParseError;
/// Parses a variable from its SPARQL serialization
///
/// ```
/// use oxigraph::sparql::Variable;
/// use std::str::FromStr;
///
/// assert_eq!(Variable::from_str("$foo").unwrap(), Variable::new("foo").unwrap())
/// ```
fn from_str(s: &str) -> Result<Self, TermParseError> {
if !s.starts_with("?") && !s.starts_with('$') {
return Err(TermParseError::msg(
"Variable serialization should start with ? or $",
));
}
Variable::new(&s[1..]).map_err(|error| TermParseError {
kind: TermParseErrorKind::Variable {
value: s.to_owned(),
error,
},
})
}
}
/// An error raised during term serialization parsing.
///
/// This is the `Err` type of the `FromStr` implementations for `NamedNode`, `BlankNode`,
/// `Literal`, `Term` and `Variable`. The failure details are private; they are exposed
/// through the `Display` implementation.
// NOTE(review): the wrapped kind holds `String` values, so this type could not be `Copy`
// anyway; the `allow` below looks redundant — confirm before removing.
#[allow(missing_copy_implementations)]
#[derive(Debug)]
pub struct TermParseError {
// What went wrong; private so that the set of failure kinds is not part of the public API.
kind: TermParseErrorKind,
}
/// The different ways parsing a term serialization can fail.
#[derive(Debug)]
enum TermParseErrorKind {
/// The IRI of a named node was rejected. `value` is the whole serialization that was
/// being parsed (delimiters included).
Iri {
error: IriParseError,
value: String,
},
/// The identifier of a blank node was rejected. `value` is the whole serialization
/// that was being parsed (`_:` prefix included).
BlankNode {
error: BlankNodeIdParseError,
value: String,
},
/// The language tag of a literal was rejected. `value` is the tag itself, without
/// the leading `@`.
LanguageTag {
error: LanguageTagParseError,
value: String,
},
/// The name of a variable was rejected. `value` is the whole serialization that was
/// being parsed (sigil included).
Variable {
error: VariableNameParseError,
value: String,
},
/// A syntax error described by a fixed message.
Msg {
msg: &'static str,
},
}
impl fmt::Display for TermParseError {
    /// Writes a human-readable description of the failure: the fixed message for plain
    /// syntax errors, otherwise the offending value followed by the underlying error.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.kind {
            TermParseErrorKind::Msg { msg } => write!(f, "{}", msg),
            TermParseErrorKind::Iri {
                ref error,
                ref value,
            } => write!(
                f,
                "Error while parsing the named node '{}': {}",
                value, error
            ),
            TermParseErrorKind::BlankNode {
                ref error,
                ref value,
            } => write!(
                f,
                "Error while parsing the blank node '{}': {}",
                value, error
            ),
            TermParseErrorKind::LanguageTag {
                ref error,
                ref value,
            } => write!(
                f,
                "Error while parsing the language tag '{}': {}",
                value, error
            ),
            TermParseErrorKind::Variable {
                ref error,
                ref value,
            } => write!(f, "Error while parsing the variable '{}': {}", value, error),
        }
    }
}
// Empty impl: `source()` is not overridden, so a wrapped parse error is only surfaced
// through the `Display` output of `TermParseError`.
impl Error for TermParseError {}
impl TermParseError {
pub(crate) fn msg(msg: &'static str) -> Self {
Self {
kind: TermParseErrorKind::Msg { msg },
}
}
}

@ -1,9 +1,12 @@
//! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/) //! Implementation of [SPARQL 1.1 Query Results CSV and TSV Formats](https://www.w3.org/TR/sparql11-results-csv-tsv/)
use crate::error::invalid_data_error;
use crate::model::{vocab::xsd, *}; use crate::model::{vocab::xsd, *};
use crate::sparql::error::EvaluationError; use crate::sparql::error::EvaluationError;
use crate::sparql::model::*; use crate::sparql::model::*;
use std::io::{self, Write}; use std::io::{self, BufRead, Write};
use std::rc::Rc;
use std::str::FromStr;
pub fn write_csv_results( pub fn write_csv_results(
results: QueryResults, results: QueryResults,
@ -164,6 +167,63 @@ fn write_tsv_term<'a>(term: impl Into<TermRef<'a>>, mut sink: impl Write) -> io:
} }
} }
/// Reads SPARQL query results serialized in the TSV format.
///
/// The first line decides the kind of result: `true` or `false` (case-insensitive,
/// surrounding whitespace ignored) gives a boolean result; otherwise the line is read
/// as a tab-separated list of variables and the remaining lines are parsed lazily as
/// solutions.
///
/// Returns an I/O error if the header cannot be read or if one of its columns is not a
/// valid variable serialization.
pub fn read_tsv_results(mut source: impl BufRead + 'static) -> Result<QueryResults, io::Error> {
    let mut buffer = String::new();

    // We read the header
    source.read_line(&mut buffer)?;
    {
        let header = buffer.trim();
        if header.eq_ignore_ascii_case("true") {
            return Ok(QueryResults::Boolean(true));
        }
        if header.eq_ignore_ascii_case("false") {
            return Ok(QueryResults::Boolean(false));
        }
    }

    let mut variables = Vec::new();
    for column in buffer.split('\t') {
        let variable = Variable::from_str(column.trim()).map_err(invalid_data_error)?;
        variables.push(variable);
    }

    // The header buffer is handed over so the iterator can reuse its allocation.
    let solutions = TsvResultsIterator { source, buffer };
    Ok(QueryResults::Solutions(QuerySolutionIter::new(
        Rc::new(variables),
        Box::new(solutions),
    )))
}
/// Lazily parses the solution rows of a TSV results document, one line per row.
struct TsvResultsIterator<R: BufRead> {
// The reader positioned after the header line.
source: R,
// Line buffer, cleared and refilled for each row so its allocation is reused.
buffer: String,
}
impl<R: BufRead> Iterator for TsvResultsIterator<R> {
    type Item = Result<Vec<Option<Term>>, EvaluationError>;

    /// Yields the next row, an error if reading or parsing it failed, or `None` once
    /// the underlying reader is exhausted.
    fn next(&mut self) -> Option<Result<Vec<Option<Term>>, EvaluationError>> {
        match self.read_next() {
            Ok(Some(row)) => Some(Ok(row)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        }
    }
}
impl<R: BufRead> TsvResultsIterator<R> {
    /// Reads and parses one line of the document.
    ///
    /// Returns `Ok(None)` when no more bytes can be read. Otherwise the line is split on
    /// tabs; each trimmed cell becomes `None` when empty or the parsed term otherwise.
    fn read_next(&mut self) -> Result<Option<Vec<Option<Term>>>, EvaluationError> {
        self.buffer.clear();
        let bytes_read = self.source.read_line(&mut self.buffer)?;
        if bytes_read == 0 {
            return Ok(None);
        }

        let mut row = Vec::new();
        for cell in self.buffer.split('\t') {
            let cell = cell.trim();
            let binding = if cell.is_empty() {
                None
            } else {
                Some(Term::from_str(cell).map_err(invalid_data_error)?)
            };
            row.push(binding);
        }
        Ok(Some(row))
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;

@ -2,7 +2,7 @@ use crate::error::invalid_input_error;
use crate::io::GraphFormat; use crate::io::GraphFormat;
use crate::io::GraphSerializer; use crate::io::GraphSerializer;
use crate::model::*; use crate::model::*;
use crate::sparql::csv_results::{write_csv_results, write_tsv_results}; use crate::sparql::csv_results::{read_tsv_results, write_csv_results, write_tsv_results};
use crate::sparql::error::EvaluationError; use crate::sparql::error::EvaluationError;
use crate::sparql::json_results::write_json_results; use crate::sparql::json_results::write_json_results;
use crate::sparql::xml_results::{read_xml_results, write_xml_results}; use crate::sparql::xml_results::{read_xml_results, write_xml_results};
@ -33,9 +33,10 @@ impl QueryResults {
QueryResultsFormat::Json => Err(invalid_input_error( QueryResultsFormat::Json => Err(invalid_input_error(
"JSON SPARQL results format parsing has not been implemented yet", "JSON SPARQL results format parsing has not been implemented yet",
)), //TODO: implement )), //TODO: implement
QueryResultsFormat::Csv | QueryResultsFormat::Tsv => Err(invalid_input_error( QueryResultsFormat::Csv => Err(invalid_input_error(
"CSV and TSV SPARQL results format parsing is not implemented", "CSV and TSV SPARQL results format parsing is not implemented",
)), )),
QueryResultsFormat::Tsv => read_tsv_results(reader),
} }
} }

@ -223,6 +223,11 @@ fn load_sparql_query_result(url: &str) -> Result<StaticQueryResults> {
QueryResults::read(read_file(url)?, QueryResultsFormat::Json)?, QueryResults::read(read_file(url)?, QueryResultsFormat::Json)?,
false, false,
) )
} else if url.ends_with(".tsv") {
StaticQueryResults::from_query_results(
QueryResults::read(read_file(url)?, QueryResultsFormat::Tsv)?,
false,
)
} else { } else {
Ok(StaticQueryResults::from_dataset(load_store(url)?)) Ok(StaticQueryResults::from_dataset(load_store(url)?))
} }

@ -128,3 +128,16 @@ fn sparql11_update_w3c_evaluation_testsuite() -> Result<()> {
vec![], vec![],
) )
} }
/// Runs the W3C SPARQL 1.1 CSV/TSV results test suite, leaving out the CSV cases.
#[test]
fn sparql11_tsv_w3c_evaluation_testsuite() -> Result<()> {
    let manifest_url =
        "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/csv-tsv-res/manifest.ttl";
    // We do not run CSVResultFormatTest tests yet
    let csv_tests = vec![
        "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/csv-tsv-res/manifest#csv01",
        "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/csv-tsv-res/manifest#csv02",
        "http://www.w3.org/2009/sparql/docs/tests/data-sparql11/csv-tsv-res/manifest#csv03",
    ];
    run_testsuite(manifest_url, csv_tests)
}

Loading…
Cancel
Save