Validates language tags

Also makes sure they are all lowercase

Closes #24
pull/26/head
Tpt 5 years ago
parent a8dbc94d6f
commit 4c9bd52614
  1. 12
      lib/src/error.rs
  2. 20
      lib/src/model/literal.rs
  3. 18
      lib/src/sparql/eval.rs
  4. 2
      lib/src/sparql/parser.rs
  5. 10
      lib/src/sparql/xml_results.rs
  6. 26
      lib/src/store/numeric_encoder.rs

@ -1,6 +1,7 @@
use peg::error::ParseError;
use peg::str::LineCol;
use rio_api::iri::IriParseError;
use rio_api::language_tag::LanguageTagParseError;
use rio_turtle::TurtleError;
use rio_xml::RdfXmlError;
use std::error;
@ -26,6 +27,7 @@ impl fmt::Display for Error {
ErrorKind::FromUtf8(e) => e.fmt(f),
ErrorKind::Poison => write!(f, "Mutex was poisoned"),
ErrorKind::Iri(e) => e.fmt(f),
ErrorKind::LanguageTag(e) => e.fmt(f),
ErrorKind::Other(e) => e.fmt(f),
}
}
@ -39,6 +41,7 @@ impl error::Error for Error {
ErrorKind::FromUtf8(e) => Some(e),
ErrorKind::Poison => None,
ErrorKind::Iri(e) => Some(e),
ErrorKind::LanguageTag(e) => Some(e),
ErrorKind::Other(e) => Some(e.as_ref()),
}
}
@ -67,6 +70,7 @@ enum ErrorKind {
FromUtf8(FromUtf8Error),
Poison,
Iri(IriParseError),
LanguageTag(LanguageTagParseError),
Other(Box<dyn error::Error + Send + Sync + 'static>),
}
@ -94,6 +98,14 @@ impl From<IriParseError> for Error {
}
}
impl From<LanguageTagParseError> for Error {
fn from(error: LanguageTagParseError) -> Self {
Self {
inner: ErrorKind::LanguageTag(error),
}
}
}
impl From<TurtleError> for Error {
fn from(error: TurtleError) -> Self {
Self::wrap(error)

@ -2,6 +2,8 @@ use crate::model::named_node::NamedNode;
use crate::model::vocab::rdf;
use crate::model::vocab::xsd;
use crate::model::xsd::*;
use crate::Result;
use rio_api::language_tag::LanguageTag;
use rio_api::model as rio;
use std::borrow::Cow;
use std::fmt;
@ -11,6 +13,7 @@ use std::option::Option;
///
/// The default string formatter returns an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// # use oxigraph::Result;
/// use oxigraph::model::Literal;
/// use oxigraph::model::vocab::xsd;
///
@ -26,8 +29,9 @@ use std::option::Option;
///
/// assert_eq!(
/// "\"foo\"@en",
/// Literal::new_language_tagged_literal("foo", "en").to_string()
/// Literal::new_language_tagged_literal("foo", "en")?.to_string()
/// );
/// # Result::Ok(())
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct Literal(LiteralContent);
@ -60,12 +64,22 @@ impl Literal {
// Builds a language-tagged string literal from `value` and `language`.
//
// The tag is converted to ASCII lowercase and then checked with
// `LanguageTag::parse`; a tag that fails to parse makes this return `Err`.
//
// NOTE(review): the `-> Self` and `-> Result<Self>` lines below appear to be the
// pre- and post-change signatures shown together by the diff view; the body
// (`Ok(...)` and `?`) only matches `-> Result<Self>` — confirm against the real file.
pub fn new_language_tagged_literal(
value: impl Into<String>,
language: impl Into<String>,
) -> Self {
) -> Result<Self> {
let mut language = language.into();
// Normalize case before validation so the stored tag is always lowercase.
language.make_ascii_lowercase();
Ok(Literal(LiteralContent::LanguageTaggedString {
value: value.into(),
// `?` propagates the parse error; `into_inner` recovers the validated string.
language: LanguageTag::parse(language)?.into_inner(),
}))
}
/// Builds a language-tagged string literal without validating `language`.
///
/// Crate-internal. Unlike `new_language_tagged_literal`, the tag is stored as
/// given: no lowercasing and no `LanguageTag::parse` check happens here.
// NOTE(review): `language,` and `language: language.into(),` below look like the
// pre- and post-change forms of the same field from the diff view — confirm
// against the real file.
pub(crate) fn new_language_tagged_literal_unchecked(
value: impl Into<String>,
language: impl Into<String>,
) -> Self {
Literal(LiteralContent::LanguageTaggedString {
value: value.into(),
language,
language: language.into(),
})
}

@ -14,6 +14,7 @@ use md5::Md5;
use rand::random;
use regex::{Regex, RegexBuilder};
use rio_api::iri::Iri;
use rio_api::language_tag::LanguageTag;
use rio_api::model as rio;
use sha1::Sha1;
use sha2::{Sha256, Sha384, Sha512};
@ -906,10 +907,12 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
_ => None,
},
PlanExpression::LangMatches(language_tag, language_range) => {
let language_tag =
let mut language_tag =
self.to_simple_string(self.eval_expression(language_tag, tuple)?)?;
let language_range =
language_tag.make_ascii_lowercase();
let mut language_range =
self.to_simple_string(self.eval_expression(language_range, tuple)?)?;
language_range.make_ascii_lowercase();
Some(
if &*language_range == "*" {
!language_tag.is_empty()
@ -917,7 +920,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
!ZipLongest::new(language_range.split('-'), language_tag.split('-')).any(
|parts| match parts {
(Some(range_subtag), Some(language_subtag)) => {
!range_subtag.eq_ignore_ascii_case(language_subtag)
range_subtag != language_subtag
}
(Some(_), None) => true,
(None, _) => false,
@ -1243,8 +1246,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
Some(EncodedTerm::LangStringLiteral {
value_id: self
.to_simple_string_id(self.eval_expression(lexical_form, tuple)?)?,
language_id: self
.to_simple_string_id(self.eval_expression(lang_tag, tuple)?)?,
language_id: self.build_language_id(self.eval_expression(lang_tag, tuple)?)?,
})
}
PlanExpression::StrDT(lexical_form, datatype) => {
@ -1498,6 +1500,12 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
Some(value_id)
}
/// Turns an evaluated term into the string id of a validated language tag.
///
/// Yields `None` when `to_simple_string` produces nothing for `value`, or when
/// the ASCII-lowercased text is rejected by `LanguageTag::parse`.
fn build_language_id(&self, value: EncodedTerm) -> Option<u128> {
    // Lowercase in place before validation so the id is built from the normalized tag.
    let lowered = self.to_simple_string(value).map(|mut raw| {
        raw.make_ascii_lowercase();
        raw
    })?;
    let tag = LanguageTag::parse(lowered).ok()?;
    self.build_string_id(tag.as_str())
}
fn to_argument_compatible_strings(
&self,
arg1: EncodedTerm,

@ -1431,7 +1431,7 @@ parser! {
//[129]
// An RDF literal is a string followed by `^^` and a datatype IRI, or by a
// language tag, or by nothing (simple literal). The alternatives are listed in
// that order.
// NOTE(review): the two LANGTAG alternatives below look like the pre- and
// post-change versions of one line from the diff view. The `{? ... }` form maps
// a failed `new_language_tagged_literal` to the message
// "language tag parsing failed" — presumably reported by peg as a parse
// failure; confirm against the peg documentation.
rule RDFLiteral() -> Literal =
v:String() _ "^^" _ t:iri() { Literal::new_typed_literal(v, t) } /
v:String() _ l:LANGTAG() { Literal::new_language_tagged_literal(v, l) } /
v:String() _ l:LANGTAG() {? Literal::new_language_tagged_literal(v, l).map_err(|_| "language tag parsing failed") } /
v:String() { Literal::new_simple_literal(v) }
//[130]

@ -385,7 +385,7 @@ impl<R: BufRead> ResultsIterator<R> {
self.reader.decode(&data)?,
lang.take(),
datatype.take(),
)
)?
.into(),
);
}
@ -413,7 +413,7 @@ impl<R: BufRead> ResultsIterator<R> {
State::Literal => {
if term.is_none() {
//We default to the empty literal
term = Some(build_literal("", lang.take(), datatype.take()).into())
term = Some(build_literal("", lang.take(), datatype.take())?.into())
}
state = State::Binding;
}
@ -430,12 +430,12 @@ fn build_literal(
value: impl Into<String>,
lang: Option<String>,
datatype: Option<NamedNode>,
) -> Literal {
) -> Result<Literal> {
match datatype {
Some(datatype) => Literal::new_typed_literal(value, datatype),
Some(datatype) => Ok(Literal::new_typed_literal(value, datatype)),
None => match lang {
Some(lang) => Literal::new_language_tagged_literal(value, lang),
None => Literal::new_simple_literal(value),
None => Ok(Literal::new_simple_literal(value)),
},
}
}

@ -389,11 +389,7 @@ impl<'a> From<rio::Literal<'a>> for EncodedTerm {
rio::Literal::LanguageTaggedString { value, language } => {
EncodedTerm::LangStringLiteral {
value_id: get_str_id(value),
language_id: if language.bytes().all(|b| b.is_ascii_lowercase()) {
get_str_id(language)
} else {
get_str_id(&language.to_ascii_lowercase())
},
language_id: get_str_id(language),
}
}
rio::Literal::Typed { value, datatype } => {
@ -986,18 +982,8 @@ impl<S: StrContainer> Encoder for S {
rio::Literal::LanguageTaggedString { value, language } => {
let value_id = get_str_id(value);
self.insert_str(value_id, value)?;
let language_id = if language.bytes().all(|b| b.is_ascii_lowercase()) {
let language_id = get_str_id(language);
self.insert_str(language_id, language)?;
language_id
} else {
let language = language.to_ascii_lowercase();
let language_id = get_str_id(&language);
self.insert_str(language_id, &language)?;
language_id
};
EncodedTerm::LangStringLiteral {
value_id,
language_id,
@ -1161,7 +1147,7 @@ impl<S: StrLookup> Decoder for S {
EncodedTerm::LangStringLiteral {
value_id,
language_id,
} => Ok(Literal::new_language_tagged_literal(
} => Ok(Literal::new_language_tagged_literal_unchecked(
get_required_str(self, value_id)?,
get_required_str(self, language_id)?,
)
@ -1209,8 +1195,12 @@ fn test_encoding() {
Literal::from(1.2).into(),
Literal::from(1).into(),
Literal::from("foo").into(),
Literal::new_language_tagged_literal("foo", "fr").into(),
Literal::new_language_tagged_literal("foo", "FR").into(),
Literal::new_language_tagged_literal("foo", "fr")
.unwrap()
.into(),
Literal::new_language_tagged_literal("foo", "FR")
.unwrap()
.into(),
Literal::new_typed_literal("-1.32", xsd::DECIMAL.clone()).into(),
Literal::new_typed_literal("2020-01-01T01:01:01Z", xsd::DATE_TIME.clone()).into(),
Literal::new_typed_literal("2020-01-01", xsd::DATE.clone()).into(),

Loading…
Cancel
Save