Drops LanguageTag and normalizes all tags to lowercase

Makes code simpler

LanguageTag code has been pushed upstream to the `language-tags` crate
pull/10/head
Tpt 5 years ago
parent 4500ba7b68
commit cac68a4473
  1. 1147
      lib/src/model/language_tag.rs
  2. 27
      lib/src/model/literal.rs
  3. 2
      lib/src/model/mod.rs
  4. 7
      lib/src/sparql/sparql_grammar.rustpeg
  5. 7
      lib/src/sparql/xml_results.rs
  6. 5
      lib/src/store/memory.rs
  7. 28
      lib/src/store/numeric_encoder.rs
  8. 5
      lib/src/store/rocksdb.rs

File diff suppressed because it is too large. Load Diff

@ -1,4 +1,3 @@
use crate::model::language_tag::LanguageTag;
use crate::model::named_node::NamedNode; use crate::model::named_node::NamedNode;
use crate::model::vocab::rdf; use crate::model::vocab::rdf;
use crate::model::vocab::xsd; use crate::model::vocab::xsd;
@ -19,7 +18,6 @@ use std::option::Option;
/// The default string formatter returns an N-Triples, Turtle and SPARQL compatible representation: /// The default string formatter returns an N-Triples, Turtle and SPARQL compatible representation:
/// ``` /// ```
/// use rudf::model::Literal; /// use rudf::model::Literal;
/// use rudf::model::LanguageTag;
/// use rudf::model::vocab::xsd; /// use rudf::model::vocab::xsd;
/// ///
/// assert_eq!( /// assert_eq!(
@ -34,7 +32,7 @@ use std::option::Option;
/// ///
/// assert_eq!( /// assert_eq!(
/// "\"foo\"@en", /// "\"foo\"@en",
/// Literal::new_language_tagged_literal("foo", LanguageTag::parse("en").unwrap()).to_string() /// Literal::new_language_tagged_literal("foo", "en").to_string()
/// ); /// );
/// ``` /// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@ -43,10 +41,7 @@ pub struct Literal(LiteralContent);
#[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Clone, Hash)]
enum LiteralContent { enum LiteralContent {
String(String), String(String),
LanguageTaggedString { LanguageTaggedString { value: String, language: String },
value: String,
language: LanguageTag,
},
Boolean(bool), Boolean(bool),
Float(OrderedFloat<f32>), Float(OrderedFloat<f32>),
Double(OrderedFloat<f64>), Double(OrderedFloat<f64>),
@ -56,10 +51,7 @@ enum LiteralContent {
NaiveTime(NaiveTime), NaiveTime(NaiveTime),
DateTime(DateTime<FixedOffset>), DateTime(DateTime<FixedOffset>),
NaiveDateTime(NaiveDateTime), NaiveDateTime(NaiveDateTime),
TypedLiteral { TypedLiteral { value: String, datatype: NamedNode },
value: String,
datatype: NamedNode,
},
} }
impl Literal { impl Literal {
@ -139,11 +131,16 @@ impl Literal {
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string) /// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
pub fn new_language_tagged_literal( pub fn new_language_tagged_literal(
value: impl Into<String>, value: impl Into<String>,
language: impl Into<LanguageTag>, language: impl Into<String>,
) -> Self { ) -> Self {
let language = language.into();
Literal(LiteralContent::LanguageTaggedString { Literal(LiteralContent::LanguageTaggedString {
value: value.into(), value: value.into(),
language: language.into(), language: if language.bytes().all(|c| c.is_ascii_lowercase()) {
language
} else {
language.to_ascii_lowercase()
},
}) })
} }
@ -166,8 +163,10 @@ impl Literal {
} }
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string). /// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47). /// Language tags are defined by the [BCP47](https://tools.ietf.org/html/bcp47).
pub fn language(&self) -> Option<&LanguageTag> { /// They are normalized to lowercase by this implementation.
pub fn language(&self) -> Option<&String> {
match self.0 { match self.0 {
LiteralContent::LanguageTaggedString { ref language, .. } => Some(language), LiteralContent::LanguageTaggedString { ref language, .. } => Some(language),
_ => None, _ => None,

@ -5,7 +5,6 @@
mod blank_node; mod blank_node;
mod graph; mod graph;
mod isomorphism; mod isomorphism;
mod language_tag;
mod literal; mod literal;
mod named_node; mod named_node;
mod triple; mod triple;
@ -13,7 +12,6 @@ pub mod vocab;
pub use crate::model::blank_node::BlankNode; pub use crate::model::blank_node::BlankNode;
pub use crate::model::graph::SimpleGraph; pub use crate::model::graph::SimpleGraph;
pub use crate::model::language_tag::LanguageTag;
pub use crate::model::literal::Literal; pub use crate::model::literal::Literal;
pub use crate::model::named_node::NamedNode; pub use crate::model::named_node::NamedNode;
pub use crate::model::triple::NamedOrBlankNode; pub use crate::model::triple::NamedOrBlankNode;

@ -948,11 +948,8 @@ VAR1 -> &'input str = '?' v:$(VARNAME) { v }
VAR2 -> &'input str = '$' v:$(VARNAME) { v } VAR2 -> &'input str = '$' v:$(VARNAME) { v }
//[145] //[145]
LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {? LANGTAG -> String = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
match LanguageTag::parse(&l) { l.to_ascii_lowercase()
Ok(langtag) => Ok(langtag),
Err(error) => Err("language tag parsing failed")
}
} }
//[146] //[146]

@ -333,10 +333,7 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
if let Ok(attr) = attr { if let Ok(attr) = attr {
if attr.key == b"xml:lang" { if attr.key == b"xml:lang" {
match attr.unescape_and_decode_value(&self.reader) { match attr.unescape_and_decode_value(&self.reader) {
Ok(val) => match LanguageTag::parse(&val) { Ok(val) => lang = Some(val),
Ok(val) => lang = Some(val),
Err(error) => return Some(Err(error.into())),
},
Err(error) => return Some(Err(error.into())), Err(error) => return Some(Err(error.into())),
} }
} else if attr.key == b"datatype" { } else if attr.key == b"datatype" {
@ -424,7 +421,7 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
fn build_literal( fn build_literal(
value: impl Into<String>, value: impl Into<String>,
lang: &Option<LanguageTag>, lang: &Option<String>,
datatype: &Option<NamedNode>, datatype: &Option<NamedNode>,
) -> Literal { ) -> Literal {
match datatype { match datatype {

@ -1,4 +1,3 @@
use crate::model::LanguageTag;
use crate::store::numeric_encoder::*; use crate::store::numeric_encoder::*;
use crate::store::*; use crate::store::*;
use crate::{Repository, Result}; use crate::{Repository, Result};
@ -85,10 +84,6 @@ impl StringStore for MemoryStore {
fn get_str(&self, id: u64) -> Result<String> { fn get_str(&self, id: u64) -> Result<String> {
self.string_store.get_str(id) self.string_store.get_str(id)
} }
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
self.string_store.get_language_tag(id)
}
} }
impl<'a> StoreConnection for &'a MemoryStore { impl<'a> StoreConnection for &'a MemoryStore {

@ -36,7 +36,6 @@ pub trait StringStore {
fn insert_str(&self, value: &str) -> Result<u64>; fn insert_str(&self, value: &str) -> Result<u64>;
fn get_str(&self, id: u64) -> Result<Self::StringType>; fn get_str(&self, id: u64) -> Result<Self::StringType>;
fn get_language_tag(&self, id: u64) -> Result<LanguageTag>;
/// Should be called when the bytes store is created /// Should be called when the bytes store is created
fn set_first_strings(&self) -> Result<()> { fn set_first_strings(&self) -> Result<()> {
@ -71,10 +70,6 @@ impl<'a, S: StringStore> StringStore for &'a S {
fn get_str(&self, id: u64) -> Result<S::StringType> { fn get_str(&self, id: u64) -> Result<S::StringType> {
(*self).get_str(id) (*self).get_str(id)
} }
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
(*self).get_language_tag(id)
}
} }
pub struct MemoryStringStore { pub struct MemoryStringStore {
@ -115,15 +110,6 @@ impl StringStore for MemoryStringStore {
Ok(id2str[id as usize].to_owned()) Ok(id2str[id as usize].to_owned())
} }
} }
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
let id2str = self.id2str.read().map_err(MutexPoisonError::from)?;
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(LanguageTag::parse(&id2str[id as usize])?)
}
}
} }
const TYPE_DEFAULT_GRAPH_ID: u8 = 0; const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
@ -717,10 +703,11 @@ impl<S: StringStore> Encoder<S> {
rio::Literal::LanguageTaggedString { value, language } => { rio::Literal::LanguageTaggedString { value, language } => {
Ok(EncodedTerm::LangStringLiteral { Ok(EncodedTerm::LangStringLiteral {
value_id: self.string_store.insert_str(value)?, value_id: self.string_store.insert_str(value)?,
language_id: self language_id: if language.bytes().all(|b| b.is_ascii_lowercase()) {
.string_store self.string_store.insert_str(language)
.insert_str(LanguageTag::parse(language)?.as_str())?, } else {
//TODO: avoid self.string_store.insert_str(&language.to_ascii_lowercase())
}?,
}) })
} }
rio::Literal::Typed { value, datatype } => { rio::Literal::Typed { value, datatype } => {
@ -805,7 +792,7 @@ impl<S: StringStore> Encoder<S> {
language_id, language_id,
} => Ok(Literal::new_language_tagged_literal( } => Ok(Literal::new_language_tagged_literal(
self.string_store.get_str(value_id)?, self.string_store.get_str(value_id)?,
self.string_store.get_language_tag(language_id)?, self.string_store.get_str(language_id)?,
) )
.into()), .into()),
EncodedTerm::TypedLiteral { EncodedTerm::TypedLiteral {
@ -906,7 +893,8 @@ fn test_encoding() {
Literal::from(1.2).into(), Literal::from(1.2).into(),
Literal::from(1).into(), Literal::from(1).into(),
Literal::from("foo").into(), Literal::from("foo").into(),
Literal::new_language_tagged_literal("foo", LanguageTag::parse("fr").unwrap()).into(), Literal::new_language_tagged_literal("foo", "fr").into(),
Literal::new_language_tagged_literal("foo", "FR").into(),
]; ];
for term in terms { for term in terms {
let encoded = encoder.encode_term(&term).unwrap(); let encoded = encoder.encode_term(&term).unwrap();

@ -1,4 +1,3 @@
use crate::model::LanguageTag;
use crate::store::numeric_encoder::*; use crate::store::numeric_encoder::*;
use crate::store::{Store, StoreConnection, StoreRepositoryConnection}; use crate::store::{Store, StoreConnection, StoreRepositoryConnection};
use crate::{Repository, Result}; use crate::{Repository, Result};
@ -162,10 +161,6 @@ impl StringStore for RocksDbStoreConnection<'_> {
Err(format_err!("value not found in the dictionary")) Err(format_err!("value not found in the dictionary"))
} }
} }
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
Ok(LanguageTag::parse(&self.get_str(id)?)?)
}
} }
impl<'a> StoreConnection for RocksDbStoreConnection<'a> { impl<'a> StoreConnection for RocksDbStoreConnection<'a> {

Loading…
Cancel
Save