Drops LanguageTag and normalizes all tags to lowercase

This makes the code simpler.

The LanguageTag code has been pushed upstream to the `language-tags` crate.
pull/10/head
Tpt 5 years ago
parent 4500ba7b68
commit cac68a4473
  1. 1147
      lib/src/model/language_tag.rs
  2. 27
      lib/src/model/literal.rs
  3. 2
      lib/src/model/mod.rs
  4. 7
      lib/src/sparql/sparql_grammar.rustpeg
  5. 7
      lib/src/sparql/xml_results.rs
  6. 5
      lib/src/store/memory.rs
  7. 28
      lib/src/store/numeric_encoder.rs
  8. 5
      lib/src/store/rocksdb.rs

File diff suppressed because it is too large Load Diff

@ -1,4 +1,3 @@
use crate::model::language_tag::LanguageTag;
use crate::model::named_node::NamedNode;
use crate::model::vocab::rdf;
use crate::model::vocab::xsd;
@ -19,7 +18,6 @@ use std::option::Option;
/// The default string formatter returns an N-Triples, Turtle and SPARQL compatible representation:
/// ```
/// use rudf::model::Literal;
/// use rudf::model::LanguageTag;
/// use rudf::model::vocab::xsd;
///
/// assert_eq!(
@ -34,7 +32,7 @@ use std::option::Option;
///
/// assert_eq!(
/// "\"foo\"@en",
/// Literal::new_language_tagged_literal("foo", LanguageTag::parse("en").unwrap()).to_string()
/// Literal::new_language_tagged_literal("foo", "en").to_string()
/// );
/// ```
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
@ -43,10 +41,7 @@ pub struct Literal(LiteralContent);
#[derive(PartialEq, Eq, Ord, PartialOrd, Debug, Clone, Hash)]
enum LiteralContent {
String(String),
LanguageTaggedString {
value: String,
language: LanguageTag,
},
LanguageTaggedString { value: String, language: String },
Boolean(bool),
Float(OrderedFloat<f32>),
Double(OrderedFloat<f64>),
@ -56,10 +51,7 @@ enum LiteralContent {
NaiveTime(NaiveTime),
DateTime(DateTime<FixedOffset>),
NaiveDateTime(NaiveDateTime),
TypedLiteral {
value: String,
datatype: NamedNode,
},
TypedLiteral { value: String, datatype: NamedNode },
}
impl Literal {
@ -139,11 +131,16 @@ impl Literal {
/// Builds an RDF [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string)
pub fn new_language_tagged_literal(
value: impl Into<String>,
language: impl Into<LanguageTag>,
language: impl Into<String>,
) -> Self {
let language = language.into();
Literal(LiteralContent::LanguageTaggedString {
value: value.into(),
language: language.into(),
language: if language.bytes().all(|c| c.is_ascii_lowercase()) {
language
} else {
language.to_ascii_lowercase()
},
})
}
@ -166,8 +163,10 @@ impl Literal {
}
/// The literal [language tag](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tag) if it is a [language-tagged string](https://www.w3.org/TR/rdf11-concepts/#dfn-language-tagged-string).
///
/// Language tags are defined by [BCP 47](https://tools.ietf.org/html/bcp47).
pub fn language(&self) -> Option<&LanguageTag> {
/// They are normalized to lowercase by this implementation.
pub fn language(&self) -> Option<&String> {
match self.0 {
LiteralContent::LanguageTaggedString { ref language, .. } => Some(language),
_ => None,

@ -5,7 +5,6 @@
mod blank_node;
mod graph;
mod isomorphism;
mod language_tag;
mod literal;
mod named_node;
mod triple;
@ -13,7 +12,6 @@ pub mod vocab;
pub use crate::model::blank_node::BlankNode;
pub use crate::model::graph::SimpleGraph;
pub use crate::model::language_tag::LanguageTag;
pub use crate::model::literal::Literal;
pub use crate::model::named_node::NamedNode;
pub use crate::model::triple::NamedOrBlankNode;

@ -948,11 +948,8 @@ VAR1 -> &'input str = '?' v:$(VARNAME) { v }
VAR2 -> &'input str = '$' v:$(VARNAME) { v }
//[145]
LANGTAG -> LanguageTag = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {?
match LanguageTag::parse(&l) {
Ok(langtag) => Ok(langtag),
Err(error) => Err("language tag parsing failed")
}
LANGTAG -> String = "@" l:$([a-zA-Z]+ ("-" [a-zA-Z0-9]+)*) {
l.to_ascii_lowercase()
}
//[146]

@ -333,10 +333,7 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
if let Ok(attr) = attr {
if attr.key == b"xml:lang" {
match attr.unescape_and_decode_value(&self.reader) {
Ok(val) => match LanguageTag::parse(&val) {
Ok(val) => lang = Some(val),
Err(error) => return Some(Err(error.into())),
},
Ok(val) => lang = Some(val),
Err(error) => return Some(Err(error.into())),
}
} else if attr.key == b"datatype" {
@ -424,7 +421,7 @@ impl<R: BufRead> Iterator for ResultsIterator<R> {
fn build_literal(
value: impl Into<String>,
lang: &Option<LanguageTag>,
lang: &Option<String>,
datatype: &Option<NamedNode>,
) -> Literal {
match datatype {

@ -1,4 +1,3 @@
use crate::model::LanguageTag;
use crate::store::numeric_encoder::*;
use crate::store::*;
use crate::{Repository, Result};
@ -85,10 +84,6 @@ impl StringStore for MemoryStore {
fn get_str(&self, id: u64) -> Result<String> {
self.string_store.get_str(id)
}
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
self.string_store.get_language_tag(id)
}
}
impl<'a> StoreConnection for &'a MemoryStore {

@ -36,7 +36,6 @@ pub trait StringStore {
fn insert_str(&self, value: &str) -> Result<u64>;
fn get_str(&self, id: u64) -> Result<Self::StringType>;
fn get_language_tag(&self, id: u64) -> Result<LanguageTag>;
/// Should be called when the bytes store is created
fn set_first_strings(&self) -> Result<()> {
@ -71,10 +70,6 @@ impl<'a, S: StringStore> StringStore for &'a S {
fn get_str(&self, id: u64) -> Result<S::StringType> {
(*self).get_str(id)
}
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
(*self).get_language_tag(id)
}
}
pub struct MemoryStringStore {
@ -115,15 +110,6 @@ impl StringStore for MemoryStringStore {
Ok(id2str[id as usize].to_owned())
}
}
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
let id2str = self.id2str.read().map_err(MutexPoisonError::from)?;
if id2str.len() as u64 <= id {
Err(format_err!("value not found in the dictionary"))
} else {
Ok(LanguageTag::parse(&id2str[id as usize])?)
}
}
}
const TYPE_DEFAULT_GRAPH_ID: u8 = 0;
@ -717,10 +703,11 @@ impl<S: StringStore> Encoder<S> {
rio::Literal::LanguageTaggedString { value, language } => {
Ok(EncodedTerm::LangStringLiteral {
value_id: self.string_store.insert_str(value)?,
language_id: self
.string_store
.insert_str(LanguageTag::parse(language)?.as_str())?,
//TODO: avoid
language_id: if language.bytes().all(|b| b.is_ascii_lowercase()) {
self.string_store.insert_str(language)
} else {
self.string_store.insert_str(&language.to_ascii_lowercase())
}?,
})
}
rio::Literal::Typed { value, datatype } => {
@ -805,7 +792,7 @@ impl<S: StringStore> Encoder<S> {
language_id,
} => Ok(Literal::new_language_tagged_literal(
self.string_store.get_str(value_id)?,
self.string_store.get_language_tag(language_id)?,
self.string_store.get_str(language_id)?,
)
.into()),
EncodedTerm::TypedLiteral {
@ -906,7 +893,8 @@ fn test_encoding() {
Literal::from(1.2).into(),
Literal::from(1).into(),
Literal::from("foo").into(),
Literal::new_language_tagged_literal("foo", LanguageTag::parse("fr").unwrap()).into(),
Literal::new_language_tagged_literal("foo", "fr").into(),
Literal::new_language_tagged_literal("foo", "FR").into(),
];
for term in terms {
let encoded = encoder.encode_term(&term).unwrap();

@ -1,4 +1,3 @@
use crate::model::LanguageTag;
use crate::store::numeric_encoder::*;
use crate::store::{Store, StoreConnection, StoreRepositoryConnection};
use crate::{Repository, Result};
@ -162,10 +161,6 @@ impl StringStore for RocksDbStoreConnection<'_> {
Err(format_err!("value not found in the dictionary"))
}
}
fn get_language_tag(&self, id: u64) -> Result<LanguageTag> {
Ok(LanguageTag::parse(&self.get_str(id)?)?)
}
}
impl<'a> StoreConnection for RocksDbStoreConnection<'a> {

Loading…
Cancel
Save