Uses u128 instead of UUID inside of BlankNode

Makes code slightly simpler
pull/10/head
Tpt 5 years ago
parent 68e5fb43df
commit 4bbd2ce204
  1. 30
      lib/src/model/blank_node.rs
  2. 4
      lib/src/sparql/algebra.rs
  3. 28
      lib/src/sparql/eval.rs
  4. 16
      lib/src/sparql/model.rs
  5. 46
      lib/src/store/numeric_encoder.rs

@ -1,11 +1,12 @@
use rand::random;
use rio_api::model as rio; use rio_api::model as rio;
use std::fmt; use std::fmt;
use std::io::Write;
use std::str; use std::str;
use uuid::Uuid;
/// A RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node). /// A RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
/// ///
/// This implementation enforces that the blank node id is an UUID to easily ensure /// This implementation enforces that the blank node id is a uniquely generated ID to easily ensure
/// that it is not possible for two blank nodes to share an id. /// that it is not possible for two blank nodes to share an id.
/// ///
/// The common way to create a new blank node is to use the `Default::default` trait method. /// The common way to create a new blank node is to use the `Default::default` trait method.
@ -15,19 +16,26 @@ use uuid::Uuid;
/// ///
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub struct BlankNode { pub struct BlankNode {
uuid: Uuid, id: u128,
str: [u8; 32], str: [u8; 32],
} }
impl BlankNode { impl BlankNode {
/// Creates a blank node from a unique numeric id.
///
/// The id is rendered as exactly 32 lowercase hexadecimal digits (zero-padded
/// on the left) into the fixed-size `str` buffer, so the buffer is always
/// completely filled with ASCII hex characters.
pub(crate) fn new_from_unique_id(id: u128) -> Self {
    let mut str = [0; 32];
    // `{:032x}` rather than `{:x}`: without padding, an id whose top nibble(s)
    // are zero would leave trailing NUL bytes in the buffer, and those bytes
    // would then be part of the string returned by `as_str`.
    // A u128 never needs more than 32 hex digits, so this write cannot overflow
    // the 32-byte buffer and the `unwrap` cannot fail.
    write!(&mut str[..], "{:032x}", id).unwrap();
    Self { id, str }
}
/// Returns the underlying ID of this blank node /// Returns the underlying ID of this blank node
pub fn as_str(&self) -> &str { pub fn as_str(&self) -> &str {
str::from_utf8(&self.str).unwrap() str::from_utf8(&self.str).unwrap()
} }
/// Returns the underlying UUID of this blank node /// Returns the underlying ID of this blank node
pub fn uuid(&self) -> Uuid { pub(crate) fn id(&self) -> u128 {
self.uuid self.id
} }
} }
@ -40,15 +48,7 @@ impl fmt::Display for BlankNode {
impl Default for BlankNode { impl Default for BlankNode {
/// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id
fn default() -> Self { fn default() -> Self {
Self::from(Uuid::new_v4()) Self::new_from_unique_id(random::<u128>())
}
}
impl From<Uuid> for BlankNode {
fn from(id: Uuid) -> Self {
let mut str = [0; 32];
id.to_simple().encode_lower(&mut str);
Self { uuid: id, str }
} }
} }

@ -770,9 +770,7 @@ impl<'a> fmt::Display for SparqlGraphPattern<'a> {
SparqlGraphPattern(&*a), SparqlGraphPattern(&*a),
SparqlGraphPattern(&*b), SparqlGraphPattern(&*b),
), ),
GraphPattern::Graph(g, p) => { GraphPattern::Graph(g, p) => write!(f, "GRAPH {} {{ {} }}", g, SparqlGraphPattern(&*p),),
write!(f, "GRAPH {} {{ {} }}", g, SparqlGraphPattern(&*p),)
}
GraphPattern::Extend(p, v, e) => write!( GraphPattern::Extend(p, v, e) => write!(
f, f,
"{} BIND({} AS {})", "{} BIND({} AS {})",

@ -40,7 +40,7 @@ type EncodedTuplesIterator<'a> = Box<dyn Iterator<Item = Result<EncodedTuple>> +
pub struct SimpleEvaluator<S: StoreConnection> { pub struct SimpleEvaluator<S: StoreConnection> {
dataset: DatasetView<S>, dataset: DatasetView<S>,
bnodes_map: Mutex<BTreeMap<u64, Uuid>>, bnodes_map: Mutex<BTreeMap<u64, u128>>,
base_iri: Option<Iri<String>>, base_iri: Option<Iri<String>>,
now: DateTime<FixedOffset>, now: DateTime<FixedOffset>,
} }
@ -872,19 +872,21 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
if let EncodedTerm::StringLiteral { value_id } = if let EncodedTerm::StringLiteral { value_id } =
self.eval_expression(id, tuple)? self.eval_expression(id, tuple)?
{ {
Some(EncodedTerm::BlankNode( Some(EncodedTerm::BlankNode {
*self id: *self
.bnodes_map .bnodes_map
.lock() .lock()
.ok()? .ok()?
.entry(value_id) .entry(value_id)
.or_insert_with(Uuid::new_v4), .or_insert_with(random::<u128>),
)) })
} else { } else {
None None
} }
} }
None => Some(EncodedTerm::BlankNode(Uuid::new_v4())), None => Some(EncodedTerm::BlankNode {
id: random::<u128>(),
}),
}, },
PlanExpression::Rand => Some(random::<f64>().into()), PlanExpression::Rand => Some(random::<f64>().into()),
PlanExpression::Abs(e) => match self.eval_expression(e, tuple)? { PlanExpression::Abs(e) => match self.eval_expression(e, tuple)? {
@ -1411,7 +1413,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
match term { match term {
EncodedTerm::DefaultGraph => None, EncodedTerm::DefaultGraph => None,
EncodedTerm::NamedNode { iri_id } => Some(iri_id), EncodedTerm::NamedNode { iri_id } => Some(iri_id),
EncodedTerm::BlankNode(_) => None, EncodedTerm::BlankNode { .. } => None,
EncodedTerm::StringLiteral { value_id } EncodedTerm::StringLiteral { value_id }
| EncodedTerm::LangStringLiteral { value_id, .. } | EncodedTerm::LangStringLiteral { value_id, .. }
| EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id), | EncodedTerm::TypedLiteral { value_id, .. } => Some(value_id),
@ -1584,7 +1586,7 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
match a { match a {
EncodedTerm::DefaultGraph EncodedTerm::DefaultGraph
| EncodedTerm::NamedNode { .. } | EncodedTerm::NamedNode { .. }
| EncodedTerm::BlankNode(_) | EncodedTerm::BlankNode { .. }
| EncodedTerm::LangStringLiteral { .. } => Some(a == b), | EncodedTerm::LangStringLiteral { .. } => Some(a == b),
EncodedTerm::StringLiteral { value_id: a } => match b { EncodedTerm::StringLiteral { value_id: a } => match b {
EncodedTerm::StringLiteral { value_id: b } => Some(a == b), EncodedTerm::StringLiteral { value_id: b } => Some(a == b),
@ -1706,8 +1708,8 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
fn cmp_terms(&self, a: Option<EncodedTerm>, b: Option<EncodedTerm>) -> Ordering { fn cmp_terms(&self, a: Option<EncodedTerm>, b: Option<EncodedTerm>) -> Ordering {
match (a, b) { match (a, b) {
(Some(a), Some(b)) => match a { (Some(a), Some(b)) => match a {
EncodedTerm::BlankNode(a) => { EncodedTerm::BlankNode { id: a } => {
if let EncodedTerm::BlankNode(b) = b { if let EncodedTerm::BlankNode { id: b } = b {
a.cmp(&b) a.cmp(&b)
} else { } else {
Ordering::Less Ordering::Less
@ -1717,11 +1719,13 @@ impl<'a, S: StoreConnection + 'a> SimpleEvaluator<S> {
EncodedTerm::NamedNode { iri_id: b } => { EncodedTerm::NamedNode { iri_id: b } => {
self.compare_str_ids(a, b).unwrap_or(Ordering::Equal) self.compare_str_ids(a, b).unwrap_or(Ordering::Equal)
} }
EncodedTerm::BlankNode(_) => Ordering::Greater, EncodedTerm::BlankNode { .. } => Ordering::Greater,
_ => Ordering::Less, _ => Ordering::Less,
}, },
a => match b { a => match b {
EncodedTerm::NamedNode { .. } | EncodedTerm::BlankNode(_) => Ordering::Greater, EncodedTerm::NamedNode { .. } | EncodedTerm::BlankNode { .. } => {
Ordering::Greater
}
b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal), b => self.partial_cmp_literals(a, b).unwrap_or(Ordering::Equal),
}, },
}, },

@ -3,12 +3,12 @@ use crate::sparql::json_results::write_json_results;
use crate::sparql::xml_results::{read_xml_results, write_xml_results}; use crate::sparql::xml_results::{read_xml_results, write_xml_results};
use crate::{FileSyntax, GraphSyntax, Result}; use crate::{FileSyntax, GraphSyntax, Result};
use failure::format_err; use failure::format_err;
use rand::random;
use rio_api::formatter::TriplesFormatter; use rio_api::formatter::TriplesFormatter;
use rio_turtle::{NTriplesFormatter, TurtleFormatter}; use rio_turtle::{NTriplesFormatter, TurtleFormatter};
use rio_xml::RdfXmlFormatter; use rio_xml::RdfXmlFormatter;
use std::fmt; use std::fmt;
use std::io::{BufRead, Write}; use std::io::{BufRead, Write};
use uuid::Uuid;
/// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/) /// Results of a [SPARQL query](https://www.w3.org/TR/sparql11-query/)
pub enum QueryResult<'a> { pub enum QueryResult<'a> {
@ -145,8 +145,8 @@ impl<'a> BindingsIterator<'a> {
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)] #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
pub enum Variable { pub enum Variable {
Variable { name: String }, Variable { name: String },
BlankNode { id: Uuid }, BlankNode { id: u128 },
Internal { id: Uuid }, Internal { id: u128 },
} }
impl Variable { impl Variable {
@ -173,22 +173,24 @@ impl fmt::Display for Variable {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Variable::Variable { name } => write!(f, "?{}", name), Variable::Variable { name } => write!(f, "?{}", name),
Variable::BlankNode { id } => write!(f, "_:{}", id.to_simple()), Variable::BlankNode { id } => write!(f, "_:{:x}", id),
Variable::Internal { id } => write!(f, "?{}", id.to_simple()), Variable::Internal { id } => write!(f, "?{:x}", id),
} }
} }
} }
impl Default for Variable { impl Default for Variable {
fn default() -> Self { fn default() -> Self {
Variable::Internal { id: Uuid::new_v4() } Variable::Internal {
id: random::<u128>(),
}
} }
} }
impl From<BlankNode> for Variable { impl From<BlankNode> for Variable {
fn from(blank_node: BlankNode) -> Self { fn from(blank_node: BlankNode) -> Self {
Variable::BlankNode { Variable::BlankNode {
id: blank_node.uuid(), id: blank_node.id(),
} }
} }
} }

@ -9,6 +9,7 @@ use failure::format_err;
use failure::Backtrace; use failure::Backtrace;
use failure::Fail; use failure::Fail;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use rand::random;
use rio_api::model as rio; use rio_api::model as rio;
use rust_decimal::Decimal; use rust_decimal::Decimal;
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap};
@ -19,7 +20,6 @@ use std::ops::Deref;
use std::str; use std::str;
use std::sync::PoisonError; use std::sync::PoisonError;
use std::sync::RwLock; use std::sync::RwLock;
use uuid::Uuid;
const EMPTY_STRING_ID: u64 = 0; const EMPTY_STRING_ID: u64 = 0;
const RDF_LANG_STRING_ID: u64 = 1; const RDF_LANG_STRING_ID: u64 = 1;
@ -184,7 +184,7 @@ pub const ENCODED_XSD_DATE_TIME_NAMED_NODE: EncodedTerm = EncodedTerm::NamedNode
pub enum EncodedTerm { pub enum EncodedTerm {
DefaultGraph, DefaultGraph,
NamedNode { iri_id: u64 }, NamedNode { iri_id: u64 },
BlankNode(Uuid), BlankNode { id: u128 },
StringLiteral { value_id: u64 }, StringLiteral { value_id: u64 },
LangStringLiteral { value_id: u64, language_id: u64 }, LangStringLiteral { value_id: u64, language_id: u64 },
TypedLiteral { value_id: u64, datatype_id: u64 }, TypedLiteral { value_id: u64, datatype_id: u64 },
@ -210,7 +210,7 @@ impl EncodedTerm {
pub fn is_blank_node(&self) -> bool { pub fn is_blank_node(&self) -> bool {
match self { match self {
EncodedTerm::BlankNode(_) => true, EncodedTerm::BlankNode { .. } => true,
_ => false, _ => false,
} }
} }
@ -260,7 +260,7 @@ impl EncodedTerm {
match self { match self {
EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID, EncodedTerm::DefaultGraph { .. } => TYPE_DEFAULT_GRAPH_ID,
EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID, EncodedTerm::NamedNode { .. } => TYPE_NAMED_NODE_ID,
EncodedTerm::BlankNode(_) => TYPE_BLANK_NODE_ID, EncodedTerm::BlankNode { .. } => TYPE_BLANK_NODE_ID,
EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL, EncodedTerm::StringLiteral { .. } => TYPE_STRING_LITERAL,
EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID, EncodedTerm::LangStringLiteral { .. } => TYPE_LANG_STRING_LITERAL_ID,
EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID, EncodedTerm::TypedLiteral { .. } => TYPE_TYPED_LITERAL_ID,
@ -365,7 +365,7 @@ impl From<NaiveDateTime> for EncodedTerm {
impl From<BlankNode> for EncodedTerm { impl From<BlankNode> for EncodedTerm {
fn from(node: BlankNode) -> Self { fn from(node: BlankNode) -> Self {
EncodedTerm::BlankNode(node.uuid()) EncodedTerm::BlankNode { id: node.id() }
} }
} }
@ -410,11 +410,9 @@ impl<R: Read> TermReader for R {
TYPE_NAMED_NODE_ID => Ok(EncodedTerm::NamedNode { TYPE_NAMED_NODE_ID => Ok(EncodedTerm::NamedNode {
iri_id: self.read_u64::<LittleEndian>()?, iri_id: self.read_u64::<LittleEndian>()?,
}), }),
TYPE_BLANK_NODE_ID => { TYPE_BLANK_NODE_ID => Ok(EncodedTerm::BlankNode {
let mut uuid_buffer = [0 as u8; 16]; id: self.read_u128::<LittleEndian>()?,
self.read_exact(&mut uuid_buffer)?; }),
Ok(EncodedTerm::BlankNode(Uuid::from_bytes(uuid_buffer)))
}
TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral { TYPE_LANG_STRING_LITERAL_ID => Ok(EncodedTerm::LangStringLiteral {
language_id: self.read_u64::<LittleEndian>()?, language_id: self.read_u64::<LittleEndian>()?,
value_id: self.read_u64::<LittleEndian>()?, value_id: self.read_u64::<LittleEndian>()?,
@ -576,7 +574,7 @@ impl<W: Write> TermWriter for W {
match term { match term {
EncodedTerm::DefaultGraph => {} EncodedTerm::DefaultGraph => {}
EncodedTerm::NamedNode { iri_id } => self.write_u64::<LittleEndian>(iri_id)?, EncodedTerm::NamedNode { iri_id } => self.write_u64::<LittleEndian>(iri_id)?,
EncodedTerm::BlankNode(id) => self.write_all(id.as_bytes())?, EncodedTerm::BlankNode { id } => self.write_u128::<LittleEndian>(id)?,
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => {
self.write_u64::<LittleEndian>(value_id)?; self.write_u64::<LittleEndian>(value_id)?;
} }
@ -686,7 +684,9 @@ impl<S: StringStore> Encoder<S> {
} }
pub fn encode_blank_node(&self, blank_node: &BlankNode) -> Result<EncodedTerm> { pub fn encode_blank_node(&self, blank_node: &BlankNode) -> Result<EncodedTerm> {
Ok(EncodedTerm::BlankNode(blank_node.uuid())) Ok(EncodedTerm::BlankNode {
id: blank_node.id(),
})
} }
pub fn encode_literal(&self, literal: &Literal) -> Result<EncodedTerm> { pub fn encode_literal(&self, literal: &Literal) -> Result<EncodedTerm> {
@ -742,14 +742,14 @@ impl<S: StringStore> Encoder<S> {
pub fn encode_rio_blank_node( pub fn encode_rio_blank_node(
&self, &self,
blank_node: rio::BlankNode, blank_node: rio::BlankNode,
bnodes_map: &mut HashMap<String, Uuid>, bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm> { ) -> Result<EncodedTerm> {
Ok(if let Some(uuid) = bnodes_map.get(blank_node.id) { Ok(if let Some(id) = bnodes_map.get(blank_node.id) {
EncodedTerm::BlankNode(*uuid) EncodedTerm::BlankNode { id: *id }
} else { } else {
let uuid = Uuid::new_v4(); let id = random::<u128>();
bnodes_map.insert(blank_node.id.to_owned(), uuid); bnodes_map.insert(blank_node.id.to_owned(), id);
EncodedTerm::BlankNode(uuid) EncodedTerm::BlankNode { id }
}) })
} }
@ -813,7 +813,7 @@ impl<S: StringStore> Encoder<S> {
pub fn encode_rio_named_or_blank_node( pub fn encode_rio_named_or_blank_node(
&self, &self,
term: rio::NamedOrBlankNode, term: rio::NamedOrBlankNode,
bnodes_map: &mut HashMap<String, Uuid>, bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm> { ) -> Result<EncodedTerm> {
match term { match term {
rio::NamedOrBlankNode::NamedNode(named_node) => self.encode_rio_named_node(named_node), rio::NamedOrBlankNode::NamedNode(named_node) => self.encode_rio_named_node(named_node),
@ -826,7 +826,7 @@ impl<S: StringStore> Encoder<S> {
pub fn encode_rio_term( pub fn encode_rio_term(
&self, &self,
term: rio::Term, term: rio::Term,
bnodes_map: &mut HashMap<String, Uuid>, bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedTerm> { ) -> Result<EncodedTerm> {
match term { match term {
rio::Term::NamedNode(named_node) => self.encode_rio_named_node(named_node), rio::Term::NamedNode(named_node) => self.encode_rio_named_node(named_node),
@ -838,7 +838,7 @@ impl<S: StringStore> Encoder<S> {
pub fn encode_rio_quad( pub fn encode_rio_quad(
&self, &self,
quad: rio::Quad, quad: rio::Quad,
bnodes_map: &mut HashMap<String, Uuid>, bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedQuad> { ) -> Result<EncodedQuad> {
Ok(EncodedQuad { Ok(EncodedQuad {
subject: self.encode_rio_named_or_blank_node(quad.subject, bnodes_map)?, subject: self.encode_rio_named_or_blank_node(quad.subject, bnodes_map)?,
@ -855,7 +855,7 @@ impl<S: StringStore> Encoder<S> {
&self, &self,
triple: rio::Triple, triple: rio::Triple,
graph_name: EncodedTerm, graph_name: EncodedTerm,
bnodes_map: &mut HashMap<String, Uuid>, bnodes_map: &mut HashMap<String, u128>,
) -> Result<EncodedQuad> { ) -> Result<EncodedQuad> {
Ok(EncodedQuad { Ok(EncodedQuad {
subject: self.encode_rio_named_or_blank_node(triple.subject, bnodes_map)?, subject: self.encode_rio_named_or_blank_node(triple.subject, bnodes_map)?,
@ -939,7 +939,7 @@ impl<S: StringStore> Encoder<S> {
EncodedTerm::NamedNode { iri_id } => { EncodedTerm::NamedNode { iri_id } => {
Ok(NamedNode::new_from_string(self.get_str(iri_id)?).into()) Ok(NamedNode::new_from_string(self.get_str(iri_id)?).into())
} }
EncodedTerm::BlankNode(id) => Ok(BlankNode::from(id).into()), EncodedTerm::BlankNode { id } => Ok(BlankNode::new_from_unique_id(id).into()),
EncodedTerm::StringLiteral { value_id } => { EncodedTerm::StringLiteral { value_id } => {
Ok(Literal::new_simple_literal(self.get_str(value_id)?).into()) Ok(Literal::new_simple_literal(self.get_str(value_id)?).into())
} }

Loading…
Cancel
Save