parent
bce06e5a05
commit
576ba213ee
@ -0,0 +1,294 @@ |
||||
// Copyright (c) 2022-2025 Niko Bonnieure, Par le Peuple, NextGraph.org developers
|
||||
// All rights reserved.
|
||||
// Licensed under the Apache License, Version 2.0
|
||||
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
|
||||
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
|
||||
// at your option. All files in the project carrying such
|
||||
// notice may not be copied, modified, or distributed except
|
||||
// according to those terms.
|
||||
|
||||
use ng_oxigraph::oxrdf::Subject; |
||||
use ng_repo::log::*; |
||||
use ng_repo::types::OverlayId; |
||||
|
||||
use std::collections::HashMap; |
||||
use std::collections::HashSet; |
||||
|
||||
use lazy_static::lazy_static; |
||||
pub use ng_net::orm::{OrmDiff, OrmShapeType}; |
||||
use ng_net::{app_protocol::*, orm::*}; |
||||
use ng_oxigraph::oxrdf::Triple; |
||||
use ng_repo::errors::NgError; |
||||
use ng_repo::errors::VerifierError; |
||||
use regex::Regex; |
||||
|
||||
use crate::orm::types::*; |
||||
|
||||
/// Heuristic:
|
||||
/// Consider a string an IRI if it contains alphanumeric characters and then a colon within the first 13 characters
|
||||
pub fn is_iri(s: &str) -> bool { |
||||
lazy_static! { |
||||
static ref IRI_REGEX: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9+\.\-]{1,12}:").unwrap(); |
||||
} |
||||
IRI_REGEX.is_match(s) |
||||
} |
||||
|
||||
pub fn literal_to_sparql_str(var: OrmSchemaDataType) -> Vec<String> { |
||||
match var.literals { |
||||
None => [].to_vec(), |
||||
Some(literals) => literals |
||||
.iter() |
||||
.map(|literal| match literal { |
||||
BasicType::Bool(val) => { |
||||
if *val { |
||||
"true".to_string() |
||||
} else { |
||||
"false".to_string() |
||||
} |
||||
} |
||||
BasicType::Num(number) => number.to_string(), |
||||
BasicType::Str(sting) => { |
||||
if is_iri(sting) { |
||||
format!("<{}>", sting) |
||||
} else { |
||||
format!("\"{}\"", escape_literal(sting)) |
||||
} |
||||
} |
||||
}) |
||||
.collect(), |
||||
} |
||||
} |
||||
|
||||
pub fn shape_type_to_sparql( |
||||
schema: &OrmSchema, |
||||
shape: &ShapeIri, |
||||
filter_subjects: Option<Vec<String>>, |
||||
) -> Result<String, NgError> { |
||||
// Use a counter to generate unique variable names.
|
||||
let mut var_counter = 0; |
||||
fn get_new_var_name(counter: &mut i32) -> String { |
||||
let name = format!("v{}", counter); |
||||
*counter += 1; |
||||
name |
||||
} |
||||
|
||||
// Collect all statements to be added to the construct and where bodies.
|
||||
let mut construct_statements = Vec::new(); |
||||
let mut where_statements = Vec::new(); |
||||
|
||||
// Keep track of visited shapes while recursing to prevent infinite loops.
|
||||
let mut visited_shapes: HashSet<ShapeIri> = HashSet::new(); |
||||
|
||||
// Recursive function to call for (nested) shapes.
|
||||
fn process_shape( |
||||
schema: &OrmSchema, |
||||
shape: &OrmSchemaShape, |
||||
subject_var_name: &str, |
||||
construct_statements: &mut Vec<String>, |
||||
where_statements: &mut Vec<String>, |
||||
var_counter: &mut i32, |
||||
visited_shapes: &mut HashSet<String>, |
||||
) { |
||||
// Prevent infinite recursion on cyclic schemas.
|
||||
// TODO: We could handle this as IRI string reference.
|
||||
if visited_shapes.contains(&shape.iri) { |
||||
return; |
||||
} |
||||
visited_shapes.insert(shape.iri.clone()); |
||||
|
||||
// Add statements for each predicate.
|
||||
for predicate in &shape.predicates { |
||||
let mut union_branches = Vec::new(); |
||||
let mut allowed_literals = Vec::new(); |
||||
|
||||
// Predicate constraints might have more than one acceptable data type. Traverse each.
|
||||
// It is assumed that constant literals, nested shapes and regular types are not mixed.
|
||||
for datatype in &predicate.dataTypes { |
||||
if datatype.valType == OrmSchemaLiteralType::literal { |
||||
// Collect allowed literals and as strings
|
||||
// (already in SPARQL-format, e.g. `"a astring"`, `<http:ex.co/>`, `true`, or `42`).
|
||||
allowed_literals.extend(literal_to_sparql_str(datatype.clone())); |
||||
} else if datatype.valType == OrmSchemaLiteralType::shape { |
||||
let shape_iri = &datatype.shape.clone().unwrap(); |
||||
let nested_shape = schema.get(shape_iri).unwrap(); |
||||
|
||||
// For the current acceptable shape, add CONSTRUCT, WHERE, and recurse.
|
||||
|
||||
// Each shape option gets its own var.
|
||||
let obj_var_name = get_new_var_name(var_counter); |
||||
|
||||
construct_statements.push(format!( |
||||
" ?{} <{}> ?{}", |
||||
subject_var_name, predicate.iri, obj_var_name |
||||
)); |
||||
// Those are later added to a UNION, if there is more than one shape.
|
||||
union_branches.push(format!( |
||||
" ?{} <{}> ?{}", |
||||
subject_var_name, predicate.iri, obj_var_name |
||||
)); |
||||
|
||||
// Recurse to add statements for nested object.
|
||||
process_shape( |
||||
schema, |
||||
nested_shape, |
||||
&obj_var_name, |
||||
construct_statements, |
||||
where_statements, |
||||
var_counter, |
||||
visited_shapes, |
||||
); |
||||
} |
||||
} |
||||
|
||||
// The where statement which might be wrapped in OPTIONAL.
|
||||
let where_body: String; |
||||
|
||||
if !allowed_literals.is_empty() |
||||
&& !predicate.extra.unwrap_or(false) |
||||
&& predicate.minCardinality > 0 |
||||
{ |
||||
// If we have literal requirements and they are not optional ("extra"),
|
||||
// Add CONSTRUCT, WHERE, and FILTER.
|
||||
|
||||
let pred_var_name = get_new_var_name(var_counter); |
||||
construct_statements.push(format!( |
||||
" ?{} <{}> ?{}", |
||||
subject_var_name, predicate.iri, pred_var_name |
||||
)); |
||||
where_body = format!( |
||||
" ?{s} <{p}> ?{o} . \n FILTER(?{o} IN ({lits}))", |
||||
s = subject_var_name, |
||||
p = predicate.iri, |
||||
o = pred_var_name, |
||||
lits = allowed_literals.join(", ") |
||||
); |
||||
} else if !union_branches.is_empty() { |
||||
// We have nested shape(s) which were already added to CONSTRUCT above.
|
||||
// Join them with UNION.
|
||||
|
||||
where_body = union_branches |
||||
.into_iter() |
||||
.map(|b| format!("{{\n{}\n}}", b)) |
||||
.collect::<Vec<_>>() |
||||
.join(" UNION "); |
||||
} else { |
||||
// Regular predicate data type. Just add basic CONSTRUCT and WHERE statements.
|
||||
|
||||
let pred_var_name = get_new_var_name(var_counter); |
||||
construct_statements.push(format!( |
||||
" ?{} <{}> ?{}", |
||||
subject_var_name, predicate.iri, pred_var_name |
||||
)); |
||||
where_body = format!( |
||||
" ?{} <{}> ?{}", |
||||
subject_var_name, predicate.iri, pred_var_name |
||||
); |
||||
} |
||||
|
||||
// Wrap in optional, if necessary.
|
||||
if predicate.minCardinality < 1 { |
||||
where_statements.push(format!(" OPTIONAL {{\n{}\n }}", where_body)); |
||||
} else { |
||||
where_statements.push(where_body); |
||||
}; |
||||
} |
||||
|
||||
visited_shapes.remove(&shape.iri); |
||||
} |
||||
|
||||
let root_shape = schema.get(shape).ok_or(VerifierError::InvalidOrmSchema)?; |
||||
|
||||
// Root subject variable name
|
||||
let root_var_name = get_new_var_name(&mut var_counter); |
||||
|
||||
process_shape( |
||||
schema, |
||||
root_shape, |
||||
&root_var_name, |
||||
&mut construct_statements, |
||||
&mut where_statements, |
||||
&mut var_counter, |
||||
&mut visited_shapes, |
||||
); |
||||
|
||||
// Filter subjects, if present.
|
||||
if let Some(subjects) = filter_subjects { |
||||
log_debug!("filter_subjects: {:?}", subjects); |
||||
let subjects_str = subjects |
||||
.iter() |
||||
.map(|s| format!("<{}>", s)) |
||||
.collect::<Vec<_>>() |
||||
.join(", "); |
||||
where_statements.push(format!(" FILTER(?v0 IN ({}))", subjects_str)); |
||||
} |
||||
|
||||
// Create query from statements.
|
||||
let construct_body = construct_statements.join(" .\n"); |
||||
|
||||
let where_body = where_statements.join(" .\n"); |
||||
|
||||
Ok(format!( |
||||
"CONSTRUCT {{\n{}\n}}\nWHERE {{\n{}\n}}", |
||||
construct_body, where_body |
||||
)) |
||||
} |
||||
|
||||
/// Escapes `lit` for use inside a double-quoted SPARQL string literal.
///
/// Backslash, double quote, line feed, carriage return and tab are replaced by
/// their backslash escape sequences; every other character is copied unchanged.
fn escape_literal(lit: &str) -> String {
    // Reserve a little headroom for the extra backslashes.
    let buffer = String::with_capacity(lit.len() + 4);
    lit.chars().fold(buffer, |mut escaped, ch| {
        match ch {
            '\\' => escaped.push_str(r"\\"),
            '"' => escaped.push_str(r#"\""#),
            '\n' => escaped.push_str(r"\n"),
            '\r' => escaped.push_str(r"\r"),
            '\t' => escaped.push_str(r"\t"),
            other => escaped.push(other),
        }
        escaped
    })
}
||||
|
||||
pub fn group_by_subject_for_shape<'a>( |
||||
shape: &OrmSchemaShape, |
||||
triples: &'a [Triple], |
||||
allowed_subjects: &[String], |
||||
) -> HashMap<String, Vec<&'a Triple>> { |
||||
let mut triples_by_subject: HashMap<String, Vec<&Triple>> = HashMap::new(); |
||||
let allowed_preds_set: HashSet<&str> = |
||||
shape.predicates.iter().map(|p| p.iri.as_str()).collect(); |
||||
let allowed_subject_set: HashSet<&str> = allowed_subjects.iter().map(|s| s.as_str()).collect(); |
||||
for triple in triples { |
||||
// triple.subject must be in allowed_subjects (or allowed_subjects empty)
|
||||
// and triple.predicate must be in allowed_preds.
|
||||
if allowed_preds_set.contains(triple.predicate.as_str()) { |
||||
// filter subjects if list provided
|
||||
let subj = match &triple.subject { |
||||
Subject::NamedNode(n) => n.clone().into_string(), |
||||
_ => continue, |
||||
}; |
||||
// Subject must be in allowed subjects (or allowed_subjects is empty).
|
||||
if allowed_subject_set.is_empty() || allowed_subject_set.contains(&subj.as_str()) { |
||||
triples_by_subject |
||||
.entry(subj) |
||||
.or_insert_with(Vec::new) |
||||
.push(triple); |
||||
} |
||||
} |
||||
} |
||||
|
||||
return triples_by_subject; |
||||
} |
||||
|
||||
pub fn nuri_to_string(nuri: &NuriV0) -> String { |
||||
// Get repo_id and overlay_id from the nuri
|
||||
let repo_id = nuri.target.repo_id(); |
||||
let overlay_id = if let Some(overlay_link) = &nuri.overlay { |
||||
overlay_link.clone().try_into().unwrap() |
||||
} else { |
||||
// Default overlay for the repo
|
||||
OverlayId::outer(repo_id) |
||||
}; |
||||
let graph_name = NuriV0::repo_graph_name(repo_id, &overlay_id); |
||||
graph_name |
||||
} |
Loading…
Reference in new issue