Rust implementation of NextGraph, a Decentralized and local-first web 3.0 ecosystem
https://nextgraph.org
byzantine-fault-tolerancecrdtsdappsdecentralizede2eeeventual-consistencyjson-ldlocal-firstmarkdownocapoffline-firstp2pp2p-networkprivacy-protectionrdfrich-text-editorself-hostedsemantic-websparqlweb3collaboration
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
294 lines
11 KiB
294 lines
11 KiB
// Copyright (c) 2022-2025 Niko Bonnieure, Par le Peuple, NextGraph.org developers
|
|
// All rights reserved.
|
|
// Licensed under the Apache License, Version 2.0
|
|
// <LICENSE-APACHE2 or http://www.apache.org/licenses/LICENSE-2.0>
|
|
// or the MIT license <LICENSE-MIT or http://opensource.org/licenses/MIT>,
|
|
// at your option. All files in the project carrying such
|
|
// notice may not be copied, modified, or distributed except
|
|
// according to those terms.
|
|
|
|
use ng_oxigraph::oxrdf::Subject;
|
|
use ng_repo::log::*;
|
|
use ng_repo::types::OverlayId;
|
|
|
|
use std::collections::HashMap;
|
|
use std::collections::HashSet;
|
|
|
|
use lazy_static::lazy_static;
|
|
pub use ng_net::orm::{OrmDiff, OrmShapeType};
|
|
use ng_net::{app_protocol::*, orm::*};
|
|
use ng_oxigraph::oxrdf::Triple;
|
|
use ng_repo::errors::NgError;
|
|
use ng_repo::errors::VerifierError;
|
|
use regex::Regex;
|
|
|
|
use crate::orm::types::*;
|
|
|
|
/// Heuristic:
|
|
/// Consider a string an IRI if it contains alphanumeric characters and then a colon within the first 13 characters
|
|
pub fn is_iri(s: &str) -> bool {
|
|
lazy_static! {
|
|
static ref IRI_REGEX: Regex = Regex::new(r"^[A-Za-z][A-Za-z0-9+\.\-]{1,12}:").unwrap();
|
|
}
|
|
IRI_REGEX.is_match(s)
|
|
}
|
|
|
|
pub fn literal_to_sparql_str(var: OrmSchemaDataType) -> Vec<String> {
|
|
match var.literals {
|
|
None => [].to_vec(),
|
|
Some(literals) => literals
|
|
.iter()
|
|
.map(|literal| match literal {
|
|
BasicType::Bool(val) => {
|
|
if *val {
|
|
"true".to_string()
|
|
} else {
|
|
"false".to_string()
|
|
}
|
|
}
|
|
BasicType::Num(number) => number.to_string(),
|
|
BasicType::Str(sting) => {
|
|
if is_iri(sting) {
|
|
format!("<{}>", sting)
|
|
} else {
|
|
format!("\"{}\"", escape_literal(sting))
|
|
}
|
|
}
|
|
})
|
|
.collect(),
|
|
}
|
|
}
|
|
|
|
pub fn shape_type_to_sparql(
|
|
schema: &OrmSchema,
|
|
shape: &ShapeIri,
|
|
filter_subjects: Option<Vec<String>>,
|
|
) -> Result<String, NgError> {
|
|
// Use a counter to generate unique variable names.
|
|
let mut var_counter = 0;
|
|
fn get_new_var_name(counter: &mut i32) -> String {
|
|
let name = format!("v{}", counter);
|
|
*counter += 1;
|
|
name
|
|
}
|
|
|
|
// Collect all statements to be added to the construct and where bodies.
|
|
let mut construct_statements = Vec::new();
|
|
let mut where_statements = Vec::new();
|
|
|
|
// Keep track of visited shapes while recursing to prevent infinite loops.
|
|
let mut visited_shapes: HashSet<ShapeIri> = HashSet::new();
|
|
|
|
// Recursive function to call for (nested) shapes.
|
|
fn process_shape(
|
|
schema: &OrmSchema,
|
|
shape: &OrmSchemaShape,
|
|
subject_var_name: &str,
|
|
construct_statements: &mut Vec<String>,
|
|
where_statements: &mut Vec<String>,
|
|
var_counter: &mut i32,
|
|
visited_shapes: &mut HashSet<String>,
|
|
) {
|
|
// Prevent infinite recursion on cyclic schemas.
|
|
// TODO: We could handle this as IRI string reference.
|
|
if visited_shapes.contains(&shape.iri) {
|
|
return;
|
|
}
|
|
visited_shapes.insert(shape.iri.clone());
|
|
|
|
// Add statements for each predicate.
|
|
for predicate in &shape.predicates {
|
|
let mut union_branches = Vec::new();
|
|
let mut allowed_literals = Vec::new();
|
|
|
|
// Predicate constraints might have more than one acceptable data type. Traverse each.
|
|
// It is assumed that constant literals, nested shapes and regular types are not mixed.
|
|
for datatype in &predicate.dataTypes {
|
|
if datatype.valType == OrmSchemaLiteralType::literal {
|
|
// Collect allowed literals and as strings
|
|
// (already in SPARQL-format, e.g. `"a astring"`, `<http:ex.co/>`, `true`, or `42`).
|
|
allowed_literals.extend(literal_to_sparql_str(datatype.clone()));
|
|
} else if datatype.valType == OrmSchemaLiteralType::shape {
|
|
let shape_iri = &datatype.shape.clone().unwrap();
|
|
let nested_shape = schema.get(shape_iri).unwrap();
|
|
|
|
// For the current acceptable shape, add CONSTRUCT, WHERE, and recurse.
|
|
|
|
// Each shape option gets its own var.
|
|
let obj_var_name = get_new_var_name(var_counter);
|
|
|
|
construct_statements.push(format!(
|
|
" ?{} <{}> ?{}",
|
|
subject_var_name, predicate.iri, obj_var_name
|
|
));
|
|
// Those are later added to a UNION, if there is more than one shape.
|
|
union_branches.push(format!(
|
|
" ?{} <{}> ?{}",
|
|
subject_var_name, predicate.iri, obj_var_name
|
|
));
|
|
|
|
// Recurse to add statements for nested object.
|
|
process_shape(
|
|
schema,
|
|
nested_shape,
|
|
&obj_var_name,
|
|
construct_statements,
|
|
where_statements,
|
|
var_counter,
|
|
visited_shapes,
|
|
);
|
|
}
|
|
}
|
|
|
|
// The where statement which might be wrapped in OPTIONAL.
|
|
let where_body: String;
|
|
|
|
if !allowed_literals.is_empty()
|
|
&& !predicate.extra.unwrap_or(false)
|
|
&& predicate.minCardinality > 0
|
|
{
|
|
// If we have literal requirements and they are not optional ("extra"),
|
|
// Add CONSTRUCT, WHERE, and FILTER.
|
|
|
|
let pred_var_name = get_new_var_name(var_counter);
|
|
construct_statements.push(format!(
|
|
" ?{} <{}> ?{}",
|
|
subject_var_name, predicate.iri, pred_var_name
|
|
));
|
|
where_body = format!(
|
|
" ?{s} <{p}> ?{o} . \n FILTER(?{o} IN ({lits}))",
|
|
s = subject_var_name,
|
|
p = predicate.iri,
|
|
o = pred_var_name,
|
|
lits = allowed_literals.join(", ")
|
|
);
|
|
} else if !union_branches.is_empty() {
|
|
// We have nested shape(s) which were already added to CONSTRUCT above.
|
|
// Join them with UNION.
|
|
|
|
where_body = union_branches
|
|
.into_iter()
|
|
.map(|b| format!("{{\n{}\n}}", b))
|
|
.collect::<Vec<_>>()
|
|
.join(" UNION ");
|
|
} else {
|
|
// Regular predicate data type. Just add basic CONSTRUCT and WHERE statements.
|
|
|
|
let pred_var_name = get_new_var_name(var_counter);
|
|
construct_statements.push(format!(
|
|
" ?{} <{}> ?{}",
|
|
subject_var_name, predicate.iri, pred_var_name
|
|
));
|
|
where_body = format!(
|
|
" ?{} <{}> ?{}",
|
|
subject_var_name, predicate.iri, pred_var_name
|
|
);
|
|
}
|
|
|
|
// Wrap in optional, if necessary.
|
|
if predicate.minCardinality < 1 {
|
|
where_statements.push(format!(" OPTIONAL {{\n{}\n }}", where_body));
|
|
} else {
|
|
where_statements.push(where_body);
|
|
};
|
|
}
|
|
|
|
visited_shapes.remove(&shape.iri);
|
|
}
|
|
|
|
let root_shape = schema.get(shape).ok_or(VerifierError::InvalidOrmSchema)?;
|
|
|
|
// Root subject variable name
|
|
let root_var_name = get_new_var_name(&mut var_counter);
|
|
|
|
process_shape(
|
|
schema,
|
|
root_shape,
|
|
&root_var_name,
|
|
&mut construct_statements,
|
|
&mut where_statements,
|
|
&mut var_counter,
|
|
&mut visited_shapes,
|
|
);
|
|
|
|
// Filter subjects, if present.
|
|
if let Some(subjects) = filter_subjects {
|
|
log_debug!("filter_subjects: {:?}", subjects);
|
|
let subjects_str = subjects
|
|
.iter()
|
|
.map(|s| format!("<{}>", s))
|
|
.collect::<Vec<_>>()
|
|
.join(", ");
|
|
where_statements.push(format!(" FILTER(?v0 IN ({}))", subjects_str));
|
|
}
|
|
|
|
// Create query from statements.
|
|
let construct_body = construct_statements.join(" .\n");
|
|
|
|
let where_body = where_statements.join(" .\n");
|
|
|
|
Ok(format!(
|
|
"CONSTRUCT {{\n{}\n}}\nWHERE {{\n{}\n}}",
|
|
construct_body, where_body
|
|
))
|
|
}
|
|
|
|
/// SPARQL literal escape: backslash, quotes, newlines, tabs.
|
|
fn escape_literal(lit: &str) -> String {
|
|
let mut out = String::with_capacity(lit.len() + 4);
|
|
for c in lit.chars() {
|
|
match c {
|
|
'\\' => out.push_str("\\\\"),
|
|
'\"' => out.push_str("\\\""),
|
|
'\n' => out.push_str("\\n"),
|
|
'\r' => out.push_str("\\r"),
|
|
'\t' => out.push_str("\\t"),
|
|
_ => out.push(c),
|
|
}
|
|
}
|
|
return out;
|
|
}
|
|
|
|
pub fn group_by_subject_for_shape<'a>(
|
|
shape: &OrmSchemaShape,
|
|
triples: &'a [Triple],
|
|
allowed_subjects: &[String],
|
|
) -> HashMap<String, Vec<&'a Triple>> {
|
|
let mut triples_by_subject: HashMap<String, Vec<&Triple>> = HashMap::new();
|
|
let allowed_preds_set: HashSet<&str> =
|
|
shape.predicates.iter().map(|p| p.iri.as_str()).collect();
|
|
let allowed_subject_set: HashSet<&str> = allowed_subjects.iter().map(|s| s.as_str()).collect();
|
|
for triple in triples {
|
|
// triple.subject must be in allowed_subjects (or allowed_subjects empty)
|
|
// and triple.predicate must be in allowed_preds.
|
|
if allowed_preds_set.contains(triple.predicate.as_str()) {
|
|
// filter subjects if list provided
|
|
let subj = match &triple.subject {
|
|
Subject::NamedNode(n) => n.clone().into_string(),
|
|
_ => continue,
|
|
};
|
|
// Subject must be in allowed subjects (or allowed_subjects is empty).
|
|
if allowed_subject_set.is_empty() || allowed_subject_set.contains(&subj.as_str()) {
|
|
triples_by_subject
|
|
.entry(subj)
|
|
.or_insert_with(Vec::new)
|
|
.push(triple);
|
|
}
|
|
}
|
|
}
|
|
|
|
return triples_by_subject;
|
|
}
|
|
|
|
pub fn nuri_to_string(nuri: &NuriV0) -> String {
|
|
// Get repo_id and overlay_id from the nuri
|
|
let repo_id = nuri.target.repo_id();
|
|
let overlay_id = if let Some(overlay_link) = &nuri.overlay {
|
|
overlay_link.clone().try_into().unwrap()
|
|
} else {
|
|
// Default overlay for the repo
|
|
OverlayId::outer(repo_id)
|
|
};
|
|
let graph_name = NuriV0::repo_graph_name(repo_id, &overlay_id);
|
|
graph_name
|
|
}
|
|
|