From ab0ee164fada10cbc96b1ddb882a7807aa4c7636 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Sat, 10 Feb 2024 22:55:44 -0500 Subject: [PATCH] Experimental parse-display POC It may be possible to eliminate a lot of `fmt::Display` code by using [parse-display](https://github.com/frozenlib/parse-display/tree/master?tab=readme-ov-file#parse-display) crate. In the first iteration, it can replace many display traits for the simple cases of constants and formatted inner values. In the more advanced, it should be possible to format iterators, and if the issue I proposed get implemented, might even cover many of the `fmt_sse` functions. Note that I made a few unit tests for the migration purposes - just to see that the result is identical. We may want to remove at least some of them later on as being too trivial. One aspect that may need discussion: `write!(f, "{value}")` is not the same as `value.fmt(f)` because the first case creates a new `Formatter` instance, whereas the second case reuses the one passed as an argument to `Display::fmt` function. In some cases, it may break if formatter contains padding or number formatting configuration that will or won't be passed to the nested object. `parse-display` seem to always generate a new Formatter, but Oxigraph uses a lot of `.fmt` calls - which might actually be a bug. --- Cargo.lock | 50 +++++++ Cargo.toml | 1 + lib/oxttl/Cargo.toml | 5 +- lib/oxttl/src/n3.rs | 53 ++++++-- lib/spargebra/Cargo.toml | 1 + lib/spargebra/src/algebra.rs | 256 +++++++++++++++++++++-------------- 6 files changed, 253 insertions(+), 113 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6ce4974e..309c5b37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1188,6 +1188,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxrdf", + "parse-display", "thiserror", "tokio", ] @@ -1215,6 +1216,31 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "parse-display" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06af5f9333eb47bd9ba8462d612e37a8328a5cb80b13f0af4de4c3b89f52dee5" +dependencies = [ + "parse-display-derive", + "regex", + "regex-syntax", +] + +[[package]] +name = "parse-display-derive" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc9252f259500ee570c75adcc4e317fa6f57a1e47747d622e0bf838002a7b790" +dependencies = [ + "proc-macro2", + "quote", + "regex", + "regex-syntax", + "structmeta", + "syn", +] + [[package]] name = "peg" version = "0.8.2" @@ -1761,6 +1787,7 @@ dependencies = [ "oxilangtag", "oxiri", "oxrdf", + "parse-display", "peg", "rand", "thiserror", @@ -1794,6 +1821,29 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +[[package]] +name = "structmeta" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e1575d8d40908d70f6fd05537266b90ae71b15dbbe7a8b7dffa2b759306d329" +dependencies = [ + "proc-macro2", + "quote", + "structmeta-derive", + "syn", +] + +[[package]] +name = "structmeta-derive" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "subtle" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 7e0fe900..7614a4b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ memchr = "2.5" oxhttp = "0.2.0-alpha.3" oxilangtag = "0.1" oxiri = "0.2.3-alpha.1" +parse-display = "0.9.0" peg = "0.8" pkg-config = "0.3.25" predicates = ">=2.0, <4.0" diff --git a/lib/oxttl/Cargo.toml b/lib/oxttl/Cargo.toml index c86150ed..0ed84fe8 100644 --- a/lib/oxttl/Cargo.toml +++ b/lib/oxttl/Cargo.toml @@ -20,9 +20,10 @@ async-tokio = ["dep:tokio"] [dependencies] memchr.workspace = true -oxrdf.workspace = true -oxiri.workspace = true oxilangtag.workspace = true +oxiri.workspace = true +oxrdf.workspace = true +parse-display.workspace = true thiserror.workspace = true tokio = { workspace = true, optional = true, features = ["io-util"] } diff --git a/lib/oxttl/src/n3.rs b/lib/oxttl/src/n3.rs index de3e9363..c3bf1aef 100644 --- a/lib/oxttl/src/n3.rs +++ b/lib/oxttl/src/n3.rs @@ -23,7 +23,8 @@ use std::io::Read; use tokio::io::AsyncRead; /// A N3 term i.e. a RDF `Term` or a `Variable`. -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash, parse_display::Display)] +#[display("{0}")] pub enum N3Term { NamedNode(NamedNode), BlankNode(BlankNode), @@ -33,17 +34,45 @@ pub enum N3Term { Variable(Variable), } -impl fmt::Display for N3Term { - #[inline] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::NamedNode(term) => term.fmt(f), - Self::BlankNode(term) => term.fmt(f), - Self::Literal(term) => term.fmt(f), - #[cfg(feature = "rdf-star")] - Self::Triple(term) => term.fmt(f), - Self::Variable(term) => term.fmt(f), - } +#[cfg(test)] +mod test_n3term { + use super::*; + #[test] + fn display() { + // This is a temporary migration test - we can remove most of these before merging + assert_eq!( + N3Term::NamedNode(NamedNode::new_unchecked("named")).to_string(), + "" + ); + assert_eq!( + N3Term::BlankNode(BlankNode::new_unchecked("blank")).to_string(), + "_:blank" + ); + assert_eq!( + N3Term::Literal(Literal::new_simple_literal("literal")).to_string(), + r#""literal""# + ); + + #[cfg(feature = "rdf-star")] + assert_eq!( + N3Term::Triple(Box::new( + Triple::new( + NamedNode::new_unchecked("http://example.com/s"), + NamedNode::new_unchecked("http://example.com/p"), + Triple::new( + NamedNode::new_unchecked("http://example.com/os"), + NamedNode::new_unchecked("http://example.com/op"), + NamedNode::new_unchecked("http://example.com/oo"), + ), + ) + )) + .to_string(), + " << >>" + ); + assert_eq!( + N3Term::Variable(Variable::new_unchecked("var")).to_string(), + "?var" + ); } } diff --git a/lib/spargebra/Cargo.toml b/lib/spargebra/Cargo.toml index f367c57c..ef4250f3 100644 --- a/lib/spargebra/Cargo.toml +++ b/lib/spargebra/Cargo.toml @@ -23,6 +23,7 @@ sep-0006 = [] oxilangtag.workspace = true oxiri.workspace = true oxrdf.workspace = true +parse-display.workspace = true peg.workspace = true rand.workspace = true thiserror.workspace = true diff --git a/lib/spargebra/src/algebra.rs b/lib/spargebra/src/algebra.rs index a261b2dc..0ee00d10 100644 --- a/lib/spargebra/src/algebra.rs +++ b/lib/spargebra/src/algebra.rs @@ -91,6 +91,30 @@ impl fmt::Display for PropertyPathExpression { } } +/// The `display(with = ...)` will not work until this PR lands: +/// https://github.com/frozenlib/parse-display/issues/36#issuecomment-1925367731 +/// +#[cfg(skip)] +#[derive(Eq, PartialEq, Debug, Clone, Hash, parse_display::Display)] +pub enum PropertyPathExpression { + #[display("{0}")] + NamedNode(NamedNode), + #[display("^({0})")] + Reverse(Box), + #[display("({0} / {1})")] + Sequence(Box, Box), + #[display("({0} | {1})")] + Alternative(Box, Box), + #[display("({0})*")] + ZeroOrMore(Box), + #[display("({0})+")] + OneOrMore(Box), + #[display("({0})?")] + ZeroOrOne(Box), + #[display("!({0})")] + NegatedPropertySet(#[display(with = delimiter(" | "))] Vec), +} + impl From for PropertyPathExpression { fn from(p: NamedNode) -> Self { Self::NamedNode(p) @@ -318,7 +342,8 @@ fn write_arg_list( } /// A function name. -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash, parse_display::Display)] +#[display(style = "UPPERCASE")] pub enum Function { Str, Lang, @@ -337,6 +362,7 @@ pub enum Function { Replace, UCase, LCase, + #[display("ENCODE_FOR_URI")] EncodeForUri, Contains, StrStarts, @@ -361,9 +387,13 @@ pub enum Function { Sha512, StrLang, StrDt, + #[display("isIRI")] IsIri, + #[display("isBLANK")] IsBlank, + #[display("isLITERAL")] IsLiteral, + #[display("isNUMERIC")] IsNumeric, Regex, #[cfg(feature = "rdf-star")] @@ -375,9 +405,11 @@ pub enum Function { #[cfg(feature = "rdf-star")] Object, #[cfg(feature = "rdf-star")] + #[display("isTRIPLE")] IsTriple, #[cfg(feature = "sep-0002")] Adjust, + #[display("{0}")] Custom(NamedNode), } @@ -448,69 +480,74 @@ impl Function { } } -impl fmt::Display for Function { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Str => f.write_str("STR"), - Self::Lang => f.write_str("LANG"), - Self::LangMatches => f.write_str("LANGMATCHES"), - Self::Datatype => f.write_str("DATATYPE"), - Self::Iri => f.write_str("IRI"), - Self::BNode => f.write_str("BNODE"), - Self::Rand => f.write_str("RAND"), - Self::Abs => f.write_str("ABS"), - Self::Ceil => f.write_str("CEIL"), - Self::Floor => f.write_str("FLOOR"), - Self::Round => f.write_str("ROUND"), - Self::Concat => f.write_str("CONCAT"), - Self::SubStr => f.write_str("SUBSTR"), - Self::StrLen => f.write_str("STRLEN"), - Self::Replace => f.write_str("REPLACE"), - Self::UCase => f.write_str("UCASE"), - Self::LCase => f.write_str("LCASE"), - Self::EncodeForUri => f.write_str("ENCODE_FOR_URI"), - Self::Contains => f.write_str("CONTAINS"), - Self::StrStarts => f.write_str("STRSTARTS"), - Self::StrEnds => f.write_str("STRENDS"), - Self::StrBefore => f.write_str("STRBEFORE"), - Self::StrAfter => f.write_str("STRAFTER"), - Self::Year => f.write_str("YEAR"), - Self::Month => f.write_str("MONTH"), - Self::Day => f.write_str("DAY"), - Self::Hours => f.write_str("HOURS"), - Self::Minutes => f.write_str("MINUTES"), - Self::Seconds => f.write_str("SECONDS"), - Self::Timezone => f.write_str("TIMEZONE"), - Self::Tz => f.write_str("TZ"), - Self::Now => f.write_str("NOW"), - Self::Uuid => f.write_str("UUID"), - Self::StrUuid => f.write_str("STRUUID"), - Self::Md5 => f.write_str("MD5"), - Self::Sha1 => f.write_str("SHA1"), - Self::Sha256 => f.write_str("SHA256"), - Self::Sha384 => f.write_str("SHA384"), - Self::Sha512 => f.write_str("SHA512"), - Self::StrLang => f.write_str("STRLANG"), - Self::StrDt => f.write_str("STRDT"), - Self::IsIri => f.write_str("isIRI"), - Self::IsBlank => f.write_str("isBLANK"), - Self::IsLiteral => f.write_str("isLITERAL"), - Self::IsNumeric => f.write_str("isNUMERIC"), - Self::Regex => f.write_str("REGEX"), - #[cfg(feature = "rdf-star")] - Self::Triple => f.write_str("TRIPLE"), - #[cfg(feature = "rdf-star")] - Self::Subject => f.write_str("SUBJECT"), - #[cfg(feature = "rdf-star")] - Self::Predicate => f.write_str("PREDICATE"), - #[cfg(feature = "rdf-star")] - Self::Object => f.write_str("OBJECT"), - #[cfg(feature = "rdf-star")] - Self::IsTriple => f.write_str("isTRIPLE"), - #[cfg(feature = "sep-0002")] - Self::Adjust => f.write_str("ADJUST"), - Self::Custom(iri) => iri.fmt(f), - } +#[cfg(test)] +mod test_func { + use super::*; + #[test] + fn display() { + // This is a temporary migration test - we can remove most of these before merging + assert_eq!(Function::Str.to_string(), "STR"); + assert_eq!(Function::Lang.to_string(), "LANG"); + assert_eq!(Function::LangMatches.to_string(), "LANGMATCHES"); + assert_eq!(Function::Datatype.to_string(), "DATATYPE"); + assert_eq!(Function::Iri.to_string(), "IRI"); + assert_eq!(Function::BNode.to_string(), "BNODE"); + assert_eq!(Function::Rand.to_string(), "RAND"); + assert_eq!(Function::Abs.to_string(), "ABS"); + assert_eq!(Function::Ceil.to_string(), "CEIL"); + assert_eq!(Function::Floor.to_string(), "FLOOR"); + assert_eq!(Function::Round.to_string(), "ROUND"); + assert_eq!(Function::Concat.to_string(), "CONCAT"); + assert_eq!(Function::SubStr.to_string(), "SUBSTR"); + assert_eq!(Function::StrLen.to_string(), "STRLEN"); + assert_eq!(Function::Replace.to_string(), "REPLACE"); + assert_eq!(Function::UCase.to_string(), "UCASE"); + assert_eq!(Function::LCase.to_string(), "LCASE"); + assert_eq!(Function::EncodeForUri.to_string(), "ENCODE_FOR_URI"); + assert_eq!(Function::Contains.to_string(), "CONTAINS"); + assert_eq!(Function::StrStarts.to_string(), "STRSTARTS"); + assert_eq!(Function::StrEnds.to_string(), "STRENDS"); + assert_eq!(Function::StrBefore.to_string(), "STRBEFORE"); + assert_eq!(Function::StrAfter.to_string(), "STRAFTER"); + assert_eq!(Function::Year.to_string(), "YEAR"); + assert_eq!(Function::Month.to_string(), "MONTH"); + assert_eq!(Function::Day.to_string(), "DAY"); + assert_eq!(Function::Hours.to_string(), "HOURS"); + assert_eq!(Function::Minutes.to_string(), "MINUTES"); + assert_eq!(Function::Seconds.to_string(), "SECONDS"); + assert_eq!(Function::Timezone.to_string(), "TIMEZONE"); + assert_eq!(Function::Tz.to_string(), "TZ"); + assert_eq!(Function::Now.to_string(), "NOW"); + assert_eq!(Function::Uuid.to_string(), "UUID"); + assert_eq!(Function::StrUuid.to_string(), "STRUUID"); + assert_eq!(Function::Md5.to_string(), "MD5"); + assert_eq!(Function::Sha1.to_string(), "SHA1"); + assert_eq!(Function::Sha256.to_string(), "SHA256"); + assert_eq!(Function::Sha384.to_string(), "SHA384"); + assert_eq!(Function::Sha512.to_string(), "SHA512"); + assert_eq!(Function::StrLang.to_string(), "STRLANG"); + assert_eq!(Function::StrDt.to_string(), "STRDT"); + assert_eq!(Function::IsIri.to_string(), "isIRI"); + assert_eq!(Function::IsBlank.to_string(), "isBLANK"); + assert_eq!(Function::IsLiteral.to_string(), "isLITERAL"); + assert_eq!(Function::IsNumeric.to_string(), "isNUMERIC"); + assert_eq!(Function::Regex.to_string(), "REGEX"); + #[cfg(feature = "rdf-star")] + assert_eq!(Function::Triple.to_string(), "TRIPLE"); + #[cfg(feature = "rdf-star")] + assert_eq!(Function::Subject.to_string(), "SUBJECT"); + #[cfg(feature = "rdf-star")] + assert_eq!(Function::Predicate.to_string(), "PREDICATE"); + #[cfg(feature = "rdf-star")] + assert_eq!(Function::Object.to_string(), "OBJECT"); + #[cfg(feature = "rdf-star")] + assert_eq!(Function::IsTriple.to_string(), "isTRIPLE"); + #[cfg(feature = "sep-0002")] + assert_eq!(Function::Adjust.to_string(), "ADJUST"); + assert_eq!( + Function::Custom(NamedNode::new("http://example.com/foo").unwrap()).to_string(), + "" + ); } } @@ -1217,7 +1254,8 @@ impl fmt::Display for AggregateExpression { } /// An aggregate function name. -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash, parse_display::Display)] +#[display(style = "SNAKE_CASE")] pub enum AggregateFunction { /// [Count](https://www.w3.org/TR/sparql11-query/#defn_aggCount) with *. Count, @@ -1230,11 +1268,11 @@ pub enum AggregateFunction { /// [Max](https://www.w3.org/TR/sparql11-query/#defn_aggMax). Max, /// [GroupConcat](https://www.w3.org/TR/sparql11-query/#defn_aggGroupConcat). - GroupConcat { - separator: Option, - }, + #[display("{}")] + GroupConcat { separator: Option }, /// [Sample](https://www.w3.org/TR/sparql11-query/#defn_aggSample). Sample, + #[display("{0}")] Custom(NamedNode), } @@ -1254,23 +1292,36 @@ impl AggregateFunction { } } -impl fmt::Display for AggregateFunction { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Count => f.write_str("COUNT"), - Self::Sum => f.write_str("SUM"), - Self::Avg => f.write_str("AVG"), - Self::Min => f.write_str("MIN"), - Self::Max => f.write_str("MAX"), - Self::GroupConcat { .. } => f.write_str("GROUP_CONCAT"), - Self::Sample => f.write_str("SAMPLE"), - Self::Custom(iri) => iri.fmt(f), - } +#[cfg(test)] +mod test_agg { + use super::*; + #[test] + fn display() { + // This is a temporary migration test - we can remove most of these before merging + assert_eq!(AggregateFunction::Count.to_string(), "COUNT"); + assert_eq!(AggregateFunction::Sum.to_string(), "SUM"); + assert_eq!(AggregateFunction::Avg.to_string(), "AVG"); + assert_eq!(AggregateFunction::Min.to_string(), "MIN"); + assert_eq!(AggregateFunction::Max.to_string(), "MAX"); + assert_eq!( + AggregateFunction::GroupConcat { + separator: Some("foo".to_owned()) + } + .to_string(), + "GROUP_CONCAT" + ); + assert_eq!(AggregateFunction::Sample.to_string(), "SAMPLE"); + assert_eq!( + AggregateFunction::Custom(NamedNode::new("http://example.com/foo").unwrap()) + .to_string(), + "" + ); } } /// An ordering comparator used by [`GraphPattern::OrderBy`]. -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash, parse_display::Display)] +#[display("{}({0})", style = "UPPERCASE")] pub enum OrderExpression { /// Ascending order Asc(Expression), @@ -1296,12 +1347,26 @@ impl OrderExpression { } } -impl fmt::Display for OrderExpression { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Asc(e) => write!(f, "ASC({e})"), - Self::Desc(e) => write!(f, "DESC({e})"), - } +#[cfg(test)] +mod test_order_expr { + use super::*; + #[test] + fn display() { + // This is a temporary migration test - we can remove most of these before merging + assert_eq!( + OrderExpression::Asc(Expression::NamedNode( + NamedNode::new("http://example.com/foo").unwrap() + )) + .to_string(), + "ASC()" + ); + assert_eq!( + OrderExpression::Desc(Expression::NamedNode( + NamedNode::new("http://example.com/bar").unwrap() + )) + .to_string(), + "DESC()" + ); } } @@ -1351,11 +1416,15 @@ impl fmt::Display for QueryDataset { /// A target RDF graph for update operations. /// /// Could be a specific graph, all named graphs or the complete dataset. -#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[derive(Eq, PartialEq, Debug, Clone, Hash, parse_display::Display)] pub enum GraphTarget { + #[display("GRAPH {0}")] NamedNode(NamedNode), + #[display("DEFAULT")] DefaultGraph, + #[display("NAMED")] NamedGraphs, + #[display("ALL")] AllGraphs, } @@ -1371,17 +1440,6 @@ impl GraphTarget { } } -impl fmt::Display for GraphTarget { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::NamedNode(node) => write!(f, "GRAPH {node}"), - Self::DefaultGraph => f.write_str("DEFAULT"), - Self::NamedGraphs => f.write_str("NAMED"), - Self::AllGraphs => f.write_str("ALL"), - } - } -} - impl From for GraphTarget { fn from(node: NamedNode) -> Self { Self::NamedNode(node)