diff --git a/lib/oxrdfio/src/serializer.rs b/lib/oxrdfio/src/serializer.rs index 6ec4e666..1a721b6c 100644 --- a/lib/oxrdfio/src/serializer.rs +++ b/lib/oxrdfio/src/serializer.rs @@ -82,24 +82,24 @@ impl RdfSerializer { /// assert_eq!(buffer.as_slice(), " .\n".as_bytes()); /// # Result::<_,Box>::Ok(()) /// ``` - pub fn serialize_to_write(&self, writer: W) -> ToWriteQuadWriter { + pub fn serialize_to_write(&self, write: W) -> ToWriteQuadWriter { ToWriteQuadWriter { formatter: match self.format { - RdfFormat::NQuads => ToWriteQuadWriterKind::NQuads( - NQuadsSerializer::new().serialize_to_write(writer), - ), + RdfFormat::NQuads => { + ToWriteQuadWriterKind::NQuads(NQuadsSerializer::new().serialize_to_write(write)) + } RdfFormat::NTriples => ToWriteQuadWriterKind::NTriples( - NTriplesSerializer::new().serialize_to_write(writer), - ), - RdfFormat::RdfXml => ToWriteQuadWriterKind::RdfXml( - RdfXmlSerializer::new().serialize_to_write(writer), + NTriplesSerializer::new().serialize_to_write(write), ), + RdfFormat::RdfXml => { + ToWriteQuadWriterKind::RdfXml(RdfXmlSerializer::new().serialize_to_write(write)) + } RdfFormat::TriG => { - ToWriteQuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(writer)) + ToWriteQuadWriterKind::TriG(TriGSerializer::new().serialize_to_write(write)) + } + RdfFormat::Turtle | RdfFormat::N3 => { + ToWriteQuadWriterKind::Turtle(TurtleSerializer::new().serialize_to_write(write)) } - RdfFormat::Turtle | RdfFormat::N3 => ToWriteQuadWriterKind::Turtle( - TurtleSerializer::new().serialize_to_write(writer), - ), }, } } @@ -134,24 +134,24 @@ impl RdfSerializer { #[cfg(feature = "async-tokio")] pub fn serialize_to_tokio_async_write( &self, - writer: W, + write: W, ) -> ToTokioAsyncWriteQuadWriter { ToTokioAsyncWriteQuadWriter { formatter: match self.format { RdfFormat::NQuads => ToTokioAsyncWriteQuadWriterKind::NQuads( - NQuadsSerializer::new().serialize_to_tokio_async_write(writer), + 
NQuadsSerializer::new().serialize_to_tokio_async_write(write), ), RdfFormat::NTriples => ToTokioAsyncWriteQuadWriterKind::NTriples( - NTriplesSerializer::new().serialize_to_tokio_async_write(writer), + NTriplesSerializer::new().serialize_to_tokio_async_write(write), ), RdfFormat::RdfXml => ToTokioAsyncWriteQuadWriterKind::RdfXml( - RdfXmlSerializer::new().serialize_to_tokio_async_write(writer), + RdfXmlSerializer::new().serialize_to_tokio_async_write(write), ), RdfFormat::TriG => ToTokioAsyncWriteQuadWriterKind::TriG( - TriGSerializer::new().serialize_to_tokio_async_write(writer), + TriGSerializer::new().serialize_to_tokio_async_write(write), ), RdfFormat::Turtle | RdfFormat::N3 => ToTokioAsyncWriteQuadWriterKind::Turtle( - TurtleSerializer::new().serialize_to_tokio_async_write(writer), + TurtleSerializer::new().serialize_to_tokio_async_write(write), ), }, } diff --git a/lib/src/io/write.rs b/lib/src/io/write.rs index 7c308c86..4c398791 100644 --- a/lib/src/io/write.rs +++ b/lib/src/io/write.rs @@ -45,9 +45,9 @@ impl GraphSerializer { } /// Returns a [`TripleWriter`] allowing writing triples into the given [`Write`] implementation - pub fn triple_writer(&self, writer: W) -> TripleWriter { + pub fn triple_writer(&self, write: W) -> TripleWriter { TripleWriter { - writer: self.inner.serialize_to_write(writer), + writer: self.inner.serialize_to_write(write), } } } @@ -128,9 +128,9 @@ impl DatasetSerializer { } /// Returns a [`QuadWriter`] allowing writing triples into the given [`Write`] implementation - pub fn quad_writer(&self, writer: W) -> QuadWriter { + pub fn quad_writer(&self, write: W) -> QuadWriter { QuadWriter { - writer: self.inner.serialize_to_write(writer), + writer: self.inner.serialize_to_write(write), } } } diff --git a/server/src/main.rs b/server/src/main.rs index 67a5d527..638f0e60 100644 --- a/server/src/main.rs +++ b/server/src/main.rs @@ -4,7 +4,7 @@ use clap::{Parser, Subcommand}; use flate2::read::MultiGzDecoder; use oxhttp::model::{Body, 
HeaderName, HeaderValue, Method, Request, Response, Status}; use oxhttp::Server; -use oxigraph::io::{RdfFormat, RdfSerializer}; +use oxigraph::io::{RdfFormat, RdfParser, RdfSerializer}; use oxigraph::model::{ GraphName, GraphNameRef, IriParseError, NamedNode, NamedNodeRef, NamedOrBlankNode, }; @@ -129,9 +129,10 @@ enum Command { /// By default the format is guessed from the loaded file extension. #[arg(long, required_unless_present = "file")] format: Option, + /// Base IRI of the file(s) to load. + #[arg(long)] + base: Option, /// Attempt to keep loading even if the data file is invalid. - /// - /// Only works with N-Triples and N-Quads for now. #[arg(long)] lenient: bool, /// Name of the graph to load the data to. @@ -174,7 +175,7 @@ enum Command { /// If no query or query file are given, stdin is used. #[arg(long, conflicts_with = "query")] query_file: Option, - /// Base URI of the query. + /// Base IRI of the query. #[arg(long)] query_base: Option, /// File in which the query results will be stored. @@ -219,7 +220,7 @@ enum Command { /// If no update or update file are given, stdin is used. #[arg(long, conflicts_with = "update")] update_file: Option, - /// Base URI of the update. + /// Base IRI of the update. #[arg(long)] update_base: Option, }, @@ -228,6 +229,52 @@ enum Command { /// Done by default in the background when serving requests. /// It is likely to not be useful in most of cases except if you provide a read-only SPARQL endpoint under heavy load. Optimize {}, + /// Converts an RDF serialization from one format to another. + Convert { + /// File to convert from. + /// + /// If no file is given, stdin is read. + #[arg(short, long)] + from_file: Option, + /// The format of the file(s) to convert from. + /// + /// Can be an extension like "nt" or a MIME type like "application/n-triples". + /// + /// By default the format is guessed from the input file extension. 
+ #[arg(long, required_unless_present = "from_file")] + from_format: Option, + /// Base IRI of the file to read. + #[arg(long)] + from_base: Option, + /// File to convert to. + /// + /// If no file is given, stdout is written. + #[arg(short, long)] + to_file: Option, + /// The format of the file(s) to convert to. + /// + /// Can be an extension like "nt" or a MIME type like "application/n-triples". + /// + /// By default the format is guessed from the target file extension. + #[arg(long, required_unless_present = "to_file")] + to_format: Option, + /// Attempt to keep converting even if the data file is invalid. + #[arg(long)] + lenient: bool, + /// Only load the given named graph from the input file. + /// + /// By default all graphs are loaded. + #[arg(long, conflicts_with = "from_default_graph")] + from_graph: Option, + /// Only load the default graph from the input file. + #[arg(long, conflicts_with = "from_graph")] + from_default_graph: bool, + /// Name of the graph to map the default graph to. + /// + /// By default the default graph is used. 
+ #[arg(long)] + to_graph: Option, + }, } pub fn main() -> anyhow::Result<()> { @@ -286,6 +333,7 @@ pub fn main() -> anyhow::Result<()> { file, lenient, format, + base, graph, } => { let store = if let Some(location) = matches.location { @@ -331,7 +379,7 @@ pub fn main() -> anyhow::Result<()> { format.ok_or_else(|| { anyhow!("The --format option must be set when loading from stdin") })?, - None, + base.as_deref(), graph, ) } else { @@ -343,6 +391,7 @@ pub fn main() -> anyhow::Result<()> { for file in file { let store = store.clone(); let graph = graph.clone(); + let base = base.clone(); s.spawn(move |_| { let f = file.clone(); let start = Instant::now(); @@ -383,7 +432,7 @@ pub fn main() -> anyhow::Result<()> { rdf_format_from_path(&file.with_extension("")) .unwrap() }), - None, + base.as_deref(), graph, ) } else { @@ -393,7 +442,7 @@ pub fn main() -> anyhow::Result<()> { format.unwrap_or_else(|| { rdf_format_from_path(&file).unwrap() }), - None, + base.as_deref(), graph, ) } @@ -631,6 +680,101 @@ pub fn main() -> anyhow::Result<()> { store.optimize()?; Ok(()) } + Command::Convert { + from_file, + from_format, + from_base, + to_file, + to_format, + lenient, + from_graph, + from_default_graph, + to_graph, + } => { + let from_format = if let Some(format) = from_format { + rdf_format_from_name(&format)? + } else if let Some(file) = &from_file { + rdf_format_from_path(file)? + } else { + bail!("The --from-format option must be set when reading from stdin") + }; + let mut parser = RdfParser::from_format(from_format); + if let Some(base) = from_base { + parser = parser + .with_base_iri(&base) + .with_context(|| anyhow!("Invalid base IRI {base}"))?; + } + + let to_format = if let Some(format) = to_format { + rdf_format_from_name(&format)? + } else if let Some(file) = &to_file { + rdf_format_from_path(file)? 
+ } else { + bail!("The --to-format option must be set when writing to stdout") + }; + let serializer = RdfSerializer::from_format(to_format); + + let from_graph = if let Some(from_graph) = from_graph { + Some( + NamedNode::new(&from_graph) + .with_context(|| format!("The source graph name {from_graph} is invalid"))? + .into(), + ) + } else if from_default_graph { + Some(GraphName::DefaultGraph) + } else { + None + }; + let to_graph = if let Some(to_graph) = to_graph { + NamedNode::new(&to_graph) + .with_context(|| format!("The target graph name {to_graph} is invalid"))? + .into() + } else { + GraphName::DefaultGraph + }; + + match (from_file, to_file) { + (Some(from_file), Some(to_file)) => close_file_writer(do_convert( + &parser, + File::open(from_file)?, + &serializer, + BufWriter::new(File::create(to_file)?), + lenient, + &from_graph, + &to_graph, + )?), + (Some(from_file), None) => do_convert( + &parser, + File::open(from_file)?, + &serializer, + stdout().lock(), + lenient, + &from_graph, + &to_graph, + )? + .flush(), + (None, Some(to_file)) => close_file_writer(do_convert( + &parser, + stdin().lock(), + &serializer, + BufWriter::new(File::create(to_file)?), + lenient, + &from_graph, + &to_graph, + )?), + (None, None) => do_convert( + &parser, + stdin().lock(), + &serializer, + stdout().lock(), + lenient, + &from_graph, + &to_graph, + )? + .flush(), + }?; + Ok(()) + } } } @@ -651,18 +795,55 @@ fn bulk_load( fn dump( store: &Store, - writer: W, + write: W, format: RdfFormat, from_graph_name: Option>, ) -> anyhow::Result { ensure!(format.supports_datasets() || from_graph_name.is_some(), "The --graph option is required when writing a format not supporting datasets like NTriples, Turtle or RDF/XML"); Ok(if let Some(from_graph_name) = from_graph_name { - store.dump_graph(writer, format, from_graph_name) + store.dump_graph(write, format, from_graph_name) } else { - store.dump_dataset(writer, format) + store.dump_dataset(write, format) }?) 
} +fn do_convert( + parser: &RdfParser, + read: R, + serializer: &RdfSerializer, + write: W, + lenient: bool, + from_graph: &Option, + default_graph: &GraphName, +) -> anyhow::Result { + let mut writer = serializer.serialize_to_write(write); + for quad_result in parser.parse_read(read) { + match quad_result { + Ok(mut quad) => { + if let Some(from_graph) = from_graph { + if quad.graph_name == *from_graph { + quad.graph_name = GraphName::DefaultGraph; + } else { + continue; + } + } + if quad.graph_name.is_default_graph() { + quad.graph_name = default_graph.clone(); + } + writer.write_quad(&quad)?; + } + Err(e) => { + if lenient { + eprintln!("Parsing error: {e}"); + } else { + return Err(e.into()); + } + } + } + } + Ok(writer.finish()?) +} + fn format_from_path( path: &Path, from_extension: impl FnOnce(&str) -> anyhow::Result, @@ -1710,15 +1891,16 @@ mod tests { #[test] fn cli_load_optimize_and_dump_graph() -> Result<()> { let store_dir = TempDir::new()?; - let input_file = NamedTempFile::new("input.nt")?; - input_file - .write_str(" .")?; + let input_file = NamedTempFile::new("input.ttl")?; + input_file.write_str(" .")?; cli_command()? .arg("--location") .arg(store_dir.path()) .arg("load") .arg("--file") .arg(input_file.path()) + .arg("--base") + .arg("http://example.com/") .assert() .success(); @@ -2041,6 +2223,60 @@ mod tests { ) } + #[test] + fn cli_convert_file() -> Result<()> { + let input_file = NamedTempFile::new("input.ttl")?; + input_file.write_str("

.")?; + let output_file = NamedTempFile::new("output.nt")?; + cli_command()? + .arg("convert") + .arg("--from-file") + .arg(input_file.path()) + .arg("--from-base") + .arg("http://example.com/") + .arg("--to-file") + .arg(output_file.path()) + .assert() + .success(); + output_file + .assert(" .\n"); + Ok(()) + } + + #[test] + fn cli_convert_from_default_graph_to_named_graph() -> Result<()> { + cli_command()? + .arg("convert") + .arg("--from-format") + .arg("trig") + .arg("--to-format") + .arg("nq") + .arg("--from-default-graph") + .arg("--to-graph") + .arg("http://example.com/t") + .write_stdin("@base .

. { . }") + .assert() + .stdout(" .\n") + .success(); + Ok(()) + } + + #[test] + fn cli_convert_from_named_graph() -> Result<()> { + cli_command()? + .arg("convert") + .arg("--from-format") + .arg("trig") + .arg("--to-format") + .arg("nq") + .arg("--from-graph") + .arg("http://example.com/g") + .write_stdin("@base .

. { . }") + .assert() + .stdout(" .\n"); + Ok(()) + } + #[test] fn get_ui() -> Result<()> { ServerTest::new()?.test_status(