Server: refactor bulk load code

pull/299/head
Tpt 2 years ago committed by Thomas Tanon
parent 796780cd12
commit 2de13a9498
  1. 110
      server/src/main.rs

@ -1,4 +1,4 @@
use anyhow::{anyhow, bail}; use anyhow::bail;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use flate2::read::MultiGzDecoder; use flate2::read::MultiGzDecoder;
use oxhttp::model::{Body, HeaderName, HeaderValue, Request, Response, Status}; use oxhttp::model::{Body, HeaderName, HeaderValue, Request, Response, Status};
@ -116,10 +116,17 @@ pub fn main() -> anyhow::Result<()> {
bulk_load( bulk_load(
loader, loader,
MultiGzDecoder::new(fp), MultiGzDecoder::new(fp),
&file.with_extension(""), GraphOrDatasetFormat::from_path(&file.with_extension(""))
.unwrap(),
None,
) )
} else { } else {
bulk_load(loader, fp, &file) bulk_load(
loader,
fp,
GraphOrDatasetFormat::from_path(&file).unwrap(),
None,
)
} }
} { } {
eprintln!("Error while loading file {}: {}", file.display(), error) eprintln!("Error while loading file {}: {}", file.display(), error)
@ -142,29 +149,72 @@ pub fn main() -> anyhow::Result<()> {
} }
} }
fn bulk_load(loader: BulkLoader, reader: impl Read, file: &Path) -> anyhow::Result<()> { fn bulk_load(
let extension = file loader: BulkLoader,
.extension() reader: impl Read,
.and_then(|extension| extension.to_str()) format: GraphOrDatasetFormat,
.ok_or_else(|| { base_iri: Option<&str>,
anyhow!( ) -> anyhow::Result<()> {
"Not able to guess the file format of {} because the file name as no extension",
file.display()
)
})?;
let reader = BufReader::new(reader); let reader = BufReader::new(reader);
if let Some(format) = DatasetFormat::from_extension(extension) { match format {
loader.load_dataset(reader, format, None)?; GraphOrDatasetFormat::Graph(format) => {
Ok(()) loader.load_graph(reader, format, GraphNameRef::DefaultGraph, base_iri)
} else if let Some(format) = GraphFormat::from_extension(extension) { }
loader.load_graph(reader, format, GraphNameRef::DefaultGraph, None)?; GraphOrDatasetFormat::Dataset(format) => loader.load_dataset(reader, format, base_iri),
}?;
Ok(()) Ok(())
}
/// A serialization format that is either a graph format or a dataset format.
///
/// `bulk_load` matches on the variant to decide whether to call the loader's
/// `load_graph` or `load_dataset` method.
#[derive(Copy, Clone)]
enum GraphOrDatasetFormat {
/// A graph format; `bulk_load` loads it into the default graph.
Graph(GraphFormat),
/// A dataset format; `bulk_load` hands it to `load_dataset`.
Dataset(DatasetFormat),
}
impl GraphOrDatasetFormat {
fn from_path(path: &Path) -> anyhow::Result<Self> {
if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) {
Self::from_name(ext).map_err(|e| {
e.context(format!(
"Not able to guess the file format from file name extension '{}'",
ext
))
})
} else { } else {
bail!( bail!(
"Not able to guess the file format from the extension {}", "The path {} has no extension to guess a file format from",
extension path.display()
) )
} }
}
fn from_name(name: &str) -> anyhow::Result<Self> {
let mut candidates = Vec::with_capacity(4);
if let Some(f) = GraphFormat::from_extension(name) {
candidates.push(GraphOrDatasetFormat::Graph(f));
}
if let Some(f) = DatasetFormat::from_extension(name) {
candidates.push(GraphOrDatasetFormat::Dataset(f));
}
if let Some(f) = GraphFormat::from_media_type(name) {
candidates.push(GraphOrDatasetFormat::Graph(f));
}
if let Some(f) = DatasetFormat::from_media_type(name) {
candidates.push(GraphOrDatasetFormat::Dataset(f));
}
if candidates.is_empty() {
bail!("The format '{}' is unknown", name)
} else if candidates.len() == 1 {
Ok(candidates[0])
} else {
bail!("The format '{}' can be resolved to multiple known formats, not sure what to pick ({})", name, candidates.iter().fold(String::new(), |a, f| {
a + " " + match f {
GraphOrDatasetFormat::Graph(f) => f.file_extension(),
GraphOrDatasetFormat::Dataset(f) => f.file_extension(),
}
}).trim())
}
}
} }
fn handle_request(request: &mut Request, store: Store) -> Response { fn handle_request(request: &mut Request, store: Store) -> Response {
@ -1093,6 +1143,8 @@ mod tests {
use anyhow::Result; use anyhow::Result;
use assert_cmd::Command; use assert_cmd::Command;
use assert_fs::prelude::*; use assert_fs::prelude::*;
use flate2::write::GzEncoder;
use flate2::Compression;
use oxhttp::model::Method; use oxhttp::model::Method;
use predicates::prelude::*; use predicates::prelude::*;
@ -1146,6 +1198,24 @@ mod tests {
Ok(()) Ok(())
} }
/// Runs the `load` subcommand on a gzip-compressed `.nq.gz` file holding one
/// statement and expects it to report a single loaded triple on stderr.
#[test]
fn cli_load_gzip_dataset() -> Result<()> {
    let input = assert_fs::NamedTempFile::new("sample.nq.gz")?;

    // Compress the single statement in memory before writing it to disk.
    let mut gzip = GzEncoder::new(Vec::new(), Compression::default());
    gzip.write_all(b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .")?;
    let compressed = gzip.finish()?;
    input.write_binary(&compressed)?;

    let outcome = cli_command()?
        .arg("load")
        .arg("-f")
        .arg(input.path())
        .assert();
    outcome
        .success()
        .stdout("")
        .stderr(predicate::str::starts_with("1 triples loaded"));
    Ok(())
}
#[test] #[test]
fn get_ui() { fn get_ui() {
ServerTest::new().test_status( ServerTest::new().test_status(

Loading…
Cancel
Save