diff --git a/wikibase/src/loader.rs b/wikibase/src/loader.rs index 69c1419f..6b4f360b 100644 --- a/wikibase/src/loader.rs +++ b/wikibase/src/loader.rs @@ -1,5 +1,5 @@ use crate::SERVER; -use chrono::{DateTime, Utc}; +use chrono::{DateTime, Datelike, Utc}; use oxigraph::model::NamedNode; use oxigraph::*; use reqwest::header::USER_AGENT; @@ -16,6 +16,7 @@ pub struct WikibaseLoader { entity_data_url: Url, client: Client, namespaces: Vec, + slot: Option, frequency: Duration, start: DateTime, } @@ -26,6 +27,7 @@ impl WikibaseLoader { api_url: &str, pages_base_url: &str, namespaces: &[u32], + slot: Option<&str>, frequency: Duration, ) -> Result { Ok(Self { @@ -35,16 +37,23 @@ impl WikibaseLoader { .map_err(Error::wrap)?, client: Client::new(), namespaces: namespaces.to_vec(), + slot: slot.map(|t| t.to_owned()), start: Utc::now(), frequency, }) } pub fn initial_loading(&mut self) -> Result<()> { - println!("Initial loading "); - self.start = Utc::now(); + if self.slot.is_some() { + println!("Skipping initial loading because a slot is required"); + // No good initial loading + self.start = self.start.with_year(2018).unwrap(); + return Ok(()); + } + + println!("Initial loading "); for namespace in &self.namespaces { let mut parameters = HashMap::default(); parameters.insert("action".to_owned(), "query".to_owned()); @@ -104,14 +113,18 @@ impl WikibaseLoader { let mut parameters = HashMap::default(); parameters.insert("action".to_owned(), "query".to_owned()); parameters.insert("list".to_owned(), "recentchanges".to_owned()); - parameters.insert( - "rcnamespace".to_owned(), - self.namespaces - .iter() - .map(|ns| ns.to_string()) - .collect::>() - .join("|"), - ); + if let Some(slot) = &self.slot { + parameters.insert("rcslot".to_owned(), slot.to_owned()); + } else { + parameters.insert( + "rcnamespace".to_owned(), + self.namespaces + .iter() + .map(|ns| ns.to_string()) + .collect::>() + .join("|"), + ); + } parameters.insert("rcend".to_owned(), start.to_rfc2822()); parameters.insert("rcprop".to_owned(), "title|ids".to_owned()); parameters.insert("limit".to_owned(), "50".to_owned()); @@ -128,20 +141,21 @@ impl WikibaseLoader { .unwrap() { let desc = change.as_object().unwrap(); - let title = desc.get("title").unwrap().as_str().unwrap(); - - let id = title.split(':').last().unwrap_or(title); - if seen.contains(id) { + let id = if desc.get("ns").unwrap().as_u64().unwrap() == 6 { + // Hack for media info + format!("M{}", desc.get("pageid").unwrap().as_u64().unwrap()) + } else { + let title = desc.get("title").unwrap().as_str().unwrap(); + title.split(':').last().unwrap_or(title).to_owned() + }; + if seen.contains(&id) { continue; } - seen.insert(id.to_owned()); + seen.insert(id.clone()); - match self.get_entity_data(id) { + match self.get_entity_data(&id) { Ok(data) => { - self.load_entity_data( - &(self.entity_data_url.to_string() + "/" + id), - data, - )?; + self.load_entity_data(&format!("{}/{}", self.entity_data_url, id), data)?; } Err(e) => eprintln!("Error while retrieving data for entity {}: {}", id, e), } diff --git a/wikibase/src/main.rs b/wikibase/src/main.rs index 3125ae8f..1c598abd 100644 --- a/wikibase/src/main.rs +++ b/wikibase/src/main.rs @@ -65,8 +65,15 @@ pub fn main() { .arg( Arg::with_name("namespaces") .long("namespaces") - .help("Namespaces ids, to load in Blazegraph like \"0,120\"") - .required(true) + .help("Namespaces ids to load like \"0,120\"") + .required(false) + .takes_value(true), + ) + .arg( + Arg::with_name("slot") + .long("slot") + .help("Slot to load like \"mediainfo\". Could not be use with namespaces") + .required(false) .takes_value(true), ) .get_matches(); @@ -91,16 +98,25 @@ where let mediawiki_base_url = matches.value_of("mediawiki_base_url").unwrap().to_owned(); let namespaces = matches .value_of("namespaces") - .unwrap() + .unwrap_or("") .split(',') - .map(|t| u32::from_str(t.trim()).unwrap()) + .flat_map(|t| { + let t = t.trim(); + if t.is_empty() { + None + } else { + Some(u32::from_str(t).unwrap()) + } + }) .collect::>(); + let slot = matches.value_of("slot").map(|t| t.to_owned()); thread::spawn(move || { let mut loader = WikibaseLoader::new( repo.as_ref(), &mediawiki_api, &mediawiki_base_url, &namespaces, + slot.as_deref(), Duration::new(10, 0), ) .unwrap();