From c423d931a5efa3ac589b14faf0bc98b8f644a872 Mon Sep 17 00:00:00 2001 From: Gabriel Schneider Date: Thu, 26 Oct 2023 19:14:03 -0300 Subject: [PATCH 1/5] small improvements to archives - added infer as dependency to check for zip and gz archives. - added option to extract to a configurable directory. - after an archive is extracted, the compressed file is deleted. --- Cargo.toml | 1 + src/archives.rs | 11 +++++++---- src/cache.rs | 14 +++++++++++--- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 41dada5..53c4601 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ indicatif = "0.16" env_logger = { version = "0.10", optional = true } structopt = { version = "0.3", optional = true } color-eyre = { version = "0.6", optional = true } +infer = "0.15.0" [features] default = ["default-tls"] diff --git a/src/archives.rs b/src/archives.rs index 3095a2a..99a2473 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -13,9 +13,11 @@ pub(crate) enum ArchiveFormat { impl ArchiveFormat { /// Parse archive type from resource extension. pub(crate) fn parse_from_extension(resource: &str) -> Result { - if resource.ends_with(".tar.gz") { + let ext = infer::get_from_path(resource).unwrap().unwrap().extension(); + // Here we assume that gz contain a tar inside it. + if ext.ends_with("gz") { Ok(Self::TarGz) - } else if resource.ends_with(".zip") { + } else if ext.ends_with("zip") { Ok(Self::Zip) } else { Err(Error::ExtractionError("unsupported archive format".into())) @@ -34,13 +36,13 @@ pub(crate) fn extract_archive>( match format { ArchiveFormat::TarGz => { - let tar_gz = File::open(path)?; + let tar_gz = File::open(&path)?; let tar = GzDecoder::new(tar_gz); let mut archive = tar::Archive::new(tar); archive.unpack(&temp_target)?; } ArchiveFormat::Zip => { - let file = File::open(path)?; + let file = File::open(&path)?; let mut archive = zip::ZipArchive::new(file).map_err(|e| Error::ExtractionError(e.to_string()))?; archive @@ -49,6 +51,7 @@ pub(crate) fn extract_archive>( } }; + fs::remove_file(&path)?; // Now rename the temp directory to the final target directory. fs::rename(temp_target, target)?; diff --git a/src/cache.rs b/src/cache.rs index 9735884..f14413a 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -154,13 +154,16 @@ pub struct Options { pub subdir: Option, /// Automatically extract the resource, assuming the resource is an archive. pub extract: bool, + /// An optional subdirectory (relative to the cache root) to extract the resource in. + pub extract_dir: Option, } impl Options { - pub fn new(subdir: Option<&str>, extract: bool) -> Self { + pub fn new(subdir: Option<&str>, extract: bool, extract_dir: Option<&str>) -> Self { Self { subdir: subdir.map(String::from), extract, + extract_dir: extract_dir.map(String::from), } } @@ -317,9 +320,14 @@ impl Cache { filelock.lock_exclusive()?; debug!("Lock on extraction directory acquired for {}", resource); + let dirpath = if options.extract_dir.is_some() { + self.dir.join(options.extract_dir.as_ref().unwrap()) + } else { + dirpath + }; if !dirpath.is_dir() { info!("Extracting {} to {:?}", resource, dirpath); - let format = ArchiveFormat::parse_from_extension(resource)?; + let format = ArchiveFormat::parse_from_extension(cached_path.to_str().unwrap())?; extract_archive(&cached_path, &dirpath, &format)?; } @@ -356,7 +364,7 @@ impl Cache { resource: &str, subdir: Option<&str>, ) -> Result { - let options = Options::new(subdir, false); + let options = Options::new(subdir, false, None); self.cached_path_with_options(resource, &options) } From 31c43b886b13f0fe2d011adebfe45f1eb3b918ee Mon Sep 17 00:00:00 2001 From: Gabriel Schneider Date: Thu, 26 Oct 2023 19:22:33 -0300 Subject: [PATCH 2/5] small commit to trigger workflow --- src/archives.rs | 2 ++ src/cache.rs | 5 ++++- src/meta.rs | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/archives.rs b/src/archives.rs index 99a2473..96932e0 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -1,5 +1,7 @@ use crate::error::Error; use flate2::read::GzDecoder; +use log::info; +use std::convert::TryInto; use std::fs::{self, File}; use std::path::Path; use tempfile::tempdir_in; diff --git a/src/cache.rs b/src/cache.rs index f14413a..a8014fe 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -292,6 +292,7 @@ impl Cache { } } else { // This is a remote resource, so fetch it to the cache. + debug!("Getting remote file GBRLS debug {}", resource); let meta = self.fetch_remote_resource(resource, options.subdir.as_deref())?; // Check if we need to extract. @@ -325,10 +326,12 @@ impl Cache { } else { dirpath }; + if !dirpath.is_dir() { info!("Extracting {} to {:?}", resource, dirpath); let format = ArchiveFormat::parse_from_extension(cached_path.to_str().unwrap())?; extract_archive(&cached_path, &dirpath, &format)?; + info!("Done extracting (deleteme) {} to {:?}", resource, dirpath); } filelock.unlock()?; @@ -666,7 +669,7 @@ mod tests { .build() .unwrap(); - let resource = "http://localhost:5000/foo.txt"; + let resource = "http://localhost:5000/foo.txt"; assert_eq!( cache .resource_to_filepath(resource, &None, None, None) diff --git a/src/meta.rs b/src/meta.rs index 6530e06..41c78a6 100644 --- a/src/meta.rs +++ b/src/meta.rs @@ -20,6 +20,8 @@ pub(crate) struct Meta { pub(crate) expires: Option, /// Time this version of the resource was cached. pub(crate) creation_time: f64, + /// Time of the last time the resource was accessed, default to creation_time. + pub(crate) last_accessed: f64, } impl Meta { @@ -35,6 +37,7 @@ impl Meta { expires = Some(creation_time + (lifetime as f64)); } let meta_path = Meta::meta_path(&resource_path); + let last_accessed = creation_time; Meta { resource, resource_path, @@ -42,6 +45,7 @@ impl Meta { etag, expires, creation_time, + last_accessed, } } From 25850be3f5baaa2877ee74c2fc5af9313d9ede48 Mon Sep 17 00:00:00 2001 From: Gabriel Schneider Date: Thu, 26 Oct 2023 19:27:04 -0300 Subject: [PATCH 3/5] Revert "small commit to trigger workflow" This reverts commit 31c43b886b13f0fe2d011adebfe45f1eb3b918ee. --- src/archives.rs | 2 -- src/cache.rs | 5 +---- src/meta.rs | 4 ---- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/archives.rs b/src/archives.rs index 96932e0..99a2473 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -1,7 +1,5 @@ use crate::error::Error; use flate2::read::GzDecoder; -use log::info; -use std::convert::TryInto; use std::fs::{self, File}; use std::path::Path; use tempfile::tempdir_in; diff --git a/src/cache.rs b/src/cache.rs index a8014fe..f14413a 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -292,7 +292,6 @@ impl Cache { } } else { // This is a remote resource, so fetch it to the cache. - debug!("Getting remote file GBRLS debug {}", resource); let meta = self.fetch_remote_resource(resource, options.subdir.as_deref())?; // Check if we need to extract. @@ -326,12 +325,10 @@ impl Cache { } else { dirpath }; - if !dirpath.is_dir() { info!("Extracting {} to {:?}", resource, dirpath); let format = ArchiveFormat::parse_from_extension(cached_path.to_str().unwrap())?; extract_archive(&cached_path, &dirpath, &format)?; - info!("Done extracting (deleteme) {} to {:?}", resource, dirpath); } filelock.unlock()?; @@ -669,7 +666,7 @@ mod tests { .build() .unwrap(); - let resource = "http://localhost:5000/foo.txt"; + let resource = "http://localhost:5000/foo.txt"; assert_eq!( cache .resource_to_filepath(resource, &None, None, None) diff --git a/src/meta.rs b/src/meta.rs index 41c78a6..6530e06 100644 --- a/src/meta.rs +++ b/src/meta.rs @@ -20,8 +20,6 @@ pub(crate) struct Meta { pub(crate) expires: Option, /// Time this version of the resource was cached. pub(crate) creation_time: f64, - /// Time of the last time the resource was accessed, default to creation_time. - pub(crate) last_accessed: f64, } impl Meta { @@ -37,7 +35,6 @@ impl Meta { expires = Some(creation_time + (lifetime as f64)); } let meta_path = Meta::meta_path(&resource_path); - let last_accessed = creation_time; Meta { resource, resource_path, @@ -45,7 +42,6 @@ impl Meta { etag, expires, creation_time, - last_accessed, } } From 7285edfa77ec0e4de0099f80438fdd8e6d7a47e0 Mon Sep 17 00:00:00 2001 From: Gabriel Schneider Date: Fri, 27 Oct 2023 11:38:09 -0300 Subject: [PATCH 4/5] added extraction_dir as a metadata field --- src/cache.rs | 44 ++++++++++++++++++++++++++++---------------- src/meta.rs | 11 ++++++++++- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/src/cache.rs b/src/cache.rs index f14413a..ca3ce83 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -8,6 +8,7 @@ use std::default::Default; use std::env; use std::fs::{self, OpenOptions}; use std::path::{Path, PathBuf}; +use std::str::FromStr; use std::thread; use std::time::{self, Duration}; use tempfile::NamedTempFile; @@ -283,16 +284,25 @@ impl Cache { .ok() .and_then(|sys_time| sys_time.elapsed().ok()) .map(|duration| format!("{}", duration.as_secs())); - extraction_dir = Some(self.resource_to_filepath( - resource, - &resource_last_modified, - options.subdir.as_deref(), - Some("-extracted"), - )); + + extraction_dir = if let Some(extract_dir) = &options.extract_dir { + Some(cached_path.join(extract_dir)) + } else { + Some(self.resource_to_filepath( + resource, + &resource_last_modified, + options.subdir.as_deref(), + Some("-extracted"), + )) + }; } } else { // This is a remote resource, so fetch it to the cache. - let meta = self.fetch_remote_resource(resource, options.subdir.as_deref())?; + let meta = self.fetch_remote_resource( + resource, + options.subdir.as_deref(), + options.extract_dir.as_deref(), + )?; // Check if we need to extract. if options.extract { @@ -320,11 +330,6 @@ impl Cache { filelock.lock_exclusive()?; debug!("Lock on extraction directory acquired for {}", resource); - let dirpath = if options.extract_dir.is_some() { - self.dir.join(options.extract_dir.as_ref().unwrap()) - } else { - dirpath - }; if !dirpath.is_dir() { info!("Extracting {} to {:?}", resource, dirpath); let format = ArchiveFormat::parse_from_extension(cached_path.to_str().unwrap())?; @@ -368,7 +373,12 @@ impl Cache { self.cached_path_with_options(resource, &options) } - fn fetch_remote_resource(&self, resource: &str, subdir: Option<&str>) -> Result { + fn fetch_remote_resource( + &self, + resource: &str, + subdir: Option<&str>, + extract_dir: Option<&str>, + ) -> Result { // Otherwise we attempt to parse the URL. let url = reqwest::Url::parse(resource).map_err(|_| Error::InvalidUrl(String::from(resource)))?; @@ -425,8 +435,7 @@ impl Cache { } // No up-to-date version cached, so we have to try downloading it. - let meta = self.try_download_resource(resource, &url, &path, &etag)?; - + let meta = self.try_download_resource(resource, &url, &path, &etag, extract_dir)?; info!("New version of {} cached", resource); filelock.unlock()?; @@ -469,10 +478,11 @@ impl Cache { url: &reqwest::Url, path: &Path, etag: &Option, + extract_dir: Option<&str>, ) -> Result { let mut retries: u32 = 0; loop { - match self.download_resource(resource, url, path, etag) { + match self.download_resource(resource, url, path, etag, extract_dir) { Ok(meta) => { return Ok(meta); } @@ -503,6 +513,7 @@ impl Cache { url: &reqwest::Url, path: &Path, etag: &Option, + extract_dir: Option<&str>, ) -> Result { debug!("Attempting connection to {}", url); @@ -543,6 +554,7 @@ impl Cache { path.into(), etag.clone(), self.freshness_lifetime, + extract_dir.map(|s| PathBuf::from_str(s).unwrap()), ); meta.to_file()?; diff --git a/src/meta.rs b/src/meta.rs index 6530e06..38d5319 100644 --- a/src/meta.rs +++ b/src/meta.rs @@ -14,6 +14,8 @@ pub(crate) struct Meta { pub(crate) resource_path: PathBuf, /// Path to the serialized meta. pub(crate) meta_path: PathBuf, + /// Path to the directory that the resource is extracted. + pub(crate) extraction_path: Option, /// The ETAG of the resource from the time it was cached, if there was one. pub(crate) etag: Option, /// Time that the freshness of this cached resource will expire. @@ -28,6 +30,7 @@ impl Meta { resource_path: PathBuf, etag: Option, freshness_lifetime: Option, + extraction_path: Option, ) -> Meta { let mut expires: Option = None; let creation_time = now(); @@ -42,6 +45,7 @@ impl Meta { etag, expires, creation_time, + extraction_path, } } @@ -58,7 +62,12 @@ impl Meta { "{}-extracted", self.resource_path.file_name().unwrap().to_str().unwrap() ); - self.resource_path.parent().unwrap().join(dirname) + + if let Some(extraction_path) = &self.extraction_path { + self.resource_path.parent().unwrap().join(extraction_path) + } else { + self.resource_path.parent().unwrap().join(dirname) + } } pub(crate) fn to_file(&self) -> Result<(), Error> { From a1eadf2c2f83bd6946fb5da977ddba54a8936a00 Mon Sep 17 00:00:00 2001 From: Gabriel Schneider Date: Fri, 27 Oct 2023 14:55:06 -0300 Subject: [PATCH 5/5] implemented suggestions from reviews and fixed the redownload --- src/archives.rs | 8 ++++---- src/cache.rs | 4 ++-- src/meta.rs | 9 ++++----- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/archives.rs b/src/archives.rs index 99a2473..1679796 100644 --- a/src/archives.rs +++ b/src/archives.rs @@ -12,12 +12,12 @@ pub(crate) enum ArchiveFormat { impl ArchiveFormat { /// Parse archive type from resource extension. - pub(crate) fn parse_from_extension(resource: &str) -> Result { - let ext = infer::get_from_path(resource).unwrap().unwrap().extension(); + pub(crate) fn parse_from_path>(path: P) -> Result { + let ext = infer::get_from_path(path).unwrap().unwrap().extension(); // Here we assume that gz contain a tar inside it. - if ext.ends_with("gz") { + if ext == "gz" { Ok(Self::TarGz) - } else if ext.ends_with("zip") { + } else if ext == "zip" { Ok(Self::Zip) } else { Err(Error::ExtractionError("unsupported archive format".into())) diff --git a/src/cache.rs b/src/cache.rs index ca3ce83..e836140 100644 --- a/src/cache.rs +++ b/src/cache.rs @@ -332,7 +332,7 @@ impl Cache { if !dirpath.is_dir() { info!("Extracting {} to {:?}", resource, dirpath); - let format = ArchiveFormat::parse_from_extension(cached_path.to_str().unwrap())?; + let format = ArchiveFormat::parse_from_path(&cached_path)?; extract_archive(&cached_path, &dirpath, &format)?; } @@ -448,7 +448,7 @@ impl Cache { fn find_existing(&self, resource: &str, subdir: Option<&str>) -> Vec { let mut existing_meta: Vec = vec![]; let glob_string = format!( - "{}.*.meta", + "{}*.meta", self.resource_to_filepath(resource, &None, subdir, None) .to_str() .unwrap(), diff --git a/src/meta.rs b/src/meta.rs index 38d5319..3562af4 100644 --- a/src/meta.rs +++ b/src/meta.rs @@ -58,14 +58,13 @@ impl Meta { } pub(crate) fn get_extraction_path(&self) -> PathBuf { - let dirname = format!( - "{}-extracted", - self.resource_path.file_name().unwrap().to_str().unwrap() - ); - if let Some(extraction_path) = &self.extraction_path { self.resource_path.parent().unwrap().join(extraction_path) } else { + let dirname = format!( + "{}-extracted", + self.resource_path.file_name().unwrap().to_str().unwrap() + ); self.resource_path.parent().unwrap().join(dirname) } }