Skip to content

Commit

Permalink
add option to filter crates by a regex pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
the-shank committed Feb 16, 2024
1 parent d6ca10e commit ece3303
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 55 deletions.
13 changes: 7 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ clap = { version = "4.0.32", features = ["derive"] }
color-eyre = "0.6.2"
crates-index = { version = "2.3.0", features = ["git-https"] }
eyre = "0.6.12"
regex = "1.10.3"
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }

Expand Down
129 changes: 80 additions & 49 deletions src/bin/download_all_crates.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
use clap::Parser;
use eyre::{eyre, ContextCompat, Result};
use regex::Regex;
use std::fs;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::str::FromStr;
use tracing::level_filters::LevelFilter;
Expand All @@ -19,20 +20,54 @@ use utils_rs::common::consts::LOGGER_NAME;
#[clap(author, version, about)]
struct Args {
/// directory where the crates are to be downloaded
#[arg(short, long)]
#[arg(long)]
download_dir: String,

/// extract the downloaded archive of each crate
#[arg(short, long, default_value_t = false)]
#[arg(long, default_value_t = false)]
extract: bool,

/// only download packages matching the regexp
#[arg(long)]
regexp: Option<String>,

/// dry-run
#[arg(long, default_value_t = false)]
dry_run: bool,
}

/// Installs the process-wide `tracing` subscriber used for log output.
///
/// Filter directives are read from the environment variable named by
/// `LOGGER_NAME`, falling back to the `INFO` level when it provides none.
///
/// # Panics
///
/// Panics if a global subscriber has already been installed, so this must be
/// called at most once per process.
fn setup_tracing() {
    let filter = EnvFilter::builder()
        .with_default_directive(LevelFilter::INFO.into())
        .with_env_var(LOGGER_NAME)
        .from_env_lossy();
    // Install exactly one subscriber: a second `init` call would panic because
    // the global default can only be set once.
    tracing_subscriber::fmt().with_env_filter(filter).init();
}

/// Downloads `url` into `download_dir` by shelling out to `wget`.
///
/// `wget` is run with `-c`, `--no-verbose` and `--content-disposition`, and
/// its standard output is discarded.
///
/// # Errors
///
/// Returns an error if the `wget` process cannot be run, or if it finished
/// without an exit code.
///
/// # Returns
///
/// The exit code reported by `wget`; the caller decides what a non-zero code
/// means.
fn download_crate_from_url<P: AsRef<Path>>(url: &str, download_dir: P) -> Result<i32> {
    let mut wget = Command::new("wget");
    wget.args([
        "-c",
        "--no-verbose",
        "--content-disposition", // to keep the crate name
        "--directory-prefix",
    ])
    .arg(download_dir.as_ref())
    .arg(url)
    .stdout(Stdio::null());

    let status = wget.status()?;
    status
        .code()
        .wrap_err_with(|| eyre!("unable to get the exit code for the wget command"))
}

/// Unpacks the archive `filename` by running `tar -xf` with `cwd` as the
/// working directory.
///
/// The standard output of `tar` is discarded.
///
/// # Errors
///
/// Returns an error if the `tar` process cannot be run, or if it finished
/// without an exit code.
///
/// # Returns
///
/// The exit code reported by `tar`; the caller decides what a non-zero code
/// means.
fn extract_archive<P: AsRef<Path>>(filename: &str, cwd: P) -> Result<i32> {
    let mut tar = Command::new("tar");
    tar.args(["-xf", filename])
        .stdout(Stdio::null())
        .current_dir(cwd.as_ref());

    let status = tar.status()?;
    status
        .code()
        .wrap_err_with(|| eyre!("unable to get the exit_code for the extraction command"))
}

fn main() -> Result<()> {
Expand All @@ -41,20 +76,35 @@ fn main() -> Result<()> {

// parse args
let args = Args::parse();
dbg!(&args);
info!("args: {args:#?}");
// std::process::exit(1);

// create the download dir
fs::create_dir_all(&args.download_dir)?;

// now download the crates
// update the crates index
let mut index = crates_index::GitIndex::new_cargo_default()?;
info!("Updating index...");
index.update()?;

let index_config = index.index_config()?;

for crate_releases in index.crates() {
// regex?
let regex = if let Some(ref r) = args.regexp {
Some(Regex::new(r)?)
} else {
None
};

// apply regexp filtering (if applicable)
let filtered = index.crates().filter(|crate_release| {
if let Some(ref rexp) = regex {
rexp.is_match(crate_release.name())
} else {
true
}
});

for crate_releases in filtered {
if let Some(ver) = crate_releases.highest_normal_version() {
if let Some(download_url) = ver.download_url(&index_config) {
info!(
Expand All @@ -65,44 +115,25 @@ fn main() -> Result<()> {

// TODO: add retries for failed downloads

let exit_code = Command::new("wget")
.arg("-c")
.arg("--no-verbose")
.arg("--content-disposition") // to keep the crate name
.arg("--directory-prefix")
.arg(&args.download_dir)
.arg(download_url)
.stdout(Stdio::null())
.status()?
.code()
.wrap_err_with(|| eyre!("unable to get the exit code for the wget command"))?;

debug!("wget exit_code : {exit_code}");

if exit_code != 0 {
continue;
}
if !args.dry_run {
let exit_code = download_crate_from_url(&download_url, &args.download_dir)?;
debug!("wget exit_code : {exit_code}");

if exit_code != 0 {
continue;
}

// extract the archive and remove the archive after extracting it
if args.extract {
let downloaded_filename = format!("{}-{}.crate", ver.name(), ver.version());
let exit_code = Command::new("tar")
.arg("-xf")
.arg(&downloaded_filename)
.stdout(Stdio::null())
.current_dir(&args.download_dir)
.status()?
.code()
.wrap_err_with(|| {
eyre!("unable to get the exit_code for the extraction command")
})?;

debug!("tar exit_code : {exit_code}");

if exit_code == 0 {
let downloaded_filepath =
PathBuf::from_str(&args.download_dir)?.join(downloaded_filename);
fs::remove_file(downloaded_filepath)?;
// extract the archive and remove the archive after extracting it
if args.extract {
let downloaded_filename = format!("{}-{}.crate", ver.name(), ver.version());
let exit_code = extract_archive(&downloaded_filename, &args.download_dir)?;
debug!("tar exit_code : {exit_code}");

if exit_code == 0 {
let downloaded_filepath =
PathBuf::from_str(&args.download_dir)?.join(downloaded_filename);
fs::remove_file(downloaded_filepath)?;
}
}
}
};
Expand Down

0 comments on commit ece3303

Please sign in to comment.