Skip to content

Commit

Permalink
download_all_crates | improvements + bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
the-shank committed Feb 10, 2024
1 parent 30842ee commit d6ca10e
Show file tree
Hide file tree
Showing 7 changed files with 149 additions and 25 deletions.
67 changes: 64 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ edition = "2021"
clap = { version = "4.0.32", features = ["derive"] }
color-eyre = "0.6.2"
crates-index = { version = "2.3.0", features = ["git-https"] }
eyre = "0.6.12"
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }

[lints.clippy]
pedantic = "warn"
1 change: 0 additions & 1 deletion src/bin/broken_symlinks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use std::fs::{self, read_dir};
use std::path::PathBuf;
use std::time::Instant;

extern crate utils_rs;
use utils_rs::common::parsers;

/// A simple utility to find broken symlinks
Expand Down
2 changes: 1 addition & 1 deletion src/bin/dlcrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ fn download(name: &str, version: &str) -> Result<()> {
let _ = Command::new("wget")
.arg(&url)
.arg("--output-document")
.arg(format!("{}-{}.tar.gz", name, version))
.arg(format!("{name}-{version}.tar.gz"))
.status()
.wrap_err_with(|| eyre!("Failed to download crate from {}", url))?;
Ok(())
Expand Down
99 changes: 80 additions & 19 deletions src/bin/download_all_crates.rs
Original file line number Diff line number Diff line change
@@ -1,49 +1,110 @@
//! Tool to download all the crates from crates.io.
use std::process::Command;
use tracing::debug;
use tracing::Level;
use clap::Parser;
use eyre::{eyre, ContextCompat, Result};
use std::fs;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::str::FromStr;
use tracing::level_filters::LevelFilter;
use tracing::{debug, info};
use tracing_subscriber::EnvFilter;
use utils_rs::common::consts::LOGGER_NAME;

// TODO: also add a file containing the date that the crates were downloaded
// or add the date to the name of the downloads dir

// TODO: download dir should be provided by a command line argument
const DOWNLOAD_DIR: &str = "/workdisk/shank/crates";
/// Tool to download all the crates from crates.io.
#[derive(Parser, Debug)]
#[clap(author, version, about)]
struct Args {
/// directory where the crates are to be downloaded
#[arg(short, long)]
download_dir: String,

fn main() -> Result<(), Box<dyn std::error::Error>> {
tracing_subscriber::fmt()
.with_max_level(Level::DEBUG)
.init();
/// extract the downloaded archive of each crate
#[arg(short, long, default_value_t = false)]
extract: bool,
}

/// Installs the process-wide `tracing` subscriber used by this binary.
///
/// The filter is built from the environment variable whose name is stored in
/// `LOGGER_NAME`, with `INFO` registered as the default directive. The lossy
/// constructor is used, so a malformed directive in the environment does not
/// abort start-up.
fn setup_tracing() {
    // Level registered as the builder's default directive.
    let default_level = LevelFilter::INFO;

    let env_filter = EnvFilter::builder()
        .with_default_directive(default_level.into())
        .with_env_var(LOGGER_NAME)
        .from_env_lossy();

    // Build the formatting subscriber first, then register it globally.
    let subscriber = tracing_subscriber::fmt().with_env_filter(env_filter);
    subscriber.init();
}

fn main() -> Result<()> {
// setup tracing
setup_tracing();

// parse args
let args = Args::parse();
dbg!(&args);
// std::process::exit(1);

// create the download dir
let _ = Command::new("mkdir").arg("-p").arg(DOWNLOAD_DIR).status()?;
fs::create_dir_all(&args.download_dir)?;

// now download the crates
let mut index = crates_index::GitIndex::new_cargo_default()?;
println!("Updating index");
info!("Updating index...");
index.update()?;

let index_config = index.index_config()?;

for crate_releases in index.crates() {
if let Some(ver) = crate_releases.highest_normal_version() {
if let Some(download_url) = ver.download_url(&index_config) {
debug!(
"downloading `{}` from {}",
info!(
">> downloading `{}` from {}",
crate_releases.name(),
download_url
);

// TODO: add retries for failed downloads

let mut cmd = Command::new("wget");
cmd.arg("-c")
let exit_code = Command::new("wget")
.arg("-c")
.arg("--no-verbose")
.arg("--content-disposition") // to keep the crate name
.arg("--directory-prefix")
.arg(DOWNLOAD_DIR)
.arg(download_url);
let exit_code = cmd.status()?.code();
debug!(?exit_code);
.arg(&args.download_dir)
.arg(download_url)
.stdout(Stdio::null())
.status()?
.code()
.wrap_err_with(|| eyre!("unable to get the exit code for the wget command"))?;

debug!("wget exit_code : {exit_code}");

if exit_code != 0 {
continue;
}

// extract the archive and remove the archive after extracting it
if args.extract {
let downloaded_filename = format!("{}-{}.crate", ver.name(), ver.version());
let exit_code = Command::new("tar")
.arg("-xf")
.arg(&downloaded_filename)
.stdout(Stdio::null())
.current_dir(&args.download_dir)
.status()?
.code()
.wrap_err_with(|| {
eyre!("unable to get the exit_code for the extraction command")
})?;

debug!("tar exit_code : {exit_code}");

if exit_code == 0 {
let downloaded_filepath =
PathBuf::from_str(&args.download_dir)?.join(downloaded_filename);
fs::remove_file(downloaded_filepath)?;
}
}
};
}
}
Expand Down
1 change: 1 addition & 0 deletions src/common/consts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/// Name of the environment variable from which the tracing `EnvFilter`
/// directives are read (it is passed to `with_env_var` when the
/// `download_all_crates` binary sets up logging).
pub const LOGGER_NAME: &str = "UTILS_RS_LOG";
1 change: 1 addition & 0 deletions src/common/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pub mod consts;
pub mod parsers;

0 comments on commit d6ca10e

Please sign in to comment.