From afad3723d7ee0d6a6c6d7bc05a6c48e252ffc059 Mon Sep 17 00:00:00 2001 From: Matt Burridge Date: Thu, 12 Sep 2024 20:26:07 +0100 Subject: [PATCH] refactored the zipping to hopefully not change dir, removed minio due to PyO3 build errors --- Cargo.toml | 2 - src/ro_crate/transfer/minio.rs | 75 ---------------------------------- src/ro_crate/transfer/mod.rs | 3 -- src/ro_crate/write.rs | 68 +++++++++++++----------------- 4 files changed, 28 insertions(+), 120 deletions(-) delete mode 100644 src/ro_crate/transfer/minio.rs delete mode 100644 src/ro_crate/transfer/mod.rs diff --git a/Cargo.toml b/Cargo.toml index fa4ab7f..0b2e1a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,8 +27,6 @@ url = "2.2" zip = "0.6.6" walkdir = "2" reqwest = { version = "0.11", features = ["blocking", "json"], default-features = false} -minio = {git="https://github.com/minio/minio-rs.git"} -tokio = "1.38.0" [dev-dependencies] tempfile = "3.9" diff --git a/src/ro_crate/transfer/minio.rs b/src/ro_crate/transfer/minio.rs deleted file mode 100644 index cbb2c8e..0000000 --- a/src/ro_crate/transfer/minio.rs +++ /dev/null @@ -1,75 +0,0 @@ -//! 
Developed from minio-rs examples -use minio::s3::args::{BucketExistsArgs, MakeBucketArgs}; -use minio::s3::builders::ObjectContent; -use minio::s3::client::ClientBuilder; -use minio::s3::creds::StaticProvider; -use minio::s3::http::BaseUrl; -use std::path::Path; - -/// Defines minio connection parameters -pub struct MinioParams { - /// URI for Minio (i.e localhost:9000) - pub uri: String, - /// Access key - pub access: String, - /// Secret key - pub secret: String, - /// Session token - pub session: Option<String>, -} - -/// Currently HTTP insecure, not HTTPS -#[tokio::main] -pub async fn minio_transfer( - crate_path: &Path, - bucket: &str, - conn: &MinioParams, -) -> Result<(), Box<dyn std::error::Error + Send + Sync>> { - let mut base_url = conn.uri.parse::<BaseUrl>()?; - base_url.https = false; - - println!("Trying to connect to MinIO at: `{:?}`", base_url); - - let static_provider = StaticProvider::new(&conn.access, &conn.secret, conn.session.as_deref()); - - let client = ClientBuilder::new(base_url.clone()) - .provider(Some(Box::new(static_provider))) - .ignore_cert_check(Some(true)) - .build()?; - - println!("{:?}", client); - - // Check bucket exist or not. - let exists: bool = client - .bucket_exists(&BucketExistsArgs::new(bucket).unwrap()) - .await - .unwrap(); - - println!("Bucker exists? {}", exists); - // Make 'bucket_name' bucket if not exist. 
- if !exists { - client - .make_bucket(&MakeBucketArgs::new(bucket).unwrap()) - .await - .unwrap(); - } - - // Name of the object that will be stored in the bucket - let object_name: &str = crate_path.file_name().unwrap().to_str().unwrap(); - - println!("filename {}", &object_name); - - let content = ObjectContent::from(crate_path); - client - .put_object_content(bucket, object_name, content) - .send() - .await?; - - println!( - "File `{}` is successfully uploaded as object `{}` to bucket `{}`.", - crate_path.display(), - object_name, - bucket - ); - Ok(()) -} diff --git a/src/ro_crate/transfer/mod.rs b/src/ro_crate/transfer/mod.rs deleted file mode 100644 index d5b0cb5..0000000 --- a/src/ro_crate/transfer/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -//! Enables immediate upload to data storage of choosing - -pub mod minio; diff --git a/src/ro_crate/write.rs b/src/ro_crate/write.rs index 30ca3eb..e814f80 100644 --- a/src/ro_crate/write.rs +++ b/src/ro_crate/write.rs @@ -5,7 +5,6 @@ use crate::ro_crate::read::read_crate; use crate::ro_crate::rocrate::RoCrate; -use std::env; use std::fmt; use std::fs::{self, File}; use std::io::{self, Write}; @@ -303,28 +302,27 @@ pub fn zip_crate_external( fn update_zip_ids(rocrate: &mut RoCrate, id: PathBuf, zip_id: &str) { let id_str = id.to_str().unwrap_or_default(); - // NOTE: this only really checks for extended length path failures - others may be present so this can - // be refactored when needed - // base update on direct match - if rocrate.update_id_recursive(id_str, zip_id).is_none() { - // if fail - check if the ID string contains the '\\?\' prefix - if id_str.starts_with(r"\\?\") { - let stripped_id = &id_str[4..]; - if let Some(_) = rocrate.update_id_recursive(stripped_id, zip_id) { - } else { - // if win extend length not an issue, check \\ stripping - if id_str.contains("\\\\") { - if let Some(_) = rocrate.update_id_recursive(stripped_id, zip_id) {} - } else { - if let Some(_) = rocrate.update_id_recursive(stripped_id, 
zip_id) {} - } - } - } else { + // Try updating based on a direct match + if rocrate.update_id_recursive(id_str, zip_id).is_some() { + return; + } + + // Handle Windows extended-length path prefixes (\\?\) + if id_str.starts_with(r"\\?\") { + let stripped_id = &id_str[4..]; + + // Attempt to update using the stripped path + if rocrate.update_id_recursive(stripped_id, zip_id).is_some() { + return; + } + + // Handle paths with '\\' by replacing them with a single '\' + if id_str.contains("\\\\") { + let normalized_id = stripped_id.replace("\\\\", "\\"); + rocrate.update_id_recursive(&normalized_id, zip_id); } - } else { } } - /// Identifies file paths that are not relative to the given RO-Crate directory. /// /// When preparing an RO-Crate for zipping, it's important to include all related files, even those @@ -339,38 +337,28 @@ fn update_zip_ids(rocrate: &mut RoCrate, id: PathBuf, zip_id: &str) { fn get_nonrelative_paths(ids: &Vec<&String>, crate_dir: &Path) -> Vec<PathBuf> { let mut nonrels: Vec<PathBuf> = Vec::new(); + // Get the absolute path of the crate directory let rocrate_path = get_absolute_path(crate_dir).unwrap(); - let root_dir = rocrate_path.parent(); - // Extract the directory part of the path - if let Some(directory_path) = root_dir { - // Try to change the current working directory - let _ = env::set_current_dir(directory_path); - } else { - } - // Iterate over all the ids, check the paths are relative to crate. - // If not relative to crate and a file, then grab, add to extern folder - // and zip + // Iterate over all the ids, check if the paths are relative to the crate. 
for id in ids.iter() { + // Skip IDs that are fragment references (i.e., starting with '#') if id.starts_with('#') { continue; } + + // Resolve the absolute path of the current ID if let Some(path) = get_absolute_path(Path::new(id)) { + // Check if the path exists if path.exists() { - let nonrel = is_outside_base_folder(root_dir.unwrap(), &path); - if nonrel { - if id.starts_with(".") { - nonrels.push(id.into()); - } else { - nonrels.push(path); - } + // Check if the path is outside the base crate directory + if is_outside_base_folder(&rocrate_path, &path) { + nonrels.push(path); } - } else { } - } else { - continue; } } + nonrels } /// Converts a relative path to an absolute one, if possible.