From 3de2d587ae7d4ebf8102b120c8700fc2b4e6ebcb Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 10 May 2024 20:01:13 +0100 Subject: [PATCH 1/4] removed ONGOING tags from in code - these are now as they should be... issues --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index ce9a014..1d28936 100644 --- a/src/main.rs +++ b/src/main.rs @@ -217,7 +217,7 @@ fn main() -> Result<(), Error> { ) ) .subcommand( - Command::new("subsetONGOING") + Command::new("subset") .about("Subset a fasta file in a random manner by percentage of file") .arg( Arg::new("fasta-file") @@ -267,7 +267,7 @@ fn main() -> Result<(), Error> { ) ) .subcommand( - Command::new("mergehapsONGOING") + Command::new("mergehaps") .about("Merge haplotypes / multi fasta files together") .arg( Arg::new("fasta-1") From 71f168cad230eff182eb1c58fb20aba189a55cef Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 10 May 2024 20:07:32 +0100 Subject: [PATCH 2/4] A very small made up fasta for quick testing, includes 2 good, 1 passable and 1 bad record --- test_data/synthetic/tiny.fa | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 test_data/synthetic/tiny.fa diff --git a/test_data/synthetic/tiny.fa b/test_data/synthetic/tiny.fa new file mode 100644 index 0000000..ec8ebed --- /dev/null +++ b/test_data/synthetic/tiny.fa @@ -0,0 +1,17 @@ +>SG1 +AATGGCCGGCGCGTTAAACCCAATGCCCCGGTTAANNGCTCGTCGCTTGCTTCGCAAAA +>ATLANTIS +TGCATGTCAGTAGCTGCGGCCCATAAAAAAACGATCGATGCATGTAAAAATGCATCCAC +ACACATCGATCGATCGAGTAAAACACATAATTATATTATTTTTATATTATACGATCATT +CACACATGTGCATCG +>UNIVERSE +TGCATCGATCGATCGACTAGCTACGATCGATCGATCGACTGTCGCGAGGCGCGCGGCGC +CGGCGCGGGGGCGCCCGCGGGCGCCCCATCACGACTGATAGGCCGCGAGACGGCGAGGG +TCA +>UNSPOKEN_ANIMATED_ONE(CEST TERRIBLE) +CATGCTGTAGCAGCTGTCAGTCGATCGATCACTNNNGCATGCATCGNNNNNNNNGCATC +CATCGTAGTGCGGGCGAGCTATCGGCGATCATCGATCGGGCAGCGATCTACGAGCGGGC +NNNNNNNNNNNNNNNNNNNNNNNNCACACAGTCGATCGAGCTGGCCCACACACACACCA +CACACCACACACACACACACACACACACACACACACACACA +>ERROR +HELLOIMNOTAREALSEQUENCEIMJUSTHERETOTRIPYOUUPUWU From 49bfe25e91685c762a80dd0e675dcc3dad2082a7 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 10 May 2024 20:33:46 +0100 Subject: [PATCH 3/4] Updates to use .repeat() rather than have a 200 long string to join a Vec together --- src/main.rs | 11 +++++++++-- src/tpf_fasta.rs | 14 ++++++++++---- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/main.rs b/src/main.rs index 1d28936..aedc212 100644 --- a/src/main.rs +++ b/src/main.rs @@ -165,7 +165,7 @@ fn main() -> Result<(), Error> { ) ) .subcommand( - Command::new("profileONGOING") + Command::new("profile") .about("Profile an input fasta file and return various statistics") .arg( Arg::new("fasta-file") @@ -215,6 +215,13 @@ fn main() -> Result<(), Error> { .default_value("new.fasta") .help("The output name of the new fasta file") ) + .arg( + Arg::new("n_length") + .aliases(["n_len"]) + .value_parser(clap::value_parser!(usize)) + .default_value("200") + .help("Length that the N (gap) string should be.") + ) ) .subcommand( Command::new("subset") @@ -267,7 +274,7 @@ fn main() -> Result<(), Error> { ) ) .subcommand( - Command::new("mergehaps") + Command::new("mergehaps") .about("Merge haplotypes / multi fasta files together") .arg( Arg::new("fasta-1") diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs index 9f3b4a0..fc5ec7e 100644 --- a/src/tpf_fasta.rs +++ b/src/tpf_fasta.rs @@ -137,7 +137,12 @@ pub mod tpf_fasta_mod { uniques } - fn save_to_fasta(fasta_data: Vec, tpf_data: Vec, output: &String) { + fn save_to_fasta( + fasta_data: Vec, + tpf_data: Vec, + output: &String, + n_length: usize, + ) { // // TPF is in the input TPF order, this will continue to be the case until // the script is modified and the Tpf struct gets modified in place for some reason @@ -191,9 +196,9 @@ pub mod tpf_fasta_mod { } let line_len: usize = 60; - let fixed = data.sequence; - let fixed2 = fixed.join("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN"); + let n_string = "N".repeat(n_length); + let fixed2 = fixed.join(&n_string); //.join required a borrowed str let fixed3 = fixed2 .as_bytes() .chunks(line_len) @@ -221,6 +226,7 @@ pub mod tpf_fasta_mod { // let fasta_file: &String = arguments.unwrap().get_one::("fasta").unwrap(); let tpf_file: &String = arguments.unwrap().get_one::("tpf").unwrap(); + let n_length: &usize = arguments.unwrap().get_one::("n_length").unwrap(); let output: &String = arguments.unwrap().get_one::("output").unwrap(); println!("LET'S GET CURATING THAT FASTA!"); stacker::maybe_grow(32 * 1024, 1024 * 5120, || { @@ -262,7 +268,7 @@ pub mod tpf_fasta_mod { Err(e) => panic!("{:?}", e), }; } - save_to_fasta(new_fasta_data, tpf_data, output) + save_to_fasta(new_fasta_data, tpf_data, output, n_length.to_owned()) } Err(e) => panic!("Something is wrong with the file! | {}", e), } From 4c281548d0ea472acc08f3dfc1e59656501cc905 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Fri, 10 May 2024 20:34:00 +0100 Subject: [PATCH 4/4] Update version --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9da4770..6249461 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,7 +405,7 @@ dependencies = [ [[package]] name = "fasta_manipulation" -version = "0.1.1" +version = "0.1.2" dependencies = [ "clap", "colored", diff --git a/Cargo.toml b/Cargo.toml index c2ae451..ad77b8f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "fasta_manipulation" -version = "0.1.1" +version = "0.1.2" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html