From e07a6895940b8cdd76aa5becd1d0b5528e29e752 Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Thu, 23 May 2024 21:04:45 +0100
Subject: [PATCH 1/7] Documenting code and removing an unnecessary check

---
 src/generics.rs  |  1 +
 src/tpf_fasta.rs | 57 ++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/src/generics.rs b/src/generics.rs
index f10b880..f12e80e 100644
--- a/src/generics.rs
+++ b/src/generics.rs
@@ -19,6 +19,7 @@ pub fn validate_fasta(
 ) -> result::Result<HashMap<String, usize>, Box<dyn Error>> {
     // Simply validate the fasta is valid by reading though and ensure there are
     // valid record formats through out the file
+    // Return a HashMap of header to sequence length
     let reader: Result<fasta::Reader<Box<dyn BufRead>>, std::io::Error> =
         fasta::reader::Builder.build_from_path(path);
     let mut fasta_map = HashMap::new();
diff --git a/src/tpf_fasta.rs b/src/tpf_fasta.rs
index fc5ec7e..2111ded 100644
--- a/src/tpf_fasta.rs
+++ b/src/tpf_fasta.rs
@@ -20,6 +20,7 @@ pub mod tpf_fasta_mod {
     }
 
     impl std::fmt::Display for Tpf {
+        // This is how we want to print a Tpf object
         fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
             write!(
                 fmt,
@@ -42,9 +43,13 @@ pub mod tpf_fasta_mod {
     }
 
     fn parse_tpf(path: &String) -> Vec<Tpf> {
+        // Instantiate a Vec of Tpf objects
         let mut all_tpf: Vec<Tpf> = Vec::new();
         for line in read_to_string(path).unwrap().lines() {
+            // Lines starting with '?' contain scaffold data;
+            // lines without it are gaps
             if line.starts_with('?') {
+                // Parse the data into a Tpf object
                 let line_replaced = line.replace('\t', " ");
                 let line_list: Vec<&str> = line_replaced.split_whitespace().collect();
                 let scaff_data: Vec<&str> = line_list[1].split(':').collect();
@@ -82,6 +87,10 @@ pub mod tpf_fasta_mod {
         parsed: std::option::Option<Sequence>,
         orientation: String,
     ) -> String {
+        // The TPF will contain data in both PLUS (normal) and
+        // MINUS (inverted) orientation; if MINUS, we reverse the
+        // slice and take the complement sequence.
+        // We then return the sequence of the record.
         if orientation == "MINUS" {
             let start = Position::try_from(1).unwrap();
             let parse_orientation = parsed.unwrap();
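Aside: the MINUS rule documented above (reverse the slice, then take the complement) can be illustrated with a standalone sketch. This is a hypothetical helper over plain ASCII bases, not the crate's noodles-based implementation:

    // Illustrative only: reverse the sequence, then complement each
    // base. N and other ambiguity codes pass through unchanged.
    fn reverse_complement(seq: &str) -> String {
        seq.bytes()
            .rev()
            .map(|b| match b {
                b'A' => 'T',
                b'T' => 'A',
                b'G' => 'C',
                b'C' => 'G',
                other => other as char,
            })
            .collect()
    }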
@@ -103,18 +112,22 @@ pub mod tpf_fasta_mod {
         sequence: std::option::Option<Sequence>,
         tpf: Vec<&Tpf>,
     ) -> Vec<NewFasta> {
-        let mut subset_tpf: Vec<NewFasta> = Vec::new();
         //
         // Take the input sequence and scaffold name
         // Parse the input sequence based on the data contained in
-        // the TPF. Which is already a subset based on scaff name
+        // the TPF, which is already a subset based on scaffold name.
+        //
+        // For instance, this Vec may contain only SCAFFOLD_1 TPF records
+        // if the sequence is from a SCAFFOLD_1 component. As we move
+        // through the list, we cut the sequence at the recorded
+        // positions and output the new sequence.
         //
+        let mut subset_tpf: Vec<NewFasta> = Vec::new();
         let new_seq = sequence.unwrap(); // Option(Sequence ()) -> Sequence ()
 
         for &i in &tpf {
             let start = Position::try_from(i.start_coord).unwrap();
             let end = Position::try_from(i.end_coord).unwrap();
-            //let region = Region::new(&i.new_scaffold, start.unwrap()..=end.unwrap());
             let parsed = new_seq.slice(start..=end);
             let the_sequence = check_orientation(parsed, i.orientation.to_owned());
             let data = NewFasta {
@@ -127,6 +140,7 @@ pub mod tpf_fasta_mod {
     }
 
     fn get_uniques(tpf_list: &Vec<Tpf>) -> Vec<String> {
+        // Get a Vec of the unique names in the TPF Vec
         let mut uniques: Vec<String> = Vec::new();
 
         for i in tpf_list {
@@ -145,7 +159,9 @@ pub mod tpf_fasta_mod {
     ) {
         //
         // TPF is in the input TPF order, this will continue to be the case until
-        // the script is modified and the Tpf struct gets modified in place for some reason
+        // such time as the script starts modifying the TPF in place, which
+        // we don't want to happen. If that ever happens, the order will no
+        // longer be guaranteed.
         //
         let _data_file = File::create(output);
         let mut file = OpenOptions::new()
@@ -161,15 +177,18 @@ pub mod tpf_fasta_mod {
 
         let uniques = get_uniques(&tpf_data);
 
-        // This is inefficient as we are scanning through the fasta_data, uniques number of times
+        // This is inefficient as we are scanning through the fasta_data once
+        // per unique name (equal to the number of scaffolds).
         // If uniques is 10 long and fasta is 100, then this is 1000 scans through in total.
-        let mut no_more: Vec<String> = Vec::new();
         for x in uniques {
             println!("NOW WRITING DATA FOR: {:?}", &x); // X = "SUPER_1"
             let stringy = format!(">{x}\n");
             file.write_all(stringy.as_bytes())
                 .expect("Unable to write to file");
+
+            // file2 will collect what went where,
+            // with no sequence data
             file2
                 .write_all(stringy.as_bytes())
                 .expect("Unable to write to file");
@@ -179,7 +198,6 @@ pub mod tpf_fasta_mod {
                 sequence: Vec::new(),
             };
 
-            no_more.push(x.to_owned());
             x.clone_into(&mut data.name);
             for tpf in &tpf_data {
                 if tpf.new_scaffold == x {
@@ -195,6 +213,11 @@ pub mod tpf_fasta_mod {
                 }
             }
 
+            // Should be its own function really.
+            // This actually writes the new fasta file,
+            // joining the data together with a user-defined
+            // number (default = 200) of N's (the gap).
+
             let line_len: usize = 60;
             let fixed = data.sequence;
             let n_string = "N".repeat(n_length);
@@ -210,7 +233,6 @@ pub mod tpf_fasta_mod {
                 let formatted = i.to_owned() + "\n";
                 file.write_all(formatted.as_bytes()).unwrap();
             }
-            println!("NO LONG SCANNING FOR: {:?}", &no_more)
         }
     }
 
@@ -219,7 +241,7 @@ pub mod tpf_fasta_mod {
     pub fn curate_fasta(arguments: std::option::Option<&ArgMatches>) {
         //
         // Generate a curated fasta file based on the input TPF file
-        // which was generated by Pretext and the agp_to_tpf script.
+        // which was generated by Pretext and the agp_to_tpf scripts.
         // This new fasta file contains a new scaffold naming as well
         // as pieced together sequences generated by the splitting of
         // data in Pretext.
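Aside: the write-out step documented above (join the pieces for one scaffold with an N-gap, then wrap at line_len = 60) reduces to something like this sketch. The helper name and the plain-String input are assumptions for illustration, not code from the patch:

    // Join scaffold pieces with an N-gap of n_length, then wrap the
    // result at 60 characters per line.
    fn join_and_wrap(pieces: &[String], n_length: usize) -> Vec<String> {
        let joined = pieces.join(&"N".repeat(n_length));
        joined
            .as_bytes()
            .chunks(60)
            .map(|line| String::from_utf8_lossy(line).into_owned())
            .collect()
    }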
@@ -229,11 +251,14 @@ pub mod tpf_fasta_mod {
         let n_length: &usize = arguments.unwrap().get_one::<usize>("n_length").unwrap();
         let output: &String = arguments.unwrap().get_one::<String>("output").unwrap();
         println!("LET'S GET CURATING THAT FASTA!");
+
+        // Stacker is supposed to increase the stack size
+        // when it starts to run out
         stacker::maybe_grow(32 * 1024, 1024 * 5120, || {
             match validate_fasta(fasta_file) {
+                // validate_fasta returns the header-to-length map - basically an index
                 Ok(fasta_d) => {
                     let tpf_data = parse_tpf(&tpf_file);
-                    //let _validated = varify_validity(&tpf_data, &fasta_d);
 
                     //
                     // Start indexed reader of the input fasta
@@ -244,10 +269,12 @@ pub mod tpf_fasta_mod {
                     let fasta_repo = match reader {
                         Ok(data) => {
                             let adapter = IndexedReader::new(data);
+
+                            // Now read the fasta and return it as a queryable object
                             let repository = fasta::Repository::new(adapter);
                             repository
                         }
-                        Err(_) => todo!(),
+                        Err(_) => todo!(), // Probably just panic!
                     };
 
                     //
@@ -257,9 +284,16 @@ pub mod tpf_fasta_mod {
                     let mut new_fasta_data: Vec<NewFasta> = Vec::new();
                     for i in fasta_d {
+                        // For each header in fasta_d,
+                        // subset the TPF on header and length,
+                        // cross-referencing with fasta_d
                         let subset_tpf = subset_vec_tpf(&tpf_data, (&i.0, &i.1));
+
+                        // Query the fasta for scaffold = header
                         let sequence = fasta_repo.get(&i.0).transpose();
 
+                        // If it exists, get the sequence and return NewFasta records
+                        // containing the trimmed sequence
                         match sequence {
                             Ok(data) => {
                                 let subset_results = parse_seq(data, subset_tpf);
@@ -268,6 +302,7 @@ pub mod tpf_fasta_mod {
                             Err(e) => panic!("{:?}", e),
                         };
                     }
+                    // Write it all out to fasta
                     save_to_fasta(new_fasta_data, tpf_data, output, n_length.to_owned())
                 }
                 Err(e) => panic!("Something is wrong with the file! | {}", e),

From 0242f4d047f682344d8ae449f604b0954d0c5123 Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Thu, 23 May 2024 21:07:29 +0100
Subject: [PATCH 2/7] Documenting the small exclude seq

---
 src/exclude_seq.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/exclude_seq.rs b/src/exclude_seq.rs
index ab82c4e..fcd7973 100644
--- a/src/exclude_seq.rs
+++ b/src/exclude_seq.rs
@@ -9,8 +9,11 @@ pub mod exclude_seq_mod {
         fasta: &'a str,
         out_file: &str,
     ) -> std::result::Result<&'a str, Box<dyn Error>> {
+        // Open and read the fasta
         let reader: Result<fasta::Reader<Box<dyn BufRead>>, std::io::Error> =
             fasta::reader::Builder.build_from_path(fasta);
+
+        // Create the new file
         let file = fs::OpenOptions::new()
             .create(true)
             .append(true)
@@ -19,6 +22,8 @@ pub mod exclude_seq_mod {
 
         match reader {
             Ok(fasta) => {
+                // On Ok, append each record to the new fasta if it is
+                // not in the user-given list of headers
                 let mut binding = fasta;
                 for result in binding.records() {
                     let record = result?;

From 7bf90c477e2c47c83907a1f9ae00a0a6ffebf89c Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Thu, 23 May 2024 21:09:37 +0100
Subject: [PATCH 3/7] Documenting the generics

---
 src/generics.rs | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/generics.rs b/src/generics.rs
index f12e80e..6f2e634 100644
--- a/src/generics.rs
+++ b/src/generics.rs
@@ -45,6 +45,7 @@ pub fn only_keys<K, V>(map: HashMap<K, V>) -> impl Iterator<Item = K> {
 }
 
 fn get_gene_symbol(header: String) -> Result<String, Box<dyn Error>> {
+    // Take a header string and return its first segment
     let header_list: Vec<&str> = header.split(' ').collect();
     let record_header = header_list[0];
     Ok(record_header[1..].to_owned())
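Aside: the rule get_gene_symbol documents above (drop the leading '>' and keep the first space-delimited field) behaves like this hypothetical one-liner, shown only to make the contract concrete:

    // ">SCAFF_1 extra description" -> "SCAFF_1"
    fn first_field(header: &str) -> &str {
        header
            .trim_start_matches('>')
            .split_whitespace()
            .next()
            .unwrap_or("")
    }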
@@ -72,13 +73,16 @@ fn get_gene_symbol(header: String) -> Result<String, Box<dyn Error>> {
 }
 
 pub fn sanitise_header(old_header: &Definition) -> String {
+    // Clean the header
+    // This is overly complex for historical reasons
+    // It is still here in case those reasons come back to haunt me
+    // ...again
     let x = get_gene_symbol(old_header.to_string());
-    // Yeah i dont know either...
     match x {
         Ok(c) => c,
         Err(e) => {
-            format!("Regex isnt good enough to capture header id: {}", e)
+            format!("Split didn't work: {}", e)
         }
     }
 }

From 6fd352312077369a2971695cf93e069f4992c0e2 Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Thu, 23 May 2024 21:27:28 +0100
Subject: [PATCH 4/7] Documenting the map headers

---
 src/map_headers.rs | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/map_headers.rs b/src/map_headers.rs
index 2b066b0..95370a1 100644
--- a/src/map_headers.rs
+++ b/src/map_headers.rs
@@ -1,5 +1,4 @@
 pub mod mapping_headers {
-
     use clap::ArgMatches;
     use colored::Colorize;
     use std::error::Error;
@@ -51,6 +50,7 @@ pub mod mapping_headers {
             std::vec::IntoIter<String>,
         >,
     ) {
+        // Save the header mapping to file
         let f: File = File::create(output).expect("Unable to create file");
         let mut f: BufWriter<File> = BufWriter::new(f);
         for map_pair in mapped {
@@ -69,6 +69,9 @@ pub mod mapping_headers {
             std::vec::IntoIter<String>,
         >,
     ) {
+        // Swap out the old header with the new;
+        // skip everything else.
+        // This could be re-written now that I know more about noodles
         let file_reader: File = File::open(input).expect("CAN'T OPEN FILE");
         let buff_reader: BufReader<File> = BufReader::new(file_reader);
         let mut new_fasta: File = File::create(output).unwrap();
@@ -95,6 +98,8 @@ pub mod mapping_headers {
     pub fn map_fasta_head(
         arguments: std::option::Option<&ArgMatches>,
     ) -> Result<(), Box<dyn Error>> {
+        // Generate a mapped.txt with the old and new headers
+        // Generate a mapped.fasta with the new headers
         let file: &String = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
         let replacer: &String = arguments
             .unwrap()
@@ -110,19 +115,22 @@ pub mod mapping_headers {
 
         match validate_fasta(file) {
             Ok(names) => {
+                // Vec of scaffold names from validate_fasta;
+                // return only the headers, not the lengths
                 let new_names = Vec::from_iter(only_keys(names));
 
+                // Generate a Zip of the old and new names
                 let new_map: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
                     create_mapping(new_names, replacer);
 
+                // Save the mapping to file
                 let map_to_save: Zip<std::vec::IntoIter<String>, std::vec::IntoIter<String>> =
                     new_map.clone();
                 let output_file = format!("{}mapped-heads.tsv", output);
-
                 save_mapping(&output_file, map_to_save);
 
+                // Generate a new fasta with the mapped headers
                 let new_fasta: String = format!("{output}mapped.fasta");
-
                 create_mapped_fasta(file, &new_fasta, new_map);
 
                 println!(
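Aside: the mapping documented above pairs every original header with a generated replacement and keeps both halves for the mapped-heads.tsv report. A simplified sketch of the idea; the real create_mapping returns a Zip iterator, and the "{replacer}_{n}" naming here is an assumption for illustration:

    // Pair each original header with a generated replacement name.
    fn sketch_mapping(names: Vec<String>, replacer: &str) -> Vec<(String, String)> {
        names
            .into_iter()
            .enumerate()
            .map(|(i, old)| (old, format!("{replacer}_{}", i + 1)))
            .collect()
    }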
From a1af7b64ab15ace060879411d0940f9d8b749cb0 Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Thu, 23 May 2024 21:34:28 +0100
Subject: [PATCH 5/7] Documentation

---
 src/split_by_count.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/split_by_count.rs b/src/split_by_count.rs
index 1396f00..fa91188 100644
--- a/src/split_by_count.rs
+++ b/src/split_by_count.rs
@@ -13,6 +13,8 @@ pub mod split_by_count_mod {
 
     #[allow(clippy::needless_return)]
     fn fix_head(records: Record, sanitise: bool) -> Record {
+        // Take a Record, sanitise the header and
+        // recombine into a new Record
         if sanitise {
             let header = sanitise_header(records.definition());
             let definition = fasta::record::Definition::new(header, None);
@@ -24,6 +26,7 @@ pub mod split_by_count_mod {
     }
 
     fn write_fasta(outdir: &String, fasta_record: &Vec<Record>) {
+        // Take a fasta Record and append it to the output file
         println!("{}", outdir);
 
         let _data_file = File::create(outdir);
@@ -61,16 +64,20 @@ pub mod split_by_count_mod {
             fasta_file, fasta_count
         );
 
+        // Header counter
         let mut counter: u16 = 0;
        let mut file_counter: u16 = 1;
 
+        // Remove the file suffix from the file name
         let file_name: Vec<&str> = actual_name.split('.').collect();
 
+        // Open the fasta file
         let mut reader = File::open(fasta_file)
             .map(BufReader::new)
             .map(fasta::Reader::new)
             .unwrap();
 
+        // Create a Record list
         let mut record_list: Vec<Record> = Vec::new();
         for result in reader.records() {
             let record = result.unwrap();

From 748379dd7f39eb42b06bb9e946f4205ab3f77953 Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Wed, 29 May 2024 16:51:30 +0100
Subject: [PATCH 6/7] Refactor of splitbycount

Most of the functional work of splitbysize is complete. Sanitise
headers just needs adding
---
 src/generics.rs       |  24 +++++++++
 src/main.rs           |  16 +++++-
 src/split_by_count.rs |  33 +++----------
 src/split_by_size.rs  | 122 ++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 163 insertions(+), 32 deletions(-)

diff --git a/src/generics.rs b/src/generics.rs
index 6f2e634..36b9a19 100644
--- a/src/generics.rs
+++ b/src/generics.rs
@@ -1,6 +1,7 @@
 use noodles::fasta;
 use noodles::fasta::record::Definition;
 use std::error::Error;
+use std::fs::{self, File, OpenOptions};
 use std::{collections::HashMap, fmt, io::BufRead, result, str};
 
 #[derive(Debug, Clone)]
@@ -86,3 +87,26 @@ pub fn sanitise_header(old_header: &Definition) -> String {
         }
     }
 }
+
+pub fn write_fasta(
+    outdir: &String,
+    file_name: String,
+    fasta_record: Vec<fasta::Record>,
+) -> std::io::Result<()> {
+    // Create the file
+    fs::create_dir_all(&outdir)?;
+    let file_path = format!("{}/{}", outdir, file_name);
+    let _data_file = File::create(&file_path);
+
+    // Append to the file
+    let file = OpenOptions::new()
+        .append(true)
+        .open(file_path)
+        .expect("creation failed");
+
+    let mut writer = fasta::Writer::new(file);
+    for i in fasta_record {
+        writer.write_record(&i).unwrap();
+    }
+    Ok(())
+}
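Aside: a hypothetical call site for the new shared helper, showing the intended contract (the output directory is created on demand, records are appended under the given file name). The record construction follows the noodles usage elsewhere in this series, but the exact Definition::new argument types are an assumption:

    use noodles::fasta::{
        self,
        record::{Definition, Sequence},
    };

    fn demo_write() -> std::io::Result<()> {
        // Illustrative only: writes one record to out_dir/example.fa
        let record = fasta::Record::new(
            Definition::new("SCAFF_1", None),
            Sequence::from(b"ACGT".to_vec()),
        );
        write_fasta(&"out_dir".to_string(), "example.fa".to_string(), vec![record])
    }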
diff --git a/src/main.rs b/src/main.rs
index 9e947f9..7a88c42 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -100,11 +100,23 @@ fn main() -> Result<(), Error> {
             )
             .arg(
                 Arg::new("mem-size")
-                    .short('s')
+                    .short('m')
                     .required(true)
-                    .value_parser(clap::value_parser!(u16))
+                    .value_parser(clap::value_parser!(usize))
                     .help("Size in MB that a fasta file is to be chunked into")
             )
+            .arg(
+                Arg::new("data_type")
+                    .short('d')
+                    .value_parser(clap::builder::PossibleValuesParser::new(split_options))
+                    .help("The data type of the input data")
+            )
+            .arg(
+                Arg::new("sanitise")
+                    .short('s')
+                    .value_parser(clap::value_parser!(bool))
+                    .help("Do we need to sanitise the headers of the input fasta")
+            )
             .arg(
                 Arg::new("output-directory")
                     .short('o')
diff --git a/src/split_by_count.rs b/src/split_by_count.rs
index fa91188..f41321f 100644
--- a/src/split_by_count.rs
+++ b/src/split_by_count.rs
@@ -1,5 +1,5 @@
 pub mod split_by_count_mod {
-    use crate::generics::sanitise_header;
+    use crate::generics::{sanitise_header, write_fasta};
     use clap::ArgMatches;
     use compare::{natural, Compare};
     use noodles::fasta::{self, Record};
@@ -25,22 +25,6 @@ pub mod split_by_count_mod {
         };
     }
 
-    fn write_fasta(outdir: &String, fasta_record: &Vec<Record>) {
-        // Take a fasta Record and append it to the output file
-        println!("{}", outdir);
-
-        let _data_file = File::create(outdir);
-        let file = OpenOptions::new()
-            .append(true)
-            .open(outdir)
-            .expect("creation failed");
-
-        let mut writer = fasta::Writer::new(file);
-        for i in fasta_record {
-            writer.write_record(i).unwrap();
-        }
-    }
-
     pub fn split_file_by_count(arguments: std::option::Option<&ArgMatches>) {
         let sanitise: &bool = arguments.unwrap().get_one::<bool>("sanitise").unwrap();
         let fasta_file = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
@@ -57,7 +41,6 @@ pub mod split_by_count_mod {
             .unwrap();
 
         let new_outpath = format!("{}/{}/{}/", outpath, actual_name, data_type);
-        create_dir_all(new_outpath.clone()).unwrap();
         let fasta_count = arguments.unwrap().get_one::<u16>("count").unwrap();
         println!(
             "Fasta file for processing: {:?}\nNumber of records per file: {:?}",
@@ -89,30 +72,28 @@ pub mod split_by_count_mod {
             let cmp = natural();
             let compared = cmp.compare(&counter, fasta_count);
             if compared == Ordering::Equal {
-                let full_outpath = format!(
-                    "{}{}_f{}_c{}-a{}.fa",
-                    new_outpath,
+                let file_name = format!(
+                    "{}_f{}_c{}-a{}.fa",
                     file_name[0],
                     file_counter,
                     &fasta_count,
                     &record_list.len()
                 );
-                write_fasta(&full_outpath, &record_list);
+                write_fasta(&new_outpath, file_name, record_list);
                 file_counter += 1;
                 counter = 0;
                 record_list = Vec::new();
             }
         }
 
-        let full_outpath = format!(
-            "{}{}_f{}_c{}-a{}.fa",
-            new_outpath,
+        let file_name = format!(
+            "{}_f{}_c{}-a{}.fa",
             file_name[0],
             file_counter,
             &fasta_count,
             &record_list.len()
         );
-        write_fasta(&full_outpath, &record_list);
+        write_fasta(&new_outpath, file_name, record_list);
     }
 }
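Aside: stripped of the file naming, the counter logic in split_file_by_count amounts to batching: flush a full batch every time the record counter reaches the requested count, then keep the final, possibly short batch. A generic sketch (hypothetical helper, not in the patch):

    // Flush a batch every `per_file` records; keep the short remainder.
    fn batch_records<T>(records: Vec<T>, per_file: usize) -> Vec<Vec<T>> {
        let mut batches: Vec<Vec<T>> = Vec::new();
        let mut current: Vec<T> = Vec::new();
        for record in records {
            current.push(record);
            if current.len() == per_file {
                batches.push(std::mem::take(&mut current));
            }
        }
        if !current.is_empty() {
            batches.push(current);
        }
        batches
    }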
diff --git a/src/split_by_size.rs b/src/split_by_size.rs
index f1b4a7b..31753b2 100644
--- a/src/split_by_size.rs
+++ b/src/split_by_size.rs
@@ -1,12 +1,126 @@
 pub mod split_by_size_mod {
+    use crate::generics::{only_keys, validate_fasta, write_fasta};
     use clap::ArgMatches;
+    use noodles::fasta;
+    use noodles::fasta::record::Definition;
+    use noodles::fasta::repository::adapters::IndexedReader;
+    use noodles::fasta::Record;
+    use std::collections::HashMap;
+    use std::path::Path;
+
+    pub fn find_chunks<'a>(
+        header_sizes: &'a HashMap<String, usize>,
+        size: &usize,
+    ) -> HashMap<u16, HashMap<&'a String, &'a usize>> {
+        //let mut new_map = HashMap::new();
+        let mut chunk = 1;
+        let mut new_map: HashMap<u16, HashMap<&String, &usize>> = HashMap::new();
+        let mut subset_map: HashMap<&String, &usize> = HashMap::new();
+        let mut temp_map: HashMap<&String, &usize> = HashMap::new();
+
+        for i in header_sizes {
+            let scaff_name = i.0;
+            let scaff_size = i.1;
+            // If the scaffold size is greater than the chunk size,
+            // output it straight away
+            if i.1 > size {
+                // Must be something cleaner for this bit
+                temp_map.insert(scaff_name, scaff_size);
+                new_map.insert(chunk, temp_map);
+
+                // Clear the HashMap
+                temp_map = HashMap::new();
+                chunk += 1;
+            // If the scaffold is not > chunk size, add it to the HashMap and
+            // scan through the HashMap, checking whether it has grown past the chunk size.
+            } else {
+                subset_map.insert(scaff_name, scaff_size);
+                // If this list sums to larger than the chunk size, then
+                // remove the last item and check again.
+                // If removing [-1] makes the total size < chunk size, write
+                // out to file and keep that [-1] in the list for
+                // the next round of chunking.
+                if subset_map.len() > 1 {
+                    let summed: usize = subset_map.values().copied().sum();
+                    if summed > size.to_owned() {
+                        subset_map.remove(scaff_name);
+                        let summed: usize = subset_map.values().copied().sum();
+                        if summed < size.to_owned() {
+                            new_map.insert(chunk, subset_map);
+                            chunk += 1;
+                        } else {
+                            println!("ERROR: MORE LOGIC NEEDED TO SPLIT UP")
+                        }
+                        subset_map = HashMap::new();
+                        subset_map.insert(scaff_name, scaff_size);
+                    }
+                }
+            }
+        }
+        new_map.insert(chunk.to_owned(), subset_map.to_owned());
+
+        new_map
+    }
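Aside: behind the HashMap bookkeeping, find_chunks is a greedy bin fill: an oversized scaffold gets a chunk of its own, and smaller scaffolds accumulate until the next one would tip the running total over the limit. A simplified sketch with hypothetical types (the real function returns nested HashMaps keyed by chunk number, and iterates a HashMap in arbitrary order):

    fn greedy_chunks(sizes: &[(String, usize)], limit: usize) -> Vec<Vec<(String, usize)>> {
        let mut chunks: Vec<Vec<(String, usize)>> = Vec::new();
        let mut current: Vec<(String, usize)> = Vec::new();
        let mut total = 0;
        for (name, len) in sizes {
            if *len > limit {
                // Oversized scaffold: a chunk of its own
                chunks.push(vec![(name.clone(), *len)]);
            } else if total + *len > limit {
                // Would overflow: close the current chunk, start a new one
                chunks.push(std::mem::take(&mut current));
                current.push((name.clone(), *len));
                total = *len;
            } else {
                current.push((name.clone(), *len));
                total += *len;
            }
        }
        if !current.is_empty() {
            chunks.push(current);
        }
        chunks
    }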
 
     pub fn split_file_by_size(arguments: std::option::Option<&ArgMatches>) {
         let fasta_file: &String = arguments.unwrap().get_one::<String>("fasta-file").unwrap();
+        let chunk_size: &usize = arguments.unwrap().get_one::<usize>("mem-size").unwrap();
+        let data_type: &String = arguments.unwrap().get_one::<String>("data_type").unwrap();
+        let outpath: &String = arguments
+            .unwrap()
+            .get_one::<String>("output-directory")
+            .unwrap();
+
+        let path_obj = Path::new(fasta_file);
+        let grab_name = path_obj.file_name().unwrap();
+        let actual_list: Vec<&str> = grab_name.to_str().unwrap().split('.').collect();
+        let actual_name = actual_list[0];
+
+        let new_outpath = format!("{}/{}/{}/", outpath, actual_name, data_type);
+
         println!("Fasta file for processing: {:?}", &fasta_file);
-        println!(
-            "Size to chunk fasta into: {:?}",
-            arguments.unwrap().get_one::<u16>("mem-size").unwrap()
-        );
+        println!("Size to chunk fasta into: {:?}", &chunk_size);
+
+        let validation = validate_fasta(fasta_file);
+
+        // Deserves better error handling here
+        let results = validation.unwrap();
+
+        // Returns only the HashMap of chunk number to HashMap of header to size
+        let split_hash = find_chunks(&results, &chunk_size);
+
+        // Duplicated from TPF_FASTA
+        // Should be abstracted into generics
+        let reader = fasta::indexed_reader::Builder::default().build_from_path(fasta_file);
+        let fasta_repo = match reader {
+            Ok(data) => {
+                let adapter = IndexedReader::new(data);
+
+                // Now read the fasta and return it as a queryable object
+                let repository = fasta::Repository::new(adapter);
+                repository
+            }
+            Err(_) => todo!(), // Probably just panic!
+        };
+
+        for i in split_hash {
+            let mut record_list: Vec<Record> = Vec::new();
+            let list: Vec<&String> = only_keys(i.1.to_owned()).collect();
+            for ii in list {
+                let results = fasta_repo.get(ii).transpose();
+                let new_rec = match results {
+                    Ok(data) => {
+                        let definition = Definition::new(ii, None);
+                        Record::new(definition, data.unwrap())
+                    }
+                    Err(e) => panic!("{:?}", e),
+                };
+                record_list.push(new_rec)
+            }
+            let file_name = format!("{}_f{}_{}.fasta", actual_name, i.0, data_type);
+
+            let _ = write_fasta(&new_outpath, file_name, record_list);
+        }
+        //println!("{:?}", split_hash)
     }
 }

From 888947986d3885a93f6f25446f012055a482c371 Mon Sep 17 00:00:00 2001
From: DLBPointon
Date: Thu, 30 May 2024 11:17:38 +0100
Subject: [PATCH 7/7] Linting fixes

---
 src/generics.rs       |  2 +-
 src/split_by_count.rs | 11 +++--------
 src/split_by_size.rs  |  9 ++++-----
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/src/generics.rs b/src/generics.rs
index 36b9a19..ad70e17 100644
--- a/src/generics.rs
+++ b/src/generics.rs
@@ -94,7 +94,7 @@ pub fn write_fasta(
     fasta_record: Vec<fasta::Record>,
 ) -> std::io::Result<()> {
     // Create the file
-    fs::create_dir_all(&outdir)?;
+    fs::create_dir_all(outdir)?;
     let file_path = format!("{}/{}", outdir, file_name);
     let _data_file = File::create(&file_path);
 
diff --git a/src/split_by_count.rs b/src/split_by_count.rs
index f41321f..8bb394b 100644
--- a/src/split_by_count.rs
+++ b/src/split_by_count.rs
@@ -4,12 +4,7 @@ pub mod split_by_count_mod {
     use compare::{natural, Compare};
     use noodles::fasta::{self, Record};
     use std::cmp::Ordering;
-    use std::fs::OpenOptions;
-    use std::{
-        fs::{create_dir_all, File},
-        io::BufReader,
-        path::Path,
-    };
+    use std::{fs::File, io::BufReader, path::Path};
 
     #[allow(clippy::needless_return)]
     fn fix_head(records: Record, sanitise: bool) -> Record {
@@ -80,7 +75,7 @@ pub mod split_by_count_mod {
                     &record_list.len()
                 );
 
-                write_fasta(&new_outpath, file_name, record_list);
+                let _ = write_fasta(&new_outpath, file_name, record_list);
                 file_counter += 1;
                 counter = 0;
                 record_list = Vec::new();
@@ -94,6 +89,6 @@ pub mod split_by_count_mod {
             &fasta_count,
             &record_list.len()
         );
-        write_fasta(&new_outpath, file_name, record_list);
+        let _ = write_fasta(&new_outpath, file_name, record_list);
     }
 }
diff --git a/src/split_by_size.rs b/src/split_by_size.rs
index 31753b2..20c35ef 100644
--- a/src/split_by_size.rs
+++ b/src/split_by_size.rs
@@ -42,10 +42,10 @@ pub mod split_by_size_mod {
                 // the next round of chunking.
                 if subset_map.len() > 1 {
                     let summed: usize = subset_map.values().copied().sum();
-                    if summed > size.to_owned() {
+                    if summed > *size {
                         subset_map.remove(scaff_name);
                         let summed: usize = subset_map.values().copied().sum();
-                        if summed < size.to_owned() {
+                        if summed < *size {
                             new_map.insert(chunk, subset_map);
                             chunk += 1;
                         } else {
@@ -87,7 +87,7 @@ pub mod split_by_size_mod {
 
         let results = validation.unwrap();
 
-        let split_hash = find_chunks(&results, &chunk_size);
+        let split_hash = find_chunks(&results, chunk_size);
 
         // Duplicated from TPF_FASTA
         // Should be abstracted into generics
@@ -97,8 +97,7 @@ pub mod split_by_size_mod {
             Ok(data) => {
                 let adapter = IndexedReader::new(data);
 
                 // Now read the fasta and return it as a queryable object
-                let repository = fasta::Repository::new(adapter);
-                repository
+                fasta::Repository::new(adapter)
             }
             Err(_) => todo!(), // Probably just panic!
         };
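One closing note on the `let _ = write_fasta(...)` pattern the linting pass settles on: it silences the unused-Result warning, but it also swallows any I/O error. If the split commands should fail loudly instead, a stricter alternative (an assumption, not part of these patches) would be:

    write_fasta(&new_outpath, file_name, record_list)
        .expect("unable to write fasta");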