From 35a11ab8f02c07c21d96dc13e7be0a886cca742f Mon Sep 17 00:00:00 2001 From: Chris Saunders Date: Fri, 13 Dec 2024 13:47:06 -0800 Subject: [PATCH] Update to v0.12.8 --- CHANGELOG.md | 8 +++++++ Cargo.lock | 16 +++++++++++--- Cargo.toml | 3 ++- LICENSE-THIRDPARTY.json | 11 +++++++++- src/bam_sa_parser.rs | 10 +++++++-- src/main.rs | 8 +++++++ .../assemble/banded_pairwise_assembler.rs | 2 +- src/spoa_utils.rs | 20 +++--------------- src/utils.rs | 21 +++++++++++++++++-- 9 files changed, 72 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5144353..00b29d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Change Log +## v0.12.8 - 2024-12-13 + +### Fixed + +- Increase system open file limit (github #9) + - May simplify joint-call for large pedigrees at high thread counts +- Improve error message when split reads map to an unknown chromosome (github #8) + ## v0.12.7 - 2024-10-23 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index 7628066..1bd8cdb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,6 +1,6 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. 
-version = 3 +version = 4 [[package]] name = "adler2" @@ -148,7 +148,7 @@ dependencies = [ "editdistancek", "enum-map", "fxhash", - "itertools 0.12.1", + "itertools 0.13.0", "itertools-num", "lazy_static", "multimap", @@ -1462,6 +1462,15 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +[[package]] +name = "rlimit" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7043b63bd0cd1aaa628e476b80e6d4023a3b50eb32789f2728908107bd0c793a" +dependencies = [ + "libc", +] + [[package]] name = "rmp" version = "0.8.14" @@ -1610,7 +1619,7 @@ dependencies = [ [[package]] name = "sawfish" -version = "0.12.7" +version = "0.12.8" dependencies = [ "approx", "bio", @@ -1627,6 +1636,7 @@ dependencies = [ "num_cpus", "rayon", "regex", + "rlimit", "rmp-serde", "rust-htslib", "rust-vc-utils", diff --git a/Cargo.toml b/Cargo.toml index cf060b5..9af2359 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "sawfish" -version = "0.12.7" +version = "0.12.8" authors = ["Chris Saunders "] description = "Structural variant analysis for mapped PacBio HiFi reads" edition = "2021" @@ -32,6 +32,7 @@ num = "0.4" num_cpus = "1" rayon = "1" regex = "1" +rlimit = "0" rust-htslib = { version = "0.47", default-features = false } rust-vc-utils = { path="lib/rust-vc-utils" } rust-wfa2 = { git = "https://github.com/ctsa/rust-wfa2.git" } diff --git a/LICENSE-THIRDPARTY.json b/LICENSE-THIRDPARTY.json index 9effeaa..0c8236c 100644 --- a/LICENSE-THIRDPARTY.json +++ b/LICENSE-THIRDPARTY.json @@ -1448,6 +1448,15 @@ "license_file": null, "description": "A regular expression parser." 
}, + { + "name": "rlimit", + "version": "0.10.2", + "authors": "Nugine ", + "repository": "https://github.com/Nugine/rlimit/", + "license": "MIT", + "license_file": null, + "description": "Resource limits" + }, { "name": "rmp", "version": "0.8.14", @@ -1576,7 +1585,7 @@ }, { "name": "sawfish", - "version": "0.12.7", + "version": "0.12.8", "authors": "Chris Saunders ", "repository": null, "license": null, diff --git a/src/bam_sa_parser.rs b/src/bam_sa_parser.rs index cb7a3bb..0a2b887 100644 --- a/src/bam_sa_parser.rs +++ b/src/bam_sa_parser.rs @@ -144,7 +144,7 @@ pub fn get_seq_order_read_split_segments( read_size }; - for sa_segment in sa_segments.iter() { + for (sa_segment_index, sa_segment) in sa_segments.iter().enumerate() { if !has_aligned_segments(&sa_segment.cigar) { let qname = std::str::from_utf8(record.qname()).unwrap().to_string(); panic!("Bam record split segment id unaligned in read {qname}"); @@ -153,7 +153,13 @@ pub fn get_seq_order_read_split_segments( assert_eq!(primary_read_size, read_size); let (seq_order_read_start, seq_order_read_end) = get_seq_order_read_pos(read_start, read_end, read_size, sa_segment.is_fwd_strand); - let chrom_index = *chrom_list.label_to_index.get(&sa_segment.rname).unwrap(); + let chrom_index = match chrom_list.label_to_index.get(&sa_segment.rname) { + Some(&x) => x, + None => { + let qname = std::str::from_utf8(record.qname()).unwrap().to_string(); + panic!("In read '{qname}', the SA aux tag describes a split read mapped to {}:{} (in segment {}), which is not found in the input reference fasta", sa_segment.rname, sa_segment.pos, sa_segment_index); + } + }; seq_order_read_split_segments.push({ SeqOrderSplitReadSegment { seq_order_read_start, diff --git a/src/main.rs b/src/main.rs index 103f80d..57eac64 100644 --- a/src/main.rs +++ b/src/main.rs @@ -47,6 +47,12 @@ use crate::globals::{PROGRAM_NAME, PROGRAM_VERSION}; use crate::joint_call::run_joint_call; use crate::logger::setup_output_dir_and_logger; +/// Run system 
configuration steps prior to starting any other program logic +/// +fn system_configuration_prelude() { + utils::attempt_max_open_file_limit(); +} + fn run(settings: &cli::Settings) -> Result<(), Box> { info!("Starting {PROGRAM_NAME} {PROGRAM_VERSION}"); info!( @@ -74,6 +80,8 @@ fn run(settings: &cli::Settings) -> Result<(), Box> { } fn main() { + system_configuration_prelude(); + let settings = cli::validate_and_fix_settings(cli::parse_settings()); // Setup logger, including creation of the output directory for the log file: diff --git a/src/refine_sv/assemble/banded_pairwise_assembler.rs b/src/refine_sv/assemble/banded_pairwise_assembler.rs index c99eee3..653066d 100644 --- a/src/refine_sv/assemble/banded_pairwise_assembler.rs +++ b/src/refine_sv/assemble/banded_pairwise_assembler.rs @@ -53,7 +53,7 @@ struct BackboneAlignmentInfo { // Align the next read to the backbone. If the read extends the backbone, then redefine the backbone with this extension. // Repeat for all reads to form a fully extended backbone. // Rejection scoring follows the same scheme as for POA, so we might be building multiple backbones. -/// +// // 2.Pairwise POA from each backbone // For each backbone and its associated read set, pairwise align all reads in the set to the backbone, and take a majority vote on each edit to determine the consensus sequence. 
diff --git a/src/spoa_utils.rs b/src/spoa_utils.rs index 93add5a..23b7f0c 100644 --- a/src/spoa_utils.rs +++ b/src/spoa_utils.rs @@ -7,6 +7,7 @@ use rust_htslib::bam::record::Cigar; use spoa::{get_alignment_clip_size, get_alignment_overlap_size, AlignmentEngine, Graph}; use crate::simple_alignment::SimpleAlignment; +use crate::utils; /// Create a new poa graph with only the given sequence /// @@ -80,7 +81,7 @@ pub fn print_msa(alignments: &[CString]) { assert_eq!(seq.len(), len); } - let rows = (len + WIDTH - 1) / WIDTH; + let rows = len.div_ceil(WIDTH); let ruler = { let mut ruler = String::new(); @@ -129,23 +130,8 @@ pub fn print_msa(alignments: &[CString]) { pub fn print_fasta(alignments: &[CString]) { const WIDTH: usize = 100; - if alignments.is_empty() { - return; - } - let bytes = alignments.iter().map(|x| x.as_bytes()).collect::>(); - - for (seq_index, seq) in bytes.iter().enumerate() { - let len = seq.len(); - let rows = (len + WIDTH - 1) / WIDTH; - - eprintln!("> {}", seq_index); - for row_index in 0..rows { - let start = WIDTH * row_index; - let end = std::cmp::min(start + WIDTH, len); - eprintln!("{}", std::str::from_utf8(&seq[start..end]).unwrap()); - } - } + utils::print_fasta(WIDTH, &bytes); } /// Use spoa to get a consensus of two reads using the given alignment engine diff --git a/src/utils.rs b/src/utils.rs index 2fa48cb..44cd912 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -190,7 +190,7 @@ pub fn pairwise_alignment_printer(width: usize, lines: &[&[u8]]) { ruler }; - let rows = (len + width - 1) / width; + let rows = len.div_ceil(width); for row_index in 0..rows { let start = width * row_index; let end = std::cmp::min(start + width, len); @@ -219,7 +219,7 @@ pub fn print_fasta(width: usize, lines: &[&[u8]]) { for (seq_index, seq) in lines.iter().enumerate() { let len = seq.len(); - let rows = (len + width - 1) / width; + let rows = len.div_ceil(width); eprintln!("> {}", seq_index); for row_index in 0..rows { @@ -249,6 +249,23 @@ pub fn 
drop_true(vec: &mut Vec, drop_list: &[bool]) { vec.retain(|_| !*drop.next().unwrap()) } +/// Attempt to increase open file limit to the system's hard limit on *nix-like systems +/// +/// This increase is optional, so every failure case is ignored and the function returns without error. +/// +pub fn attempt_max_open_file_limit() { + use rlimit::Resource; + + let (soft, hard) = match Resource::NOFILE.get() { + Ok(x) => x, + Err(_) => return, + }; + + if soft < hard { + rlimit::setrlimit(Resource::NOFILE, hard, hard).unwrap_or_default(); + } +} + #[cfg(test)] mod tests { use super::*;