diff --git a/include/sviper/auxiliary.h b/include/sviper/auxiliary.h index cb929d3..a49c039 100644 --- a/include/sviper/auxiliary.h +++ b/include/sviper/auxiliary.h @@ -19,16 +19,16 @@ struct input_output_information{ // Custom constructor. input_output_information(CmdOptions & options_) : cmd_options(options_) {} - std::ofstream log_file; - CmdOptions cmd_options; - std::vector> long_read_file_handles; - seqan::BamHeader long_read_header; // The bam header object needed to fill bam context - seqan::BamIndex long_read_bai; - std::vector> short_read_file_handles; - seqan::BamHeader short_read_header; // The bam header object needed to fill bam context - seqan::BamIndex short_read_bai; - std::vector> faidx_file_handles; - std::vector polished_reads; // stores records in case info.cmd_options.output-polished-bam is true + std::ofstream log_file{}; + CmdOptions cmd_options{}; + std::vector> long_read_file_handles{}; + seqan::BamHeader long_read_header{}; // The bam header object needed to fill bam context + seqan::BamIndex long_read_bai{}; + std::vector> short_read_file_handles{}; + seqan::BamHeader short_read_header{}; // The bam header object needed to fill bam context + seqan::BamIndex short_read_bai{}; + std::vector> faidx_file_handles{}; + std::vector polished_reads{}; // stores records in case info.cmd_options.output-polished-bam is true }; bool prep_file_handles(input_output_information & info) diff --git a/include/sviper/basics.h b/include/sviper/basics.h index 05a202c..dba3de2 100644 --- a/include/sviper/basics.h +++ b/include/sviper/basics.h @@ -145,7 +145,7 @@ void advance_in_cigar(unsigned & cigar_pos, if (stop_criterion(ref_pos, read_pos)) break; - if ((cigar[cigar_pos]).operation == 'M') + if ((cigar[cigar_pos]).operation == 'M' || (cigar[cigar_pos]).operation == 'X' || (cigar[cigar_pos]).operation == '=') { // Matches/mismatches advance both the read and the reference position read_pos += (cigar[cigar_pos]).count; @@ -162,6 +162,12 @@ void advance_in_cigar(unsigned & cigar_pos, ref_pos += (cigar[cigar_pos]).count; } // else hard-clips ('H') advance neither the read nor the reference position + else if ((cigar[cigar_pos]).operation == 'H') + {} + else + { + throw std::logic_error{"UNKNOWN CIGAR CHARACTER."}; + } ++cigar_pos; } @@ -184,8 +190,8 @@ inline std::tuple get_read_region_boundaries(seqan::BamAlignmentRecord int ref_region_begin, int ref_region_end) { - int read_region_begin; - int read_region_end; + int read_region_begin{}; + int read_region_end{}; if (ref_region_begin >= ref_region_end) { @@ -309,7 +315,7 @@ inline void records_to_read_pairs(seqan::StringSet & reads1, records.erase(last, records.end()); seqan::Dna5QString dummy_seq{}; // read sequence for dummy record is an empty string - seqan::CharString id1; // stores last seen id + seqan::CharString id1{}; // stores last seen id for (unsigned i = 0; i < length(records); ++i) { @@ -398,7 +404,7 @@ inline void cut_down_high_coverage(std::vector & shor if (short_reads.size() == 0) return; - std::vector tmp; + std::vector tmp{}; std::sort(short_reads.begin(), short_reads.end(), bamRecordPosLessQualityGreater()); std::vector ends{}; @@ -477,15 +483,15 @@ inline seqan::Dna5String build_consensus(seqan::StringSet con std::vector const & quals) // mapping qualities { // TODO:: include base qualities from bam file - seqan::Align align; + seqan::Align align{}; seqan::resize(seqan::rows(align), seqan::length(seqs)); for (unsigned i = 0; i < seqan::length(seqs); ++i) seqan::assignSource(seqan::row(align, i), seqs[i]); seqan::globalMsaAlignment(align, seqan::SimpleScore(5, -3, -1, -3)); - seqan::Dna5String consensus; - seqan::String > profile; + seqan::Dna5String consensus{}; + seqan::String > profile{}; seqan::resize(profile, 1); // fill profile and get maximum supported character for each position diff --git a/include/sviper/config.h b/include/sviper/config.h index 488d3f0..972b5db 100644 --- a/include/sviper/config.h +++ b/include/sviper/config.h @@ -16,12 +16,12 @@ struct CmdOptions double mean_insert_size_of_short_reads{280.054}; // original coverage double stdev_insert_size_of_short_reads{145.162}; int length_of_short_reads{150}; // original coverage - std::string long_read_file_name; - std::string short_read_file_name; - std::string candidate_file_name; - std::string output_prefix; - std::string reference_file_name; - std::string log_file_name; + std::string long_read_file_name{}; + std::string short_read_file_name{}; + std::string candidate_file_name{}; + std::string output_prefix{}; + std::string reference_file_name{}; + std::string log_file_name{}; CmdOptions(std::string long_, std::string short_, std::string candidate_, std::string ref_) : threads(std::thread::hardware_concurrency()), @@ -49,11 +49,11 @@ struct SViperConfig bool verbose{false}; // options to be set through the command line options - int flanking_region; // size of flanking region for breakpoints - int mean_coverage_of_short_reads; // original coverage - double mean_insert_size_of_short_reads; - double stdev_insert_size_of_short_reads; - int length_of_short_reads; + int flanking_region{}; // size of flanking region for breakpoints + int mean_coverage_of_short_reads{}; // original coverage + double mean_insert_size_of_short_reads{}; + double stdev_insert_size_of_short_reads{}; + int length_of_short_reads{}; unsigned ref_flank_length{500}; // length to flank to the consensus sequence with the reference @@ -74,7 +74,7 @@ struct SViperConfig double alpha{1}; // scaling factor such that mapping and base quality are comparable double mean_coverage{0}; // will be overridden every round after mapping double min_coverage{4}; - std::vector cov_profile; + std::vector cov_profile{}; /* Polishing statistics (total base count). * Note: The total number can exceed the length easily since bases can be diff --git a/include/sviper/io.h b/include/sviper/io.h index 1e931aa..3098ad7 100644 --- a/include/sviper/io.h +++ b/include/sviper/io.h @@ -11,7 +11,7 @@ namespace sviper { bool read_vcf(std::vector & variants, std::vector & vcf_header, input_output_information & info) { - std::ifstream input_vcf; // The candidate variants to polish + std::ifstream input_vcf{}; // The candidate variants to polish if (!open_file_success(input_vcf, info.cmd_options.candidate_file_name.c_str())) return false; @@ -32,7 +32,7 @@ bool read_vcf(std::vector & variants, std::vector & vcf_he bool write_vcf(std::vector & variants, std::vector & vcf_header, input_output_information & info) { - std::ofstream output_vcf; // The polished variant as output + std::ofstream output_vcf{}; // The polished variant as output if (!open_file_success(output_vcf, (info.cmd_options.output_prefix + ".vcf").c_str())) return false; diff --git a/include/sviper/mapping.h b/include/sviper/mapping.h index 00187c0..88ea64c 100644 --- a/include/sviper/mapping.h +++ b/include/sviper/mapping.h @@ -19,15 +19,15 @@ struct Mapping_object typedef seqan::Gaps TGapsRead; typedef seqan::Gaps TGapsRef; - TRead read; - TGapsRead gapsRead; - TGapsRef gapsRef; + TRead read{}; + TGapsRead gapsRead{}; + TGapsRef gapsRef{}; bool read_is_rc{false}; // rc = reverse-complemented double mapQRead{0.0}; - TRead mate; - TGapsRead gapsMate; - TGapsRef gapsRefMate; + TRead mate{}; + TGapsRead gapsMate{}; + TGapsRef gapsRefMate{}; bool mate_is_rc{false}; // rc = reverse-complemented double mapQMate{0.0}; @@ -152,7 +152,7 @@ std::vector mapping(seqan::StringSet const & { SEQAN_ASSERT_EQ(seqan::length(reads1), seqan::length(reads2)); - std::vector mobs; + std::vector mobs{}; //mobs.resize(length(reads1)); TODO for (unsigned ridx = 0; ridx < seqan::length(reads1); ++ridx) // for every read (pair) diff --git a/include/sviper/merge_split_alignments.h b/include/sviper/merge_split_alignments.h index 0ca03a3..27f9581 100644 --- a/include/sviper/merge_split_alignments.h +++ b/include/sviper/merge_split_alignments.h @@ -94,6 +94,8 @@ inline void truncate_cigar_right(seqan::BamAlignmentRecord & record, int until) } if ((record.cigar[length(record.cigar) - 1]).operation != 'M' && + (record.cigar[length(record.cigar) - 1]).operation != 'X' && + (record.cigar[length(record.cigar) - 1]).operation != '=' && (record.cigar[length(record.cigar) - 1]).operation != 'I') // meaning == H,S or D { if ((record.cigar[length(record.cigar) - 1]).operation == 'D' || @@ -101,10 +103,14 @@ inline void truncate_cigar_right(seqan::BamAlignmentRecord & record, int until) { // continuing deletion in record must be added to the large DEL event seqan::erase(record.cigar, length(record.cigar) - 1); } - else // meaning == S + else if ((record.cigar[length(record.cigar) - 1]).operation == 'S') { (record.cigar[length(record.cigar) - 1]).operation = 'I'; // add those bases } + else + { + throw std::logic_error{"UNKNOWN CIGAR CHARACTER."}; + } } } @@ -246,8 +252,8 @@ inline void turn_hard_clipping_to_soft_clipping(seqan::BamAlignmentRecord & prim // Note: common hard clipping is removed from the cigar string. // hard clipping in one is turned to soft clipping by appending part // of the sequence of the other - decltype(prim.seq) final_prim_seq; - decltype(supp.seq) final_supp_seq; + decltype(prim.seq) final_prim_seq{}; + decltype(supp.seq) final_supp_seq{}; // beginning of alignment if ((prim.cigar[0]).operation == 'H') @@ -518,8 +524,8 @@ inline seqan::BamAlignmentRecord merge_record_group(std::vector merge_alignments(std::vector const & records) { - std::vector merged_records; - std::vector record_group; + std::vector merged_records{}; + std::vector record_group{}; for (auto const & rec : records) { diff --git a/include/sviper/polishing.h b/include/sviper/polishing.h index 09ecb01..0254da8 100644 --- a/include/sviper/polishing.h +++ b/include/sviper/polishing.h @@ -116,8 +116,8 @@ void fill_profile(seqan::String > & prof // every position. This will account for deletions and substitutions. // Insertions are first stored seperately because otherwise the positions // in read-ref-alignments are not "synchronized" and hard to handle. - std::vector >> insertion_profiles; - std::vector> insertion_cov_profiles; + std::vector >> insertion_profiles{}; + std::vector> insertion_cov_profiles{}; insertion_profiles.resize(seqan::length(profile)); // there can be an insertion after every position insertion_cov_profiles.resize(seqan::length(profile)); @@ -125,7 +125,7 @@ void fill_profile(seqan::String > & prof // but additionally, one needs to count the number of reads that span the position // and do NOT display an insertion. Those are counted with non_insertion_coverage_count // for each read and the insertion profile is updated accordingly. - std::vector non_insertion_coverage_count; + std::vector non_insertion_coverage_count{}; non_insertion_coverage_count.resize(seqan::length(profile)); for (auto & cov_count : non_insertion_coverage_count) cov_count = 0; @@ -239,7 +239,7 @@ inline seqan::Dna5String consensus_from_profile(seqan::String cons seqan::Dna5String ref, SViperConfig & config) { - seqan::Dna5String old_ref; + seqan::Dna5String old_ref{}; // first, do only fix mismatches while (ref != old_ref && config.rounds < 20) diff --git a/include/sviper/stats.h b/include/sviper/stats.h index df9aefb..e06f963 100644 --- a/include/sviper/stats.h +++ b/include/sviper/stats.h @@ -11,19 +11,19 @@ namespace sviper { inline std::pair stats_insert_size(std::string const & bam_file_name, std::string contig_name) { - seqan::BamFileIn bam_file; - seqan::BamIndex bam_index; + seqan::BamFileIn bam_file{}; + seqan::BamIndex bam_index{}; if (!open_file_success(bam_file, bam_file_name.c_str())) return {-1.0, -1.0}; if (!open_file_success(bam_index, (bam_file_name + ".bai").c_str())) return {-1.0, -1.0}; - seqan::BamHeader bam_header; + seqan::BamHeader bam_header{}; seqan::readHeader(bam_header, bam_file); - std::vector insert_sizes; - seqan::BamAlignmentRecord record; + std::vector insert_sizes{}; + seqan::BamAlignmentRecord record{}; bool hasAlignments = false; int rID = 0; diff --git a/include/sviper/sviper.h b/include/sviper/sviper.h index 4d37760..b12cdf0 100644 --- a/include/sviper/sviper.h +++ b/include/sviper/sviper.h @@ -129,7 +129,7 @@ seqan::ArgumentParser::ParseResult parseCommandLine(CmdOptions & options, int ar bool polish_variant(Variant & var, input_output_information & info) { - std::stringstream localLog; + std::stringstream localLog{}; seqan::BamFileIn & short_read_bam = *(info.short_read_file_handles[omp_get_thread_num()]); seqan::BamFileIn & long_read_bam = *(info.long_read_file_handles[omp_get_thread_num()]); seqan::FaiIndex & faiIndex = *(info.faidx_file_handles[omp_get_thread_num()]); @@ -187,8 +187,8 @@ bool polish_variant(Variant & var, input_output_information & info) SEQAN_ASSERT_LEQ(ref_region_start, ref_region_end); // get reference id in bam File - unsigned rID_short; - unsigned rID_long; + unsigned rID_short{}; + unsigned rID_long{}; if (!seqan::getIdByName(rID_short, seqan::contigNamesCache(seqan::context(short_read_bam)), var.ref_chrom)) { @@ -212,10 +212,10 @@ bool polish_variant(Variant & var, input_output_information & info) // Extract long reads // --------------------------------------------------------------------- - std::vector supporting_records; + std::vector supporting_records{}; { - std::vector long_reads; + std::vector long_reads{}; // extract overlapping the start breakpoint +-50 bp's seqan::viewRecords(long_reads, long_read_bam, info.long_read_bai, rID_long, var_ref_pos_sub50, var_ref_pos_add50); @@ -295,7 +295,7 @@ bool polish_variant(Variant & var, input_output_information & info) // --------------------------------------------------------------------- localLog << "--- Cropping long reads with a buffer of +-" << info.cmd_options.flanking_region << " around variants." << std::endl; - seqan::StringSet supporting_sequences; + seqan::StringSet supporting_sequences{}; std::vector::size_type maximum_long_reads = 5; // sort records such that the highest quality ones are chosen first @@ -304,6 +304,11 @@ bool polish_variant(Variant & var, input_output_information & info) for (unsigned i = 0; i < std::min(maximum_long_reads, supporting_records.size()); ++i) { auto region = get_read_region_boundaries(supporting_records[i], ref_region_start, ref_region_end); + + assert(std::get<0>(region) >= 0); + assert(std::get<1>(region) >= 0); + assert(std::get<0>(region) <= std::get<1>(region)); + seqan::Dna5String reg = seqan::infix(supporting_records[i].seq, std::get<0>(region), std::get<1>(region)); // For deletions, the expected size of the subsequence is that of @@ -343,7 +348,7 @@ bool polish_variant(Variant & var, input_output_information & info) // Build consensus of supporting read regions // --------------------------------------------------------------------- - std::vector mapping_qualities; + std::vector mapping_qualities{}; mapping_qualities.resize(supporting_records.size()); for (unsigned i = 0; i < supporting_records.size(); ++i) mapping_qualities[i] = (supporting_records[i]).mapQ; @@ -352,18 +357,18 @@ bool polish_variant(Variant & var, input_output_information & info) localLog << "--- Built a consensus with a MSA of length " << seqan::length(cns) << "." << std::endl; - seqan::Dna5String polished_ref; + seqan::Dna5String polished_ref{}; SViperConfig config{info.cmd_options}; config.ref_flank_length = 500; { - seqan::StringSet short_reads_1; // reads (first in pair) - seqan::StringSet short_reads_2; // mates (second in pair) + seqan::StringSet short_reads_1{}; // reads (first in pair) + seqan::StringSet short_reads_2{}; // mates (second in pair) { // Extract short reads in region // --------------------------------------------------------------------- - std::vector short_reads; + std::vector short_reads{}; // If the breakpoints are farther apart then illumina-read-length + 2 * flanking-region, // then extract reads for each break point separately. if (ref_region_end - ref_region_start > info.cmd_options.flanking_region * 2 + info.cmd_options.length_of_short_reads) @@ -438,7 +443,7 @@ bool polish_variant(Variant & var, input_output_information & info) // Align polished sequence to reference // --------------------------------------------------------------------- - seqan::Dna5String ref_part; + seqan::Dna5String ref_part{}; seqan::readRegion(ref_part, faiIndex, ref_fai_idx, std::max(1u, ref_region_start - config.ref_flank_length), std::min(ref_region_end + config.ref_flank_length, static_cast(ref_length))); diff --git a/include/sviper/variant.h b/include/sviper/variant.h index 7d89dee..86165e8 100644 --- a/include/sviper/variant.h +++ b/include/sviper/variant.h @@ -25,7 +25,7 @@ struct Variant Variant(const std::string & line) { std::stringstream ss(line); - std::string dummy; + std::string dummy{}; // read variant line into member variables ss >> ref_chrom; @@ -138,7 +138,7 @@ struct Variant void throw_verbose_exception(std::string const & what) { - std::ostringstream os; + std::ostringstream os{}; os << what << "Read: "; (*this).write(os); throw std::iostream::failure(os.str()); @@ -150,19 +150,19 @@ struct Variant Variant& operator=(const Variant&) = default; Variant& operator=(Variant&&) = default; - SV_TYPE sv_type; + SV_TYPE sv_type{}; int sv_length{-1}; - std::string ref_chrom; + std::string ref_chrom{}; int ref_pos{-1}; int ref_pos_end{-1}; - std::string id; - std::string ref_seq; - std::string alt_seq; + std::string id{}; + std::string ref_seq{}; + std::string alt_seq{}; double quality{0}; - std::string filter; - std::string info; - std::string format; - std::vector samples; + std::string filter{}; + std::string info{}; + std::string format{}; + std::vector samples{}; void write(std::ostream & stream) {