From e5f282e73a079354e9c823bedc866feddf4c7318 Mon Sep 17 00:00:00 2001 From: deliaBlue <103108590+deliaBlue@users.noreply.github.com> Date: Sat, 29 Apr 2023 03:20:13 +0200 Subject: [PATCH] test: add unit tests (#72) * ci: add pytest * build: add pysam dependency * build: add pytest and coverage dependencies * refactor: add necessary things to pass pytest * test: add test for filter_multimappers.py * test: add test file for filter_multimappers.py * refactor: change some lines * revert: revert to initial version * test: remove test file * test: remove test file * test: add test file * test: modify test_write_output * build: add pysam dependencies * test: add coverage file * Delete snakemake_report.html * build: delete unused import * build: remove pysam dependency * build: remove pytest-cov and add pysam dependency * ci: remove .coverage * refacor: change path to files * ci: ypdate unit tests call * test: add files for testing * test: add test for secondary and supp. alingments * test: add pragma to avoid testing * ci: update codecov-actions to v3 * ci: update pytest call * build: add pytest-cov dependency * ci: update pytest call * ci: update pytest call * ci: update pytest call * ci: update pytest call * ci: update pytest call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report and pytest call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage report call * ci: update coverage call * ci: update coverage call * ci: update coverage call * ci: update coverage call * ci: update submit coverage call * style: correct whitespaces in pragma * refactor: change first iteration * refactor: delete unuseful tests * refactor: modify functions * test: modify output file * refactor: remove skip secondary alns * test: add codecov.yml * test: add codecov.yml * test: add codecov.yml * test: update path on codecov.yml * refactor: remove first assignment * test: add token in coverage call * build: remove codecov.yml * test: delete coverage call * style: remove whitespaces --------- Co-authored-by: Iris Mestres --- .github/workflows/CI.yml | 29 ++ environment.dev.yml | 5 + scripts/filter_multimappers.py | 45 +-- .../tests/files/in_sam_diff_multimappers.sam | 4 + scripts/tests/files/in_sam_empty.sam | 3 + .../tests/files/in_sam_equal_multimappers.sam | 5 + scripts/tests/files/in_sam_multimappers.sam | 13 + scripts/tests/files/in_sam_sec_sup.sam | 13 + .../tests/files/out_sam_diff_multimappers.sam | 1 + scripts/tests/files/out_sam_empty.sam | 3 + .../files/out_sam_equal_multimappers.sam | 3 + scripts/tests/files/out_sam_multimappers.sam | 12 + scripts/tests/files/out_sam_sec_sup.sam | 10 + scripts/tests/files/sam_no_multimappers.sam | 7 + scripts/tests/test_filter_multimappers.py | 287 ++++++++++++++++++ 15 files changed, 419 insertions(+), 21 deletions(-) mode change 100644 => 100755 scripts/filter_multimappers.py create mode 100644 scripts/tests/files/in_sam_diff_multimappers.sam create mode 100644 scripts/tests/files/in_sam_empty.sam create mode 100644 scripts/tests/files/in_sam_equal_multimappers.sam create mode 100644 scripts/tests/files/in_sam_multimappers.sam create mode 100644 scripts/tests/files/in_sam_sec_sup.sam create mode 100644 scripts/tests/files/out_sam_diff_multimappers.sam create mode 100644 scripts/tests/files/out_sam_empty.sam create mode 100644 scripts/tests/files/out_sam_equal_multimappers.sam create mode 100644 scripts/tests/files/out_sam_multimappers.sam create mode 100644 scripts/tests/files/out_sam_sec_sup.sam create mode 100644 scripts/tests/files/sam_no_multimappers.sam create mode 100755 scripts/tests/test_filter_multimappers.py diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 52377c80..41a92481 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -15,6 +15,7 @@ jobs: defaults: run: shell: bash -l {0} + steps: - name: Checkout Repository @@ -43,3 +44,31 @@ jobs: - name: Run local test run: bash test/test_workflow_local.sh + + unit-testing: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + + steps: + - name: Checkout Repository + uses: actions/checkout@v3 + + - name: Setup environment + uses: conda-incubator/setup-miniconda@v2 + with: + mamba-version: "*" + activate-environment: mirflowz + environment-file: environment.yml + auto-activate-base: false + + - name: Update mirflowz env with root packages + run: mamba env update -n mirflowz -f environment.root.yml + + - name: Update mirflowz env with dev packages + run: mamba env update -n mirflowz -f environment.dev.yml + + - name: run unit tests + working-directory: ./scripts/tests + run: pytest --cov=scripts --cov-branch --cov-report=term-missing diff --git a/environment.dev.yml b/environment.dev.yml index 3218487f..7ab14349 100644 --- a/environment.dev.yml +++ b/environment.dev.yml @@ -1,5 +1,10 @@ name: mirflowz channels: - bioconda + - defaults dependencies: + - coverage>=7.2 + - pytest>=7.1.2 + - pytest-cov + - pysam>=0.21.0 - snakefmt \ No newline at end of file diff --git a/scripts/filter_multimappers.py b/scripts/filter_multimappers.py old mode 100644 new mode 100755 index 22af73dc..de997dc9 --- a/scripts/filter_multimappers.py +++ b/scripts/filter_multimappers.py @@ -67,7 +67,7 @@ def parse_arguments(): return parser -def count_indels(aln: pysam.AlignedSegment) -> int: +def count_indels(aln: pysam.libcalignedsegment.AlignedSegment) -> int: """Count the number of indels in an alignment based on its CIGAR string. This function counts the number of indels in the alignment based on the @@ -101,29 +101,29 @@ def find_best_alignments(alns: List[pysam.AlignedSegment]) -> List[pysam.Aligned Retrns: best_alignments: alignments with the less indels """ - aln_indels = [(aln, count_indels(alignment=aln)) for aln in alns] - min_indels = min(aln_indels, key=lambda x: x[1])[1] - best_alignments = [aln for i, (aln, indels) in enumerate(aln_indels) if indels == min_indels] + if len(alns) == 1: + return alns - for i in range(len(best_alignments)): - best_alignments[i].set_tag('NH', len(best_alignments)) - best_alignments[i].set_tag('HI', i + 1) + else: + aln_indels = [(aln, count_indels(aln=aln)) for aln in alns] + min_indels = min(aln_indels, key=lambda x: x[1])[1] + best_alignments = [aln for i, (aln, indels) in enumerate(aln_indels) if indels == min_indels] + + for i in range(len(best_alignments)): + best_alignments[i].set_tag('NH', len(best_alignments)) + best_alignments[i].set_tag('HI', i + 1) - return best_alignments + return best_alignments -def write_output(alignments: List[pysam.AlignedSegment]) -> None: +def write_output(alns: List[pysam.AlignedSegment]) -> None: """Write the output to the standard output (stdout). Args: alignments: alignments with the same query name """ - if len(alignments) == 1: - sys.stdout.write(alignments[0].to_string() + '\n') - else: - best_alignments = find_best_alignments(alignments=alignments) - for alignment in best_alignments: - sys.stdout.write(alignment.to_string() + '\n') + for alignment in alns: + sys.stdout.write(alignment.to_string() + '\n') def main(sam_file: Path) -> None: @@ -136,12 +136,12 @@ def main(sam_file: Path) -> None: sys.stdout.write(str(samfile.header)) - current_query = None current_alignments: list[pysam.AlignedSegment] = [] + current_query = None for alignment in samfile: - if alignment.is_secondary or alignment.is_supplementary: + if alignment.is_supplementary: continue if current_query is None: @@ -151,14 +151,17 @@ def main(sam_file: Path) -> None: current_alignments.append(alignment) else: - write_output(alignments=current_alignments) + current_alignments = find_best_alignments(current_alignments) + write_output(alns=current_alignments) current_query = alignment.query_name current_alignments = [alignment] - write_output(alignments=current_alignments) + if len(current_alignments) > 0: + current_alignments = find_best_alignments(current_alignments) + write_output(alns=current_alignments) if __name__ == "__main__": - args = parse_arguments().parse_args() - main(sam_file=args.infile) + args = parse_arguments().parse_args() # pragma:no cover + main(sam_file=args.infile) # pragma: no cover diff --git a/scripts/tests/files/in_sam_diff_multimappers.sam b/scripts/tests/files/in_sam_diff_multimappers.sam new file mode 100644 index 00000000..6d499b9f --- /dev/null +++ b/scripts/tests/files/in_sam_diff_multimappers.sam @@ -0,0 +1,4 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 16 19 77595 255 14M1D8M * 0 0 GCAGGAGAATCACTGATGTCAG * MD:Z:14^T2A1C3 NH:i:2 NM:i:3 XA:Z:Q XI:i:1 +1-1 0 19 330456 255 4M1D1M1I3M1D13M * 0 0 CTGACATCAGTGATTCTCCTGC * MD:Z:4^G4^A13 NH:i:2 NM:i:3 XA:Z:Q XI:i:0 \ No newline at end of file diff --git a/scripts/tests/files/in_sam_empty.sam b/scripts/tests/files/in_sam_empty.sam new file mode 100644 index 00000000..f1f17e4a --- /dev/null +++ b/scripts/tests/files/in_sam_empty.sam @@ -0,0 +1,3 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +@PG ID:samtools PN:samtools VN:1.16.1 CL:samtools sort -n -o results/test_lib/header_sorted_catMappings.sam results/test_lib/concatenated_header_catMappings.sam \ No newline at end of file diff --git a/scripts/tests/files/in_sam_equal_multimappers.sam b/scripts/tests/files/in_sam_equal_multimappers.sam new file mode 100644 index 00000000..cc1b7ef3 --- /dev/null +++ b/scripts/tests/files/in_sam_equal_multimappers.sam @@ -0,0 +1,5 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 0 19 142777 255 15M1I5M * 0 0 GCTAGGTGGGAGGCTTGAAGC * MD:Z:4C0T14 NH:i:3 NM:i:3 XA:Z:Q XI:i:0 +1-1 16 19 270081 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14G0G4 NH:i:3 NM:i:3 XA:Z:Q XI:i:2 +1-1 16 19 545543 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14A0G4 NH:i:3 NM:i:3 XA:Z:Q XI:i:1 diff --git a/scripts/tests/files/in_sam_multimappers.sam b/scripts/tests/files/in_sam_multimappers.sam new file mode 100644 index 00000000..1217ba8c --- /dev/null +++ b/scripts/tests/files/in_sam_multimappers.sam @@ -0,0 +1,13 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 0 19 7589 255 24M * 0 0 ATTTCAAGCCAGGTGGCGTTTTTC * MD:Z:13G10 NH:i:1 NM:i:1 +2-1 0 19 63250 255 22M * 0 0 GAAAGCGCTTCGCTTCAGAGTG * MD:Z:11C10 NH:i:1 NM:i:1 +3-1 0 19 63251 255 22M * 0 0 AAAGCGCTTCCCTTCAGAGTGA * MD:Z:21T NH:i:1 NM:i:1 +4-1 0 19 63250 255 22M * 0 0 TAAAGCGCTTCCCTTCAGAGTG * MD:Z:G21 NH:i:1 NM:i:1 +5-1 0 19 7589 255 24M * 0 0 CTTTCAAGCCAGGGGGCGTTTTTC * MD:Z:A23 NH:i:1 NM:i:1 +6-1 0 19 7590 255 24M * 0 0 TTTCAAGCCAGGTGGCGTTTTTCT * MD:Z:12G11 NH:i:1 NM:i:1 +7-1 0 19 142777 255 15M1I5M * 0 0 GCTAGGTGGGAGGCTTGAAGC * MD:Z:4C0T14 NH:i:3 NM:i:3 XA:Z:Q XI:i:0 +7-1 16 19 270081 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14G0G4 NH:i:3 NM:i:3 XA:Z:Q XI:i:2 +7-1 16 19 545543 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14A0G4 NH:i:3 NM:i:3 XA:Z:Q XI:i:1 +8-1 16 19 77595 255 14M1D8M * 0 0 GCAGGAGAATCACTGATGTCAG * MD:Z:14^T2A1C3 NH:i:2 NM:i:3 XA:Z:Q XI:i:1 +8-1 0 19 330456 255 4M1D1M1I3M1D13M * 0 0 CTGACATCAGTGATTCTCCTGC * MD:Z:4^G4^A13 NH:i:2 NM:i:3 XA:Z:Q XI:i:0 \ No newline at end of file diff --git a/scripts/tests/files/in_sam_sec_sup.sam b/scripts/tests/files/in_sam_sec_sup.sam new file mode 100644 index 00000000..c6bdaf4a --- /dev/null +++ b/scripts/tests/files/in_sam_sec_sup.sam @@ -0,0 +1,13 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 0 19 7589 255 24M * 0 0 ATTTCAAGCCAGGTGGCGTTTTTC * MD:Z:13G10 NH:i:1 NM:i:1 +2-1 0 19 63250 255 22M * 0 0 GAAAGCGCTTCGCTTCAGAGTG * MD:Z:11C10 NH:i:1 NM:i:1 +3-1 2128 19 63251 255 22M * 0 0 AAAGCGCTTCCCTTCAGAGTGA * MD:Z:21T NH:i:1 NM:i:1 +4-1 0 19 63250 255 22M * 0 0 TAAAGCGCTTCCCTTCAGAGTG * MD:Z:G21 NH:i:1 NM:i:1 +5-1 2064 19 7589 255 24M * 0 0 CTTTCAAGCCAGGGGGCGTTTTTC * MD:Z:A23 NH:i:1 NM:i:1 +6-1 256 19 7590 255 24M * 0 0 TTTCAAGCCAGGTGGCGTTTTTCT * MD:Z:12G11 NH:i:1 NM:i:1 +7-1 0 19 142777 255 15M1I5M * 0 0 GCTAGGTGGGAGGCTTGAAGC * MD:Z:4C0T14 NH:i:3 NM:i:3 XA:Z:Q XI:i:0 +7-1 16 19 270081 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14G0G4 NH:i:3 NM:i:3 XA:Z:Q XI:i:2 +7-1 16 19 545543 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14A0G4 NH:i:3 NM:i:3 XA:Z:Q XI:i:1 +8-1 16 19 77595 255 14M1D8M * 0 0 GCAGGAGAATCACTGATGTCAG * MD:Z:14^T2A1C3 NH:i:2 NM:i:3 XA:Z:Q XI:i:1 +8-1 0 19 330456 255 4M1D1M1I3M1D13M * 0 0 CTGACATCAGTGATTCTCCTGC * MD:Z:4^G4^A13 NH:i:2 NM:i:3 XA:Z:Q XI:i:0 \ No newline at end of file diff --git a/scripts/tests/files/out_sam_diff_multimappers.sam b/scripts/tests/files/out_sam_diff_multimappers.sam new file mode 100644 index 00000000..0e6b6be5 --- /dev/null +++ b/scripts/tests/files/out_sam_diff_multimappers.sam @@ -0,0 +1 @@ +1-1 16 19 77595 255 14M1D8M * 0 0 GCAGGAGAATCACTGATGTCAG * MD:Z:14^T2A1C3 NM:i:3 XA:Z:Q XI:i:1 NH:i:1 HI:i:1 diff --git a/scripts/tests/files/out_sam_empty.sam b/scripts/tests/files/out_sam_empty.sam new file mode 100644 index 00000000..62447b19 --- /dev/null +++ b/scripts/tests/files/out_sam_empty.sam @@ -0,0 +1,3 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +@PG ID:samtools PN:samtools VN:1.16.1 CL:samtools sort -n -o results/test_lib/header_sorted_catMappings.sam results/test_lib/concatenated_header_catMappings.sam diff --git a/scripts/tests/files/out_sam_equal_multimappers.sam b/scripts/tests/files/out_sam_equal_multimappers.sam new file mode 100644 index 00000000..1db7e367 --- /dev/null +++ b/scripts/tests/files/out_sam_equal_multimappers.sam @@ -0,0 +1,3 @@ +1-1 0 19 142777 255 15M1I5M * 0 0 GCTAGGTGGGAGGCTTGAAGC * MD:Z:4C0T14 NM:i:3 XA:Z:Q XI:i:0 NH:i:3 HI:i:1 +1-1 16 19 270081 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14G0G4 NM:i:3 XA:Z:Q XI:i:2 NH:i:3 HI:i:2 +1-1 16 19 545543 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14A0G4 NM:i:3 XA:Z:Q XI:i:1 NH:i:3 HI:i:3 diff --git a/scripts/tests/files/out_sam_multimappers.sam b/scripts/tests/files/out_sam_multimappers.sam new file mode 100644 index 00000000..704c1aca --- /dev/null +++ b/scripts/tests/files/out_sam_multimappers.sam @@ -0,0 +1,12 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 0 19 7589 255 24M * 0 0 ATTTCAAGCCAGGTGGCGTTTTTC * MD:Z:13G10 NH:i:1 NM:i:1 +2-1 0 19 63250 255 22M * 0 0 GAAAGCGCTTCGCTTCAGAGTG * MD:Z:11C10 NH:i:1 NM:i:1 +3-1 0 19 63251 255 22M * 0 0 AAAGCGCTTCCCTTCAGAGTGA * MD:Z:21T NH:i:1 NM:i:1 +4-1 0 19 63250 255 22M * 0 0 TAAAGCGCTTCCCTTCAGAGTG * MD:Z:G21 NH:i:1 NM:i:1 +5-1 0 19 7589 255 24M * 0 0 CTTTCAAGCCAGGGGGCGTTTTTC * MD:Z:A23 NH:i:1 NM:i:1 +6-1 0 19 7590 255 24M * 0 0 TTTCAAGCCAGGTGGCGTTTTTCT * MD:Z:12G11 NH:i:1 NM:i:1 +7-1 0 19 142777 255 15M1I5M * 0 0 GCTAGGTGGGAGGCTTGAAGC * MD:Z:4C0T14 NM:i:3 XA:Z:Q XI:i:0 NH:i:3 HI:i:1 +7-1 16 19 270081 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14G0G4 NM:i:3 XA:Z:Q XI:i:2 NH:i:3 HI:i:2 +7-1 16 19 545543 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14A0G4 NM:i:3 XA:Z:Q XI:i:1 NH:i:3 HI:i:3 +8-1 16 19 77595 255 14M1D8M * 0 0 GCAGGAGAATCACTGATGTCAG * MD:Z:14^T2A1C3 NM:i:3 XA:Z:Q XI:i:1 NH:i:1 HI:i:1 diff --git a/scripts/tests/files/out_sam_sec_sup.sam b/scripts/tests/files/out_sam_sec_sup.sam new file mode 100644 index 00000000..29f971c5 --- /dev/null +++ b/scripts/tests/files/out_sam_sec_sup.sam @@ -0,0 +1,10 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 0 19 7589 255 24M * 0 0 ATTTCAAGCCAGGTGGCGTTTTTC * MD:Z:13G10 NH:i:1 NM:i:1 +2-1 0 19 63250 255 22M * 0 0 GAAAGCGCTTCGCTTCAGAGTG * MD:Z:11C10 NH:i:1 NM:i:1 +4-1 0 19 63250 255 22M * 0 0 TAAAGCGCTTCCCTTCAGAGTG * MD:Z:G21 NH:i:1 NM:i:1 +6-1 256 19 7590 255 24M * 0 0 TTTCAAGCCAGGTGGCGTTTTTCT * MD:Z:12G11 NH:i:1 NM:i:1 +7-1 0 19 142777 255 15M1I5M * 0 0 GCTAGGTGGGAGGCTTGAAGC * MD:Z:4C0T14 NM:i:3 XA:Z:Q XI:i:0 NH:i:3 HI:i:1 +7-1 16 19 270081 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14G0G4 NM:i:3 XA:Z:Q XI:i:2 NH:i:3 HI:i:2 +7-1 16 19 545543 255 6M1I14M * 0 0 GCTTCAAGCCTCCCACCTAGC * MD:Z:14A0G4 NM:i:3 XA:Z:Q XI:i:1 NH:i:3 HI:i:3 +8-1 16 19 77595 255 14M1D8M * 0 0 GCAGGAGAATCACTGATGTCAG * MD:Z:14^T2A1C3 NM:i:3 XA:Z:Q XI:i:1 NH:i:1 HI:i:1 diff --git a/scripts/tests/files/sam_no_multimappers.sam b/scripts/tests/files/sam_no_multimappers.sam new file mode 100644 index 00000000..2108dbef --- /dev/null +++ b/scripts/tests/files/sam_no_multimappers.sam @@ -0,0 +1,7 @@ +@HD VN:1.0 SO:queryname +@SQ SN:19 LN:600000 M5:f9635dbff42b16049de830a7a121fa12 UR:NA +1-1 0 19 44414 255 21M * 0 0 GAAGGCGCTTCCCTTTGGAGT * MD:Z:21 NH:i:1 NM:i:0 +2-1 0 19 44377 255 11M3I11M * 0 0 CTACAAAGGGAGGTAGCACTTTCTC * MD:Z:22 NH:i:1 NM:i:3 XA:Z:Q XI:i:0 +3-1 0 19 7590 255 20M3I3M * 0 0 TTTCAAGCCAGGGGGCGTTTCCGTTC * MD:Z:23 NH:i:1 NM:i:3 XA:Z:Q XI:i:0 +4-1 0 19 7590 255 23M * 0 0 TTTCAAGTCAGGGGGCGTTTTTC * MD:Z:7C15 NH:i:1 NM:i:1 +5-1 0 19 30893 255 32M2I53M * 0 0 CTCAAGCTGTGACTCTCCAGAGGGATGCACTTGATCTCTTATGTGAAAAAAAAGAAGGCGCTTCCCTTTAGAGCGTTACGGTTTGGG * MD:Z:85 NH:i:1 NM:i:2 XA:Z:Q XI:i:0 diff --git a/scripts/tests/test_filter_multimappers.py b/scripts/tests/test_filter_multimappers.py new file mode 100755 index 00000000..620fe01c --- /dev/null +++ b/scripts/tests/test_filter_multimappers.py @@ -0,0 +1,287 @@ +"""Unit tests for module 'filter_multimappers.py'.""" + +import argparse +import sys + +from pathlib import Path +import pysam +import pytest + +sys.path.append("../../") + +from scripts.filter_multimappers import ( + count_indels, + find_best_alignments, + main, + parse_arguments, + write_output +) + + +@pytest.fixture +def sam_empty_files(): + """Import path to empty test files.""" + in_empty = Path("files/in_sam_empty.sam") + out_empty = Path("files/out_sam_empty.sam") + + return in_empty, out_empty + + +@pytest.fixture +def sam_multimappers_files(): + """Import path to test files with multimappers.""" + in_multimappers = Path("files/in_sam_multimappers.sam") + out_multimappers = Path("files/out_sam_multimappers.sam") + + return in_multimappers, out_multimappers + + +@pytest.fixture +def sam_no_multimappers_file(): + """Import path to test files with no multimappers.""" + no_multi = Path("files/sam_no_multimappers.sam") + + return no_multi + +@pytest.fixture +def sam_unique_diff_multimappers_files(): + """Import path to test files with a single multimapper.""" + in_diff_multi = Path("files/in_sam_diff_multimappers.sam") + out_diff_multi = Path("files/out_sam_diff_multimappers.sam") + + return in_diff_multi, out_diff_multi + +@pytest.fixture +def sam_unique_equal_multimapper_files(): + """Import path to the test file with a single multimapper.""" + in_sam = Path("files/in_sam_equal_multimappers.sam") + out_sam = Path("files/out_sam_equal_multimappers.sam") + + return in_sam, out_sam + +@pytest.fixture +def sam_sec_sup_files(): + """ + Import path to the test files with secondary and supplementary alignments. + """ + in_sam = Path("files/in_sam_sec_sup.sam") + out_sam = Path("files/out_sam_sec_sup.sam") + + return in_sam, out_sam + + +@pytest.fixture +def alns(): + """Create sample AlignedSegment objects.""" + # Alignment with 10 matches + aln1 = pysam.AlignedSegment() + aln1.cigartuples = [(0, 10)] + aln1.query_name = "read1" + aln1.query_sequence = "TAAAGCGCTT" + aln1.flag = 0x2 + aln1.reference_id = 19 + aln1.reference_start = 63250 + aln1.set_tag("NH", 2) + aln1.set_tag("HI", 1) + + # Alignment with 3 insertions + aln2 = pysam.AlignedSegment() + aln2.cigartuples = [(0, 5), (1, 3), (0, 5)] + aln2.query_name = "read1" + aln2.query_sequence = "GGGGCGTTTT" + aln2.flag = 0x2 + aln2.reference_id = 19 + aln2.reference_start = 7589 + aln2.set_tag("NH", 2) + aln2.set_tag("HI", 2) + + # Alignment with 3 deletions + aln3 = pysam.AlignedSegment() + aln3.cigartuples = [(0, 10), (2, 3), (0, 5)] + aln3.query_name = "read2" + aln3.query_sequence = "GCCAGGTGGCGTTTT" + aln3.flag = 0x2 + aln3.reference_id = 19 + aln3.reference_start = 142777 + aln3.set_tag("NH", 3) + aln3.set_tag("HI", 1) + + # Alignments with 1 insertion and 1 deletion + aln4 = pysam.AlignedSegment() + aln4.cigartuples = [(0, 10), (1, 1), (0, 3), (2, 1), (0, 2)] + aln4.query_name = "read2" + aln4.query_sequence = "AAGCCTCCCACCTAG" + aln4.flag = 0x2 + aln4.reference_id = 19 + aln4.reference_start = 63251 + aln4.set_tag("NH", 3) + aln4.set_tag("HI", 2) + + aln5 = pysam.AlignedSegment() + aln5.cigartuples = [(0, 10), (2, 1), (0, 2), (1, 1), (0, 3)] + aln5.query_name = "read2" + aln5.query_sequence = "AGGTGGCGTTTTTCT" + aln5.flag = 0x2 + aln5.reference_id = 19 + aln5.reference_start = 77595 + aln5.set_tag("NH", 3) + aln5.set_tag("HI", 2) + + return [aln1, aln2, aln3, aln4, aln5] + + +class TestParseArguments: + """Test 'parse_arguments()' function.""" + + def test_no_input(self, monkeypatch): + """Call without input file.""" + with pytest.raises(SystemExit) as sysex: + monkeypatch.setattr( + sys, 'argv', + ['filter_multimappers'] + ) + parse_arguments().parse_args() + assert sysex.value.code == 2 + + def test_correct_input(self, monkeypatch, sam_no_multimappers_file): + """Call with a single input file.""" + sam_1 = sam_no_multimappers_file + monkeypatch.setattr( + sys, 'argv', + ['filter_multimappers', + str(sam_1), + ] + ) + args = parse_arguments().parse_args() + assert isinstance(args, argparse.Namespace) + + def test_too_many_inputs(self, monkeypatch, sam_multimappers_files): + """Call with too many input files.""" + sam_1, sam_2 = sam_multimappers_files + monkeypatch.setattr( + sys, 'argv', + ['filter_multimappers', + str(sam_1), str(sam_2), + ] + ) + with pytest.raises(SystemExit) as sysex: + parse_arguments().parse_args() + assert sysex.value.code == 2 + + +class TestCountIndels: + """Test 'count_indels()' function.""" + + def test_no_indels(self, alns): + """Test CIGAR string with 10 matches.""" + assert count_indels(alns[0]) == 0 + + def test_three_insertions(self, alns): + """Test CIGAR string with 3 insertions.""" + assert count_indels(alns[1]) == 3 + + def test_three_deletions(self, alns): + """Test CIGAR string with 3 deletions.""" + assert count_indels(alns[2]) == 3 + + def test_one_in_one_del(self, alns): + """Test CIGAR string with one insertion and one deletion.""" + assert count_indels(alns[3]) == 2 + + +class TestFindBestAlignments: + """Test 'find_best_alignments()' function.""" + + def test_find_best_alignments_multimappers(self, alns): + """Test function with multimappers with different indel count.""" + output = find_best_alignments([alns[0], alns[1]]) + + assert len(output) == 1 + assert output[0] == alns[0] + + assert output[0].get_tag("NH") == 1 + assert output[0].get_tag("HI") == 1 + + def test_find_best_alignments_equal_multimappers(self, alns): + """Test function with multimappers with same indel count.""" + output = find_best_alignments([alns[3], alns[4]]) + + assert len(output) == 2 + assert output[0] == alns[3] + assert output[1] == alns[4] + + assert output[0].get_tag("NH") == 2 + assert output[1].get_tag("NH") == 2 + assert output[0].get_tag("HI") == 1 + assert output[1].get_tag("HI") == 2 + + +class TestWriteOutout: + """Test 'write_output()' function.""" + + def test_write_output_one_alignment(self, capsys, sam_multimappers_files): + """Test funciton with a single alignment.""" + in_sam, out_sam = sam_multimappers_files + + with pysam.AlignmentFile(in_sam, 'r') as in_file: + alignment = next(in_file) + + write_output([alignment]) + captured = capsys.readouterr() + + with pysam.AlignmentFile(out_sam, 'r') as out_file: + out_alignment = next(out_file) + + assert captured.out == out_alignment.to_string() + '\n' + + +class TestMain: + """Test 'main()' function.""" + + def test_main_empty_file(self, capsys, sam_empty_files): + """Test main function with an empty file.""" + in_sam, out_sam = sam_empty_files + + main(in_sam) + captured = capsys.readouterr() + + with open(out_sam, 'r') as out_file: + expected_output = out_file.read() + + assert captured.out == expected_output + + def test_main_multimappers(self, capsys, sam_multimappers_files): + """Test main function with multimappers.""" + in_sam, out_sam = sam_multimappers_files + + main(in_sam) + captured = capsys.readouterr() + + with open(out_sam, 'r') as out_file: + expected_output = out_file.read() + + assert captured.out == expected_output + + def test_main_no_multimappers(self, capsys, sam_no_multimappers_file): + """Test main function with no multimappers.""" + sam_file = sam_no_multimappers_file + + main(sam_file) + captured = capsys.readouterr() + + with open(sam_file, 'r') as out_file: + expected_output = out_file.read() + + assert captured.out == expected_output + + def test_main_secondary_supplementary(self, capsys, sam_sec_sup_files): + """Test main function with secondary and supplementary alignments.""" + in_sam, out_sam = sam_sec_sup_files + + main(in_sam) + captured = capsys.readouterr() + + with open(out_sam, 'r') as out_file: + expected_output = out_file.read() + + assert captured.out == expected_output