Skip to content

Commit

Permalink
added unit test for keeplength option
Browse files Browse the repository at this point in the history
  • Loading branch information
mikerobeson committed Nov 11, 2024
1 parent dd500b9 commit d3c8eaa
Show file tree
Hide file tree
Showing 8 changed files with 146 additions and 18 deletions.
2 changes: 1 addition & 1 deletion rescript/plugin_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,7 @@
'primer_rev': 'Reverse primer used to find the end position '
'for alignment trimming. Provide as 5\'-3\'.',
'position_start': 'Position within the alignment where the trimming '
'will begin. If not provided, alignment will not'
'will begin. If not provided, alignment will not '
# Trailing space required: adjacent literals are concatenated verbatim, so
# without it the help text renders as "isspecified".
'be trimmed at the beginning. If forward primer is '
'specified this parameter will be ignored.',
'position_end': 'Position within the alignment where the trimming '
Expand Down
10 changes: 10 additions & 0 deletions rescript/tests/data/small-silva-full-len-alignment.fasta

Large diffs are not rendered by default.

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions rescript/tests/data/small-silva-v4-trim-keeplength.fasta

Large diffs are not rendered by default.

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions rescript/tests/data/small-silva-v4-trim-no-keeplength.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
>AB299544.1.1336 Bacteria;Firmicutes;Clostridia;Oscillospirales;Ruminococcaceae;uncultured;uncultured Clostridiales bacterium
----------------------------------------------------T-A--G----G--G--A---AT-AA-C--AT-------------------------T-T-G-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>JF826529.1.1310 Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;Alcaligenaceae;Achromobacter;Achromobacter sp. NS014
----------------------------------------------------C-G--G----G--G--G---AT-AA-C--TA-------------------------C-C-C-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>CP011401.2356993.2358524 Bacteria;Proteobacteria;Gammaproteobacteria;Burkholderiales;Alcaligenaceae;Bordetella;Bordetella pertussis
----------------------------------------------------C-G--G----G--G--G---AT-AA-C--TA-------------------------C-G-C-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>HM446088.1.1410 Bacteria;Cyanobacteria;Cyanobacteriia;Synechococcales;Cyanobiaceae;Cyanobium PCC-6307;uncultured bacterium
----------------------------------------------------A-G--G----G--G--G---AT-AA-C--GG-------------------------C-T-G-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
>FJ484772.1.1316 Bacteria;Proteobacteria;Gammaproteobacteria;Acidiferrobacterales;Acidiferrobacteraceae;Sulfurifustis;uncultured proteobacterium
----------------------------------------------------T-G--G----G--G--G---AC-AA-C--CC-------------------------G-G-C-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
97 changes: 83 additions & 14 deletions rescript/tests/test_trim_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@


import qiime2
import skbio
from qiime2.plugin.testing import TestPluginBase
from q2_types.feature_data import (
AlignedDNAFASTAFormat,
Expand All @@ -33,6 +34,12 @@ def aln2(self, *args, **kwargs):
def aln3(self, *args, **kwargs):
    # Stub action: every positional/keyword argument is ignored and the
    # alignment stored under key 3 is returned in a one-element list.
    chosen = self.alignments[3]
    return [chosen]

def aln4(self, *args, **kwargs):
    # Stub action: every positional/keyword argument is ignored and the
    # alignment stored under key 4 is returned in a one-element list.
    chosen = self.alignments[4]
    return [chosen]

def aln5(self, *args, **kwargs):
    # Stub action: every positional/keyword argument is ignored and the
    # alignment stored under key 5 is returned in a one-element list.
    chosen = self.alignments[5]
    return [chosen]

def get_action(self, which_alignment):
    # Look up the stub method whose name is "aln" followed by the given
    # selector and return it (bound to this object) without calling it.
    method_name = f"aln{which_alignment}"
    return getattr(self, method_name)

Expand All @@ -43,22 +50,59 @@ class TestExtractAlignmentRegion(TestPluginBase):
def setUp(self):
super().setUp()

aligned_seqs_fp = self.get_data_path('trim-test-alignment.fasta')
aligned_with_primers_fp = self.get_data_path(
'trim-test-alignment-with-primers.fasta')
aligned_silva_seqs_fp = self.get_data_path(
'small-silva-full-len-alignment.fasta')
self.aligned_silva_seqs = AlignedDNAFASTAFormat(
aligned_silva_seqs_fp, mode='r')

self.v4_primers_dict = {
"forward": "GTGYCAGCMGCCGCGGTAA",
"reverse": "GGACTACNVGGGTWTCTAAT"
}

silva_alignment_fp = self.get_data_path(
'small-silva-full-len-alignment.fasta')
self.silva_alignment = AlignedDNAFASTAFormat(
silva_alignment_fp, mode='r')

silva_alignment_v4_trim_keeplen_w_primers_fp = self.get_data_path(
'small-silva-v4-trim-keeplength-with-primers.fasta')
self.silva_v4_trim_keeplen_w_primers = AlignedDNAFASTAFormat(
silva_alignment_v4_trim_keeplen_w_primers_fp, mode='r')

silva_alignment_v4_trim_no_keeplen_w_primers_fp = self.get_data_path(
'small-silva-v4-trim-no-keeplength-with-primers.fasta')
self.silva_v4_trim_no_keeplen_w_primers = AlignedDNAFASTAFormat(
silva_alignment_v4_trim_no_keeplen_w_primers_fp, mode='r')

silva_alignment_v4_trim_keeplen_wo_primers_fp = self.get_data_path(
'small-silva-v4-trim-keeplength.fasta')
self.silva_v4_trim_keeplen = AlignedDNAFASTAFormat(
silva_alignment_v4_trim_keeplen_wo_primers_fp, mode='r')

silva_alignment_v4_trim_no_keeplen_wo_primers_fp = self.get_data_path(
'small-silva-v4-trim-no-keeplength.fasta')
self.silva_v4_trim_no_keeplen = AlignedDNAFASTAFormat(
silva_alignment_v4_trim_no_keeplen_wo_primers_fp, mode='r')

aligned_seqs_fp = self.get_data_path('trim-test-alignment.fasta')
self.aligned_seqs = qiime2.Artifact.import_data(
'FeatureData[AlignedSequence]', aligned_seqs_fp)
self.aligned_seqs_fasta = AlignedDNAFASTAFormat(
aligned_seqs_fp, mode='r')

self.primers_dict = {
"forward": "GGGAATCTTCCACAATGG",
"reverse": "GTGTTCTTCTCTAACAACAG"
}

aligned_with_primers_fp = self.get_data_path(
'trim-test-alignment-with-primers.fasta')
self.aligned_with_primers = qiime2.Artifact.import_data(
'FeatureData[AlignedSequence]', aligned_with_primers_fp)
self.aligned_with_primers_fasta = AlignedDNAFASTAFormat(
aligned_with_primers_fp, mode='r')

self.aligned_mess_fasta = AlignedDNAFASTAFormat(
self.get_data_path(
'trim-test-alignment-with-primers-mess.fasta'), mode='r')
Expand All @@ -71,7 +115,9 @@ def setUp(self):

self.fake_ctx = FakeCtx({1: self.aligned_with_primers_fasta,
2: self.aligned_with_fwd_fasta,
3: self.aligned_with_rev_fasta})
3: self.aligned_with_rev_fasta,
4: self.silva_v4_trim_no_keeplen_w_primers,
5: self.silva_v4_trim_keeplen_w_primers})

self.exp_seqs_both_primers = {
's1': ('GGGAATCTTCCACAATGGGTGCAAACCTGATGGAGCAATGCCGCGTGAG'
Expand Down Expand Up @@ -237,16 +283,39 @@ def test_trim_all_sequences_no_rev(self):
for seq in obs.view(DNAIterator)}
self.assertDictEqual(obs_seqs, self.exp_seqs_only_fwd)

# test trimming when both primers are given
def test_trim_alignment(self):
    # get_action(1) supplies the stubbed expand-alignment action, so the
    # call below exercises only the primer-based trimming logic.
    stub_action = self.fake_ctx.get_action(1)
    primers = self.primers_dict
    obs = _trim_alignment(
        stub_action,
        self.aligned_seqs_fasta,
        primers["forward"],
        primers["reverse"])

    # Index the trimmed sequences by their id for a dict comparison.
    obs_seqs = {}
    for seq in obs.view(DNAIterator):
        obs_seqs[seq.metadata['id']] = str(seq)
    self.assertDictEqual(obs_seqs, self.exp_seqs_both_primers)
# test trimming when both primers are given and keeplength = False
# compares the trimmed output against the expected alignment
def test_trim_alignment_keeplen_false(self):
    # The expand-alignment action is stubbed: get_action(4) resolves to
    # aln4, which returns the alignment stored under key 4 of the fake
    # context regardless of the arguments it is called with.
    stub_action = self.fake_ctx.get_action(4)
    primers = self.v4_primers_dict
    trimmed = _trim_alignment(
        stub_action,
        self.aligned_silva_seqs,
        primers["forward"],
        primers["reverse"],
        keeplength=False)

    # Parse observed and expected FASTA into TabularMSA objects so the
    # assertion compares alignments rather than raw file text.
    msa_kwargs = dict(into=skbio.TabularMSA, constructor=skbio.DNA)
    observed = skbio.io.read(str(trimmed), **msa_kwargs)
    expected = skbio.io.read(
        str(self.silva_v4_trim_no_keeplen), **msa_kwargs)
    self.assertEqual(observed, expected)

# test trimming when both primers are given and keeplength = True
# compares the trimmed output against the expected alignment
def test_trim_alignment_keeplen_true(self):
    # The expand-alignment action is stubbed: get_action(5) resolves to
    # aln5, which returns the alignment stored under key 5 of the fake
    # context regardless of the arguments it is called with.
    stub_action = self.fake_ctx.get_action(5)
    primers = self.v4_primers_dict
    trimmed = _trim_alignment(
        stub_action,
        self.aligned_silva_seqs,
        primers["forward"],
        primers["reverse"],
        keeplength=True)

    # Parse observed and expected FASTA into TabularMSA objects so the
    # assertion compares alignments rather than raw file text.
    msa_kwargs = dict(into=skbio.TabularMSA, constructor=skbio.DNA)
    observed = skbio.io.read(str(trimmed), **msa_kwargs)
    expected = skbio.io.read(
        str(self.silva_v4_trim_keeplen), **msa_kwargs)
    self.assertEqual(observed, expected)

# test trimming when only fwd primer is given
def test_trim_alignment_only_fwd(self):
Expand Down
7 changes: 4 additions & 3 deletions rescript/trim_alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ def _trim_alignment(expand_alignment_action,
primer_rev=None,
position_start=None,
position_end=None,
n_threads=1) -> AlignedDNAFASTAFormat:
n_threads=1,
keeplength=True) -> AlignedDNAFASTAFormat:
"""
Trim alignment based on primer alignment or explicitly specified
positions. When at least one primer sequence is given, primer-based
Expand Down Expand Up @@ -231,8 +232,8 @@ def _trim_alignment(expand_alignment_action,
alignment=aligned_sequences,
sequences=primers,
addfragments=True,
keeplength=True,
n_threads=n_threads)
n_threads=n_threads,
keeplength=keeplength)

# find trim positions based on primer positions within alignment
trim_positions = _locate_primer_positions(alignment_with_primers)
Expand Down

0 comments on commit d3c8eaa

Please sign in to comment.