Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
add --check-unknown-nucleotides flag
Browse files: browse the repository at this point in the history
  • Loading branch information
Donaim committed Feb 16, 2024
1 parent 975a58d commit 5580125
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
7 changes: 5 additions & 2 deletions bin/proviral.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def cli():
@click.option(
'--check-internal-inversion/--ignore-internal-inversion', default=False
)
@click.option(
'--check-unknown-nucleotides/--ignore-unknown-nucleotides', default=True
)
@click.option(
'--include-small-orfs/--exclude-small-orfs', default=False)
@click.option(
Expand All @@ -65,7 +68,7 @@ def cli():
def intact(input_file, subtype, include_packaging_signal,
include_rre, check_major_splice_donor_site, run_hypermut,
check_long_deletion, check_nonhiv, check_scramble, check_internal_inversion,
include_small_orfs, output_csv, working_folder):
check_unknown_nucleotides, include_small_orfs, output_csv, working_folder):
"""
Check consensus sequences for intactness.
"""
Expand All @@ -77,7 +80,7 @@ def intact(input_file, subtype, include_packaging_signal,
folder, input_file, subtype, include_packaging_signal, include_rre,
check_major_splice_donor_site, run_hypermut,
check_long_deletion, check_nonhiv, check_scramble, check_internal_inversion,
include_small_orfs, output_csv
check_unknown_nucleotides, include_small_orfs, output_csv
)

if __name__ == "__main__": cli()
8 changes: 5 additions & 3 deletions intact/intact.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ def intact( working_dir,
check_nonhiv,
check_scramble,
check_internal_inversion,
check_unknown_nucleotides,
include_small_orfs,
output_csv,
hxb2_forward_orfs = const.DEFAULT_FORWARD_ORFs,
Expand Down Expand Up @@ -747,9 +748,10 @@ def analyse_single_sequence(writer, sequence, blast_rows):
for subseq in invalid_subsequences
)

err = IntactnessError(sequence.id, UNKNOWN_NUCLEOTIDE,
f'Sequence contains invalid nucleotides: {error_details}')
sequence_errors.append(err)
if check_unknown_nucleotides:
err = IntactnessError(sequence.id, UNKNOWN_NUCLEOTIDE,
f'Sequence contains invalid nucleotides: {error_details}')
sequence_errors.append(err)

sequence = SeqRecord.SeqRecord(
Seq.Seq(''.join(x for x in sequence.seq if x in VALID_DNA_CHARACTERS)),
Expand Down
1 change: 1 addition & 0 deletions tests/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def run_end_to_end(tmp_path, data_file, expected_dir, subtype, output_csv):
check_nonhiv=True,
check_scramble=True,
check_internal_inversion=True,
check_unknown_nucleotides=True,
include_small_orfs=True,
output_csv=output_csv,
)
Expand Down

0 comments on commit 5580125

Please sign in to comment.