diff --git a/pvacseq/lib/generate_fasta.py b/pvacseq/lib/generate_fasta.py index 2801e58..e913718 100644 --- a/pvacseq/lib/generate_fasta.py +++ b/pvacseq/lib/generate_fasta.py @@ -93,7 +93,10 @@ def main(args_input = sys.argv[1:]): variant_type = line['variant_type'] full_wildtype_sequence = line['wildtype_amino_acid_sequence'] if variant_type == 'FS': - position = int(line['protein_position'].split('-', 1)[0]) - 1 + if line['amino_acid_change'] is not None and line['amino_acid_change'].split('/')[0] == '-': + position = int(line['protein_position'].split('-', 1)[0]) + else: + position = int(line['protein_position'].split('-', 1)[0]) - 1 elif variant_type == 'missense' or variant_type == 'inframe_ins': wildtype_amino_acid, mutant_amino_acid = line['amino_acid_change'].split('/') if wildtype_amino_acid == '-': diff --git a/tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv b/tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv new file mode 100644 index 0000000..5229106 --- /dev/null +++ b/tests/test_data/generate_fasta/input_frameshift_variant_position_shift.tsv @@ -0,0 +1,2 @@ +chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index +12 62381609 62381609 G GT USP15 ENST00000280377 -/X ENSG00000135655 MAEGGAADLDTQRSDIATLLKTSLRKGDTWYLVDSRWFKQWKKYVGFDSWDKYQMGDQNVYPGPIDNSGLLKDGDAQSLKEHLIDELDYILLPTEGWNKLVSWYTLMEGQEPIARKVVEQGMFVKHCKVEVYLTELKLCENGNMNNVVTRRFSKADTIDTIEKEIRKIFSIPDEKETRLWNKYMSNTFEPLNKPDSTIQDAGLYQGQVLVIEQKNEDGTWPRGPSTPKSPGASNFSTLPKISPSSLSNNYNNMNNRNVKNSNYCLPSYTAYKNYDYSEPGRNNEQPGLCGLSNLGNTCFMNSAIQCLSNTPPLTEYFLNDKYQEELNFDNPLGMRGEIAKSYAELIKQMWSGKFSYVTPRAFKTQVGRFAPQFSGYQQQDCQELLAFLLDGLHEDLNRIRKKPYIQLKDADGRPDKVVAEEAWENHLKRNDSIIVDIFHGLFKSTLVCPECAKISVTFDPFCYLTLPLPMKKERTLEVYLVRMDPLTKPMQYKVVVPKIGNILDLCTALSALSGIPADKMIVTDIYNHRFHRIFAMDENLSSIMERDDIYVFEININRTEDTEHVIIPVCLREKFRHSSYTHHTGSSLFGQPFLMAVPRNNTEDKLYNLLLLRMCRYVKISTETEETEGSLHCCKDQNINGNGPNGIHEEGSPSEMETDEPDDESSQDQELPSENENSQSEDSVGGDNDSENGLCTEDTCKGQLTGHKKRLFTFQFNNLGNTDINYIKDDTRHIRFDDRQLRLDERSFLALDWDPDLKKRYFDENAAEDFEKHESVEYKPPKKPFVKLKDCIELFTTKEKLGAEDPWYCPNCKEHQQATKKLDLWSLPPVLVVHLKRFSYSRYMRDKLDTLVDFPINDLDMSEFLINPNAGPCRYNLIAVSNHYGGMGGGHYTAFAKNKDDGKWYYFDDSSVSTASEDQIVSKAAYVLFYQRQDTFSGTGFFPLDRETKGASAATGIPLESDEDSNDNDNDIENENCMHTN YQANVVWKV FS 345-346 NA NA NA NA NA NA NA NA USP15_ENST00000280377_1.FS.345-346 diff --git a/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta new file mode 100644 index 0000000..f657d8d --- /dev/null +++ b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.fasta @@ -0,0 +1,4 @@ +>1 +GEIAKSYAELIKQMWSGKFS +>2 +GEIAKSYAELYQANVVWKV diff --git a/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key new file mode 100644 index 0000000..d8dcf63 --- /dev/null +++ b/tests/test_data/generate_fasta/output_frameshift_variant_position_shift.key @@ -0,0 +1,4 @@ +1: +- WT.USP15_ENST00000280377_1.FS.345-346 +2: +- MT.USP15_ENST00000280377_1.FS.345-346 diff --git a/tests/test_generate_fasta.py b/tests/test_generate_fasta.py index c7853aa..533aead 100644 --- a/tests/test_generate_fasta.py +++ b/tests/test_generate_fasta.py @@ -365,6 +365,25 @@ def test_input_file_with_frameshift_variant_range_generates_expected_file(self): expected_key_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_range.key') self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file)) + def test_input_file_with_frameshift_variant_position_shift_generates_expected_file(self): + generate_fasta_input_file = os.path.join(self.test_data_dir, 'input_frameshift_variant_position_shift.tsv') + generate_fasta_output_file = tempfile.NamedTemporaryFile() + generate_fasta_key_output_file = tempfile.NamedTemporaryFile() + + self.assertFalse(call([ + self.python, + self.executable, + generate_fasta_input_file, + self.peptide_sequence_length, + self.epitope_length, + generate_fasta_output_file.name, + generate_fasta_key_output_file.name, + ], shell=False)) + expected_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_position_shift.fasta') + self.assertTrue(cmp(generate_fasta_output_file.name, expected_output_file)) + expected_key_output_file = os.path.join(self.test_data_dir, 'output_frameshift_variant_position_shift.key') + self.assertTrue(cmp(generate_fasta_key_output_file.name, expected_key_output_file)) + def test_input_file_with_sequence_containing_asterisk(self): generate_fasta_input_file = os.path.join(self.test_data_dir, 'input_asterisk_sequence.tsv') generate_fasta_output_file = tempfile.NamedTemporaryFile()