-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
executable file
·102 lines (88 loc) · 3.68 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
'''
@author: Marta Luksza, [email protected]
'''
import sys
from Aligner import Aligner
from Neoantigen import Neoantigen
def readNeoantigens(neofilename):
neoantigens={}
f=open(neofilename)
header=f.readline()
htab=header.strip().split()
hdict={}
for i in range(0,len(htab)):
hdict[htab[i]]=i
line=f.readline()
while line:
line=line.strip()
nparams=line.split()
if nparams[7]=="NA":
line=f.readline()
continue
#print(nparams)
neoantigen=Neoantigen(nparams)
neoantigens[neoantigen.id]=neoantigen
neoantigen.setA()
line=f.readline()
f.close()
samples=set(map(lambda neo: neo.getSampleName(),neoantigens.values()))
return [neoantigens,samples]
def main(argv):
'''
command line parameters:
neofile - text file with neoantigen data (supplementary data)
alignmentDirectory - folder with precomputed alignments
a - midpoint parameter of the logistic function, alignment score threshold
k - slope parameter of the logistic function
outfile - path to a file where to output neoantigen fitness computation
'''
neofile=argv[1]
alignmentDirectory=argv[2]
a=float(argv[3])
k=float(argv[4])
outfile=sys.argv[5]
xmlpath=sys.argv[6]
[neoantigens,samples]=readNeoantigens(neofile)
#Compute TCR-recognition probabilities for all neoantigens
aligner=Aligner()
#for sample in samples:
# xmlpath=alignmentDirectory+"/neoantigens_"+sample+"_iedb.xml"
# aligner.readAllBlastAlignments(xmlpath)
#xmlpath=alignmentDirectory+"/neoantigens_5NSAK_iedb.xml"
#xmlpath="/scratch/eknodel/cohen_melanoma/validated_peptides/3466/3466_lukszablast.out"
aligner.readAllBlastAlignments(xmlpath)
aligner.computeR(a, k)
#Write neoantigen recognition potential
of=open(outfile,'w')
#header=["NeoantigenID","Mutation","Sample","MutatedPeptide","ResidueChangeClass","MutantPeptide","WildtypePeptide","A","R","Excluded","NeoantigenRecognitionPotential"]
header=["NeoantigenID","Mutation","Sample","MutantPeptide","WildtypePeptide","A","R","w","wc","Fitness"]
header="\t".join(header)
of.write(header+"\n")
for i in neoantigens:
#print(i)
neoantigen=neoantigens[i]
#print(neoantigen)
w=neoantigen.getHydro() #calculates neoantigen fraction based on Luksza definition
wc = neoantigen.getConsortiumHydro() #Calcuculates neoantigen fraction based on Consortium's definition (https://doi.org/10.1016/j.cell.2020.09.015)
A=neoantigens[i].getA() #MHC amplitude A
#print(A)
mtpeptide=neoantigens[i].mtPeptide #mutant peptide
wtpeptide=neoantigens[i].wtPeptide
R=aligner.getR(i)
#print(R)
# Residue change:
# HH: from hydrophobic to hydrophobic,
# NN: from non-hydrophobic to non-hydrophobic
# HN: from hydrophobic to non-hydrophobic,
# NH: from non-hydrophobic to hydrophobic
# other (WW, WH, HW, NW, WN) which include aminoacids without a clear classification
#residueChange=neoantigen.residueChange
#print(residueChange)
fitnessCost=A*R*w
#fitnessCost=A*R
#l=[i, neoantigen.mid, neoantigen.sample, neoantigen.position, residueChange, mtpeptide, wtpeptide, A,R, 1-w, fitnessCost]#, neoAlignment, epitopeAlignment, score, species]
l=[i, neoantigen.mid, neoantigen.sample, mtpeptide, wtpeptide, A,R,w,wc, fitnessCost]#, neoAlignment, epitopeAlignment, score, species]
l="\t".join(map(lambda s: str(s),l))
of.write(l+"\n")
if __name__ == '__main__':
main(sys.argv)