Skip to content

Commit

Permalink
Interface looks nice.
Browse files Browse the repository at this point in the history
  • Loading branch information
bmatern committed May 23, 2017
1 parent 142810e commit 1f49a1c
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 105 deletions.
156 changes: 101 additions & 55 deletions src/AlleleGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,58 +33,95 @@ def __init__(self):
self.inputFileName = ''
self.outputFileName = ''
self.sequenceAnnotation = HLAGene()
self.inputCellNummer = 0#12345
self.inputGene = ''#HLA-C'
self.inputAllele = ''#C0316ext'
self.inputCellNummer = 0
self.inputGene = ''
self.inputAllele = ''

# This is a short wrapper method to use biopython's translation method.
# Most of this code is just checking for things that went wrong
def translateSequence(self,inputSequence):

try:
coding_dna = Seq(inputSequence, generic_dna)
proteinSequence = str(coding_dna.translate())
print ('Exon Sequence before translation:' + coding_dna)
print ('Translated Protein:' + proteinSequence)

# Perform Sanity Checks.
# Stop codon *should* be at the end of the protein.
stopCodonLocation = proteinSequence.find('*')

# If no stop codon was found
if (stopCodonLocation == -1):
if(len(coding_dna) % 3 == 0):
tkMessageBox.showinfo('No Stop Codon Found',
'The translated protein does not contain a stop codon.' )
else:
tkMessageBox.showinfo('No Stop Codon Found',
'The translated protein does not contain a stop codon.\n' +
'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.')

# This happens if the coding sequence is the wrong length
elif (not len(coding_dna) % 3 == 0):
tkMessageBox.showinfo('Check your coding sequence length.',
'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.')
proteinSequence = ''

# This is normal and expected.
else:
if (stopCodonLocation == len(proteinSequence) - 1):
# Stop codon is the last character in the peptide sequence.
# That's just fine, but trim off the stop codon.
proteinSequence = proteinSequence[0:stopCodonLocation]
pass
try:
# Do nothing if the input sequence is blank.
if( len(inputSequence) > 0 ):

coding_dna = Seq(inputSequence, generic_dna)
proteinSequence = str(coding_dna.translate())
print ('Exon Sequence before translation:' + coding_dna)
print ('Translated Protein:' + proteinSequence)

# Perform Sanity Checks.
# Stop codon *should* be at the end of the protein.
# Here we seek out the first instance of a stop codon
# and remove the amino acids that follow it,
# because translation terminates at the first stop codon.
stopCodonLocation = proteinSequence.find('*')

# If no stop codon was found
if (stopCodonLocation == -1):
# If multiple of three (correct codon length)
if(len(coding_dna) % 3 == 0):
tkMessageBox.showinfo('No Stop Codon Found',
'The translated protein does not contain a stop codon.' )

# Wrong Codon Length
else:
tkMessageBox.showinfo('No Stop Codon Found',
'The translated protein does not contain a stop codon.\n' +
'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.')

# If the stop codon is at the end of the protein (this is expected and correct)
elif (stopCodonLocation == len(proteinSequence) - 1):
# If multiple of three (correct codon length)
if(len(coding_dna) % 3 == 0):
# Everything is fine in this case. Trim off the stop codon
proteinSequence = proteinSequence[0:stopCodonLocation]
pass
# Wrong Codon Length
else:
tkMessageBox.showinfo('Extra Nucleotides After the Stop Codon',
'The stop codon is at the correct position in the protein, but ' +
'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.\n\n' +
'Please double check your sequence.')
proteinSequence = proteinSequence[0:stopCodonLocation]

# Else Stop Codon is premature (before the end of the protein)
else:
tkMessageBox.showinfo('Premature Stop Codon Detected',
'Premature stop codon found:\nProtein Position (' +
str(stopCodonLocation + 1) + '/' +
str(len(proteinSequence)) + ')\n\n' +
'Double check your protein sequence,\n' +
'because some aminos from the\n3\' / C-Terminus end\nwere spliced out.\n\n' +
'Protein Before Splicing:\n' + proteinSequence +
'\n\nProtein After Splicing:\n' + proteinSequence[0:stopCodonLocation] +
'\n'
)
proteinSequence = proteinSequence[0:stopCodonLocation]
# If multiple of three (correct codon length)
if(len(coding_dna) % 3 == 0):
tkMessageBox.showinfo('Premature Stop Codon Detected',
'Premature stop codon found:\nProtein Position (' +
str(stopCodonLocation + 1) + '/' +
str(len(proteinSequence)) + ')\n\n' +
'Double check your protein sequence,\n' +
'this might indicate a missense mutation.\n\n' +
'Translated Protein:\n' + proteinSequence +
'\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] +
'\n'
)
proteinSequence = proteinSequence[0:stopCodonLocation]


# Wrong Codon Length
else:
tkMessageBox.showinfo('Premature Stop Codon Detected',
'Premature stop codon found:\nProtein Position (' +
str(stopCodonLocation + 1) + '/' +
str(len(proteinSequence)) + ')\n\n' +
'Nucleotide count is not a multiple of 3,\n' +
'Double check your protein sequence,\n' +
'this might indicate a missense mutation.\n\n' +
'Translated Protein:\n' + proteinSequence +
'\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] +
'\n'
)
proteinSequence = proteinSequence[0:stopCodonLocation]
else:
print('Translating a nucleotide sequence of length 0. That was easy.')
pass

return proteinSequence

except Exception:
Expand Down Expand Up @@ -416,16 +453,25 @@ def buildENASubmission(self):

totalLength = self.sequenceAnnotation.totalLength()
print('total calculated length = ' + str(totalLength))

# These are the main sections of the ENA submission.
documentBuffer += self.printHeader()
documentBuffer += self.printMRNA()
documentBuffer += self.printCDS()
documentBuffer += self.printFeatures()
documentBuffer += self.printSequence()

# Print entry terminator. The last line of an ENA entry.
documentBuffer += ('//\n')

if(totalLength > 0):

# These are the main sections of the ENA submission.
documentBuffer += self.printHeader()
documentBuffer += self.printMRNA()
documentBuffer += self.printCDS()
documentBuffer += self.printFeatures()
documentBuffer += self.printSequence()

# Print entry terminator. The last line of an ENA entry.
documentBuffer += ('//\n')

else:
tkMessageBox.showinfo('No HLA Sequence Found',
'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' )

pass


return documentBuffer

Expand Down
85 changes: 36 additions & 49 deletions src/AlleleGui.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class AlleleGui(Tkinter.Frame):
# Initialize the GUI
def __init__(self, root):
Tkinter.Frame.__init__(self, root)
root.title("EMBL Novel HLA Allele Submission Tool")
root.title("AlleleGen - A Novel HLA Allele Submission Generator")
self.parent = root

# Ctrl-A doesn't work by default in TK. I guess I need to do it myself.
Expand All @@ -51,29 +51,21 @@ def initialize(self):
self.cellNumInstrText = Tkinter.StringVar()
self.cellNumInstrText.set('Sample ID:')
self.inputCellNummer = Tkinter.StringVar()
self.inputCellNummer.set('Donor_12345')

self.geneInstrText = Tkinter.StringVar()
self.geneInstrText.set('Gene:')
self.inputGene = Tkinter.StringVar()
self.inputGene.set('HLA-C')

self.alleleInstrText = Tkinter.StringVar()
self.alleleInstrText.set('Allele:')
self.inputAllele = Tkinter.StringVar()
self.inputAllele.set('Allele:01:02')

#self.inputFeature = Tkinter.StringVar()
#self.inputFeature.set('AGC[AGT]CCG[GGC]AAT')
self.featureInstrText = Tkinter.StringVar()
self.featureInstrText.set('Annotated Sequence:')

self.outputEMBLSubmission = Tkinter.StringVar()
self.outputEMBLSubmission.set('Resulting Allele Submission:')

#Moving this to the bottom
#Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack()

Tkinter.Label(self, width=80, height=1, textvariable=self.cellNumInstrText).pack()
Tkinter.Entry(self, width=15, textvariable=self.inputCellNummer).pack()

Expand Down Expand Up @@ -106,11 +98,7 @@ def initialize(self):
self.featureInputGuiObject.pack()
self.featureInputFrame.pack()

self.featureInputGuiObject.delete('1.0','end')
self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')

#Tkinter.Button(self, text='\|/ Generate an EMBL submission \|/', command=self.updateGUI).pack(**button_opt)
Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.updateGUI).pack(**button_opt)
Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.constructSubmission).pack(**button_opt)

Tkinter.Label(self, width=80, height=1, textvariable=self.outputEMBLSubmission).pack()

Expand Down Expand Up @@ -144,63 +132,61 @@ def initialize(self):
Tkinter.Button(self, text='Save this submission to my computer', command=self.saveSubmissionFile).pack(**button_opt)

self.instructionText = Tkinter.StringVar()
#self.instructionText.set('This tool assumes you are submitting a standard HLA allele.\n'
# + 'HLA alleles are assumed to be fully sequenced, including 5\' and 3\' UTRs.\n'
# + 'Use capital letters for exons, lowercase for introns & UTRs, like this:\n'
# + 'five\'utr EXON1 intron1 EXON2 ... EXON{X} three\'utr\n'
# + 'All spaces, tabs, and newlines are discarded and ignored.')
self.instructionText.set('This tool was developed by the Tissue Typing Laboratory at\nMaastricht University Medical Center.\nFor more information:')
Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack()



# Make a frame for the more-info buttons
self.moreInfoFrame = Tkinter.Frame(self)

Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse).grid(row=0, column=0)
Tkinter.Button(self.moreInfoFrame, text='Contacting or Citing MUMC', command=self.contactInformation).grid(row=0, column=1)
Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence).grid(row=0, column=2)



self.moreInfoFrame.pack()


self.updateGUI()

def howToUse(self):
# This method should popup some instruction text in a wee window.
def sampleSequence(self):
self.featureInputGuiObject.delete('1.0','end')
self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')

self.inputAllele.set('Allele:01:02')
self.inputGene.set('HLA-C')
self.inputCellNummer.set('Donor_12345')

#self.instructionText.set('This tool assumes you are submitting a standard HLA allele.\n'
# + 'HLA alleles are assumed to be fully sequenced, including 5\' and 3\' UTRs.\n'
# + 'Use capital letters for exons, lowercase for introns & UTRs, like this:\n'
# + 'five\'utr EXON1 intron1 EXON2 ... EXON{X} three\'utr\n'
# + 'All spaces, tabs, and newlines are discarded and ignored.')
self.constructSubmission()

# This method should pop up some instruction text in a small window.
# The text should explain explicitly how to use the tool.
def howToUse(self):
tkMessageBox.showinfo('How to use this tool',
'This software is to be used to create an\n'
+ 'EMBL-formatted submission document,\n'
+ 'which specifies a novel HLA allele,\n'
+ 'including exon/intron annotation.\n\n'
+ 'which specifies a (novel) HLA allele.\n\n'

+ 'This tool assumes you are submitting a\n'
+ 'full length HLA allele.\n'
+ 'HLA alleles should be fully sequenced,\n'
+ 'including 5\' and 3\' UTRs.\n'
+ 'This tool requires you to submit a\n'
+ 'full length HLA allele, including\n'
+ '5\' and 3\' UTRs.\n\n'

+ 'Use capital letters for exons,\n'
+ 'lowercase for introns & UTRs.\n\n'

+ 'Paste your formatted sequence in the\n'
+ 'first text area, and push the button\n'
+ 'to generate a submission.\n\n'

+ 'You can copy the submission from the GUI\n'
+ 'or save it as text to your computer.\n\n'

+ 'An example is included in the form,\n'
+ 'Push the "Example Sequence" button to see a small example of'
+ ' a formatted sequence.\n'
+ 'Sequences should follow this pattern:\n'
+ '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n'

+ 'All spaces, tabs, and newlines are\n'
+ 'removed and ignored.'
+ 'To use this tool:\n'
+ '1.) Fill in a Sample ID, Gene Name, and Allele.'
+ ' This text will be included in the submission.\n'
+ '2.) Paste your formatted sequence in the\n'
+ 'Annotated Sequence text area.\n'
+ '3.) Push \"Generate an EMBL submission\" button'
+ ' to generate a submission.\n'
+ '4.) Push the "Save the submission" button'
+ ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n'

+ 'All spaces, tabs, and newlines are'
+ ' removed before the nucleotide sequence is translated.'
)

def contactInformation(self):
Expand Down Expand Up @@ -236,12 +222,13 @@ def saveSubmissionFile(self):
options['initialdir'] = self.idir
options['parent'] = self
options['title'] = 'Specify your output file.'
options['initialfile'] = 'NovelAlleleEMBLSubmission.txt'
outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)
submissionText = self.submOutputGuiObject.get('1.0', 'end')
outputFileObject.write(submissionText)

# Gather sequence information from the input elements, and generate a text EMBL submission.
def updateGUI(self):
def constructSubmission(self):

allGen = AlleleGenerator()
roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end')
Expand Down
2 changes: 1 addition & 1 deletion src/AlleleSubmissionEMBL.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
AlleleGui(root).pack()
root.mainloop()

print('Done. Yay.')
print('Done. Hooray.')

# Print the Software Version
elif (len(sys.argv) == 2 and (
Expand Down

0 comments on commit 1f49a1c

Please sign in to comment.