From 1f49a1ce73edd520743fd7349123856c51e4eea9 Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 23 May 2017 16:56:15 +0200 Subject: [PATCH] Interface looks nice. --- src/AlleleGenerator.py | 156 +++++++++++++++++++++++------------- src/AlleleGui.py | 85 +++++++++----------- src/AlleleSubmissionEMBL.py | 2 +- 3 files changed, 138 insertions(+), 105 deletions(-) diff --git a/src/AlleleGenerator.py b/src/AlleleGenerator.py index cdba97b..88a1604 100755 --- a/src/AlleleGenerator.py +++ b/src/AlleleGenerator.py @@ -33,58 +33,95 @@ def __init__(self): self.inputFileName = '' self.outputFileName = '' self.sequenceAnnotation = HLAGene() - self.inputCellNummer = 0#12345 - self.inputGene = ''#HLA-C' - self.inputAllele = ''#C0316ext' + self.inputCellNummer = 0 + self.inputGene = '' + self.inputAllele = '' # This is a short wrapper method to use biopython's translation method. + # Most of this code is just checking for things that went wrong def translateSequence(self,inputSequence): - try: - coding_dna = Seq(inputSequence, generic_dna) - proteinSequence = str(coding_dna.translate()) - print ('Exon Sequence before translation:' + coding_dna) - print ('Translated Protein:' + proteinSequence) - - # Perform Sanity Checks. - # Stop codon *should* be at the end of the protein. - stopCodonLocation = proteinSequence.find('*') - - # If no stop codon was found - if (stopCodonLocation == -1): - if(len(coding_dna) % 3 == 0): - tkMessageBox.showinfo('No Stop Codon Found', - 'The translated protein does not contain a stop codon.' ) - else: - tkMessageBox.showinfo('No Stop Codon Found', - 'The translated protein does not contain a stop codon.\n' + - 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.') - - # This happens if the coding sequence is the wrong length - elif (not len(coding_dna) % 3 == 0): - tkMessageBox.showinfo('Check your coding sequence length.', - 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.') + proteinSequence = '' - # This is normal and expected. - else: - if (stopCodonLocation == len(proteinSequence) - 1): - # Stop codon is the last character in the peptide sequence. - # That's just fine, but trim off the stop codon. - proteinSequence = proteinSequence[0:stopCodonLocation] - pass + try: + # Do nothing if the input sequence is blank. + if( len(inputSequence) > 0 ): + + coding_dna = Seq(inputSequence, generic_dna) + proteinSequence = str(coding_dna.translate()) + print ('Exon Sequence before translation:' + coding_dna) + print ('Translated Protein:' + proteinSequence) + + # Perform Sanity Checks. + # Stop codon *should* be at the end of the protein. + # Here we seek out the first instance of a stop codon, + # and remove the peptides afterwards. + # because that's what happens in real life. + stopCodonLocation = proteinSequence.find('*') + + # If no stop codon was found + if (stopCodonLocation == -1): + # If multiple of three (correct codon length) + if(len(coding_dna) % 3 == 0): + tkMessageBox.showinfo('No Stop Codon Found', + 'The translated protein does not contain a stop codon.' ) + + # Wrong Codon Length + else: + tkMessageBox.showinfo('No Stop Codon Found', + 'The translated protein does not contain a stop codon.\n' + + 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.') + + # If Stop Codon is in the end of the protein (This is expected and correct) + elif (stopCodonLocation == len(proteinSequence) - 1): + # If multiple of three (correct codon length) + if(len(coding_dna) % 3 == 0): + # Everything is fine in this case. Trim off the stop codon + proteinSequence = proteinSequence[0:stopCodonLocation] + pass + # Wrong Codon Length + else: + tkMessageBox.showinfo('Extra Nucleotides After the Stop Codon', + 'The stop codon is at the correct position in the protein, but ' + + 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.\n\n' + + 'Please double check your sequence.') + proteinSequence = proteinSequence[0:stopCodonLocation] + + # Else Stop Codon is premature (before the end of the protein) else: - tkMessageBox.showinfo('Premature Stop Codon Detected', - 'Premature stop codon found:\nProtein Position (' + - str(stopCodonLocation + 1) + '/' + - str(len(proteinSequence)) + ')\n\n' + - 'Double check your protein sequence,\n' + - 'because some aminos from the\n3\' / C-Terminus end\nwere spliced out.\n\n' + - 'Protein Before Splicing:\n' + proteinSequence + - '\n\nProtein After Splicing:\n' + proteinSequence[0:stopCodonLocation] + - '\n' - ) - proteinSequence = proteinSequence[0:stopCodonLocation] + # If multiple of three (correct codon length) + if(len(coding_dna) % 3 == 0): + tkMessageBox.showinfo('Premature Stop Codon Detected', + 'Premature stop codon found:\nProtein Position (' + + str(stopCodonLocation + 1) + '/' + + str(len(proteinSequence)) + ')\n\n' + + 'Double check your protein sequence,\n' + + 'this might indicate a missense mutation.\n\n' + + 'Translated Protein:\n' + proteinSequence + + '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] + + '\n' + ) + proteinSequence = proteinSequence[0:stopCodonLocation] + + # Wrong Codon Length + else: + tkMessageBox.showinfo('Premature Stop Codon Detected', + 'Premature stop codon found:\nProtein Position (' + + str(stopCodonLocation + 1) + '/' + + str(len(proteinSequence)) + ')\n\n' + + 'Nucleotide count is not a multiple of 3,\n' + + 'Double check your protein sequence,\n' + + 'this might indicate a missense mutation.\n\n' + + 'Translated Protein:\n' + proteinSequence + + '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] + + '\n' + ) + proteinSequence = proteinSequence[0:stopCodonLocation] + else: + print('Translating a nucleotide sequence of length 0. That was easy.') + pass + return proteinSequence except Exception: @@ -416,16 +453,25 @@ def buildENASubmission(self): totalLength = self.sequenceAnnotation.totalLength() print('total calculated length = ' + str(totalLength)) - - # These are the main sections of the ENA submission. - documentBuffer += self.printHeader() - documentBuffer += self.printMRNA() - documentBuffer += self.printCDS() - documentBuffer += self.printFeatures() - documentBuffer += self.printSequence() - - # Print entry terminator. The last line of an ENA entry. - documentBuffer += ('//\n') + + if(totalLength > 0): + + # These are the main sections of the ENA submission. + documentBuffer += self.printHeader() + documentBuffer += self.printMRNA() + documentBuffer += self.printCDS() + documentBuffer += self.printFeatures() + documentBuffer += self.printSequence() + + # Print entry terminator. The last line of an ENA entry. + documentBuffer += ('//\n') + + else: + tkMessageBox.showinfo('No HLA Sequence Found', + 'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' ) + + pass + return documentBuffer diff --git a/src/AlleleGui.py b/src/AlleleGui.py index 0a63a52..ece5473 100755 --- a/src/AlleleGui.py +++ b/src/AlleleGui.py @@ -30,7 +30,7 @@ class AlleleGui(Tkinter.Frame): # Initialize the GUI def __init__(self, root): Tkinter.Frame.__init__(self, root) - root.title("EMBL Novel HLA Allele Submission Tool") + root.title("AlleleGen - A Novel HLA Allele Submission Generator") self.parent = root # Ctrl-A doesn't work by default in TK. I guess I need to do it myself. @@ -51,29 +51,21 @@ def initialize(self): self.cellNumInstrText = Tkinter.StringVar() self.cellNumInstrText.set('Sample ID:') self.inputCellNummer = Tkinter.StringVar() - self.inputCellNummer.set('Donor_12345') self.geneInstrText = Tkinter.StringVar() self.geneInstrText.set('Gene:') self.inputGene = Tkinter.StringVar() - self.inputGene.set('HLA-C') self.alleleInstrText = Tkinter.StringVar() self.alleleInstrText.set('Allele:') self.inputAllele = Tkinter.StringVar() - self.inputAllele.set('Allele:01:02') - #self.inputFeature = Tkinter.StringVar() - #self.inputFeature.set('AGC[AGT]CCG[GGC]AAT') self.featureInstrText = Tkinter.StringVar() self.featureInstrText.set('Annotated Sequence:') self.outputEMBLSubmission = Tkinter.StringVar() self.outputEMBLSubmission.set('Resulting Allele Submission:') - #Moving this to the bottom - #Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack() - Tkinter.Label(self, width=80, height=1, textvariable=self.cellNumInstrText).pack() Tkinter.Entry(self, width=15, textvariable=self.inputCellNummer).pack() @@ -106,11 +98,7 @@ def initialize(self): self.featureInputGuiObject.pack() self.featureInputFrame.pack() - self.featureInputGuiObject.delete('1.0','end') - self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat') - - #Tkinter.Button(self, text='\|/ Generate an EMBL submission \|/', command=self.updateGUI).pack(**button_opt) - Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.updateGUI).pack(**button_opt) + Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.constructSubmission).pack(**button_opt) Tkinter.Label(self, width=80, height=1, textvariable=self.outputEMBLSubmission).pack() @@ -144,63 +132,61 @@ def initialize(self): Tkinter.Button(self, text='Save this submission to my computer', command=self.saveSubmissionFile).pack(**button_opt) self.instructionText = Tkinter.StringVar() - #self.instructionText.set('This tool assumes you are submitting a standard HLA allele.\n' - # + 'HLA alleles are assumed to be fully sequenced, including 5\' and 3\' UTRs.\n' - # + 'Use capital letters for exons, lowercase for introns & UTRs, like this:\n' - # + 'five\'utr EXON1 intron1 EXON2 ... EXON{X} three\'utr\n' - # + 'All spaces, tabs, and newlines are discarded and ignored.') self.instructionText.set('This tool was developed by the Tissue Typing Laboratory at\nMaastricht University Medical Center.\nFor more information:') Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack() - - + # Make a frame for the more-info buttons self.moreInfoFrame = Tkinter.Frame(self) Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse).grid(row=0, column=0) Tkinter.Button(self.moreInfoFrame, text='Contacting or Citing MUMC', command=self.contactInformation).grid(row=0, column=1) + Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence).grid(row=0, column=2) - - self.moreInfoFrame.pack() - - self.updateGUI() - def howToUse(self): - # This method should popup some instruction text in a wee window. + def sampleSequence(self): + self.featureInputGuiObject.delete('1.0','end') + self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat') + + self.inputAllele.set('Allele:01:02') + self.inputGene.set('HLA-C') + self.inputCellNummer.set('Donor_12345') - #self.instructionText.set('This tool assumes you are submitting a standard HLA allele.\n' - # + 'HLA alleles are assumed to be fully sequenced, including 5\' and 3\' UTRs.\n' - # + 'Use capital letters for exons, lowercase for introns & UTRs, like this:\n' - # + 'five\'utr EXON1 intron1 EXON2 ... EXON{X} three\'utr\n' - # + 'All spaces, tabs, and newlines are discarded and ignored.') + self.constructSubmission() + # This method should popup some instruction text in a wee window. + # This should be explicit on how to use the tool. + def howToUse(self): tkMessageBox.showinfo('How to use this tool', 'This software is to be used to create an\n' + 'EMBL-formatted submission document,\n' - + 'which specifies a novel HLA allele,\n' - + 'including exon/intron annotation.\n\n' + + 'which specifies a (novel) HLA allele.\n\n' - + 'This tool assumes you are submitting a\n' - + 'full length HLA allele.\n' - + 'HLA alleles should be fully sequenced,\n' - + 'including 5\' and 3\' UTRs.\n' + + 'This tool requires you to submit a\n' + + 'full length HLA allele, including\n' + + '5\' and 3\' UTRs.\n\n' + + 'Use capital letters for exons,\n' + 'lowercase for introns & UTRs.\n\n' - + 'Paste your formatted sequence in the\n' - + 'first text area, and push the button\n' - + 'to generate a submission.\n\n' - - + 'You can copy the submission from the GUI\n' - + 'or save it as text to your computer.\n\n' - - + 'An example is included in the form,\n' + + 'Push the "Example Sequence" button to see a small example of' + + ' a formatted sequence.\n' + 'Sequences should follow this pattern:\n' + '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n' - + 'All spaces, tabs, and newlines are\n' - + 'removed and ignored.' + + 'To use this tool:\n' + + '1.) Fill in a Sample ID, Gene Name, and Allele.' + + ' This text will be included in the submission.\n' + + '2.) Paste your formatted sequence in the\n' + + 'Annotated Sequence text area.\n' + + '3.) Push \"Generate an EMBL submission\" button' + + ' to generate a submission.\n' + + '4.) Push the "Save the submission" button' + + ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n' + + + 'All spaces, tabs, and newlines are' + + ' removed before the nucleotide sequence is translated.' ) def contactInformation(self): @@ -236,12 +222,13 @@ def saveSubmissionFile(self): options['initialdir'] = self.idir options['parent'] = self options['title'] = 'Specify your output file.' + options['initialfile'] = 'NovelAlleleEMBLSubmission.txt' outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt) submissionText = self.submOutputGuiObject.get('1.0', 'end') outputFileObject.write(submissionText) # Gather sequence information from the input elements, and generate a text EMBL submission. - def updateGUI(self): + def constructSubmission(self): allGen = AlleleGenerator() roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end') diff --git a/src/AlleleSubmissionEMBL.py b/src/AlleleSubmissionEMBL.py index 19beb9b..7df558b 100755 --- a/src/AlleleSubmissionEMBL.py +++ b/src/AlleleSubmissionEMBL.py @@ -36,7 +36,7 @@ AlleleGui(root).pack() root.mainloop() - print('Done. Yay.') + print('Done. Hooray.') # Print the Software Version elif (len(sys.argv) == 2 and (