diff --git a/HEADER b/HEADER new file mode 100755 index 0000000..6d61fb3 --- /dev/null +++ b/HEADER @@ -0,0 +1,16 @@ +# This file is part of EMBL-HLA-Submission. +# +# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. + +# Version 1.0 diff --git a/MakeExecutables.bat b/MakeExecutables.bat new file mode 100755 index 0000000..65f829c --- /dev/null +++ b/MakeExecutables.bat @@ -0,0 +1,34 @@ +:: This file is part of EMBL-HLA-Submission. +:: +:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify +:: it under the terms of the GNU Lesser General Public License as published by +:: the Free Software Foundation, either version 3 of the License, or +:: (at your option) any later version. +:: +:: EMBL-HLA-Submission is distributed in the hope that it will be useful, +:: but WITHOUT ANY WARRANTY; without even the implied warranty of +:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +:: GNU Lesser General Public License for more details. +:: +:: You should have received a copy of the GNU Lesser General Public License +:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. + +:: Version 1.0 + +:: This bat file is intended to create an executable for the windows environment. +:: It uses Anaconda for python 2.7 to keep track of packages. + +:: See the file README.MD for how to set up your anaconda environment. 
+ +:: Please verify that your files are set up such that the files exist here: +:: C:\MUMCScripts\EMBL-HLA-Submission\MakeExecutables.bat +:: If that is a problem, modify the paths below and the spec file "AlleleSubInstallerOptions_Windows.spec" as your needs require. + +SET CodePath=C:\MUMCScripts\EMBL-HLA-Submission\src +SET BinPath=C:\MUMCScripts\EMBL-HLA-Submission\bin +SET SpecFile=AlleleSubInstallerOptions_Windows.spec +SET CondaEnvironment=AlleleSubEnvironment + +:: Run Pyinstaller to create executables +cd %CodePath% +activate %CondaEnvironment% && pyinstaller %SpecFile% --distpath %BinPath% && deactivate diff --git a/MakeExecutables.sh b/MakeExecutables.sh new file mode 100755 index 0000000..1e83bbe --- /dev/null +++ b/MakeExecutables.sh @@ -0,0 +1,29 @@ +# This file is part of EMBL-HLA-Submission. +# +# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. + +# Version 1.0 + +# This isn't working in its current state. The spec file is apparently just for Windows, +# it's still pulling in Windows DLLs. +# Maybe *you* want to make this file work, I just don't think it's necessary. + +# If you're a Linux user, you should run the program using Run_allele_submission.sh. + +# TODO: Make a Linux-specific pyinstaller spec file that works. 
+ + +#source activate AlleleSubEnvironment +#python /home/ben/Pyinstaller/PyInstaller-3.2/pyinstaller.py AlleleSubInstallerOptions.spec +#source deactivate diff --git a/Run_allele_submission.bat b/Run_allele_submission.bat new file mode 100755 index 0000000..24be771 --- /dev/null +++ b/Run_allele_submission.bat @@ -0,0 +1,21 @@ +:: This file is part of EMBL-HLA-Submission. +:: +:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify +:: it under the terms of the GNU Lesser General Public License as published by +:: the Free Software Foundation, either version 3 of the License, or +:: (at your option) any later version. +:: +:: EMBL-HLA-Submission is distributed in the hope that it will be useful, +:: but WITHOUT ANY WARRANTY; without even the implied warranty of +:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +:: GNU Lesser General Public License for more details. +:: +:: You should have received a copy of the GNU Lesser General Public License +:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. + +:: Version 1.0 + +:: See the file README.MD for how to set up your anaconda environment. + +activate AlleleSubEnvironment && python AlleleSubmissionEMBL.py && deactivate + diff --git a/Run_allele_submission.sh b/Run_allele_submission.sh new file mode 100755 index 0000000..ad471f2 --- /dev/null +++ b/Run_allele_submission.sh @@ -0,0 +1,26 @@ +# This file is part of EMBL-HLA-Submission. +# +# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
# The AlleleGenerator class contains logic to generate an EMBL HLA allele submission
# in ENA format.
class AlleleGenerator():
    """Build an EMBL HLA allele submission in ENA flat-file format.

    Typical use: set ``inputCellNummer``, ``inputGene`` and ``inputAllele``,
    pass an annotated sequence to ``processInputSequence`` (capital letters
    mark exons, lowercase marks UTRs and introns), then call
    ``buildENASubmission`` to obtain the submission text.
    """

    # Fixed-column layout of the flat file. The padding is computed from these
    # values instead of being embedded as runs of blanks inside string
    # literals, where it is easy to lose.
    _LINE_CODE_GAP = ' ' * 3             # blanks between a two-letter line code and its text
    _FEATURE_KEY_WIDTH = 16              # feature key is padded so the location starts at column 22
    _QUALIFIER_PREFIX = 'FT' + ' ' * 19  # qualifiers also start at column 22
    _SEQUENCE_INDENT = ' ' * 5           # sequence rows start with five blanks

    # Residues on the first /translation row and on every following row.
    # The first row is shorter by the 14 characters of '/translation="'.
    _FIRST_PEPTIDE_ROW = 66
    _NEXT_PEPTIDE_ROW = 80

    inputFileName = ''
    outputFileName = ''
    # Replaced by a fresh HLAGene in __init__, so instances never share one.
    sequenceAnnotation = None
    inputCellNummer = 0   # e.g. 12345
    inputGene = ''        # e.g. 'HLA-C'
    inputAllele = ''      # e.g. 'C0316ext'

    def __init__(self):
        # A per-instance annotation object; a class-level HLAGene() would be
        # shared by every generator.
        self.sequenceAnnotation = HLAGene()

    # ----- small formatting helpers ------------------------------------

    @staticmethod
    def _span(locus):
        """Return the 'begin..end' location text of a locus."""
        return str(locus.beginIndex) + '..' + str(locus.endIndex)

    @staticmethod
    def _featureLine(key, location):
        """Return one 'FT' feature line with the key padded to its column."""
        return ('FT' + AlleleGenerator._LINE_CODE_GAP
                + key.ljust(AlleleGenerator._FEATURE_KEY_WIDTH)
                + location + '\n')

    @staticmethod
    def _qualifierLine(qualifier):
        """Return one 'FT' qualifier line, e.g. for '/gene="HLA-C"'."""
        return AlleleGenerator._QUALIFIER_PREFIX + qualifier + '\n'

    @staticmethod
    def _formatTranslationQualifier(peptideSequence):
        """Return the /translation qualifier lines for a peptide.

        The first row holds 66 residues and later rows hold 80. The closing
        quote is always written after the last residue, however many rows
        the peptide needs.
        """
        first = AlleleGenerator._FIRST_PEPTIDE_ROW
        step = AlleleGenerator._NEXT_PEPTIDE_ROW
        rows = [peptideSequence[0:first]]
        rows.extend(peptideSequence[i:i + step]
                    for i in range(first, len(peptideSequence), step))
        rows[0] = '/translation="' + rows[0]
        rows[-1] += '"'
        return ''.join(AlleleGenerator._qualifierLine(row) for row in rows)

    @staticmethod
    def _formatSequenceRows(completeSequence):
        """Return the sequence rows: 60 bases per row in six groups of ten.

        Each row ends with the index of its last base. A short final row is
        padded with blanks so that index lines up with the rows above it.
        """
        rows = []
        for rowStart in range(0, len(completeSequence), 60):
            sequenceRow = completeSequence[rowStart:rowStart + 60]
            # Every one of the six groups is followed by a blank, even when empty.
            groups = ''.join(sequenceRow[i:i + 10] + ' ' for i in range(0, 60, 10))
            padding = ' ' * (60 - len(sequenceRow))
            rows.append(AlleleGenerator._SEQUENCE_INDENT + groups + padding
                        + str(rowStart + len(sequenceRow)) + '\n')
        return ''.join(rows)

    @staticmethod
    def _splitAnnotatedSequence(cleanedGene):
        """Split an annotated sequence into (sequence, isExon) pairs.

        Capital A/C/G/T are exon bases, lowercase are intron/UTR bases. A new
        locus starts wherever the case changes. The sequence is assumed to
        start with the 5' UTR and end with the 3' UTR, so the first pair and
        the last pair are always reported as non-exon. Other characters are
        reported on the console and do not start a new locus.
        """
        pieces = []
        insideAnExon = False
        locusBeginPosition = 0
        for x, currentChar in enumerate(cleanedGene):
            if currentChar.upper() not in ('A', 'G', 'C', 'T'):
                print('Nonstandard nucleotide detected at position ' + str(x) + ' : ' + currentChar
                    + '. If this is a wildcard character, you might be ok.')
                continue

            if currentChar.isupper() == insideAnExon:
                # Still inside the same exon or intron; nothing to store.
                continue

            # The case changed: close the locus that just ended.
            pieces.append((cleanedGene[locusBeginPosition:x].upper(), insideAnExon))
            insideAnExon = not insideAnExon
            locusBeginPosition = x

        # Whatever remains is stored as the 3' UTR (a non-exon locus).
        pieces.append((cleanedGene[locusBeginPosition:len(cleanedGene)].upper(), False))
        return pieces

    # ----- public methods ----------------------------------------------

    # This is a short wrapper method to use biopython's translation method.
    def translateSequence(self, inputSequence):
        """Translate a coding nucleotide sequence and return the peptide.

        The peptide is cut at the first stop codon. A message box is shown
        when no stop codon is found, or when the stop codon is not the last
        codon of the sequence.
        """
        coding_dna = Seq(inputSequence, generic_dna)

        peptideSequence = str(coding_dna.translate())
        print ('Translated Protein:' + peptideSequence)

        # Stop codon *should* be at the end of the protein.
        stopCodonLocation = peptideSequence.find('*')

        if stopCodonLocation == -1:
            if len(coding_dna) % 3 == 0:
                tkMessageBox.showinfo('No Stop Codon Found',
                    'The translated protein does not contain a stop codon.')
            else:
                tkMessageBox.showinfo('No Stop Codon Found',
                    'The translated protein does not contain a stop codon.\n' +
                    'It looks like a frame shift,\n' +
                    'The coding nucleotide sequence has length: ' + str(len(coding_dna)) + '.')
            return peptideSequence

        if stopCodonLocation != len(peptideSequence) - 1:
            # The stop codon is not the last codon: warn before trimming.
            tkMessageBox.showinfo('Premature Stop Codon Detected',
                'Premature stop codon found:\nPeptide Position (' +
                str(stopCodonLocation + 1) + '/' +
                str(len(peptideSequence)) + ')\n\n' +
                'Double check your peptide sequence,\n' +
                'Some aminos from the 3\' / C-Terminus\nwere spliced out.\n\n' +
                'Before : ' + peptideSequence +
                '\nAfter : ' + peptideSequence[0:stopCodonLocation] +
                '\n'
                )

        # Trim the stop codon and anything after it.
        return peptideSequence[0:stopCodonLocation]

    def readInputSequence(self):
        """Read ``self.inputFileName`` and process its annotated sequence."""
        print('Reading file: ' + self.inputFileName)

        # 'with' closes the file even if reading fails.
        with open(self.inputFileName, 'r') as fileObject:
            fullFile = fileObject.read()

        self.processInputSequence(fullFile)

    # The input should be a string of nucleotides, with capital letters to identify exons.
    # Annotations are expected and read in this format:
    # fiveprimeutrEXONONEintrononeEXONTWOintrontwoEXONTHREEthreeprimeutr
    # agctagctagctAGCTAGCtagctagctAGCTAGCtagctagctAGCTAGCTAgctagctagctag
    # All spaces, line feeds, and tabs are removed and ignored.
    def processInputSequence(self, inputSequenceText):
        """Parse an annotated sequence and store it in ``sequenceAnnotation``.

        The loci are named and indexed by ``HLAGene.annotateLoci`` and a
        summary is printed by ``HLAGene.printGeneSummary``.
        """
        resultGeneLoci = HLAGene()
        # Start from a fresh list rather than relying on HLAGene's default.
        resultGeneLoci.loci = []

        # Trim out any spaces, tabs, newlines.
        cleanedGene = inputSequenceText.replace(' ','').replace('\n','').replace('\t','').replace('\r','')

        # Capitalize everything, so there is a copy of the full unannotated sequence.
        unannotatedGene = cleanedGene.upper()
        resultGeneLoci.fullSequence = unannotatedGene
        print('Total Sequence Length = ' + str(len(unannotatedGene)))

        for sequence, isExon in self._splitAnnotatedSequence(cleanedGene):
            locus = GeneLocus()
            locus.sequence = sequence
            locus.exon = isExon
            resultGeneLoci.loci.append(locus)

        # Annotate the loci (name them) and print the results.
        resultGeneLoci.annotateLoci()
        resultGeneLoci.printGeneSummary()

        self.sequenceAnnotation = resultGeneLoci

    # Create the text submission based on the ENA format.
    def buildENASubmission(self):
        """Return the submission text for the current ``sequenceAnnotation``.

        ENA format is the preferred submission type for EMBL. More information:
        http://www.ebi.ac.uk/ena/submit/sequence-submission
        http://www.ebi.ac.uk/ena/submit/entry-upload-templates
        ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt
        ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html
        http://www.ebi.ac.uk/ena/software/flat-file-validator
        """
        gap = self._LINE_CODE_GAP
        featureLine = self._featureLine
        qualifierLine = self._qualifierLine
        span = self._span

        annotation = self.sequenceAnnotation
        loci = annotation.loci
        completeSequence = annotation.getCompleteSequence()
        exonSequence = annotation.getExonSequence()
        totalLength = annotation.totalLength()
        featureCount = len(loci)
        print('total calculated length = ' + str(totalLength))

        # The first and last loci are the UTRs; everything between them
        # alternates exon / intron.
        fiveUtr = loci[0]
        threeUtr = loci[featureCount - 1]
        interiorLoci = loci[1:featureCount - 1]
        exons = [locus for locus in interiorLoci if locus.exon]

        geneAndAllele = (qualifierLine('/gene="' + str(self.inputGene) + '"')
                         + qualifierLine('/allele="' + str(self.inputAllele) + '"'))
        product = qualifierLine('/product="MHC class I antigen"')

        parts = []

        # Header
        parts.append('ID' + gap + 'XXX; XXX; linear; genomic DNA; XXX; XXX; '
                     + str(totalLength) + ' BP.\n')
        parts.append('XX\n')

        # A valid document should have an AC (Accession Number) and DE (Description) field.
        # There is no AC number available, so it's blank.
        parts.append('AC' + gap + '\n')
        parts.append('XX\n')
        parts.append('DE' + gap + 'Human Leukocyte Antigen\n')
        parts.append('XX\n')

        # Feature table key
        parts.append('FH' + gap + 'Key'.ljust(self._FEATURE_KEY_WIDTH) + 'Location/Qualifiers\n')
        parts.append('FH\n')

        # Source: it's from a human.
        parts.append(featureLine('source', '1..' + str(totalLength)))
        parts.append(qualifierLine('/organism="Homo sapiens"'))
        parts.append(qualifierLine('/db_xref="taxon:9606"'))
        parts.append(qualifierLine('/mol_type="genomic DNA"'))
        parts.append(qualifierLine('/chromosome="6"'))
        parts.append(qualifierLine('/isolate="' + str(self.inputCellNummer) + '"'))

        # mRNA: the 5' and 3' UTR are included in the mRNA, but not in the
        # CDS (coding sequence), since they're untranslated.
        mrnaLoci = [fiveUtr] + exons + [threeUtr]
        parts.append(featureLine('mRNA', 'join(' + ','.join(span(l) for l in mrnaLoci) + ')'))
        parts.append(geneAndAllele)
        parts.append(product)

        # CDS: the exons only. Joining the spans closes the bracket
        # even when there are no exons.
        parts.append(featureLine('CDS', 'join(' + ','.join(span(l) for l in exons) + ')'))
        parts.append(qualifierLine('/transl_table=1'))
        parts.append(qualifierLine('/codon_start=1'))
        parts.append(geneAndAllele)
        parts.append(product)
        parts.append(self._formatTranslationQualifier(self.translateSequence(exonSequence)))

        # 5'UTR
        parts.append(featureLine('5\'UTR', span(fiveUtr)))
        parts.append(qualifierLine('/note="5\'UTR"'))
        parts.append(geneAndAllele)

        # Alternating Ex/Int/Ex. Loci 1 and 2 are exon 1 and intron 1,
        # loci 3 and 4 are exon 2 and intron 2, and so on.
        for x, currentFeature in enumerate(interiorLoci, 1):
            key = 'exon' if currentFeature.exon else 'intron'
            parts.append(featureLine(key, span(currentFeature)))
            parts.append(qualifierLine('/number=' + str((x + 1) // 2)))
            parts.append(geneAndAllele)

        # 3'UTR
        parts.append(featureLine('3\'UTR', span(threeUtr)))
        parts.append(qualifierLine('/note="3\'UTR"'))
        parts.append(geneAndAllele)
        parts.append('XX\n')

        # Sequence header with the base composition.
        aCount = completeSequence.count('A')
        cCount = completeSequence.count('C')
        gCount = completeSequence.count('G')
        tCount = completeSequence.count('T')
        otherCount = len(completeSequence) - (aCount + cCount + gCount + tCount)
        parts.append('SQ' + gap + 'Sequence ' + str(totalLength) + ' BP; '
                     + str(aCount) + ' A; ' + str(cCount) + ' C; '
                     + str(gCount) + ' G; ' + str(tCount) + ' T; '
                     + str(otherCount) + ' other;\n')

        # Sequence rows in groups of 10, as specified by the EMBL user manual.
        parts.append(self._formatSequenceRows(completeSequence))

        # Entry terminator. The last line of an ENA entry.
        parts.append('//\n')

        return ''.join(parts)

    # Simple method to write the results to a file on your computer.
    def outputENASubmissionToFile(self, outputText):
        """Write ``outputText`` to ``self.outputFileName``."""
        # 'with' closes the file even if the write fails.
        with open(self.outputFileName, 'w') as outputFileObject:
            outputFileObject.write(outputText)
# The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class.
class AlleleGui(Tkinter.Frame):
    """Main window of the EMBL novel HLA allele submission tool.

    The frame holds entry fields for the sample ID, gene and allele, a text
    box for the annotated sequence, a text box showing the generated
    submission, and buttons to generate, save and get help.
    """

    # Initialize the GUI
    def __init__(self, root):
        """Create the frame inside ``root`` and build all widgets."""
        Tkinter.Frame.__init__(self, root)
        root.title("EMBL Novel HLA Allele Submission Tool")
        self.parent = root
        self.initialize()

    # Initialize GUI elements
    def initialize(self):
        """Create and pack every widget, then generate a first submission."""
        button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}

        # Label texts and default values of the three entry fields.
        self.cellNumInstrText = Tkinter.StringVar()
        self.cellNumInstrText.set('Sample ID:')
        self.inputCellNummer = Tkinter.StringVar()
        self.inputCellNummer.set('11111')

        self.geneInstrText = Tkinter.StringVar()
        self.geneInstrText.set('Gene:')
        self.inputGene = Tkinter.StringVar()
        self.inputGene.set('HLA-C')

        self.alleleInstrText = Tkinter.StringVar()
        self.alleleInstrText.set('Allele:')
        self.inputAllele = Tkinter.StringVar()
        self.inputAllele.set('C0213ext')

        self.featureInstrText = Tkinter.StringVar()
        self.featureInstrText.set('Annotated Sequence:')

        self.outputEMBLSubmission = Tkinter.StringVar()
        self.outputEMBLSubmission.set('Resulting Allele Submission:')

        Tkinter.Label(self, width=80, height=1, textvariable=self.cellNumInstrText).pack()
        Tkinter.Entry(self, width=15, textvariable=self.inputCellNummer).pack()

        Tkinter.Label(self, width=80, height=1, textvariable=self.geneInstrText).pack()
        Tkinter.Entry(self, width=15, textvariable=self.inputGene).pack()

        Tkinter.Label(self, width=80, height=1, textvariable=self.alleleInstrText).pack()
        Tkinter.Entry(self, width=15, textvariable=self.inputAllele).pack()

        Tkinter.Label(self, width=80, height=1, textvariable=self.featureInstrText).pack()

        # Create a frame for the input widget, add scrollbars.
        self.featureInputFrame = Tkinter.Frame(self)

        self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL)
        self.featureInputXScrollbar.pack(side=BOTTOM, fill=X)

        self.featureInputYScrollbar = Scrollbar(self.featureInputFrame)
        self.featureInputYScrollbar.pack(side=RIGHT, fill=Y)

        self.featureInputGuiObject = Tkinter.Text(
            self.featureInputFrame, width=80, height=12, wrap=NONE
            , xscrollcommand=self.featureInputXScrollbar.set
            , yscrollcommand=self.featureInputYScrollbar.set
        )

        self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview)
        self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview)

        self.featureInputGuiObject.pack()
        self.featureInputFrame.pack()

        # Pre-fill the input with a tiny example: 5'UTR, two exons, one intron, 3'UTR.
        self.featureInputGuiObject.delete('1.0','end')
        self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')

        # unichr(8681) is a downwards arrow on either side of the button text.
        Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.updateGUI).pack(**button_opt)

        Tkinter.Label(self, width=80, height=1, textvariable=self.outputEMBLSubmission).pack()

        # Output interface is contained on a frame.
        self.submOutputFrame = Tkinter.Frame(self)

        self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL)
        self.submOutputXScrollbar.pack(side=BOTTOM, fill=X)

        self.submOutputYScrollbar = Scrollbar(self.submOutputFrame)
        self.submOutputYScrollbar.pack(side=RIGHT, fill=Y)

        self.submOutputGuiObject = Tkinter.Text(
            self.submOutputFrame, width=80, height=15, wrap=NONE
            , xscrollcommand=self.submOutputXScrollbar.set
            , yscrollcommand=self.submOutputYScrollbar.set
        )

        self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview)
        self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview)

        self.submOutputGuiObject.pack()
        self.submOutputFrame.pack()

        # Directory and file name of this source file.
        # self.idir is the initial directory of the save dialog in saveSubmissionFile.
        FileAndPath = os.path.abspath(__file__)
        self.idir, self.ifile = os.path.split(FileAndPath)

        Tkinter.Button(self, text='Save this submission to my computer', command=self.saveSubmissionFile).pack(**button_opt)

        self.instructionText = Tkinter.StringVar()
        self.instructionText.set('This tool was developed by the Tissue Typing Laboratory at\nMaastricht University Medical Center.\nFor more information:')
        Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack()

        # Make a frame for the more-info buttons
        self.moreInfoFrame = Tkinter.Frame(self)

        Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse).grid(row=0, column=0)
        Tkinter.Button(self.moreInfoFrame, text='Contacting or Citing MUMC', command=self.contactInformation).grid(row=0, column=1)

        self.moreInfoFrame.pack()

        # Show the submission for the example sequence straight away.
        self.updateGUI()

    def howToUse(self):
        """Show a message box with usage instructions."""
        tkMessageBox.showinfo('How to use this tool',
            'This software is to be used to create an\n'
            + 'EMBL-formatted submission document,\n'
            + 'which specifies a novel HLA allele,\n'
            + 'including exon/intron annotation.\n\n'

            + 'This tool assumes you are submitting a\n'
            + 'full length HLA allele.\n'
            + 'HLA alleles should be fully sequenced,\n'
            + 'including 5\' and 3\' UTRs.\n'
            + 'Use capital letters for exons,\n'
            + 'lowercase for introns & UTRs.\n\n'

            + 'An example is included in the form,\n'
            + 'Sequences should follow this pattern:\n'
            + '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n'

            + 'All spaces, tabs, and newlines are\n'
            + 'removed and ignored.'
            )

    def contactInformation(self):
        """Show a message box with contact details and the project URL."""
        tkMessageBox.showinfo('Contact Information',
            'This software was created at\n'
            + 'Maastricht University Medical Center\n'
            + 'Transplantation Immunology\n'
            + 'Tissue Typing Laboratory.\n'
            + 'by Ben Matern:\n'
            + 'ben.matern@mumc.nl\n\n'

            + 'Please send Ben your bioinformatics\n'
            + 'and data related questions.\n\n'

            + 'all other inquiries can be directed\n'
            + 'to Marcel Tilanus:\n'
            + 'm.tilanus@mumc.nl\n\n'

            + 'This code will be hosted at:\n'
            + 'https://github.com/transplantation-\nimmunology/EMBL-HLA-Submission\n'
            + 'You will find more information on\n'
            + 'EMBL\'s data format on that page.'

            )

    # Ask user for a output file location, and write the EMBL submission to a file.
    # This takes the input from the output field, rather than generate a new submission.
    # So the user can edit the submission before or after saving it.
    def saveSubmissionFile(self):
        """Save the text of the output box to a file chosen by the user.

        Nothing is written when the user cancels the dialog.
        """
        self.dir_opt = options = {}
        options['initialdir'] = self.idir
        options['parent'] = self
        options['title'] = 'Specify your output file.'
        outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)

        # asksaveasfile returns None when the dialog is cancelled.
        if outputFileObject is None:
            return

        submissionText = self.submOutputGuiObject.get('1.0', 'end')
        try:
            outputFileObject.write(submissionText)
        finally:
            # Close the file so the text is flushed to disk.
            outputFileObject.close()

    # Gather sequence information from the input elements, and generate a text EMBL submission.
    def updateGUI(self):
        """Regenerate the submission from the form and show it in the output box."""
        allGen = AlleleGenerator()
        roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end')
        allGen.inputCellNummer = self.inputCellNummer.get()
        allGen.inputGene = self.inputGene.get()
        allGen.inputAllele = self.inputAllele.get()
        allGen.processInputSequence(roughFeatureSequence)
        enaSubmission = allGen.buildENASubmission()
        self.submOutputGuiObject.delete('1.0','end')
        self.submOutputGuiObject.insert('1.0', enaSubmission)
# Entry point of the EMBL-HLA-Submission tool (AlleleSubmissionEMBL.py).
# Run without arguments to open the GUI, or with -v / --version / -version
# to print the software version.

SoftwareVersion = "EMBL-HLA-Submission Version 1.0"

import Tkinter
import sys

from AlleleGui import AlleleGui

if __name__=='__main__':
    try:

        # This is a really simple way to read commandline args,
        # because there really shouldn't be any.

        # No parameters are expected at all. sys.argv[0] doesn't count.
        if (len(sys.argv) == 1):
            print('\n\n\n\n\n***Creating an EMBL Allele submission***\n')

            root = Tkinter.Tk()
            AlleleGui(root).pack()
            # Blocks until the window is closed.
            root.mainloop()

            print('Done. Yay.')

        # Print the Software Version
        elif (len(sys.argv) == 2 and
                sys.argv[1].lower() in ('-v', '--version', '-version')):
            print (SoftwareVersion)

        # You executed the software wrong. Sorry.
        else:
            print("usage:\n" +
                "\tRun this program using standard python call:\n" +
                "\t$python AlleleSubmissionEMBL.py\n" +
                "\tbiopython must be accessible in your python environment. To run using Anaconda,\n"
                "\tCheck readme at https://github.com/transplantation-immunology/EMBL-HLA-Submission\n"
            )

    except Exception:
        # Top-level handler: report the problem, then re-raise so the
        # traceback and a non-zero exit status are not lost.
        # print(...) with one argument behaves the same as the rest of this script.
        print('Unexpected problem during execution:')
        print(sys.exc_info()[1])
        raise
# The GeneLocus class specifies a locus on a Gene,
# Either an Exon, intron, or UTR.
class GeneLocus(object):

    # Class-level defaults. They are all immutable values, so sharing them
    # between instances is harmless; HLAGene.annotateLoci() assigns name,
    # beginIndex and endIndex on each instance.
    name = ''
    sequence = ''
    exon = False
    beginIndex = 0
    endIndex = 0

    # Number of positions covered by this locus.
    # Indices are inclusive on both ends (e.g. 66-137 is 72 long), hence the +1.
    def length(self):
        return 1 + self.endIndex - self.beginIndex


# The Gene class represents an entire HLA Gene, consisting of a series of loci.
class HLAGene(object):

    # Every gene gets its OWN list of loci. Declaring "loci = []" on the class
    # shares one list between all HLAGene objects, so loci appended to one
    # gene would show up in every other gene as well.
    def __init__(self):
        self.fullSequence = ''
        self.loci = []

    # Length of the concatenated sequence of all loci.
    def totalLength(self):
        return len(self.getCompleteSequence())

    # Combine the UTRs, Exons, and Introns into a contiguous sequence.
    def getCompleteSequence(self):
        return ''.join(locus.sequence for locus in self.loci)

    # Combine the Exons into a contiguous sequence.
    # The first and last loci are the UTRs and are never considered,
    # regardless of how their exon flag is set.
    def getExonSequence(self):
        return ''.join(locus.sequence for locus in self.loci[1:-1] if locus.exon)

    # This method names the UTRs, Exons, and Introns, and records their indices.
    # A HLA gene is always expected to have the pattern
    # 5UT -> EX1 -> I1 -> EX2 -> I2 -> ... -> EXN -> 3UT
    # Fewer than three loci cannot follow that pattern; in that case a warning
    # is printed and the loci are left untouched.
    def annotateLoci(self):
        print('Annotating Gene Now')

        if len(self.loci) <= 2:
            print('I expected at least three loci in order to annotate them. Please double check your input file.')
            return

        lastPosition = len(self.loci) - 1
        nextBeginIndex = 1
        for position, locus in enumerate(self.loci):

            # Determine the name of this locus.
            # Odd positions are exons, even positions are introns.
            # "//" keeps the numbering integral on Python 2 and Python 3;
            # plain "/" would produce names like 'EX1.0' under true division.
            if position == 0:
                locus.name = '5UT'
            elif position == lastPosition:
                locus.name = '3UT'
            elif position % 2 == 1:
                locus.name = 'EX' + str(position // 2 + 1)
            else:
                locus.name = 'I' + str(position // 2)

            # Determine start and end indices (1-based, inclusive), e.g.:
            # 5UT: 1-65
            # EX1: 66-137
            #  I1: 138-267
            locus.beginIndex = nextBeginIndex
            nextBeginIndex += len(locus.sequence)
            locus.endIndex = nextBeginIndex - 1

    # Print a summary of the inputted sequence to console.
    def printGeneSummary(self):
        print('\nPrinting Gene Summary')
        for currentLocus in self.loci:
            print(currentLocus.name + ":\t"
                + str(currentLocus.beginIndex) + '-' + str(currentLocus.endIndex)
                + '\n' + currentLocus.sequence
            )
        print('')

# NOTE: the patch this code was taken from also deletes src/helloworld.py
# (a three-line "hello world" placeholder script).