diff --git a/HEADER b/HEADER
new file mode 100755
index 0000000..6d61fb3
--- /dev/null
+++ b/HEADER
@@ -0,0 +1,16 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
diff --git a/MakeExecutables.bat b/MakeExecutables.bat
new file mode 100755
index 0000000..65f829c
--- /dev/null
+++ b/MakeExecutables.bat
@@ -0,0 +1,34 @@
+:: This file is part of EMBL-HLA-Submission.
+::
+:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+:: it under the terms of the GNU Lesser General Public License as published by
+:: the Free Software Foundation, either version 3 of the License, or
+:: (at your option) any later version.
+::
+:: EMBL-HLA-Submission is distributed in the hope that it will be useful,
+:: but WITHOUT ANY WARRANTY; without even the implied warranty of
+:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+:: GNU Lesser General Public License for more details.
+::
+:: You should have received a copy of the GNU Lesser General Public License
+:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+:: Version 1.0
+
+:: This bat file is intended to create an executable for the windows environment.
+:: It uses Anaconda for python 2.7 to keep track of packages.
+
+:: See the file README.MD for how to set up your anaconda environment.
+
+:: Please verify that your files are set up such that this file exists here:
+:: C:\MUMCScripts\EMBL-HLA-Submission\MakeExecutables.bat
+:: If that is a problem, modify the CodePath and BinPath settings below (and, if needed,
+:: the spec file "AlleleSubInstallerOptions_Windows.spec") as your needs require.
+
+:: Locations of the source code and of the output folder, the PyInstaller
+:: spec file to use, and the Anaconda environment that holds the dependencies.
+:: The quoted SET form keeps stray trailing spaces out of the values.
+SET "CodePath=C:\MUMCScripts\EMBL-HLA-Submission\src"
+SET "BinPath=C:\MUMCScripts\EMBL-HLA-Submission\bin"
+SET "SpecFile=AlleleSubInstallerOptions_Windows.spec"
+SET "CondaEnvironment=AlleleSubEnvironment"
+
+:: Run Pyinstaller to create executables.
+:: "cd /d" also switches the current drive, which a plain "cd" does not do.
+:: Stop here if the folder is missing, so pyinstaller never runs in the wrong place.
+cd /d "%CodePath%" || exit /b 1
+activate %CondaEnvironment% && pyinstaller "%SpecFile%" --distpath "%BinPath%" && deactivate
diff --git a/MakeExecutables.sh b/MakeExecutables.sh
new file mode 100755
index 0000000..1e83bbe
--- /dev/null
+++ b/MakeExecutables.sh
@@ -0,0 +1,29 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+
+# This isn't working in its current state. The spec file is apparently just for Windows;
+# it's still pulling in Windows DLLs.
+# Maybe *you* want to make this file work, I just don't think it's necessary.
+
+# If you're a linux user, you should run the program using Run_allele_submission.sh.
+
+# TODO: Make a linux specific pyinstaller spec file that works.
+
+
+#source activate AlleleSubEnvironment
+#python /home/ben/Pyinstaller/PyInstaller-3.2/pyinstaller.py AlleleSubInstallerOptions.spec
+#source deactivate
diff --git a/Run_allele_submission.bat b/Run_allele_submission.bat
new file mode 100755
index 0000000..24be771
--- /dev/null
+++ b/Run_allele_submission.bat
@@ -0,0 +1,21 @@
+:: This file is part of EMBL-HLA-Submission.
+::
+:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+:: it under the terms of the GNU Lesser General Public License as published by
+:: the Free Software Foundation, either version 3 of the License, or
+:: (at your option) any later version.
+::
+:: EMBL-HLA-Submission is distributed in the hope that it will be useful,
+:: but WITHOUT ANY WARRANTY; without even the implied warranty of
+:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+:: GNU Lesser General Public License for more details.
+::
+:: You should have received a copy of the GNU Lesser General Public License
+:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+:: Version 1.0
+
+:: See the file README.MD for how to set up your anaconda environment.
+
+:: AlleleSubmissionEMBL.py lives in the src folder next to this bat file.
+:: %~dp0 expands to the folder containing this bat file (with a trailing backslash),
+:: so the program starts correctly from any working directory.
+cd /d "%~dp0src" || exit /b 1
+activate AlleleSubEnvironment && python AlleleSubmissionEMBL.py && deactivate
+
diff --git a/Run_allele_submission.sh b/Run_allele_submission.sh
new file mode 100755
index 0000000..ad471f2
--- /dev/null
+++ b/Run_allele_submission.sh
@@ -0,0 +1,26 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+
+# See the file README.MD for how to set up your anaconda environment.
+
+# AlleleSubmissionEMBL.py lives in the src folder next to this script.
+# Resolve that folder relative to the script itself so the caller's working
+# directory does not matter, and stop if it is missing instead of running
+# python in the wrong place.
+cd "$(dirname "$0")/src" || exit 1
+
+source activate AlleleSubEnvironment
+python AlleleSubmissionEMBL.py
+source deactivate
+
+
diff --git a/src/AlleleGenerator.py b/src/AlleleGenerator.py
new file mode 100755
index 0000000..410719b
--- /dev/null
+++ b/src/AlleleGenerator.py
@@ -0,0 +1,368 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+
+from Bio.Seq import Seq
+from Bio.Alphabet import generic_dna
+import tkMessageBox
+
+import math
+
+from HLAGene import *
+
+# The AlleleGenerator class contains logic to generate an EMBL HLA allele submission
+# in ENA format.
+#
+# Typical use, as done by the GUI: set inputCellNummer, inputGene and inputAllele,
+# call processInputSequence() with the annotated sequence text, then call
+# buildENASubmission() to get the submission text.
+class AlleleGenerator():
+
+ # NOTE: these are class-level attributes. They act as defaults until an instance
+ # assigns its own value; in particular the HLAGene() below is created once, when
+ # the class is defined, and is shared until processInputSequence() replaces it.
+
+ # Path of the annotated sequence file read by readInputSequence().
+ inputFileName = ''
+ # Path written by outputENASubmissionToFile().
+ outputFileName = ''
+ # Parsed loci of the current sequence; replaced by processInputSequence().
+ sequenceAnnotation = HLAGene()
+ # Sample identifier, written to the /isolate qualifier. Example value: 12345
+ inputCellNummer = 0#12345
+ # Gene name, written to the /gene qualifiers. Example value: 'HLA-C'
+ inputGene = ''#HLA-C'
+ # Allele name, written to the /allele qualifiers. Example value: 'C0316ext'
+ inputAllele = ''#C0316ext'
+
+ # Translate a coding nucleotide sequence into a peptide string using biopython.
+ #
+ # inputSequence: nucleotide string holding the concatenated exons.
+ # Returns the peptide, cut at the first stop codon ('*') when one is present.
+ # A message box is shown when no stop codon is found, or when the stop codon
+ # is not the last residue of the translation.
+ def translateSequence(self,inputSequence):
+
+     codingDna = Seq(inputSequence, generic_dna)
+     protein = str(codingDna.translate())
+     print ('Translated Protein:' + protein)
+
+     # The stop codon *should* be the final character of the translation.
+     stopIndex = protein.find('*')
+
+     if stopIndex == -1:
+         # No stop codon at all: warn, and return the translation untouched.
+         warningText = 'The translated protein does not contain a stop codon.'
+         if len(codingDna) % 3 != 0:
+             # A length that is not a multiple of three hints at a frame shift.
+             warningText += ('\n'
+                 + 'It looks like a frame shift,\n'
+                 + 'The coding nucleotide sequence has length: '
+                 + str(len(codingDna)) + '.')
+         tkMessageBox.showinfo('No Stop Codon Found', warningText)
+         return protein
+
+     # Everything from the first stop codon onwards is dropped.
+     truncatedProtein = protein[0:stopIndex]
+
+     if stopIndex != len(protein) - 1:
+         # The stop codon is not the last residue, so real residues are being lost.
+         tkMessageBox.showinfo('Premature Stop Codon Detected',
+             'Premature stop codon found:\nPeptide Position ('
+             + str(stopIndex + 1) + '/' + str(len(protein)) + ')\n\n'
+             + 'Double check your peptide sequence,\n'
+             + 'Some aminos from the 3\' / C-Terminus\nwere spliced out.\n\n'
+             + 'Before : ' + protein
+             + '\nAfter : ' + truncatedProtein
+             + '\n')
+
+     return truncatedProtein
+
+ # Read the annotated sequence stored in self.inputFileName and hand the
+ # whole text to processInputSequence().
+ # Raises IOError if the file cannot be opened or read.
+ def readInputSequence(self):
+
+     print('Reading file: ' + self.inputFileName)
+
+     # The with-statement closes the file even when reading fails.
+     with open(self.inputFileName, 'r') as fileObject:
+         fullFile = fileObject.read()
+
+     self.processInputSequence(fullFile)
+
+ # Split an annotated nucleotide string into loci and store them in
+ # self.sequenceAnnotation.
+ #
+ # Upper case letters mark exons, lower case letters mark introns and UTRs:
+ # fiveprimeutrEXONONEintrononeEXONTWOintrontwoEXONTHREEthreeprimeutr
+ # agctagctagctAGCTAGCtagctagctAGCTAGCtagctagctAGCTAGCTAgctagctagctag
+ # Spaces, tabs, line feeds and carriage returns are removed before parsing.
+ # The first and the last locus are taken to be the 5' and 3' UTR, with exons
+ # and introns alternating in between.
+ def processInputSequence(self, inputSequenceText):
+
+     annotatedGene = HLAGene()
+     # The loci list is assigned explicitly for every new gene.
+     # (presumably HLAGene declares loci at class level - TODO confirm)
+     annotatedGene.loci = []
+
+     # Strip every kind of whitespace; the letter case is kept because it
+     # carries the exon/intron annotation.
+     cleanedGene = inputSequenceText
+     for whitespaceChar in (' ', '\n', '\t', '\r'):
+         cleanedGene = cleanedGene.replace(whitespaceChar, '')
+
+     # The unannotated copy of the full sequence is simply all upper case.
+     unannotatedGene = cleanedGene.upper()
+     annotatedGene.fullSequence = unannotatedGene
+     print('Total Sequence Length = ' + str(len(unannotatedGene)))
+
+     # Walk along the annotated sequence. A locus ends wherever the letter
+     # case flips; characters other than A, G, C, T never end a locus.
+     insideAnExon = False
+     locusStart = 0
+     for position, currentChar in enumerate(cleanedGene):
+
+         if currentChar.upper() not in ('A','G','C','T'):
+             print('Nonstandard nucleotide detected at position ' + str(position) + ' : ' + currentChar
+                 + '. If this is a wildcard character, you might be ok.')
+             continue
+
+         charIsExon = currentChar.isupper()
+         if charIsExon == insideAnExon:
+             # Still inside the same exon or intron.
+             continue
+
+         # The case flipped: store the locus that just ended. It is an exon
+         # exactly when we were inside an exon up to this point.
+         finishedLocus = GeneLocus()
+         finishedLocus.sequence = cleanedGene[locusStart:position].upper()
+         finishedLocus.exon = insideAnExon
+         annotatedGene.loci.append(finishedLocus)
+
+         insideAnExon = charIsExon
+         locusStart = position
+
+     # Whatever remains is stored as the last (3') UTR, flagged as a non-exon.
+     # NOTE(review): this is also done when the text ends in upper case.
+     lastLocus = GeneLocus()
+     lastLocus.sequence = cleanedGene[locusStart:].upper()
+     lastLocus.exon = False
+     annotatedGene.loci.append(lastLocus)
+
+     # Name the loci and print a summary of what was read.
+     annotatedGene.annotateLoci()
+     annotatedGene.printGeneSummary()
+
+     self.sequenceAnnotation = annotatedGene
+
+ # Create the text submission based on the ENA format.
+ #
+ # Reads self.sequenceAnnotation (filled by processInputSequence) together with
+ # self.inputCellNummer, self.inputGene and self.inputAllele.
+ # Returns the complete submission as one string.
+ # Raises IndexError when the annotation holds no loci.
+ #
+ # NOTE(review): the column alignment of the ID/FT/SQ lines should be checked
+ # against the ENA user manual before submitting.
+ def buildENASubmission(self):
+
+     # ENA format is the preferred submission type for EMBL. More information:
+     # http://www.ebi.ac.uk/ena/submit/sequence-submission
+     # http://www.ebi.ac.uk/ena/submit/entry-upload-templates
+     # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt
+     # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html
+     # http://www.ebi.ac.uk/ena/software/flat-file-validator
+
+     def locusRange(locus):
+         # Format a locus as an ENA location, e.g. 12..345
+         return str(locus.beginIndex) + '..' + str(locus.endIndex)
+
+     completeSequence = self.sequenceAnnotation.getCompleteSequence()
+     exonSequence = self.sequenceAnnotation.getExonSequence()
+     totalLength = self.sequenceAnnotation.totalLength()
+     loci = self.sequenceAnnotation.loci
+     featureCount = len(loci)
+     print('total calculated length = ' + str(totalLength))
+
+     # Qualifier lines that are repeated under several features.
+     geneQualifier = 'FT /gene="' + str(self.inputGene) + '"\n'
+     alleleQualifier = 'FT /allele="' + str(self.inputAllele) + '"\n'
+     productQualifier = 'FT /product="MHC class I antigen"\n'
+
+     # The document is collected as a list of pieces and joined once at the end.
+     documentParts = []
+
+     # Header
+     documentParts.append('ID XXX; XXX; linear; genomic DNA; XXX; XXX; ' + str(totalLength) + ' BP.\n')
+     documentParts.append('XX\n')
+
+     # A valid document should have an AC (Accession Number) and DE (Description) field.
+     # No accession number is available yet, so it is left blank.
+     documentParts.append('AC \n')
+     documentParts.append('XX\n')
+     documentParts.append('DE Human Leukocyte Antigen\n')
+     documentParts.append('XX\n')
+
+     # Feature table key
+     documentParts.append('FH Key Location/Qualifiers\n')
+     documentParts.append('FH\n')
+
+     # Source: it's from a human.
+     documentParts.append('FT source 1..' + str(totalLength) + '\n')
+     documentParts.append('FT /organism="Homo sapiens"\n')
+     documentParts.append('FT /db_xref="taxon:9606"\n')
+     documentParts.append('FT /mol_type="genomic DNA"\n')
+     documentParts.append('FT /chromosome="6"\n')
+     documentParts.append('FT /isolate="' + str(self.inputCellNummer) + '"\n')
+
+     # Locations of the exons. The range 1:featureCount-1 skips the first and
+     # the last locus, which are the UTRs.
+     exonRanges = [locusRange(loci[x])
+                   for x in range(1, featureCount-1)
+                   if loci[x].exon]
+
+     # mRNA: the 5' and 3' UTR are part of the mRNA,
+     # but not of the CDS (coding sequence), since they're untranslated.
+     mrnaRanges = [locusRange(loci[0])] + exonRanges + [locusRange(loci[featureCount-1])]
+     documentParts.append('FT mRNA join(' + ','.join(mrnaRanges) + ')\n')
+     documentParts.append(geneQualifier)
+     documentParts.append(alleleQualifier)
+     documentParts.append(productQualifier)
+
+     # CDS: the exons only. Joining the list always closes the parenthesis;
+     # closing it only after one particular locus could leave it open.
+     documentParts.append('FT CDS join(' + ','.join(exonRanges) + ')\n')
+     documentParts.append('FT /transl_table=1\n')
+     documentParts.append('FT /codon_start=1\n')
+     documentParts.append(geneQualifier)
+     documentParts.append(alleleQualifier)
+     documentParts.append(productQualifier)
+
+     # Peptide sequence: 66 residues on the first line, 80 on every following
+     # line. 66 is 80-14, where 14 is the length of { /translation=" }
+     # The closing quote is written after the last residue for every peptide
+     # length, including peptides that span several lines.
+     peptideSequence = self.translateSequence(exonSequence)
+     peptideChunks = [peptideSequence[0:66]]
+     for chunkStart in range(66, len(peptideSequence), 80):
+         peptideChunks.append(peptideSequence[chunkStart:chunkStart+80])
+     documentParts.append('FT /translation="' + '\nFT '.join(peptideChunks) + '"\n')
+
+     # 5'UTR
+     documentParts.append('FT 5\'UTR ' + locusRange(loci[0]) + '\n')
+     documentParts.append('FT /note="5\'UTR"\n')
+     documentParts.append(geneQualifier)
+     documentParts.append(alleleQualifier)
+
+     # Alternating Ex/Int/Ex
+     for x in range(1, featureCount-1):
+         currentFeature = loci[x]
+         featureKey = 'exon' if currentFeature.exon else 'intron'
+         documentParts.append('FT ' + featureKey + ' ' + locusRange(currentFeature) + '\n')
+
+         # Loci 1 and 2 are exon 1 and intron 1, loci 3 and 4 are number 2, ...
+         geneNumber = int(math.ceil(x / 2.0))
+         documentParts.append('FT /number=' + str(geneNumber) + '\n')
+         documentParts.append(geneQualifier)
+         documentParts.append(alleleQualifier)
+
+     # 3'UTR
+     documentParts.append('FT 3\'UTR ' + locusRange(loci[featureCount-1]) + '\n')
+     documentParts.append('FT /note="3\'UTR"\n')
+     documentParts.append(geneQualifier)
+     documentParts.append(alleleQualifier)
+     documentParts.append('XX\n')
+
+     # Sequence header with the nucleotide counts.
+     aCount = completeSequence.count('A')
+     cCount = completeSequence.count('C')
+     gCount = completeSequence.count('G')
+     tCount = completeSequence.count('T')
+     otherCount = len(completeSequence) - (aCount + cCount + gCount + tCount)
+
+     documentParts.append('SQ Sequence ' + str(totalLength) + ' BP; '
+         + str(aCount) + ' A; ' + str(cCount) + ' C; '
+         + str(gCount) + ' G; ' + str(tCount) + ' T; '
+         + str(otherCount) + ' other;\n')
+
+     # Sequence rows: up to six chunks of 10 nucleotides per row, followed by
+     # the index of the last nucleotide on that row.
+     # This format is specified in the User manual specified by EMBL.
+     for rowStart in range(0, totalLength, 60):
+         sequenceRow = completeSequence[rowStart : rowStart + 60]
+
+         # The character code for a sequence region is two blank spaces,
+         # followed by three blank spaces, for a total of 5 blanks.
+         rowText = '     '
+         for i in range(0, 6):
+             rowText += sequenceRow[i*10 : (i+1)*10] + ' '
+
+         # A short final row is filled with spaces so the index stays aligned;
+         # a complete row (=60 bp) needs no filling.
+         rowText += ' ' * (60 - len(sequenceRow))
+         rowText += str(rowStart + len(sequenceRow)) + '\n'
+         documentParts.append(rowText)
+
+     # Entry terminator. The last line of an ENA entry.
+     documentParts.append('//\n')
+
+     return ''.join(documentParts)
+
+ # Write outputText to the file named by self.outputFileName,
+ # replacing any existing content.
+ # Raises IOError if the file cannot be opened or written.
+ def outputENASubmissionToFile(self, outputText):
+
+     # The with-statement closes the file even when the write fails.
+     with open(self.outputFileName, 'w') as outputFileObject:
+         outputFileObject.write(outputText)
+
diff --git a/src/AlleleGui.py b/src/AlleleGui.py
new file mode 100755
index 0000000..7c94324
--- /dev/null
+++ b/src/AlleleGui.py
@@ -0,0 +1,240 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+SoftwareVersion = "EMBL-HLA-Submission Version 1.0"
+
+import os
+
+import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
+from Tkinter import *
+
+from AlleleGenerator import AlleleGenerator
+from HLAGene import *
+
+# The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class.
+class AlleleGui(Tkinter.Frame):
+
+ # Set up the frame inside the given Tk root window, title the window,
+ # and build all widgets.
+ def __init__(self, root):
+     windowTitle = "EMBL Novel HLA Allele Submission Tool"
+
+     Tkinter.Frame.__init__(self, root)
+     self.parent = root
+     root.title(windowTitle)
+
+     self.initialize()
+
+ # Initialize GUI elements
+ # Widgets are packed top to bottom in the order they are created here:
+ # sample/gene/allele entries, the annotated sequence input, the generate button,
+ # the submission output, the save button, and the more-info buttons.
+ # Finishes by calling updateGUI() once, so the output shows a submission for
+ # the example values.
+ def initialize(self):
+
+ # Packing options shared by the two wide buttons.
+ button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}
+
+
+ # Each input gets a label text variable and a value variable
+ # that is pre-filled with an example value.
+ self.cellNumInstrText = Tkinter.StringVar()
+ self.cellNumInstrText.set('Sample ID:')
+ self.inputCellNummer = Tkinter.StringVar()
+ self.inputCellNummer.set('11111')
+
+ self.geneInstrText = Tkinter.StringVar()
+ self.geneInstrText.set('Gene:')
+ self.inputGene = Tkinter.StringVar()
+ self.inputGene.set('HLA-C')
+
+ self.alleleInstrText = Tkinter.StringVar()
+ self.alleleInstrText.set('Allele:')
+ self.inputAllele = Tkinter.StringVar()
+ self.inputAllele.set('C0213ext')
+
+ #self.inputFeature = Tkinter.StringVar()
+ #self.inputFeature.set('AGC[AGT]CCG[GGC]AAT')
+ self.featureInstrText = Tkinter.StringVar()
+ self.featureInstrText.set('Annotated Sequence:')
+
+ self.outputEMBLSubmission = Tkinter.StringVar()
+ self.outputEMBLSubmission.set('Resulting Allele Submission:')
+
+ #Moving this to the bottom
+ #Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack()
+
+ Tkinter.Label(self, width=80, height=1, textvariable=self.cellNumInstrText).pack()
+ Tkinter.Entry(self, width=15, textvariable=self.inputCellNummer).pack()
+
+ Tkinter.Label(self, width=80, height=1, textvariable=self.geneInstrText).pack()
+ Tkinter.Entry(self, width=15, textvariable=self.inputGene).pack()
+
+ Tkinter.Label(self, width=80, height=1, textvariable=self.alleleInstrText).pack()
+ Tkinter.Entry(self, width=15, textvariable=self.inputAllele).pack()
+
+ Tkinter.Label(self, width=80, height=1, textvariable=self.featureInstrText).pack()
+
+ # Create a frame for the input widget, add scrollbars.
+ self.featureInputFrame = Tkinter.Frame(self)
+
+ self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL)
+ self.featureInputXScrollbar.pack(side=BOTTOM, fill=X)
+
+ self.featureInputYScrollbar = Scrollbar(self.featureInputFrame)
+ self.featureInputYScrollbar.pack(side=RIGHT, fill=Y)
+
+ # wrap=NONE keeps long sequence lines on one line; the horizontal scrollbar handles them.
+ self.featureInputGuiObject = Tkinter.Text(
+ self.featureInputFrame, width=80, height=12, wrap=NONE
+ , xscrollcommand=self.featureInputXScrollbar.set
+ , yscrollcommand=self.featureInputYScrollbar.set
+ )
+
+ # Second half of the scrollbar wiring: moving a scrollbar scrolls the text widget.
+ self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview)
+ self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview)
+
+ self.featureInputGuiObject.pack()
+ self.featureInputFrame.pack()
+
+ # Pre-fill the input with a tiny example: lowercase UTRs/intron, uppercase exons.
+ self.featureInputGuiObject.delete('1.0','end')
+ self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')
+
+ #Tkinter.Button(self, text='\|/ Generate an EMBL submission \|/', command=self.updateGUI).pack(**button_opt)
+ # unichr(8681) is U+21E9, a downwards arrow, shown on both sides of the button text.
+ Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.updateGUI).pack(**button_opt)
+
+ Tkinter.Label(self, width=80, height=1, textvariable=self.outputEMBLSubmission).pack()
+
+ # Output interface is contained on a frame.
+ self.submOutputFrame = Tkinter.Frame(self)
+
+ self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL)
+ self.submOutputXScrollbar.pack(side=BOTTOM, fill=X)
+
+ self.submOutputYScrollbar = Scrollbar(self.submOutputFrame)
+ self.submOutputYScrollbar.pack(side=RIGHT, fill=Y)
+
+ self.submOutputGuiObject = Tkinter.Text(
+ self.submOutputFrame, width=80, height=15, wrap=NONE
+ , xscrollcommand=self.submOutputXScrollbar.set
+ , yscrollcommand=self.submOutputYScrollbar.set
+ )
+
+ self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview)
+ self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview)
+
+ self.submOutputGuiObject.pack()
+ self.submOutputFrame.pack()
+
+ # This is the directory that contains this source file.
+ # self.idir is used inside the saveSubmissionFile method.
+ # Maybe the code should be in there.
+ # NOTE(review): confirm __file__ resolves to a useful folder in the PyInstaller build.
+ FileAndPath = os.path.abspath(__file__)
+ self.idir, self.ifile = os.path.split(FileAndPath)
+
+ Tkinter.Button(self, text='Save this submission to my computer', command=self.saveSubmissionFile).pack(**button_opt)
+
+ self.instructionText = Tkinter.StringVar()
+ #self.instructionText.set('This tool assumes you are submitting a standard HLA allele.\n'
+ # + 'HLA alleles are assumed to be fully sequenced, including 5\' and 3\' UTRs.\n'
+ # + 'Use capital letters for exons, lowercase for introns & UTRs, like this:\n'
+ # + 'five\'utr EXON1 intron1 EXON2 ... EXON{X} three\'utr\n'
+ # + 'All spaces, tabs, and newlines are discarded and ignored.')
+ self.instructionText.set('This tool was developed by the Tissue Typing Laboratory at\nMaastricht University Medical Center.\nFor more information:')
+ Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack()
+
+
+ # Make a frame for the more-info buttons
+ # The two buttons sit side by side, so this frame uses grid() internally.
+ self.moreInfoFrame = Tkinter.Frame(self)
+
+ Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse).grid(row=0, column=0)
+ Tkinter.Button(self.moreInfoFrame, text='Contacting or Citing MUMC', command=self.contactInformation).grid(row=0, column=1)
+
+
+
+ self.moreInfoFrame.pack()
+
+
+ # Generate a submission for the example values right away.
+ self.updateGUI()
+
+ # Show a popup window with short usage instructions.
+ def howToUse(self):
+     # One entry per displayed line; an empty entry is a blank line.
+     instructionLines = [
+         'This software is to be used to create an',
+         'EMBL-formatted submission document,',
+         'which specifies a novel HLA allele,',
+         'including exon/intron annotation.',
+         '',
+         'This tool assumes you are submitting a',
+         'full length HLA allele.',
+         'HLA alleles should be fully sequenced,',
+         'including 5\' and 3\' UTRs.',
+         'Use capital letters for exons,',
+         'lowercase for introns & UTRs.',
+         '',
+         'An example is included in the form,',
+         'Sequences should follow this pattern:',
+         '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr',
+         '',
+         'All spaces, tabs, and newlines are',
+         'removed and ignored.',
+     ]
+
+     tkMessageBox.showinfo('How to use this tool', '\n'.join(instructionLines))
+
+ # Show a popup window with contact details for MUMC and the address
+ # of the github page.
+ def contactInformation(self):
+     # One entry per displayed line; an empty entry is a blank line.
+     # The web address is split over two lines to fit the popup.
+     contactLines = [
+         'This software was created at',
+         'Maastricht University Medical Center',
+         'Transplantation Immunology',
+         'Tissue Typing Laboratory.',
+         'by Ben Matern:',
+         'ben.matern@mumc.nl',
+         '',
+         'Please send Ben your bioinformatics',
+         'and data related questions.',
+         '',
+         'all other inquiries can be directed',
+         'to Marcel Tilanus:',
+         'm.tilanus@mumc.nl',
+         '',
+         'This code will be hosted at:',
+         'https://github.com/transplantation-',
+         'immunology/EMBL-HLA-Submission',
+         'You will find more information on',
+         'EMBL\'s data format on that page.',
+     ]
+
+     tkMessageBox.showinfo('Contact Information', '\n'.join(contactLines))
+
+ # Ask user for a output file location, and write the EMBL submission to a file.
+ # This takes the input from the output field, rather than generate a new submission.
+ # So the user can edit the submission before or after saving it.
+ # Nothing is written when the user cancels the dialog.
+ def saveSubmissionFile(self):
+
+     self.dir_opt = options = {}
+     options['initialdir'] = self.idir
+     options['parent'] = self
+     options['title'] = 'Specify your output file.'
+     outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)
+
+     # asksaveasfile returns None when the dialog is cancelled.
+     if outputFileObject is None:
+         return
+
+     # Close the file in every case so the text is flushed to disk
+     # and the handle is released.
+     try:
+         submissionText = self.submOutputGuiObject.get('1.0', 'end')
+         outputFileObject.write(submissionText)
+     finally:
+         outputFileObject.close()
+
+ # Read the sample, gene, allele and annotated sequence from the input
+ # widgets, generate the EMBL submission text, and show it in the output widget.
+ def updateGUI(self):
+
+     generator = AlleleGenerator()
+
+     # Copy the three entry fields onto the generator.
+     generator.inputCellNummer = self.inputCellNummer.get()
+     generator.inputGene = self.inputGene.get()
+     generator.inputAllele = self.inputAllele.get()
+
+     # Parse the annotated sequence, then build the submission from it.
+     annotatedSequenceText = self.featureInputGuiObject.get('1.0', 'end')
+     generator.processInputSequence(annotatedSequenceText)
+     submissionText = generator.buildENASubmission()
+
+     # Replace whatever the output widget showed before.
+     outputWidget = self.submOutputGuiObject
+     outputWidget.delete('1.0','end')
+     outputWidget.insert('1.0', submissionText)
+
diff --git a/src/AlleleSubInstallerOptions_Windows.spec b/src/AlleleSubInstallerOptions_Windows.spec
new file mode 100755
index 0000000..72a4717
--- /dev/null
+++ b/src/AlleleSubInstallerOptions_Windows.spec
@@ -0,0 +1,49 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+
+# This file contains specifications for packaging the EMBL-HLA-Submission GUI
+# as a standalone executable. This file is meant to be used with pyinstaller
+# http://www.pyinstaller.org/
+
+
+# -*- mode: python -*-
+
+# No bytecode encryption.
+block_cipher = None
+
+# Modules that are bundled explicitly in addition to what the analysis finds.
+hidden_imports = [
+    'six',
+    'packaging',
+    'packaging.requirements',
+    'packaging.version',
+    'packaging.specifiers',
+    'Tkinter',
+    'tkFileDialog',
+    'Tkconstants',
+]
+
+# Analyse the entry script to collect everything it depends on.
+a = Analysis(
+    ['AlleleSubmissionEMBL.py'],
+    binaries=None,
+    datas=None,
+    hiddenimports=hidden_imports,
+    hookspath=[],
+    runtime_hooks=[],
+    excludes=['tkinter'],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+)
+
+# Archive of the pure python modules.
+pyz = PYZ(
+    a.pure,
+    a.zipped_data,
+    cipher=block_cipher,
+)
+
+# Scripts, binaries, zip files and data are all passed to EXE, producing a
+# console executable named AlleleSubmissionEMBLWindows.
+exe = EXE(
+    pyz,
+    a.scripts,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    name='AlleleSubmissionEMBLWindows',
+    debug=False,
+    strip=False,
+    upx=True,
+    console=True,
+)
diff --git a/src/AlleleSubmissionEMBL.py b/src/AlleleSubmissionEMBL.py
new file mode 100755
index 0000000..1ffa0b5
--- /dev/null
+++ b/src/AlleleSubmissionEMBL.py
@@ -0,0 +1,65 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+
+SoftwareVersion = "EMBL-HLA-Submission Version 1.0"
+
+import Tkinter
+import sys
+
+from AlleleGui import AlleleGui
+
+# Entry point: with no arguments launch the GUI, with a version flag print the
+# software version, and with anything else print usage instructions.
+if __name__=='__main__':
+    try:
+        # Command-line handling is deliberately minimal because no parameters
+        # are expected. sys.argv[0] is the script itself and does not count.
+        argumentCount = len(sys.argv)
+
+        if argumentCount == 1:
+            print('\n\n\n\n\n***Creating an EMBL Allele submission***\n')
+
+            root = Tkinter.Tk()
+            AlleleGui(root).pack()
+            root.mainloop()
+
+            print('Done. Yay.')
+
+        # Print the Software Version
+        elif (argumentCount == 2
+                and sys.argv[1].lower() in ('-v', '--version', '-version')):
+            print(SoftwareVersion)
+
+        # You executed the software wrong. Sorry.
+        else:
+            print("usage:\n" +
+                "\tRun this program using standard python call:\n" +
+                "\t$python AlleleSubmissionEMBL.py\n" +
+                "\tbiopython must be accessible in your python environment. To run using Anaconda,\n" +
+                "\tCheck readme at https://github.com/transplantation-immunology/EMBL-HLA-Submission\n"
+                )
+
+
+    except Exception:
+        # Top-level handler: report the problem, then re-raise so the
+        # traceback is not lost.
+        # Single-argument print(...) behaves the same as the Python 2 print
+        # statement and matches the calls used in the rest of this script.
+        print('Unexpected problem during execution:')
+        print(sys.exc_info()[1])
+        raise
+
diff --git a/src/HLAGene.py b/src/HLAGene.py
new file mode 100755
index 0000000..9b2b7ca
--- /dev/null
+++ b/src/HLAGene.py
@@ -0,0 +1,103 @@
+# This file is part of EMBL-HLA-Submission.
+#
+# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+
+# Version 1.0
+
+class GeneLocus():
+    """A single locus of a gene: an exon, an intron, or a UTR."""
+
+    # Class-level defaults; per-instance values are assigned over these.
+    name, sequence = '', ''
+    exon = False
+    beginIndex, endIndex = 0, 0
+
+    def length(self):
+        """Return the size of the inclusive range beginIndex..endIndex."""
+        span = self.endIndex - self.beginIndex
+        return span + 1
+
+class HLAGene():
+    """An entire HLA gene, held as an ordered list of GeneLocus objects.
+
+    The expected locus order is 5UT -> EX1 -> I1 -> EX2 -> I2 -> ... -> EXN -> 3UT.
+    """
+
+    # Class-level defaults stay so HLAGene.fullSequence / HLAGene.loci still
+    # resolve; __init__ shadows both on every instance.
+    fullSequence = ''
+    loci = []
+
+    def __init__(self):
+        # Each gene gets its own list: appending to the shared class-level
+        # list would leak loci from one HLAGene into every other instance.
+        self.fullSequence = ''
+        self.loci = []
+
+    def totalLength(self):
+        """Return the length of the complete (all loci) sequence."""
+        return len(self.getCompleteSequence())
+
+    def getCompleteSequence(self):
+        """Combine the UTRs, exons and introns into one contiguous sequence."""
+        sequence = ''
+        for locus in self.loci:
+            sequence += locus.sequence
+        return sequence
+
+    def getExonSequence(self):
+        """Combine the exons into one sequence; the first and last loci are skipped."""
+        sequence = ''
+        for locus in self.loci[1:-1]:
+            if locus.exon:
+                sequence += locus.sequence
+        return sequence
+
+    def annotateLoci(self):
+        """Name every locus and record its 1-based, inclusive index range.
+
+        The first locus is named 5UT and the last 3UT; in between, odd
+        positions are exons (EX1, EX2, ...) and even positions are introns
+        (I1, I2, ...). With fewer than three loci nothing is annotated.
+        """
+        print('Annotating Gene Now')
+        lastPosition = len(self.loci) - 1
+        if lastPosition < 2:
+            print('I expected at least three loci in order to annotate them. Please double check your input file.')
+            return
+        lociBeginIndex = 1
+        for x, locus in enumerate(self.loci):
+            if x == 0:
+                locus.name = '5UT'
+            elif x == lastPosition:
+                locus.name = '3UT'
+            elif x % 2 == 1:
+                # Floor division keeps the number an int on Python 2 and 3.
+                locus.name = 'EX' + str(x // 2 + 1)
+            else:
+                locus.name = 'I' + str(x // 2)
+            # Indices look like 5UT: 1-65, EX1: 66-137, I1: 138-267.
+            locus.beginIndex = lociBeginIndex
+            lociBeginIndex += len(locus.sequence)
+            locus.endIndex = lociBeginIndex - 1
+
+    def printGeneSummary(self):
+        """Print each locus name, index range and sequence to the console."""
+        print('\nPrinting Gene Summary')
+        for currentLocus in self.loci:
+            print(currentLocus.name + ":\t"
+                + str(currentLocus.beginIndex) + '-' + str(currentLocus.endIndex)
+                + '\n' + currentLocus.sequence)
+        print('')
+
diff --git a/src/helloworld.py b/src/helloworld.py
deleted file mode 100644
index f8091b0..0000000
--- a/src/helloworld.py
+++ /dev/null
@@ -1,3 +0,0 @@
-#Adding a comment to the file.
-
-print('hello world')