diff --git a/HEADER b/HEADER
index 6d61fb3..dc43bfc 100755
--- a/HEADER
+++ b/HEADER
@@ -1,16 +1,15 @@
-# This file is part of EMBL-HLA-Submission.
+# This file is part of saddle-bags.
#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# saddle-bags is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# saddle-bags is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
-# Version 1.0
diff --git a/MakeExecutables.bat b/MakeExecutables.bat
index db32d1d..68cbe52 100755
--- a/MakeExecutables.bat
+++ b/MakeExecutables.bat
@@ -1,17 +1,17 @@
-:: This file is part of EMBL-HLA-Submission.
+:: This file is part of saddle-bags.
::
-:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+:: saddle-bags is free software: you can redistribute it and/or modify
:: it under the terms of the GNU Lesser General Public License as published by
:: the Free Software Foundation, either version 3 of the License, or
:: (at your option) any later version.
::
-:: EMBL-HLA-Submission is distributed in the hope that it will be useful,
+:: saddle-bags is distributed in the hope that it will be useful,
:: but WITHOUT ANY WARRANTY; without even the implied warranty of
:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
:: GNU Lesser General Public License for more details.
::
:: You should have received a copy of the GNU Lesser General Public License
-:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+:: along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
:: Version 1.0
diff --git a/MakeExecutables.sh b/MakeExecutables.sh
index 1e83bbe..364309b 100755
--- a/MakeExecutables.sh
+++ b/MakeExecutables.sh
@@ -1,19 +1,18 @@
-# This file is part of EMBL-HLA-Submission.
+# This file is part of saddle-bags.
#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# saddle-bags is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# saddle-bags is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
-# Version 1.0
# This isn't working in it's current state. The spec file is apparently just for windows,
# it's still pulling in windows DLLs.
diff --git a/README.md b/README.md
index 6a6fda3..3635d17 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ To set up the environment in anaconda:
Linux/Mac:
```
-conda create --name AlleleSubEnvironment biopython six
+conda create --name AlleleSubEnvironment biopython six pycurl
source activate AlleleSubEnvironment
pip install pyinstaller packaging
source deactivate
diff --git a/Run_allele_submission.bat b/Run_allele_submission.bat
index a7152a8..b893555 100755
--- a/Run_allele_submission.bat
+++ b/Run_allele_submission.bat
@@ -1,21 +1,21 @@
-:: This file is part of EMBL-HLA-Submission.
+:: This file is part of saddle-bags.
::
-:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+:: saddle-bags is free software: you can redistribute it and/or modify
:: it under the terms of the GNU Lesser General Public License as published by
:: the Free Software Foundation, either version 3 of the License, or
:: (at your option) any later version.
::
-:: EMBL-HLA-Submission is distributed in the hope that it will be useful,
+:: saddle-bags is distributed in the hope that it will be useful,
:: but WITHOUT ANY WARRANTY; without even the implied warranty of
:: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
:: GNU Lesser General Public License for more details.
::
:: You should have received a copy of the GNU Lesser General Public License
-:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
+:: along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
:: Version 1.0
:: See the file README.MD for how to set up your anaconda environment.
-activate AlleleSubEnvironment && cd src && python AlleleSubmissionEMBL.py && deactivate
+activate AlleleSubEnvironment && cd src && python AlleleSubmissionMain.py && deactivate
diff --git a/Run_allele_submission.sh b/Run_allele_submission.sh
index ad471f2..3da7d12 100755
--- a/Run_allele_submission.sh
+++ b/Run_allele_submission.sh
@@ -1,26 +1,24 @@
-# This file is part of EMBL-HLA-Submission.
+# This file is part of saddle-bags.
#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# saddle-bags is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# saddle-bags is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
-
-# Version 1.0
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
# See the file README.MD for how to set up your anaconda environment.
cd src
source activate AlleleSubEnvironment
-python AlleleSubmissionEMBL.py
+python AlleleSubmissionMain.py
source deactivate
diff --git a/src/AlleleGenerator.py b/src/AlleleGenerator.py
deleted file mode 100755
index 88a1604..0000000
--- a/src/AlleleGenerator.py
+++ /dev/null
@@ -1,484 +0,0 @@
-# This file is part of EMBL-HLA-Submission.
-#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
-
-# Version 1.0
-
-from Bio.Seq import Seq
-from Bio.Alphabet import generic_dna
-import sys
-import tkMessageBox
-
-import math
-
-from HLAGene import *
-
-# The AlleleGenerator class contains logic to generate an EMBL HLA allele submission
-# In ENA format.
-class AlleleGenerator():
-
- def __init__(self):
-
- self.inputFileName = ''
- self.outputFileName = ''
- self.sequenceAnnotation = HLAGene()
- self.inputCellNummer = 0
- self.inputGene = ''
- self.inputAllele = ''
-
- # This is a short wrapper method to use biopython's translation method.
- # Most of this code is just checking for things that went wrong
- def translateSequence(self,inputSequence):
-
- proteinSequence = ''
-
- try:
- # Do nothing if the input sequence is blank.
- if( len(inputSequence) > 0 ):
-
- coding_dna = Seq(inputSequence, generic_dna)
- proteinSequence = str(coding_dna.translate())
- print ('Exon Sequence before translation:' + coding_dna)
- print ('Translated Protein:' + proteinSequence)
-
- # Perform Sanity Checks.
- # Stop codon *should* be at the end of the protein.
- # Here we seek out the first instance of a stop codon,
- # and remove the peptides afterwards.
- # because that's what happens in real life.
- stopCodonLocation = proteinSequence.find('*')
-
- # If no stop codon was found
- if (stopCodonLocation == -1):
- # If multiple of three (correct codon length)
- if(len(coding_dna) % 3 == 0):
- tkMessageBox.showinfo('No Stop Codon Found',
- 'The translated protein does not contain a stop codon.' )
-
- # Wrong Codon Length
- else:
- tkMessageBox.showinfo('No Stop Codon Found',
- 'The translated protein does not contain a stop codon.\n' +
- 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.')
-
- # If Stop Codon is in the end of the protein (This is expected and correct)
- elif (stopCodonLocation == len(proteinSequence) - 1):
- # If multiple of three (correct codon length)
- if(len(coding_dna) % 3 == 0):
- # Everything is fine in this case. Trim off the stop codon
- proteinSequence = proteinSequence[0:stopCodonLocation]
- pass
- # Wrong Codon Length
- else:
- tkMessageBox.showinfo('Extra Nucleotides After the Stop Codon',
- 'The stop codon is at the correct position in the protein, but ' +
- 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.\n\n' +
- 'Please double check your sequence.')
- proteinSequence = proteinSequence[0:stopCodonLocation]
-
- # Else Stop Codon is premature (before the end of the protein)
- else:
- # If multiple of three (correct codon length)
- if(len(coding_dna) % 3 == 0):
- tkMessageBox.showinfo('Premature Stop Codon Detected',
- 'Premature stop codon found:\nProtein Position (' +
- str(stopCodonLocation + 1) + '/' +
- str(len(proteinSequence)) + ')\n\n' +
- 'Double check your protein sequence,\n' +
- 'this might indicate a missense mutation.\n\n' +
- 'Translated Protein:\n' + proteinSequence +
- '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] +
- '\n'
- )
- proteinSequence = proteinSequence[0:stopCodonLocation]
-
-
- # Wrong Codon Length
- else:
- tkMessageBox.showinfo('Premature Stop Codon Detected',
- 'Premature stop codon found:\nProtein Position (' +
- str(stopCodonLocation + 1) + '/' +
- str(len(proteinSequence)) + ')\n\n' +
- 'Nucleotide count is not a multiple of 3,\n' +
- 'Double check your protein sequence,\n' +
- 'this might indicate a missense mutation.\n\n' +
- 'Translated Protein:\n' + proteinSequence +
- '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] +
- '\n'
- )
- proteinSequence = proteinSequence[0:stopCodonLocation]
- else:
- print('Translating a nucleotide sequence of length 0. That was easy.')
- pass
-
- return proteinSequence
-
- except Exception:
- print 'Problem when translating protein:'
- print sys.exc_info()[1]
- tkMessageBox.showinfo('Protein Translation Error',
- 'I could not translate your protein:\n' + str(sys.exc_info()[1]))
-
- raise
-
- # The input file should be a string of nucleotides, with capital letters to identify exons and introns.
- # Annotations are expected and read in this format:
- # fiveprimeutrEXONONEintrononeEXONTWOintrontwoEXONTHREEthreeprimeutr
- # agctagctagctAGCTAGCtagctagctAGCTAGCtagctagctAGCTAGCTAgctagctagctag
- # All spaces, line feeds, and tabs are removed and ignored.
- def processInputSequence(self, inputSequenceText):
-
- resultGeneLoci = HLAGene()
-
- # Trim out any spaces, tabs, newlines. Uppercase.
- cleanedGene = inputSequenceText.replace(' ','').replace('\n','').replace('\t','').replace('\r','')
-
- # Capitalize, so I can store a copy of the full unannotated sequence.
- unannotatedGene = cleanedGene.upper()
- resultGeneLoci.fullSequence = unannotatedGene
- print('Total Sequence Length = ' + str(len(unannotatedGene)))
-
- # Loop through the cleaned and annotated input sequence,
- # capitals and lowercase letters to determine exon start and end
- if(len(cleanedGene) > 0):
-
- # Is the first feature an exon or an intron?
- # If we begin in an Exon
- if( cleanedGene[0] in ('A','G','C','T')):
- insideAnExon = True
- # If we begin in an Intron/UTR
- elif( cleanedGene[0] in ('a','g','c','t')):
- insideAnExon = False
- else:
- # Nonstandard nucleotide? I should start panicking.
- #raise Exception('Nonstandard Nucleotide, not sure how to handle it')
- print('Nonstandard Nucleotide at the beginning of the sequence, not sure how to handle it')
- insideAnExon = False
-
-
- locusBeginPosition = 0
- for x in range(0, len(cleanedGene)):
- currentChar = cleanedGene[x]
-
- # Is this a standard nucleotide character?
- if(currentChar.upper() in ('A','G','C','T')):
-
- if(currentChar.isupper()):
- if(insideAnExon):
- #We're STILL in an exon. In this case, I should just do nothing and continue.
- pass
- else:
- #In this case, we're just starting an EXON.
- #Store the last Intron in the list.
- currentIntron = GeneLocus()
- currentIntron.sequence = cleanedGene[locusBeginPosition:x].upper()
- currentIntron.exon = False
- resultGeneLoci.loci.append(currentIntron)
- insideAnExon=True
- locusBeginPosition = x
- pass
-
- else:
- if not (insideAnExon):
- #We're STILL in an intron. Continue.
- pass
- else:
- #Starting a new Intron.
- # Store an Exon in the list.
- currentExon = GeneLocus()
- currentExon.sequence = cleanedGene[locusBeginPosition:x].upper()
- currentExon.exon = True
- resultGeneLoci.loci.append(currentExon)
- insideAnExon = False
- locusBeginPosition=x
- pass
- else:
- print('Nonstandard nucleotide detected at position ' + str(x) + ' : ' + currentChar
- + '. If this is a wildcard character, you might be ok.')
-
- # We've reached the end of the loop and we still need to store the last feature.
- # Should be a 3' UTR, but I can't be sure, people like to put in weird sequences.
- currentIntron = GeneLocus()
- currentIntron.sequence = cleanedGene[locusBeginPosition:len(cleanedGene)].upper()
- currentIntron.exon = insideAnExon
- resultGeneLoci.loci.append(currentIntron)
-
- # Annotate the loci (name them) and print the results of the read file.
- resultGeneLoci.annotateLoci()
- resultGeneLoci.printGeneSummary()
-
- # If the sequence is empty
- else:
- print('Empty sequence, I don\'t have anything to do.')
-
- self.sequenceAnnotation = resultGeneLoci
-
-
-
- def printHeader(self):
- headerText = ''
-
- # Print header
- headerText += 'ID XXX; XXX; linear; genomic DNA; XXX; XXX; ' + str(self.sequenceAnnotation.totalLength()) + ' BP.\n'
- headerText += 'XX\n'
- # A valid document should have an AC (Accession Number) and DE (Description) field.
- # I don't have an AC number available, so it's blank.
- headerText += 'AC \n'
- headerText += 'XX\n'
- headerText += 'DE Human Leukocyte Antigen\n'
- headerText += 'XX\n'
-
- # Print key
- headerText += ('FH Key Location/Qualifiers\n')
- headerText += ('FH\n')
-
- # Print source
- # It's from a human.
- headerText += ('FT source 1..' + str(self.sequenceAnnotation.totalLength()) + '\n')
- headerText += ('FT /organism="Homo sapiens"\n')
- headerText += ('FT /db_xref="taxon:9606"\n')
- headerText += ('FT /mol_type="genomic DNA"\n')
- headerText += ('FT /chromosome="6"\n')
- headerText += ('FT /isolate="' + str(self.inputCellNummer) + '"\n')
-
- return headerText
-
- def printMRNA(self):
- mRNAText = ''
- # Print mRNA
- mRNAText += ('FT mRNA join(')
-
- # Iterate through the indices of the UTRs and exons.
- # The 3' and 5' UTR are included in the mRNA
- for x in range(0,len(self.sequenceAnnotation.loci)):
- geneLocus = self.sequenceAnnotation.loci[x]
- # If it is an exon or UTR
- if (geneLocus.exon or 'UT' in geneLocus.name):
- mRNAText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex) + ','
-
- # Trim off the last comma and add a parenthese
- mRNAText = mRNAText[0:len(mRNAText)-1] + ')\n'
-
- mRNAText += ('FT /gene="' + str(self.inputGene) + '"\n')
- mRNAText += ('FT /allele="' + str(self.inputAllele) + '"\n')
- mRNAText += ('FT /product=\"MHC class I antigen\"\n')
-
- return mRNAText
-
-
- def printCDS(self):
- cdsText = ''
-
- # Print CDS
- # CDS is the coding sequence. It should include the exons, but not the UTRs/Introns
- # The range 1:featureCount-1 will exclude the UTRs.
- cdsText += ('FT CDS join(')
- for x in range(0,len(self.sequenceAnnotation.loci)):
- geneLocus = self.sequenceAnnotation.loci[x]
- if (geneLocus.exon):
- cdsText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex)
- if not x==len(self.sequenceAnnotation.loci)-2:
- cdsText += ','
- else:
- cdsText += ')\n'
-
- cdsText += ('FT /transl_table=1\n')
- cdsText += ('FT /codon_start=1\n')
- cdsText += ('FT /gene="' + str(self.inputGene) + '"\n')
- cdsText += ('FT /allele="' + str(self.inputAllele) + '"\n')
- cdsText += ('FT /product=\"MHC class I antigen\"\n')
- cdsText += ('FT /translation=\"')
-
- # Some simple formatting for the peptide sequence, making it human and computer readable.
- # 80 peptides per line. Except the first line, which is 66.
- # 66 is 80-14, where 14 is the length of { /translation=" }
- peptideSequence = self.translateSequence(self.sequenceAnnotation.getExonSequence())
- if(len(peptideSequence) < 66):
- cdsText += (peptideSequence) + '\"\n'
- else:
- cdsText += peptideSequence[0:66] + '\n'
- i=66
- while (i < len(peptideSequence)):
- cdsText += 'FT ' + peptideSequence[i:i+80] + '\n'
- i += 80
-
- return cdsText
-
- def printFeatures(self):
- featureText = ''
-
- exonIndex = 1
- intronIndex = 1
-
- geneHas3UTR = False
- geneHas5UTR = False
-
- for x in range(0,len(self.sequenceAnnotation.loci)):
- currentFeature = self.sequenceAnnotation.loci[x]
-
- # 3' UTR
- if(currentFeature.name == '3UT'):
- featureText += ('FT 3\'UTR ' + str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex) + '\n')
- featureText += ('FT /note=\"3\'UTR\"\n')
- featureText += ('FT /gene="' + str(self.inputGene) + '"\n')
- featureText += ('FT /allele="' + str(self.inputAllele) + '"\n')
- geneHas3UTR = True
-
- # 5' UTR
- elif(currentFeature.name == '5UT'):
- featureText += ('FT 5\'UTR ' + str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex) + '\n')
- featureText += ('FT /note=\"5\'UTR\"\n')
- featureText += ('FT /gene="' + str(self.inputGene) + '"\n')
- featureText += ('FT /allele="' + str(self.inputAllele) + '"\n')
- geneHas5UTR = True
-
- # Exon
- elif(currentFeature.exon):
- featureText += ('FT exon ' + str(currentFeature.beginIndex)
- + '..' + str(currentFeature.endIndex) + '\n')
- featureText += ('FT /number=' + str(exonIndex) + '\n')
- featureText += ('FT /gene="' + str(self.inputGene) + '"\n')
- featureText += ('FT /allele="' + str(self.inputAllele) + '"\n')
- exonIndex += 1
-
- # Intron
- else:
- featureText += ('FT intron ' + str(currentFeature.beginIndex)
- + '..' + str(currentFeature.endIndex) + '\n')
- featureText += ('FT /number=' + str(intronIndex) + '\n')
- featureText += ('FT /gene="' + str(self.inputGene) + '"\n')
- featureText += ('FT /allele="' + str(self.inputAllele) + '"\n')
- intronIndex += 1
-
-
- featureText += ('XX\n')
-
- # Do a quick sanity check. If we are missing either UTR I should warn the user.
- # But move on with your life, this is not worth getting upset over.
- if (not geneHas3UTR and not geneHas5UTR):
- tkMessageBox.showinfo('Missing UTRs',
- 'This sequence has no 5\' or 3\' UTR.\n\n' +
- 'Use lowercase nucleotides at the\n' +
- 'beginning and end of your DNA\n' +
- 'sequence to specify the 5\' and 3\' UTRs.' )
- elif (not geneHas5UTR):
- tkMessageBox.showinfo('Missing 5\' UTR',
- 'This sequence has no 5\' UTR.\n\n' +
- 'Use lowercase nucleotides at the\n' +
- 'beginning and end of your DNA\n' +
- 'sequence to specify the 5\' and 3\' UTRs.' )
- elif (not geneHas3UTR):
- tkMessageBox.showinfo('Missing 3\' UTR',
- 'This sequence has no 3\' UTR.\n\n' +
- 'Use lowercase nucleotides at the\n' +
- 'beginning and end of your DNA\n' +
- 'sequence to specify the 5\' and 3\' UTRs.' )
- else:
- print('The UTRs look fine.')
- pass
-
- return featureText
-
- def printSequence(self):
- sequenceText = ''
-
- completeSequence = self.sequenceAnnotation.getCompleteSequence().upper()
-
- cCount = completeSequence.count('C')
- gCount = completeSequence.count('G')
- tCount = completeSequence.count('T')
- aCount = completeSequence.count('A')
- otherCount = self.sequenceAnnotation.totalLength() - (cCount + gCount + tCount + aCount)
-
- sequenceText += ('SQ Sequence ' + str(self.sequenceAnnotation.totalLength()) + ' BP; '
- + str(aCount) + ' A; ' + str(cCount) + ' C; '
- + str(gCount) + ' G; ' + str(tCount) + ' T; '
- + str(otherCount) + ' other;\n')
-
- # Here's some logic to print the sequence information in groups of 10.
- # This format is specified in the User manual specified by EMBL.
- currentSeqIndex = 0
-
- while (currentSeqIndex < self.sequenceAnnotation.totalLength()):
- # The character code for a sequence region is two blank spaces,
- # followed by three blank spaces, for a total of 5 blanks.
- sequenceText += ' '
- sequenceRow = self.sequenceAnnotation.getCompleteSequence()[currentSeqIndex : currentSeqIndex + 60]
-
- # A sequenceChunk is 10 nucleotides in this context.
- # Format specifies up to six "chunks" per line.
- for i in range(0,6):
- sequenceChunk = sequenceRow[i*10 : (i+1)*10]
- sequenceText += sequenceChunk + ' '
-
- # If line is complete (=60 bp), we can print the nucleotide index and move on to the next row.
- if(len(sequenceRow) == 60):
- sequenceText += str(currentSeqIndex + 60) + '\n'
- # but if line is not complete (this is more likely, and more complicated.)
- else:
- # Fill with spaces to align the nucleotide indices at the end of the sequence.
- numberSpaces = 60-len(sequenceRow)
- for n in range (0, numberSpaces):
- sequenceText += ' '
- sequenceText += (str(len(sequenceRow) + currentSeqIndex) + '\n')
-
- # The next row of the sequence
- currentSeqIndex += 60
-
- return sequenceText
-
-
- # Create the text submission based on the ENA format.
- def buildENASubmission(self):
-
- # ENA format is the preferred submission type for EMBL. More information:
- # http://www.ebi.ac.uk/ena/submit/sequence-submission
- # http://www.ebi.ac.uk/ena/submit/entry-upload-templates
- # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt
- # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html
- # http://www.ebi.ac.uk/ena/software/flat-file-validator
-
- documentBuffer = ''
-
- totalLength = self.sequenceAnnotation.totalLength()
- print('total calculated length = ' + str(totalLength))
-
- if(totalLength > 0):
-
- # These are the main sections of the ENA submission.
- documentBuffer += self.printHeader()
- documentBuffer += self.printMRNA()
- documentBuffer += self.printCDS()
- documentBuffer += self.printFeatures()
- documentBuffer += self.printSequence()
-
- # Print entry terminator. The last line of an ENA entry.
- documentBuffer += ('//\n')
-
- else:
- tkMessageBox.showinfo('No HLA Sequence Found',
- 'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' )
-
- pass
-
-
- return documentBuffer
-
- # Simple method to write the results to a file on your computer.
- def outputENASubmissionToFile(self, outputText):
-
- outputFileObject = open(self.outputFileName, 'w')
- outputFileObject.write(outputText)
- outputFileObject.close()
-
diff --git a/src/AlleleGui.py b/src/AlleleGui.py
deleted file mode 100755
index 1a99985..0000000
--- a/src/AlleleGui.py
+++ /dev/null
@@ -1,242 +0,0 @@
-# This file is part of EMBL-HLA-Submission.
-#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>.
-
-# Version 1.0
-SoftwareVersion = "Bhast Version 1.0"
-
-import os
-
-import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
-from Tkinter import *
-
-from AlleleGenerator import AlleleGenerator
-from HLAGene import *
-
-# The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class.
-class AlleleGui(Tkinter.Frame):
-
- # Initialize the GUI
- def __init__(self, root):
- Tkinter.Frame.__init__(self, root)
- root.title("Bhast - A Novel HLA Allele Submission Generator")
- self.parent = root
-
- # Ctrl-A doesn't work by default in TK. I guess I need to do it myself.
- root.bind_class("Text","", self.selectall)
-
- self.initialize()
-
- # I shouldn't need to write a select-All method but TK is kind of annoying.
- def selectall(self, event):
-
- event.widget.tag_add("sel","1.0","end")
-
- # Initialize GUI elements
- def initialize(self):
-
- button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}
-
- self.cellNumInstrText = Tkinter.StringVar()
- self.cellNumInstrText.set('Sample ID:')
- self.inputCellNummer = Tkinter.StringVar()
-
- self.geneInstrText = Tkinter.StringVar()
- self.geneInstrText.set('Gene:')
- self.inputGene = Tkinter.StringVar()
-
- self.alleleInstrText = Tkinter.StringVar()
- self.alleleInstrText.set('Allele:')
- self.inputAllele = Tkinter.StringVar()
-
- self.featureInstrText = Tkinter.StringVar()
- self.featureInstrText.set('Annotated Sequence:')
-
- self.outputEMBLSubmission = Tkinter.StringVar()
- self.outputEMBLSubmission.set('Resulting Allele Submission:')
-
- Tkinter.Label(self, width=80, height=1, textvariable=self.cellNumInstrText).pack()
- Tkinter.Entry(self, width=15, textvariable=self.inputCellNummer).pack()
-
- Tkinter.Label(self, width=80, height=1, textvariable=self.geneInstrText).pack()
- Tkinter.Entry(self, width=15, textvariable=self.inputGene).pack()
-
- Tkinter.Label(self, width=80, height=1, textvariable=self.alleleInstrText).pack()
- Tkinter.Entry(self, width=15, textvariable=self.inputAllele).pack()
-
- Tkinter.Label(self, width=80, height=1, textvariable=self.featureInstrText).pack()
-
- # Create a frame for the input widget, add scrollbars.
- self.featureInputFrame = Tkinter.Frame(self)
-
- self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL)
- self.featureInputXScrollbar.pack(side=BOTTOM, fill=X)
-
- self.featureInputYScrollbar = Scrollbar(self.featureInputFrame)
- self.featureInputYScrollbar.pack(side=RIGHT, fill=Y)
-
- self.featureInputGuiObject = Tkinter.Text(
- self.featureInputFrame, width=80, height=12, wrap=NONE
- , xscrollcommand=self.featureInputXScrollbar.set
- , yscrollcommand=self.featureInputYScrollbar.set
- )
-
- self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview)
- self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview)
-
- self.featureInputGuiObject.pack()
- self.featureInputFrame.pack()
-
- Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.constructSubmission).pack(**button_opt)
-
- Tkinter.Label(self, width=80, height=1, textvariable=self.outputEMBLSubmission).pack()
-
- # Output interface is contained on a frame.
- self.submOutputFrame = Tkinter.Frame(self)
-
- self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL)
- self.submOutputXScrollbar.pack(side=BOTTOM, fill=X)
-
- self.submOutputYScrollbar = Scrollbar(self.submOutputFrame)
- self.submOutputYScrollbar.pack(side=RIGHT, fill=Y)
-
- self.submOutputGuiObject = Tkinter.Text(
- self.submOutputFrame, width=80, height=15, wrap=NONE
- , xscrollcommand=self.submOutputXScrollbar.set
- , yscrollcommand=self.submOutputYScrollbar.set
- )
-
- self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview)
- self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview)
-
- self.submOutputGuiObject.pack()
- self.submOutputFrame.pack()
-
- # This is the directory the python executable is running from.
- # self.idir is used inside the saveSubmissionFile method.
- # Maybe the code should be in there.
- FileAndPath = os.path.abspath(__file__)
- self.idir, self.ifile = os.path.split(FileAndPath)
-
- Tkinter.Button(self, text='Save this submission to my computer', command=self.saveSubmissionFile).pack(**button_opt)
-
- self.instructionText = Tkinter.StringVar()
- self.instructionText.set('This tool was developed by the Tissue Typing Laboratory at\nMaastricht University Medical Center.\nFor more information:')
- Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack()
-
- # Make a frame for the more-info buttons
- self.moreInfoFrame = Tkinter.Frame(self)
-
- Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse).grid(row=0, column=0)
- Tkinter.Button(self.moreInfoFrame, text='Contacting or Citing MUMC', command=self.contactInformation).grid(row=0, column=1)
- Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence).grid(row=0, column=2)
-
- self.moreInfoFrame.pack()
-
-
- def sampleSequence(self):
- self.featureInputGuiObject.delete('1.0','end')
- self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')
-
- self.inputAllele.set('Allele:01:02')
- self.inputGene.set('HLA-C')
- self.inputCellNummer.set('Donor_12345')
-
- self.constructSubmission()
-
- # This method should popup some instruction text in a wee window.
- # This should be explicit on how to use the tool.
- def howToUse(self):
- tkMessageBox.showinfo('How to use this tool',
- 'This software is to be used to create an\n'
- + 'EMBL-formatted submission document,\n'
- + 'which specifies a (novel) HLA allele.\n\n'
-
- + 'This tool requires you to submit a\n'
- + 'full length HLA allele, including\n'
- + '5\' and 3\' UTRs.\n\n'
-
- + 'Use capital letters for exons,\n'
- + 'lowercase for introns & UTRs.\n\n'
-
- + 'Push the "Example Sequence" button to see a small example of'
- + ' a formatted sequence.\n'
- + 'Sequences should follow this pattern:\n'
- + '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n'
-
- + 'To use this tool:\n'
- + '1.) Fill in a Sample ID, Gene Name, and Allele.'
- + ' This text will be included in the submission.\n'
- + '2.) Paste your formatted sequence in the\n'
- + 'Annotated Sequence text area.\n'
- + '3.) Push \"Generate an EMBL submission\" button'
- + ' to generate a submission.\n'
- + '4.) Push the "Save the submission" button'
- + ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n'
-
- + 'All spaces, tabs, and newlines are'
- + ' removed before the nucleotide sequence is translated.'
- )
-
- def contactInformation(self):
- # This method should list contact information for MUMC, and a link to the github page.
- tkMessageBox.showinfo('Contact Information',
- 'This software was created at\n'
- + 'Maastricht University Medical Center\n'
- + 'Transplantation Immunology\n'
- + 'Tissue Typing Laboratory.\n'
- + 'by Ben Matern:\n'
- + 'ben.matern@mumc.nl\n\n'
-
- + 'Please send Ben your bioinformatics\n'
- + 'and data related questions.\n\n'
-
- + 'all other inquiries can be directed\n'
- + 'to Marcel Tilanus:\n'
- + 'm.tilanus@mumc.nl\n\n'
-
- + 'This code will be hosted at:\n'
- + 'https://github.com/transplantation-\nimmunology/EMBL-HLA-Submission\n'
- + 'You will find more information on\n'
- + 'EMBL\'s data format on that page.'
-
- )
-
- # Ask user for a output file location, and write the EMBL submission to a file.
- # This takes the input from the output field, rather than generate a new submission.
- # So the user can edit the submission before or after saving it.
- def saveSubmissionFile(self):
-
- self.dir_opt = options = {}
- options['initialdir'] = self.idir
- options['parent'] = self
- options['title'] = 'Specify your output file.'
- options['initialfile'] = 'NovelAlleleEMBLSubmission.txt'
- outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)
- submissionText = self.submOutputGuiObject.get('1.0', 'end')
- outputFileObject.write(submissionText)
-
- # Gather sequence information from the input elements, and generate a text EMBL submission.
- def constructSubmission(self):
-
- allGen = AlleleGenerator()
- roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end')
- allGen.inputCellNummer = self.inputCellNummer.get()
- allGen.inputGene = self.inputGene.get()
- allGen.inputAllele = self.inputAllele.get()
- allGen.processInputSequence(roughFeatureSequence)
- enaSubmission = allGen.buildENASubmission()
- self.submOutputGuiObject.delete('1.0','end')
- self.submOutputGuiObject.insert('1.0', enaSubmission)
-
diff --git a/src/AlleleGuiEMBL.py b/src/AlleleGuiEMBL.py
new file mode 100755
index 0000000..8737461
--- /dev/null
+++ b/src/AlleleGuiEMBL.py
@@ -0,0 +1,649 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+
+import os
+from os import makedirs
+from os.path import expanduser, join, isdir
+
+import datetime
+import hashlib
+import ftplib
+import gzip
+import shutil
+#import pycurl
+#import StringIO
+
+import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
+from Tkinter import *
+
+from SubmissionGeneratorEMBL import SubmissionGeneratorEMBL
+from AlleleGuiEMBLInputForm import AlleleGuiEMBLInputForm
+from AlleleSubCommon import *
+from AlleleSubmissionEMBLXml import *
+from AlleleSubmissionEMBLRestMethods import *
+#from HLAGene import HLAGene
+
+# The AlleleGuiEMBL class is an extension of Tkinter.Frame. The GUI elements and interactions are specified in this class.
+class AlleleGuiEMBL(Tkinter.Frame):
+
+ # I shouldn't need to write a select-All method but TK is kind of annoying.
+ def selectall(self, event):
+     """Select the entire contents of the widget that received the event.
+
+     Registered in __init__ as a class-level binding for Tkinter "Text" widgets.
+     """
+     textWidget = event.widget
+     textWidget.tag_add("sel", "1.0", "end")
+
+ # Initialize the GUI
+ def __init__(self, root):
+     """Build the main window: instructions, sequence input, submission preview and action buttons.
+
+     root is the Tk window this frame lives in. It is kept as self.parent, and
+     its window-close protocol is routed to saveAndExit.
+     """
+     Tkinter.Frame.__init__(self, root)
+     root.title("Create and Submit an EMBL Sequence Submission")
+     self.parent = root
+
+     # Ctrl-A doesn't work by default in TK, so it is bound here for every Text widget.
+     # NOTE(review): this sequence used to be an empty string, which names no event;
+     # "<Control-a>" is the Tk name for the Ctrl-A shortcut described above - confirm.
+     root.bind_class("Text", "<Control-a>", self.selectall)
+
+     # To define the exit behavior. Save the input sequence text.
+     self.parent.protocol('WM_DELETE_WINDOW', self.saveAndExit)
+
+     button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}
+
+     # A frame for the Instructions Label.
+     self.instructionsFrame = Tkinter.Frame(self)
+     self.instructionText = Tkinter.StringVar()
+     self.instructionText.set('\nThis tool will generate an HLA allele submission for\n'
+         + 'the EMBL / ENA nucleotide database.\n'
+         + 'If you provide login credentials, you may automatically submit the sequence.\n'
+         + 'For more information:\n')
+     Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack()
+     self.instructionsFrame.pack(expand=False, fill='both')
+
+     # Make a frame for the more-info buttons
+     self.moreInfoFrame = Tkinter.Frame(self)
+     self.howToUseButton = Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse)
+     self.howToUseButton.grid(row=0, column=0)
+     self.exampleButton = Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence)
+     self.exampleButton.grid(row=0, column=1)
+     self.moreInfoFrame.pack()
+
+     # Create a frame for the input widget, add scrollbars.
+     self.featureInputFrame = Tkinter.Frame(self)
+
+     self.featureInstrText = Tkinter.StringVar()
+     self.featureInstrText.set('Annotated Sequence:')
+     # pack() returns None, so the label is created first and packed afterwards;
+     # chaining the two calls would store None in the attribute instead of the widget.
+     self.featureInstrLabel = Tkinter.Label(self.featureInputFrame, width=80, height=1, textvariable=self.featureInstrText)
+     self.featureInstrLabel.pack()
+
+     self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL)
+     self.featureInputXScrollbar.pack(side=BOTTOM, fill=X)
+
+     self.featureInputYScrollbar = Scrollbar(self.featureInputFrame)
+     self.featureInputYScrollbar.pack(side=RIGHT, fill=Y)
+
+     self.featureInputGuiObject = Tkinter.Text(
+         self.featureInputFrame
+         , width=80, height=8
+         , wrap=NONE
+         , xscrollcommand=self.featureInputXScrollbar.set
+         , yscrollcommand=self.featureInputYScrollbar.set
+         )
+
+     self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview)
+     self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview)
+
+     self.featureInputGuiObject.pack(expand=True, fill='both')
+     self.featureInputFrame.pack(expand=True, fill='both')
+
+     # Create Frame for "Generate Submission" button.
+     self.submButtonFrame = Tkinter.Frame(self)
+     self.submissionOptionsButton = Tkinter.Button(self.submButtonFrame, text='Submission Options', command=self.chooseSubmissionOptions)
+     self.submissionOptionsButton.grid(row=0, column=0)
+     self.generateSubmissionButton = Tkinter.Button(self.submButtonFrame, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.constructSubmission)
+     self.generateSubmissionButton.grid(row=0, column=1)
+     self.submButtonFrame.pack()
+
+     # Output interface is contained on a frame.
+     self.submOutputFrame = Tkinter.Frame(self)
+
+     self.outputEMBLSubmission = Tkinter.StringVar()
+     self.outputEMBLSubmission.set('Allele Submission Preview:')
+     # Same create-then-pack split as above so the attribute holds the Label.
+     self.outputEMBLLabel = Tkinter.Label(self.submOutputFrame, width=80, height=1, textvariable=self.outputEMBLSubmission)
+     self.outputEMBLLabel.pack()
+
+     self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL)
+     self.submOutputXScrollbar.pack(side=BOTTOM, fill=X)
+
+     self.submOutputYScrollbar = Scrollbar(self.submOutputFrame)
+     self.submOutputYScrollbar.pack(side=RIGHT, fill=Y)
+
+     self.submOutputGuiObject = Tkinter.Text(
+         self.submOutputFrame, width=80, height=8, wrap=NONE
+         , xscrollcommand=self.submOutputXScrollbar.set
+         , yscrollcommand=self.submOutputYScrollbar.set
+         )
+
+     self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview)
+     self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview)
+
+     self.submOutputGuiObject.pack(expand=True, fill='both')
+     self.submOutputFrame.pack(expand=True, fill='both')
+
+     # Upload / save / exit buttons share one frame at the bottom of the window.
+     self.uploadSubmissionFrame = Tkinter.Frame(self)
+     self.uploadButton = Tkinter.Button(self.uploadSubmissionFrame, text='Upload Submission to EMBL', command=self.uploadSubmission)
+     self.uploadButton.pack(**button_opt)
+     self.saveSubmissionButton = Tkinter.Button(self.uploadSubmissionFrame, text='Save Submission to My Computer', command=self.saveSubmissionFile)
+     self.saveSubmissionButton.pack(**button_opt)
+     self.exitButton = Tkinter.Button(self.uploadSubmissionFrame, text='Exit', command=self.saveAndExit)
+     self.exitButton.pack(**button_opt)
+     self.uploadSubmissionFrame.pack()
+
+     self.pack(expand=True, fill='both')
+
+ def chooseSubmissionOptions(self):
+     """Open the EMBL submission options form in its own window.
+
+     The main window's widgets are disabled while the options window exists and
+     are re-enabled through enableGUI when that window is destroyed.
+     """
+     print ('Opening the EMBL Submission Options Dialog')
+
+     self.disableGUI()
+
+     emblOptionsRoot = Tkinter.Toplevel()
+     # NOTE(review): this sequence used to be an empty string, which names no event,
+     # so enableGUI could never be triggered. "<Destroy>" fires when the options
+     # window goes away - confirm this is the intended event. It may fire once per
+     # destroyed child widget; enableGUI only resets widget states, so repeats are harmless.
+     emblOptionsRoot.bind("<Destroy>", self.enableGUI)
+     AlleleGuiEMBLInputForm(emblOptionsRoot).pack()
+
+     # Set the X and the Y Position of the options window, so it is nearby.
+     emblOptionsRoot.update()
+     # winfo_geometry() is split on '+'; fields 1 and 2 are used as the X and Y offsets.
+     geometryFields = self.parent.winfo_geometry().split('+')
+     windowXpos = geometryFields[1]
+     windowYpos = geometryFields[2]
+     newGeometry = (str(emblOptionsRoot.winfo_width()) + 'x'
+         + str(emblOptionsRoot.winfo_height()) + '+'
+         + windowXpos + '+'
+         + windowYpos)
+     emblOptionsRoot.geometry(newGeometry)
+
+     emblOptionsRoot.mainloop()
+
+
+ def writeMd5(self, inputFileName, outputFileName):
+     """Compute the MD5 checksum of a file and write it to a checksum file.
+
+     inputFileName  -- path of the file to hash; it is read in 4096-byte chunks.
+     outputFileName -- path of the checksum file, opened through createOutputFile.
+
+     Returns the checksum as a hexadecimal string.
+     """
+     hash_md5 = hashlib.md5()
+     with open(inputFileName, "rb") as f:
+         for chunk in iter(lambda: f.read(4096), b""):
+             hash_md5.update(chunk)
+     hashValue = hash_md5.hexdigest()
+
+     # The Ubuntu md5sum program writes the checksum and the file name with 2 spaces
+     # between them. The separator is spelled as a repeat so the second space
+     # cannot be lost to whitespace clean-up.
+     md5sumSeparator = ' ' * 2
+
+     outputFile = createOutputFile(outputFileName)
+     try:
+         # os.path.split is used explicitly; a bare "split" only resolves if a
+         # star-import happens to provide it.
+         outputFile.write(str(hashValue) + md5sumSeparator + str(os.path.split(inputFileName)[1]))
+     finally:
+         # Close the checksum file even if the write fails.
+         outputFile.close()
+
+     return hashValue
+
+
+
+
+ def uploadSubmission(self):
+     """Upload the submission shown in the preview area to EMBL.
+
+     Creates a working directory under the user's home folder, opens
+     Submission_Log.txt there, and runs the upload steps in _performUpload.
+     The log file is closed however the upload ends. A popup with the study
+     and analysis accession numbers is shown only when every step succeeded.
+     """
+     print('Uploading Submission to EMBL')
+
+     # Determine a working directory. Computed outside the try block so the
+     # error popup below can always name it.
+     workingDirectory = join(expanduser("~"), 'temp_upload_directory')
+     try:
+         print('I can work in this directory:' + workingDirectory)
+
+         if not isdir(workingDirectory):
+             print('Making Directory:' + workingDirectory)
+             makedirs(workingDirectory)
+     except Exception as error:
+         print('Cannot Initialize Working Directory')
+         print(error)
+         tkMessageBox.showinfo('Working Directory Error',
+             'Sorry, I failed to create this working directory:\n'
+             + str(workingDirectory)
+             + '\n and I cannot continue.\nMaybe this is a '
+             + 'permissions issue, are these folders read only?\n'
+             + str(error))
+         return
+
+     restLog = createOutputFile(join(workingDirectory, 'Submission_Log.txt'))
+     try:
+         (uploadSucceeded, analysisAccessionNumber) = self._performUpload(workingDirectory, restLog)
+     finally:
+         # Every early exit of the upload used to leave the log file open.
+         restLog.close()
+
+     if not uploadSucceeded:
+         return
+
+     # Popup message with Results
+     tkMessageBox.showinfo('Success uploading submission to EMBL.',
+         'The sequence and analysis was uploaded to EMBL ENA Successfully.\n\n'
+         + 'For your reference:\n\n'
+         + 'You can use this Project/Study accession\nnumber on future submissions:\n'
+         # str() wraps only the configuration value, so a missing value cannot
+         # raise while concatenating.
+         + 'Study Accession:' + str(getConfigurationValue('study_accession')) + '\n\n'
+         + 'Use the Analysis Accession number if you\ncontact EMBL regarding this\nsequence submission:\n'
+         + 'Analysis Accession:' + str(analysisAccessionNumber) + '\n\n'
+         + 'Find your submission files here:\n'
+         + workingDirectory + '\n\n'
+         + 'If EMBL successfully validates your sequence, you will\n'
+         + 'recieve an email with an EMBL Sequence accession number.\n'
+         + 'This *SEQUENCE* accession number is necessary for IMGT submission.\n'
+         + 'Contact EMBL Support with your\nAnalysis Accession # if it has been\nmore than 48 hours since submission.\n'
+         )
+
+ def _performUpload(self, workingDirectory, restLog):
+     """Run the upload steps, reporting each one to restLog and to the user.
+
+     workingDirectory -- existing folder where the submission files are written.
+     restLog          -- open, writable log file; the caller closes it.
+
+     Returns a tuple (succeeded, analysisAccessionNumber). On any failure or
+     cancellation the user has already been told and (False, None) is returned.
+     """
+     # Credentials must be present before anything is written or sent.
+     emblUsername = getConfigurationValue('embl_username')
+     emblPassword = getConfigurationValue('embl_password')
+     if (emblUsername is None
+         or len(emblUsername) < 1
+         or emblPassword is None
+         or len(emblPassword) < 1):
+         tkMessageBox.showinfo('Missing Login Credentials',
+             'You must provide EMBL username and password.\n'
+             'Please use the "Submission Options" button.')
+         restLog.write('Missing EMBL Username or Password.' + '\n')
+         return (False, None)
+     restLog.write('EMBL Username and Password exist.' + '\n')
+
+     useTestServers = (int(getConfigurationValue('test_submission')) == 1)
+     # Are you sure?
+     if useTestServers:
+         restLog.write('Using Test EMBL Server.' + '\n')
+         result = tkMessageBox.askquestion("Submit to TEST / DEMO environment", "You are about to submit a sequence to the\n\nTEST / DEMO EMBL environment.\n\nAre You Sure?", icon='warning')
+     else:
+         restLog.write('Using Production EMBL Server.' + '\n')
+         result = tkMessageBox.askquestion("Submit to LIVE / PROD environment", "You are about to submit a sequence to the\n\nLIVE / PROD EMBL environment.\n\nAre You Sure?", icon='warning')
+
+     if result != 'yes':
+         return (False, None)
+
+     # TODO: Existing project? Maybe I should check if the study/project exists, before I get started
+
+     # Give my submission a filename. Something with a datetime stamp.
+     # This includes a "seconds" measure, should be pretty unique.
+     # The names are built before the try block so the error handler can use them.
+     dateTimeNow = '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.datetime.now())
+     submissionShortFileName = 'HLA_Submission_' + dateTimeNow + '.txt'
+     submissionFileName = join(workingDirectory, submissionShortFileName)
+     zippedShortFileName = submissionShortFileName + '.gz'
+     zippedFileName = join(workingDirectory, zippedShortFileName)
+     md5FileName = zippedFileName + '.md5'
+
+     try:
+         submissionText = self.submOutputGuiObject.get('1.0', 'end')
+         with open(submissionFileName, 'w') as outputFileObject:
+             outputFileObject.write(submissionText)
+
+     except Exception as error:
+         print('Cannot Write Submission Flatfile')
+         print(error)
+         # The popup names the file that could not be written; it used to
+         # print the whole submission text here.
+         tkMessageBox.showinfo('Cannot Write Submission Flatfile',
+             'Sorry, I failed to create the submission file:\n'
+             + str(submissionFileName)
+             + '\n and I cannot continue.\nMaybe this is a '
+             + 'permissions issue, are these folders read only?\n'
+             + str(error))
+         restLog.write('Failure to create submission file:' + str(error) + '\n')
+         return (False, None)
+
+     restLog.write('Submission file was created:' + str(submissionFileName) + '\n')
+
+     # gzip the submission file. Make a gz file.
+     try:
+         with open(submissionFileName, 'rb') as fileIn, gzip.open(zippedFileName, 'wb') as fileOut:
+             shutil.copyfileobj(fileIn, fileOut)
+
+     except Exception as error:
+         print('Cannot Compress Submission File')
+         print(error)
+         tkMessageBox.showinfo('Cannot Compress Submission File',
+             'Sorry, I failed to compress the submission file:\n'
+             + str(zippedFileName)
+             + '\n and I cannot continue.\n'
+             + str(error))
+         restLog.write('Failure to create zip file:' + str(error) + '\n')
+         return (False, None)
+
+     restLog.write('Zip file was created:' + str(zippedFileName) + '\n')
+
+     # Calculate an MD5SUM
+     try:
+         md5HashValue = self.writeMd5(zippedFileName, md5FileName)
+
+     except Exception as error:
+         print('Cannot Calculate MD5')
+         print(error)
+         tkMessageBox.showinfo('Cannot Calculate an Md5 checksum',
+             'Sorry, I failed to calculate an md5 checksum\nand I cannot continue.\n'
+             + str(error))
+         # This used to be logged as a zip file failure.
+         restLog.write('Failure to create md5 file:' + str(error) + '\n')
+         return (False, None)
+
+     restLog.write('md5 file was created:' + str(md5FileName) + '\n')
+
+     # Use FTP to send the file to EMBL
+     try:
+         if useTestServers:
+             ftpServerAddress = getConfigurationValue('embl_ftp_upload_site_test')
+         else:
+             ftpServerAddress = getConfigurationValue('embl_ftp_upload_site_prod')
+
+         ftp = ftplib.FTP(ftpServerAddress)
+         try:
+             ftp.login(emblUsername, emblPassword)
+             for localFileName in (zippedFileName, md5FileName):
+                 # "with" closes each local file once it has been sent.
+                 with open(localFileName, 'rb') as uploadFile:
+                     ftp.storbinary('STOR ' + '/' + os.path.split(localFileName)[1], uploadFile, 1024)
+         finally:
+             # Close the connection even when the login or a transfer fails.
+             ftp.close()
+
+     except Exception as error:
+         print('Cannot Upload to FTP site')
+         print(error)
+         tkMessageBox.showinfo('Cannot Upload to FTP site',
+             'Sorry, I failed to upload your submission files to the EMBL FTP site\nand I cannot continue.\n'
+             + str(error))
+         restLog.write('Failure to upload to FTP site:' + str(error) + '\n')
+         return (False, None)
+
+     restLog.write('Submission and MD5 successfully uploaded.\n')
+
+     # Handle the new project
+     # effectively, study = project
+     # existing study = 1, new study = 2
+     newProject = (getConfigurationValue('choose_project') == '2')
+     if newProject:
+
+         # Generate Project and Project Submission XML Files
+         try:
+             projectFileName = join(workingDirectory, 'project.xml')
+             createProjectXML(projectFileName)
+
+             projectSubmissionFileName = join(workingDirectory, 'project_submission.xml')
+             createProjectSubmissionXML(projectSubmissionFileName
+                 ,'proj_sub_' + dateTimeNow
+                 ,'project.xml')
+
+         except Exception as error:
+             print('Cannot Create Project Submission XML')
+             print(error)
+             tkMessageBox.showinfo('Cannot Create Project Submission XML',
+                 'Sorry, I failed to create a project XML file\nand I cannot continue.\n'
+                 + str(error))
+             restLog.write('Failure to create project submission file:' + str(error) + '\n')
+             return (False, None)
+
+         restLog.write('Project Submission XML files were created.\n')
+
+         # Use REST to submit this project
+         try:
+             # Return value should be a tuple:
+             # (Success, ProjectAccession, Messages[])
+             (projectSubmissionSuccess, projectAccessionNumber, projectErrorMessages) = performProjectSubmission(projectSubmissionFileName,projectFileName)
+
+             if projectSubmissionSuccess:
+                 # Great. The project was created successfully.
+                 # Lets use this new study accession moving forward.
+                 assignConfigurationValue('study_accession', projectAccessionNumber)
+                 assignConfigurationValue('choose_project','1')
+             else:
+                 reportedMessages = ''
+                 for errorMessage in projectErrorMessages:
+                     reportedMessages += ('\n' + errorMessage + '\n')
+                 messageText = ('There was a problem in the Project Submission.\n'
+                     + 'I cannot continue.\n'
+                     + 'These messages were reported by EMBL:\n'
+                     + reportedMessages)
+                 tkMessageBox.showinfo('Cannot Submit Project XML via REST', messageText)
+                 # No exception is active on this path, so the log records the
+                 # messages reported by EMBL instead of sys.exc_info().
+                 restLog.write('Failure to submit project submission file:' + reportedMessages + '\n')
+                 return (False, None)
+
+         except Exception as error:
+             print('Cannot Submit Project XML')
+             print(error)
+             tkMessageBox.showinfo('Cannot Submit Project XML',
+                 'Sorry, I failed to submit the project XML file\nand I cannot continue.\n'
+                 + str(error))
+             restLog.write('Failure to upload project submission file:' + str(error) + '\n')
+             return (False, None)
+
+         restLog.write('New study has been uploaded, accession:' + str(getConfigurationValue('study_accession')) + '\n')
+
+     # existing project, we will use the supplied accession #
+     else:
+         restLog.write('Using existing study accession:' + str(getConfigurationValue('study_accession')) + '\n')
+
+     # Generate Analysis and Analysis Submission xmls
+     try:
+         analysisFileName = join(workingDirectory, 'analysis.xml')
+         createAnalysisXML(analysisFileName, md5HashValue, zippedShortFileName)
+
+         analysisSubmissionFileName = join(workingDirectory, 'analysis_submission.xml')
+         createAnalysisSubmissionXML(analysisSubmissionFileName
+             ,'analysis_sub_' + dateTimeNow
+             ,'analysis.xml')
+
+     except Exception as error:
+         print('Cannot Create Analysis Submission XML')
+         print(error)
+         tkMessageBox.showinfo('Cannot Create Analysis Submission XML',
+             'Sorry, I failed to create a Analysis XML file\nand I cannot continue.\n'
+             + str(error))
+         restLog.write('Failure to create analysis submission file:' + str(error) + '\n')
+         return (False, None)
+
+     restLog.write('Analysis Submission XML files were created.\n')
+
+     # Use REST to submit this analysis
+     try:
+         # Return value should be a tuple:
+         # (Success, analysisAccessionNumber, Messages[])
+         (analysisSubmissionSuccess, analysisAccessionNumber, analysisErrorMessages) = performAnalysisSubmission(analysisSubmissionFileName,analysisFileName)
+
+         if not analysisSubmissionSuccess:
+             reportedMessages = ''
+             for errorMessage in analysisErrorMessages:
+                 reportedMessages += ('\n' + errorMessage + '\n')
+             messageText = ('There was a problem in the Analysis Submission.\n'
+                 + 'I cannot continue.\n'
+                 + 'These messages were reported by EMBL:\n'
+                 + reportedMessages)
+             tkMessageBox.showinfo('Cannot Submit Analysis XML via REST', messageText)
+             restLog.write('Failure to submit analysis submission file:' + reportedMessages + '\n')
+             return (False, None)
+
+     except Exception as error:
+         print('Cannot Submit Analysis XML')
+         print(error)
+         tkMessageBox.showinfo('Cannot Submit Analysis XML via REST',
+             'Sorry, I failed to submit the analysis XML file\nand I cannot continue.\n'
+             + str(error))
+         # This failure used to be missing from the log.
+         restLog.write('Failure to upload analysis submission file:' + str(error) + '\n')
+         return (False, None)
+
+     restLog.write('New analysis has been Uploaded, accession:' + str(analysisAccessionNumber) + '\n')
+
+     return (True, analysisAccessionNumber)
+
+
+ def sampleSequence(self):
+     """Load a small example sequence and example submission options, then build a submission from them."""
+     inputArea = self.featureInputGuiObject
+     inputArea.delete('1.0','end')
+     inputArea.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')
+
+     # Example option values, stored in this order.
+     # The password is cleared; the username is deliberately left untouched.
+     exampleOptions = (
+         ('embl_password', ''),
+         ('sample_id', 'Donor_12345'),
+         ('gene', 'HLA-C'),
+         ('class', '1'),
+         ("allele_name", 'Allele:01:02'),
+         ('study_accession', 'PRJEB12345'),
+         ('choose_project', '2'),
+         ('study_identifier', 'HLA_Analysis_Project'),
+         ('study_short_title', 'HLA Typing for Cancer Research.'),
+         ('study_abstract', 'An abstract is a more in-depth description of the nature of the research project.'),
+         ('analysis_alias', 'unique_HLA_analysis_alias'),
+         ('analysis_title', 'Novel HLA sequence from patient with Leukemia'),
+         ('analysis_description', 'This is an HLA-A sequence from a patient. It was discovered that he has Leukemia, so we decided to sequence his HLA.'),
+     )
+     for optionName, optionValue in exampleOptions:
+         assignConfigurationValue(optionName, optionValue)
+
+     self.constructSubmission()
+
+ # This method should popup some instruction text in a wee window.
+ # This should be explicit on how to use the tool.
+ def howToUse(self):
+     """Show the usage instructions for this tool in an information popup."""
+     instructionParts = (
+         'This software is to be used to create an\n',
+         'EMBL-formatted submission document,\n',
+         'which specifies a (novel) HLA allele.\n\n',
+
+         'This tool requires you to submit a\n',
+         'full length HLA allele, including\n',
+         '5\' and 3\' UTRs.\n\n',
+
+         'Use capital letters for exons,\n',
+         'lowercase for introns & UTRs.\n\n',
+
+         'Push the "Example Sequence" button to see a small example of',
+         ' a formatted sequence.\n',
+         'Sequences should follow this pattern:\n',
+         '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n',
+
+         'To use this tool:\n',
+         '1.) Fill in a Sample ID, Gene Name, and Allele.',
+         ' This text will be included in the submission.\n',
+         '2.) Paste your formatted sequence in the\n',
+         'Annotated Sequence text area.\n',
+         '3.) Push \"Generate an EMBL submission\" button',
+         ' to generate a submission.\n',
+         '4.) Push the "Save the submission" button',
+         ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n',
+
+         'All spaces, tabs, and newlines are',
+         ' removed before the nucleotide sequence is translated.',
+     )
+     # The pieces are joined without a separator; each one carries its own line breaks.
+     tkMessageBox.showinfo('How to use this tool', ''.join(instructionParts))
+
+ def contactInformation(self):
+     """Show author contact details and the project's GitHub address in an information popup."""
+     contactParts = (
+         'This software was created at\n',
+         'Maastricht University Medical Center\n',
+         'Transplantation Immunology\n',
+         'Tissue Typing Laboratory.\n',
+         'by Ben Matern:\n',
+         'ben.matern@mumc.nl\n\n',
+
+         'Please send Ben your bioinformatics\n',
+         'and data related questions.\n\n',
+
+         'all other inquiries can be directed\n',
+         'to Marcel Tilanus:\n',
+         'm.tilanus@mumc.nl\n\n',
+
+         'This code will be hosted at:\n',
+         'https://github.com/transplantation-\nimmunology/saddle-bags\n',
+         'You will find more information on\n',
+         'EMBL\'s data format on that page.',
+     )
+     # The pieces are joined without a separator; each one carries its own line breaks.
+     tkMessageBox.showinfo('Contact Information', ''.join(contactParts))
+
+ # Ask user for a output file location, and write the EMBL submission to a file.
+ # This takes the input from the output field, rather than generate a new submission.
+ # So the user can edit the submission before or after saving it.
+ def saveSubmissionFile(self):
+     """Ask the user for an output file and write the previewed submission to it.
+
+     The text is taken from the preview widget rather than regenerated, so any
+     edits the user made there are saved. Nothing is written if the user cancels
+     the dialog. A file that cannot be opened or written is reported in an error popup.
+     """
+     self.dir_opt = options = {}
+     options['initialdir'] = expanduser("~")
+     options['parent'] = self
+     options['title'] = 'Specify your output file.'
+     options['initialfile'] = 'EMBL.HLA.Submission.txt'
+
+     try:
+         outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)
+         # asksaveasfile gives back None when the dialog is cancelled.
+         if outputFileObject is None:
+             return
+         try:
+             submissionText = self.submOutputGuiObject.get('1.0', 'end')
+             outputFileObject.write(submissionText)
+         finally:
+             # Close the file so the text is flushed to disk.
+             outputFileObject.close()
+     except (IOError, OSError) as error:
+         # For example a read-only folder or an unreachable network drive.
+         tkMessageBox.showerror('Cannot Save Submission File',
+             'Sorry, I failed to save the submission file.\n'
+             + 'Maybe this is a permissions issue, are these folders read only?\n'
+             + str(error))
+
+
+
+ # Gather sequence information from the input elements, and generate a text EMBL submission.
+ def constructSubmission(self):
+     """Build an EMBL submission from the annotated input sequence and show it in the preview area.
+
+     An empty result is reported with an error popup and the preview is cleared.
+     A KeyError raised while building is reported as missing submission options.
+     """
+     try:
+         generator = SubmissionGeneratorEMBL()
+         generator.sequenceAnnotation = annotateRoughInputSequence(
+             self.featureInputGuiObject.get('1.0', 'end'))
+
+         previewText = generator.buildENASubmission()
+
+         hasSubmissionText = (previewText is not None and len(previewText) >= 1)
+         if not hasSubmissionText:
+             tkMessageBox.showerror('Empty submission text'
+                 ,'You are missing some required information.\n'
+                 + 'Try the \'Submission Options\' button.\n')
+             previewText = ''
+
+         # Both outcomes replace whatever the preview currently shows.
+         self.submOutputGuiObject.delete('1.0','end')
+         self.submOutputGuiObject.insert('1.0', previewText)
+
+     except KeyError as missingOption:
+         tkMessageBox.showerror('Missing Submission Options'
+             ,'You are missing some required information.\n'
+             + 'Use the \'Submission Options\' button.\n'
+             + 'Missing Data: ' + str(missingOption))
+
+ def saveAndExit(self):
+     """Store the current input sequence under the 'sequence' configuration key, then destroy the parent window."""
+     currentSequence = self.featureInputGuiObject.get('1.0', 'end')
+     assignConfigurationValue('sequence', currentSequence)
+     self.parent.destroy()
+
+ def enableGUI(self, event=None):
+     """Switch the main window's widgets back on; the optional event argument is ignored."""
+     self.toggleGUI(isEnabled=True)
+
+ def disableGUI(self):
+     """Switch the main window's widgets off."""
+     self.toggleGUI(isEnabled=False)
+
+ def toggleGUI(self, isEnabled):
+     """Set every interactive widget of the main window to NORMAL (isEnabled true) or DISABLED."""
+     if isEnabled:
+         newState = NORMAL
+     else:
+         newState = DISABLED
+
+     # The widgets are listed individually, in the order they are configured.
+     affectedWidgets = (
+         self.howToUseButton,
+         self.exampleButton,
+         self.featureInputGuiObject,
+         self.submissionOptionsButton,
+         self.generateSubmissionButton,
+         self.submOutputGuiObject,
+         self.uploadButton,
+         self.saveSubmissionButton,
+         self.exitButton,
+     )
+     for widget in affectedWidgets:
+         widget.config(state=newState)
+
+
+
+
+
diff --git a/src/AlleleGuiEMBLInputForm.py b/src/AlleleGuiEMBLInputForm.py
new file mode 100755
index 0000000..44fcbef
--- /dev/null
+++ b/src/AlleleGuiEMBLInputForm.py
@@ -0,0 +1,375 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+
+import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
+from Tkinter import *
+
+from AlleleSubCommon import *
+
+class AlleleGuiEMBLInputForm(Tkinter.Frame):
+
+ # Initialize the GUI
+ def __init__(self, root):
+ Tkinter.Frame.__init__(self, root)
+ root.title("Choose EMBL Submission Options")
+ self.parent = root
+
+ button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}
+
+ # To define the exit behavior. Save and exit.
+ self.parent.protocol('WM_DELETE_WINDOW', self.saveOptions)
+
+ # Define the return behavior. Same as "close window" etc
+ root.bind('', self.returnFunction)
+
+ # This window should not be resizeable. I guess.
+ self.parent.resizable(width=False, height=False)
+
+ #Standard Inputs widths for the form elements
+ formInputWidth = 30
+ labelInputWidth = 30
+
+ self.instructionsFrame = Tkinter.Frame(self)
+ self.instructionText = Tkinter.StringVar()
+ self.instructionText.set('\nThese options are required for an EMBL allele submission.\n')
+ Tkinter.Label(self.instructionsFrame, width=85, height=3, textvariable=self.instructionText).pack()
+ self.instructionsFrame.pack()
+
+ self.submissionDetailsInputFrame2 = Tkinter.Frame(self)
+
+ self.sampleIDInstrText = Tkinter.StringVar()
+ self.sampleIDInstrText.set('Sample ID:')
+ self.sampleIDinstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.sampleIDInstrText).grid(row=0, column=0)
+ self.inputSampleID = Tkinter.StringVar()
+ self.inputSampleIDEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputSampleID).grid(row=0, column=1)
+
+ self.geneInstrStringVar = Tkinter.StringVar()
+ self.geneInstrStringVar.set('Gene:')
+ self.geneInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.geneInstrStringVar).grid(row=1, column=0)
+ self.inputGene = Tkinter.StringVar()
+ self.inputGeneEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputGene).grid(row=1, column=1)
+
+ self.chooseClassIntVar = IntVar()
+ self.chooseClassIntVar.set(1)
+ Radiobutton(self.submissionDetailsInputFrame2, text="HLA Class I ", variable=self.chooseClassIntVar, value=1).grid(row=2, column=0)
+ Radiobutton(self.submissionDetailsInputFrame2, text="HLA Class II", variable=self.chooseClassIntVar, value=2).grid(row=2, column=1)
+
+ self.alleleInstrText = Tkinter.StringVar()
+ self.alleleInstrText.set('Allele Local Name:')
+ self.alleleInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.alleleInstrText).grid(row=3, column=0)
+ self.inputAllele = Tkinter.StringVar()
+ self.inputAlleleEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputAllele).grid(row=3, column=1)
+
+ self.submissionDetailsInputFrame2.pack()
+
+
+ # Make a frame to contain the Test/Production radio buttons.
+ self.testProductionFrame = Tkinter.Frame(self)
+
+ self.testProductionInstrText = Tkinter.StringVar()
+ self.testProductionInstrText.set('\nBy default, you submit to the EMBL test servers,\n'
+ + 'where submissions are regularly deleted.\n'
+ + 'change this option if you want to submit to the live EMBL environment.\n'
+ + 'Login Credentials will not be stored, but they will be sent\n'
+ + 'to EMBL via secure https connection.\n'
+ )
+ self.alleleInstrLabel = Tkinter.Label(self.testProductionFrame, width=70, height=7, textvariable=self.testProductionInstrText).pack()#.grid(row=2, column=0)
+
+ # 1 = Test. 0 = Production/live server
+ self.chooseTestServersIntVar = IntVar()
+ self.chooseTestServersIntVar.set(int(getConfigurationValue('test_submission')))
+
+ Radiobutton(self.testProductionFrame, text="Submit to EMBL TEST / DEMO environment.", variable=self.chooseTestServersIntVar, value=1).pack()
+ Radiobutton(self.testProductionFrame, text="Submit to EMBL LIVE / PROD environment.", variable=self.chooseTestServersIntVar, value=0).pack()
+
+ self.testProductionFrame.pack()
+
+ # Make a frame to contain the input variables
+ self.submissionDetailsInputFrame = Tkinter.Frame(self)
+
+ self.usernameInstrText = Tkinter.StringVar()
+ self.usernameInstrText.set('EMBL Username:')
+ self.usernameInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.usernameInstrText).grid(row=0, column=0)
+ self.inputUsername = Tkinter.StringVar()
+ self.inputUsernameEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputUsername).grid(row=0, column=1)
+
+ self.passwordInstrText = Tkinter.StringVar()
+ self.passwordInstrText.set('EMBL Password:')
+ self.passwordInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.passwordInstrText).grid(row=1, column=0)
+ self.inputPassword = Tkinter.StringVar()
+ self.inputPasswordEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputPassword, show="*").grid(row=1, column=1)
+
+ self.submissionDetailsInputFrame.pack()
+
+
+ # Frame to specify Analysis Information
+ self.newAnalysisFrame = Tkinter.Frame(self)
+
+ self.analysisAliasInstrText = Tkinter.StringVar()
+ self.analysisAliasInstrText.set('Analysis Alias:')
+ self.analysisAliasInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisAliasInstrText).grid(row=0, column=0)
+ self.inputAnalysisAlias = Tkinter.StringVar()
+ self.inputStudyIdEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisAlias).grid(row=0, column=1)
+
+ self.analysisTitleInstrText = Tkinter.StringVar()
+ self.analysisTitleInstrText.set('Analysis Title:')
+ self.analysisTitleInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisTitleInstrText).grid(row=1, column=0)
+ self.inputAnalysisTitle = Tkinter.StringVar()
+ self.inputAnalysisTitleEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisTitle).grid(row=1, column=1)
+
+ self.analysisDescriptionInstrText = Tkinter.StringVar()
+ self.analysisDescriptionInstrText.set('Analysis Description:')
+ self.analysisDescriptionInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisDescriptionInstrText).grid(row=2, column=0)
+ self.inputAnalysisDescription = Tkinter.StringVar()
+ self.inputAnalysisDescriptionEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisDescription).grid(row=2, column=1)
+
+ self.newAnalysisFrame.pack()
+
+
+
+ # A Frame for specifing the details of the Study / Project
+ self.projectDetailsFrame = Tkinter.Frame(self)
+
+ self.alleleInstrText = Tkinter.StringVar()
+ self.alleleInstrText.set('\nEMBL requires that submissions are assigned to a Study/Project.\n'
+ + 'Will you provide an existing EMBL study accession #?\n'
+ + '(ex. \'PRJEB01234\')\n'
+ + 'Or will you specify a new study?\n')
+ self.alleleInstrLabel = Tkinter.Label(self.projectDetailsFrame, width=70, height=6, textvariable=self.alleleInstrText).pack()#.grid(row=2, column=0)
+
+ self.chooseProjectIntVar = IntVar()
+ self.chooseProjectIntVar.set(2)
+
+ # A frame for the "new study" radio button
+ self.existingProjectFrame = Tkinter.Frame(self.projectDetailsFrame)
+ Radiobutton(self.existingProjectFrame, text="Use this study accession:", variable=self.chooseProjectIntVar, value=1).grid(row=0,column=0)
+ self.inputStudyAccession = Tkinter.StringVar()
+ self.inputStudyIdEntry = Tkinter.Entry(self.existingProjectFrame, width=formInputWidth, textvariable=self.inputStudyAccession).grid(row=0, column=1)
+ self.existingProjectFrame.pack()
+
+
+ # Filler Label
+ Tkinter.Label(self.projectDetailsFrame, width=labelInputWidth, height=1, text=' ').pack()
+
+ # This radio button is on the project details frame, but not
+ # on one of it's sub-frames (existingProjectFrame or newProjectFrame)
+ # That's so i can pack it, and not use a grid
+ Radiobutton(self.projectDetailsFrame, text="Create a new study with this information:", variable=self.chooseProjectIntVar, value=2).pack()
+
+ self.newProjectFrame = Tkinter.Frame(self.projectDetailsFrame)
+
+ self.studyIdInstrText = Tkinter.StringVar()
+ self.studyIdInstrText.set('Short Study Identifier:')
+ self.studyIdInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyIdInstrText).grid(row=0, column=0)
+ self.inputStudyId = Tkinter.StringVar()
+ self.inputStudyIdEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyId).grid(row=0, column=1)
+
+ self.studyShortTitleInstrText = Tkinter.StringVar()
+ self.studyShortTitleInstrText.set('Descriptive Study Title:')
+ self.studyShortTitleInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyShortTitleInstrText).grid(row=1, column=0)
+ self.inputStudyShortTitle = Tkinter.StringVar()
+ self.inputStudyShortTitleEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyShortTitle).grid(row=1, column=1)
+
+ self.studyAbstractInstrText = Tkinter.StringVar()
+ self.studyAbstractInstrText.set('Study Description / Abstract:')
+ self.studyAbstractInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyAbstractInstrText).grid(row=2, column=0)
+ self.inputStudyAbstract = Tkinter.StringVar()
+ self.inputStudyAbstractEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyAbstract).grid(row=2, column=1)
+
+ self.newProjectFrame.pack()
+
+ self.projectDetailsFrame.pack()
+
+ # Make a frame for the save options button.
+ self.saveOptionsFrame = Tkinter.Frame(self)
+ Tkinter.Button(self.saveOptionsFrame, text='Save Options', command=self.saveOptions).grid(row=0, column=0)
+ self.saveOptionsFrame.pack()
+
+ # TODO: Should there be a cancel button, to close this window without saving?
+
+ self.loadOptions()
+
+ # I needed a function for the return keypress to latch onto.
+ # It is just a wrapper for the saveOptions method.
+ def returnFunction(self, event):
+ self.saveOptions()
+
+
+ # Fill the form fields from the stored configuration values (read with getConfigurationValue).
+ def loadOptions(self):
+ if getConfigurationValue('embl_username') is not None:
+ self.inputUsername.set(getConfigurationValue('embl_username'))
+
+ if getConfigurationValue('embl_password') is not None:
+ self.inputPassword.set(getConfigurationValue('embl_password'))
+
+ if getConfigurationValue('sample_id') is not None:
+ self.inputSampleID.set(getConfigurationValue('sample_id'))
+
+ if getConfigurationValue('gene') is not None:
+ self.inputGene.set(getConfigurationValue('gene'))
+
+ if getConfigurationValue('class') is not None:
+ if (str(getConfigurationValue('class')) == '1'):
+ self.chooseClassIntVar.set(1)
+ elif (str(getConfigurationValue('class')) == '2'):
+ self.chooseClassIntVar.set(2)
+ else:
+ raise Exception('Error loading EMBL submission options. Invalid class:' + str(getConfigurationValue('class')))
+
+ if getConfigurationValue('allele_name') is not None:
+ self.inputAllele.set(getConfigurationValue('allele_name'))
+
+ if getConfigurationValue('choose_project') is not None:
+ if (str(getConfigurationValue('choose_project')) == '1'):
+ self.chooseProjectIntVar.set(1)
+ elif (str(getConfigurationValue('choose_project')) == '2'):
+ self.chooseProjectIntVar.set(2)
+ else:
+ raise Exception('Error loading EMBL submission options. Invalid Project choice:' + str(getConfigurationValue('choose_project')))
+
+ if getConfigurationValue('study_accession') is not None:
+ self.inputStudyAccession.set(getConfigurationValue('study_accession'))
+
+ if getConfigurationValue('study_identifier') is not None:
+ self.inputStudyId.set(getConfigurationValue('study_identifier'))
+
+ if getConfigurationValue('study_short_title') is not None:
+ self.inputStudyShortTitle.set(getConfigurationValue('study_short_title'))
+
+ if getConfigurationValue('study_abstract') is not None:
+ self.inputStudyAbstract.set(getConfigurationValue('study_abstract'))
+
+ if getConfigurationValue('test_submission') is not None:
+ # 1 = Test. 0 = Production/live server
+ self.chooseTestServersIntVar.set(int(getConfigurationValue('test_submission')))
+
+ if getConfigurationValue('analysis_alias') is not None:
+ self.inputAnalysisAlias.set(getConfigurationValue('analysis_alias'))
+ if getConfigurationValue('analysis_title') is not None:
+ self.inputAnalysisTitle.set(getConfigurationValue('analysis_title'))
+ if getConfigurationValue('analysis_description') is not None:
+ self.inputAnalysisDescription.set(getConfigurationValue('analysis_description'))
+
+
+ def saveOptions(self):
+ # Close the window
+ if (self.checkOptions()):
+ print ('Saving Options....')
+
+ assignConfigurationValue('embl_username', self.inputUsername.get())
+ # I store this password so I can use it in the submission
+ # I don't ever want to save the password. Make sure it isn't being saved in the config, in AlleleSubCommon.py
+ assignConfigurationValue('embl_password', self.inputPassword.get())
+ assignConfigurationValue('sample_id', self.inputSampleID.get())
+ assignConfigurationValue('gene', self.inputGene.get())
+ assignConfigurationValue('class', str(self.chooseClassIntVar.get()))
+ assignConfigurationValue('allele_name', self.inputAllele.get())
+ assignConfigurationValue('choose_project', str(self.chooseProjectIntVar.get()))
+ assignConfigurationValue('study_accession', self.inputStudyAccession.get())
+ assignConfigurationValue('study_identifier', self.inputStudyId.get())
+ assignConfigurationValue('study_short_title', self.inputStudyShortTitle.get())
+ assignConfigurationValue('study_abstract', self.inputStudyAbstract.get())
+ assignConfigurationValue('test_submission', str(self.chooseTestServersIntVar.get()))
+ assignConfigurationValue('analysis_alias', str(self.inputAnalysisAlias.get()))
+ assignConfigurationValue('analysis_title', str(self.inputAnalysisTitle.get()))
+ assignConfigurationValue('analysis_description', str(self.inputAnalysisDescription.get()))
+
+ self.parent.destroy()
+
+ else:
+ #print('Not ready to save, you are missing options.')
+ pass
+
+ def checkOptions(self):
+ #print ('Checking options.')
+
+ # Don't check the EMBL Username
+ # Don't check the EMBL Password
+
+ if (not self.inputSampleID.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing a Sample ID. Please try again.')
+ return False
+
+ if (not self.inputGene.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing a Gene. Please try again.')
+ return False
+
+ # Don't check the class boolean
+
+ if (not self.inputAllele.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing an Allele Name. Please try again.')
+ return False
+
+ if (str(self.chooseProjectIntVar.get()) == '1'):
+ # Use Existing Project
+ if (not self.inputStudyAccession.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing a Study Accession number. Please try again.')
+ return False
+
+ elif(str(self.chooseProjectIntVar.get()) == '2'):
+ # Use New Project
+ if (not self.inputStudyId.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing a Study Name. Please try again.')
+ return False
+
+ if (not self.inputStudyShortTitle.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing a Study Description. Please try again.')
+ return False
+
+
+ if (not self.inputStudyAbstract.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing a Study Accession number. Please try again.')
+ return False
+
+ else:
+ raise Exception ('Unknown value of self.chooseProjectIntVar. I expect 1 or 2. Observed:' + str(self.chooseProjectIntVar))
+
+
+ if (not self.inputAnalysisAlias.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing an Analysis Alias. Please try again.')
+ return False
+
+ if (not self.inputAnalysisTitle.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing an Analysis Title. Please try again.')
+ return False
+
+ if (not self.inputAnalysisDescription.get()):
+ tkMessageBox.showwarning('Missing Form Value',
+ 'You are missing an Analysis Description. Please try again.')
+ return False
+
+
+
+ # All options look good, right?
+
+
+ return True
+
+
+ def closeWindow(self):
+ self.parent.destroy()
+
\ No newline at end of file
diff --git a/src/AlleleGuiIMGT.py b/src/AlleleGuiIMGT.py
new file mode 100755
index 0000000..42c53d4
--- /dev/null
+++ b/src/AlleleGuiIMGT.py
@@ -0,0 +1,325 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+from os.path import expanduser
+
+import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
+from Tkinter import *
+
+from SubmissionGeneratorIMGT import SubmissionGeneratorIMGT
+from AlleleGuiIMGTInputForm import AlleleGuiIMGTInputForm
+from AlleleSubCommon import *
+#from HLAGene import HLAGene
+
+# The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class.
+class AlleleGuiIMGT(Tkinter.Frame):
+
+ # I shouldn't need to write a select-All method but TK is kind of annoying.
+ def selectall(self, event):
+
+ event.widget.tag_add("sel","1.0","end")
+
+
+ # Initialize the GUI
+ def __init__(self, root):
+ Tkinter.Frame.__init__(self, root)
+ root.title("Create and Save an IMGT Sequence Submission")
+ self.parent = root
+
+ # Ctrl-A doesn't work by default in TK. I guess I need to do it myself.
+ root.bind_class("Text","", self.selectall)
+
+ # To define the exit behavior. Save the input sequence text.
+ self.parent.protocol('WM_DELETE_WINDOW', self.saveAndExit)
+
+
+ button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}
+
+
+ # A frame for the Instructions Label.
+ self.instructionsFrame = Tkinter.Frame(self)
+ self.instructionText = Tkinter.StringVar()
+ self.instructionText.set('\nThis tool will generate an HLA allele submission for\n'
+ + 'the IMGT / HLA nucleotide database.\n'
+ + 'For more information:\n')
+ Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack()
+ self.instructionsFrame.pack(expand=False, fill='both')
+
+ # Make a frame for the more-info buttons
+ self.moreInfoFrame = Tkinter.Frame(self)
+ self.howToUseButton = Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse)
+ self.howToUseButton.grid(row=0, column=0)
+ self.exampleButton = Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence)
+ self.exampleButton.grid(row=0, column=1)
+ self.moreInfoFrame.pack()
+
+ # Create a frame for the input widget, add scrollbars.
+ self.featureInputFrame = Tkinter.Frame(self)
+
+ self.featureInstrText = Tkinter.StringVar()
+ self.featureInstrText.set('Annotated Sequence:')
+ self.featureInstrLabel = Tkinter.Label(self.featureInputFrame, width=80, height=1, textvariable=self.featureInstrText).pack()
+
+ self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL)
+ self.featureInputXScrollbar.pack(side=BOTTOM, fill=X)
+
+ self.featureInputYScrollbar = Scrollbar(self.featureInputFrame)
+ self.featureInputYScrollbar.pack(side=RIGHT, fill=Y)
+
+ self.featureInputGuiObject = Tkinter.Text(
+ self.featureInputFrame
+ , width=80, height=8
+ , wrap=NONE
+ , xscrollcommand=self.featureInputXScrollbar.set
+ , yscrollcommand=self.featureInputYScrollbar.set
+ )
+
+ self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview)
+ self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview)
+
+ self.featureInputGuiObject.pack(expand=True, fill='both')
+ self.featureInputFrame.pack(expand=True, fill='both')
+
+
+ # Create Frame for "Generate Submission" button.
+ self.submButtonFrame = Tkinter.Frame(self)
+ self.submissionOptionsButton = Tkinter.Button(self.submButtonFrame, text='Submission Options', command=self.chooseSubmissionOptions)
+ self.submissionOptionsButton.grid(row=0, column=0)
+ self.generateSubmissionButton = Tkinter.Button(self.submButtonFrame, text=unichr(8681) + ' Generate an IMGT submission ' + unichr(8681), command=self.constructSubmission)
+ self.generateSubmissionButton.grid(row=0, column=1)
+ self.submButtonFrame.pack()
+
+
+ # Output interface is contained on a frame.
+ self.submOutputFrame = Tkinter.Frame(self)
+
+ self.outputEMBLSubmission = Tkinter.StringVar()
+ self.outputEMBLSubmission.set('Allele Submission Preview:')
+ self.outputEMBLLabel = Tkinter.Label(self.submOutputFrame, width=80, height=1, textvariable=self.outputEMBLSubmission).pack()
+
+ self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL)
+ self.submOutputXScrollbar.pack(side=BOTTOM, fill=X)
+
+ self.submOutputYScrollbar = Scrollbar(self.submOutputFrame)
+ self.submOutputYScrollbar.pack(side=RIGHT, fill=Y)
+
+ self.submOutputGuiObject = Tkinter.Text(
+ self.submOutputFrame, width=80, height=8, wrap=NONE
+ , xscrollcommand=self.submOutputXScrollbar.set
+ , yscrollcommand=self.submOutputYScrollbar.set
+ )
+
+ self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview)
+ self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview)
+
+ self.submOutputGuiObject.pack(expand=True, fill='both')
+ self.submOutputFrame.pack(expand=True, fill='both')
+
+ self.uploadSubmissionFrame = Tkinter.Frame(self)
+ self.saveSubmissionButton = Tkinter.Button(self.uploadSubmissionFrame, text='Save Submission to My Computer', command=self.saveSubmissionFile)
+ self.saveSubmissionButton.pack(**button_opt)
+ self.exitButton = Tkinter.Button(self.uploadSubmissionFrame, text='Exit', command=self.saveAndExit)
+ self.exitButton.pack(**button_opt)
+ self.uploadSubmissionFrame.pack()
+
+ self.pack(expand=True, fill='both')
+
+
+
+ def chooseSubmissionOptions(self):
+ print ('Opening the IMGT Submission Options Dialog')
+
+ self.disableGUI()
+
+ imgtOptionsRoot = Tkinter.Toplevel()
+ imgtOptionsRoot.bind("", self.enableGUI)
+ AlleleGuiIMGTInputForm(imgtOptionsRoot).pack()
+
+ # Set the X and the Y Position of the options window, so it is nearby.
+ imgtOptionsRoot.update()
+ windowXpos = str(self.parent.winfo_geometry().split('+')[1])
+ windowYpos = str(self.parent.winfo_geometry().split('+')[2])
+ newGeometry = (str(imgtOptionsRoot.winfo_width()) + 'x'
+ + str(imgtOptionsRoot.winfo_height()) + '+'
+ + str(windowXpos) + '+'
+ + str(windowYpos))
+ imgtOptionsRoot.geometry(newGeometry)
+
+ imgtOptionsRoot.mainloop()
+
+
+ def sampleSequence(self):
+ self.featureInputGuiObject.delete('1.0','end')
+ self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')
+
+ # Clear the password, keep the username
+ assignConfigurationValue('imgt_password','')
+
+ assignConfigurationValue("allele_name",'Allele:01:02')
+ assignConfigurationValue('gene','HLA-C')
+ assignConfigurationValue('sample_id', 'Donor_12345')
+ assignConfigurationValue('class','1')
+
+ assignConfigurationValue('embl_sequence_accession', 'LT123456')
+ assignConfigurationValue('embl_release_date', '01/01/2020')
+
+ assignConfigurationValue('is_published','0')
+
+ assignConfigurationValue('reference_title', 'Published Reference Title')
+ assignConfigurationValue('reference_authors', 'Albert Authorman, Ben Bioinformaticist, Cindy Cell-Culture')
+ assignConfigurationValue('reference_journal', 'Scientific Journal of Research')
+
+ assignConfigurationValue('closest_known_allele', 'HLA-C*01:02:01')
+ assignConfigurationValue('closest_allele_written_description', 'This allele has a C->G polymorphism in Exon 1.\nPosition 5 in the coding sequence.\nThis polymorphism is interesting because of science.')
+
+ assignConfigurationValue('ethnic_origin', 'Unknown')
+ assignConfigurationValue('sex', 'Unknown')
+ assignConfigurationValue('consanguineous', 'Unknown')
+ assignConfigurationValue('homozygous', 'Unknown')
+
+
+ self.constructSubmission()
+
+ # This method should popup some instruction text in a wee window.
+ # This should be explicit on how to use the tool.
+ def howToUse(self):
+ tkMessageBox.showinfo('How to use this tool',
+ 'This software is to be used to create an\n'
+ + 'IMGT-formatted submission document,\n'
+ + 'which specifies a (novel) HLA allele.\n\n'
+
+ + 'This tool requires you to submit a\n'
+ + 'full length HLA allele, including\n'
+ + '5\' and 3\' UTRs.\n\n'
+
+ + 'Use capital letters for exons,\n'
+ + 'lowercase for introns & UTRs.\n\n'
+
+ + 'Push the "Example Sequence" button to see a small example of'
+ + ' a formatted sequence.\n'
+ + 'Sequences should follow this pattern:\n'
+ + '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n'
+
+ + 'To use this tool:\n'
+ + '1.) Fill in a Sample ID, Gene Name, and Allele.'
+ + ' This text will be included in the submission.\n'
+ + '2.) Paste your formatted sequence in the\n'
+ + 'Annotated Sequence text area.\n'
+ + '3.) Push \"Generate an IMGT submission\" button'
+ + ' to generate a submission.\n'
+ + '4.) Push the "Save the submission" button'
+ + ' to store the submission on your computer.\nYou can submit this file to IMGT.\n\n'
+
+ + 'All spaces, tabs, and newlines are'
+ + ' removed before the nucleotide sequence is translated.'
+ )
+
+ def contactInformation(self):
+ # This method should list contact information for MUMC, and a link to the github page.
+ tkMessageBox.showinfo('Contact Information',
+ 'This software was created at\n'
+ + 'Maastricht University Medical Center\n'
+ + 'Transplantation Immunology\n'
+ + 'Tissue Typing Laboratory.\n'
+ + 'by Ben Matern:\n'
+ + 'ben.matern@mumc.nl\n\n'
+
+ + 'Please send Ben your bioinformatics\n'
+ + 'and data related questions.\n\n'
+
+ + 'all other inquiries can be directed\n'
+ + 'to Marcel Tilanus:\n'
+ + 'm.tilanus@mumc.nl\n\n'
+
+ + 'This code will be hosted at:\n'
+ + 'https://github.com/transplantation-\nimmunology/saddle-bags\n'
+ + 'You will find more information on\n'
+ + 'IMGT\'s data format on that page.'
+
+ )
+
+ # Ask user for a output file location, and write the IMGT submission to a file.
+ # This takes the input from the output field, rather than generate a new submission.
+ # So the user can edit the submission before or after saving it.
+ def saveSubmissionFile(self):
+
+ self.dir_opt = options = {}
+
+ options['initialdir'] = expanduser("~")
+ options['parent'] = self
+ options['title'] = 'Specify your output file.'
+ options['initialfile'] = 'IMGT.HLA.Submission.txt'
+ outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)
+ submissionText = self.submOutputGuiObject.get('1.0', 'end')
+ outputFileObject.write(submissionText)
+
+ # Gather sequence information from the input elements, and generate a text IMGT submission.
+ def constructSubmission(self):
+ try:
+
+ allGen = SubmissionGeneratorIMGT()
+ roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end')
+
+ allGen.sequenceAnnotation = annotateRoughInputSequence(roughFeatureSequence)
+ imgtSubmission = allGen.buildIMGTSubmission()
+
+ if (imgtSubmission is None or len(imgtSubmission) < 1):
+ tkMessageBox.showerror('Empty submission text'
+ ,'You are missing some required information.\n'
+ + 'Try the \'Submission Options\' button.\n')
+
+ self.submOutputGuiObject.delete('1.0','end')
+ self.submOutputGuiObject.insert('1.0', '')
+ else:
+ self.submOutputGuiObject.delete('1.0','end')
+ self.submOutputGuiObject.insert('1.0', imgtSubmission)
+
+
+ except KeyError, e:
+ tkMessageBox.showerror('Missing Submission Options'
+ ,'You are missing some required information.\n'
+ + 'Use the \'Submission Options\' button.\n'
+ + 'Missing Data: ' + str(e))
+
+ def saveAndExit(self):
+ assignConfigurationValue('sequence', self.featureInputGuiObject.get('1.0', 'end'))
+ self.parent.destroy()
+
+
+ def enableGUI(self, event=None):
+ self.toggleGUI(True)
+
+ def disableGUI(self):
+ self.toggleGUI(False)
+
+ def toggleGUI(self, isEnabled):
+ #print ('Toggling GUI Widgets:' + str(isEnabled))
+
+ newState = (NORMAL if (isEnabled) else DISABLED)
+
+ # Choosing the widgets individually, this makes the most sense I think.
+ self.howToUseButton.config(state=newState)
+ self.exampleButton.config(state=newState)
+ self.featureInputGuiObject.config(state=newState)
+ self.submissionOptionsButton.config(state=newState)
+ self.generateSubmissionButton.config(state=newState)
+ self.submOutputGuiObject.config(state=newState)
+ #self.uploadButton.config(state=newState)
+ self.saveSubmissionButton.config(state=newState)
+ self.exitButton.config(state=newState)
+
+
+
diff --git a/src/AlleleGuiIMGTInputForm.py b/src/AlleleGuiIMGTInputForm.py
new file mode 100755
index 0000000..644f275
--- /dev/null
+++ b/src/AlleleGuiIMGTInputForm.py
@@ -0,0 +1,475 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+
+import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
+#from Tkinter import *
+from Tkinter import IntVar, Radiobutton
+#from ttk import *
+
+from AlleleSubCommon import *
+from ScrolledWindow import VerticalScrolledFrame
+
+# I am using this ScrolledWindow class instead of a Frame.
+# This interface is too big for one screen, need a scrollbar.
+
+class AlleleGuiIMGTInputForm(VerticalScrolledFrame):
+
+ # Initialize the GUI
+    def __init__(self, root):
+        """Build the IMGT submission options form inside root.
+
+        The form is a vertical stack of frames placed in self.interior:
+        instructions, submission details, reference details, donor details
+        and a 'Save Options' button.  Closing the window and pressing Return
+        both run saveOptions.  Once all widgets exist, loadOptions is called
+        to fill them from the stored configuration.
+
+        root: the Tk window that hosts this form.  Its title, resize
+        behaviour, close protocol and Return key binding are set here.
+        """
+        VerticalScrolledFrame.__init__(self, root)
+        root.title("Choose IMGT Submission Options")
+        self.parent = root
+
+        # This window should not be resizeable. I guess.
+        # Maybe height should be resizeable, I don't know.
+        self.parent.resizable(width=False, height=False)
+
+        # To define the exit behavior. Save and exit.
+        self.parent.protocol('WM_DELETE_WINDOW', self.saveOptions)
+
+        # Define the return behavior. Same as "close window" etc.
+        # The event sequence has to name the key; an empty sequence names
+        # no event at all.
+        root.bind('<Return>', self.returnFunction)
+
+        self.instructionsFrame = Tkinter.Frame(self.interior)
+        self.instructionText = Tkinter.StringVar()
+        self.instructionText.set('\nThese options are required for an IMGT allele submission.\n'
+            + 'Login Credentials will not be stored, but they will be sent to IMGT via\n'
+            + 'secure https connection.\n')
+        Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack()
+        self.instructionsFrame.pack()
+
+        # Every "label + entry" row of the form is built by the same helper.
+        addRow = self._addLabeledEntry
+
+        # Make a frame to contain the input variables.
+        # self.interior is defined in the ScrolledWindow class.
+        self.submissionDetailsInputFrame = Tkinter.Frame(self.interior)
+        detailsFrame = self.submissionDetailsInputFrame
+
+        (self.usernameInstrText, self.usernameInstrLabel,
+            self.inputUsername, self.inputUsernameEntry) = addRow(
+            detailsFrame, 0, 'IMGT Username:')
+
+        # The password entry shows '*' instead of the typed characters.
+        (self.passwordInstrText, self.passwordInstrLabel,
+            self.inputPassword, self.inputPasswordEntry) = addRow(
+            detailsFrame, 1, 'IMGT Password:', show="*")
+
+        # TODO: Submitter / Laboratory ID.
+        # This is on the IMGT form.
+        # Do I know this information? Do I need to tell user how to get it?
+
+        (self.sampleIDInstrText, self.sampleIDinstrLabel,
+            self.inputSampleID, self.inputSampleIDEntry) = addRow(
+            detailsFrame, 2, 'Sample ID:')
+
+        (self.geneInstrStringVar, self.geneInstrLabel,
+            self.inputGene, self.inputGeneEntry) = addRow(
+            detailsFrame, 3, 'Gene:')
+
+        # HLA class selector: 1 = Class I (the default), 2 = Class II.
+        self.chooseClassIntVar = IntVar()
+        self.chooseClassIntVar.set(1)
+        Radiobutton(detailsFrame, text="HLA Class I ", variable=self.chooseClassIntVar, value=1).grid(row=4, column=0)
+        Radiobutton(detailsFrame, text="HLA Class II", variable=self.chooseClassIntVar, value=2).grid(row=4, column=1)
+
+        (self.alleleInstrText, self.alleleInstrLabel,
+            self.inputAllele, self.inputAlleleEntry) = addRow(
+            detailsFrame, 5, 'Allele Local Name:')
+
+        # New form stuff
+        # Gotta add this to the load/save config nonsense below.
+
+        # TODO: Can I just load an EMBL accession? I think that is possible.
+        # Easier than filling it in here.
+
+        # TODO: When EMBL Sequence Accession # is provided, I can probably
+        # look up an annotated sequence.
+        # Should I put a button next to this field?
+        # Button: "Lookup This EMBL Sequence Accession #"
+        # If it is found, then I already know the sequence with exon boundaries.
+
+        # TODO: Do I need to specify if it is EMBL / Genbank / The other one?
+        # Probably not. I can require an EMBL code and disregard Genbank.
+        # Radio Buttons?
+        # EMBL / Genbank Accession #
+        (self.emblAccInstrText, self.emblAccInstrLabel,
+            self.inputEmblAcc, self.inputEmblAccEntry) = addRow(
+            detailsFrame, 6, 'EMBL Sequence Accession #:')
+
+        # Release Date
+        (self.releaseDateInstrText, self.releaseDateInstrLabel,
+            self.inputReleaseDate, self.inputReleaseDateEntry) = addRow(
+            detailsFrame, 7, 'IMGT Release Date:')
+
+        # Reference Details
+        # Is this allele in a published paper or not?
+        # 0=unpublished, 1=published
+        self.publishedReferenceIntVar = IntVar()
+        self.publishedReferenceIntVar.set(0)
+
+        self.submissionDetailsInputFrame.pack()
+
+        self.unpublishedReferenceFrame = Tkinter.Frame(self.interior)
+
+        self.referenceInstrText = Tkinter.StringVar()
+        self.referenceInstrText.set('\nPlease provide some information about a\npublished paper relevant to this sequence.\n')
+        # Create first, pack second: pack() returns None, so chaining it onto
+        # the constructor would store None instead of the label.
+        self.referenceInstrLabel = Tkinter.Label(self.unpublishedReferenceFrame, width=70, height=4, textvariable=self.referenceInstrText)
+        self.referenceInstrLabel.pack()
+
+        Radiobutton(self.unpublishedReferenceFrame, text="No Published Reference.", variable=self.publishedReferenceIntVar, value=0).pack()
+        self.unpublishedReferenceFrame.pack()
+
+        self.publishedReferenceFrame = Tkinter.Frame(self.interior)
+        referenceFrame = self.publishedReferenceFrame
+
+        # Radio Button: Published
+        # Its parent is unpublishedReferenceFrame, not publishedReferenceFrame.
+        Radiobutton(self.unpublishedReferenceFrame, text="Use This Reference:", variable=self.publishedReferenceIntVar, value=1).pack()
+
+        # Reference Title
+        (self.referenceTitleInstrText, self.referenceTitleInstrLabel,
+            self.inputReferenceTitle, self.inputReferenceTitleEntry) = addRow(
+            referenceFrame, 1, 'Reference Title:')
+
+        # Authors
+        (self.referenceAuthorsInstrText, self.referenceAuthorsInstrLabel,
+            self.inputReferenceAuthors, self.inputReferenceAuthorsEntry) = addRow(
+            referenceFrame, 2, 'Reference Authors:')
+
+        # Journal
+        (self.referenceJournalInstrText, self.referenceJournalInstrLabel,
+            self.inputReferenceJournal, self.inputReferenceJournalEntry) = addRow(
+            referenceFrame, 3, 'Reference Journal:')
+
+        self.publishedReferenceFrame.pack()
+
+        # Make a frame to contain the input variables.
+        # I had to make 2 of them to organize my gui, maybe I can name this better.
+        self.submissionDetailsInputFrame2 = Tkinter.Frame(self.interior)
+        donorFrame = self.submissionDetailsInputFrame2
+
+        # /alignment -> defined by IMGT sequence alignment service
+        # In this case, it is the closest known allele.
+        (self.closestAlleleInstrText, self.closestAlleleInstrLabel,
+            self.inputClosestAllele, self.inputClosestAlleleEntry) = addRow(
+            donorFrame, 1, 'Closest Known HLA Allele:')
+
+        # Written Description
+        # Looks like this is a description of how the sequence differs from
+        # the closest known allele.
+        (self.closestAlleleWrittenDescriptionInstrText,
+            self.closestAlleleWrittenDescriptionInstrLabel,
+            self.inputClosestAlleleWrittenDescription,
+            self.inputClosestAlleleWrittenDescriptionEntry) = addRow(
+            donorFrame, 2, 'Differences from Closest Allele:')
+
+        # DONOR INFORMATION
+
+        # Cell ID (cellnum)
+        # Wait, is this the same as the sample ID? Should I move the sample ID field down here?
+        # No. I am disregarding this sample ID.
+
+        # Ethnic Origin
+        (self.ethnicOriginInstrText, self.ethnicOriginInstrLabel,
+            self.inputEthnicOrigin, self.inputEthnicOriginEntry) = addRow(
+            donorFrame, 3, 'Ethnic Origin:')
+
+        # Sex
+        (self.sexInstrText, self.sexInstrLabel,
+            self.inputSex, self.inputSexEntry) = addRow(
+            donorFrame, 4, 'Sex:')
+
+        # Consanguineous (T/F)
+        (self.consanguineousInstrText, self.consanguineousInstrLabel,
+            self.inputConsanguineous, self.inputConsanguineousEntry) = addRow(
+            donorFrame, 5, 'Sample is Consanguineous:')
+
+        # Homozygous (T/F)
+        (self.homozygousInstrText, self.homozygousInstrLabel,
+            self.inputHomozygous, self.inputHomozygousEntry) = addRow(
+            donorFrame, 6, 'Sample is Homozygous:')
+
+        # TODO: Comments. Where does this stuff go? This is details about the
+        # lab of origin. I haven't tried specifying this one yet, ask James
+        # how to do it.
+        #   Comments
+        #   Lab of Origin
+        #   Lab Contact
+
+        # TODO Add form for cell availability
+        #   Cell Availability
+        #   Material Available (T/F)
+        #   Cell Bank (Text)
+        #   Cell Workshop Details
+
+        # Alternative HLA DNA Typing
+        #   Loop?
+        #   Dropdown Box with another Entry Field?
+        #   Yeah Start with them Blank, choose gene from box.
+        #   Store in globals, but don't write to config..
+
+        # Source Serology Typing
+        #   Maybe the same as DNA typing
+
+        # Sequencing Methods
+
+        # Primers
+        #   This is probably a Dropdown with Entry field also.
+
+        self.submissionDetailsInputFrame2.pack()
+
+        # Make a frame for the save options button.
+        self.saveOptionsFrame = Tkinter.Frame(self.interior)
+        Tkinter.Button(self.saveOptionsFrame, text='Save Options', command=self.saveOptions).grid(row=0, column=0)
+        self.saveOptionsFrame.pack()
+
+        self.loadOptions()
+
+    # Build one "label + entry" row of the form and hand back its parts.
+    def _addLabeledEntry(self, frame, row, labelText, labelWidth=35, entryWidth=35, **entryOptions):
+        """Grid a caption label (column 0) and a text entry (column 1) into frame.
+
+        frame: parent widget whose children are laid out with grid.
+        row: grid row shared by the label and the entry.
+        labelText: caption shown in the label.
+        labelWidth, entryWidth: widget widths; both default to 35.
+        entryOptions: extra keyword options passed to Tkinter.Entry.
+
+        Returns the tuple (labelVar, label, inputVar, entry): the StringVar
+        behind the caption, the Label widget, the StringVar behind the
+        entry, and the Entry widget.  The widgets are created first and
+        gridded in a separate call, because grid() itself returns None.
+        """
+        labelVar = Tkinter.StringVar()
+        labelVar.set(labelText)
+        label = Tkinter.Label(frame, width=labelWidth, height=1, textvariable=labelVar)
+        label.grid(row=row, column=0)
+
+        inputVar = Tkinter.StringVar()
+        entry = Tkinter.Entry(frame, width=entryWidth, textvariable=inputVar, **entryOptions)
+        entry.grid(row=row, column=1)
+
+        return labelVar, label, inputVar, entry
+
+ # I needed a function for the return keypress to latch onto.
+ # It is just a wrapper for the saveOptions method.
+    def returnFunction(self, event):
+        """Event-handler wrapper around saveOptions; the event is not used."""
+        saveCallback = self.saveOptions
+        saveCallback()
+
+ # Load previously stored configuration values into the form fields.
+    def loadOptions(self):
+        """Fill the form variables from the stored configuration values.
+
+        Each key is read once with getConfigurationValue.  A key whose value
+        is None leaves its form variable untouched.  Keys are processed in
+        form order, so a failure on 'class' leaves the later fields unloaded.
+
+        Raises:
+            Exception: if the stored 'class' value is neither '1' nor '2'
+                after conversion with str().
+        """
+        # (configuration key, Tk variable that receives the stored value)
+        fieldsInFormOrder = (
+            ('imgt_username', self.inputUsername),
+            ('imgt_password', self.inputPassword),
+            ('sample_id', self.inputSampleID),
+            ('gene', self.inputGene),
+            ('class', self.chooseClassIntVar),
+            ('allele_name', self.inputAllele),
+            ('embl_sequence_accession', self.inputEmblAcc),
+            ('embl_release_date', self.inputReleaseDate),
+            # 0=unpublished, 1=published
+            ('is_published', self.publishedReferenceIntVar),
+            ('reference_title', self.inputReferenceTitle),
+            ('reference_authors', self.inputReferenceAuthors),
+            # reference_journal used to be loaded twice; once is enough.
+            ('reference_journal', self.inputReferenceJournal),
+            ('closest_known_allele', self.inputClosestAllele),
+            ('closest_allele_written_description', self.inputClosestAlleleWrittenDescription),
+            ('ethnic_origin', self.inputEthnicOrigin),
+            ('sex', self.inputSex),
+            ('consanguineous', self.inputConsanguineous),
+            ('homozygous', self.inputHomozygous),
+        )
+
+        for configKey, tkVariable in fieldsInFormOrder:
+            storedValue = getConfigurationValue(configKey)
+            if storedValue is None:
+                continue
+
+            if configKey == 'class':
+                # Only the two radio button values are acceptable here.
+                if str(storedValue) not in ('1', '2'):
+                    raise Exception('Error loading IMGT submission options. Invalid class:' + str(storedValue))
+                storedValue = int(str(storedValue))
+
+            tkVariable.set(storedValue)
+
+
+
+
+
+    def saveOptions(self):
+        """Store every form value in the configuration and close the window.
+
+        Nothing is stored and the window stays open when checkOptions
+        returns a false value.
+        """
+        if not self.checkOptions():
+            # Not ready to save, some options are missing.
+            return
+
+        print ('Saving Options....')
+
+        # (configuration key, callable producing the value to store).
+        # Each value is read from its widget right before it is stored.
+        valuesToStore = (
+            ('imgt_username', self.inputUsername.get),
+            # I store this password so I can use it in the submission.
+            # I don't ever want to save the password. Make sure it isn't
+            # being saved in the config, in AlleleSubCommon.py
+            ('imgt_password', self.inputPassword.get),
+            ('sample_id', self.inputSampleID.get),
+            ('gene', self.inputGene.get),
+            ('class', lambda: str(self.chooseClassIntVar.get())),
+            ('allele_name', self.inputAllele.get),
+            ('embl_sequence_accession', self.inputEmblAcc.get),
+            ('embl_release_date', self.inputReleaseDate.get),
+            ('is_published', self.publishedReferenceIntVar.get),
+            ('reference_title', self.inputReferenceTitle.get),
+            ('reference_authors', self.inputReferenceAuthors.get),
+            ('reference_journal', self.inputReferenceJournal.get),
+            ('closest_known_allele', self.inputClosestAllele.get),
+            ('closest_allele_written_description', self.inputClosestAlleleWrittenDescription.get),
+            ('ethnic_origin', self.inputEthnicOrigin.get),
+            ('sex', self.inputSex.get),
+            # TODO: Accepted values are 'Yes', 'No', 'Unknown'
+            ('consanguineous', self.inputConsanguineous.get),
+            ('homozygous', self.inputHomozygous.get),
+        )
+        for configKey, readValue in valuesToStore:
+            assignConfigurationValue(configKey, readValue())
+
+        # Close the window
+        self.parent.destroy()
+
+    def checkOptions(self):
+        """Check that the required form fields are filled in.
+
+        Fields are checked in form order.  For the first empty one a warning
+        dialog is shown and False is returned; later fields are not looked
+        at.  The three reference fields are only required when the
+        'published' radio button is selected (publishedReferenceIntVar is
+        not 0).  Username, password and the written description of the
+        closest allele are not checked.
+
+        Returns:
+            True when every required field has a value, otherwise False.
+        """
+        # TODO this method
+        print ('Checking options.')
+
+        # Don't check the IMGT Username
+        # Don't check the IMGT Password
+
+        # Each entry: (Tk variables that must all be non-empty, warning text).
+        requiredFields = [
+            ((self.inputSampleID,),
+                'You are missing a Sample ID. Please try again.'),
+            ((self.inputGene,),
+                'You are missing a Gene. Please try again.'),
+            ((self.inputAllele,),
+                'You are missing an Allele Name. Please try again.'),
+            ((self.inputEmblAcc,),
+                'You are missing an EMBL Accession Number. Please try again.'),
+            ((self.inputReleaseDate,),
+                'You are missing an IMGT Submission Release Date. Please try again.'),
+        ]
+
+        # 0 = unpublished, nothing to check. Anything else = published.
+        if self.publishedReferenceIntVar.get() != 0:
+            requiredFields.append((
+                (self.inputReferenceTitle, self.inputReferenceAuthors, self.inputReferenceJournal),
+                'You must supply information about the published Reference. Please try again.'))
+
+        requiredFields.extend([
+            ((self.inputClosestAllele,),
+                'You are missing the closest known reference allele to this sequence. Please provide this information.'),
+            ((self.inputEthnicOrigin,),
+                'Please provide a description of an ethnic origin for this sample.'),
+            ((self.inputSex,),
+                'Please identify the sex for this sample.'),
+            # TODO: Accepted values are 'Yes', 'No', 'Unknown' I think
+            ((self.inputConsanguineous,),
+                'Please indicate if the sample is consanguineous or not.'),
+            ((self.inputHomozygous,),
+                'Please indicate if the sample is homozygous or not.'),
+        ])
+
+        for tkVariables, warningText in requiredFields:
+            if any(not tkVariable.get() for tkVariable in tkVariables):
+                tkMessageBox.showwarning('Missing Form Value', warningText)
+                return False
+
+        # All options look good, right?
+        return True
+
+
+    def closeWindow(self):
+        """Destroy the hosting window; nothing is validated or stored here."""
+        hostWindow = self.parent
+        hostWindow.destroy()
+
\ No newline at end of file
diff --git a/src/AlleleGuiMain.py b/src/AlleleGuiMain.py
new file mode 100755
index 0000000..e49dbd5
--- /dev/null
+++ b/src/AlleleGuiMain.py
@@ -0,0 +1,204 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+import os
+
+import Tkinter, Tkconstants, tkFileDialog, tkMessageBox
+from Tkinter import *
+
+from AlleleGuiEMBL import AlleleGuiEMBL
+from AlleleGuiIMGT import AlleleGuiIMGT
+
+from AlleleSubCommon import *
+
+class AlleleGuiMain(Tkinter.Frame):
+
+ # Initialize the GUI
+    def __init__(self, root):
+        """Create the main menu frame inside root and build its widgets.
+
+        root: the Tk window that hosts this frame; it is kept as self.parent.
+        """
+        Tkinter.Frame.__init__(self, root)
+        self.parent = root
+        self.parent.title("An HLA Allele Submission Generator")
+
+        self.initialize()
+
+ # Initialize GUI elements
+    def initialize(self):
+        """Load the configuration, then build and place all main menu widgets."""
+        # Load configuration
+        loadConfigurationFile()
+
+        # To define the exit behavior
+        self.parent.protocol('WM_DELETE_WINDOW', self.closeWindow)
+
+        # This window should not be resizeable. I guess.
+        self.parent.resizable(width=False, height=False)
+
+        # Instruction Frame
+        self.instructionFrame = Tkinter.Frame(self)
+        self.instructionText = Tkinter.StringVar()
+        self.instructionText.set(
+            '\nSaddlebags is an HLA Allele Submission Generator.\n'
+            'You can generate an allele submission text file for either\n'
+            'the EMBL/ENA or IMGT/HLA nucleotide databases. You must choose:\n')
+        instructionLabel = Tkinter.Label(self.instructionFrame, width=85, height=5, textvariable=self.instructionText)
+        instructionLabel.pack()
+        self.instructionFrame.pack()
+
+        # Make a frame for the more-info buttons.
+        # Each entry: (caption, callback, grid row, grid column).
+        self.moreInfoFrame = Tkinter.Frame(self)
+        menuButtons = (
+            ('Generate an EMBL submission', lambda: self.openAlleleSubGUI('EMBL'), 0, 0),
+            ('Generate an IMGT submission', lambda: self.openAlleleSubGUI('IMGT'), 0, 1),
+            (' How to use this tool ', self.howToUse, 1, 0),
+            ('Contacting or Citing MUMC', self.contactInformation, 1, 1),
+        )
+        for caption, callback, gridRow, gridColumn in menuButtons:
+            menuButton = Tkinter.Button(self.moreInfoFrame, text=caption, command=callback)
+            menuButton.grid(row=gridRow, column=gridColumn)
+        self.moreInfoFrame.pack()
+
+        # Frame for the exit button
+        self.exitFrame = Tkinter.Frame(self)
+        exitButton = Tkinter.Button(self.exitFrame, text='Exit', command=self.closeWindow)
+        exitButton.pack(fill=Tkconstants.BOTH, padx=35, pady=5)
+        self.exitFrame.pack()
+
+        self.pack()
+
+        self.initializeWindowLocation()
+
+ # Put the GUI on the center of the screen. Doesn't make sense for it to start in a corner.
+ # Well, lets divide by 4 instead of 2. Center is too...centered.
+    def initializeWindowLocation(self):
+        """Move the window so that its centre sits a quarter of the way into the screen.
+
+        The window keeps its current size.  The new top-left corner is
+        (screenWidth // 4 - width // 2, screenHeight // 4 - height // 2).
+        """
+        self.parent.update_idletasks()
+        screenWidth = self.parent.winfo_screenwidth()
+        screenHeight = self.parent.winfo_screenheight()
+
+        # geometry() looks like 'WIDTHxHEIGHT+X+Y'; only the size part is used.
+        sizeText = self.parent.geometry().split('+')[0]
+        ownWidth, ownHeight = [int(token) for token in sizeText.split('x')]
+
+        # Explicit floor division: the result is the same whole-pixel value
+        # whether "/" on integers truncates (Python 2) or not (Python 3).
+        x = screenWidth // 4 - ownWidth // 2
+        y = screenHeight // 4 - ownHeight // 2
+        self.parent.geometry("%dx%d+%d+%d" % (ownWidth, ownHeight, x, y))
+
+
+ # This method should popup some instruction text in a wee window.
+ # This should be explicit on how to use the tool.
+    def howToUse(self):
+        """Show the usage instructions in an information dialog."""
+        instructionParts = [
+            # What the tool is for.
+            'This software is to be used to create an\n',
+            'EMBL-formatted submission document,\n',
+            'which specifies a (novel) HLA allele.\n\n',
+
+            # What kind of sequence is expected.
+            'This tool requires you to submit a\n',
+            'full length HLA allele, including\n',
+            '5\' and 3\' UTRs.\n\n',
+
+            'Use capital letters for exons,\n',
+            'lowercase for introns & UTRs.\n\n',
+
+            'Push the "Example Sequence" button to see a small example of',
+            ' a formatted sequence.\n',
+            'Sequences should follow this pattern:\n',
+            '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n',
+
+            # Step by step instructions.
+            'To use this tool:\n',
+            '1.) Fill in a Sample ID, Gene Name, and Allele.',
+            ' This text will be included in the submission.\n',
+            '2.) Paste your formatted sequence in the\n',
+            'Annotated Sequence text area.\n',
+            '3.) Push \"Generate an EMBL submission\" button',
+            ' to generate a submission.\n',
+            '4.) Push the "Save the submission" button',
+            ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n',
+
+            'All spaces, tabs, and newlines are',
+            ' removed before the nucleotide sequence is translated.',
+        ]
+        tkMessageBox.showinfo('How to use this tool', ''.join(instructionParts))
+
+
+    def contactInformation(self):
+        """Show MUMC contact details and the project's GitHub address in an information dialog."""
+        contactParts = [
+            'This software was created at\n',
+            'Maastricht University Medical Center\n',
+            'Transplantation Immunology\n',
+            'Tissue Typing Laboratory.\n',
+            'by Ben Matern:\n',
+            'ben.matern@mumc.nl\n\n',
+
+            'Please send Ben your bioinformatics\n',
+            'and data related questions.\n\n',
+
+            'all other inquiries can be directed\n',
+            'to Marcel Tilanus:\n',
+            'm.tilanus@mumc.nl\n\n',
+
+            'This code will be hosted at:\n',
+            'https://github.com/transplantation-\nimmunology/saddle-bags\n',
+            'You will find more information on\n',
+            'EMBL\'s data format on that page.',
+        ]
+        tkMessageBox.showinfo('Contact Information', ''.join(contactParts))
+
+    def closeWindow(self):
+        """Write the configuration file, then destroy the main window."""
+        writeConfigurationFile()
+        mainWindow = self.parent
+        mainWindow.destroy()
+
+
+    def restoreWindowPosition(self):
+        """Apply the size and position stored by rememberWindowPosition to the main window."""
+        # Geometry is a string that looks like this: 599x144+681+52
+        # WidthxHeight+Xpos+Ypos
+        geometryPieces = (
+            self.windowWidth, 'x', self.windowHeight,
+            '+', self.windowXpos,
+            '+', self.windowYpos,
+        )
+        self.parent.geometry(''.join(geometryPieces))
+
+
+    def onCloseOtherFrame(self, event):
+        """Bring the main window back when the submission sub-window itself is the event's widget.
+
+        The handler is triggered for each widget on the subframe; events
+        for any widget other than self.alleleSubRoot are ignored.
+        """
+        if event.widget is not self.alleleSubRoot:
+            return
+
+        self.parent.deiconify()
+        self.restoreWindowPosition()
+
+    def rememberWindowPosition(self):
+        """Store the main window's current width, height and x/y position as strings on self."""
+        mainWindow = self.parent
+
+        # Remember the size of this window.
+        self.windowWidth = str(mainWindow.winfo_width())
+        self.windowHeight = str(mainWindow.winfo_height())
+
+        # "Geometry" is a string that looks like this: 599x144+681+52
+        # WidthxHeight+Xpos+Ypos, so tokens 1 and 2 are the position.
+        geometryTokens = mainWindow.winfo_geometry().split('+')
+        self.windowXpos = str(geometryTokens[1])
+        self.windowYpos = str(geometryTokens[2])
+
+    def openAlleleSubGUI(self, submissionType):
+        """Hide the main menu and open the EMBL or IMGT submission window.
+
+        The sub-window is a new Toplevel stored in self.alleleSubRoot.  It
+        keeps its own size but is placed at the position the main window
+        had, and its own mainloop is started.  onCloseOtherFrame is bound to
+        the sub-window's Destroy event.
+
+        submissionType: 'IMGT' or 'EMBL'.
+
+        Raises:
+            Exception: if submissionType is anything else.  The check runs
+                before the main window is hidden, so a bad value does not
+                leave the application without a visible window.
+        """
+        if submissionType not in ('IMGT', 'EMBL'):
+            raise Exception('Unknown Submission Type. I expected IMGT or EMBL:' + str(submissionType))
+
+        self.rememberWindowPosition()
+
+        self.parent.withdraw()
+        self.alleleSubRoot = Tkinter.Toplevel()
+        # The event sequence has to name the event; an empty sequence names
+        # none.  onCloseOtherFrame expects to be called for every widget of
+        # the sub-window and filters for the Toplevel itself.
+        self.alleleSubRoot.bind('<Destroy>', self.onCloseOtherFrame)
+
+        print ('Opening the ' + submissionType + ' Submission GUI')
+        if submissionType == 'IMGT':
+            AlleleGuiIMGT(self.alleleSubRoot).pack()
+        else:
+            AlleleGuiEMBL(self.alleleSubRoot).pack()
+
+        # Set the X and the Y Position of the window, so it is nearby.
+        # It is necessary to update the window before assigning geometry.
+        # Using Size Values from Subwindow, but Position values from Parent window
+        self.alleleSubRoot.update()
+        newGeometry = (str(self.alleleSubRoot.winfo_width()) + 'x'
+            + str(self.alleleSubRoot.winfo_height()) + '+'
+            + str(self.windowXpos) + '+'
+            + str(self.windowYpos))
+        self.alleleSubRoot.geometry(newGeometry)
+
+        self.alleleSubRoot.mainloop()
diff --git a/src/AlleleSubCommon.py b/src/AlleleSubCommon.py
new file mode 100755
index 0000000..f108d8d
--- /dev/null
+++ b/src/AlleleSubCommon.py
@@ -0,0 +1,319 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+import xml.etree.ElementTree as ET
+import xml.dom.minidom
+
+from os.path import isdir, split
+from os import makedirs
+
+from Bio.Seq import Seq
+from Bio.Alphabet import generic_dna
+
+import tkMessageBox
+
+import sys
+from os.path import dirname, join, abspath, isfile, expanduser
+
+from HLAGene import *
+
+# This is a short wrapper method to use biopython's translation method.
+# Most of this code is just checking for things that went wrong
+def translateSequence(inputSequence):
+    """Translate a coding nucleotide sequence into a protein string.
+
+    A short wrapper around biopython's translation method; most of the
+    body checks for things that went wrong.  The position of the first
+    stop codon ('*') decides the 'is_pseudo_gene' configuration value
+    ('0' when the stop codon is the last residue, '1' otherwise) and which
+    informational dialog, if any, is shown.
+
+    inputSequence: string of nucleotides; an empty string yields ''.
+    Returns the protein string, cut just before the first stop codon when
+    one is present.
+    Any exception is printed, shown in a dialog and then re-raised.
+    """
+    try:
+        # Do nothing if the input sequence is blank.
+        if len(inputSequence) == 0:
+            print('Translating a nucleotide sequence of length 0. That was easy.')
+            return ''
+
+        codingDna = Seq(inputSequence, generic_dna)
+        translatedProtein = str(codingDna.translate())
+        print('Exon Sequence before translation:' + codingDna)
+        print('Translated Protein:' + translatedProtein)
+
+        nucleotideCount = len(codingDna)
+        # A correct coding sequence consists of whole codons.
+        wholeCodons = (nucleotideCount % 3 == 0)
+        # Only the first stop codon matters; everything after it is dropped.
+        stopIndex = translatedProtein.find('*')
+
+        # Case 1: no stop codon at all.  The protein is returned untrimmed.
+        if stopIndex == -1:
+            assignConfigurationValue('is_pseudo_gene','1')
+            noStopMessage = 'The translated protein does not contain a stop codon.\n'
+            if not wholeCodons:
+                noStopMessage += 'The coding nucleotide sequence length (' + str(nucleotideCount) + ') is not a multiple of 3.\n'
+            noStopMessage += 'This is indicated by a /pseudo flag in the sequence submission.'
+            tkMessageBox.showinfo('No Stop Codon Found', noStopMessage)
+            return translatedProtein
+
+        proteinBeforeStop = translatedProtein[0:stopIndex]
+
+        # Case 2: the stop codon is the last residue (expected and correct).
+        if stopIndex == len(translatedProtein) - 1:
+            assignConfigurationValue('is_pseudo_gene','0')
+            if not wholeCodons:
+                tkMessageBox.showinfo('Extra Nucleotides After the Stop Codon',
+                    'The stop codon is at the correct position in the protein, but ' +
+                    'The coding nucleotide sequence length (' + str(nucleotideCount) + ') is not a multiple of 3.\n\n' +
+                    'Please double check your sequence.')
+            return proteinBeforeStop
+
+        # Case 3: the stop codon is premature (before the end of the protein).
+        assignConfigurationValue('is_pseudo_gene','1')
+        prematureMessage = ('Premature stop codon found:\nProtein Position (' +
+            str(stopIndex + 1) + '/' +
+            str(len(translatedProtein)) + ')\n\n' +
+            'This is indicated by a /pseudo flag in the sequence submission.\n')
+        if not wholeCodons:
+            prematureMessage += 'Nucleotide count is not a multiple of 3,\n'
+        prematureMessage += ('Double check your protein sequence,\n' +
+            'this might indicate a missense mutation.\n\n' +
+            'Translated Protein:\n' + translatedProtein +
+            '\n\nProtein in EMBL Submission:\n' + proteinBeforeStop +
+            '\n')
+        tkMessageBox.showinfo('Premature Stop Codon Detected', prematureMessage)
+        return proteinBeforeStop
+
+    except Exception:
+        print('Problem when translating protein:')
+        print(sys.exc_info()[1])
+        tkMessageBox.showinfo('Protein Translation Error',
+            'I could not translate your protein:\n' + str(sys.exc_info()[1]))
+
+        raise
+
+
+
+# The input file should be a string of nucleotides, with capital letters to identify exons and introns.
+# Annotations are expected and read in this format:
+# fiveprimeutrEXONONEintrononeEXONTWOintrontwoEXONTHREEthreeprimeutr
+# agctagctagctAGCTAGCtagctagctAGCTAGCtagctagctAGCTAGCTAgctagctagctag
+# All spaces, line feeds, and tabs are removed and ignored.
+def annotateRoughInputSequence(inputSequenceText):
+    """Split a case-annotated nucleotide string into exon and intron/UTR loci.
+
+    Capital letters mark exons, lowercase letters mark introns and UTRs.
+    Spaces, tabs, line feeds and carriage returns are removed first.
+
+    inputSequenceText: the raw annotated sequence text.
+    Returns an HLAGene whose ``fullSequence`` is the cleaned, upper-cased
+    sequence and whose ``loci`` list holds one GeneLocus per run of
+    same-case letters.  For empty input the loci are left untouched.
+    """
+    # TODO: I should accept a Fasta Input.
+    # Disregard the header line completely. Is there still sequence?
+    resultGeneLoci = HLAGene()
+
+    # Trim out any spaces, tabs, newlines.
+    cleanedGene = inputSequenceText
+    for whitespaceChar in (' ', '\n', '\t', '\r'):
+        cleanedGene = cleanedGene.replace(whitespaceChar, '')
+
+    # Keep an upper-cased copy as the full unannotated sequence.
+    unannotatedGene = cleanedGene.upper()
+    resultGeneLoci.fullSequence = unannotatedGene
+    print('Total Sequence Length = ' + str(len(unannotatedGene)))
+
+    if len(cleanedGene) == 0:
+        print('Empty sequence, I don\'t have anything to do.')
+        return resultGeneLoci
+
+    standardNucleotides = ('A','G','C','T')
+
+    def storeLocus(startIndex, endIndex, isExon):
+        # Append the feature covering [startIndex, endIndex) to the result.
+        locus = GeneLocus()
+        locus.sequence = cleanedGene[startIndex:endIndex].upper()
+        locus.exon = isExon
+        resultGeneLoci.loci.append(locus)
+
+    # Is the first feature an exon (capital) or an intron/UTR (lowercase)?
+    firstChar = cleanedGene[0]
+    insideAnExon = firstChar in standardNucleotides
+    if not insideAnExon and firstChar not in ('a','g','c','t'):
+        # Nonstandard nucleotide: carry on as if we started in an intron.
+        print('Nonstandard Nucleotide at the beginning of the sequence, not sure how to handle it')
+
+    locusBeginPosition = 0
+    for position, currentChar in enumerate(cleanedGene):
+        if currentChar.upper() not in standardNucleotides:
+            # Nonstandard characters never start or end a feature.
+            print('Nonstandard nucleotide detected at position ' + str(position) + ' : ' + currentChar
+                + '. If this is a wildcard character, you might be ok.')
+            continue
+
+        charIsExon = currentChar.isupper()
+        if charIsExon != insideAnExon:
+            # The case flipped: close the feature we were in and start a
+            # new one at this position.
+            storeLocus(locusBeginPosition, position, insideAnExon)
+            insideAnExon = charIsExon
+            locusBeginPosition = position
+
+    # The last feature is still open when the loop ends.  It should be a
+    # 3' UTR, but that is not guaranteed.
+    storeLocus(locusBeginPosition, len(cleanedGene), insideAnExon)
+
+    # Annotate the loci (name them) and print the results of the read file.
+    resultGeneLoci.annotateLoci()
+    resultGeneLoci.printGeneSummary()
+
+    return resultGeneLoci
+
+# This method is a directory-safe way to open up a write file.
+def createOutputFile(outputfileName):
+    """Open outputfileName for writing, creating its directory if needed.
+
+    outputfileName: path of the file to create or overwrite.
+    Returns the open file object (mode 'w'); the caller must close it.
+    """
+    tempDir = split(outputfileName)[0]
+    # A bare file name has an empty directory part, and makedirs('')
+    # raises, so only create the directory when there is one to create.
+    if tempDir and not isdir(tempDir):
+        print('Making Directory:' + tempDir)
+        makedirs(tempDir)
+    return open(outputfileName, 'w')
+
+# I'm storing global variables in a dictionary for now.
+def initializeGlobalVariables():
+    """Create the module-level ``globalVariables`` dictionary if it is missing.
+
+    An already existing dictionary is left untouched.
+    """
+    global globalVariables
+
+    if "globalVariables" in globals():
+        return
+    globalVariables = {}
+
+def assignConfigurationValue(configurationKey, configurationValue):
+    """Store configurationValue under configurationKey in the global configuration.
+
+    The configuration dictionary is created first if it does not exist.
+    """
+    initializeGlobalVariables()
+    globalVariables.update({configurationKey: configurationValue})
+
+def getConfigurationValue(configurationKey):
+    """Look up configurationKey in the global configuration.
+
+    Returns the stored value, or None (after printing a notice) when the
+    key has not been assigned.
+    """
+    # Make sure the dictionary exists, so a lookup that happens before
+    # any assignment reports a missing key instead of raising NameError.
+    initializeGlobalVariables()
+    if configurationKey in globalVariables:
+        return globalVariables[configurationKey]
+
+    # str() keeps the notice from failing on a non-string key.
+    print('Configuration Key Not Found:' + str(configurationKey))
+    return None
+
+def assignConfigName():
+    """Record the config file path under the 'config_file_location' key.
+
+    The file is named Saddlebags.Config.xml and sits in the directory
+    returned by expanduser("~").
+    """
+    homeDirectory = expanduser("~")
+    configFilePath = join(homeDirectory, 'Saddlebags.Config.xml')
+    assignConfigurationValue('config_file_location', configFilePath)
+
+def writeConfigurationFile():
+    """Write the global configuration to the config file as pretty-printed XML.
+
+    Each configuration key becomes a child element of a <config> root,
+    with the value as its text.  The keys 'embl_password',
+    'imgt_password' and 'sequence' are not written.
+    """
+    assignConfigName()
+    configFileLocation = globalVariables['config_file_location']
+    print('Writing a config file to:\n' + configFileLocation)
+
+    # Some config values I don't want to store.
+    # Add to this: Sequence Text, EMBL Submission Text, IMGT Submission Text
+    excludedKeys = ('embl_password', 'imgt_password', 'sequence')
+
+    root = ET.Element("config")
+    for key in globalVariables.keys():
+        if key not in excludedKeys:
+            ET.SubElement(root, key).text = globalVariables[key]
+
+    xmlText = ET.tostring(root, encoding='utf8', method='xml')
+    prettyXmlText = xml.dom.minidom.parseString(xmlText).toprettyxml()
+
+    xmlOutput = createOutputFile(configFileLocation)
+    try:
+        xmlOutput.write(prettyXmlText)
+    finally:
+        # Close the handle even when the write fails.
+        xmlOutput.close()
+
+
+
+
+
+def loadConfigurationFile():
+    """Fill the global configuration from the config file, or from defaults.
+
+    When the config file exists, every child element of its root is
+    assigned as key (tag) and value (text).  When it does not exist, the
+    common default values are assigned instead.
+    """
+    assignConfigName()
+    configFileLocation = globalVariables['config_file_location']
+
+    if isfile(configFileLocation):
+        print('The config file already exists, I will load it:\n' + configFileLocation)
+        for child in ET.parse(configFileLocation).getroot():
+            assignConfigurationValue(child.tag, child.text)
+        return
+
+    print('The config file does not exist yet. I will not load it:\n' + configFileLocation)
+
+    # Common configuration values.
+    # test_submission indicates if we should use the "test" values; the
+    # intent is to use it for both EMBL and IMGT submissions.
+    # FTP is stored without the ftp:// identifier, because it is not
+    # necessary.  The test and prod ftp sites have the same address on
+    # purpose: embl doesn't have a test ftp.
+    defaultValues = (
+        ('test_submission', '1'),
+        ('embl_ftp_upload_site_test', 'webin.ebi.ac.uk'),
+        ('embl_ftp_upload_site_prod', 'webin.ebi.ac.uk'),
+        ('embl_rest_address_test', 'https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/'),
+        ('embl_rest_address_prod', 'https://www.ebi.ac.uk/ena/submit/drop-box/submit/'),
+    )
+    for defaultKey, defaultValue in defaultValues:
+        assignConfigurationValue(defaultKey, defaultValue)
+
+
\ No newline at end of file
diff --git a/src/AlleleSubInstallerOptions_Windows.spec b/src/AlleleSubInstallerOptions_Windows.spec
index 72a4717..4210ec3 100755
--- a/src/AlleleSubInstallerOptions_Windows.spec
+++ b/src/AlleleSubInstallerOptions_Windows.spec
@@ -1,21 +1,19 @@
-# This file is part of EMBL-HLA-Submission.
+# This file is part of saddle-bags.
#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# saddle-bags is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# saddle-bags is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see .
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
-# Version 1.0
-
-# This file contains specifications for packaging of the MinION Extractor GUI
+# This file contains specifications for packaging of saddlebags
# As a standalone executable. This file is meant to be used with pyinstaller
# http://www.pyinstaller.org/
@@ -25,7 +23,7 @@
block_cipher = None
-a = Analysis(['AlleleSubmissionEMBL.py'],
+a = Analysis(['AlleleSubmissionMain.py'],
binaries=None,
datas=None,
hiddenimports=['six', 'packaging', 'packaging.requirements', 'packaging.version', 'packaging.specifiers', 'Tkinter', 'tkFileDialog', 'Tkconstants'],
@@ -42,7 +40,7 @@ exe = EXE(pyz,
a.binaries,
a.zipfiles,
a.datas,
- name='AlleleSubmissionEMBLWindows',
+ name='SaddlebagsWindows',
debug=False,
strip=False,
upx=True,
diff --git a/src/AlleleSubmissionEMBLRestMethods.py b/src/AlleleSubmissionEMBLRestMethods.py
new file mode 100644
index 0000000..566cb2e
--- /dev/null
+++ b/src/AlleleSubmissionEMBLRestMethods.py
@@ -0,0 +1,129 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+
+from AlleleSubCommon import *
+import pycurl
+import StringIO
+
+import xml.etree.ElementTree as ET
+import xml.dom.minidom
+
+# Here we have methods to perform REST interactions necessary for EMBL submission.
+
+def performProjectSubmission(submissionFileName, projectFileName):
+    """Upload a submission XML together with a project XML and interpret the reply.
+
+    Returns whatever interpretProjectSubmissionResults makes of the
+    response text.
+    """
+    submissionPart = ('SUBMISSION', (pycurl.FORM_FILE, submissionFileName))
+    projectPart = ('PROJECT', (pycurl.FORM_FILE, projectFileName))
+
+    receiptText = performSubmission(submissionFileName, [submissionPart, projectPart])
+    return interpretProjectSubmissionResults(receiptText)
+
+def performAnalysisSubmission(submissionFileName, analysisFileName):
+    """Upload a submission XML together with an analysis XML and interpret the reply.
+
+    Returns whatever interpretAnalysisSubmissionResults makes of the
+    response text.
+    """
+    submissionPart = ('SUBMISSION', (pycurl.FORM_FILE, submissionFileName))
+    analysisPart = ('ANALYSIS', (pycurl.FORM_FILE, analysisFileName))
+
+    receiptText = performSubmission(submissionFileName, [submissionPart, analysisPart])
+    return interpretAnalysisSubmissionResults(receiptText)
+
+def performSubmission(submissionFileName, POST_DATA):
+    """POST form data to the EMBL REST address and return the response text.
+
+    The production address is used when the 'test_submission'
+    configuration value is '0'; the test address is used otherwise.  The
+    response is also written next to the submission file, with '.xml'
+    replaced by '_results.xml' in the file name.
+
+    submissionFileName: path of the submission XML; only used to name the
+        results file.
+    POST_DATA: list of multipart form fields handed to pycurl's HTTPPOST.
+    Returns the raw response body.
+    """
+    # Function-scope import so the block brings its own dependency; the
+    # Python 2 location is tried first because this file is Python 2.
+    try:
+        from urllib import quote
+    except ImportError:
+        from urllib.parse import quote
+
+    if str(getConfigurationValue('test_submission')) == '0':
+        print('THIS IS A LIVE SUBMISSION AT EMBL.')
+        restAddress = str(getConfigurationValue('embl_rest_address_prod'))
+    else:
+        print('THIS IS A TEST SUBMISSION AT EMBL.')
+        restAddress = str(getConfigurationValue('embl_rest_address_test'))
+
+    # The credentials are part of the query string, so they are
+    # percent-encoded; otherwise characters such as '&', '#', '%' or a
+    # space in a password would corrupt the URL.
+    requestURL = (restAddress + '?auth=ENA%20'
+        + quote(str(getConfigurationValue('embl_username')), safe='') + '%20'
+        + quote(str(getConfigurationValue('embl_password')), safe=''))
+
+    curlResponseBuffer = StringIO.StringIO()
+    curlObject = pycurl.Curl()
+    try:
+        curlObject.setopt(curlObject.URL, requestURL)
+        curlObject.setopt(curlObject.POST, 1)
+        curlObject.setopt(curlObject.HTTPPOST, POST_DATA)
+        curlObject.setopt(curlObject.USERAGENT, 'Curl')
+        curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write)
+        curlObject.setopt(pycurl.HTTPHEADER, ['Accept:application/xml'])
+        # NOTE(review): certificate and host verification are switched
+        # off, so the credentials above travel over an unauthenticated
+        # TLS connection.  Left unchanged so existing setups keep working;
+        # enabling verification needs a CA bundle on every platform.
+        curlObject.setopt(pycurl.SSL_VERIFYHOST, 0)
+        curlObject.setopt(pycurl.SSL_VERIFYPEER, 0)
+        curlObject.perform()
+    finally:
+        # Release the curl handle even when the request fails.
+        curlObject.close()
+
+    responseText = curlResponseBuffer.getvalue()
+
+    # Write the response XML to file.
+    projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml')
+    resultsFile = createOutputFile(projectSubResultsFileName)
+    try:
+        resultsFile.write(responseText)
+    finally:
+        resultsFile.close()
+
+    return responseText
+
+def _interpretSubmissionResults(responseText, accessionTag):
+    """Parse a submission response and pull out success, accession and messages.
+
+    responseText: the response XML as a string.
+    accessionTag: tag name of the child element that carries the
+        'accession' attribute ('PROJECT' or 'ANALYSIS').
+    Returns a tuple (success, accession, messages).  accession is None
+    when no matching element carries one; messages holds one 'TAG:text'
+    string per child of every MESSAGES element.
+    Raises KeyError when the root element has no 'success' attribute.
+    """
+    root = ET.fromstring(responseText)
+    submissionSuccess = (root.attrib['success'] == 'true')
+
+    accession = None
+    messages = []
+
+    for child in root:
+        if child.tag == accessionTag:
+            # None when the attribute is absent.
+            accession = child.attrib.get('accession')
+        elif child.tag == 'MESSAGES':
+            print('I found some messages.')
+            for messageNode in child:
+                # An empty element has text None; treat it as an empty
+                # message instead of failing on the concatenation.
+                messages.append(messageNode.tag + ':' + (messageNode.text or ''))
+        # Other nodes are of no interest here.
+
+    return (submissionSuccess, accession, messages)
+
+def interpretProjectSubmissionResults(responseText):
+    """Interpret the response to a project submission.
+
+    Returns a tuple (Success, ProjectAccession, Messages[]).
+    """
+    return _interpretSubmissionResults(responseText, 'PROJECT')
+
+def interpretAnalysisSubmissionResults(responseText):
+    """Interpret the response to an analysis submission.
+
+    Returns a tuple (Success, AnalysisAccession, Messages[]).
+    """
+    return _interpretSubmissionResults(responseText, 'ANALYSIS')
+
+
diff --git a/src/AlleleSubmissionEMBLXml.py b/src/AlleleSubmissionEMBLXml.py
new file mode 100755
index 0000000..32ae791
--- /dev/null
+++ b/src/AlleleSubmissionEMBLXml.py
@@ -0,0 +1,118 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+from AlleleSubCommon import *
+
+import xml.etree.ElementTree as ET
+import xml.dom.minidom
+
+# Here we have methods to create XML files necessary for EMBL submission.
+# Schemas are defined on github.
+# https://github.com/enasequence/schema
+
+def writeToXml(fullXmlFilePath, xmlElementTree):
+    """Pretty-print an XML element, write it to fullXmlFilePath and return the text.
+
+    fullXmlFilePath: path of the file to write.
+    xmlElementTree: the root element to serialize.
+    Returns the pretty-printed XML text that was written.
+    """
+    xmlText = ET.tostring(xmlElementTree, encoding='utf8', method='xml')
+    prettyXmlText = xml.dom.minidom.parseString(xmlText).toprettyxml()
+
+    xmlOutput = createOutputFile(fullXmlFilePath)
+    try:
+        xmlOutput.write(prettyXmlText)
+    finally:
+        # close must be called, not merely referenced, so the file is
+        # flushed and released before anything else reads it.
+        xmlOutput.close()
+
+    return prettyXmlText
+
+def getCenterName():
+    """Return the placeholder center name, 'Center_Name'."""
+    # TODO: Should I use REST here?
+    # Probably not, center_name is not required in the xmls.
+    centerName = 'Center_Name'
+    return centerName
+
+def createProjectXML(fullXmlFilePath):
+    """Build the PROJECT_SET XML from the study configuration values and write it.
+
+    Reads 'study_identifier', 'study_short_title' and 'study_abstract'
+    from the configuration.
+    Returns the pretty-printed XML text produced by writeToXml.
+    """
+    # They are called "Project" in xml, but "Study" on the website.
+    projectSet = ET.Element('PROJECT_SET')
+
+    studyIdentifier = getConfigurationValue('study_identifier')
+    studyShortTitle = getConfigurationValue('study_short_title')
+    studyAbstract = getConfigurationValue('study_abstract')
+
+    # Center name is optional according to the schemas, so it is not set.
+    project = ET.SubElement(projectSet, 'PROJECT')
+    project.set('alias', studyIdentifier)
+
+    ET.SubElement(project, 'TITLE').text = studyShortTitle
+    ET.SubElement(project, 'DESCRIPTION').text = studyAbstract
+
+    # Empty SUBMISSION_PROJECT / SEQUENCING_PROJECT pair.
+    submissionProject = ET.SubElement(project, 'SUBMISSION_PROJECT')
+    ET.SubElement(submissionProject, 'SEQUENCING_PROJECT')
+
+    return writeToXml(fullXmlFilePath, projectSet)
+
+def createProjectSubmissionXML(fullXmlFilePath, submissionAlias, shortProjectFileName):
+    """Build the SUBMISSION XML that adds a project file, and write it.
+
+    submissionAlias: value of the SUBMISSION element's 'alias' attribute.
+    shortProjectFileName: value of the ADD element's 'source' attribute.
+    Returns the pretty-printed XML text produced by writeToXml.
+    """
+    submission = ET.Element('SUBMISSION')
+    submission.set('alias', submissionAlias)
+    # Center name is optional according to the schemas, so it is not set.
+
+    # SUBMISSION > ACTIONS > ACTION > ADD
+    actions = ET.SubElement(submission, 'ACTIONS')
+    add = ET.SubElement(ET.SubElement(actions, 'ACTION'), 'ADD')
+    add.set('source', shortProjectFileName)
+    add.set('schema', 'project')
+
+    return writeToXml(fullXmlFilePath, submission)
+
+def createAnalysisXML(fullXmlFilePath, checksumValue, flatfileZipFileName):
+    """Build the ANALYSIS_SET XML that wraps a sequence flatfile, and write it.
+
+    Reads 'analysis_alias', 'analysis_title', 'analysis_description' and
+    'study_accession' from the configuration.
+
+    checksumValue: value of the FILE element's 'checksum' attribute; the
+        checksum method is declared as MD5.
+    flatfileZipFileName: value of the FILE element's 'filename' attribute.
+    Returns the pretty-printed XML text produced by writeToXml.
+    """
+    # An analysis xml is just a wrapper for a sequence submission.
+    analysisSet = ET.Element('ANALYSIS_SET')
+
+    # TODO: I haven't created these three analysis configuration values yet.
+    # Probably need to add this to the GUI, or somehow generate them automagically.
+    analysis = ET.SubElement(analysisSet, 'ANALYSIS')
+    analysis.set('alias', getConfigurationValue('analysis_alias'))
+
+    ET.SubElement(analysis, 'TITLE').text = getConfigurationValue('analysis_title')
+    ET.SubElement(analysis, 'DESCRIPTION').text = getConfigurationValue('analysis_description')
+
+    studyRef = ET.SubElement(analysis, 'STUDY_REF')
+    studyRef.set('accession', getConfigurationValue('study_accession'))
+
+    analysisType = ET.SubElement(analysis, 'ANALYSIS_TYPE')
+    ET.SubElement(analysisType, 'SEQUENCE_FLATFILE')
+
+    flatfile = ET.SubElement(ET.SubElement(analysis, 'FILES'), 'FILE')
+    for attributeName, attributeValue in (
+            ('checksum', checksumValue),
+            ('checksum_method', 'MD5'),
+            ('filename', flatfileZipFileName),
+            ('filetype', 'flatfile')):
+        flatfile.set(attributeName, attributeValue)
+
+    return writeToXml(fullXmlFilePath, analysisSet)
+
+def createAnalysisSubmissionXML(fullXmlFilePath, submissionAlias, shortAnalysisFileName):
+    """Build the SUBMISSION XML that adds an analysis file, and write it.
+
+    submissionAlias: value of the SUBMISSION element's 'alias' attribute.
+    shortAnalysisFileName: value of the ADD element's 'source' attribute.
+    Returns the pretty-printed XML text produced by writeToXml.
+    """
+    submission = ET.Element('SUBMISSION')
+    submission.set('alias', submissionAlias)
+
+    # SUBMISSION > ACTIONS > ACTION > ADD
+    actions = ET.SubElement(submission, 'ACTIONS')
+    add = ET.SubElement(ET.SubElement(actions, 'ACTION'), 'ADD')
+    add.set('source', shortAnalysisFileName)
+    add.set('schema', 'analysis')
+
+    return writeToXml(fullXmlFilePath, submission)
+
+
diff --git a/src/AlleleSubmissionEMBL.py b/src/AlleleSubmissionMain.py
similarity index 70%
rename from src/AlleleSubmissionEMBL.py
rename to src/AlleleSubmissionMain.py
index 6070ddb..e7f8274 100755
--- a/src/AlleleSubmissionEMBL.py
+++ b/src/AlleleSubmissionMain.py
@@ -1,39 +1,40 @@
-# This file is part of EMBL-HLA-Submission.
+# This file is part of saddle-bags.
#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# saddle-bags is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# saddle-bags is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see .
-
-# Version 1.0
-
-SoftwareVersion = "Bhast Version 1.0"
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
import Tkinter
import sys
-from AlleleGui import AlleleGui
+from AlleleGuiMain import AlleleGuiMain
+from AlleleSubCommon import *
+SoftwareVersion = 'saddlebags Version 1.1'
+
if __name__=='__main__':
try:
# This is a really simple way to read commandline args,
# because there really shouldn't be any.
# TODO: Be more graceful with this, there are better ways to read args.
-
# No parameters are expected at all. sys.argv[0] doesn't count.
if (len(sys.argv) == 1):
- print('\n\n\n\n\n***Creating an EMBL Allele submission***\n')
+
+ loadConfigurationFile()
+
+ print('\n\n\n\n\n***Starting the HLA Allele Submission Tool***\n')
root = Tkinter.Tk()
- AlleleGui(root).pack()
+ AlleleGuiMain(root).pack()
root.mainloop()
print('Done. Hooray.')
@@ -45,14 +46,16 @@
sys.argv[1].lower() == '-version')
):
print (SoftwareVersion)
+ pass
+ #
# You executed the software wrong. Sorry.
else:
print("usage:\n" +
"\tRun this program using standard python call:\n" +
- "\t$python AlleleSubmissionEMBL.py\n" +
+ "\t$python AlleleSubmissionMain.py\n" +
"\tbiopython must be accessible in your python environment. To run using Anaconda,\n"
- "\tCheck readme at https://github.com/transplantation-immunology/EMBL-HLA-Submission\n"
+ "\tCheck readme at https://github.com/transplantation-immunology/saddle-bags\n"
)
@@ -61,5 +64,4 @@
# This is not really doing anything.
print 'Unexpected problem during execution:'
print sys.exc_info()[1]
- raise
-
+ raise
\ No newline at end of file
diff --git a/src/HLAGene.py b/src/HLAGene.py
index 69565e3..d9940b3 100755
--- a/src/HLAGene.py
+++ b/src/HLAGene.py
@@ -1,19 +1,17 @@
-# This file is part of EMBL-HLA-Submission.
+# This file is part of saddle-bags.
#
-# EMBL-HLA-Submission is free software: you can redistribute it and/or modify
+# saddle-bags is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
-# EMBL-HLA-Submission is distributed in the hope that it will be useful,
+# saddle-bags is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
-# along with EMBL-HLA-Submission. If not, see .
-
-# Version 1.0
+# along with saddle-bags. If not, see .
# The GeneLocus class specifies a locus on a Gene,
# Either an Exon, intron, or UTR.
diff --git a/src/ScrolledWindow.py b/src/ScrolledWindow.py
new file mode 100644
index 0000000..15e00f8
--- /dev/null
+++ b/src/ScrolledWindow.py
@@ -0,0 +1,64 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+
+
+from Tkinter import Scrollbar, Frame, Canvas, VERTICAL, RIGHT, Y, LEFT, BOTH, TRUE, FALSE, NW
+
+# http://tkinter.unpythonic.net/wiki/VerticalScrolledFrame
+
+class VerticalScrolledFrame(Frame):
+    """A frame with a vertical scrollbar around an inner frame.
+
+    Child widgets belong in the ``interior`` attribute, which is the frame
+    placed inside the scrolled canvas, not in this frame itself.
+    """
+    def __init__(self, parent, *args, **kw):
+        Frame.__init__(self, parent, *args, **kw)
+
+        # create a canvas object and a vertical scrollbar for scrolling it
+        vscrollbar = Scrollbar(self, orient=VERTICAL)
+        vscrollbar.pack(fill=Y, side=RIGHT, expand=FALSE)
+        canvas = Canvas(self, bd=0, highlightthickness=0,
+                        yscrollcommand=vscrollbar.set)
+        canvas.pack(side=LEFT, fill=BOTH, expand=TRUE)
+        vscrollbar.config(command=canvas.yview)
+
+        # TODO: Fix the vertical scrolling behavior. The Mouse does not work.
+
+        # reset the view
+        canvas.xview_moveto(0)
+        canvas.yview_moveto(0)
+
+        # create a frame inside the canvas which will be scrolled with it
+        self.interior = interior = Frame(canvas)
+        interior_id = canvas.create_window(0, 0, window=interior,
+                                           anchor=NW)
+
+        # track changes to the canvas and frame width and sync them,
+        # also updating the scrollbar
+        def _configure_interior(event):
+            # update the scrollbars to match the size of the inner frame
+            size = (interior.winfo_reqwidth(), interior.winfo_reqheight())
+            canvas.config(scrollregion="0 0 %s %s" % size)
+            if interior.winfo_reqwidth() != canvas.winfo_width():
+                # update the canvas's width to fit the inner frame
+                canvas.config(width=interior.winfo_reqwidth())
+        # <Configure> is the event Tk sends when a widget changes size.
+        interior.bind('<Configure>', _configure_interior)
+
+        def _configure_canvas(event):
+            if interior.winfo_reqwidth() != canvas.winfo_width():
+                # update the inner frame's width to fill the canvas
+                canvas.itemconfigure(interior_id, width=canvas.winfo_width())
+        canvas.bind('<Configure>', _configure_canvas)
diff --git a/src/SubmissionGeneratorEMBL.py b/src/SubmissionGeneratorEMBL.py
new file mode 100755
index 0000000..15b7fe1
--- /dev/null
+++ b/src/SubmissionGeneratorEMBL.py
@@ -0,0 +1,349 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+from Bio.Seq import Seq
+from Bio.Alphabet import generic_dna
+import sys
+import tkMessageBox
+
+import math
+
+from HLAGene import *
+
+from AlleleSubCommon import *
+
+# The SubmissionGeneratorEMBL class contains logic to generate an EMBL HLA allele submission
+# in ENA format.
+class SubmissionGeneratorEMBL():
+    """Build the text of an EMBL HLA allele submission in ENA flat-file format.
+
+    The annotated sequence is read from ``self.sequenceAnnotation``; gene,
+    allele, class and sample values are read with getConfigurationValue().
+    """
+
+    def __init__(self):
+        """Create the HLAGene object that holds the sequence annotation."""
+        self.sequenceAnnotation = HLAGene()
+
+    def _mhcClassNumeral(self):
+        """Return 'I' when the configured 'class' is '1', otherwise 'II'."""
+        return 'I' if '1' == str(getConfigurationValue('class')) else 'II'
+
+    def _geneAndAlleleQualifiers(self):
+        """Return the /gene and /allele qualifier lines used by most features."""
+        return ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n'
+                + 'FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n')
+
+    def printHeader(self):
+        """Return the ID, AC and DE lines, the feature header and the source feature."""
+        totalLength = str(self.sequenceAnnotation.totalLength())
+        headerText = ''
+
+        # Print header
+        headerText += 'ID XXX; XXX; linear; genomic DNA; XXX; XXX; ' + totalLength + ' BP.\n'
+        headerText += 'XX\n'
+        # A valid document should have an AC (Accession Number) and DE (Description) field.
+        # I don't have an AC number available, so it's blank.
+        headerText += 'AC \n'
+        headerText += 'XX\n'
+        # Requested format of the DE line:
+        # Homo sapiens HLA-B gene for MHC class I antigen, allele "/allele name"
+        headerText += ('DE Homo sapiens ' + str(getConfigurationValue('gene'))
+                       + ' gene for MHC class ' + self._mhcClassNumeral()
+                       + ' antigen, allele "' + str(getConfigurationValue('allele_name')) + '"\n')
+        headerText += 'XX\n'
+
+        # Print key
+        headerText += 'FH Key Location/Qualifiers\n'
+        headerText += 'FH\n'
+
+        # Print source
+        # It's from a human.
+        headerText += 'FT source 1..' + totalLength + '\n'
+        headerText += 'FT /organism="Homo sapiens"\n'
+        headerText += 'FT /db_xref="taxon:9606"\n'
+        headerText += 'FT /mol_type="genomic DNA"\n'
+        headerText += 'FT /chromosome="6"\n'
+        headerText += 'FT /isolate="' + str(getConfigurationValue('sample_id')) + '"\n'
+
+        return headerText
+
+    def printMRNA(self):
+        """Return the mRNA feature: a join() over every exon and UTR, plus qualifiers."""
+        # The 3' and 5' UTR are included in the mRNA, the introns are not.
+        # Joining the collected ranges avoids trimming a trailing comma, which
+        # would remove the opening parenthesis when no range was collected.
+        mRNARanges = [str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex)
+                      for geneLocus in self.sequenceAnnotation.loci
+                      if (geneLocus.exon or 'UT' in geneLocus.name)]
+
+        mRNAText = 'FT mRNA join(' + ','.join(mRNARanges) + ')\n'
+        mRNAText += self._geneAndAlleleQualifiers()
+        mRNAText += 'FT /product=\"MHC class ' + self._mhcClassNumeral() + ' antigen\"\n'
+
+        return mRNAText
+
+    def _formatTranslation(self, peptideSequence):
+        """Return the peptide wrapped for the /translation qualifier, closing quote included.
+
+        The first line holds up to 66 residues because it shares the line with
+        the qualifier name; every later line holds up to 80 and starts with 'FT '.
+        """
+        # A peptide that fits on the first line is closed right away. This
+        # includes a peptide of exactly 66 residues, which would otherwise
+        # skip the loop below and never receive its closing quote.
+        if len(peptideSequence) <= 66:
+            return peptideSequence + '\"\n'
+
+        wrappedText = peptideSequence[0:66] + '\n'
+        i = 66
+        while i < len(peptideSequence):
+            wrappedText += 'FT ' + peptideSequence[i:i+80]
+            i += 80
+
+            # If we're not yet at the end of the sequence, go to the next line
+            if i < len(peptideSequence):
+                wrappedText += '\n'
+            # We're at the end. close the quote and new line.
+            else:
+                wrappedText += '\"\n'
+
+        return wrappedText
+
+    def printCDS(self):
+        """Return the CDS feature: exon join(), qualifiers and the wrapped translation."""
+        # Translate the exon sequence before any text is produced.
+        peptideSequence = translateSequence(self.sequenceAnnotation.getExonSequence())
+
+        # CDS is the coding sequence. It should include the exons, but not the UTRs/Introns.
+        # Joining the collected ranges closes the list correctly wherever the
+        # last exon sits in the loci list, with or without a 3' UTR after it.
+        exonRanges = [str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex)
+                      for geneLocus in self.sequenceAnnotation.loci
+                      if geneLocus.exon]
+        cdsText = 'FT CDS join(' + ','.join(exonRanges) + ')\n'
+
+        cdsText += 'FT /transl_table=1\n'
+        cdsText += 'FT /codon_start=1\n'
+
+        # If this sequence has premature stop codon, add the "/pseudo" flag.
+        # This indicates the gene is a /pseudo gene, not a complete protein.
+        # The comparison must sit outside str(): str(x == '1') is the text
+        # 'True' or 'False', and both of those are truthy.
+        if str(getConfigurationValue('is_pseudo_gene')) == '1':
+            print("putting pseudo in the submission")
+            cdsText += 'FT /pseudo\n'
+        else:
+            print("not putting pseudo in the submission")
+
+        cdsText += self._geneAndAlleleQualifiers()
+        cdsText += 'FT /product=\"MHC class ' + self._mhcClassNumeral() + ' antigen\"\n'
+        cdsText += 'FT /translation=\"'
+        cdsText += self._formatTranslation(peptideSequence)
+
+        return cdsText
+
+    def _warnAboutMissingUTRs(self, geneHas5UTR, geneHas3UTR):
+        """Show a message box naming the missing UTR(s); print a note when both exist."""
+        # This is a warning only; generation carries on afterwards.
+        howToFix = ('Use lowercase nucleotides at the\n' +
+                    'beginning and end of your DNA\n' +
+                    'sequence to specify the 5\' and 3\' UTRs.')
+
+        if (not geneHas3UTR and not geneHas5UTR):
+            tkMessageBox.showinfo('Missing UTRs',
+                'This sequence has no 5\' or 3\' UTR.\n\n' + howToFix)
+        elif (not geneHas5UTR):
+            tkMessageBox.showinfo('Missing 5\' UTR',
+                'This sequence has no 5\' UTR.\n\n' + howToFix)
+        elif (not geneHas3UTR):
+            tkMessageBox.showinfo('Missing 3\' UTR',
+                'This sequence has no 3\' UTR.\n\n' + howToFix)
+        else:
+            print('The UTRs look fine.')
+
+    def printFeatures(self):
+        """Return one feature entry per locus, followed by an 'XX' line.
+
+        A locus named '3UT' or '5UT' becomes a UTR feature, a locus flagged as
+        exon becomes an exon, and anything else is written as an intron. Exons
+        and introns are numbered separately, starting at 1. A message box is
+        shown when either UTR is absent.
+        """
+        featureText = ''
+        qualifiers = self._geneAndAlleleQualifiers()
+
+        exonIndex = 1
+        intronIndex = 1
+
+        geneHas3UTR = False
+        geneHas5UTR = False
+
+        for currentFeature in self.sequenceAnnotation.loci:
+            featureRange = str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex)
+
+            # 3' UTR
+            if (currentFeature.name == '3UT'):
+                featureText += 'FT 3\'UTR ' + featureRange + '\n'
+                featureText += 'FT /note=\"3\'UTR\"\n'
+                featureText += qualifiers
+                geneHas3UTR = True
+
+            # 5' UTR
+            elif (currentFeature.name == '5UT'):
+                featureText += 'FT 5\'UTR ' + featureRange + '\n'
+                featureText += 'FT /note=\"5\'UTR\"\n'
+                featureText += qualifiers
+                geneHas5UTR = True
+
+            # Exon
+            elif (currentFeature.exon):
+                featureText += 'FT exon ' + featureRange + '\n'
+                featureText += 'FT /number=' + str(exonIndex) + '\n'
+                featureText += qualifiers
+                exonIndex += 1
+
+            # Intron
+            else:
+                featureText += 'FT intron ' + featureRange + '\n'
+                featureText += 'FT /number=' + str(intronIndex) + '\n'
+                featureText += qualifiers
+                intronIndex += 1
+
+        featureText += 'XX\n'
+
+        # Do a quick sanity check. If we are missing either UTR I should warn the user.
+        self._warnAboutMissingUTRs(geneHas5UTR, geneHas3UTR)
+
+        return featureText
+
+    def printSequence(self):
+        """Return the SQ summary line followed by the sequence in rows of 60 bases.
+
+        Bases are counted case-insensitively, but the rows show the sequence in
+        its original case. Each row is split into chunks of 10 bases and ends
+        with the index of the last base on that row.
+        """
+        rawSequence = self.sequenceAnnotation.getCompleteSequence()
+        totalLength = self.sequenceAnnotation.totalLength()
+        completeSequence = rawSequence.upper()
+
+        cCount = completeSequence.count('C')
+        gCount = completeSequence.count('G')
+        tCount = completeSequence.count('T')
+        aCount = completeSequence.count('A')
+        otherCount = totalLength - (cCount + gCount + tCount + aCount)
+
+        sequenceText = ('SQ Sequence ' + str(totalLength) + ' BP; '
+                        + str(aCount) + ' A; ' + str(cCount) + ' C; '
+                        + str(gCount) + ' G; ' + str(tCount) + ' T; '
+                        + str(otherCount) + ' other;\n')
+
+        # Here's some logic to print the sequence information in groups of 10.
+        # This format is specified in the User manual specified by EMBL.
+        for currentSeqIndex in range(0, totalLength, 60):
+            # The character code for a sequence region is two blank spaces,
+            # followed by three blank spaces, for a total of 5 blanks.
+            sequenceText += ' ' * 5
+            sequenceRow = rawSequence[currentSeqIndex : currentSeqIndex + 60]
+
+            # A sequenceChunk is 10 nucleotides in this context.
+            # Format specifies up to six "chunks" per line.
+            for i in range(0, 6):
+                sequenceText += sequenceRow[i*10 : (i+1)*10] + ' '
+
+            # A short final row is padded with one blank per missing base so the
+            # index stays aligned; a full row of 60 needs no padding.
+            sequenceText += ' ' * (60 - len(sequenceRow))
+            sequenceText += str(currentSeqIndex + len(sequenceRow)) + '\n'
+
+        return sequenceText
+
+    # Create the text submission based on the ENA format.
+    def buildENASubmission(self):
+        """Return the complete ENA submission text, or None when it cannot be built.
+
+        Nothing is built when the annotated sequence has length 0 or when
+        validateInputs() returns False; a message box is shown instead.
+        """
+        # ENA format is the preferred submission type for EMBL. More information:
+        # http://www.ebi.ac.uk/ena/submit/sequence-submission
+        # http://www.ebi.ac.uk/ena/submit/entry-upload-templates
+        # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt
+        # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html
+        # http://www.ebi.ac.uk/ena/software/flat-file-validator
+
+        totalLength = self.sequenceAnnotation.totalLength()
+        print('total calculated length = ' + str(totalLength))
+
+        # NOTE: the same "sequence is empty" message is also shown when the
+        # sequence is present but validateInputs() rejected a configuration value.
+        if not (totalLength > 0 and self.validateInputs()):
+            tkMessageBox.showinfo('No HLA Sequence Found',
+                'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' )
+            return None
+
+        # These are the main sections of the ENA submission.
+        documentBuffer = ''
+        documentBuffer += self.printHeader()
+        documentBuffer += self.printMRNA()
+        documentBuffer += self.printCDS()
+        documentBuffer += self.printFeatures()
+        documentBuffer += self.printSequence()
+
+        # Print entry terminator. The last line of an ENA entry.
+        documentBuffer += '//\n'
+
+        return documentBuffer
+
+    # TODO: Instead of just returning False, I should have the GUI
+    # automatically open the choose options screen.
+    # TODO: Maybe I should delete this method, and add error handling to the generate methods.
+    def validateInputs(self):
+        """Return True when sample id, annotation, gene, allele and class are all present.
+
+        The first missing value is printed and False is returned; nothing is raised.
+        """
+        sampleID = getConfigurationValue('sample_id')
+        if (sampleID is None or len(sampleID) < 1):
+            print('Invalid Sequence ID:' + str(sampleID))
+            return False
+
+        if (self.sequenceAnnotation is None):
+            print('Invalid Sequence Annotation:' + str(self.sequenceAnnotation))
+            return False
+
+        # The remaining values are checked in the same order as before.
+        requiredValues = (('gene', 'Invalid Input Gene:'),
+                          ('allele_name', 'Invalid Input Allele:'),
+                          ('class', 'Invalid Input Class:'))
+        for configKey, complaint in requiredValues:
+            configValue = getConfigurationValue(configKey)
+            if (configValue is None or len(configValue) < 1):
+                print(complaint + str(configValue))
+                return False
+
+        return True
+
+
diff --git a/src/SubmissionGeneratorIMGT.py b/src/SubmissionGeneratorIMGT.py
new file mode 100755
index 0000000..2365d70
--- /dev/null
+++ b/src/SubmissionGeneratorIMGT.py
@@ -0,0 +1,395 @@
+# This file is part of saddle-bags.
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>.
+
+import sys
+
+import datetime
+import tkMessageBox
+
+from AlleleSubCommon import *
+#import math
+
+from HLAGene import *
+
+# The SubmissionGeneratorIMGT class contains logic to generate an IMGT HLA allele submission
+# in IMGT format.
+class SubmissionGeneratorIMGT():
+    """Build the text of an IMGT HLA allele submission.
+
+    The annotated sequence is read from ``self.sequenceAnnotation``; the
+    remaining values are read with getConfigurationValue(). Several sections
+    still contain hard-coded placeholder data, marked with TODO comments.
+    """
+
+    def __init__(self):
+        """Create the HLAGene object that holds the sequence annotation."""
+        self.sequenceAnnotation = HLAGene()
+
+    # Create the text submission based on the IMGT format.
+    def buildIMGTSubmission(self):
+        """Return the complete IMGT submission text, or '' when the sequence is empty.
+
+        A message box is shown when the annotated sequence has length 0.
+        """
+        documentBuffer = ''
+
+        totalLength = self.sequenceAnnotation.totalLength()
+        print('total calculated length = ' + str(totalLength))
+
+        if (totalLength > 0):
+
+            print ('im gonna add the header in here:')
+
+            # These are the main sections of the IMGT submission.
+            documentBuffer += self.printHeader()
+            documentBuffer += self.printSubmitter()
+            documentBuffer += self.printSource()
+            documentBuffer += self.printMethods()
+            documentBuffer += self.printFeatures()
+            documentBuffer += self.printSequence()
+
+            # Print entry terminator. The last line of the entry.
+            documentBuffer += '//\n'
+
+        else:
+            tkMessageBox.showinfo('No HLA Sequence Found',
+                'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' )
+
+        return documentBuffer
+
+    def printHeader(self):
+        """Return the header lines, from the ID line down to the feature table header."""
+        headerText = ''
+
+        # TODO: Get these values from IMGT, they shouldn't be hardcoded.
+        imgtIdentifier = 'HWS10012345'
+        imgtIdentifierWithVersion = 'HWS10012345.1'
+        # The submission date is today's date, written as day/month/year.
+        currentSubmissionDate = '{:%d/%m/%Y}'.format(datetime.datetime.now())
+
+        headerText += 'ID ' + str(imgtIdentifier) + '; Sequence Submission; Confidential; ' + str(self.sequenceAnnotation.totalLength()) + ' BP.\n'
+        headerText += 'XX\n'
+        headerText += 'AC ' + str(imgtIdentifier) + ';\n'
+        headerText += 'XX\n'
+        headerText += 'SV ' + str(imgtIdentifierWithVersion) + '\n'
+        headerText += 'XX\n'
+        headerText += 'DT ' + str(currentSubmissionDate) + ' (Submitted)\n'
+        headerText += 'DT ' + str(getConfigurationValue('embl_release_date')) + ' (Release)\n'
+        headerText += 'XX\n'
+
+        # TODO: I'm using the local allele name that is assigned by the user.
+        # Maybe this allele name should be based on the closest allele.
+        # Do I want the allele name, or should I generate a new one based on the closest allele?
+
+        headerText += 'DE ' + str(getConfigurationValue('allele_name')) + '\n'
+        headerText += 'XX\n'
+        headerText += 'KW HLA WEB SUBMISSION;\n'
+        headerText += 'XX\n'
+
+        # The new allele description is split into multiple lines.
+        # Each part of the description gets its own 'CC' line.
+        rawDescription = str(getConfigurationValue('closest_allele_written_description'))
+        for lineToken in rawDescription.split('\n'):
+            headerText += 'CC ' + lineToken + '\n'
+
+        headerText += 'XX\n'
+        headerText += 'OS Homo sapiens (human);\n'
+        headerText += 'OC Eukaryota; Metazoa; Chordata; Vertebrata; Mammalia; Eutheria; Primates;\n'
+        headerText += 'OC Catarrhini; Hominidae; Homo.\n'
+        headerText += 'XX\n'
+        # TODO: Our submission says GENBANK, but we're using EMBL Numbers. Also what does that [1] mean?
+        headerText += 'DR GENBANK; ' + str(getConfigurationValue('embl_sequence_accession')) + '.\n'
+        headerText += 'XX\n'
+        headerText += 'RN [1]\n'
+        # TODO: This submission is Unpublished. What if it is published?
+        # Ask James what a published study looks like. I need to include study name etc.
+        headerText += 'RC Unpublished.\n'
+        headerText += 'XX\n'
+        headerText += 'FH Key Location/Qualifier\n'
+        headerText += 'FH\n'
+
+        return headerText
+
+    def printSubmitter(self):
+        """Return the submittor feature; only its range is real, the rest is placeholder text."""
+        submitterText = ''
+
+        # TODO: I don't know any of this data. Should it be in the form?
+        # Maybe I just need the submitter ID, and I can or can not get the rest?
+        # I should be able to calculate the indices, at least.
+
+        submitterText += 'FT submittor 1..' + str(self.sequenceAnnotation.totalLength()) + '\n'
+        submitterText += 'FT /ID="**IMGT_SUBMITTER_EMAIL_ID**"\n'
+        submitterText += 'FT /name="**IMGT_SUBMITTER_NAME**"\n'
+        submitterText += 'FT /alt_contact=""\n'
+        submitterText += 'FT /email="**IMGT_SUBMITTER_EMAIL_ADDRESS**"\n'
+
+        return submitterText
+
+    def printSource(self):
+        """Return the source feature: sample details from the configuration plus placeholders."""
+        sourceText = ''
+
+        # TODO: Submitting Laboratory Information. Can this be fetched from IMGT?
+
+        sourceText += 'FT source 1..' + str(self.sequenceAnnotation.totalLength()) + '\n'
+        sourceText += 'FT /cell_id="' + str(getConfigurationValue('sample_id')) + '"\n'
+        sourceText += 'FT /ethnic_origin="' + str(getConfigurationValue('ethnic_origin')) + '"\n'
+        sourceText += 'FT /sex="' + str(getConfigurationValue('sex')) + '"\n'
+        sourceText += 'FT /consanguineous="' + str(getConfigurationValue('consanguineous')) + '"\n'
+        sourceText += 'FT /homozygous="Yes"\n'
+        sourceText += 'FT /lab_of_origin="**IMGT_SUBMITTING_LAB_NAME**"\n'
+        sourceText += 'FT /lab_contact="**IMGT_SUBMITTER_NAME**"\n'
+
+        # TODO: No Material Available. What if Material is available?
+        # I think I need to add this to the form still.
+        # Same story with "cell_bank"
+
+        sourceText += 'FT /material_available="No Material Available"\n'
+        sourceText += 'FT /cell_bank="Not Available"\n'
+
+        # TODO: James suggested that I only allow valid fully-sequenced alleles.
+        # Should I validate this, or should I leave that work to IMGT?
+        # The typing values below are hard-coded.
+
+        sourceText += 'FT /HLA-A*="02:01,03new"\n'
+        sourceText += 'FT /HLA-C*="07,-"\n'
+        sourceText += 'FT /HLA-B*="07,-"\n'
+        sourceText += 'FT /HLA-DRB1*="15:01,-"\n'
+
+        return sourceText
+
+    def printMethods(self):
+        """Return the method feature; everything except the range and /alignment is hard-coded."""
+        methodsText = ''
+
+        # TODO: Get primer info from the form. Make sure this all is correct
+
+        methodsText += 'FT method 1..' + str(self.sequenceAnnotation.totalLength()) + '\n'
+
+        # TODO: What are the options for sequencing methodology?
+        # I can provide an open-text field.
+
+        methodsText += 'FT /primary_sequencing="Direct sequencing of PCR product from DNA (SBT)"\n'
+        methodsText += 'FT /secondary_sequencing="Direct sequencing of PCR product from DNA (SBT)"\n'
+        methodsText += 'FT /type_of_primer="Both allele and locus specific"\n'
+        methodsText += 'FT /sequenced_in_isolation="Yes"\n'
+
+        # TODO Add these primers dynamically
+        # A primer has these pieces of information
+        # "ID" ("primer_1") "Sequence" "Feature" "locus/indices"
+        # locus seems to be genomic index, from the beginning of the sequence.
+        # I suppose this has to be locations in the reference sequence?
+        # I should store a dictionary of primers in the configuration.
+        # Errr, nodes underneath the Primer nodes.
+        # They put a "tab" character between some of this data. Why? Because Tabs, sigh.
+
+        methodsText += 'FT /primer_1="97022 GAGCCCCGCTTCAACGCC E2 257-274"\n'
+        methodsText += 'FT /primer_2="09148 CCAGGCGTGGCTCTCAGA 5UT -265--248"\n'
+        methodsText += 'FT /primer_3="09152 AACCTACGTAGGGTCCTTCA 5UT -161--142"\n'
+        methodsText += 'FT /primer_4="09154 AGTGTCGTCGCGGTCGCT 5UT -72--55"\n'
+        methodsText += 'FT /primer_5="09167 CAGACSCCGAGGATGGCC 5UT -12-6"\n'
+        methodsText += 'FT /primer_6="09162 AACACCCAACACACATTAGGT I7 2745-2765"\n'
+        methodsText += 'FT /primer_7="09168 GGGAGCACAGGTCAGCGTGGGAAG 3UT 3075-3098"\n'
+        methodsText += 'FT /primer_8="98008 GTTTAGGCCAAAAATYCCCCC I2 635-655"\n'
+        methodsText += 'FT /no_of_reactions="3"\n'
+        methodsText += 'FT /sequencing_direction="Both"\n'
+
+        # TODO: There's something up with these primers.
+        # Why are they in the comments? Did we run out of space?
+
+        methodsText += 'FT /method_comments="98021 GTCCAGGCTGGTGTCTGG I3 1432-1449\n'
+        methodsText += 'FT 01026seq GGGGAGAAGCAASGGGC I1 108-124 02100seq\n'
+        methodsText += 'FT CCGCACGCACCCACCG 5UT -44--29 03026 GAGGTTCCTCTAGGACCTTAA I5\n'
+        methodsText += 'FT 2439-2459 03052 TAAGGAGGGAGAYGGGGGT I4 1847-1865 03055\n'
+        methodsText += 'FT CTGCYGTGAKGTGGAGGAG E5 2035-2053 14256 GAATCCTCCTGGGTTTCCAG\n'
+        methodsText += 'FT I3 1115-1134 97094seq TGTCGTCCACGTAGC E2 279-293 98070\n'
+        methodsText += 'FT GGCCTAAACTGAAAATGAAACC I2 622-643 00029 GGTCCCAATTGTCTCCCCTC\n'
+        methodsText += 'FT I3 1055-1074 02038seq GGCCAGCAATGATGC E5 1981-1995 03017\n'
+        methodsText += 'FT CCTTTGCAGAAACAAAGTCAGGGT 3UT 2970-2993 03050\n'
+        methodsText += 'FT TTAAGGTCCTAGAGGAACCTC I5 2439-2459 14019 CCAGACACCAGCCTGGAC\n'
+        methodsText += 'FT I3 1432-1449 Exons and introns are defined as in regular\n'
+        methodsText += 'FT HLA genes although in this allele a stopcodon is present in\n'
+        methodsText += 'FT Exon 2."\n'
+
+        # This is the "closest allele, right?"
+
+        methodsText += 'FT /alignment="' + str(getConfigurationValue('closest_known_allele')) + '"\n'
+
+        return methodsText
+
+    def _warnAboutMissingUTRs(self, geneHas5UTR, geneHas3UTR):
+        """Show a message box naming the missing UTR(s); print a note when both exist."""
+        # This is a warning only; generation carries on afterwards.
+        howToFix = ('Use lowercase nucleotides at the\n' +
+                    'beginning and end of your DNA\n' +
+                    'sequence to specify the 5\' and 3\' UTRs.')
+
+        if (not geneHas3UTR and not geneHas5UTR):
+            tkMessageBox.showinfo('Missing UTRs',
+                'This sequence has no 5\' or 3\' UTR.\n\n' + howToFix)
+        elif (not geneHas5UTR):
+            tkMessageBox.showinfo('Missing 5\' UTR',
+                'This sequence has no 5\' UTR.\n\n' + howToFix)
+        elif (not geneHas3UTR):
+            tkMessageBox.showinfo('Missing 3\' UTR',
+                'This sequence has no 3\' UTR.\n\n' + howToFix)
+        else:
+            print('The UTRs look fine.')
+
+    def printFeatures(self):
+        """Return the CDS join() line followed by one feature entry per locus.
+
+        A locus named '3UT' or '5UT' becomes a UTR feature, a locus flagged as
+        exon becomes an Exon, and anything else is written as an Intron. Exons
+        and introns are numbered separately, starting at 1. A message box is
+        shown when either UTR is absent.
+        """
+        # TODO: I might double check with James Robinson about the backslashes before "number".
+        # Seems inconsistent.
+
+        loci = self.sequenceAnnotation.loci
+
+        # Coding sequence is just the exons. Print out each exon.
+        # Ignoring line-breaks for now, this might create a really wide line. Ok?
+        # Joining the collected ranges closes the list correctly wherever the
+        # last exon sits in the loci list, with or without a 3' UTR after it.
+        exonRanges = [str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex)
+                      for geneLocus in loci
+                      if geneLocus.exon]
+        featureText = 'FT CDS join(' + ','.join(exonRanges) + ')\n'
+
+        exonIndex = 1
+        intronIndex = 1
+
+        geneHas3UTR = False
+        geneHas5UTR = False
+
+        for currentFeature in loci:
+            featureRange = str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex)
+
+            # 3' UTR
+            if (currentFeature.name == '3UT'):
+                featureText += 'FT 3\' UTR ' + featureRange + '\n'
+                geneHas3UTR = True
+
+            # 5' UTR
+            elif (currentFeature.name == '5UT'):
+                featureText += 'FT 5\' UTR ' + featureRange + '\n'
+                geneHas5UTR = True
+
+            # Exon
+            elif (currentFeature.exon):
+                featureText += 'FT Exon ' + featureRange + '\n'
+                featureText += 'FT \\number="' + str(exonIndex) + '"\n'
+                exonIndex += 1
+
+            # Intron
+            else:
+                featureText += 'FT Intron ' + featureRange + '\n'
+                featureText += 'FT \\number="' + str(intronIndex) + '"\n'
+                intronIndex += 1
+
+        # Do a quick sanity check. If we are missing either UTR I should warn the user.
+        self._warnAboutMissingUTRs(geneHas5UTR, geneHas3UTR)
+
+        return featureText
+
+    def printSequence(self):
+        """Return the SQ summary line followed by the sequence in rows of 60 bases.
+
+        Bases are counted case-insensitively, but the rows show the sequence in
+        its original case. Each row is split into chunks of 10 bases and ends
+        with the index of the last base on that row.
+        """
+        rawSequence = self.sequenceAnnotation.getCompleteSequence()
+        totalLength = self.sequenceAnnotation.totalLength()
+        completeSequence = rawSequence.upper()
+
+        cCount = completeSequence.count('C')
+        gCount = completeSequence.count('G')
+        tCount = completeSequence.count('T')
+        aCount = completeSequence.count('A')
+        otherCount = totalLength - (cCount + gCount + tCount + aCount)
+
+        sequenceText = ('SQ Sequence ' + str(totalLength) + ' BP; '
+                        + str(aCount) + ' A; ' + str(cCount) + ' C; '
+                        + str(gCount) + ' G; ' + str(tCount) + ' T; '
+                        + str(otherCount) + ' other;\n')
+
+        # Here's some logic to print the sequence information in groups of 10.
+        # This format is specified in the User manual specified by EMBL.
+        for currentSeqIndex in range(0, totalLength, 60):
+            # The character code for a sequence region is two blank spaces,
+            # followed by three blank spaces, for a total of 5 blanks.
+            sequenceText += ' ' * 5
+            sequenceRow = rawSequence[currentSeqIndex : currentSeqIndex + 60]
+
+            # A sequenceChunk is 10 nucleotides in this context.
+            # Format specifies up to six "chunks" per line.
+            for i in range(0, 6):
+                sequenceText += sequenceRow[i*10 : (i+1)*10] + ' '
+
+            # A short final row is padded with one blank per missing base so the
+            # index stays aligned; a full row of 60 needs no padding.
+            sequenceText += ' ' * (60 - len(sequenceRow))
+            sequenceText += str(currentSeqIndex + len(sequenceRow)) + '\n'
+
+        return sequenceText
+
+    # TODO: This method is not being used? Right, I should just delete this method.
+    # Instead of this method, maybe I should consider adding more robust error
+    # handling to the sequence generator.
+    def validateInputs(self):
+        """Always raise: this method is a tripwire that reports any remaining caller.
+
+        Raises Exception on every call. The checks that used to follow the raise
+        could never run, and they read attributes (inputSampleID, inputAllele,
+        inputClass) that __init__ does not set, so they have been removed.
+        """
+        raise Exception ('Validate Inputs Method is being used, after all.')
+
+
+
+