diff --git a/HEADER b/HEADER index 6d61fb3..dc43bfc 100755 --- a/HEADER +++ b/HEADER @@ -1,16 +1,15 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. +# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>. -# Version 1.0 diff --git a/MakeExecutables.bat b/MakeExecutables.bat index db32d1d..68cbe52 100755 --- a/MakeExecutables.bat +++ b/MakeExecutables.bat @@ -1,17 +1,17 @@ -:: This file is part of EMBL-HLA-Submission. +:: This file is part of saddle-bags. :: -:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify +:: saddle-bags is free software: you can redistribute it and/or modify :: it under the terms of the GNU Lesser General Public License as published by :: the Free Software Foundation, either version 3 of the License, or :: (at your option) any later version. :: -:: EMBL-HLA-Submission is distributed in the hope that it will be useful, +:: saddle-bags is distributed in the hope that it will be useful, :: but WITHOUT ANY WARRANTY; without even the implied warranty of :: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the :: GNU Lesser General Public License for more details. 
:: :: You should have received a copy of the GNU Lesser General Public License -:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. +:: along with saddle-bags. If not, see <http://www.gnu.org/licenses/>. :: Version 1.0 diff --git a/MakeExecutables.sh b/MakeExecutables.sh index 1e83bbe..364309b 100755 --- a/MakeExecutables.sh +++ b/MakeExecutables.sh @@ -1,19 +1,18 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. +# along with saddle-bags. If not, see <http://www.gnu.org/licenses/>. -# Version 1.0 # This isn't working in it's current state. The spec file is apparently just for windows, # it's still pulling in windows DLLs. diff --git a/README.md b/README.md index 6a6fda3..3635d17 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ To set up the environment in anaconda: Linux/Mac: ``` -conda create --name AlleleSubEnvironment biopython six +conda create --name AlleleSubEnvironment biopython six pycurl source activate AlleleSubEnvironment pip install pyinstaller packaging source deactivate diff --git a/Run_allele_submission.bat b/Run_allele_submission.bat index a7152a8..b893555 100755 --- a/Run_allele_submission.bat +++ b/Run_allele_submission.bat @@ -1,21 +1,21 @@ -:: This file is part of EMBL-HLA-Submission. 
+:: This file is part of saddle-bags. :: -:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify +:: saddle-bags is free software: you can redistribute it and/or modify :: it under the terms of the GNU Lesser General Public License as published by :: the Free Software Foundation, either version 3 of the License, or :: (at your option) any later version. :: -:: EMBL-HLA-Submission is distributed in the hope that it will be useful, +:: saddle-bags is distributed in the hope that it will be useful, :: but WITHOUT ANY WARRANTY; without even the implied warranty of :: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the :: GNU Lesser General Public License for more details. :: :: You should have received a copy of the GNU Lesser General Public License -:: along with EMBL-HLA-Submission. If not, see <http://www.gnu.org/licenses/>. +:: along with saddle-bags. If not, see <http://www.gnu.org/licenses/>. :: Version 1.0 :: See the file README.MD for how to set up your anaconda environment. -activate AlleleSubEnvironment && cd src && python AlleleSubmissionEMBL.py && deactivate +activate AlleleSubEnvironment && cd src && python AlleleSubmissionMain.py && deactivate diff --git a/Run_allele_submission.sh b/Run_allele_submission.sh index ad471f2..3da7d12 100755 --- a/Run_allele_submission.sh +++ b/Run_allele_submission.sh @@ -1,26 +1,24 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -# Version 1.0 +# along with saddle-bags. If not, see . # See the file README.MD for how to set up your anaconda environment. cd src source activate AlleleSubEnvironment -python AlleleSubmissionEMBL.py +python AlleleSubmissionMain.py source deactivate diff --git a/src/AlleleGenerator.py b/src/AlleleGenerator.py deleted file mode 100755 index 88a1604..0000000 --- a/src/AlleleGenerator.py +++ /dev/null @@ -1,484 +0,0 @@ -# This file is part of EMBL-HLA-Submission. -# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# EMBL-HLA-Submission is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -# Version 1.0 - -from Bio.Seq import Seq -from Bio.Alphabet import generic_dna -import sys -import tkMessageBox - -import math - -from HLAGene import * - -# The AlleleGenerator class contains logic to generate an EMBL HLA allele submission -# In ENA format. -class AlleleGenerator(): - - def __init__(self): - - self.inputFileName = '' - self.outputFileName = '' - self.sequenceAnnotation = HLAGene() - self.inputCellNummer = 0 - self.inputGene = '' - self.inputAllele = '' - - # This is a short wrapper method to use biopython's translation method. 
- # Most of this code is just checking for things that went wrong - def translateSequence(self,inputSequence): - - proteinSequence = '' - - try: - # Do nothing if the input sequence is blank. - if( len(inputSequence) > 0 ): - - coding_dna = Seq(inputSequence, generic_dna) - proteinSequence = str(coding_dna.translate()) - print ('Exon Sequence before translation:' + coding_dna) - print ('Translated Protein:' + proteinSequence) - - # Perform Sanity Checks. - # Stop codon *should* be at the end of the protein. - # Here we seek out the first instance of a stop codon, - # and remove the peptides afterwards. - # because that's what happens in real life. - stopCodonLocation = proteinSequence.find('*') - - # If no stop codon was found - if (stopCodonLocation == -1): - # If multiple of three (correct codon length) - if(len(coding_dna) % 3 == 0): - tkMessageBox.showinfo('No Stop Codon Found', - 'The translated protein does not contain a stop codon.' ) - - # Wrong Codon Length - else: - tkMessageBox.showinfo('No Stop Codon Found', - 'The translated protein does not contain a stop codon.\n' + - 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.') - - # If Stop Codon is in the end of the protein (This is expected and correct) - elif (stopCodonLocation == len(proteinSequence) - 1): - # If multiple of three (correct codon length) - if(len(coding_dna) % 3 == 0): - # Everything is fine in this case. 
Trim off the stop codon - proteinSequence = proteinSequence[0:stopCodonLocation] - pass - # Wrong Codon Length - else: - tkMessageBox.showinfo('Extra Nucleotides After the Stop Codon', - 'The stop codon is at the correct position in the protein, but ' + - 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.\n\n' + - 'Please double check your sequence.') - proteinSequence = proteinSequence[0:stopCodonLocation] - - # Else Stop Codon is premature (before the end of the protein) - else: - # If multiple of three (correct codon length) - if(len(coding_dna) % 3 == 0): - tkMessageBox.showinfo('Premature Stop Codon Detected', - 'Premature stop codon found:\nProtein Position (' + - str(stopCodonLocation + 1) + '/' + - str(len(proteinSequence)) + ')\n\n' + - 'Double check your protein sequence,\n' + - 'this might indicate a missense mutation.\n\n' + - 'Translated Protein:\n' + proteinSequence + - '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] + - '\n' - ) - proteinSequence = proteinSequence[0:stopCodonLocation] - - - # Wrong Codon Length - else: - tkMessageBox.showinfo('Premature Stop Codon Detected', - 'Premature stop codon found:\nProtein Position (' + - str(stopCodonLocation + 1) + '/' + - str(len(proteinSequence)) + ')\n\n' + - 'Nucleotide count is not a multiple of 3,\n' + - 'Double check your protein sequence,\n' + - 'this might indicate a missense mutation.\n\n' + - 'Translated Protein:\n' + proteinSequence + - '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] + - '\n' - ) - proteinSequence = proteinSequence[0:stopCodonLocation] - else: - print('Translating a nucleotide sequence of length 0. 
That was easy.') - pass - - return proteinSequence - - except Exception: - print 'Problem when translating protein:' - print sys.exc_info()[1] - tkMessageBox.showinfo('Protein Translation Error', - 'I could not translate your protein:\n' + str(sys.exc_info()[1])) - - raise - - # The input file should be a string of nucleotides, with capital letters to identify exons and introns. - # Annotations are expected and read in this format: - # fiveprimeutrEXONONEintrononeEXONTWOintrontwoEXONTHREEthreeprimeutr - # agctagctagctAGCTAGCtagctagctAGCTAGCtagctagctAGCTAGCTAgctagctagctag - # All spaces, line feeds, and tabs are removed and ignored. - def processInputSequence(self, inputSequenceText): - - resultGeneLoci = HLAGene() - - # Trim out any spaces, tabs, newlines. Uppercase. - cleanedGene = inputSequenceText.replace(' ','').replace('\n','').replace('\t','').replace('\r','') - - # Capitalize, so I can store a copy of the full unannotated sequence. - unannotatedGene = cleanedGene.upper() - resultGeneLoci.fullSequence = unannotatedGene - print('Total Sequence Length = ' + str(len(unannotatedGene))) - - # Loop through the cleaned and annotated input sequence, - # capitals and lowercase letters to determine exon start and end - if(len(cleanedGene) > 0): - - # Is the first feature an exon or an intron? - # If we begin in an Exon - if( cleanedGene[0] in ('A','G','C','T')): - insideAnExon = True - # If we begin in an Intron/UTR - elif( cleanedGene[0] in ('a','g','c','t')): - insideAnExon = False - else: - # Nonstandard nucleotide? I should start panicking. - #raise Exception('Nonstandard Nucleotide, not sure how to handle it') - print('Nonstandard Nucleotide at the beginning of the sequence, not sure how to handle it') - insideAnExon = False - - - locusBeginPosition = 0 - for x in range(0, len(cleanedGene)): - currentChar = cleanedGene[x] - - # Is this a standard nucleotide character? 
- if(currentChar.upper() in ('A','G','C','T')): - - if(currentChar.isupper()): - if(insideAnExon): - #We're STILL in an exon. In this case, I should just do nothing and continue. - pass - else: - #In this case, we're just starting an EXON. - #Store the last Intron in the list. - currentIntron = GeneLocus() - currentIntron.sequence = cleanedGene[locusBeginPosition:x].upper() - currentIntron.exon = False - resultGeneLoci.loci.append(currentIntron) - insideAnExon=True - locusBeginPosition = x - pass - - else: - if not (insideAnExon): - #We're STILL in an intron. Continue. - pass - else: - #Starting a new Intron. - # Store an Exon in the list. - currentExon = GeneLocus() - currentExon.sequence = cleanedGene[locusBeginPosition:x].upper() - currentExon.exon = True - resultGeneLoci.loci.append(currentExon) - insideAnExon = False - locusBeginPosition=x - pass - else: - print('Nonstandard nucleotide detected at position ' + str(x) + ' : ' + currentChar - + '. If this is a wildcard character, you might be ok.') - - # We've reached the end of the loop and we still need to store the last feature. - # Should be a 3' UTR, but I can't be sure, people like to put in weird sequences. - currentIntron = GeneLocus() - currentIntron.sequence = cleanedGene[locusBeginPosition:len(cleanedGene)].upper() - currentIntron.exon = insideAnExon - resultGeneLoci.loci.append(currentIntron) - - # Annotate the loci (name them) and print the results of the read file. - resultGeneLoci.annotateLoci() - resultGeneLoci.printGeneSummary() - - # If the sequence is empty - else: - print('Empty sequence, I don\'t have anything to do.') - - self.sequenceAnnotation = resultGeneLoci - - - - def printHeader(self): - headerText = '' - - # Print header - headerText += 'ID XXX; XXX; linear; genomic DNA; XXX; XXX; ' + str(self.sequenceAnnotation.totalLength()) + ' BP.\n' - headerText += 'XX\n' - # A valid document should have an AC (Accession Number) and DE (Description) field. 
- # I don't have an AC number available, so it's blank. - headerText += 'AC \n' - headerText += 'XX\n' - headerText += 'DE Human Leukocyte Antigen\n' - headerText += 'XX\n' - - # Print key - headerText += ('FH Key Location/Qualifiers\n') - headerText += ('FH\n') - - # Print source - # It's from a human. - headerText += ('FT source 1..' + str(self.sequenceAnnotation.totalLength()) + '\n') - headerText += ('FT /organism="Homo sapiens"\n') - headerText += ('FT /db_xref="taxon:9606"\n') - headerText += ('FT /mol_type="genomic DNA"\n') - headerText += ('FT /chromosome="6"\n') - headerText += ('FT /isolate="' + str(self.inputCellNummer) + '"\n') - - return headerText - - def printMRNA(self): - mRNAText = '' - # Print mRNA - mRNAText += ('FT mRNA join(') - - # Iterate through the indices of the UTRs and exons. - # The 3' and 5' UTR are included in the mRNA - for x in range(0,len(self.sequenceAnnotation.loci)): - geneLocus = self.sequenceAnnotation.loci[x] - # If it is an exon or UTR - if (geneLocus.exon or 'UT' in geneLocus.name): - mRNAText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex) + ',' - - # Trim off the last comma and add a parenthese - mRNAText = mRNAText[0:len(mRNAText)-1] + ')\n' - - mRNAText += ('FT /gene="' + str(self.inputGene) + '"\n') - mRNAText += ('FT /allele="' + str(self.inputAllele) + '"\n') - mRNAText += ('FT /product=\"MHC class I antigen\"\n') - - return mRNAText - - - def printCDS(self): - cdsText = '' - - # Print CDS - # CDS is the coding sequence. It should include the exons, but not the UTRs/Introns - # The range 1:featureCount-1 will exclude the UTRs. - cdsText += ('FT CDS join(') - for x in range(0,len(self.sequenceAnnotation.loci)): - geneLocus = self.sequenceAnnotation.loci[x] - if (geneLocus.exon): - cdsText += str(geneLocus.beginIndex) + '..' 
+ str(geneLocus.endIndex) - if not x==len(self.sequenceAnnotation.loci)-2: - cdsText += ',' - else: - cdsText += ')\n' - - cdsText += ('FT /transl_table=1\n') - cdsText += ('FT /codon_start=1\n') - cdsText += ('FT /gene="' + str(self.inputGene) + '"\n') - cdsText += ('FT /allele="' + str(self.inputAllele) + '"\n') - cdsText += ('FT /product=\"MHC class I antigen\"\n') - cdsText += ('FT /translation=\"') - - # Some simple formatting for the peptide sequence, making it human and computer readable. - # 80 peptides per line. Except the first line, which is 66. - # 66 is 80-14, where 14 is the length of { /translation=" } - peptideSequence = self.translateSequence(self.sequenceAnnotation.getExonSequence()) - if(len(peptideSequence) < 66): - cdsText += (peptideSequence) + '\"\n' - else: - cdsText += peptideSequence[0:66] + '\n' - i=66 - while (i < len(peptideSequence)): - cdsText += 'FT ' + peptideSequence[i:i+80] + '\n' - i += 80 - - return cdsText - - def printFeatures(self): - featureText = '' - - exonIndex = 1 - intronIndex = 1 - - geneHas3UTR = False - geneHas5UTR = False - - for x in range(0,len(self.sequenceAnnotation.loci)): - currentFeature = self.sequenceAnnotation.loci[x] - - # 3' UTR - if(currentFeature.name == '3UT'): - featureText += ('FT 3\'UTR ' + str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex) + '\n') - featureText += ('FT /note=\"3\'UTR\"\n') - featureText += ('FT /gene="' + str(self.inputGene) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - geneHas3UTR = True - - # 5' UTR - elif(currentFeature.name == '5UT'): - featureText += ('FT 5\'UTR ' + str(currentFeature.beginIndex) + '..' 
+ str(currentFeature.endIndex) + '\n') - featureText += ('FT /note=\"5\'UTR\"\n') - featureText += ('FT /gene="' + str(self.inputGene) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - geneHas5UTR = True - - # Exon - elif(currentFeature.exon): - featureText += ('FT exon ' + str(currentFeature.beginIndex) - + '..' + str(currentFeature.endIndex) + '\n') - featureText += ('FT /number=' + str(exonIndex) + '\n') - featureText += ('FT /gene="' + str(self.inputGene) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - exonIndex += 1 - - # Intron - else: - featureText += ('FT intron ' + str(currentFeature.beginIndex) - + '..' + str(currentFeature.endIndex) + '\n') - featureText += ('FT /number=' + str(intronIndex) + '\n') - featureText += ('FT /gene="' + str(self.inputGene) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - intronIndex += 1 - - - featureText += ('XX\n') - - # Do a quick sanity check. If we are missing either UTR I should warn the user. - # But move on with your life, this is not worth getting upset over. - if (not geneHas3UTR and not geneHas5UTR): - tkMessageBox.showinfo('Missing UTRs', - 'This sequence has no 5\' or 3\' UTR.\n\n' + - 'Use lowercase nucleotides at the\n' + - 'beginning and end of your DNA\n' + - 'sequence to specify the 5\' and 3\' UTRs.' ) - elif (not geneHas5UTR): - tkMessageBox.showinfo('Missing 5\' UTR', - 'This sequence has no 5\' UTR.\n\n' + - 'Use lowercase nucleotides at the\n' + - 'beginning and end of your DNA\n' + - 'sequence to specify the 5\' and 3\' UTRs.' ) - elif (not geneHas3UTR): - tkMessageBox.showinfo('Missing 3\' UTR', - 'This sequence has no 3\' UTR.\n\n' + - 'Use lowercase nucleotides at the\n' + - 'beginning and end of your DNA\n' + - 'sequence to specify the 5\' and 3\' UTRs.' 
) - else: - print('The UTRs look fine.') - pass - - return featureText - - def printSequence(self): - sequenceText = '' - - completeSequence = self.sequenceAnnotation.getCompleteSequence().upper() - - cCount = completeSequence.count('C') - gCount = completeSequence.count('G') - tCount = completeSequence.count('T') - aCount = completeSequence.count('A') - otherCount = self.sequenceAnnotation.totalLength() - (cCount + gCount + tCount + aCount) - - sequenceText += ('SQ Sequence ' + str(self.sequenceAnnotation.totalLength()) + ' BP; ' - + str(aCount) + ' A; ' + str(cCount) + ' C; ' - + str(gCount) + ' G; ' + str(tCount) + ' T; ' - + str(otherCount) + ' other;\n') - - # Here's some logic to print the sequence information in groups of 10. - # This format is specified in the User manual specified by EMBL. - currentSeqIndex = 0 - - while (currentSeqIndex < self.sequenceAnnotation.totalLength()): - # The character code for a sequence region is two blank spaces, - # followed by three blank spaces, for a total of 5 blanks. - sequenceText += ' ' - sequenceRow = self.sequenceAnnotation.getCompleteSequence()[currentSeqIndex : currentSeqIndex + 60] - - # A sequenceChunk is 10 nucleotides in this context. - # Format specifies up to six "chunks" per line. - for i in range(0,6): - sequenceChunk = sequenceRow[i*10 : (i+1)*10] - sequenceText += sequenceChunk + ' ' - - # If line is complete (=60 bp), we can print the nucleotide index and move on to the next row. - if(len(sequenceRow) == 60): - sequenceText += str(currentSeqIndex + 60) + '\n' - # but if line is not complete (this is more likely, and more complicated.) - else: - # Fill with spaces to align the nucleotide indices at the end of the sequence. 
- numberSpaces = 60-len(sequenceRow) - for n in range (0, numberSpaces): - sequenceText += ' ' - sequenceText += (str(len(sequenceRow) + currentSeqIndex) + '\n') - - # The next row of the sequence - currentSeqIndex += 60 - - return sequenceText - - - # Create the text submission based on the ENA format. - def buildENASubmission(self): - - # ENA format is the preferred submission type for EMBL. More information: - # http://www.ebi.ac.uk/ena/submit/sequence-submission - # http://www.ebi.ac.uk/ena/submit/entry-upload-templates - # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt - # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html - # http://www.ebi.ac.uk/ena/software/flat-file-validator - - documentBuffer = '' - - totalLength = self.sequenceAnnotation.totalLength() - print('total calculated length = ' + str(totalLength)) - - if(totalLength > 0): - - # These are the main sections of the ENA submission. - documentBuffer += self.printHeader() - documentBuffer += self.printMRNA() - documentBuffer += self.printCDS() - documentBuffer += self.printFeatures() - documentBuffer += self.printSequence() - - # Print entry terminator. The last line of an ENA entry. - documentBuffer += ('//\n') - - else: - tkMessageBox.showinfo('No HLA Sequence Found', - 'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' ) - - pass - - - return documentBuffer - - # Simple method to write the results to a file on your computer. - def outputENASubmissionToFile(self, outputText): - - outputFileObject = open(self.outputFileName, 'w') - outputFileObject.write(outputText) - outputFileObject.close() - diff --git a/src/AlleleGui.py b/src/AlleleGui.py deleted file mode 100755 index 1a99985..0000000 --- a/src/AlleleGui.py +++ /dev/null @@ -1,242 +0,0 @@ -# This file is part of EMBL-HLA-Submission. 
-# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# EMBL-HLA-Submission is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -# Version 1.0 -SoftwareVersion = "Bhast Version 1.0" - -import os - -import Tkinter, Tkconstants, tkFileDialog, tkMessageBox -from Tkinter import * - -from AlleleGenerator import AlleleGenerator -from HLAGene import * - -# The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class. -class AlleleGui(Tkinter.Frame): - - # Initialize the GUI - def __init__(self, root): - Tkinter.Frame.__init__(self, root) - root.title("Bhast - A Novel HLA Allele Submission Generator") - self.parent = root - - # Ctrl-A doesn't work by default in TK. I guess I need to do it myself. - root.bind_class("Text","", self.selectall) - - self.initialize() - - # I shouldn't need to write a select-All method but TK is kind of annoying. 
- def selectall(self, event): - - event.widget.tag_add("sel","1.0","end") - - # Initialize GUI elements - def initialize(self): - - button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5} - - self.cellNumInstrText = Tkinter.StringVar() - self.cellNumInstrText.set('Sample ID:') - self.inputCellNummer = Tkinter.StringVar() - - self.geneInstrText = Tkinter.StringVar() - self.geneInstrText.set('Gene:') - self.inputGene = Tkinter.StringVar() - - self.alleleInstrText = Tkinter.StringVar() - self.alleleInstrText.set('Allele:') - self.inputAllele = Tkinter.StringVar() - - self.featureInstrText = Tkinter.StringVar() - self.featureInstrText.set('Annotated Sequence:') - - self.outputEMBLSubmission = Tkinter.StringVar() - self.outputEMBLSubmission.set('Resulting Allele Submission:') - - Tkinter.Label(self, width=80, height=1, textvariable=self.cellNumInstrText).pack() - Tkinter.Entry(self, width=15, textvariable=self.inputCellNummer).pack() - - Tkinter.Label(self, width=80, height=1, textvariable=self.geneInstrText).pack() - Tkinter.Entry(self, width=15, textvariable=self.inputGene).pack() - - Tkinter.Label(self, width=80, height=1, textvariable=self.alleleInstrText).pack() - Tkinter.Entry(self, width=15, textvariable=self.inputAllele).pack() - - Tkinter.Label(self, width=80, height=1, textvariable=self.featureInstrText).pack() - - # Create a frame for the input widget, add scrollbars. 
- self.featureInputFrame = Tkinter.Frame(self) - - self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL) - self.featureInputXScrollbar.pack(side=BOTTOM, fill=X) - - self.featureInputYScrollbar = Scrollbar(self.featureInputFrame) - self.featureInputYScrollbar.pack(side=RIGHT, fill=Y) - - self.featureInputGuiObject = Tkinter.Text( - self.featureInputFrame, width=80, height=12, wrap=NONE - , xscrollcommand=self.featureInputXScrollbar.set - , yscrollcommand=self.featureInputYScrollbar.set - ) - - self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview) - self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview) - - self.featureInputGuiObject.pack() - self.featureInputFrame.pack() - - Tkinter.Button(self, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.constructSubmission).pack(**button_opt) - - Tkinter.Label(self, width=80, height=1, textvariable=self.outputEMBLSubmission).pack() - - # Output interface is contained on a frame. - self.submOutputFrame = Tkinter.Frame(self) - - self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL) - self.submOutputXScrollbar.pack(side=BOTTOM, fill=X) - - self.submOutputYScrollbar = Scrollbar(self.submOutputFrame) - self.submOutputYScrollbar.pack(side=RIGHT, fill=Y) - - self.submOutputGuiObject = Tkinter.Text( - self.submOutputFrame, width=80, height=15, wrap=NONE - , xscrollcommand=self.submOutputXScrollbar.set - , yscrollcommand=self.submOutputYScrollbar.set - ) - - self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview) - self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview) - - self.submOutputGuiObject.pack() - self.submOutputFrame.pack() - - # This is the directory the python executable is running from. - # self.idir is used inside the saveSubmissionFile method. - # Maybe the code should be in there. 
- FileAndPath = os.path.abspath(__file__) - self.idir, self.ifile = os.path.split(FileAndPath) - - Tkinter.Button(self, text='Save this submission to my computer', command=self.saveSubmissionFile).pack(**button_opt) - - self.instructionText = Tkinter.StringVar() - self.instructionText.set('This tool was developed by the Tissue Typing Laboratory at\nMaastricht University Medical Center.\nFor more information:') - Tkinter.Label(self, width=85, height=3, textvariable=self.instructionText).pack() - - # Make a frame for the more-info buttons - self.moreInfoFrame = Tkinter.Frame(self) - - Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse).grid(row=0, column=0) - Tkinter.Button(self.moreInfoFrame, text='Contacting or Citing MUMC', command=self.contactInformation).grid(row=0, column=1) - Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence).grid(row=0, column=2) - - self.moreInfoFrame.pack() - - - def sampleSequence(self): - self.featureInputGuiObject.delete('1.0','end') - self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat') - - self.inputAllele.set('Allele:01:02') - self.inputGene.set('HLA-C') - self.inputCellNummer.set('Donor_12345') - - self.constructSubmission() - - # This method should popup some instruction text in a wee window. - # This should be explicit on how to use the tool. - def howToUse(self): - tkMessageBox.showinfo('How to use this tool', - 'This software is to be used to create an\n' - + 'EMBL-formatted submission document,\n' - + 'which specifies a (novel) HLA allele.\n\n' - - + 'This tool requires you to submit a\n' - + 'full length HLA allele, including\n' - + '5\' and 3\' UTRs.\n\n' - - + 'Use capital letters for exons,\n' - + 'lowercase for introns & UTRs.\n\n' - - + 'Push the "Example Sequence" button to see a small example of' - + ' a formatted sequence.\n' - + 'Sequences should follow this pattern:\n' - + '5\'utr EX1 int1 EX2 ... 
EX{X} 3\'utr\n\n' - - + 'To use this tool:\n' - + '1.) Fill in a Sample ID, Gene Name, and Allele.' - + ' This text will be included in the submission.\n' - + '2.) Paste your formatted sequence in the\n' - + 'Annotated Sequence text area.\n' - + '3.) Push \"Generate an EMBL submission\" button' - + ' to generate a submission.\n' - + '4.) Push the "Save the submission" button' - + ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n' - - + 'All spaces, tabs, and newlines are' - + ' removed before the nucleotide sequence is translated.' - ) - - def contactInformation(self): - # This method should list contact information for MUMC, and a link to the github page. - tkMessageBox.showinfo('Contact Information', - 'This software was created at\n' - + 'Maastricht University Medical Center\n' - + 'Transplantation Immunology\n' - + 'Tissue Typing Laboratory.\n' - + 'by Ben Matern:\n' - + 'ben.matern@mumc.nl\n\n' - - + 'Please send Ben your bioinformatics\n' - + 'and data related questions.\n\n' - - + 'all other inquiries can be directed\n' - + 'to Marcel Tilanus:\n' - + 'm.tilanus@mumc.nl\n\n' - - + 'This code will be hosted at:\n' - + 'https://github.com/transplantation-\nimmunology/EMBL-HLA-Submission\n' - + 'You will find more information on\n' - + 'EMBL\'s data format on that page.' - - ) - - # Ask user for a output file location, and write the EMBL submission to a file. - # This takes the input from the output field, rather than generate a new submission. - # So the user can edit the submission before or after saving it. - def saveSubmissionFile(self): - - self.dir_opt = options = {} - options['initialdir'] = self.idir - options['parent'] = self - options['title'] = 'Specify your output file.' 
- options['initialfile'] = 'NovelAlleleEMBLSubmission.txt' - outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt) - submissionText = self.submOutputGuiObject.get('1.0', 'end') - outputFileObject.write(submissionText) - - # Gather sequence information from the input elements, and generate a text EMBL submission. - def constructSubmission(self): - - allGen = AlleleGenerator() - roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end') - allGen.inputCellNummer = self.inputCellNummer.get() - allGen.inputGene = self.inputGene.get() - allGen.inputAllele = self.inputAllele.get() - allGen.processInputSequence(roughFeatureSequence) - enaSubmission = allGen.buildENASubmission() - self.submOutputGuiObject.delete('1.0','end') - self.submOutputGuiObject.insert('1.0', enaSubmission) - diff --git a/src/AlleleGuiEMBL.py b/src/AlleleGuiEMBL.py new file mode 100755 index 0000000..8737461 --- /dev/null +++ b/src/AlleleGuiEMBL.py @@ -0,0 +1,649 @@ +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . 
+ + +import os +from os import makedirs +from os.path import expanduser, join, isdir + +import datetime +import hashlib +import ftplib +import gzip +import shutil +#import pycurl +#import StringIO + +import Tkinter, Tkconstants, tkFileDialog, tkMessageBox +from Tkinter import * + +from SubmissionGeneratorEMBL import SubmissionGeneratorEMBL +from AlleleGuiEMBLInputForm import AlleleGuiEMBLInputForm +from AlleleSubCommon import * +from AlleleSubmissionEMBLXml import * +from AlleleSubmissionEMBLRestMethods import * +#from HLAGene import HLAGene + +# The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class. +class AlleleGuiEMBL(Tkinter.Frame): + + # I shouldn't need to write a select-All method but TK is kind of annoying. + def selectall(self, event): + event.widget.tag_add("sel","1.0","end") + + # Initialize the GUI + def __init__(self, root): + Tkinter.Frame.__init__(self, root) + root.title("Create and Submit an EMBL Sequence Submission") + self.parent = root + + # Ctrl-A doesn't work by default in TK. I guess I need to do it myself. + root.bind_class("Text","", self.selectall) + + # To define the exit behavior. Save the input sequence text. + self.parent.protocol('WM_DELETE_WINDOW', self.saveAndExit) + + button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5} + + + # A frame for the Instructions Label. 
+ self.instructionsFrame = Tkinter.Frame(self) + self.instructionText = Tkinter.StringVar() + self.instructionText.set('\nThis tool will generate an HLA allele submission for\n' + + 'the EMBL / ENA nucleotide database.\n' + + 'If you provide login credentials, you may automatically submit the sequence.\n' + + 'For more information:\n') + Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack() + self.instructionsFrame.pack(expand=False, fill='both') + + # Make a frame for the more-info buttons + self.moreInfoFrame = Tkinter.Frame(self) + self.howToUseButton = Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse) + self.howToUseButton.grid(row=0, column=0) + self.exampleButton = Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence) + self.exampleButton.grid(row=0, column=1) + self.moreInfoFrame.pack() + + # Create a frame for the input widget, add scrollbars. + self.featureInputFrame = Tkinter.Frame(self) + + self.featureInstrText = Tkinter.StringVar() + self.featureInstrText.set('Annotated Sequence:') + self.featureInstrLabel = Tkinter.Label(self.featureInputFrame, width=80, height=1, textvariable=self.featureInstrText).pack() + + self.featureInputXScrollbar = Scrollbar(self.featureInputFrame, orient=HORIZONTAL) + self.featureInputXScrollbar.pack(side=BOTTOM, fill=X) + + self.featureInputYScrollbar = Scrollbar(self.featureInputFrame) + self.featureInputYScrollbar.pack(side=RIGHT, fill=Y) + + self.featureInputGuiObject = Tkinter.Text( + self.featureInputFrame + , width=80, height=8 + , wrap=NONE + , xscrollcommand=self.featureInputXScrollbar.set + , yscrollcommand=self.featureInputYScrollbar.set + ) + + self.featureInputXScrollbar.config(command=self.featureInputGuiObject.xview) + self.featureInputYScrollbar.config(command=self.featureInputGuiObject.yview) + + self.featureInputGuiObject.pack(expand=True, fill='both') + 
self.featureInputFrame.pack(expand=True, fill='both') + + + # Create Frame for "Generate Submission" button. + self.submButtonFrame = Tkinter.Frame(self) + self.submissionOptionsButton = Tkinter.Button(self.submButtonFrame, text='Submission Options', command=self.chooseSubmissionOptions) + self.submissionOptionsButton.grid(row=0, column=0) + self.generateSubmissionButton = Tkinter.Button(self.submButtonFrame, text=unichr(8681) + ' Generate an EMBL submission ' + unichr(8681), command=self.constructSubmission) + self.generateSubmissionButton.grid(row=0, column=1) + self.submButtonFrame.pack() + + + # Output interface is contained on a frame. + self.submOutputFrame = Tkinter.Frame(self) + + self.outputEMBLSubmission = Tkinter.StringVar() + self.outputEMBLSubmission.set('Allele Submission Preview:') + self.outputEMBLLabel = Tkinter.Label(self.submOutputFrame, width=80, height=1, textvariable=self.outputEMBLSubmission).pack() + + self.submOutputXScrollbar = Scrollbar(self.submOutputFrame, orient=HORIZONTAL) + self.submOutputXScrollbar.pack(side=BOTTOM, fill=X) + + self.submOutputYScrollbar = Scrollbar(self.submOutputFrame) + self.submOutputYScrollbar.pack(side=RIGHT, fill=Y) + + self.submOutputGuiObject = Tkinter.Text( + self.submOutputFrame, width=80, height=8, wrap=NONE + , xscrollcommand=self.submOutputXScrollbar.set + , yscrollcommand=self.submOutputYScrollbar.set + ) + + self.submOutputXScrollbar.config(command=self.submOutputGuiObject.xview) + self.submOutputYScrollbar.config(command=self.submOutputGuiObject.yview) + + self.submOutputGuiObject.pack(expand=True, fill='both') + self.submOutputFrame.pack(expand=True, fill='both') + + self.uploadSubmissionFrame = Tkinter.Frame(self) + self.uploadButton = Tkinter.Button(self.uploadSubmissionFrame, text='Upload Submission to EMBL', command=self.uploadSubmission) + self.uploadButton.pack(**button_opt) + self.saveSubmissionButton = Tkinter.Button(self.uploadSubmissionFrame, text='Save Submission to My Computer', 
command=self.saveSubmissionFile) + self.saveSubmissionButton.pack(**button_opt) + self.exitButton = Tkinter.Button(self.uploadSubmissionFrame, text='Exit', command=self.saveAndExit) + self.exitButton.pack(**button_opt) + self.uploadSubmissionFrame.pack() + + self.pack(expand=True, fill='both') + + def chooseSubmissionOptions(self): + print ('Opening the EMBL Submission Options Dialog') + + self.disableGUI() + + emblOptionsRoot = Tkinter.Toplevel() + emblOptionsRoot.bind("", self.enableGUI) + AlleleGuiEMBLInputForm(emblOptionsRoot).pack() + + # Set the X and the Y Position of the options window, so it is nearby. + emblOptionsRoot.update() + windowXpos = str(self.parent.winfo_geometry().split('+')[1]) + windowYpos = str(self.parent.winfo_geometry().split('+')[2]) + newGeometry = (str(emblOptionsRoot.winfo_width()) + 'x' + + str(emblOptionsRoot.winfo_height()) + '+' + + str(windowXpos) + '+' + + str(windowYpos)) + emblOptionsRoot.geometry(newGeometry) + + emblOptionsRoot.mainloop() + + + def writeMd5(self, inputFileName, outputFileName): + hash_md5 = hashlib.md5() + with open(inputFileName, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + hashValue= hash_md5.hexdigest() + + outputFile = createOutputFile(outputFileName) + # The Ubuntu md5sum program seems to write a single checksum and filename with 2 spaces between + # I don't know why 2 spaces, but I'll roll with it. + outputFile.write(str(hashValue) + ' ' + str(split(inputFileName)[1])) + outputFile.close() + + return hashValue + + + + + def uploadSubmission(self): + print('Uploading Submission to EMBL') + + # Determine a working directory. Folder underneath executable called temp. 
+ try: + workingDirectory = join(expanduser("~"), 'temp_upload_directory') + print('I can work in this directory:' + workingDirectory) + + if not isdir(workingDirectory): + print('Making Directory:' + workingDirectory) + makedirs(workingDirectory) + except Exception: + print 'Cannot Initialize Working Directory' + print sys.exc_info()[1] + tkMessageBox.showinfo('Working Directory Error', + 'Sorry, I failed to create this working directory:\n' + + str(workingDirectory) + + '\n and I cannot continue.\nMaybe this is a ' + + 'permissions issue, are these folders read only?\n' + + str(sys.exc_info()[1])) + return + + restLog = createOutputFile(join(workingDirectory, 'Submission_Log.txt')) + + + + # TODO: Make a REST log. + # For each step report success or failure. Same as popup messages. + + + + emblUsername = getConfigurationValue('embl_username') + emblPassword = getConfigurationValue('embl_password') + if(emblUsername is None + or len(emblUsername) < 1 + or emblPassword is None + or len(emblPassword) < 1): + tkMessageBox.showinfo('Missing Login Credentials', + 'You must provide EMBL username and password.\n' + 'Please use the "Submission Options" button.') + restLog.write('Missing EMBL Username or Password.' + '\n') + return + else: + restLog.write('EMBL Username and Password exist.' + '\n') + + + useTestServers = (int(getConfigurationValue('test_submission')) == 1) + # Are you sure? + if useTestServers: + restLog.write('Using Test EMBL Server.' + '\n') + result = tkMessageBox.askquestion("Submit to TEST / DEMO environment", "You are about to submit a sequence to the\n\nTEST / DEMO EMBL environment.\n\nAre You Sure?", icon='warning') + else: + restLog.write('Using Production EMBL Server.' + '\n') + result = tkMessageBox.askquestion("Submit to LIVE / PROD environment", "You are about to submit a sequence to the\n\nLIVE / PROD EMBL environment.\n\nAre You Sure?", icon='warning') + + if result == 'yes': + pass + else: + return + + # TODO: Existing project? 
Maybe I should check if the study/project exists, before I get started + + + + + + # Give my submission a filename. SOmething with a datetime stamp + try: + # This includes a "seconds" measure, should be pretty unique. + dateTimeNow = '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.datetime.now()) + submissionShortFileName = 'HLA_Submission_' + dateTimeNow + '.txt' + submissionFileName = join(workingDirectory, submissionShortFileName) + zippedShortFileName = submissionShortFileName + '.gz' + zippedFileName = join(workingDirectory, zippedShortFileName) + md5FileName = zippedFileName + '.md5' + + submissionText = self.submOutputGuiObject.get('1.0', 'end') + + outputFileObject = open(submissionFileName, 'w') + outputFileObject.write(submissionText) + outputFileObject.close() + + except Exception: + print 'Cannot Write Submission Flatfile' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Write Submission Flatfile', + 'Sorry, I failed to create the submission file:\n' + + str(submissionText) + + '\n and I cannot continue.\nMaybe this is a ' + + 'permissions issue, are these folders read only?\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to create submission file:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('Submission file was created:' + str(submissionFileName) + '\n') + + # gzip the submission file. Make a gz file. 
+ try: + #zippedFileName = submissionFileName + '.gz' + + with open(submissionFileName, 'rb') as fileIn, gzip.open(zippedFileName, 'wb') as fileOut: + shutil.copyfileobj(fileIn, fileOut) + + except Exception: + print 'Cannot Compress Submission File' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Compress Submission File', + 'Sorry, I failed to compress the submission file:\n' + + str(zippedFileName) + + '\n and I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to create zip file:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('Zip file was created:' + str(zippedFileName) + '\n') + + # Calculate an MD5SUM + try: + #md5FileName = zippedFileName + '.md5' + md5HashValue = self.writeMd5(zippedFileName,md5FileName) + + except Exception: + print 'Cannot Calculate MD5' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Calculate an Md5 checksum', + 'Sorry, I failed to calculate an md5 checksum\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to create zip file:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('md5 file was created:' + str(md5FileName) + '\n') + + # Use FTP to send the file to EMBL + try: + if useTestServers: + ftpServerAddress = getConfigurationValue('embl_ftp_upload_site_test') + else: + ftpServerAddress = getConfigurationValue('embl_ftp_upload_site_prod') + + #print ('attempting to open ftp connection') + ftp = ftplib.FTP(ftpServerAddress) + ftp.login(getConfigurationValue('embl_username'), getConfigurationValue('embl_password')) + ftp.storbinary('STOR ' + '/' + split(zippedFileName)[1], open(zippedFileName, 'rb'), 1024) + ftp.storbinary('STOR ' + '/' + split(md5FileName)[1], open(md5FileName, 'rb'), 1024) + ftp.close() + # is that it? Easy. 
+ + except Exception: + print 'Cannot Upload to FTP site' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Upload to FTP site', + 'Sorry, I failed to upload your submission files to the EMBL FTP site\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to upload to FTP site:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('Submission and MD5 successfully uploaded.\n') + + # Handle the new project + # effectively, study = project + # existing study = 1, new study = 2 + newProject = (getConfigurationValue('choose_project') == '2') + if newProject: + + # Generate Project and Project Submission XML Files + try: + projectFileName = join(workingDirectory, 'project.xml') + projectText = createProjectXML(projectFileName) + + projectSubmissionFileName = join(workingDirectory, 'project_submission.xml') + projectSubmissionText = createProjectSubmissionXML(projectSubmissionFileName + ,'proj_sub_' + dateTimeNow + ,'project.xml') + + #print('I made this project text:\n' + projectText) + #print('I made this project submission text:\n' + projectSubmissionText) + + except Exception: + print 'Cannot Create Project Submission XML' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Create Project Submission XML', + 'Sorry, I failed to create a project XML file\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to create project submission file:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('Project Submission XML files were created.\n') + + # Use REST to submit this project + try: + # Return value should be a tuple: + # (Success, ProjectAccession, Messages[]) + (projectSubmissionSuccess, projectAccessionNumber, projectErrorMessages) = performProjectSubmission(projectSubmissionFileName,projectFileName) + + if(projectSubmissionSuccess): + # Great. The project was created successfully. + # Lets use this new study accession moving forward. 
+ assignConfigurationValue('study_accession', projectAccessionNumber) + assignConfigurationValue('choose_project','1') + pass + else: + messageText = ('There was a problem in the Project Submission.\n' + + 'I cannot continue.\n' + + 'These messages were reported by EMBL:\n') + for errorMessage in projectErrorMessages: + messageText += ('\n' + errorMessage + '\n') + tkMessageBox.showinfo('Cannot Submit Project XML via REST', messageText) + restLog.write('Failure to submit project submission file:' + str(sys.exc_info()[1]) + '\n') + return + + except Exception: + print 'Cannot Submit Project XML' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Submit Project XML', + 'Sorry, I failed to submit the project XML file\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to upload project submission file:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('New study has been uploaded, accession:' + str(getConfigurationValue('study_accession')) + '\n') + + # existing project, we will use the supplied accession # + else: + restLog.write('Using existing study accession:' + str(getConfigurationValue('study_accession')) + '\n') + # projectAccessionNumber = getConfigurationValue('study_accession') + pass + + # Generate Analysis and Analysis Submission xmls + try: + analysisFileName = join(workingDirectory, 'analysis.xml') + analysisText = createAnalysisXML(analysisFileName, md5HashValue, zippedShortFileName) + + analysisSubmissionFileName = join(workingDirectory, 'analysis_submission.xml') + analysisSubmissionText = createAnalysisSubmissionXML(analysisSubmissionFileName + ,'analysis_sub_' + dateTimeNow + ,'analysis.xml') + + except Exception: + print 'Cannot Create Analysis Submission XML' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Create Analysis Submission XML', + 'Sorry, I failed to create a Analysis XML file\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to create analysis 
submission file:' + str(sys.exc_info()[1]) + '\n') + return + + restLog.write('Analysis Submission XML files were created.\n') + + # Use REST to submit this analysis + try: + # Return value should be a tuple: + # (Success, analysisAccessionNumber, Messages[]) + (analysisSubmissionSuccess, analysisAccessionNumber, analysisErrorMessages) = performAnalysisSubmission(analysisSubmissionFileName,analysisFileName) + + if(analysisSubmissionSuccess): + # Great. The analysis was created successfully. + pass + else: + messageText = ('There was a problem in the Analysis Submission.\n' + + 'I cannot continue.\n' + + 'These messages were reported by EMBL:\n') + for errorMessage in analysisErrorMessages: + messageText += ('\n' + errorMessage + '\n') + tkMessageBox.showinfo('Cannot Submit Analysis XML via REST', messageText) + restLog.write('Failure to submit analysis submission file:' + str(sys.exc_info()[1]) + '\n') + return + + except Exception: + print 'Cannot Submit Analysis XML' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Submit Analysis XML via REST', + 'Sorry, I failed to submit the analysis XML file\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + return + + restLog.write('New analysis has been Uploaded, accession:' + str(analysisAccessionNumber) + '\n') + + restLog.close() + + # Popup message with Results + tkMessageBox.showinfo('Success uploading submission to EMBL.', + 'The sequence and analysis was uploaded to EMBL ENA Successfully.\n\n' + + 'For your reference:\n\n' + + 'You can use this Project/Study accession\nnumber on future submissions:\n' + + 'Study Accession:' + str(getConfigurationValue('study_accession') + '\n\n') + + 'Use the Analysis Accession number if you\ncontact EMBL regarding this\nsequence submission:\n' + + 'Analysis Accession:' + str(analysisAccessionNumber) + '\n\n' + + 'Find your submission files here:\n' + + workingDirectory + '\n\n' + + 'If EMBL successfully validates your sequence, you will\n' + + 'recieve an email with 
an EMBL Sequence accession number.\n' + + 'This *SEQUENCE* accession number is necessary for IMGT submission.\n' + + 'Contact EMBL Support with your\nAnalysis Accession # if it has been\nmore than 48 hours since submission.\n' + + ) + + + def sampleSequence(self): + self.featureInputGuiObject.delete('1.0','end') + self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat') + + # Clear the password, keep the username + #assignConfigurationValue('embl_username','') + assignConfigurationValue('embl_password','') + + assignConfigurationValue('sample_id', 'Donor_12345') + assignConfigurationValue('gene','HLA-C') + assignConfigurationValue('class','1') + assignConfigurationValue("allele_name",'Allele:01:02') + + assignConfigurationValue('study_accession','PRJEB12345') + + assignConfigurationValue('choose_project','2') + + assignConfigurationValue('study_identifier','HLA_Analysis_Project') + assignConfigurationValue('study_short_title','HLA Typing for Cancer Research.') + assignConfigurationValue('study_abstract','An abstract is a more in-depth description of the nature of the research project.') + + assignConfigurationValue('analysis_alias','unique_HLA_analysis_alias') + assignConfigurationValue('analysis_title','Novel HLA sequence from patient with Leukemia') + assignConfigurationValue('analysis_description','This is an HLA-A sequence from a patient. It was discovered that he has Leukemia, so we decided to sequence his HLA.') + + self.constructSubmission() + + # This method should popup some instruction text in a wee window. + # This should be explicit on how to use the tool. 
+ def howToUse(self): + tkMessageBox.showinfo('How to use this tool', + 'This software is to be used to create an\n' + + 'EMBL-formatted submission document,\n' + + 'which specifies a (novel) HLA allele.\n\n' + + + 'This tool requires you to submit a\n' + + 'full length HLA allele, including\n' + + '5\' and 3\' UTRs.\n\n' + + + 'Use capital letters for exons,\n' + + 'lowercase for introns & UTRs.\n\n' + + + 'Push the "Example Sequence" button to see a small example of' + + ' a formatted sequence.\n' + + 'Sequences should follow this pattern:\n' + + '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n' + + + 'To use this tool:\n' + + '1.) Fill in a Sample ID, Gene Name, and Allele.' + + ' This text will be included in the submission.\n' + + '2.) Paste your formatted sequence in the\n' + + 'Annotated Sequence text area.\n' + + '3.) Push \"Generate an EMBL submission\" button' + + ' to generate a submission.\n' + + '4.) Push the "Save the submission" button' + + ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n' + + + 'All spaces, tabs, and newlines are' + + ' removed before the nucleotide sequence is translated.' + ) + + def contactInformation(self): + # This method should list contact information for MUMC, and a link to the github page. + tkMessageBox.showinfo('Contact Information', + 'This software was created at\n' + + 'Maastricht University Medical Center\n' + + 'Transplantation Immunology\n' + + 'Tissue Typing Laboratory.\n' + + 'by Ben Matern:\n' + + 'ben.matern@mumc.nl\n\n' + + + 'Please send Ben your bioinformatics\n' + + 'and data related questions.\n\n' + + + 'all other inquiries can be directed\n' + + 'to Marcel Tilanus:\n' + + 'm.tilanus@mumc.nl\n\n' + + + 'This code will be hosted at:\n' + + 'https://github.com/transplantation-\nimmunology/saddle-bags\n' + + 'You will find more information on\n' + + 'EMBL\'s data format on that page.' + + ) + + # Ask user for a output file location, and write the EMBL submission to a file. 
+ # This takes the input from the output field, rather than generate a new submission. + # So the user can edit the submission before or after saving it. + def saveSubmissionFile(self): + + self.dir_opt = options = {} + options['initialdir'] = expanduser("~") + options['parent'] = self + options['title'] = 'Specify your output file.' + options['initialfile'] = 'EMBL.HLA.Submission.txt' + outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt) + submissionText = self.submOutputGuiObject.get('1.0', 'end') + outputFileObject.write(submissionText) + + # TODO: Did I detect any exceptions? Maybe I don't have permission to write that file + # I saw an error when i wrote to a network drive once. + + + # Gather sequence information from the input elements, and generate a text EMBL submission. + def constructSubmission(self): + try: + + allGen = SubmissionGeneratorEMBL() + roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end') + + allGen.sequenceAnnotation = annotateRoughInputSequence(roughFeatureSequence) + + enaSubmission = allGen.buildENASubmission() + + if (enaSubmission is None or len(enaSubmission) < 1): + tkMessageBox.showerror('Empty submission text' + ,'You are missing some required information.\n' + + 'Try the \'Submission Options\' button.\n') + + self.submOutputGuiObject.delete('1.0','end') + self.submOutputGuiObject.insert('1.0', '') + else: + self.submOutputGuiObject.delete('1.0','end') + self.submOutputGuiObject.insert('1.0', enaSubmission) + + except KeyError, e: + tkMessageBox.showerror('Missing Submission Options' + ,'You are missing some required information.\n' + + 'Use the \'Submission Options\' button.\n' + + 'Missing Data: ' + str(e)) + + def saveAndExit(self): + assignConfigurationValue('sequence', self.featureInputGuiObject.get('1.0', 'end')) + self.parent.destroy() + + def enableGUI(self, event=None): + self.toggleGUI(True) + + def disableGUI(self): + self.toggleGUI(False) + + def toggleGUI(self, isEnabled): + #print ('Toggling 
GUI Widgets:' + str(isEnabled)) + + newState = (NORMAL if (isEnabled) else DISABLED) + + # Choosing the widgets individually, this makes the most sense I think. + self.howToUseButton.config(state=newState) + self.exampleButton.config(state=newState) + self.featureInputGuiObject.config(state=newState) + self.submissionOptionsButton.config(state=newState) + self.generateSubmissionButton.config(state=newState) + self.submOutputGuiObject.config(state=newState) + self.uploadButton.config(state=newState) + self.saveSubmissionButton.config(state=newState) + self.exitButton.config(state=newState) + + + + + diff --git a/src/AlleleGuiEMBLInputForm.py b/src/AlleleGuiEMBLInputForm.py new file mode 100755 index 0000000..44fcbef --- /dev/null +++ b/src/AlleleGuiEMBLInputForm.py @@ -0,0 +1,375 @@ +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . + +import os + +import Tkinter, Tkconstants, tkFileDialog, tkMessageBox +from Tkinter import * + +from AlleleSubCommon import * + +class AlleleGuiEMBLInputForm(Tkinter.Frame): + + # Initialize the GUI + def __init__(self, root): + Tkinter.Frame.__init__(self, root) + root.title("Choose EMBL Submission Options") + self.parent = root + + button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5} + + # To define the exit behavior. Save and exit. 
+ self.parent.protocol('WM_DELETE_WINDOW', self.saveOptions) + + # Define the return behavior. Same as "close window" etc + root.bind('', self.returnFunction) + + # This window should not be resizeable. I guess. + self.parent.resizable(width=False, height=False) + + #Standard Inputs widths for the form elements + formInputWidth = 30 + labelInputWidth = 30 + + self.instructionsFrame = Tkinter.Frame(self) + self.instructionText = Tkinter.StringVar() + self.instructionText.set('\nThese options are required for an EMBL allele submission.\n') + Tkinter.Label(self.instructionsFrame, width=85, height=3, textvariable=self.instructionText).pack() + self.instructionsFrame.pack() + + self.submissionDetailsInputFrame2 = Tkinter.Frame(self) + + self.sampleIDInstrText = Tkinter.StringVar() + self.sampleIDInstrText.set('Sample ID:') + self.sampleIDinstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.sampleIDInstrText).grid(row=0, column=0) + self.inputSampleID = Tkinter.StringVar() + self.inputSampleIDEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputSampleID).grid(row=0, column=1) + + self.geneInstrStringVar = Tkinter.StringVar() + self.geneInstrStringVar.set('Gene:') + self.geneInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.geneInstrStringVar).grid(row=1, column=0) + self.inputGene = Tkinter.StringVar() + self.inputGeneEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputGene).grid(row=1, column=1) + + self.chooseClassIntVar = IntVar() + self.chooseClassIntVar.set(1) + Radiobutton(self.submissionDetailsInputFrame2, text="HLA Class I ", variable=self.chooseClassIntVar, value=1).grid(row=2, column=0) + Radiobutton(self.submissionDetailsInputFrame2, text="HLA Class II", variable=self.chooseClassIntVar, value=2).grid(row=2, column=1) + + self.alleleInstrText 
= Tkinter.StringVar() + self.alleleInstrText.set('Allele Local Name:') + self.alleleInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.alleleInstrText).grid(row=3, column=0) + self.inputAllele = Tkinter.StringVar() + self.inputAlleleEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputAllele).grid(row=3, column=1) + + self.submissionDetailsInputFrame2.pack() + + + # Make a frame to contain the Test/Production radio buttons. + self.testProductionFrame = Tkinter.Frame(self) + + self.testProductionInstrText = Tkinter.StringVar() + self.testProductionInstrText.set('\nBy default, you submit to the EMBL test servers,\n' + + 'where submissions are regularly deleted.\n' + + 'change this option if you want to submit to the live EMBL environment.\n' + + 'Login Credentials will not be stored, but they will be sent\n' + + 'to EMBL via secure https connection.\n' + ) + self.alleleInstrLabel = Tkinter.Label(self.testProductionFrame, width=70, height=7, textvariable=self.testProductionInstrText).pack()#.grid(row=2, column=0) + + # 1 = Test. 
0 = Production/live server + self.chooseTestServersIntVar = IntVar() + self.chooseTestServersIntVar.set(int(getConfigurationValue('test_submission'))) + + Radiobutton(self.testProductionFrame, text="Submit to EMBL TEST / DEMO environment.", variable=self.chooseTestServersIntVar, value=1).pack() + Radiobutton(self.testProductionFrame, text="Submit to EMBL LIVE / PROD environment.", variable=self.chooseTestServersIntVar, value=0).pack() + + self.testProductionFrame.pack() + + # Make a frame to contain the input variables + self.submissionDetailsInputFrame = Tkinter.Frame(self) + + self.usernameInstrText = Tkinter.StringVar() + self.usernameInstrText.set('EMBL Username:') + self.usernameInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.usernameInstrText).grid(row=0, column=0) + self.inputUsername = Tkinter.StringVar() + self.inputUsernameEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputUsername).grid(row=0, column=1) + + self.passwordInstrText = Tkinter.StringVar() + self.passwordInstrText.set('EMBL Password:') + self.passwordInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.passwordInstrText).grid(row=1, column=0) + self.inputPassword = Tkinter.StringVar() + self.inputPasswordEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputPassword, show="*").grid(row=1, column=1) + + self.submissionDetailsInputFrame.pack() + + + # Frame to specify Analysis Information + self.newAnalysisFrame = Tkinter.Frame(self) + + self.analysisAliasInstrText = Tkinter.StringVar() + self.analysisAliasInstrText.set('Analysis Alias:') + self.analysisAliasInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisAliasInstrText).grid(row=0, column=0) + self.inputAnalysisAlias = Tkinter.StringVar() + self.inputStudyIdEntry = 
Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisAlias).grid(row=0, column=1) + + self.analysisTitleInstrText = Tkinter.StringVar() + self.analysisTitleInstrText.set('Analysis Title:') + self.analysisTitleInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisTitleInstrText).grid(row=1, column=0) + self.inputAnalysisTitle = Tkinter.StringVar() + self.inputAnalysisTitleEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisTitle).grid(row=1, column=1) + + self.analysisDescriptionInstrText = Tkinter.StringVar() + self.analysisDescriptionInstrText.set('Analysis Description:') + self.analysisDescriptionInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisDescriptionInstrText).grid(row=2, column=0) + self.inputAnalysisDescription = Tkinter.StringVar() + self.inputAnalysisDescriptionEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisDescription).grid(row=2, column=1) + + self.newAnalysisFrame.pack() + + + + # A Frame for specifing the details of the Study / Project + self.projectDetailsFrame = Tkinter.Frame(self) + + self.alleleInstrText = Tkinter.StringVar() + self.alleleInstrText.set('\nEMBL requires that submissions are assigned to a Study/Project.\n' + + 'Will you provide an existing EMBL study accession #?\n' + + '(ex. 
\'PRJEB01234\')\n' + + 'Or will you specify a new study?\n') + self.alleleInstrLabel = Tkinter.Label(self.projectDetailsFrame, width=70, height=6, textvariable=self.alleleInstrText).pack()#.grid(row=2, column=0) + + self.chooseProjectIntVar = IntVar() + self.chooseProjectIntVar.set(2) + + # A frame for the "new study" radio button + self.existingProjectFrame = Tkinter.Frame(self.projectDetailsFrame) + Radiobutton(self.existingProjectFrame, text="Use this study accession:", variable=self.chooseProjectIntVar, value=1).grid(row=0,column=0) + self.inputStudyAccession = Tkinter.StringVar() + self.inputStudyIdEntry = Tkinter.Entry(self.existingProjectFrame, width=formInputWidth, textvariable=self.inputStudyAccession).grid(row=0, column=1) + self.existingProjectFrame.pack() + + + # Filler Label + Tkinter.Label(self.projectDetailsFrame, width=labelInputWidth, height=1, text=' ').pack() + + # This radio button is on the project details frame, but not + # on one of it's sub-frames (existingProjectFrame or newProjectFrame) + # That's so i can pack it, and not use a grid + Radiobutton(self.projectDetailsFrame, text="Create a new study with this information:", variable=self.chooseProjectIntVar, value=2).pack() + + self.newProjectFrame = Tkinter.Frame(self.projectDetailsFrame) + + self.studyIdInstrText = Tkinter.StringVar() + self.studyIdInstrText.set('Short Study Identifier:') + self.studyIdInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyIdInstrText).grid(row=0, column=0) + self.inputStudyId = Tkinter.StringVar() + self.inputStudyIdEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyId).grid(row=0, column=1) + + self.studyShortTitleInstrText = Tkinter.StringVar() + self.studyShortTitleInstrText.set('Descriptive Study Title:') + self.studyShortTitleInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, 
textvariable=self.studyShortTitleInstrText).grid(row=1, column=0) + self.inputStudyShortTitle = Tkinter.StringVar() + self.inputStudyShortTitleEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyShortTitle).grid(row=1, column=1) + + self.studyAbstractInstrText = Tkinter.StringVar() + self.studyAbstractInstrText.set('Study Description / Abstract:') + self.studyAbstractInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyAbstractInstrText).grid(row=2, column=0) + self.inputStudyAbstract = Tkinter.StringVar() + self.inputStudyAbstractEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyAbstract).grid(row=2, column=1) + + self.newProjectFrame.pack() + + self.projectDetailsFrame.pack() + + # Make a frame for the save options button. + self.saveOptionsFrame = Tkinter.Frame(self) + Tkinter.Button(self.saveOptionsFrame, text='Save Options', command=self.saveOptions).grid(row=0, column=0) + self.saveOptionsFrame.pack() + + # TODO: Should there be a cancel button, to close this window without saving? + + self.loadOptions() + + # I needed a function for the return keypress to latch onto. + # It is just a wrapper for the saveOptions method. + def returnFunction(self, event): + self.saveOptions() + + + # submissionOptions is a dictionary, passed by the parent. 
def loadOptions(self):
    """Populate the form's Tk variables from the stored configuration.

    Each key is read with getConfigurationValue(); a key whose value is
    None is skipped and the corresponding variable is left untouched.

    Raises:
        Exception: if 'class' or 'choose_project' holds a value other
            than 1 or 2 (compared as strings).
    """
    def loadText(key, variable):
        # Copy one configuration value straight into a Tk variable.
        value = getConfigurationValue(key)
        if value is not None:
            variable.set(value)

    def loadOneOrTwo(key, variable, description):
        # Radio-button backed options only accept 1 or 2.
        value = getConfigurationValue(key)
        if value is None:
            return
        choice = str(value)
        if choice not in ('1', '2'):
            raise Exception('Error loading EMBL submission options. Invalid '
                + description + ':' + choice)
        variable.set(int(choice))

    loadText('embl_username', self.inputUsername)
    loadText('embl_password', self.inputPassword)
    loadText('sample_id', self.inputSampleID)
    loadText('gene', self.inputGene)
    loadOneOrTwo('class', self.chooseClassIntVar, 'class')
    loadText('allele_name', self.inputAllele)
    loadOneOrTwo('choose_project', self.chooseProjectIntVar, 'Project choice')
    loadText('study_accession', self.inputStudyAccession)
    loadText('study_identifier', self.inputStudyId)
    loadText('study_short_title', self.inputStudyShortTitle)
    loadText('study_abstract', self.inputStudyAbstract)

    # 1 = Test.  0 = Production/live server
    testSubmission = getConfigurationValue('test_submission')
    if testSubmission is not None:
        self.chooseTestServersIntVar.set(int(testSubmission))

    loadText('analysis_alias', self.inputAnalysisAlias)
    loadText('analysis_title', self.inputAnalysisTitle)
    loadText('analysis_description', self.inputAnalysisDescription)


def saveOptions(self):
    """Validate the form and, if it is complete, store it and close the window.

    When checkOptions() returns False nothing is stored and the window
    stays open so the user can fill in the missing value.
    """
    if not self.checkOptions():
        return

    print ('Saving Options....')

    assignConfigurationValue('embl_username', self.inputUsername.get())
    # The password is handed to the configuration so the submission step can
    # use it. It must never be written to disk.
    # NOTE(review): confirm AlleleSubCommon.py excludes it when saving.
    assignConfigurationValue('embl_password', self.inputPassword.get())
    assignConfigurationValue('sample_id', self.inputSampleID.get())
    assignConfigurationValue('gene', self.inputGene.get())
    assignConfigurationValue('class', str(self.chooseClassIntVar.get()))
    assignConfigurationValue('allele_name', self.inputAllele.get())
    assignConfigurationValue('choose_project', str(self.chooseProjectIntVar.get()))
    assignConfigurationValue('study_accession', self.inputStudyAccession.get())
    assignConfigurationValue('study_identifier', self.inputStudyId.get())
    assignConfigurationValue('study_short_title', self.inputStudyShortTitle.get())
    assignConfigurationValue('study_abstract', self.inputStudyAbstract.get())
    assignConfigurationValue('test_submission', str(self.chooseTestServersIntVar.get()))
    # Text fields are passed through unchanged, like the study fields above.
    # Wrapping them in str() raises UnicodeEncodeError on Python 2 as soon
    # as the user types a non-ASCII character.
    assignConfigurationValue('analysis_alias', self.inputAnalysisAlias.get())
    assignConfigurationValue('analysis_title', self.inputAnalysisTitle.get())
    assignConfigurationValue('analysis_description', self.inputAnalysisDescription.get())

    self.parent.destroy()


def checkOptions(self):
    """Return True when every required form value has been filled in.

    The first empty required field triggers a warning dialog naming that
    field and the method returns False. The EMBL username and password and
    the HLA class are deliberately not checked.

    Raises:
        Exception: if the study/project radio selection is neither 1 nor 2.
    """
    def isPresent(variable, description):
        # Warn about, and report, a single missing required value.
        if variable.get():
            return True
        tkMessageBox.showwarning('Missing Form Value',
            'You are missing ' + description + '. Please try again.')
        return False

    if not (isPresent(self.inputSampleID, 'a Sample ID')
            and isPresent(self.inputGene, 'a Gene')
            and isPresent(self.inputAllele, 'an Allele Name')):
        return False

    projectChoice = str(self.chooseProjectIntVar.get())
    if projectChoice == '1':
        # Use Existing Project
        if not isPresent(self.inputStudyAccession, 'a Study Accession number'):
            return False
    elif projectChoice == '2':
        # Use New Project. Each message names the field that is actually
        # empty; the title and abstract warnings used to point elsewhere.
        if not (isPresent(self.inputStudyId, 'a Study Name')
                and isPresent(self.inputStudyShortTitle, 'a Study Title')
                and isPresent(self.inputStudyAbstract, 'a Study Description')):
            return False
    else:
        # Report the selected value, not the repr of the Tk variable object.
        raise Exception('Unknown value of self.chooseProjectIntVar. I expect 1 or 2. Observed:'
            + projectChoice)

    return (isPresent(self.inputAnalysisAlias, 'an Analysis Alias')
        and isPresent(self.inputAnalysisTitle, 'an Analysis Title')
        and isPresent(self.inputAnalysisDescription, 'an Analysis Description'))


def closeWindow(self):
    """Close the options window without validating or saving anything."""
    self.parent.destroy()
def selectall(self, event):
    """Select the whole contents of the Text widget that received *event*."""
    event.widget.tag_add("sel", "1.0", "end")


# Initialize the GUI
def __init__(self, root):
    """Build the main IMGT submission window inside *root*.

    Creates, top to bottom: the instruction label, the more-info buttons,
    the annotated-sequence input area, the options/generate buttons, the
    submission preview area and the save/exit buttons.
    """
    Tkinter.Frame.__init__(self, root)
    root.title("Create and Save an IMGT Sequence Submission")
    self.parent = root

    # Ctrl-A doesn't select everything by default in Tk Text widgets, so it
    # is bound here for every Text widget.
    # NOTE(review): the event sequence was empty in the reviewed text (the
    # angle-bracket content appears to have been stripped, as in the license
    # URL) and Tk rejects an empty sequence. '<Control-a>' is restored from
    # the comment above -- confirm.
    root.bind_class("Text", "<Control-a>", self.selectall)

    # To define the exit behavior.  Save the input sequence text.
    self.parent.protocol('WM_DELETE_WINDOW', self.saveAndExit)

    button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5}

    def buildScrolledText(frame):
        # Pack order matters: the scrollbars claim the bottom and right
        # edges before the Text widget fills the remaining space.
        xScrollbar = Scrollbar(frame, orient=HORIZONTAL)
        xScrollbar.pack(side=BOTTOM, fill=X)
        yScrollbar = Scrollbar(frame)
        yScrollbar.pack(side=RIGHT, fill=Y)
        textWidget = Tkinter.Text(
            frame
            , width=80, height=8
            , wrap=NONE
            , xscrollcommand=xScrollbar.set
            , yscrollcommand=yScrollbar.set
            )
        xScrollbar.config(command=textWidget.xview)
        yScrollbar.config(command=textWidget.yview)
        textWidget.pack(expand=True, fill='both')
        return textWidget, xScrollbar, yScrollbar

    # A frame for the Instructions Label.
    self.instructionsFrame = Tkinter.Frame(self)
    self.instructionText = Tkinter.StringVar()
    self.instructionText.set('\nThis tool will generate an HLA allele submission for\n'
        + 'the IMGT / HLA nucleotide database.\n'
        + 'For more information:\n')
    Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack()
    self.instructionsFrame.pack(expand=False, fill='both')

    # Make a frame for the more-info buttons
    self.moreInfoFrame = Tkinter.Frame(self)
    self.howToUseButton = Tkinter.Button(self.moreInfoFrame, text='How to use this tool', command=self.howToUse)
    self.howToUseButton.grid(row=0, column=0)
    self.exampleButton = Tkinter.Button(self.moreInfoFrame, text='Example Sequence', command=self.sampleSequence)
    self.exampleButton.grid(row=0, column=1)
    self.moreInfoFrame.pack()

    # Create a frame for the input widget, add scrollbars.
    self.featureInputFrame = Tkinter.Frame(self)
    self.featureInstrText = Tkinter.StringVar()
    self.featureInstrText.set('Annotated Sequence:')
    # Keep the widget itself: pack() returns None, so chaining it onto the
    # constructor stored None in this attribute.
    self.featureInstrLabel = Tkinter.Label(self.featureInputFrame, width=80, height=1, textvariable=self.featureInstrText)
    self.featureInstrLabel.pack()
    (self.featureInputGuiObject
        , self.featureInputXScrollbar
        , self.featureInputYScrollbar) = buildScrolledText(self.featureInputFrame)
    self.featureInputFrame.pack(expand=True, fill='both')

    # Create Frame for "Generate Submission" button.
    self.submButtonFrame = Tkinter.Frame(self)
    self.submissionOptionsButton = Tkinter.Button(self.submButtonFrame, text='Submission Options', command=self.chooseSubmissionOptions)
    self.submissionOptionsButton.grid(row=0, column=0)
    self.generateSubmissionButton = Tkinter.Button(self.submButtonFrame, text=unichr(8681) + ' Generate an IMGT submission ' + unichr(8681), command=self.constructSubmission)
    self.generateSubmissionButton.grid(row=0, column=1)
    self.submButtonFrame.pack()

    # Output interface is contained on a frame.
    self.submOutputFrame = Tkinter.Frame(self)
    self.outputEMBLSubmission = Tkinter.StringVar()
    self.outputEMBLSubmission.set('Allele Submission Preview:')
    self.outputEMBLLabel = Tkinter.Label(self.submOutputFrame, width=80, height=1, textvariable=self.outputEMBLSubmission)
    self.outputEMBLLabel.pack()
    (self.submOutputGuiObject
        , self.submOutputXScrollbar
        , self.submOutputYScrollbar) = buildScrolledText(self.submOutputFrame)
    self.submOutputFrame.pack(expand=True, fill='both')

    # Save / Exit buttons.
    self.uploadSubmissionFrame = Tkinter.Frame(self)
    self.saveSubmissionButton = Tkinter.Button(self.uploadSubmissionFrame, text='Save Submission to My Computer', command=self.saveSubmissionFile)
    self.saveSubmissionButton.pack(**button_opt)
    self.exitButton = Tkinter.Button(self.uploadSubmissionFrame, text='Exit', command=self.saveAndExit)
    self.exitButton.pack(**button_opt)
    self.uploadSubmissionFrame.pack()

    self.pack(expand=True, fill='both')


def chooseSubmissionOptions(self):
    """Open the IMGT options form in a new window at the main window's position.

    The main window's widgets are disabled while the options window is
    open and re-enabled by the binding below.
    """
    print ('Opening the IMGT Submission Options Dialog')

    self.disableGUI()

    imgtOptionsRoot = Tkinter.Toplevel()
    # NOTE(review): the event sequence was empty in the reviewed text (the
    # angle-bracket content appears to have been stripped) and Tk rejects an
    # empty sequence. '<Destroy>' is assumed because enableGUI has to run
    # once the options window goes away -- confirm.
    imgtOptionsRoot.bind("<Destroy>", self.enableGUI)
    AlleleGuiIMGTInputForm(imgtOptionsRoot).pack()

    # Set the X and the Y Position of the options window, so it is nearby.
    # update() forces the layout so winfo_width/height report real sizes.
    imgtOptionsRoot.update()
    parentGeometry = self.parent.winfo_geometry().split('+')
    newGeometry = (str(imgtOptionsRoot.winfo_width()) + 'x'
        + str(imgtOptionsRoot.winfo_height()) + '+'
        + parentGeometry[1] + '+'
        + parentGeometry[2])
    imgtOptionsRoot.geometry(newGeometry)

    imgtOptionsRoot.mainloop()


def sampleSequence(self):
    """Load a small example sequence and example options, then build a submission.

    Replaces the text in the input area and overwrites the listed
    configuration values with example data.
    """
    self.featureInputGuiObject.delete('1.0', 'end')
    self.featureInputGuiObject.insert('1.0', 'aag\nCGTCGT\nccg\nGGCTGA\naat')

    exampleOptions = (
        # Clear the password, keep the username
        ('imgt_password', ''),

        ("allele_name", 'Allele:01:02'),
        ('gene', 'HLA-C'),
        ('sample_id', 'Donor_12345'),
        ('class', '1'),

        ('embl_sequence_accession', 'LT123456'),
        ('embl_release_date', '01/01/2020'),

        ('is_published', '0'),

        ('reference_title', 'Published Reference Title'),
        ('reference_authors', 'Albert Authorman, Ben Bioinformaticist, Cindy Cell-Culture'),
        ('reference_journal', 'Scientific Journal of Research'),

        ('closest_known_allele', 'HLA-C*01:02:01'),
        ('closest_allele_written_description', 'This allele has a C->G polymorphism in Exon 1.\nPosition 5 in the coding sequence.\nThis polymorphism is interesting because of science.'),

        ('ethnic_origin', 'Unknown'),
        ('sex', 'Unknown'),
        ('consanguineous', 'Unknown'),
        ('homozygous', 'Unknown'),
    )
    for optionKey, optionValue in exampleOptions:
        assignConfigurationValue(optionKey, optionValue)

    self.constructSubmission()

# This method should popup some instruction text in a wee window.
# This should be explicit on how to use the tool.
def howToUse(self):
    """Show an information dialog explaining how to prepare and submit a sequence."""
    dialogTitle = 'How to use this tool'
    paragraphs = [
        'This software is to be used to create an\n',
        'IMGT-formatted submission document,\n',
        'which specifies a (novel) HLA allele.\n\n',

        'This tool requires you to submit a\n',
        'full length HLA allele, including\n',
        '5\' and 3\' UTRs.\n\n',

        'Use capital letters for exons,\n',
        'lowercase for introns & UTRs.\n\n',

        'Push the "Example Sequence" button to see a small example of',
        ' a formatted sequence.\n',
        'Sequences should follow this pattern:\n',
        '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n',

        'To use this tool:\n',
        '1.) Fill in a Sample ID, Gene Name, and Allele.',
        ' This text will be included in the submission.\n',
        '2.) Paste your formatted sequence in the\n',
        'Annotated Sequence text area.\n',
        '3.) Push \"Generate an IMGT submission\" button',
        ' to generate a submission.\n',
        '4.) Push the "Save the submission" button',
        ' to store the submission on your computer.\nYou can submit this file to IMGT.\n\n',

        'All spaces, tabs, and newlines are',
        ' removed before the nucleotide sequence is translated.',
    ]
    tkMessageBox.showinfo(dialogTitle, ''.join(paragraphs))


def contactInformation(self):
    """Show an information dialog with the authors' contact details and the project URL."""
    dialogTitle = 'Contact Information'
    paragraphs = [
        'This software was created at\n',
        'Maastricht University Medical Center\n',
        'Transplantation Immunology\n',
        'Tissue Typing Laboratory.\n',
        'by Ben Matern:\n',
        'ben.matern@mumc.nl\n\n',

        'Please send Ben your bioinformatics\n',
        'and data related questions.\n\n',

        'all other inquiries can be directed\n',
        'to Marcel Tilanus:\n',
        'm.tilanus@mumc.nl\n\n',

        'This code will be hosted at:\n',
        'https://github.com/transplantation-\nimmunology/saddle-bags\n',
        'You will find more information on\n',
        'IMGT\'s data format on that page.',
    ]
    tkMessageBox.showinfo(dialogTitle, ''.join(paragraphs))

# Ask user for a output file location, and write the IMGT submission to a file.
# This takes the input from the output field, rather than generate a new submission.
# So the user can edit the submission before or after saving it.
def saveSubmissionFile(self):
    """Ask for an output file and write the previewed submission text to it.

    The text comes from the preview widget, not from a fresh generation,
    so manual edits made in the preview are what gets saved. Nothing is
    written when the user cancels the dialog.
    """
    self.dir_opt = options = {}

    options['initialdir'] = expanduser("~")
    options['parent'] = self
    options['title'] = 'Specify your output file.'
    options['initialfile'] = 'IMGT.HLA.Submission.txt'
    outputFileObject = tkFileDialog.asksaveasfile(**self.dir_opt)

    # asksaveasfile returns None when the dialog is cancelled.
    if outputFileObject is None:
        return

    submissionText = self.submOutputGuiObject.get('1.0', 'end')
    try:
        outputFileObject.write(submissionText)
    finally:
        # Close explicitly so the text is flushed to disk right away.
        outputFileObject.close()


# Gather sequence information from the input elements, and generate a text IMGT submission.
def constructSubmission(self):
    """Generate the IMGT submission from the input text and show it in the preview.

    An empty result clears the preview and shows an error dialog. A
    KeyError raised while generating is reported in an error dialog that
    names the missing key.
    """
    try:
        allGen = SubmissionGeneratorIMGT()
        roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end')

        allGen.sequenceAnnotation = annotateRoughInputSequence(roughFeatureSequence)
        imgtSubmission = allGen.buildIMGTSubmission()

        if not imgtSubmission:
            tkMessageBox.showerror('Empty submission text'
                , 'You are missing some required information.\n'
                + 'Try the \'Submission Options\' button.\n')
            imgtSubmission = ''

        self.submOutputGuiObject.delete('1.0', 'end')
        self.submOutputGuiObject.insert('1.0', imgtSubmission)

    except KeyError as e:
        tkMessageBox.showerror('Missing Submission Options'
            , 'You are missing some required information.\n'
            + 'Use the \'Submission Options\' button.\n'
            + 'Missing Data: ' + str(e))


def saveAndExit(self):
    """Store the current input sequence text in the configuration and close the window."""
    assignConfigurationValue('sequence', self.featureInputGuiObject.get('1.0', 'end'))
    self.parent.destroy()


def enableGUI(self, event=None):
    """Re-enable the main window's widgets; *event* is accepted so this can be a Tk binding."""
    self.toggleGUI(True)


def disableGUI(self):
    """Disable the main window's widgets."""
    self.toggleGUI(False)


def toggleGUI(self, isEnabled):
    """Set every interactive widget of the main window to NORMAL or DISABLED."""
    newState = NORMAL if isEnabled else DISABLED

    # The widgets are listed individually rather than discovered by walking
    # the widget tree.
    interactiveWidgets = (
        self.howToUseButton,
        self.exampleButton,
        self.featureInputGuiObject,
        self.submissionOptionsButton,
        self.generateSubmissionButton,
        self.submOutputGuiObject,
        self.saveSubmissionButton,
        self.exitButton,
    )
    for widget in interactiveWidgets:
        widget.config(state=newState)
+ +class AlleleGuiIMGTInputForm(VerticalScrolledFrame): + + # Initialize the GUI + def __init__(self, root): + + + VerticalScrolledFrame.__init__(self, root) + #Tkinter.Frame.__init__(self, root) + #super(500, 500) + root.title("Choose IMGT Submission Options") + self.parent = root + + button_opt = {'fill': Tkconstants.BOTH, 'padx': 35, 'pady': 5} + + # This window should not be resizeable. I guess. + # Maybe height should be resizeable, i don't know. + self.parent.resizable(width=False, height=False) + + # To define the exit behavior. Save and exit. + self.parent.protocol('WM_DELETE_WINDOW', self.saveOptions) + + # Define the return behavior. Same as "close window" etc + root.bind('', self.returnFunction) + + self.instructionsFrame = Tkinter.Frame(self.interior) + self.instructionText = Tkinter.StringVar() + self.instructionText.set('\nThese options are required for an IMGT allele submission.\n' + + 'Login Credentials will not be stored, but they will be sent to IMGT via\n' + + 'secure https connection.\n') + Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack() + self.instructionsFrame.pack() + + #Standard Inputs widths for the form elements + formInputWidth = 35 + labelInputWidth = 35 + + # Make a frame to contain the input variables + # self.interior is defined in the ScrolledWindow class + self.submissionDetailsInputFrame = Tkinter.Frame(self.interior) + + self.usernameInstrText = Tkinter.StringVar() + self.usernameInstrText.set('IMGT Username:') + self.usernameInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.usernameInstrText).grid(row=0, column=0) + self.inputUsername = Tkinter.StringVar() + self.inputUsernameEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputUsername).grid(row=0, column=1) + + self.passwordInstrText = Tkinter.StringVar() + self.passwordInstrText.set('IMGT Password:') + 
self.passwordInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.passwordInstrText).grid(row=1, column=0) + self.inputPassword = Tkinter.StringVar() + self.inputPasswordEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputPassword, show="*").grid(row=1, column=1) + + + # TODO: Submitter / Laboratory ID. + # This is on the IMGT form. + #Do I know this infromation? Do I need to tell user how to get it? + + self.sampleIDInstrText = Tkinter.StringVar() + self.sampleIDInstrText.set('Sample ID:') + self.sampleIDinstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.sampleIDInstrText).grid(row=2, column=0) + self.inputSampleID = Tkinter.StringVar() + self.inputSampleIDEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputSampleID).grid(row=2, column=1) + + self.geneInstrStringVar = Tkinter.StringVar() + self.geneInstrStringVar.set('Gene:') + self.geneInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.geneInstrStringVar).grid(row=3, column=0) + self.inputGene = Tkinter.StringVar() + self.inputGeneEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputGene).grid(row=3, column=1) + + self.chooseClassIntVar = IntVar() + self.chooseClassIntVar.set(1) + Radiobutton(self.submissionDetailsInputFrame, text="HLA Class I ", variable=self.chooseClassIntVar, value=1).grid(row=4, column=0) + Radiobutton(self.submissionDetailsInputFrame, text="HLA Class II", variable=self.chooseClassIntVar, value=2).grid(row=4, column=1) + + self.alleleInstrText = Tkinter.StringVar() + self.alleleInstrText.set('Allele Local Name:') + self.alleleInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.alleleInstrText).grid(row=5, 
column=0) + self.inputAllele = Tkinter.StringVar() + self.inputAlleleEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputAllele).grid(row=5, column=1) + + + + + # New form stuff + # Gotta add this to the load/save config nonsense below. + + + # TODO: Can I just load an EMBL accession? I think that is possible. Easier than filling it in here + + + # TODO: When EMBL Sequence Accession # Is provided, I can probably lookup an annotated sequence. + # Should I put a button next to this field + # Button: "Lookup This EMBL Sequence Accession #" + # If it is found, then i already know the sequence with exon boundaries. + + + # TODO: Do I need to specify if it is EMBL / Genbank / The other one? Probably not. + # I can require an EMBL code and disregard Genbank. + # Radio Buttons? + # EMBL / Genbank Accession # + self.emblAccInstrText = Tkinter.StringVar() + self.emblAccInstrText.set('EMBL Sequence Accession #:') + self.emblAccInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.emblAccInstrText).grid(row=6, column=0) + self.inputEmblAcc = Tkinter.StringVar() + self.inputEmblAccEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputEmblAcc).grid(row=6, column=1) + + + # Release Date + self.releaseDateInstrText = Tkinter.StringVar() + self.releaseDateInstrText.set('IMGT Release Date:') + self.releaseDateInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.releaseDateInstrText).grid(row=7, column=0) + self.inputReleaseDate = Tkinter.StringVar() + self.inputReleaseDateEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputReleaseDate).grid(row=7, column=1) + + # Reference Details + # Is this allele in a published paper or not? 
+ # 0=unpublished, 1=published + self.publishedReferenceIntVar = IntVar() + self.publishedReferenceIntVar.set(0) + + self.submissionDetailsInputFrame.pack() + + + self.unpublishedReferenceFrame = Tkinter.Frame(self.interior) + + self.referenceInstrText = Tkinter.StringVar() + self.referenceInstrText.set('\nPlease provide some information about a\npublished paper relevant to this sequence.\n') + self.referenceInstrLabel = Tkinter.Label(self.unpublishedReferenceFrame, width=70, height=4, textvariable=self.referenceInstrText).pack()#.grid(row=2, column=0) + + Radiobutton(self.unpublishedReferenceFrame, text="No Published Reference.", variable=self.publishedReferenceIntVar, value=0).pack() + self.unpublishedReferenceFrame.pack() + + self.publishedReferenceFrame = Tkinter.Frame(self.interior) + + # Radio Button: Published + Radiobutton(self.unpublishedReferenceFrame, text="Use This Reference:", variable=self.publishedReferenceIntVar, value=1).pack() + + # Reference Title + self.referenceTitleInstrText = Tkinter.StringVar() + self.referenceTitleInstrText.set('Reference Title:') + self.referenceTitleInstrLabel = Tkinter.Label(self.publishedReferenceFrame, width=labelInputWidth, height=1, textvariable=self.referenceTitleInstrText).grid(row=1, column=0) + self.inputReferenceTitle = Tkinter.StringVar() + self.inputReferenceTitleEntry = Tkinter.Entry(self.publishedReferenceFrame, width=formInputWidth, textvariable=self.inputReferenceTitle).grid(row=1, column=1) + + # Authors + self.referenceAuthorsInstrText = Tkinter.StringVar() + self.referenceAuthorsInstrText.set('Reference Authors:') + self.referenceAuthorsInstrLabel = Tkinter.Label(self.publishedReferenceFrame, width=labelInputWidth, height=1, textvariable=self.referenceAuthorsInstrText).grid(row=2, column=0) + self.inputReferenceAuthors = Tkinter.StringVar() + self.inputReferenceAuthorsEntry = Tkinter.Entry(self.publishedReferenceFrame, width=formInputWidth, textvariable=self.inputReferenceAuthors).grid(row=2, column=1) 
+ + # Journal + self.referenceJournalInstrText = Tkinter.StringVar() + self.referenceJournalInstrText.set('Reference Journal:') + self.referenceJournalInstrLabel = Tkinter.Label(self.publishedReferenceFrame, width=labelInputWidth, height=1, textvariable=self.referenceJournalInstrText).grid(row=3, column=0) + self.inputReferenceJournal = Tkinter.StringVar() + self.inputReferenceJournalEntry = Tkinter.Entry(self.publishedReferenceFrame, width=formInputWidth, textvariable=self.inputReferenceJournal).grid(row=3, column=1) + + self.publishedReferenceFrame.pack() + + # Make a frame to contain the input variables. + # I had to make 2 of them to organize my gui, maybe I can name this better. + self.submissionDetailsInputFrame2 = Tkinter.Frame(self.interior) + + # /alignment -> defined by IMGT sequence alignment service + # In this case, it is the closest known allele. + self.closestAlleleInstrText = Tkinter.StringVar() + self.closestAlleleInstrText.set('Closest Known HLA Allele:') + self.closestAlleleInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.closestAlleleInstrText).grid(row=1, column=0) + self.inputClosestAllele = Tkinter.StringVar() + self.inputClosestAlleleEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputClosestAllele).grid(row=1, column=1) + + # Written Description + # Looks like this is a description of how the sequence differes from closest knnown allele + self.closestAlleleWrittenDescriptionInstrText = Tkinter.StringVar() + self.closestAlleleWrittenDescriptionInstrText.set('Differences from Closest Allele:') + self.closestAlleleWrittenDescriptionInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.closestAlleleWrittenDescriptionInstrText).grid(row=2, column=0) + self.inputClosestAlleleWrittenDescription = Tkinter.StringVar() + self.inputClosestAlleleWrittenDescriptionEntry = 
Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputClosestAlleleWrittenDescription).grid(row=2, column=1) + + + # DONOR INFORMATION + + # Cell ID (cellnum) + # Wait, is this the same as the sample ID? Should I move the sample ID field down here? + # No. I am disregarding this sample ID. + + # Ethnic Origin + self.ethnicOriginInstrText = Tkinter.StringVar() + self.ethnicOriginInstrText.set('Ethnic Origin:') + self.ethnicOriginInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.ethnicOriginInstrText).grid(row=3, column=0) + self.inputEthnicOrigin = Tkinter.StringVar() + self.inputEthnicOriginEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputEthnicOrigin).grid(row=3, column=1) + + # Sex + self.sexInstrText = Tkinter.StringVar() + self.sexInstrText.set('Sex:') + self.sexInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.sexInstrText).grid(row=4, column=0) + self.inputSex = Tkinter.StringVar() + self.inputSexEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputSex).grid(row=4, column=1) + + # Consanguineous (T/F) + self.consanguineousInstrText = Tkinter.StringVar() + self.consanguineousInstrText.set('Sample is Consanguineous:') + self.consanguineousInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.consanguineousInstrText).grid(row=5, column=0) + self.inputConsanguineous = Tkinter.StringVar() + self.inputConsanguineousEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputConsanguineous).grid(row=5, column=1) + + # Homozygous (T/F) + self.homozygousInstrText = Tkinter.StringVar() + self.homozygousInstrText.set('Sample is Homozygous:') + self.homozygousInstrLabel = 
Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.homozygousInstrText).grid(row=6, column=0) + self.inputHomozygous = Tkinter.StringVar() + self.inputHomozygousEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputHomozygous).grid(row=6, column=1) + + # TODO: Comments. Where does this stuff go? This is details about the lab of origin. I haven't tried specifying this one yet, ask James how to do it. + # Comments + + # Lab of Origin + + # Lab Contact + + # TODO Add form for cell availability + # Cell Availability + + # Material Available (T/F) + + # Cell Bank (Text) + + # Cell Workshop Details + + + + + + + + # Alternative HLA DNA Typing + # Loop? + # Dropdown Box with another Entry Field? + # Yeah Start with them Blank, choose gene from box. + # Store in globals, but don't write to config.. + + # Source Serology Typing + # Maybe the same as DNA typing + + # Sequencing Methods + + # Primers + # This is probably a Dropdown with Entry field also. + + + + self.submissionDetailsInputFrame2.pack() + + + + + + # Make a frame for the save options button. + self.saveOptionsFrame = Tkinter.Frame(self.interior) + Tkinter.Button(self.saveOptionsFrame, text='Save Options', command=self.saveOptions).grid(row=0, column=0) + self.saveOptionsFrame.pack() + + self.loadOptions() + + # I needed a function for the return keypress to latch onto. + # It is just a wrapper for the saveOptions method. + def returnFunction(self, event): + self.saveOptions() + + # submissionOptions is a dictionary, passed by the parent. 
def loadOptions(self):
    """Fill the form variables from values already held in the configuration.

    Every key is looked up exactly once with getConfigurationValue. A key
    whose value is None leaves the matching form variable untouched.

    Raises:
        Exception: if a stored 'class' value is neither '1' nor '2'.
    """
    def restore(configurationKey, formVariable):
        # Copy one stored value into its form variable, if a value exists.
        storedValue = getConfigurationValue(configurationKey)
        if storedValue is not None:
            formVariable.set(storedValue)

    restore('imgt_username', self.inputUsername)
    restore('imgt_password', self.inputPassword)
    restore('sample_id', self.inputSampleID)
    restore('gene', self.inputGene)

    # The class is compared as text but stored in an integer variable,
    # so only the two known values are accepted.
    storedClass = getConfigurationValue('class')
    if storedClass is not None:
        classText = str(storedClass)
        if classText == '1':
            self.chooseClassIntVar.set(1)
        elif classText == '2':
            self.chooseClassIntVar.set(2)
        else:
            raise Exception('Error loading IMGT submission options. Invalid class:' + classText)

    restore('allele_name', self.inputAllele)
    restore('embl_sequence_accession', self.inputEmblAcc)
    restore('embl_release_date', self.inputReleaseDate)

    # 0=unpublished, 1=published
    restore('is_published', self.publishedReferenceIntVar)

    restore('reference_title', self.inputReferenceTitle)
    restore('reference_authors', self.inputReferenceAuthors)
    restore('reference_journal', self.inputReferenceJournal)

    restore('closest_known_allele', self.inputClosestAllele)
    restore('closest_allele_written_description', self.inputClosestAlleleWrittenDescription)

    restore('ethnic_origin', self.inputEthnicOrigin)
    restore('sex', self.inputSex)
    restore('consanguineous', self.inputConsanguineous)
    restore('homozygous', self.inputHomozygous)
def checkOptions(self):
    """Validate the form and report the first missing value to the user.

    Fields are checked in a fixed order; the first empty one produces a
    'Missing Form Value' warning box and stops the validation. The IMGT
    username and password are deliberately not checked.

    Returns:
        True when every required field has a value, False otherwise.
    """
    print ('Checking options.')

    def firstMissing(requiredFields):
        # Return the warning text of the first empty field, or None.
        for formVariable, warningText in requiredFields:
            if not formVariable.get():
                return warningText
        return None

    # Don't check the IMGT Username
    # Don't check the IMGT Password
    submissionFields = [
        (self.inputSampleID,
            'You are missing a Sample ID. Please try again.'),
        (self.inputGene,
            'You are missing a Gene. Please try again.'),
        (self.inputAllele,
            'You are missing an Allele Name. Please try again.'),
        (self.inputEmblAcc,
            'You are missing an EMBL Accession Number. Please try again.'),
        (self.inputReleaseDate,
            'You are missing an IMGT Submission Release Date. Please try again.'),
    ]

    # TODO: Accepted values for consanguineous/homozygous are
    # 'Yes', 'No', 'Unknown' I think
    sampleFields = [
        (self.inputClosestAllele,
            'You are missing the closest known reference allele to this sequence. Please provide this information.'),
        (self.inputEthnicOrigin,
            'Please provide a description of an ethnic origin for this sample.'),
        (self.inputSex,
            'Please identify the sex for this sample.'),
        (self.inputConsanguineous,
            'Please indicate if the sample is consanguineous or not.'),
        (self.inputHomozygous,
            'Please indicate if the sample is homozygous or not.'),
    ]

    warningText = firstMissing(submissionFields)

    # 0 means unpublished, in which case there is no reference to check.
    if warningText is None and self.publishedReferenceIntVar.get() != 0:
        referenceVariables = (self.inputReferenceTitle,
                              self.inputReferenceAuthors,
                              self.inputReferenceJournal)
        if not all(formVariable.get() for formVariable in referenceVariables):
            warningText = 'You must supply information about the published Reference. Please try again.'

    if warningText is None:
        warningText = firstMissing(sampleFields)

    if warningText is not None:
        tkMessageBox.showwarning('Missing Form Value', warningText)
        return False

    # All options look good.
    return True
# Put the GUI near the top-left quarter of the screen rather than in a corner.
# Dividing by 4 instead of 2 because the exact center is too...centered.
def initializeWindowLocation(self):
    """Move the parent window so its center sits a quarter of the way into the screen.

    The current window size is read from the parent's geometry string
    ("WidthxHeight+Xpos+Ypos") and kept; only the position changes.
    """
    self.parent.update_idletasks()
    screenWidth = self.parent.winfo_screenwidth()
    screenHeight = self.parent.winfo_screenheight()

    sizeText = self.parent.geometry().split('+')[0]
    windowWidth, windowHeight = [int(token) for token in sizeText.split('x')]

    # Floor division keeps the arithmetic integral on Python 2 and Python 3
    # alike; true division would shift odd-sized windows by a pixel.
    xPosition = screenWidth // 4 - windowWidth // 2
    yPosition = screenHeight // 4 - windowHeight // 2
    self.parent.geometry("%dx%d+%d+%d" % (windowWidth, windowHeight, xPosition, yPosition))
+ # This should be explicit on how to use the tool. + def howToUse(self): + tkMessageBox.showinfo('How to use this tool', + 'This software is to be used to create an\n' + + 'EMBL-formatted submission document,\n' + + 'which specifies a (novel) HLA allele.\n\n' + + + 'This tool requires you to submit a\n' + + 'full length HLA allele, including\n' + + '5\' and 3\' UTRs.\n\n' + + + 'Use capital letters for exons,\n' + + 'lowercase for introns & UTRs.\n\n' + + + 'Push the "Example Sequence" button to see a small example of' + + ' a formatted sequence.\n' + + 'Sequences should follow this pattern:\n' + + '5\'utr EX1 int1 EX2 ... EX{X} 3\'utr\n\n' + + + 'To use this tool:\n' + + '1.) Fill in a Sample ID, Gene Name, and Allele.' + + ' This text will be included in the submission.\n' + + '2.) Paste your formatted sequence in the\n' + + 'Annotated Sequence text area.\n' + + '3.) Push \"Generate an EMBL submission\" button' + + ' to generate a submission.\n' + + '4.) Push the "Save the submission" button' + + ' to store the submission on your computer.\nYou can submit this file to EMBL.\n\n' + + + 'All spaces, tabs, and newlines are' + + ' removed before the nucleotide sequence is translated.' + ) + + + def contactInformation(self): + # This method should list contact information for MUMC, and a link to the github page. + tkMessageBox.showinfo('Contact Information', + 'This software was created at\n' + + 'Maastricht University Medical Center\n' + + 'Transplantation Immunology\n' + + 'Tissue Typing Laboratory.\n' + + 'by Ben Matern:\n' + + 'ben.matern@mumc.nl\n\n' + + + 'Please send Ben your bioinformatics\n' + + 'and data related questions.\n\n' + + + 'all other inquiries can be directed\n' + + 'to Marcel Tilanus:\n' + + 'm.tilanus@mumc.nl\n\n' + + + 'This code will be hosted at:\n' + + 'https://github.com/transplantation-\nimmunology/saddle-bags\n' + + 'You will find more information on\n' + + 'EMBL\'s data format on that page.' 
def openAlleleSubGUI(self, submissionType):
    """Hide the main window and open the EMBL or IMGT submission window.

    Args:
        submissionType: 'IMGT' or 'EMBL'.

    Raises:
        Exception: if submissionType is neither 'IMGT' nor 'EMBL'. The
            check happens before any window is touched, so the main
            window is never left hidden by a bad argument.
    """
    if submissionType == 'IMGT':
        submissionGuiClass = AlleleGuiIMGT
    elif submissionType == 'EMBL':
        submissionGuiClass = AlleleGuiEMBL
    else:
        raise Exception('Unknown Submission Type. I expected IMGT or EMBL:' + str(submissionType))

    self.rememberWindowPosition()

    self.parent.withdraw()
    self.alleleSubRoot = Tkinter.Toplevel()
    # onCloseOtherFrame brings the main window back once the sub-window
    # itself is destroyed, so it has to listen for the Destroy event.
    self.alleleSubRoot.bind("<Destroy>", self.onCloseOtherFrame)

    print ('Opening the ' + submissionType + ' Submission GUI')
    submissionGuiClass(self.alleleSubRoot).pack()

    # Set the X and the Y Position of the window, so it is nearby.
    # It is necessary to update the window before assigning geometry.
    # Using Size Values from Subwindow, but Position values from Parent window
    self.alleleSubRoot.update()
    newGeometry = (str(self.alleleSubRoot.winfo_width()) + 'x'
        + str(self.alleleSubRoot.winfo_height()) + '+'
        + str(self.windowXpos) + '+'
        + str(self.windowYpos))
    self.alleleSubRoot.geometry(newGeometry)

    self.alleleSubRoot.mainloop()
+ if( len(inputSequence) > 0 ): + + coding_dna = Seq(inputSequence, generic_dna) + proteinSequence = str(coding_dna.translate()) + print ('Exon Sequence before translation:' + coding_dna) + print ('Translated Protein:' + proteinSequence) + + # Perform Sanity Checks. + # Stop codon *should* be at the end of the protein. + # Here we seek out the first instance of a stop codon, + # and remove the peptides afterwards. + # because that's what happens in real life. + stopCodonLocation = proteinSequence.find('*') + + # If no stop codon was found + if (stopCodonLocation == -1): + assignConfigurationValue('is_pseudo_gene','1') + # If multiple of three (correct codon length) + if(len(coding_dna) % 3 == 0): + tkMessageBox.showinfo('No Stop Codon Found', + 'The translated protein does not contain a stop codon.\n' + + 'This is indicated by a /pseudo flag in the sequence submission.' + ) + + # Wrong Codon Length + else: + tkMessageBox.showinfo('No Stop Codon Found', + 'The translated protein does not contain a stop codon.\n' + + 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.\n' + + 'This is indicated by a /pseudo flag in the sequence submission.') + + # If Stop Codon is in the end of the protein (This is expected and correct) + elif (stopCodonLocation == len(proteinSequence) - 1): + assignConfigurationValue('is_pseudo_gene','0') + + # If multiple of three (correct codon length) + if(len(coding_dna) % 3 == 0): + # Everything is fine in this case. 
Trim off the stop codon + proteinSequence = proteinSequence[0:stopCodonLocation] + pass + # Wrong Codon Length + else: + tkMessageBox.showinfo('Extra Nucleotides After the Stop Codon', + 'The stop codon is at the correct position in the protein, but ' + + 'The coding nucleotide sequence length (' + str(len(coding_dna)) + ') is not a multiple of 3.\n\n' + + 'Please double check your sequence.') + proteinSequence = proteinSequence[0:stopCodonLocation] + + # Else Stop Codon is premature (before the end of the protein) + else: + assignConfigurationValue('is_pseudo_gene','1') + + # If multiple of three (correct codon length) + if(len(coding_dna) % 3 == 0): + tkMessageBox.showinfo('Premature Stop Codon Detected', + 'Premature stop codon found:\nProtein Position (' + + str(stopCodonLocation + 1) + '/' + + str(len(proteinSequence)) + ')\n\n' + + 'This is indicated by a /pseudo flag in the sequence submission.\n' + + 'Double check your protein sequence,\n' + + 'this might indicate a missense mutation.\n\n' + + 'Translated Protein:\n' + proteinSequence + + '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] + + '\n' + ) + proteinSequence = proteinSequence[0:stopCodonLocation] + + + # Wrong Codon Length + else: + tkMessageBox.showinfo('Premature Stop Codon Detected', + 'Premature stop codon found:\nProtein Position (' + + str(stopCodonLocation + 1) + '/' + + str(len(proteinSequence)) + ')\n\n' + + 'This is indicated by a /pseudo flag in the sequence submission.\n' + + 'Nucleotide count is not a multiple of 3,\n' + + 'Double check your protein sequence,\n' + + 'this might indicate a missense mutation.\n\n' + + 'Translated Protein:\n' + proteinSequence + + '\n\nProtein in EMBL Submission:\n' + proteinSequence[0:stopCodonLocation] + + '\n' + ) + proteinSequence = proteinSequence[0:stopCodonLocation] + else: + print('Translating a nucleotide sequence of length 0. 
That was easy.') + pass + + return proteinSequence + + except Exception: + print 'Problem when translating protein:' + print sys.exc_info()[1] + tkMessageBox.showinfo('Protein Translation Error', + 'I could not translate your protein:\n' + str(sys.exc_info()[1])) + + raise + + + +# The input file should be a string of nucleotides, with capital letters to identify exons and introns. +# Annotations are expected and read in this format: +# fiveprimeutrEXONONEintrononeEXONTWOintrontwoEXONTHREEthreeprimeutr +# agctagctagctAGCTAGCtagctagctAGCTAGCtagctagctAGCTAGCTAgctagctagctag +# All spaces, line feeds, and tabs are removed and ignored. +def annotateRoughInputSequence(inputSequenceText): + + # TODO: I should accept a Fasta Input. + # Disregard the header line completely. Is there still sequence? + resultGeneLoci = HLAGene() + + # Trim out any spaces, tabs, newlines. Uppercase. + cleanedGene = inputSequenceText.replace(' ','').replace('\n','').replace('\t','').replace('\r','') + + # Capitalize, so I can store a copy of the full unannotated sequence. + unannotatedGene = cleanedGene.upper() + resultGeneLoci.fullSequence = unannotatedGene + print('Total Sequence Length = ' + str(len(unannotatedGene))) + + # Loop through the cleaned and annotated input sequence, + # capitals and lowercase letters to determine exon start and end + if(len(cleanedGene) > 0): + + # Is the first feature an exon or an intron? + # If we begin in an Exon + if( cleanedGene[0] in ('A','G','C','T')): + insideAnExon = True + # If we begin in an Intron/UTR + elif( cleanedGene[0] in ('a','g','c','t')): + insideAnExon = False + else: + # Nonstandard nucleotide? I should start panicking. 
+ #raise Exception('Nonstandard Nucleotide, not sure how to handle it') + print('Nonstandard Nucleotide at the beginning of the sequence, not sure how to handle it') + insideAnExon = False + + + locusBeginPosition = 0 + for x in range(0, len(cleanedGene)): + currentChar = cleanedGene[x] + + # Is this a standard nucleotide character? + if(currentChar.upper() in ('A','G','C','T')): + + if(currentChar.isupper()): + if(insideAnExon): + #We're STILL in an exon. In this case, I should just do nothing and continue. + pass + else: + #In this case, we're just starting an EXON. + #Store the last Intron in the list. + currentIntron = GeneLocus() + currentIntron.sequence = cleanedGene[locusBeginPosition:x].upper() + currentIntron.exon = False + resultGeneLoci.loci.append(currentIntron) + insideAnExon=True + locusBeginPosition = x + pass + + else: + if not (insideAnExon): + #We're STILL in an intron. Continue. + pass + else: + #Starting a new Intron. + # Store an Exon in the list. + currentExon = GeneLocus() + currentExon.sequence = cleanedGene[locusBeginPosition:x].upper() + currentExon.exon = True + resultGeneLoci.loci.append(currentExon) + insideAnExon = False + locusBeginPosition=x + pass + else: + print('Nonstandard nucleotide detected at position ' + str(x) + ' : ' + currentChar + + '. If this is a wildcard character, you might be ok.') + + # We've reached the end of the loop and we still need to store the last feature. + # Should be a 3' UTR, but I can't be sure, people like to put in weird sequences. + currentIntron = GeneLocus() + currentIntron.sequence = cleanedGene[locusBeginPosition:len(cleanedGene)].upper() + currentIntron.exon = insideAnExon + resultGeneLoci.loci.append(currentIntron) + + # Annotate the loci (name them) and print the results of the read file. 
# This method is a directory-safe way to open up a write file.
def createOutputFile(outputfileName):
    """Open outputfileName for writing, creating its directory if needed.

    Args:
        outputfileName: path of the file to create or truncate.

    Returns:
        The open file object; the caller is responsible for closing it.
    """
    outputDirectory = split(outputfileName)[0]
    # A bare file name has an empty directory part, and makedirs('') raises.
    if outputDirectory and not isdir(outputDirectory):
        print('Making Directory:' + outputDirectory)
        makedirs(outputDirectory)
    return open(outputfileName, 'w')


# I'm storing global variables in a dictionary for now.
def initializeGlobalVariables():
    """Create the module-level globalVariables dictionary if it is missing."""
    global globalVariables

    if "globalVariables" not in globals():
        globalVariables = {}


def assignConfigurationValue(configurationKey, configurationValue):
    """Store configurationValue under configurationKey."""
    initializeGlobalVariables()
    globalVariables[configurationKey] = configurationValue


def getConfigurationValue(configurationKey):
    """Return the stored value for configurationKey, or None if it is unknown.

    A missing key is reported on stdout rather than raised.
    """
    # Reading may happen before anything was assigned; make sure the
    # dictionary exists instead of failing with a NameError.
    initializeGlobalVariables()
    if configurationKey in globalVariables:
        return globalVariables[configurationKey]
    print ('Configuration Key Not Found:' + configurationKey)
    return None


def assignConfigName():
    """Record the config file path (Saddlebags.Config.xml in the home directory)."""
    assignConfigurationValue('config_file_location', join(expanduser("~"), 'Saddlebags.Config.xml'))


def writeConfigurationFile():
    """Write the stored configuration values to the XML config file.

    Passwords and the sequence text are never written. Values that are
    not text (for example an integer flag) are converted with str() so
    that they can be serialised; None is written as an empty element.
    """
    assignConfigName()
    print ('Writing a config file to:\n' + globalVariables['config_file_location'])

    # Some config values I don't want to store.
    # Add to this: Sequence Text, EMBL Submission Text, IMGT Submission Text
    unsavedKeys = ('embl_password', 'imgt_password', 'sequence')

    root = ET.Element("config")
    for key in globalVariables:
        if key in unsavedKeys:
            continue
        value = globalVariables[key]
        # ElementTree refuses to serialise non-text element content.
        if value is not None and not isinstance(value, (str, type(u''))):
            value = str(value)
        ET.SubElement(root, key).text = value

    xmlText = ET.tostring(root, encoding='utf8', method='xml')
    prettyXmlText = xml.dom.minidom.parseString(xmlText).toprettyxml()

    xmlOutput = createOutputFile(globalVariables['config_file_location'])
    try:
        xmlOutput.write(prettyXmlText)
    finally:
        # Release the handle even if the write fails.
        xmlOutput.close()


def loadConfigurationFile():
    """Load configuration values from the XML config file, or set defaults.

    When the file does not exist yet, the common default values are
    assigned instead. When it exists, every child element of the root
    is stored under its tag name, with the element text as the value.
    """
    assignConfigName()
    configFileLocation = globalVariables['config_file_location']

    if not isfile(configFileLocation):
        print ('The config file does not exist yet. I will not load it:\n' + configFileLocation)

        # Here is where I assign the common configuration values
        # test_submission indicates if we should use the "test" values.
        # I think I'll use this value for both EMBL and IMGT submissions, if it applies.
        assignConfigurationValue('test_submission', '1')

        # I'm storing FTP without the ftp:// identifier, because it is not necessary.
        # The test and prod ftp sites have the same address. This is intentional, embl doesn't have a test ftp
        assignConfigurationValue('embl_ftp_upload_site_test', 'webin.ebi.ac.uk')
        assignConfigurationValue('embl_ftp_upload_site_prod', 'webin.ebi.ac.uk')
        assignConfigurationValue('embl_rest_address_test', 'https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/')
        assignConfigurationValue('embl_rest_address_prod', 'https://www.ebi.ac.uk/ena/submit/drop-box/submit/')

    else:
        print ('The config file already exists, I will load it:\n' + configFileLocation)

        root = ET.parse(configFileLocation).getroot()
        for child in root:
            assignConfigurationValue(child.tag, child.text)
-# Version 1.0 - -# This file contains specifications for packaging of the MinION Extractor GUI +# This file contains specifications for packaging of saddlebags # As a standalone executable. This file is meant to be used with pyinstaller # http://www.pyinstaller.org/ @@ -25,7 +23,7 @@ block_cipher = None -a = Analysis(['AlleleSubmissionEMBL.py'], +a = Analysis(['AlleleSubmissionMain.py'], binaries=None, datas=None, hiddenimports=['six', 'packaging', 'packaging.requirements', 'packaging.version', 'packaging.specifiers', 'Tkinter', 'tkFileDialog', 'Tkconstants'], @@ -42,7 +40,7 @@ exe = EXE(pyz, a.binaries, a.zipfiles, a.datas, - name='AlleleSubmissionEMBLWindows', + name='SaddlebagsWindows', debug=False, strip=False, upx=True, diff --git a/src/AlleleSubmissionEMBLRestMethods.py b/src/AlleleSubmissionEMBLRestMethods.py new file mode 100644 index 0000000..566cb2e --- /dev/null +++ b/src/AlleleSubmissionEMBLRestMethods.py @@ -0,0 +1,129 @@ +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . + + + + +from AlleleSubCommon import * +import pycurl +import StringIO + +import xml.etree.ElementTree as ET +import xml.dom.minidom + +# Here we have methods to perform REST interactions necessary for EMBL submission. 
+ +def performProjectSubmission(submissionFileName, projectFileName): + POST_DATA = [('SUBMISSION', (pycurl.FORM_FILE, submissionFileName)), + ('PROJECT', (pycurl.FORM_FILE, projectFileName))] + + responseText = performSubmission(submissionFileName, POST_DATA) + return interpretProjectSubmissionResults(responseText) + +def performAnalysisSubmission(submissionFileName, analysisFileName): + POST_DATA = [('SUBMISSION', (pycurl.FORM_FILE, submissionFileName)), + ('ANALYSIS', (pycurl.FORM_FILE, analysisFileName))] + + responseText = performSubmission(submissionFileName, POST_DATA) + + return interpretAnalysisSubmissionResults(responseText) + +def performSubmission(submissionFileName, POST_DATA): + if (str(getConfigurationValue('test_submission')) == '0'): + print 'THIS IS A LIVE SUBMISSION AT EMBL.' + requestURL = str(getConfigurationValue('embl_rest_address_prod')) + '?auth=ENA%20' + str(getConfigurationValue('embl_username')) + '%20' + str(getConfigurationValue('embl_password')) + else: + print 'THIS IS A TEST SUBMISSION AT EMBL.' + requestURL = str(getConfigurationValue('embl_rest_address_test')) + '?auth=ENA%20' + str(getConfigurationValue('embl_username')) + '%20' + str(getConfigurationValue('embl_password')) + + curlResponseBuffer = StringIO.StringIO() + curlObject = pycurl.Curl() + curlObject.setopt(curlObject.URL, requestURL) + curlObject.setopt(curlObject.POST, 1) + curlObject.setopt(curlObject.HTTPPOST, POST_DATA) + curlObject.setopt(curlObject.USERAGENT, 'Curl') + curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write) + curlObject.setopt(pycurl.HTTPHEADER, ['Accept:application/xml']) + # Insecure. Any security experts want to make this better? + curlObject.setopt(pycurl.SSL_VERIFYHOST, 0) + curlObject.setopt(pycurl.SSL_VERIFYPEER, 0) + curlObject.perform() + curlObject.close() + + responseText = curlResponseBuffer.getvalue() + + # write XML to file. 
+ projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml') + resultsFile = createOutputFile(projectSubResultsFileName) + resultsFile.write(responseText) + resultsFile.close() + + return responseText + +def interpretProjectSubmissionResults(responseText): + # Open XML to report results: + root = ET.fromstring(responseText) + submissionSuccess = (root.attrib['success'] == 'true') + + projectAccession = None + messages = [] + + for child in root: + if(child.tag == 'PROJECT'): + if ('accession' in child.attrib.keys()): + projectAccession = child.attrib['accession'] + else: + projectAccession = None + #print('I found a project node.') + elif(child.tag == 'MESSAGES'): + print('I found some messages.') + for messageNode in child: + #print (messageNode.tag + ':' + messageNode.text) + messages.append(messageNode.tag + ':' + messageNode.text) + else: + # Don't care about the other nodes + pass + + # Return value should be a tuple: + # (Success, ProjectAccession, Messages[]) + return (submissionSuccess,projectAccession,messages) + +def interpretAnalysisSubmissionResults(responseText): + root = ET.fromstring(responseText) + submissionSuccess = (root.attrib['success'] == 'true') + + analysisAccession = None + messages = [] + + for child in root: + if(child.tag == 'ANALYSIS'): + if ('accession' in child.attrib.keys()): + analysisAccession = child.attrib['accession'] + else: + analysisAccession = None + #print('I found a project node.') + elif(child.tag == 'MESSAGES'): + print('I found some messages.') + for messageNode in child: + #print (messageNode.tag + ':' + messageNode.text) + messages.append(messageNode.tag + ':' + messageNode.text) + else: + # Don't care about the other nodes + pass + + # Return value should be a tuple: + # (Success, ProjectAccession, Messages[]) + return (submissionSuccess,analysisAccession,messages) + diff --git a/src/AlleleSubmissionEMBLXml.py b/src/AlleleSubmissionEMBLXml.py new file mode 100755 index 0000000..32ae791 --- 
/dev/null +++ b/src/AlleleSubmissionEMBLXml.py @@ -0,0 +1,118 @@ +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . + +from AlleleSubCommon import * + +import xml.etree.ElementTree as ET +import xml.dom.minidom + +# Here we have methods to create XML files necessary for EMBL submission. +# Schemas are defined on github. +# https://github.com/enasequence/schema + +def writeToXml(fullXmlFilePath, xmlElementTree): + xmlText = ET.tostring(xmlElementTree, encoding='utf8', method='xml') + prettyXmlText = xml.dom.minidom.parseString(xmlText).toprettyxml() + + xmlOutput = createOutputFile(fullXmlFilePath) + xmlOutput.write(prettyXmlText) + xmlOutput.close + + return prettyXmlText + +def getCenterName(): + # TODO: Should I use REST here? + # Probably not, center_name is not required in the xmls. + return 'Center_Name' + +def createProjectXML(fullXmlFilePath): + # They are called "Project" in xml, but "Study" on the website. + # Project = Study + root = ET.Element('PROJECT_SET') + + projectID = getConfigurationValue('study_identifier') + projectShortTitle = getConfigurationValue('study_short_title') + projectAbstract = getConfigurationValue('study_abstract') + + projectElement = ET.SubElement(root, 'PROJECT') + projectElement.set('alias', projectID) + # Center Name is optional according to schemas. Forget it. EMBL Knows our login info. 
+ #projectElement.set('center_name', getCenterName() ) + titleElement = ET.SubElement(projectElement, 'TITLE') + titleElement.text = projectShortTitle + descriptionElement = ET.SubElement(projectElement, 'DESCRIPTION') + descriptionElement.text = projectAbstract + submissionProjectElement = ET.SubElement(projectElement, 'SUBMISSION_PROJECT') + sequencingProjectElement = ET.SubElement(submissionProjectElement, 'SEQUENCING_PROJECT') + + return writeToXml(fullXmlFilePath, root) + +def createProjectSubmissionXML(fullXmlFilePath, submissionAlias, shortProjectFileName): + root = ET.Element('SUBMISSION') + root.set('alias', submissionAlias) + # Center Name is optional according to schemas. Forget it. + #root.set('center_name', getCenterName() ) + actionsElement = ET.SubElement(root, 'ACTIONS') + actionElement = ET.SubElement(actionsElement, 'ACTION') + addElement = ET.SubElement(actionElement, 'ADD') + addElement.set('source',shortProjectFileName) + addElement.set('schema','project') + + return writeToXml(fullXmlFilePath, root) + +def createAnalysisXML(fullXmlFilePath, checksumValue, flatfileZipFileName): + # An analysis xml is just a wrapper for a sequence submission. + root = ET.Element('ANALYSIS_SET') + + # TODO: I haven't created these three analysis configuration values yet. + # Probably need to add this to the GUI, or somehow generate them automagically. 
+ analysisElement = ET.SubElement(root, 'ANALYSIS') + analysisElement.set('alias', getConfigurationValue('analysis_alias')) + + titleElement = ET.SubElement(analysisElement, 'TITLE') + titleElement.text = (getConfigurationValue('analysis_title')) + + descriptionElement = ET.SubElement(analysisElement, 'DESCRIPTION') + descriptionElement.text = (getConfigurationValue('analysis_description')) + + studyRefElement = ET.SubElement(analysisElement, 'STUDY_REF') + studyRefElement.set('accession', getConfigurationValue('study_accession')) + + analysisTypeElement = ET.SubElement(analysisElement, 'ANALYSIS_TYPE') + sequenceFlatfileElement = ET.SubElement(analysisTypeElement, 'SEQUENCE_FLATFILE') + + filesElement = ET.SubElement(analysisElement, 'FILES') + + fileElement = ET.SubElement(filesElement, 'FILE') + fileElement.set('checksum', checksumValue) + fileElement.set('checksum_method', 'MD5') + fileElement.set('filename', flatfileZipFileName) + fileElement.set('filetype', 'flatfile') + + return writeToXml(fullXmlFilePath, root) + +def createAnalysisSubmissionXML(fullXmlFilePath, submissionAlias, shortAnalysisFileName): + root = ET.Element('SUBMISSION') + + root.set('alias', submissionAlias) + actionsElement = ET.SubElement(root, 'ACTIONS') + actionElement = ET.SubElement(actionsElement, 'ACTION') + addElement = ET.SubElement(actionElement, 'ADD') + addElement.set('source',shortAnalysisFileName) + addElement.set('schema','analysis') + + return writeToXml(fullXmlFilePath, root) + + diff --git a/src/AlleleSubmissionEMBL.py b/src/AlleleSubmissionMain.py similarity index 70% rename from src/AlleleSubmissionEMBL.py rename to src/AlleleSubmissionMain.py index 6070ddb..e7f8274 100755 --- a/src/AlleleSubmissionEMBL.py +++ b/src/AlleleSubmissionMain.py @@ -1,39 +1,40 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -# Version 1.0 - -SoftwareVersion = "Bhast Version 1.0" +# along with saddle-bags. If not, see . import Tkinter import sys -from AlleleGui import AlleleGui +from AlleleGuiMain import AlleleGuiMain +from AlleleSubCommon import * +SoftwareVersion = 'saddlebags Version 1.1' + if __name__=='__main__': try: # This is a really simple way to read commandline args, # because there really shouldn't be any. # TODO: Be more graceful with this, there are better ways to read args. - # No parameters are expected at all. sys.argv[0] doesn't count. if (len(sys.argv) == 1): - print('\n\n\n\n\n***Creating an EMBL Allele submission***\n') + + loadConfigurationFile() + + print('\n\n\n\n\n***Starting the HLA Allele Submission Tool***\n') root = Tkinter.Tk() - AlleleGui(root).pack() + AlleleGuiMain(root).pack() root.mainloop() print('Done. Hooray.') @@ -45,14 +46,16 @@ sys.argv[1].lower() == '-version') ): print (SoftwareVersion) + pass + # # You executed the software wrong. Sorry. else: print("usage:\n" + "\tRun this program using standard python call:\n" + - "\t$python AlleleSubmissionEMBL.py\n" + + "\t$python AlleleSubmissionMain.py\n" + "\tbiopython must be accessible in your python environment. 
To run using Anaconda,\n" - "\tCheck readme at https://github.com/transplantation-immunology/EMBL-HLA-Submission\n" + "\tCheck readme at https://github.com/transplantation-immunology/saddle-bags\n" ) @@ -61,5 +64,4 @@ # This is not really doing anything. print 'Unexpected problem during execution:' print sys.exc_info()[1] - raise - + raise \ No newline at end of file diff --git a/src/HLAGene.py b/src/HLAGene.py index 69565e3..d9940b3 100755 --- a/src/HLAGene.py +++ b/src/HLAGene.py @@ -1,19 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -# Version 1.0 +# along with saddle-bags. If not, see . # The GeneLocus class specifies a locus on a Gene, # Either an Exon, intron, or UTR. diff --git a/src/ScrolledWindow.py b/src/ScrolledWindow.py new file mode 100644 index 0000000..15e00f8 --- /dev/null +++ b/src/ScrolledWindow.py @@ -0,0 +1,64 @@ +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . + + + +from Tkinter import Scrollbar, Frame, Canvas, VERTICAL, RIGHT, Y, LEFT, BOTH, TRUE, FALSE, NW + +# http://tkinter.unpythonic.net/wiki/VerticalScrolledFrame + +class VerticalScrolledFrame(Frame): + def __init__(self, parent, *args, **kw): + Frame.__init__(self, parent, *args, **kw) + + + # create a canvas object and a vertical scrollbar for scrolling it + vscrollbar = Scrollbar(self, orient=VERTICAL) + vscrollbar.pack(fill=Y, side=RIGHT, expand=FALSE) + canvas = Canvas(self, bd=0, highlightthickness=0, + yscrollcommand=vscrollbar.set) + canvas.pack(side=LEFT, fill=BOTH, expand=TRUE) + vscrollbar.config(command=canvas.yview) + + # TODO: Fix the vertical scrolling behavior. The Mouse does not work. 
+ + + # reset the view + canvas.xview_moveto(0) + canvas.yview_moveto(0) + + # create a frame inside the canvas which will be scrolled with it + self.interior = interior = Frame(canvas) + interior_id = canvas.create_window(0, 0, window=interior, + anchor=NW) + + # track changes to the canvas and frame width and sync them, + # also updating the scrollbar + def _configure_interior(event): + + # update the scrollbars to match the size of the inner frame + size = (interior.winfo_reqwidth(), interior.winfo_reqheight()) + canvas.config(scrollregion="0 0 %s %s" % size) + if interior.winfo_reqwidth() != canvas.winfo_width(): + # update the canvas's width to fit the inner frame + canvas.config(width=interior.winfo_reqwidth()) + interior.bind('', _configure_interior) + + def _configure_canvas(event): + + if interior.winfo_reqwidth() != canvas.winfo_width(): + # update the inner frame's width to fill the canvas + canvas.itemconfigure(interior_id, width=canvas.winfo_width()) + canvas.bind('', _configure_canvas) diff --git a/src/SubmissionGeneratorEMBL.py b/src/SubmissionGeneratorEMBL.py new file mode 100755 index 0000000..15b7fe1 --- /dev/null +++ b/src/SubmissionGeneratorEMBL.py @@ -0,0 +1,349 @@ +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . 
+ +from Bio.Seq import Seq +from Bio.Alphabet import generic_dna +import sys +import tkMessageBox + +import math + +from HLAGene import * + +from AlleleSubCommon import * + +# The AlleleGenerator class contains logic to generate an EMBL HLA allele submission +# In ENA format. +class SubmissionGeneratorEMBL(): + + def __init__(self): + self.sequenceAnnotation = HLAGene() + + + def printHeader(self): + #print('The EMBL Print Header Method.') + headerText = '' + + # Print header + headerText += 'ID XXX; XXX; linear; genomic DNA; XXX; XXX; ' + str(self.sequenceAnnotation.totalLength()) + ' BP.\n' + headerText += 'XX\n' + # A valid document should have an AC (Accession Number) and DE (Description) field. + # I don't have an AC number available, so it's blank. + headerText += 'AC \n' + headerText += 'XX\n' + #headerText += 'DE Human Leukocyte Antigen\n' + #Requested change to the DE line. It should look like: + #Homo sapiens HLA-B gene for MHC class I antigen, allele "/allele name" + headerText += ('DE Homo sapiens ' + str(getConfigurationValue('gene')) + + ' gene for MHC class ' + str(('I' if ('1'==str(getConfigurationValue('class'))) else 'II')) + + ' antigen, allele "' + str(getConfigurationValue('allele_name')) + '"\n') + headerText += 'XX\n' + + # Print key + headerText += ('FH Key Location/Qualifiers\n') + headerText += ('FH\n') + + # Print source + # It's from a human. + headerText += ('FT source 1..' + str(self.sequenceAnnotation.totalLength()) + '\n') + headerText += ('FT /organism="Homo sapiens"\n') + headerText += ('FT /db_xref="taxon:9606"\n') + headerText += ('FT /mol_type="genomic DNA"\n') + headerText += ('FT /chromosome="6"\n') + headerText += ('FT /isolate="' + str(getConfigurationValue('sample_id')) + '"\n') + + return headerText + + def printMRNA(self): + mRNAText = '' + # Print mRNA + mRNAText += ('FT mRNA join(') + + # Iterate through the indices of the UTRs and exons. 
+ # The 3' and 5' UTR are included in the mRNA + for x in range(0,len(self.sequenceAnnotation.loci)): + geneLocus = self.sequenceAnnotation.loci[x] + # If it is an exon or UTR + if (geneLocus.exon or 'UT' in geneLocus.name): + mRNAText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex) + ',' + + # Trim off the last comma and add a parenthese + mRNAText = mRNAText[0:len(mRNAText)-1] + ')\n' + + mRNAText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') + mRNAText += ('FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n') + mRNAText += ('FT /product=\"MHC class ' + str(('I' if ('1'==str(getConfigurationValue('class'))) else 'II')) + ' antigen\"\n') + + return mRNAText + + + def printCDS(self): + # I need to perform the translation first, so I know if this is a "pseudogene" or not + peptideSequence = translateSequence(self.sequenceAnnotation.getExonSequence()) + + cdsText = '' + + # Print CDS + # CDS is the coding sequence. It should include the exons, but not the UTRs/Introns + # The range 1:featureCount-1 will exclude the UTRs. + cdsText += ('FT CDS join(') + for x in range(0,len(self.sequenceAnnotation.loci)): + geneLocus = self.sequenceAnnotation.loci[x] + if (geneLocus.exon): + cdsText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex) + if not x==len(self.sequenceAnnotation.loci)-2: + cdsText += ',' + else: + cdsText += ')\n' + + cdsText += ('FT /transl_table=1\n') + cdsText += ('FT /codon_start=1\n') + + # If this sequence has premature stop codon, add the "/pseudo" flag. + # This indicates the gene is a /pseudo gene, not a complete protein. 
+ if(str(getConfigurationValue('is_pseudo_gene') == '1')): + print("putting pseudo in the submission") + cdsText += ('FT /pseudo\n') + else: + print("not putting pseudo in the submission") + pass + + + cdsText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') + cdsText += ('FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n') + cdsText += ('FT /product=\"MHC class ' + str(('I' if ('1'==str(getConfigurationValue('class'))) else 'II')) + ' antigen\"\n') + cdsText += ('FT /translation=\"') + + # Some simple formatting for the peptide sequence, making it human and computer readable. + # 80 peptides per line. Except the first line, which is 66. + # 66 is 80-14, where 14 is the length of { /translation=" } + + # The translation is commented out here. I had to move it to the top of this method. + #peptideSequence = self.translateSequence(self.sequenceAnnotation.getExonSequence()) + if(len(peptideSequence) < 66): + cdsText += (peptideSequence) + '\"\n' + else: + cdsText += peptideSequence[0:66] + '\n' + i=66 + while (i < len(peptideSequence)): + cdsText += 'FT ' + peptideSequence[i:i+80] + i += 80 + + # If we're not yet at the end of the sequence, go to the next line + if(i < len(peptideSequence)): + cdsText += '\n' + # We're at the end. close the quote and new line. + else: + cdsText += '\"\n' + + return cdsText + + def printFeatures(self): + featureText = '' + + exonIndex = 1 + intronIndex = 1 + + geneHas3UTR = False + geneHas5UTR = False + + for x in range(0,len(self.sequenceAnnotation.loci)): + currentFeature = self.sequenceAnnotation.loci[x] + + # 3' UTR + if(currentFeature.name == '3UT'): + featureText += ('FT 3\'UTR ' + str(currentFeature.beginIndex) + '..' 
+ str(currentFeature.endIndex) + '\n') + featureText += ('FT /note=\"3\'UTR\"\n') + featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') + featureText += ('FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n') + geneHas3UTR = True + + # 5' UTR + elif(currentFeature.name == '5UT'): + featureText += ('FT 5\'UTR ' + str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex) + '\n') + featureText += ('FT /note=\"5\'UTR\"\n') + featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') + featureText += ('FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n') + geneHas5UTR = True + + # Exon + elif(currentFeature.exon): + featureText += ('FT exon ' + str(currentFeature.beginIndex) + + '..' + str(currentFeature.endIndex) + '\n') + featureText += ('FT /number=' + str(exonIndex) + '\n') + featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') + featureText += ('FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n') + exonIndex += 1 + + # Intron + else: + featureText += ('FT intron ' + str(currentFeature.beginIndex) + + '..' + str(currentFeature.endIndex) + '\n') + featureText += ('FT /number=' + str(intronIndex) + '\n') + featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') + featureText += ('FT /allele="' + str(getConfigurationValue('allele_name')) + '"\n') + intronIndex += 1 + + + featureText += ('XX\n') + + # Do a quick sanity check. If we are missing either UTR I should warn the user. + # But move on with your life, this is not worth getting upset over. + if (not geneHas3UTR and not geneHas5UTR): + tkMessageBox.showinfo('Missing UTRs', + 'This sequence has no 5\' or 3\' UTR.\n\n' + + 'Use lowercase nucleotides at the\n' + + 'beginning and end of your DNA\n' + + 'sequence to specify the 5\' and 3\' UTRs.' 
) + elif (not geneHas5UTR): + tkMessageBox.showinfo('Missing 5\' UTR', + 'This sequence has no 5\' UTR.\n\n' + + 'Use lowercase nucleotides at the\n' + + 'beginning and end of your DNA\n' + + 'sequence to specify the 5\' and 3\' UTRs.' ) + elif (not geneHas3UTR): + tkMessageBox.showinfo('Missing 3\' UTR', + 'This sequence has no 3\' UTR.\n\n' + + 'Use lowercase nucleotides at the\n' + + 'beginning and end of your DNA\n' + + 'sequence to specify the 5\' and 3\' UTRs.' ) + else: + print('The UTRs look fine.') + pass + + return featureText + + def printSequence(self): + sequenceText = '' + + completeSequence = self.sequenceAnnotation.getCompleteSequence().upper() + + cCount = completeSequence.count('C') + gCount = completeSequence.count('G') + tCount = completeSequence.count('T') + aCount = completeSequence.count('A') + otherCount = self.sequenceAnnotation.totalLength() - (cCount + gCount + tCount + aCount) + + sequenceText += ('SQ Sequence ' + str(self.sequenceAnnotation.totalLength()) + ' BP; ' + + str(aCount) + ' A; ' + str(cCount) + ' C; ' + + str(gCount) + ' G; ' + str(tCount) + ' T; ' + + str(otherCount) + ' other;\n') + + # Here's some logic to print the sequence information in groups of 10. + # This format is specified in the User manual specified by EMBL. + currentSeqIndex = 0 + + while (currentSeqIndex < self.sequenceAnnotation.totalLength()): + # The character code for a sequence region is two blank spaces, + # followed by three blank spaces, for a total of 5 blanks. + sequenceText += ' ' + sequenceRow = self.sequenceAnnotation.getCompleteSequence()[currentSeqIndex : currentSeqIndex + 60] + + # A sequenceChunk is 10 nucleotides in this context. + # Format specifies up to six "chunks" per line. + for i in range(0,6): + sequenceChunk = sequenceRow[i*10 : (i+1)*10] + sequenceText += sequenceChunk + ' ' + + # If line is complete (=60 bp), we can print the nucleotide index and move on to the next row. 
+ if(len(sequenceRow) == 60): + sequenceText += str(currentSeqIndex + 60) + '\n' + # but if line is not complete (this is more likely, and more complicated.) + else: + # Fill with spaces to align the nucleotide indices at the end of the sequence. + numberSpaces = 60-len(sequenceRow) + for n in range (0, numberSpaces): + sequenceText += ' ' + sequenceText += (str(len(sequenceRow) + currentSeqIndex) + '\n') + + # The next row of the sequence + currentSeqIndex += 60 + + return sequenceText + + + # Create the text submission based on the ENA format. + def buildENASubmission(self): + + # ENA format is the preferred submission type for EMBL. More information: + # http://www.ebi.ac.uk/ena/submit/sequence-submission + # http://www.ebi.ac.uk/ena/submit/entry-upload-templates + # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt + # ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html + # http://www.ebi.ac.uk/ena/software/flat-file-validator + + documentBuffer = '' + + totalLength = self.sequenceAnnotation.totalLength() + print('total calculated length = ' + str(totalLength)) + + if(totalLength > 0 and self.validateInputs()): + + # These are the main sections of the ENA submission. + documentBuffer += self.printHeader() + documentBuffer += self.printMRNA() + documentBuffer += self.printCDS() + documentBuffer += self.printFeatures() + documentBuffer += self.printSequence() + + # Print entry terminator. The last line of an ENA entry. + documentBuffer += ('//\n') + + else: + tkMessageBox.showinfo('No HLA Sequence Found', + 'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' ) + return None + + + return documentBuffer + + + # Return True if our input values are all present and accomodated for. + # If something is missing, then throw a fit and give up. + # TODO: I should probably not raise these exceptions actually. 
+ # Instead, I should have the GUI Automatically open the choose options screen + + # TODO: Maybe I should delete this method, and add error handling to the generate methods. + def validateInputs(self): + #raise Exception ('Validate Inputs Method is being used, after all.') + + if (getConfigurationValue('sample_id') is None or len(getConfigurationValue('sample_id')) < 1): + print('Invalid Sequence ID:' + str(getConfigurationValue('sample_id'))) + #raise Exception ('Invalid Sequence ID:' + str(getConfigurationValue('sample_id'))) + return False + + elif (self.sequenceAnnotation is None): + #raise Exception ('Invalid Sequence Annotation:' + str(self.sequenceAnnotation)) + print('Invalid Sequence Annotation:' + str(self.sequenceAnnotation)) + return False + + elif (getConfigurationValue('gene') is None or len(getConfigurationValue('gene')) < 1): + #raise Exception ('Invalid Input Gene:' + str(getConfigurationValue('gene'))) + print('Invalid Input Gene:' + str(getConfigurationValue('gene'))) + return False + + elif (getConfigurationValue('allele_name') is None or len(getConfigurationValue('allele_name')) < 1): + #raise Exception ('Invalid Input Allele:' + str(getConfigurationValue('allele_name'))) + print('Invalid Input Allele:' + str(getConfigurationValue('allele_name'))) + return False + + elif (getConfigurationValue('class') is None or len(getConfigurationValue('class')) < 1): + #raise Exception ('Invalid Input Class:' + str(getConfigurationValue('class'))) + print('Invalid Input Class:' + str(getConfigurationValue('class'))) + return False + + else: + return True + diff --git a/src/SubmissionGeneratorIMGT.py b/src/SubmissionGeneratorIMGT.py new file mode 100755 index 0000000..2365d70 --- /dev/null +++ b/src/SubmissionGeneratorIMGT.py @@ -0,0 +1,395 @@ +# This file is part of saddle-bags. 
+#
+# saddle-bags is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# saddle-bags is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with saddle-bags. If not, see .
+
+import sys
+
+import datetime
+import tkMessageBox
+
+from AlleleSubCommon import *
+#import math
+
+from HLAGene import *
+
+# The SubmissionGeneratorIMGT class contains logic to generate an IMGT HLA
+# allele submission in ENA format.
+# The annotated sequence lives in self.sequenceAnnotation; every other
+# submission detail is read through getConfigurationValue().
+class SubmissionGeneratorIMGT():
+
+    def __init__(self):
+        # The annotated HLA sequence that the submission is generated from.
+        self.sequenceAnnotation = HLAGene()
+
+    # Create the text submission based on the IMGT format.
+    # Returns the whole submission as one string. When the annotated sequence
+    # is empty a message box is shown and an empty string is returned.
+    def buildIMGTSubmission(self):
+
+        totalLength = self.sequenceAnnotation.totalLength()
+        print('total calculated length = ' + str(totalLength))
+
+        if not totalLength > 0:
+            tkMessageBox.showinfo('No HLA Sequence Found',
+                'The HLA sequence is empty.\nPlease fill in an annotated HLA sequence\nbefore generating the submission.' )
+            return ''
+
+        print ('im gonna add the header in here:')
+
+        # These are the main sections of the ENA submission, in output order.
+        documentBuffer = ''
+        documentBuffer += self.printHeader()
+        documentBuffer += self.printSubmitter()
+        documentBuffer += self.printSource()
+        documentBuffer += self.printMethods()
+        documentBuffer += self.printFeatures()
+        documentBuffer += self.printSequence()
+
+        # Print entry terminator. The last line of an ENA entry.
+        documentBuffer += ('//\n')
+
+        return documentBuffer
+
+
+    # Build the header lines (ID, AC, SV, DT, DE, KW, CC, OS, OC, DR, RN, RC, FH).
+    # Reads 'embl_release_date', 'allele_name',
+    # 'closest_allele_written_description' and 'embl_sequence_accession'
+    # from the configuration. Returns the header as one string.
+    def printHeader(self):
+
+        headerText = ''
+
+        # TODO: Get these values from IMGT, they shouldn't be hardcoded.
+        # Maybe it should be an unknown identifier with
+        imgtIdentifier = 'HWS10012345'
+        imgtIdentifierWithVersion = 'HWS10012345.1'
+        # Submission date is today's date, formatted day/month/year.
+        currentSubmissionDate = '{:%d/%m/%Y}'.format(datetime.datetime.now())
+
+        headerText += 'ID ' + str(imgtIdentifier) + '; Sequence Submission; Confidential; ' + str(self.sequenceAnnotation.totalLength()) + ' BP.\n'
+        headerText += 'XX\n'
+        headerText += 'AC ' + str(imgtIdentifier) + ';\n'
+        headerText += 'XX\n'
+        headerText += 'SV ' + str(imgtIdentifierWithVersion) + '\n'
+        headerText += 'XX\n'
+        headerText += 'DT ' + str(currentSubmissionDate) + ' (Submitted)\n'
+        headerText += 'DT ' + str(getConfigurationValue('embl_release_date')) + ' (Release)\n'
+        headerText += 'XX\n'
+
+        # TODO: I'm using the local allele name that is assigned by the user.
+        # Maybe this allele name should be based on the closest allele.
+        # Do I want the allele name, or should I generate a new one based on the closest allele?
+
+        headerText += 'DE ' + str(getConfigurationValue('allele_name')) + '\n'
+        headerText += 'XX\n'
+        headerText += 'KW HLA WEB SUBMISSION;\n'
+        headerText += 'XX\n'
+
+        # The new allele description is split into multiple lines.
+        # Each line of the description gets its own 'CC' line, for example:
+        # CC A*03:01:01:01new is identical to A*03:01:01:01 except for position 382 is a A
+        # CC in the new allele. This result in an amino change from W to stopcodon.
+        rawDescription = str(getConfigurationValue('closest_allele_written_description'))
+        for lineToken in rawDescription.split('\n'):
+            headerText += 'CC ' + lineToken + '\n'
+
+        headerText += 'XX\n'
+        headerText += 'OS Homo sapiens (human);\n'
+        headerText += 'OC Eukaryota; Metazoa; Chordata; Vertebrata; Mammalia; Eutheria; Primates;\n'
+        headerText += 'OC Catarrhini; Hominidae; Homo.\n'
+        headerText += 'XX\n'
+        # TODO: Our submission says GENBANK, but we're using EMBL Numbers. Also what does that [1] mean?
+        headerText += 'DR GENBANK; ' + str(getConfigurationValue('embl_sequence_accession')) + '.\n'
+        headerText += 'XX\n'
+        headerText += 'RN [1]\n'
+        # TODO: This submission is Unpublished. What if it is published?
+        # Ask James what a published study looks like. I need to include study name etc.
+        headerText += 'RC Unpublished.\n'
+        headerText += 'XX\n'
+        headerText += 'FH Key Location/Qualifier\n'
+        headerText += 'FH\n'
+
+        return headerText
+
+    # Build the 'submittor' feature, which spans the whole sequence.
+    # The submitter details are still placeholder text (**...**).
+    def printSubmitter(self):
+        submitterText = ''
+
+        # TODO: I don't know any of this data. Should it be in the form?
+        # Maybe I just need the submitter ID, and i can or can not get the rest?
+        # I should be able to calculate the indices, at least.
+
+        submitterText += 'FT submittor 1..' + str(self.sequenceAnnotation.totalLength()) + '\n'
+        submitterText += 'FT /ID="**IMGT_SUBMITTER_EMAIL_ID**"\n'
+        submitterText += 'FT /name="**IMGT_SUBMITTER_NAME**"\n'
+        submitterText += 'FT /alt_contact=""\n'
+        submitterText += 'FT /email="**IMGT_SUBMITTER_EMAIL_ADDRESS**"\n'
+
+        return submitterText
+
+    # Build the 'source' feature, which spans the whole sequence.
+    # Reads 'sample_id', 'ethnic_origin', 'sex' and 'consanguineous' from the
+    # configuration; the lab details and HLA typing are still hardcoded.
+    def printSource(self):
+        sourceText = ''
+
+        # TODO: Submitting Laboratory Information. Can this be fetched from IMGT?
+
+        sourceText += 'FT source 1..' + str(self.sequenceAnnotation.totalLength()) + '\n'
+        sourceText += 'FT /cell_id="' + str(getConfigurationValue('sample_id')) + '"\n'
+        sourceText += 'FT /ethnic_origin="' + str(getConfigurationValue('ethnic_origin')) + '"\n'
+        sourceText += 'FT /sex="' + str(getConfigurationValue('sex')) + '"\n'
+        sourceText += 'FT /consanguineous="' + str(getConfigurationValue('consanguineous')) + '"\n'
+        sourceText += 'FT /homozygous="Yes"\n'
+        sourceText += 'FT /lab_of_origin="**IMGT_SUBMITTING_LAB_NAME**"\n'
+        sourceText += 'FT /lab_contact="**IMGT_SUBMITTER_NAME**"\n'
+
+        # TODO: No Material Available. What if Material is available?
+        # I think I need to add this to the form still.
+        # Same story with "cell_bank"
+
+        sourceText += 'FT /material_available="No Material Available"\n'
+        sourceText += 'FT /cell_bank="Not Available"\n'
+
+        # TODO: James suggested that I only allow valid fully-sequenced alleles.
+        # Should I validate this, or should I leave that work to IMGT?
+        # NOTE: the typing below is hardcoded sample data, not taken from the input.
+
+        sourceText += 'FT /HLA-A*="02:01,03new"\n'
+        sourceText += 'FT /HLA-C*="07,-"\n'
+        sourceText += 'FT /HLA-B*="07,-"\n'
+        sourceText += 'FT /HLA-DRB1*="15:01,-"\n'
+
+        return sourceText
+
+    # Build the 'method' feature, which spans the whole sequence.
+    # Only the /alignment qualifier is dynamic (configuration value
+    # 'closest_known_allele'); the sequencing methodology and primers are
+    # still hardcoded sample data.
+    def printMethods(self):
+        methodsText = ''
+
+        # TODO: Get primer info from the form. Make sure this all is correct
+
+        methodsText += 'FT method 1..' + str(self.sequenceAnnotation.totalLength()) + '\n'
+
+        # TODO: What are the options for sequencing methodology?
+        # I can provide an open-text field.
+
+        methodsText += 'FT /primary_sequencing="Direct sequencing of PCR product from DNA (SBT)"\n'
+        methodsText += 'FT /secondary_sequencing="Direct sequencing of PCR product from DNA (SBT)"\n'
+        methodsText += 'FT /type_of_primer="Both allele and locus specific"\n'
+        methodsText += 'FT /sequenced_in_isolation="Yes"\n'
+
+        # TODO Add these primers dynamically
+        # A primer has these pieces of information
+        # "ID" ("primer_1") "Sequence" "Feature" "locus/indices"
+        # locus seems to be genomic index, from the beginning of the sequence.
+        # I suppose this has to be locations in the reference sequence?
+        # I should store a dictionary of primers in the configuration.
+        # Errr, nodes underneath the Primer nodes.
+        # They put a "tab" character between some of this data. Why? Because Tabs, sigh.
+
+        methodsText += 'FT /primer_1="97022 GAGCCCCGCTTCAACGCC E2 257-274"\n'
+        methodsText += 'FT /primer_2="09148 CCAGGCGTGGCTCTCAGA 5UT -265--248"\n'
+        methodsText += 'FT /primer_3="09152 AACCTACGTAGGGTCCTTCA 5UT -161--142"\n'
+        methodsText += 'FT /primer_4="09154 AGTGTCGTCGCGGTCGCT 5UT -72--55"\n'
+        methodsText += 'FT /primer_5="09167 CAGACSCCGAGGATGGCC 5UT -12-6"\n'
+        methodsText += 'FT /primer_6="09162 AACACCCAACACACATTAGGT I7 2745-2765"\n'
+        methodsText += 'FT /primer_7="09168 GGGAGCACAGGTCAGCGTGGGAAG 3UT 3075-3098"\n'
+        methodsText += 'FT /primer_8="98008 GTTTAGGCCAAAAATYCCCCC I2 635-655"\n'
+        methodsText += 'FT /no_of_reactions="3"\n'
+        methodsText += 'FT /sequencing_direction="Both"\n'
+
+
+        # TODO: There's something up with these primers.
+        # Why are they in the comments? Did we run out of space?
+
+        methodsText += 'FT /method_comments="98021 GTCCAGGCTGGTGTCTGG I3 1432-1449\n'
+        methodsText += 'FT 01026seq GGGGAGAAGCAASGGGC I1 108-124 02100seq\n'
+        methodsText += 'FT CCGCACGCACCCACCG 5UT -44--29 03026 GAGGTTCCTCTAGGACCTTAA I5\n'
+        methodsText += 'FT 2439-2459 03052 TAAGGAGGGAGAYGGGGGT I4 1847-1865 03055\n'
+        methodsText += 'FT CTGCYGTGAKGTGGAGGAG E5 2035-2053 14256 GAATCCTCCTGGGTTTCCAG\n'
+        methodsText += 'FT I3 1115-1134 97094seq TGTCGTCCACGTAGC E2 279-293 98070\n'
+        methodsText += 'FT GGCCTAAACTGAAAATGAAACC I2 622-643 00029 GGTCCCAATTGTCTCCCCTC\n'
+        methodsText += 'FT I3 1055-1074 02038seq GGCCAGCAATGATGC E5 1981-1995 03017\n'
+        methodsText += 'FT CCTTTGCAGAAACAAAGTCAGGGT 3UT 2970-2993 03050\n'
+        methodsText += 'FT TTAAGGTCCTAGAGGAACCTC I5 2439-2459 14019 CCAGACACCAGCCTGGAC\n'
+        methodsText += 'FT I3 1432-1449 Exons and introns are defined as in regular\n'
+        methodsText += 'FT HLA genes although in this allele a stopcodon is present in\n'
+        methodsText += 'FT Exon 2."\n'
+
+        # This is the "closest allele, right?"
+
+        methodsText += 'FT /alignment="' + str(getConfigurationValue('closest_known_allele')) + '"\n'
+
+        return methodsText
+
+    # Build the feature table: one CDS line joining every exon, followed by
+    # one entry per locus (UTR, exon or intron) in the order they appear in
+    # self.sequenceAnnotation.loci. Exons and introns are numbered from 1.
+    # A message box is shown when a UTR is missing, but the text is still returned.
+    def printFeatures(self):
+        featureText = ''
+        loci = self.sequenceAnnotation.loci
+
+        # TODO: I might double check with James Robinson about the backslashes before "number".
+        # Seems inconsistent.
+
+        # Coding sequence is just the exons, for example:
+        # FT CDS join(248..320,457..720,962..1237)
+        # Ignoring line-breaks for now, this might create a really wide line. Ok?
+        # The ranges are collected first and joined afterwards, so the list is
+        # closed properly wherever the last exon sits. The closing bracket used
+        # to be written only when the last exon was the second-to-last locus,
+        # which left the line unterminated when no 3' UTR followed it.
+        exonRanges = [str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex)
+            for geneLocus in loci if geneLocus.exon]
+        if exonRanges:
+            featureText += 'FT CDS join(' + ','.join(exonRanges) + ')\n'
+
+        exonIndex = 1
+        intronIndex = 1
+
+        geneHas3UTR = False
+        geneHas5UTR = False
+
+        for currentFeature in loci:
+            featureRange = str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex)
+
+            # 3' UTR
+            if currentFeature.name == '3UT':
+                featureText += 'FT 3\' UTR ' + featureRange + '\n'
+                geneHas3UTR = True
+
+            # 5' UTR
+            elif currentFeature.name == '5UT':
+                featureText += 'FT 5\' UTR ' + featureRange + '\n'
+                geneHas5UTR = True
+
+            # Exon
+            elif currentFeature.exon:
+                featureText += 'FT Exon ' + featureRange + '\n'
+                featureText += 'FT \\number="' + str(exonIndex) + '"\n'
+                exonIndex += 1
+
+            # Intron
+            else:
+                featureText += 'FT Intron ' + featureRange + '\n'
+                featureText += 'FT \\number="' + str(intronIndex) + '"\n'
+                intronIndex += 1
+
+        # Do a quick sanity check. If we are missing either UTR I should warn the user.
+        # But move on with your life, this is not worth getting upset over.
+        self._warnAboutMissingUTRs(geneHas5UTR, geneHas3UTR)
+
+        return featureText
+
+    # Show a message box naming whichever UTR is missing from the annotation.
+    # When both UTRs are present, only a note is printed to the console.
+    def _warnAboutMissingUTRs(self, geneHas5UTR, geneHas3UTR):
+        # The same advice is appended to every warning.
+        utrAdvice = ('Use lowercase nucleotides at the\n'
+            + 'beginning and end of your DNA\n'
+            + 'sequence to specify the 5\' and 3\' UTRs.')
+
+        if (not geneHas3UTR and not geneHas5UTR):
+            tkMessageBox.showinfo('Missing UTRs',
+                'This sequence has no 5\' or 3\' UTR.\n\n' + utrAdvice)
+        elif (not geneHas5UTR):
+            tkMessageBox.showinfo('Missing 5\' UTR',
+                'This sequence has no 5\' UTR.\n\n' + utrAdvice)
+        elif (not geneHas3UTR):
+            tkMessageBox.showinfo('Missing 3\' UTR',
+                'This sequence has no 3\' UTR.\n\n' + utrAdvice)
+        else:
+            print('The UTRs look fine.')
+
+    # Build the sequence section: an 'SQ' summary line with the base counts,
+    # then the sequence itself in rows of up to 60 bases, split into groups
+    # of 10, each row ending with the index of its last base.
+    def printSequence(self):
+        sequenceText = ''
+
+        # Fetch the sequence and its length once instead of on every row.
+        rawSequence = self.sequenceAnnotation.getCompleteSequence()
+        totalLength = self.sequenceAnnotation.totalLength()
+
+        # Bases are counted case-insensitively; the rows below keep the original case.
+        completeSequence = rawSequence.upper()
+
+        cCount = completeSequence.count('C')
+        gCount = completeSequence.count('G')
+        tCount = completeSequence.count('T')
+        aCount = completeSequence.count('A')
+        otherCount = totalLength - (cCount + gCount + tCount + aCount)
+
+        sequenceText += ('SQ Sequence ' + str(totalLength) + ' BP; '
+            + str(aCount) + ' A; ' + str(cCount) + ' C; '
+            + str(gCount) + ' G; ' + str(tCount) + ' T; '
+            + str(otherCount) + ' other;\n')
+
+        # Here's some logic to print the sequence information in groups of 10.
+        # This format is specified in the User manual specified by EMBL.
+        currentSeqIndex = 0
+
+        while (currentSeqIndex < totalLength):
+            # The character code for a sequence region is two blank spaces,
+            # followed by three blank spaces, for a total of 5 blanks.
+            sequenceText += '     '
+            sequenceRow = rawSequence[currentSeqIndex : currentSeqIndex + 60]
+
+            # A sequenceChunk is 10 nucleotides in this context.
+            # Format specifies up to six "chunks" per line.
+            # A short last row yields empty chunks, but each chunk's trailing
+            # blank is still written so the row layout stays the same.
+            for i in range(0,6):
+                sequenceChunk = sequenceRow[i*10 : (i+1)*10]
+                sequenceText += sequenceChunk + ' '
+
+            # Fill an incomplete row with spaces to align the nucleotide index
+            # at the end of the line. A complete row (60 bp) needs no filling.
+            sequenceText += ' ' * (60 - len(sequenceRow))
+            sequenceText += str(currentSeqIndex + len(sequenceRow)) + '\n'
+
+            # The next row of the sequence
+            currentSeqIndex += 60
+
+        return sequenceText
+
+
+    # Return True if our input values are all present and accommodated for.
+    # The configuration values 'sample_id', 'gene', 'allele_name' and 'class'
+    # must be non-None and non-empty, and self.sequenceAnnotation must not be
+    # None. The first problem found is printed and False is returned.
+    # This used to raise unconditionally and read attributes that __init__
+    # never sets; the inputs now come from the configuration instead.
+    # TODO: Maybe the GUI should automatically open the choose options screen
+    # when this returns False.
+    def validateInputs(self):
+
+        sampleID = getConfigurationValue('sample_id')
+        if (sampleID is None or len(sampleID) < 1):
+            print('Invalid Sequence ID:' + str(sampleID))
+            return False
+
+        if (self.sequenceAnnotation is None):
+            print('Invalid Sequence Annotation:' + str(self.sequenceAnnotation))
+            return False
+
+        # The remaining checks share one shape: (configuration key, message prefix).
+        remainingChecks = (
+            ('gene', 'Invalid Input Gene:'),
+            ('allele_name', 'Invalid Input Allele:'),
+            ('class', 'Invalid Input Class:'),
+        )
+        for configKey, messagePrefix in remainingChecks:
+            configValue = getConfigurationValue(configKey)
+            if (configValue is None or len(configValue) < 1):
+                print(messagePrefix + str(configValue))
+                return False
+
+        return True
+
+