From 748cb015c69d7e0d4c27035080b6ee716d2711c5 Mon Sep 17 00:00:00 2001 From: Ben Date: Thu, 6 Jul 2017 16:20:15 +0200 Subject: [PATCH] REST works for EMBL now --- HEADER | 9 +- MakeExecutables.bat | 8 +- MakeExecutables.sh | 9 +- README.md | 31 +- Run_allele_submission.bat | 10 +- Run_allele_submission.sh | 10 +- src/AlleleGuiEMBL.py | 336 +++++++++--------- src/AlleleGuiEMBLInputForm.py | 191 ++++++---- src/AlleleGuiIMGT.py | 19 +- src/AlleleGuiIMGTInputForm.py | 43 ++- src/AlleleGuiMain.py | 10 +- src/AlleleSubCommon.py | 44 +-- .../AlleleSubInstallerOptions_Windows.spec | 16 +- src/AlleleSubmissionEMBLRestMethods.py | 129 +++++++ src/AlleleSubmissionEMBLXml.py | 91 +++-- src/AlleleSubmissionMain.py | 16 +- src/HLAGene.py | 8 +- src/ScrolledWindow.py | 29 +- src/SubmissionGeneratorEMBL.py | 20 +- src/SubmissionGeneratorIMGT.py | 284 +-------------- 20 files changed, 629 insertions(+), 684 deletions(-) rename AlleleSubInstallerOptions_Windows.spec => src/AlleleSubInstallerOptions_Windows.spec (71%) create mode 100644 src/AlleleSubmissionEMBLRestMethods.py diff --git a/HEADER b/HEADER index 6d61fb3..dc43bfc 100755 --- a/HEADER +++ b/HEADER @@ -1,16 +1,15 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. 
# # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . -# Version 1.0 diff --git a/MakeExecutables.bat b/MakeExecutables.bat index db32d1d..68cbe52 100755 --- a/MakeExecutables.bat +++ b/MakeExecutables.bat @@ -1,17 +1,17 @@ -:: This file is part of EMBL-HLA-Submission. +:: This file is part of saddle-bags. :: -:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify +:: saddle-bags is free software: you can redistribute it and/or modify :: it under the terms of the GNU Lesser General Public License as published by :: the Free Software Foundation, either version 3 of the License, or :: (at your option) any later version. :: -:: EMBL-HLA-Submission is distributed in the hope that it will be useful, +:: saddle-bags is distributed in the hope that it will be useful, :: but WITHOUT ANY WARRANTY; without even the implied warranty of :: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the :: GNU Lesser General Public License for more details. :: :: You should have received a copy of the GNU Lesser General Public License -:: along with EMBL-HLA-Submission. If not, see . +:: along with saddle-bags. If not, see . :: Version 1.0 diff --git a/MakeExecutables.sh b/MakeExecutables.sh index 1e83bbe..364309b 100755 --- a/MakeExecutables.sh +++ b/MakeExecutables.sh @@ -1,19 +1,18 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
# -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . -# Version 1.0 # This isn't working in it's current state. The spec file is apparently just for windows, # it's still pulling in windows DLLs. diff --git a/README.md b/README.md index 304f28c..3635d17 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,15 @@ -# Bhast -Ben's HLA Allele Submission Tool - -A tool for generating an EMBL-formatted submission of a standard novel HLA allele. +# Saddlebags +A tool for generating allele submissions for the EMBL and IMGT nucleotide databases. ## Download the executable Download an executable for Windows from the Release page. -[Download Bhast for Windows Here](https://github.com/transplantation-immunology/EMBL-HLA-Submission/releases) +[Download Saddlebags for Windows Here](https://github.com/transplantation-immunology/saddle-bags/releases) ## Run using Python -Alternatively, you can run this program using Python 2.7. This works for Mac and Linux users (or Windows). There are prerequesites, you can install them inside an Anaconda environment. +Alternatively, you can run this program using Python 2.7. This works for Mac and Linux users (or Windows). There are prerequisites; I recommend you install them inside an Anaconda environment. See Run_allele_submission.sh and Run_allele_submission.bat for an example of this in Linux and Windows environments, respectively.
+ ``` -python AlleleSubmissionEMBL.py +python AlleleSubmissionMain.py ``` ## To configure Anaconda @@ -21,7 +20,7 @@ To set up the environment in anaconda: Linux/Mac: ``` -conda create --name AlleleSubEnvironment biopython six +conda create --name AlleleSubEnvironment biopython six pycurl source activate AlleleSubEnvironment pip install pyinstaller packaging source deactivate @@ -68,11 +67,17 @@ AGCTAGCTA gctagctagctag ## Annotating your exons -Identifying the exons in your HLA sequence is a nontrivial challenge. We have provided a list of common sequences surrounding exon boundaries. See the [Release Page](https://github.com/transplantation-immunology/EMBL-HLA-Submission/releases) for a .pdf reference. These sequences can probably be found within your HLA consensus sequence at exon boundary sites. It may also help to use the [IMGT/HLA sequence alignment tool](http://www.ebi.ac.uk/ipd/imgt/hla/align.html) for more information on common exon patterns. +Identifying the exons in your HLA sequence is a nontrivial challenge. We have provided a list of common sequences surrounding exon boundaries. See the [Release Page](https://github.com/transplantation-immunology/saddle-bags/releases) for a .pdf reference. These sequences can probably be found within your HLA consensus sequence at exon boundary sites. It may also help to use the [IMGT/HLA sequence alignment tool](http://www.ebi.ac.uk/ipd/imgt/hla/align.html) for more information on common exon patterns. + +## EMBL Metadata Input format +Sample ID: Specified by the submitting laboratory, you may use a value that is informative to you. +Gene: +Class I or II: +Allele Local Name: -## Output Data -The resulting report is in the form of an EMBL HLA Novel Allele submission flatfile. You can submit this to EMBL as a new HLA allele +Saddlebags will submit to EMBL Test environment by default, you must specify that the software target the Live / Production environment. 
+EMBL sequence submissions must be associated with a Study/Project. You may specify the accession number of an existing EMBL study (Get this accession number from [EMBL Webin](https://www.ebi.ac.uk/ena/submit/sra/#home) ), or Saddlebags can create a new project to your specifications. ## For more information on EMBL's ENA format: @@ -81,3 +86,7 @@ http://www.ebi.ac.uk/ena/submit/entry-upload-templates ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/usrman.txt ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html http://www.ebi.ac.uk/ena/software/flat-file-validator + +## For more information on IMGT metadata: + +TODO: Put a description of the IMGT metadata form. There is lots of information that goes in here, and much of it is confusing. How should Primers and Sequencing methodology be provided? What are the options for Ethnic Origin or Sex or Consanguineous? diff --git a/Run_allele_submission.bat b/Run_allele_submission.bat index a7152a8..b893555 100755 --- a/Run_allele_submission.bat +++ b/Run_allele_submission.bat @@ -1,21 +1,21 @@ -:: This file is part of EMBL-HLA-Submission. +:: This file is part of saddle-bags. :: -:: EMBL-HLA-Submission is free software: you can redistribute it and/or modify +:: saddle-bags is free software: you can redistribute it and/or modify :: it under the terms of the GNU Lesser General Public License as published by :: the Free Software Foundation, either version 3 of the License, or :: (at your option) any later version. :: -:: EMBL-HLA-Submission is distributed in the hope that it will be useful, +:: saddle-bags is distributed in the hope that it will be useful, :: but WITHOUT ANY WARRANTY; without even the implied warranty of :: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the :: GNU Lesser General Public License for more details. :: :: You should have received a copy of the GNU Lesser General Public License -:: along with EMBL-HLA-Submission. If not, see . +:: along with saddle-bags. If not, see .
:: Version 1.0 :: See the file README.MD for how to set up your anaconda environment. -activate AlleleSubEnvironment && cd src && python AlleleSubmissionEMBL.py && deactivate +activate AlleleSubEnvironment && cd src && python AlleleSubmissionMain.py && deactivate diff --git a/Run_allele_submission.sh b/Run_allele_submission.sh index b8f9331..3da7d12 100755 --- a/Run_allele_submission.sh +++ b/Run_allele_submission.sh @@ -1,19 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -# Version 1.0 +# along with saddle-bags. If not, see . # See the file README.MD for how to set up your anaconda environment. diff --git a/src/AlleleGuiEMBL.py b/src/AlleleGuiEMBL.py index 38dfe0e..8737461 100755 --- a/src/AlleleGuiEMBL.py +++ b/src/AlleleGuiEMBL.py @@ -1,17 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . import os @@ -24,7 +24,7 @@ import gzip import shutil #import pycurl -import requests +#import StringIO import Tkinter, Tkconstants, tkFileDialog, tkMessageBox from Tkinter import * @@ -33,6 +33,7 @@ from AlleleGuiEMBLInputForm import AlleleGuiEMBLInputForm from AlleleSubCommon import * from AlleleSubmissionEMBLXml import * +from AlleleSubmissionEMBLRestMethods import * #from HLAGene import HLAGene # The AlleleGui class is an extension of Tkinter. The GUI elements and interactions are specified in this class. @@ -182,9 +183,42 @@ def writeMd5(self, inputFileName, outputFileName): # I don't know why 2 spaces, but I'll roll with it. outputFile.write(str(hashValue) + ' ' + str(split(inputFileName)[1])) outputFile.close() + + return hashValue + + + def uploadSubmission(self): print('Uploading Submission to EMBL') + + # Determine a working directory. Folder underneath executable called temp. 
+ try: + workingDirectory = join(expanduser("~"), 'temp_upload_directory') + print('I can work in this directory:' + workingDirectory) + + if not isdir(workingDirectory): + print('Making Directory:' + workingDirectory) + makedirs(workingDirectory) + except Exception: + print 'Cannot Initialize Working Directory' + print sys.exc_info()[1] + tkMessageBox.showinfo('Working Directory Error', + 'Sorry, I failed to create this working directory:\n' + + str(workingDirectory) + + '\n and I cannot continue.\nMaybe this is a ' + + 'permissions issue, are these folders read only?\n' + + str(sys.exc_info()[1])) + return + + restLog = createOutputFile(join(workingDirectory, 'Submission_Log.txt')) + + + + # TODO: Make a REST log. + # For each step report success or failure. Same as popup messages. + + emblUsername = getConfigurationValue('embl_username') emblPassword = getConfigurationValue('embl_password') @@ -195,14 +229,19 @@ def uploadSubmission(self): tkMessageBox.showinfo('Missing Login Credentials', 'You must provide EMBL username and password.\n' 'Please use the "Submission Options" button.') + restLog.write('Missing EMBL Username or Password.' + '\n') return + else: + restLog.write('EMBL Username and Password exist.' + '\n') useTestServers = (int(getConfigurationValue('test_submission')) == 1) # Are you sure? if useTestServers: + restLog.write('Using Test EMBL Server.' + '\n') result = tkMessageBox.askquestion("Submit to TEST / DEMO environment", "You are about to submit a sequence to the\n\nTEST / DEMO EMBL environment.\n\nAre You Sure?", icon='warning') else: + restLog.write('Using Production EMBL Server.' + '\n') result = tkMessageBox.askquestion("Submit to LIVE / PROD environment", "You are about to submit a sequence to the\n\nLIVE / PROD EMBL environment.\n\nAre You Sure?", icon='warning') if result == 'yes': @@ -212,47 +251,20 @@ def uploadSubmission(self): # TODO: Existing project? 
Maybe I should check if the study/project exists, before I get started - - - - # Determine a working directory. Folder underneath executable called temp. - try: - workingDirectory = join(expanduser("~"), 'temp_upload_directory') - print('I can work in this directory:' + workingDirectory) - - if not isdir(workingDirectory): - print('Making Directory:' + workingDirectory) - makedirs(workingDirectory) - except Exception: - print 'Cannot Initialize Working Directory' - print sys.exc_info()[1] - tkMessageBox.showinfo('Working Directory Error', - 'Sorry, I failed to create this working directory:\n' - + str(workingDirectory) - + '\n and I cannot continue.\nMaybe this is a ' - + 'permissions issue, are these folders read only?\n' - + str(sys.exc_info()[1])) - return + + # Give my submission a filename. SOmething with a datetime stamp try: # This includes a "seconds" measure, should be pretty unique. dateTimeNow = '{:%Y_%m_%d_%H_%M_%S}'.format(datetime.datetime.now()) - submissionFileName = join(workingDirectory, 'HLA_Submission_' + dateTimeNow + '.txt') + submissionShortFileName = 'HLA_Submission_' + dateTimeNow + '.txt' + submissionFileName = join(workingDirectory, submissionShortFileName) + zippedShortFileName = submissionShortFileName + '.gz' + zippedFileName = join(workingDirectory, zippedShortFileName) + md5FileName = zippedFileName + '.md5' - except Exception: - print 'Cannot Assign File Name' - print sys.exc_info()[1] - tkMessageBox.showinfo('File Name Error', - 'Sorry, I failed to create this file:\n' - + str(submissionFileName) - + '\n and I cannot continue.\n' - + str(sys.exc_info()[1])) - return - - # Write submission to a file - try: submissionText = self.submOutputGuiObject.get('1.0', 'end') outputFileObject = open(submissionFileName, 'w') @@ -268,11 +280,14 @@ def uploadSubmission(self): + '\n and I cannot continue.\nMaybe this is a ' + 'permissions issue, are these folders read only?\n' + str(sys.exc_info()[1])) + restLog.write('Failure to create submission 
file:' + str(sys.exc_info()[1]) + '\n') return + restLog.write('Submission file was created:' + str(submissionFileName) + '\n') + # gzip the submission file. Make a gz file. try: - zippedFileName = submissionFileName + '.gz' + #zippedFileName = submissionFileName + '.gz' with open(submissionFileName, 'rb') as fileIn, gzip.open(zippedFileName, 'wb') as fileOut: shutil.copyfileobj(fileIn, fileOut) @@ -285,12 +300,15 @@ def uploadSubmission(self): + str(zippedFileName) + '\n and I cannot continue.\n' + str(sys.exc_info()[1])) + restLog.write('Failure to create zip file:' + str(sys.exc_info()[1]) + '\n') return + restLog.write('Zip file was created:' + str(zippedFileName) + '\n') + # Calculate an MD5SUM try: - md5FileName = zippedFileName + '.md5' - self.writeMd5(zippedFileName,md5FileName) + #md5FileName = zippedFileName + '.md5' + md5HashValue = self.writeMd5(zippedFileName,md5FileName) except Exception: print 'Cannot Calculate MD5' @@ -298,7 +316,10 @@ def uploadSubmission(self): tkMessageBox.showinfo('Cannot Calculate an Md5 checksum', 'Sorry, I failed to calculate an md5 checksum\nand I cannot continue.\n' + str(sys.exc_info()[1])) + restLog.write('Failure to create zip file:' + str(sys.exc_info()[1]) + '\n') return + + restLog.write('md5 file was created:' + str(md5FileName) + '\n') # Use FTP to send the file to EMBL try: @@ -321,12 +342,10 @@ def uploadSubmission(self): tkMessageBox.showinfo('Cannot Upload to FTP site', 'Sorry, I failed to upload your submission files to the EMBL FTP site\nand I cannot continue.\n' + str(sys.exc_info()[1])) + restLog.write('Failure to upload to FTP site:' + str(sys.exc_info()[1]) + '\n') return - - # TODO: I Need a center_name. This is based on the username but it goes into these submissions. 
- # Maybe I can get that from REST - + restLog.write('Submission and MD5 successfully uploaded.\n') # Handle the new project # effectively, study = project @@ -337,17 +356,15 @@ def uploadSubmission(self): # Generate Project and Project Submission XML Files try: projectFileName = join(workingDirectory, 'project.xml') - projectText = createProjectXML(projectFileName - , getConfigurationValue('study_name') - , getConfigurationValue('study_description') - , getConfigurationValue('study_abstract')) + projectText = createProjectXML(projectFileName) - projectSubFileName = join(workingDirectory, 'project_submission.xml') - projectSubmissionText = createProjectSubmissionXML('proj_sub_' + dateTimeNow - , projectSubFileName) + projectSubmissionFileName = join(workingDirectory, 'project_submission.xml') + projectSubmissionText = createProjectSubmissionXML(projectSubmissionFileName + ,'proj_sub_' + dateTimeNow + ,'project.xml') - print('I made this project text:\n' + projectText) - print('I made this project submission text:\n' + projectSubmissionText) + #print('I made this project text:\n' + projectText) + #print('I made this project submission text:\n' + projectSubmissionText) except Exception: print 'Cannot Create Project Submission XML' @@ -355,84 +372,32 @@ def uploadSubmission(self): tkMessageBox.showinfo('Cannot Create Project Submission XML', 'Sorry, I failed to create a project XML file\nand I cannot continue.\n' + str(sys.exc_info()[1])) + restLog.write('Failure to create project submission file:' + str(sys.exc_info()[1]) + '\n') return + + restLog.write('Project Submission XML files were created.\n') # Use REST to submit this project try: - #"https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-NNN%20PASSWORD" - requestURL = (getConfigurationValue('embl_rest_address_test') - + '?auth=ENA%20' - + getConfigurationValue('embl_username') - + '%20' - + getConfigurationValue('embl_password') - - - ) - - - # So i keep getting 415 errors. 
Trying to figure out what's up with REST. - - print ('URL=\n'+str(requestURL)) - #auth=HTTPBasicAuth(getConfigurationValue('embl_username'), getConfigurationValue('embl_password')) - #requestUser=(getConfigurationValue('embl_username'), getConfigurationValue('embl_password')) - requestData = {'SUBMISSION':projectSubmissionText - , 'PROJECT':projectText - # ,'content-type':'text/xml' - } + # Return value should be a tuple: + # (Success, ProjectAccession, Messages[]) + (projectSubmissionSuccess, projectAccessionNumber, projectErrorMessages) = performProjectSubmission(projectSubmissionFileName,projectFileName) - requestHeaders = {'content-type':'application/xml' - # 'HTTP Name':'Content-Type' - - } - - - - postResponse = requests.post( - requestURL - , params=requestData - , headers=requestHeaders - -# , auth=requestUser - #, auth=(getConfigurationValue('embl_username'), getConfigurationValue('embl_password')) - ) - - print ('the post response object:\n' + str(postResponse)) - - print ('response status:\n' + str(postResponse.status_code)) - - print('response text:' + str(postResponse.text)) - - print('response content:' + str(postResponse.content)) - - print('is response okay?:' + str(postResponse.status_code == requests.codes.ok)) - - - # headers have the login credentials? - - # -k, --insecure Allow connections to SSL sites without certs (H) - # -F, --form CONTENT Specify HTTP multipart POST data (H) - # those login credentials are passed plain text in the URL? Why? 
- #curl -k -F "SUBMISSION=@sub.xml" -F "PROJECT=@project.xml" "https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA%20Webin-NNN%20PASSWORD" - #curl -k -F "SUBMISSION=@"$submissionFileName -F "PROJECT=@"$projectFileName $testEnaSite"?auth=ENA%20"$userName"%20"$password > curlProjResults.xml - - - #data = ['SUBMISSION': '@projectSubFileName' - # , - # 'tx': str(request.GET.get('tx')), - # 'at': paypal_pdt_test - # ] - - #post = urllib.urlencode(data) - - #c = pycurl.Curl() - #if(useTestServers): - #c.setopt(pycurl.URL, getConfigurationValue('embl_rest_address_test')) - #c.setopt(pycurl.HTTPHEADER, ['X-Postmark-Server-Token: API_TOKEN_HERE','Accept: application/json']) - #c.setopt(pycurl.POST, 1) - #c.setopt(pycurl.POSTFIELDS, data) - - #print ('about to perform curl:' + str(c)) - #c.perform() + if(projectSubmissionSuccess): + # Great. The project was created successfully. + # Lets use this new study accession moving forward. + assignConfigurationValue('study_accession', projectAccessionNumber) + assignConfigurationValue('choose_project','1') + pass + else: + messageText = ('There was a problem in the Project Submission.\n' + + 'I cannot continue.\n' + + 'These messages were reported by EMBL:\n') + for errorMessage in projectErrorMessages: + messageText += ('\n' + errorMessage + '\n') + tkMessageBox.showinfo('Cannot Submit Project XML via REST', messageText) + restLog.write('Failure to submit project submission file:' + str(sys.exc_info()[1]) + '\n') + return except Exception: print 'Cannot Submit Project XML' @@ -440,37 +405,86 @@ def uploadSubmission(self): tkMessageBox.showinfo('Cannot Submit Project XML', 'Sorry, I failed to submit the project XML file\nand I cannot continue.\n' + str(sys.exc_info()[1])) - return - - # Create a new project REST - # Open Response, determine if success - - # If errors: - # report Errors and give up - # else: - # store accession# - + restLog.write('Failure to upload project submission file:' + str(sys.exc_info()[1]) + 
'\n') + return - else: #(existing project) + restLog.write('New study has been uploaded, accession:' + str(getConfigurationValue('study_accession')) + '\n') + + # existing project, we will use the supplied accession # + else: + restLog.write('Using existing study accession:' + str(getConfigurationValue('study_accession')) + '\n') + # projectAccessionNumber = getConfigurationValue('study_accession') pass - - # else (existing project) - # Store the project accession # + # Generate Analysis and Analysis Submission xmls + try: + analysisFileName = join(workingDirectory, 'analysis.xml') + analysisText = createAnalysisXML(analysisFileName, md5HashValue, zippedShortFileName) + + analysisSubmissionFileName = join(workingDirectory, 'analysis_submission.xml') + analysisSubmissionText = createAnalysisSubmissionXML(analysisSubmissionFileName + ,'analysis_sub_' + dateTimeNow + ,'analysis.xml') + + except Exception: + print 'Cannot Create Analysis Submission XML' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Create Analysis Submission XML', + 'Sorry, I failed to create a Analysis XML file\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + restLog.write('Failure to create analysis submission file:' + str(sys.exc_info()[1]) + '\n') + return - # Generate XML Files for new sequence - # REST the new XML files over to them. - # Open response determine if success? - # Gather Important Accession Numbers - # Store accession number in our config file + restLog.write('Analysis Submission XML files were created.\n') + + # Use REST to submit this analysis + try: + # Return value should be a tuple: + # (Success, analysisAccessionNumber, Messages[]) + (analysisSubmissionSuccess, analysisAccessionNumber, analysisErrorMessages) = performAnalysisSubmission(analysisSubmissionFileName,analysisFileName) + + if(analysisSubmissionSuccess): + # Great. The analysis was created successfully. 
+ pass + else: + messageText = ('There was a problem in the Analysis Submission.\n' + + 'I cannot continue.\n' + + 'These messages were reported by EMBL:\n') + for errorMessage in analysisErrorMessages: + messageText += ('\n' + errorMessage + '\n') + tkMessageBox.showinfo('Cannot Submit Analysis XML via REST', messageText) + restLog.write('Failure to submit analysis submission file:' + str(sys.exc_info()[1]) + '\n') + return + + except Exception: + print 'Cannot Submit Analysis XML' + print sys.exc_info()[1] + tkMessageBox.showinfo('Cannot Submit Analysis XML via REST', + 'Sorry, I failed to submit the analysis XML file\nand I cannot continue.\n' + + str(sys.exc_info()[1])) + return + + restLog.write('New analysis has been Uploaded, accession:' + str(analysisAccessionNumber) + '\n') + + restLog.close() + # Popup message with Results + tkMessageBox.showinfo('Success uploading submission to EMBL.', + 'The sequence and analysis was uploaded to EMBL ENA Successfully.\n\n' + + 'For your reference:\n\n' + + 'You can use this Project/Study accession\nnumber on future submissions:\n' + + 'Study Accession:' + str(getConfigurationValue('study_accession') + '\n\n') + + 'Use the Analysis Accession number if you\ncontact EMBL regarding this\nsequence submission:\n' + + 'Analysis Accession:' + str(analysisAccessionNumber) + '\n\n' + + 'Find your submission files here:\n' + + workingDirectory + '\n\n' + + 'If EMBL successfully validates your sequence, you will\n' + + 'recieve an email with an EMBL Sequence accession number.\n' + + 'This *SEQUENCE* accession number is necessary for IMGT submission.\n' + + 'Contact EMBL Support with your\nAnalysis Accession # if it has been\nmore than 48 hours since submission.\n' + + ) - tkMessageBox.showinfo('Success uploading submission.', - 'Everything worked fine.\n' - +'Congratulations. 
TODO: This message should have more information in it.') - - - def sampleSequence(self): self.featureInputGuiObject.delete('1.0','end') @@ -489,10 +503,14 @@ def sampleSequence(self): assignConfigurationValue('choose_project','2') - assignConfigurationValue('study_name','HLA_Analysis_Project') - assignConfigurationValue('study_description','Our laboratory performs HLA typing for Research') + assignConfigurationValue('study_identifier','HLA_Analysis_Project') + assignConfigurationValue('study_short_title','HLA Typing for Cancer Research.') assignConfigurationValue('study_abstract','An abstract is a more in-depth description of the nature of the research project.') + assignConfigurationValue('analysis_alias','unique_HLA_analysis_alias') + assignConfigurationValue('analysis_title','Novel HLA sequence from patient with Leukemia') + assignConfigurationValue('analysis_description','This is an HLA-A sequence from a patient. It was discovered that he has Leukemia, so we decided to sequence his HLA.') + self.constructSubmission() # This method should popup some instruction text in a wee window. @@ -547,7 +565,7 @@ def contactInformation(self): + 'm.tilanus@mumc.nl\n\n' + 'This code will be hosted at:\n' - + 'https://github.com/transplantation-\nimmunology/EMBL-HLA-Submission\n' + + 'https://github.com/transplantation-\nimmunology/saddle-bags\n' + 'You will find more information on\n' + 'EMBL\'s data format on that page.' @@ -578,12 +596,6 @@ def constructSubmission(self): allGen = SubmissionGeneratorEMBL() roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end') - # TODO: I uncommented these, hope there is no problem. 
- #allGen.inputSampleID = getConfigurationValue('sample_id') - #allGen.inputGene = getConfigurationValue('gene') - #allGen.inputAllele = getConfigurationValue('allele_name') - #allGen.inputClass = getConfigurationValue('class') - allGen.sequenceAnnotation = annotateRoughInputSequence(roughFeatureSequence) enaSubmission = allGen.buildENASubmission() diff --git a/src/AlleleGuiEMBLInputForm.py b/src/AlleleGuiEMBLInputForm.py index 1dd6b23..44fcbef 100755 --- a/src/AlleleGuiEMBLInputForm.py +++ b/src/AlleleGuiEMBLInputForm.py @@ -1,17 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . import os @@ -32,21 +32,49 @@ def __init__(self, root): # To define the exit behavior. Save and exit. self.parent.protocol('WM_DELETE_WINDOW', self.saveOptions) + + # Define the return behavior. Same as "close window" etc + root.bind('', self.returnFunction) # This window should not be resizeable. I guess. 
self.parent.resizable(width=False, height=False) + #Standard Inputs widths for the form elements + formInputWidth = 30 + labelInputWidth = 30 + self.instructionsFrame = Tkinter.Frame(self) self.instructionText = Tkinter.StringVar() - self.instructionText.set('\nThese options are required for an EMBL allele submission.\n' - + 'Login Credentials will not be stored, but they will be sent to EMBL via\n' - + 'secure https connection.\n') - Tkinter.Label(self.instructionsFrame, width=85, height=6, textvariable=self.instructionText).pack() + self.instructionText.set('\nThese options are required for an EMBL allele submission.\n') + Tkinter.Label(self.instructionsFrame, width=85, height=3, textvariable=self.instructionText).pack() self.instructionsFrame.pack() - #Standard Inputs widths for the form elements - formInputWidth = 30 - labelInputWidth = 30 + self.submissionDetailsInputFrame2 = Tkinter.Frame(self) + + self.sampleIDInstrText = Tkinter.StringVar() + self.sampleIDInstrText.set('Sample ID:') + self.sampleIDinstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.sampleIDInstrText).grid(row=0, column=0) + self.inputSampleID = Tkinter.StringVar() + self.inputSampleIDEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputSampleID).grid(row=0, column=1) + + self.geneInstrStringVar = Tkinter.StringVar() + self.geneInstrStringVar.set('Gene:') + self.geneInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.geneInstrStringVar).grid(row=1, column=0) + self.inputGene = Tkinter.StringVar() + self.inputGeneEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputGene).grid(row=1, column=1) + + self.chooseClassIntVar = IntVar() + self.chooseClassIntVar.set(1) + Radiobutton(self.submissionDetailsInputFrame2, text="HLA Class I ", variable=self.chooseClassIntVar, 
value=1).grid(row=2, column=0) + Radiobutton(self.submissionDetailsInputFrame2, text="HLA Class II", variable=self.chooseClassIntVar, value=2).grid(row=2, column=1) + + self.alleleInstrText = Tkinter.StringVar() + self.alleleInstrText.set('Allele Local Name:') + self.alleleInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.alleleInstrText).grid(row=3, column=0) + self.inputAllele = Tkinter.StringVar() + self.inputAlleleEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputAllele).grid(row=3, column=1) + + self.submissionDetailsInputFrame2.pack() # Make a frame to contain the Test/Production radio buttons. @@ -56,8 +84,10 @@ def __init__(self, root): self.testProductionInstrText.set('\nBy default, you submit to the EMBL test servers,\n' + 'where submissions are regularly deleted.\n' + 'change this option if you want to submit to the live EMBL environment.\n' + + 'Login Credentials will not be stored, but they will be sent\n' + + 'to EMBL via secure https connection.\n' ) - self.alleleInstrLabel = Tkinter.Label(self.testProductionFrame, width=70, height=5, textvariable=self.testProductionInstrText).pack()#.grid(row=2, column=0) + self.alleleInstrLabel = Tkinter.Label(self.testProductionFrame, width=70, height=7, textvariable=self.testProductionInstrText).pack()#.grid(row=2, column=0) # 1 = Test. 
0 = Production/live server self.chooseTestServersIntVar = IntVar() @@ -83,30 +113,33 @@ def __init__(self, root): self.inputPassword = Tkinter.StringVar() self.inputPasswordEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputPassword, show="*").grid(row=1, column=1) - self.sampleIDInstrText = Tkinter.StringVar() - self.sampleIDInstrText.set('Sample ID:') - self.sampleIDinstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.sampleIDInstrText).grid(row=2, column=0) - self.inputSampleID = Tkinter.StringVar() - self.inputSampleIDEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputSampleID).grid(row=2, column=1) - - self.geneInstrStringVar = Tkinter.StringVar() - self.geneInstrStringVar.set('Gene:') - self.geneInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.geneInstrStringVar).grid(row=3, column=0) - self.inputGene = Tkinter.StringVar() - self.inputGeneEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputGene).grid(row=3, column=1) + self.submissionDetailsInputFrame.pack() + + + # Frame to specify Analysis Information + self.newAnalysisFrame = Tkinter.Frame(self) - self.chooseClassIntVar = IntVar() - self.chooseClassIntVar.set(1) - Radiobutton(self.submissionDetailsInputFrame, text="HLA Class I ", variable=self.chooseClassIntVar, value=1).grid(row=4, column=0) - Radiobutton(self.submissionDetailsInputFrame, text="HLA Class II", variable=self.chooseClassIntVar, value=2).grid(row=4, column=1) + self.analysisAliasInstrText = Tkinter.StringVar() + self.analysisAliasInstrText.set('Analysis Alias:') + self.analysisAliasInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisAliasInstrText).grid(row=0, column=0) + self.inputAnalysisAlias = Tkinter.StringVar() + 
self.inputStudyIdEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisAlias).grid(row=0, column=1) - self.alleleInstrText = Tkinter.StringVar() - self.alleleInstrText.set('Allele Local Name:') - self.alleleInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame, width=labelInputWidth, height=1, textvariable=self.alleleInstrText).grid(row=5, column=0) - self.inputAllele = Tkinter.StringVar() - self.inputAlleleEntry = Tkinter.Entry(self.submissionDetailsInputFrame, width=formInputWidth, textvariable=self.inputAllele).grid(row=5, column=1) + self.analysisTitleInstrText = Tkinter.StringVar() + self.analysisTitleInstrText.set('Analysis Title:') + self.analysisTitleInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisTitleInstrText).grid(row=1, column=0) + self.inputAnalysisTitle = Tkinter.StringVar() + self.inputAnalysisTitleEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisTitle).grid(row=1, column=1) - self.submissionDetailsInputFrame.pack() + self.analysisDescriptionInstrText = Tkinter.StringVar() + self.analysisDescriptionInstrText.set('Analysis Description:') + self.analysisDescriptionInstrLabel = Tkinter.Label(self.newAnalysisFrame, width=labelInputWidth, height=1, textvariable=self.analysisDescriptionInstrText).grid(row=2, column=0) + self.inputAnalysisDescription = Tkinter.StringVar() + self.inputAnalysisDescriptionEntry = Tkinter.Entry(self.newAnalysisFrame, width=formInputWidth, textvariable=self.inputAnalysisDescription).grid(row=2, column=1) + + self.newAnalysisFrame.pack() + + # A Frame for specifing the details of the Study / Project self.projectDetailsFrame = Tkinter.Frame(self) @@ -125,7 +158,7 @@ def __init__(self, root): self.existingProjectFrame = Tkinter.Frame(self.projectDetailsFrame) Radiobutton(self.existingProjectFrame, text="Use this study accession:", variable=self.chooseProjectIntVar, 
value=1).grid(row=0,column=0) self.inputStudyAccession = Tkinter.StringVar() - self.inputStudyNameEntry = Tkinter.Entry(self.existingProjectFrame, width=formInputWidth, textvariable=self.inputStudyAccession).grid(row=0, column=1) + self.inputStudyIdEntry = Tkinter.Entry(self.existingProjectFrame, width=formInputWidth, textvariable=self.inputStudyAccession).grid(row=0, column=1) self.existingProjectFrame.pack() @@ -139,20 +172,20 @@ def __init__(self, root): self.newProjectFrame = Tkinter.Frame(self.projectDetailsFrame) - self.studyNameInstrText = Tkinter.StringVar() - self.studyNameInstrText.set('Study Name:') - self.studyNameInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyNameInstrText).grid(row=0, column=0) - self.inputStudyName = Tkinter.StringVar() - self.inputStudyNameEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyName).grid(row=0, column=1) + self.studyIdInstrText = Tkinter.StringVar() + self.studyIdInstrText.set('Short Study Identifier:') + self.studyIdInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyIdInstrText).grid(row=0, column=0) + self.inputStudyId = Tkinter.StringVar() + self.inputStudyIdEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyId).grid(row=0, column=1) - self.studyShortDescriptionInstrText = Tkinter.StringVar() - self.studyShortDescriptionInstrText.set('Short Description:') - self.studyShortDescriptionInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyShortDescriptionInstrText).grid(row=1, column=0) - self.inputStudyShortDescription = Tkinter.StringVar() - self.inputStudyShortDescriptionEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyShortDescription).grid(row=1, column=1) + self.studyShortTitleInstrText = Tkinter.StringVar() + 
self.studyShortTitleInstrText.set('Descriptive Study Title:') + self.studyShortTitleInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyShortTitleInstrText).grid(row=1, column=0) + self.inputStudyShortTitle = Tkinter.StringVar() + self.inputStudyShortTitleEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyShortTitle).grid(row=1, column=1) self.studyAbstractInstrText = Tkinter.StringVar() - self.studyAbstractInstrText.set('Study Abstract:') + self.studyAbstractInstrText.set('Study Description / Abstract:') self.studyAbstractInstrLabel = Tkinter.Label(self.newProjectFrame, width=labelInputWidth, height=1, textvariable=self.studyAbstractInstrText).grid(row=2, column=0) self.inputStudyAbstract = Tkinter.StringVar() self.inputStudyAbstractEntry = Tkinter.Entry(self.newProjectFrame, width=formInputWidth, textvariable=self.inputStudyAbstract).grid(row=2, column=1) @@ -166,8 +199,16 @@ def __init__(self, root): Tkinter.Button(self.saveOptionsFrame, text='Save Options', command=self.saveOptions).grid(row=0, column=0) self.saveOptionsFrame.pack() + # TODO: Should there be a cancel button, to close this window without saving? + self.loadOptions() + # I needed a function for the return keypress to latch onto. + # It is just a wrapper for the saveOptions method. + def returnFunction(self, event): + self.saveOptions() + + # submissionOptions is a dictionary, passed by the parent. 
def loadOptions(self): if getConfigurationValue('embl_username') is not None: @@ -204,11 +245,11 @@ def loadOptions(self): if getConfigurationValue('study_accession') is not None: self.inputStudyAccession.set(getConfigurationValue('study_accession')) - if getConfigurationValue('study_name') is not None: - self.inputStudyName.set(getConfigurationValue('study_name')) + if getConfigurationValue('study_identifier') is not None: + self.inputStudyId.set(getConfigurationValue('study_identifier')) - if getConfigurationValue('study_description') is not None: - self.inputStudyShortDescription.set(getConfigurationValue('study_description')) + if getConfigurationValue('study_short_title') is not None: + self.inputStudyShortTitle.set(getConfigurationValue('study_short_title')) if getConfigurationValue('study_abstract') is not None: self.inputStudyAbstract.set(getConfigurationValue('study_abstract')) @@ -216,12 +257,16 @@ def loadOptions(self): if getConfigurationValue('test_submission') is not None: # 1 = Test. 
0 = Production/live server self.chooseTestServersIntVar.set(int(getConfigurationValue('test_submission'))) - - + if getConfigurationValue('analysis_alias') is not None: + self.inputAnalysisAlias.set(getConfigurationValue('analysis_alias')) + if getConfigurationValue('analysis_title') is not None: + self.inputAnalysisTitle.set(getConfigurationValue('analysis_title')) + if getConfigurationValue('analysis_description') is not None: + self.inputAnalysisDescription.set(getConfigurationValue('analysis_description')) + def saveOptions(self): - # TODO: Save the options to our configuration dictionary # Close the window if (self.checkOptions()): print ('Saving Options....') @@ -236,10 +281,13 @@ def saveOptions(self): assignConfigurationValue('allele_name', self.inputAllele.get()) assignConfigurationValue('choose_project', str(self.chooseProjectIntVar.get())) assignConfigurationValue('study_accession', self.inputStudyAccession.get()) - assignConfigurationValue('study_name', self.inputStudyName.get()) - assignConfigurationValue('study_description', self.inputStudyShortDescription.get()) + assignConfigurationValue('study_identifier', self.inputStudyId.get()) + assignConfigurationValue('study_short_title', self.inputStudyShortTitle.get()) assignConfigurationValue('study_abstract', self.inputStudyAbstract.get()) assignConfigurationValue('test_submission', str(self.chooseTestServersIntVar.get())) + assignConfigurationValue('analysis_alias', str(self.inputAnalysisAlias.get())) + assignConfigurationValue('analysis_title', str(self.inputAnalysisTitle.get())) + assignConfigurationValue('analysis_description', str(self.inputAnalysisDescription.get())) self.parent.destroy() @@ -248,11 +296,8 @@ def saveOptions(self): pass def checkOptions(self): - # TODO this method - print ('Checking options.') - - #chooseProjectIntVar - + #print ('Checking options.') + # Don't check the EMBL Username # Don't check the EMBL Password @@ -282,12 +327,12 @@ def checkOptions(self): 
elif(str(self.chooseProjectIntVar.get()) == '2'): # Use New Project - if (not self.inputStudyName.get()): + if (not self.inputStudyId.get()): tkMessageBox.showwarning('Missing Form Value', 'You are missing a Study Name. Please try again.') return False - if (not self.inputStudyShortDescription.get()): + if (not self.inputStudyShortTitle.get()): tkMessageBox.showwarning('Missing Form Value', 'You are missing a Study Description. Please try again.') return False @@ -299,14 +344,32 @@ def checkOptions(self): return False else: - raise Exception ('Unknown value of self.chooseProjectIntVar. I expect 1 or 2. Observed:' + str(self.chooseProjectIntVar)) + raise Exception ('Unknown value of self.chooseProjectIntVar. I expect 1 or 2. Observed:' + str(self.chooseProjectIntVar)) + + + if (not self.inputAnalysisAlias.get()): + tkMessageBox.showwarning('Missing Form Value', + 'You are missing an Analysis Alias. Please try again.') + return False + + if (not self.inputAnalysisTitle.get()): + tkMessageBox.showwarning('Missing Form Value', + 'You are missing an Analysis Title. Please try again.') + return False + + if (not self.inputAnalysisDescription.get()): + tkMessageBox.showwarning('Missing Form Value', + 'You are missing an Analysis Description. Please try again.') + return False + + # All options look good, right? + + return True def closeWindow(self): - #writeConfigurationFile() - self.parent.destroy() \ No newline at end of file diff --git a/src/AlleleGuiIMGT.py b/src/AlleleGuiIMGT.py index 8ba9363..42c53d4 100755 --- a/src/AlleleGuiIMGT.py +++ b/src/AlleleGuiIMGT.py @@ -1,19 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - - +# along with saddle-bags. If not, see . import os from os.path import expanduser @@ -189,7 +187,7 @@ def sampleSequence(self): assignConfigurationValue('ethnic_origin', 'Unknown') assignConfigurationValue('sex', 'Unknown') - assignConfigurationValue('cosanguinous', 'Unknown') + assignConfigurationValue('consanguineous', 'Unknown') assignConfigurationValue('homozygous', 'Unknown') @@ -247,7 +245,7 @@ def contactInformation(self): + 'm.tilanus@mumc.nl\n\n' + 'This code will be hosted at:\n' - + 'https://github.com/transplantation-\nimmunology/EMBL-HLA-Submission\n' + + 'https://github.com/transplantation-\nimmunology/saddle-bags\n' + 'You will find more information on\n' + 'IMGT\'s data format on that page.' @@ -275,11 +273,6 @@ def constructSubmission(self): allGen = SubmissionGeneratorIMGT() roughFeatureSequence = self.featureInputGuiObject.get('1.0', 'end') - # Don't assign these, they should already be stored in our configuration. 
- #allGen.inputSampleID = getConfigurationValue('sample_id') - #allGen.inputGene = getConfigurationValue('gene') - # allGen.inputAllele = getConfigurationValue('allele_name') - allGen.sequenceAnnotation = annotateRoughInputSequence(roughFeatureSequence) imgtSubmission = allGen.buildIMGTSubmission() diff --git a/src/AlleleGuiIMGTInputForm.py b/src/AlleleGuiIMGTInputForm.py index 7da8b43..644f275 100755 --- a/src/AlleleGuiIMGTInputForm.py +++ b/src/AlleleGuiIMGTInputForm.py @@ -1,17 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . import os @@ -47,6 +47,9 @@ def __init__(self, root): # To define the exit behavior. Save and exit. self.parent.protocol('WM_DELETE_WINDOW', self.saveOptions) + # Define the return behavior. 
Same as "close window" etc + root.bind('', self.returnFunction) + self.instructionsFrame = Tkinter.Frame(self.interior) self.instructionText = Tkinter.StringVar() self.instructionText.set('\nThese options are required for an IMGT allele submission.\n' @@ -224,12 +227,12 @@ def __init__(self, root): self.inputSex = Tkinter.StringVar() self.inputSexEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputSex).grid(row=4, column=1) - # Cosanguinous (T/F) - self.cosanguinousInstrText = Tkinter.StringVar() - self.cosanguinousInstrText.set('Sample is Cosanguinous:') - self.cosanguinousInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.cosanguinousInstrText).grid(row=5, column=0) - self.inputCosanguinous = Tkinter.StringVar() - self.inputCosanguinousEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputCosanguinous).grid(row=5, column=1) + # Consanguineous (T/F) + self.consanguineousInstrText = Tkinter.StringVar() + self.consanguineousInstrText.set('Sample is Consanguineous:') + self.consanguineousInstrLabel = Tkinter.Label(self.submissionDetailsInputFrame2, width=labelInputWidth, height=1, textvariable=self.consanguineousInstrText).grid(row=5, column=0) + self.inputConsanguineous = Tkinter.StringVar() + self.inputConsanguineousEntry = Tkinter.Entry(self.submissionDetailsInputFrame2, width=formInputWidth, textvariable=self.inputConsanguineous).grid(row=5, column=1) # Homozygous (T/F) self.homozygousInstrText = Tkinter.StringVar() @@ -288,6 +291,11 @@ def __init__(self, root): self.saveOptionsFrame.pack() self.loadOptions() + + # I needed a function for the return keypress to latch onto. + # It is just a wrapper for the saveOptions method. + def returnFunction(self, event): + self.saveOptions() # submissionOptions is a dictionary, passed by the parent. 
def loadOptions(self): @@ -343,8 +351,8 @@ def loadOptions(self): self.inputEthnicOrigin.set(getConfigurationValue('ethnic_origin')) if getConfigurationValue('sex') is not None: self.inputSex.set(getConfigurationValue('sex')) - if getConfigurationValue('cosanguinous') is not None: - self.inputCosanguinous.set(getConfigurationValue('cosanguinous')) + if getConfigurationValue('consanguineous') is not None: + self.inputConsanguineous.set(getConfigurationValue('consanguineous')) if getConfigurationValue('homozygous') is not None: self.inputHomozygous.set(getConfigurationValue('homozygous')) @@ -353,7 +361,6 @@ def loadOptions(self): def saveOptions(self): - # TODO: Save the options to our configuration dictionary # Close the window if (self.checkOptions()): print ('Saving Options....') @@ -383,11 +390,9 @@ def saveOptions(self): assignConfigurationValue('sex', self.inputSex.get()) # TODO: Accepted values are 'Yes', 'No', 'Unknown' - assignConfigurationValue('cosanguinous', self.inputCosanguinous.get()) + assignConfigurationValue('consanguineous', self.inputConsanguineous.get()) assignConfigurationValue('homozygous', self.inputHomozygous.get()) - - - + self.parent.destroy() else: @@ -449,9 +454,9 @@ def checkOptions(self): return False # TODO: Accepted values are 'Yes', 'No', 'Unknown' I think - if (not self.inputCosanguinous.get()): + if (not self.inputConsanguineous.get()): tkMessageBox.showwarning('Missing Form Value', - 'Please indicate if the sample is cosanguinous or not.') + 'Please indicate if the sample is consanguineous or not.') return False if (not self.inputHomozygous.get()): tkMessageBox.showwarning('Missing Form Value', diff --git a/src/AlleleGuiMain.py b/src/AlleleGuiMain.py index 48ad36c..e49dbd5 100755 --- a/src/AlleleGuiMain.py +++ b/src/AlleleGuiMain.py @@ -1,17 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . import os @@ -139,7 +139,7 @@ def contactInformation(self): + 'm.tilanus@mumc.nl\n\n' + 'This code will be hosted at:\n' - + 'https://github.com/transplantation-\nimmunology/EMBL-HLA-Submission\n' + + 'https://github.com/transplantation-\nimmunology/saddle-bags\n' + 'You will find more information on\n' + 'EMBL\'s data format on that page.' diff --git a/src/AlleleSubCommon.py b/src/AlleleSubCommon.py index f1cbea9..f108d8d 100755 --- a/src/AlleleSubCommon.py +++ b/src/AlleleSubCommon.py @@ -1,22 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. 
# -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - - -#from numpy.compat.setup import configuration - -#SoftwareVersion = "Bhast Version 1.0" +# along with saddle-bags. If not, see . import xml.etree.ElementTree as ET import xml.dom.minidom @@ -30,7 +25,7 @@ import tkMessageBox import sys -from os.path import dirname, join, abspath, isfile +from os.path import dirname, join, abspath, isfile, expanduser from HLAGene import * @@ -236,8 +231,6 @@ def annotateRoughInputSequence(inputSequenceText): return resultGeneLoci #self.sequenceAnnotation = resultGeneLoci - - # This method is a directory-safe way to open up a write file. def createOutputFile(outputfileName): tempDir, tempFilename = split(outputfileName) @@ -247,8 +240,6 @@ def createOutputFile(outputfileName): resultsOutput = open(outputfileName, 'w') return resultsOutput - - # I'm storing global variables in a dictionary for now. def initializeGlobalVariables(): global globalVariables @@ -267,26 +258,9 @@ def getConfigurationValue(configurationKey): print ('Configuration Key Not Found:' + configurationKey) #raise KeyError('Key Not Found:' + configurationKey) return None - def assignConfigName(): - initializeGlobalVariables() - - # Find the directory the program is running from. - # It is not straight-forward, because sometimes we run this program inside an .exe - # pyinstaller puts the exe path in sys._MEIPASS - # This is useful because we want a config file in the same directory. 
- if getattr(sys, 'frozen', False): - globalVariables['saddlebags_application_path'] = sys._MEIPASS - else: - globalVariables['saddlebags_application_path'] = dirname(abspath(__file__)) - - # TODO: Store the directory someone saves in. - # I should assign the directory to a default value. - - print 'This application is running from the following directory:\n' + globalVariables['saddlebags_application_path'] - globalVariables['config_file_location'] = join(globalVariables['saddlebags_application_path'], 'Saddlebags.Config.xml') - + assignConfigurationValue('config_file_location',join(expanduser("~"),'Saddlebags.Config.xml')) def writeConfigurationFile(): assignConfigName() @@ -300,8 +274,6 @@ def writeConfigurationFile(): if(key not in [ 'embl_password' ,'imgt_password' - , 'saddlebags_application_path' - , 'config_file_location' , 'sequence' ]): ET.SubElement(root, key).text = globalVariables[key] @@ -334,9 +306,7 @@ def loadConfigurationFile(): assignConfigurationValue('embl_ftp_upload_site_prod', 'webin.ebi.ac.uk') assignConfigurationValue('embl_rest_address_test', 'https://www-test.ebi.ac.uk/ena/submit/drop-box/submit/') assignConfigurationValue('embl_rest_address_prod', 'https://www.ebi.ac.uk/ena/submit/drop-box/submit/') - - - + else: print ('The config file already exists, I will load it:\n' + globalVariables['config_file_location']) diff --git a/AlleleSubInstallerOptions_Windows.spec b/src/AlleleSubInstallerOptions_Windows.spec similarity index 71% rename from AlleleSubInstallerOptions_Windows.spec rename to src/AlleleSubInstallerOptions_Windows.spec index 72a4717..4210ec3 100755 --- a/AlleleSubInstallerOptions_Windows.spec +++ b/src/AlleleSubInstallerOptions_Windows.spec @@ -1,21 +1,19 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . -# Version 1.0 - -# This file contains specifications for packaging of the MinION Extractor GUI +# This file contains specifications for packaging of saddlebags # As a standalone executable. This file is meant to be used with pyinstaller # http://www.pyinstaller.org/ @@ -25,7 +23,7 @@ block_cipher = None -a = Analysis(['AlleleSubmissionEMBL.py'], +a = Analysis(['AlleleSubmissionMain.py'], binaries=None, datas=None, hiddenimports=['six', 'packaging', 'packaging.requirements', 'packaging.version', 'packaging.specifiers', 'Tkinter', 'tkFileDialog', 'Tkconstants'], @@ -42,7 +40,7 @@ exe = EXE(pyz, a.binaries, a.zipfiles, a.datas, - name='AlleleSubmissionEMBLWindows', + name='SaddlebagsWindows', debug=False, strip=False, upx=True, diff --git a/src/AlleleSubmissionEMBLRestMethods.py b/src/AlleleSubmissionEMBLRestMethods.py new file mode 100644 index 0000000..566cb2e --- /dev/null +++ b/src/AlleleSubmissionEMBLRestMethods.py @@ -0,0 +1,129 @@ +# This file is part of saddle-bags. 
+# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . + + + + +from AlleleSubCommon import * +import pycurl +import StringIO + +import xml.etree.ElementTree as ET +import xml.dom.minidom + +# Here we have methods to perform REST interactions necessary for EMBL submission. + +def performProjectSubmission(submissionFileName, projectFileName): + POST_DATA = [('SUBMISSION', (pycurl.FORM_FILE, submissionFileName)), + ('PROJECT', (pycurl.FORM_FILE, projectFileName))] + + responseText = performSubmission(submissionFileName, POST_DATA) + return interpretProjectSubmissionResults(responseText) + +def performAnalysisSubmission(submissionFileName, analysisFileName): + POST_DATA = [('SUBMISSION', (pycurl.FORM_FILE, submissionFileName)), + ('ANALYSIS', (pycurl.FORM_FILE, analysisFileName))] + + responseText = performSubmission(submissionFileName, POST_DATA) + + return interpretAnalysisSubmissionResults(responseText) + +def performSubmission(submissionFileName, POST_DATA): + if (str(getConfigurationValue('test_submission')) == '0'): + print 'THIS IS A LIVE SUBMISSION AT EMBL.' + requestURL = str(getConfigurationValue('embl_rest_address_prod')) + '?auth=ENA%20' + str(getConfigurationValue('embl_username')) + '%20' + str(getConfigurationValue('embl_password')) + else: + print 'THIS IS A TEST SUBMISSION AT EMBL.' 
+ requestURL = str(getConfigurationValue('embl_rest_address_test')) + '?auth=ENA%20' + str(getConfigurationValue('embl_username')) + '%20' + str(getConfigurationValue('embl_password')) + + curlResponseBuffer = StringIO.StringIO() + curlObject = pycurl.Curl() + curlObject.setopt(curlObject.URL, requestURL) + curlObject.setopt(curlObject.POST, 1) + curlObject.setopt(curlObject.HTTPPOST, POST_DATA) + curlObject.setopt(curlObject.USERAGENT, 'Curl') + curlObject.setopt(curlObject.WRITEFUNCTION, curlResponseBuffer.write) + curlObject.setopt(pycurl.HTTPHEADER, ['Accept:application/xml']) + # Insecure. Any security experts want to make this better? + curlObject.setopt(pycurl.SSL_VERIFYHOST, 0) + curlObject.setopt(pycurl.SSL_VERIFYPEER, 0) + curlObject.perform() + curlObject.close() + + responseText = curlResponseBuffer.getvalue() + + # write XML to file. + projectSubResultsFileName = submissionFileName.replace('.xml','_results.xml') + resultsFile = createOutputFile(projectSubResultsFileName) + resultsFile.write(responseText) + resultsFile.close() + + return responseText + +def interpretProjectSubmissionResults(responseText): + # Open XML to report results: + root = ET.fromstring(responseText) + submissionSuccess = (root.attrib['success'] == 'true') + + projectAccession = None + messages = [] + + for child in root: + if(child.tag == 'PROJECT'): + if ('accession' in child.attrib.keys()): + projectAccession = child.attrib['accession'] + else: + projectAccession = None + #print('I found a project node.') + elif(child.tag == 'MESSAGES'): + print('I found some messages.') + for messageNode in child: + #print (messageNode.tag + ':' + messageNode.text) + messages.append(messageNode.tag + ':' + messageNode.text) + else: + # Don't care about the other nodes + pass + + # Return value should be a tuple: + # (Success, ProjectAccession, Messages[]) + return (submissionSuccess,projectAccession,messages) + +def interpretAnalysisSubmissionResults(responseText): + root = 
ET.fromstring(responseText) + submissionSuccess = (root.attrib['success'] == 'true') + + analysisAccession = None + messages = [] + + for child in root: + if(child.tag == 'ANALYSIS'): + if ('accession' in child.attrib.keys()): + analysisAccession = child.attrib['accession'] + else: + analysisAccession = None + #print('I found a project node.') + elif(child.tag == 'MESSAGES'): + print('I found some messages.') + for messageNode in child: + #print (messageNode.tag + ':' + messageNode.text) + messages.append(messageNode.tag + ':' + messageNode.text) + else: + # Don't care about the other nodes + pass + + # Return value should be a tuple: + # (Success, ProjectAccession, Messages[]) + return (submissionSuccess,analysisAccession,messages) + diff --git a/src/AlleleSubmissionEMBLXml.py b/src/AlleleSubmissionEMBLXml.py index 107c195..32ae791 100755 --- a/src/AlleleSubmissionEMBLXml.py +++ b/src/AlleleSubmissionEMBLXml.py @@ -1,20 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - - -#import os +# along with saddle-bags. If not, see . 
from AlleleSubCommon import * @@ -35,22 +32,26 @@ def writeToXml(fullXmlFilePath, xmlElementTree): return prettyXmlText -def createProjectXML(fullXmlFilePath, projectName, projectDescription, projectAbstract): +def getCenterName(): + # TODO: Should I use REST here? + # Probably not, center_name is not required in the xmls. + return 'Center_Name' + +def createProjectXML(fullXmlFilePath): + # They are called "Project" in xml, but "Study" on the website. + # Project = Study root = ET.Element('PROJECT_SET') - # TODO: How do I get the center name? - # Maybe I should get this via REST? + + projectID = getConfigurationValue('study_identifier') + projectShortTitle = getConfigurationValue('study_short_title') + projectAbstract = getConfigurationValue('study_abstract') - # Ok this is really confusing. - # According to the docs, http://ena-docs.readthedocs.io/en/latest/prog_01.html - # "alias" attribute on the project node contains projectName - # "title" node contains project description - # "description" node is the project abstract - # EMBL should be more consistent in their terminology. projectElement = ET.SubElement(root, 'PROJECT') - projectElement.set('alias', projectName) - projectElement.set('center_name', 'Maastricht University Medical Center' ) + projectElement.set('alias', projectID) + # Center Name is optional according to schemas. Forget it. EMBL Knows our login info. 
+ #projectElement.set('center_name', getCenterName() ) titleElement = ET.SubElement(projectElement, 'TITLE') - titleElement.text = projectDescription + titleElement.text = projectShortTitle descriptionElement = ET.SubElement(projectElement, 'DESCRIPTION') descriptionElement.text = projectAbstract submissionProjectElement = ET.SubElement(projectElement, 'SUBMISSION_PROJECT') @@ -58,18 +59,60 @@ def createProjectXML(fullXmlFilePath, projectName, projectDescription, projectAb return writeToXml(fullXmlFilePath, root) -def createProjectSubmissionXML(submissionAlias, fullXmlFilePath): +def createProjectSubmissionXML(fullXmlFilePath, submissionAlias, shortProjectFileName): root = ET.Element('SUBMISSION') - # TODO: How do I get the center name? - # Maybe I should get this via REST? root.set('alias', submissionAlias) - root.set('center_name', 'Maastricht University Medical Center' ) + # Center Name is optional according to schemas. Forget it. + #root.set('center_name', getCenterName() ) actionsElement = ET.SubElement(root, 'ACTIONS') actionElement = ET.SubElement(actionsElement, 'ACTION') addElement = ET.SubElement(actionElement, 'ADD') - addElement.set('source','project.xml') + addElement.set('source',shortProjectFileName) addElement.set('schema','project') return writeToXml(fullXmlFilePath, root) + +def createAnalysisXML(fullXmlFilePath, checksumValue, flatfileZipFileName): + # An analysis xml is just a wrapper for a sequence submission. + root = ET.Element('ANALYSIS_SET') + + # TODO: I haven't created these three analysis configuration values yet. + # Probably need to add this to the GUI, or somehow generate them automagically. 
+ analysisElement = ET.SubElement(root, 'ANALYSIS') + analysisElement.set('alias', getConfigurationValue('analysis_alias')) + + titleElement = ET.SubElement(analysisElement, 'TITLE') + titleElement.text = (getConfigurationValue('analysis_title')) + + descriptionElement = ET.SubElement(analysisElement, 'DESCRIPTION') + descriptionElement.text = (getConfigurationValue('analysis_description')) + + studyRefElement = ET.SubElement(analysisElement, 'STUDY_REF') + studyRefElement.set('accession', getConfigurationValue('study_accession')) + + analysisTypeElement = ET.SubElement(analysisElement, 'ANALYSIS_TYPE') + sequenceFlatfileElement = ET.SubElement(analysisTypeElement, 'SEQUENCE_FLATFILE') + + filesElement = ET.SubElement(analysisElement, 'FILES') + + fileElement = ET.SubElement(filesElement, 'FILE') + fileElement.set('checksum', checksumValue) + fileElement.set('checksum_method', 'MD5') + fileElement.set('filename', flatfileZipFileName) + fileElement.set('filetype', 'flatfile') + + return writeToXml(fullXmlFilePath, root) + +def createAnalysisSubmissionXML(fullXmlFilePath, submissionAlias, shortAnalysisFileName): + root = ET.Element('SUBMISSION') + + root.set('alias', submissionAlias) + actionsElement = ET.SubElement(root, 'ACTIONS') + actionElement = ET.SubElement(actionsElement, 'ACTION') + addElement = ET.SubElement(actionElement, 'ADD') + addElement.set('source',shortAnalysisFileName) + addElement.set('schema','analysis') + + return writeToXml(fullXmlFilePath, root) diff --git a/src/AlleleSubmissionMain.py b/src/AlleleSubmissionMain.py index 7a06468..e7f8274 100755 --- a/src/AlleleSubmissionMain.py +++ b/src/AlleleSubmissionMain.py @@ -1,25 +1,25 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . - -SoftwareVersion = "Bhast Version 1.0" +# along with saddle-bags. If not, see . import Tkinter import sys from AlleleGuiMain import AlleleGuiMain from AlleleSubCommon import * + +SoftwareVersion = 'saddlebags Version 1.1' if __name__=='__main__': try: @@ -46,6 +46,8 @@ sys.argv[1].lower() == '-version') ): print (SoftwareVersion) + pass + # # You executed the software wrong. Sorry. else: @@ -53,7 +55,7 @@ "\tRun this program using standard python call:\n" + "\t$python AlleleSubmissionMain.py\n" + "\tbiopython must be accessible in your python environment. To run using Anaconda,\n" - "\tCheck readme at https://github.com/transplantation-immunology/EMBL-HLA-Submission\n" + "\tCheck readme at https://github.com/transplantation-immunology/saddle-bags\n" ) diff --git a/src/HLAGene.py b/src/HLAGene.py index 3e39c4c..d9940b3 100755 --- a/src/HLAGene.py +++ b/src/HLAGene.py @@ -1,17 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. 
# -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . # The GeneLocus class specifies a locus on a Gene, # Either an Exon, intron, or UTR. diff --git a/src/ScrolledWindow.py b/src/ScrolledWindow.py index f4c06b8..15e00f8 100644 --- a/src/ScrolledWindow.py +++ b/src/ScrolledWindow.py @@ -1,17 +1,25 @@ -from Tkinter import Scrollbar, Frame, Canvas, VERTICAL, RIGHT, Y, LEFT, BOTH, TRUE, FALSE, NW -#from ttk import * +# This file is part of saddle-bags. +# +# saddle-bags is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# saddle-bags is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with saddle-bags. If not, see . + -#Obviously this doesn't work. 
Fix it, I want to be mroe specific witht he imports' + +from Tkinter import Scrollbar, Frame, Canvas, VERTICAL, RIGHT, Y, LEFT, BOTH, TRUE, FALSE, NW # http://tkinter.unpythonic.net/wiki/VerticalScrolledFrame class VerticalScrolledFrame(Frame): - """A pure Tkinter scrollable frame that actually works! - * Use the 'interior' attribute to place widgets inside the scrollable frame - * Construct and pack/place/grid normally - * This frame only allows vertical scrolling - - """ def __init__(self, parent, *args, **kw): Frame.__init__(self, parent, *args, **kw) @@ -23,6 +31,9 @@ def __init__(self, parent, *args, **kw): yscrollcommand=vscrollbar.set) canvas.pack(side=LEFT, fill=BOTH, expand=TRUE) vscrollbar.config(command=canvas.yview) + + # TODO: Fix the vertical scrolling behavior. The Mouse does not work. + # reset the view canvas.xview_moveto(0) diff --git a/src/SubmissionGeneratorEMBL.py b/src/SubmissionGeneratorEMBL.py index 17bf6d0..15b7fe1 100755 --- a/src/SubmissionGeneratorEMBL.py +++ b/src/SubmissionGeneratorEMBL.py @@ -1,17 +1,17 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . 
from Bio.Seq import Seq from Bio.Alphabet import generic_dna @@ -29,19 +29,7 @@ class SubmissionGeneratorEMBL(): def __init__(self): - - # TODO: These values are stored in a config file. I don't think I need to assign them. - # Except the HLAGene value, initialize that. - # Comment out the rest of these initializations. - - #self.inputFileName = '' - #self.outputFileName = '' self.sequenceAnnotation = HLAGene() - #getConfigurationValue('sample_id') = 0 - #getConfigurationValue('gene') = '' - #getConfigurationValue('allele_name') = '' - #s#elf.inputClass = '' - #self.isPseudoGene = False def printHeader(self): diff --git a/src/SubmissionGeneratorIMGT.py b/src/SubmissionGeneratorIMGT.py index c7e8892..2365d70 100755 --- a/src/SubmissionGeneratorIMGT.py +++ b/src/SubmissionGeneratorIMGT.py @@ -1,20 +1,18 @@ -# This file is part of EMBL-HLA-Submission. +# This file is part of saddle-bags. # -# EMBL-HLA-Submission is free software: you can redistribute it and/or modify +# saddle-bags is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # -# EMBL-HLA-Submission is distributed in the hope that it will be useful, +# saddle-bags is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License -# along with EMBL-HLA-Submission. If not, see . +# along with saddle-bags. If not, see . 
-#from Bio.Seq import Seq -#from Bio.Alphabet import generic_dna import sys import datetime @@ -145,7 +143,7 @@ def printSource(self): sourceText += 'FT /cell_id="' + str(getConfigurationValue('sample_id')) + '"\n' sourceText += 'FT /ethnic_origin="' + str(getConfigurationValue('ethnic_origin')) + '"\n' sourceText += 'FT /sex="' + str(getConfigurationValue('sex')) + '"\n' - sourceText += 'FT /consanguineous="Unknown"\n' + sourceText += 'FT /consanguineous="' + str(getConfigurationValue('consanguineous')) + '"\n' sourceText += 'FT /homozygous="Yes"\n' sourceText += 'FT /lab_of_origin="**IMGT_SUBMITTING_LAB_NAME**"\n' sourceText += 'FT /lab_contact="**IMGT_SUBMITTER_NAME**"\n' @@ -395,275 +393,3 @@ def validateInputs(self): return True - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -#I assume the rest of this code is outdated and deprectaed: - -""" - - - - - - def printMRNA(self): - mRNAText = '' - # Print mRNA - mRNAText += ('FT mRNA join(') - - # Iterate through the indices of the UTRs and exons. - # The 3' and 5' UTR are included in the mRNA - for x in range(0,len(self.sequenceAnnotation.loci)): - geneLocus = self.sequenceAnnotation.loci[x] - # If it is an exon or UTR - if (geneLocus.exon or 'UT' in geneLocus.name): - mRNAText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex) + ',' - - # Trim off the last comma and add a parenthese - mRNAText = mRNAText[0:len(mRNAText)-1] + ')\n' - - mRNAText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') - mRNAText += ('FT /allele="' + str(self.inputAllele) + '"\n') - mRNAText += ('FT /product=\"MHC class I antigen\"\n') - - return mRNAText - - - def printFeatures(self): - cdsText = '' - - # Print CDS - # CDS is the coding sequence. It should include the exons, but not the UTRs/Introns - # The range 1:featureCount-1 will exclude the UTRs. 
- cdsText += ('FT CDS join(') - for x in range(0,len(self.sequenceAnnotation.loci)): - geneLocus = self.sequenceAnnotation.loci[x] - if (geneLocus.exon): - cdsText += str(geneLocus.beginIndex) + '..' + str(geneLocus.endIndex) - if not x==len(self.sequenceAnnotation.loci)-2: - cdsText += ',' - else: - cdsText += ')\n' - - cdsText += ('FT /transl_table=1\n') - cdsText += ('FT /codon_start=1\n') - cdsText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') - cdsText += ('FT /allele="' + str(self.inputAllele) + '"\n') - - # TODO: This is a problem. I need to specify Class I or Class II - cdsText += ('FT /product=\"MHC class I antigen\"\n') - cdsText += ('FT /translation=\"') - - # Some simple formatting for the peptide sequence, making it human and computer readable. - # 80 peptides per line. Except the first line, which is 66. - # 66 is 80-14, where 14 is the length of { /translation=" } - peptideSequence = self.translateSequence(self.sequenceAnnotation.getExonSequence()) - if(len(peptideSequence) < 66): - cdsText += (peptideSequence) + '\"\n' - else: - cdsText += peptideSequence[0:66] + '\n' - i=66 - while (i < len(peptideSequence)): - cdsText += 'FT ' + peptideSequence[i:i+80] - i += 80 - - # If we're not yet at the end of the sequence, go to the next line - if(i < len(peptideSequence)): - cdsText += '\n' - # We're at the end. close the quote and new line. - else: - cdsText += '\"\n' - - return cdsText - - def printFeatures(self): - featureText = '' - - exonIndex = 1 - intronIndex = 1 - - geneHas3UTR = False - geneHas5UTR = False - - for x in range(0,len(self.sequenceAnnotation.loci)): - currentFeature = self.sequenceAnnotation.loci[x] - - # 3' UTR - if(currentFeature.name == '3UT'): - featureText += ('FT 3\'UTR ' + str(currentFeature.beginIndex) + '..' 
+ str(currentFeature.endIndex) + '\n') - featureText += ('FT /note=\"3\'UTR\"\n') - featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - geneHas3UTR = True - - # 5' UTR - elif(currentFeature.name == '5UT'): - featureText += ('FT 5\'UTR ' + str(currentFeature.beginIndex) + '..' + str(currentFeature.endIndex) + '\n') - featureText += ('FT /note=\"5\'UTR\"\n') - featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - geneHas5UTR = True - - # Exon - elif(currentFeature.exon): - featureText += ('FT exon ' + str(currentFeature.beginIndex) - + '..' + str(currentFeature.endIndex) + '\n') - featureText += ('FT /number=' + str(exonIndex) + '\n') - featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - exonIndex += 1 - - # Intron - else: - featureText += ('FT intron ' + str(currentFeature.beginIndex) - + '..' + str(currentFeature.endIndex) + '\n') - featureText += ('FT /number=' + str(intronIndex) + '\n') - featureText += ('FT /gene="' + str(getConfigurationValue('gene')) + '"\n') - featureText += ('FT /allele="' + str(self.inputAllele) + '"\n') - intronIndex += 1 - - - featureText += ('XX\n') - - # Do a quick sanity check. If we are missing either UTR I should warn the user. - # But move on with your life, this is not worth getting upset over. - if (not geneHas3UTR and not geneHas5UTR): - tkMessageBox.showinfo('Missing UTRs', - 'This sequence has no 5\' or 3\' UTR.\n\n' + - 'Use lowercase nucleotides at the\n' + - 'beginning and end of your DNA\n' + - 'sequence to specify the 5\' and 3\' UTRs.' 
) - elif (not geneHas5UTR): - tkMessageBox.showinfo('Missing 5\' UTR', - 'This sequence has no 5\' UTR.\n\n' + - 'Use lowercase nucleotides at the\n' + - 'beginning and end of your DNA\n' + - 'sequence to specify the 5\' and 3\' UTRs.' ) - elif (not geneHas3UTR): - tkMessageBox.showinfo('Missing 3\' UTR', - 'This sequence has no 3\' UTR.\n\n' + - 'Use lowercase nucleotides at the\n' + - 'beginning and end of your DNA\n' + - 'sequence to specify the 5\' and 3\' UTRs.' ) - else: - print('The UTRs look fine.') - pass - - return featureText - - def printSequence(self): - sequenceText = '' - - completeSequence = self.sequenceAnnotation.getCompleteSequence().upper() - - cCount = completeSequence.count('C') - gCount = completeSequence.count('G') - tCount = completeSequence.count('T') - aCount = completeSequence.count('A') - otherCount = self.sequenceAnnotation.totalLength() - (cCount + gCount + tCount + aCount) - - sequenceText += ('SQ Sequence ' + str(self.sequenceAnnotation.totalLength()) + ' BP; ' - + str(aCount) + ' A; ' + str(cCount) + ' C; ' - + str(gCount) + ' G; ' + str(tCount) + ' T; ' - + str(otherCount) + ' other;\n') - - # Here's some logic to print the sequence information in groups of 10. - # This format is specified in the User manual specified by IMGT. - currentSeqIndex = 0 - - while (currentSeqIndex < self.sequenceAnnotation.totalLength()): - # The character code for a sequence region is two blank spaces, - # followed by three blank spaces, for a total of 5 blanks. - sequenceText += ' ' - sequenceRow = self.sequenceAnnotation.getCompleteSequence()[currentSeqIndex : currentSeqIndex + 60] - - # A sequenceChunk is 10 nucleotides in this context. - # Format specifies up to six "chunks" per line. - for i in range(0,6): - sequenceChunk = sequenceRow[i*10 : (i+1)*10] - sequenceText += sequenceChunk + ' ' - - # If line is complete (=60 bp), we can print the nucleotide index and move on to the next row. 
- if(len(sequenceRow) == 60): - sequenceText += str(currentSeqIndex + 60) + '\n' - # but if line is not complete (this is more likely, and more complicated.) - else: - # Fill with spaces to align the nucleotide indices at the end of the sequence. - numberSpaces = 60-len(sequenceRow) - for n in range (0, numberSpaces): - sequenceText += ' ' - sequenceText += (str(len(sequenceRow) + currentSeqIndex) + '\n') - - # The next row of the sequence - currentSeqIndex += 60 - - return sequenceText - - -"""