class FTPUploadError(APIException):
    """API error raised when files cannot be transferred to the FTP server.

    Carries HTTP status 400 and the error code ``ftp_upload_error``.
    """

    status_code = status.HTTP_400_BAD_REQUEST
    default_detail = _("File(s) could not be uploaded via FTP.")
    default_code = "ftp_upload_error"


def submit_data(file_paths: dict) -> None:
    """Upload files to the Webin FTP server over a TLS-protected session.

    Logs in with ``settings.ENA_USERNAME`` / ``settings.ENA_PASSWORD`` and
    stores each local file on the server under its dictionary key.

    :param file_paths: mapping of remote filename to local file path
    :raises FTPUploadError: if connecting or logging in fails, or if any
        file cannot be opened or transferred
    """
    # Function-scope import so the block does not depend on the file's
    # import section; ftplib is part of the standard library.
    import ftplib

    ftp_host = "webin2.ebi.ac.uk"

    log.info(f"\nConnecting to {ftp_host}....")
    ftps = None
    try:
        ftps = ena.MyFTP_TLS(timeout=120)
        ftps.context.set_ciphers("HIGH:!DH:!aNULL")
        ftps.connect(ftp_host, port=21)
        ftps.auth()
        ftps.login(settings.ENA_USERNAME, settings.ENA_PASSWORD)
        ftps.prot_p()
    except ftplib.all_errors as err:
        # all_errors is (ftplib.Error, OSError, EOFError). Catching only
        # IOError missed ftplib.error_perm, which is what a rejected login
        # raises -- exactly the case the message below talks about.
        if ftps is not None:
            # Do not leave a half-open control connection behind.
            ftps.close()
        log.error(
            "ERROR: could not connect to the ftp server. "
            "Please check your login details."
        )
        log.error(err)
        raise FTPUploadError(
            f"Cannot connect to the ftp server {ftp_host} while intending to upload file {file_paths}: {err}"
        ) from err

    try:
        for filename, path in file_paths.items():
            log.info(f"Uploading {path}...")
            try:
                # The context manager closes the local file even when the
                # transfer fails part-way through.
                with open(path, "rb") as handle:
                    log.info(ftps.storbinary(f"STOR {filename}", handle))
            except Exception as err:
                # Exception rather than BaseException: KeyboardInterrupt and
                # SystemExit must propagate, not become an HTTP 400.
                log.error(f"ERROR: {err}")
                log.error(
                    "ERROR: If your connection times out at this stage, it probably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000."
                )
                raise FTPUploadError(
                    f"Cannot upload file {path} to {ftp_host}: {err}"
                ) from err
        log.info(ftps.quit())
    finally:
        # Safe after a successful quit() (ftplib's close() is idempotent) and
        # releases the socket when an upload or quit() itself failed.
        # NOTE(review): assumes ena.MyFTP_TLS derives from ftplib.FTP_TLS.
        ftps.close()
f"{settings.ENA_BROWSER_URL}/{self.result['study']['accession']}" - if "study" in self.result - else "", + "experiment": ( + f"{settings.ENA_BROWSER_URL}/{self.result['experiment']['accession']}" + if "experiment" in self.result + else "" + ), + "sample": ( + f"{settings.ENA_BROWSER_URL}/{self.result['sample']['accession']}" + if "sample" in self.result + else "" + ), + "run": ( + f"{settings.ENA_BROWSER_URL}/{self.result['run']['accession']}" + if "run" in self.result + else "" + ), + "study": ( + f"{settings.ENA_BROWSER_URL}/{self.result['study']['accession']}" + if "study" in self.result + else "" + ), } class Meta: diff --git a/api/app/core/views.py b/api/app/core/views.py index 3bb6ad1..07c548d 100644 --- a/api/app/core/views.py +++ b/api/app/core/views.py @@ -133,10 +133,11 @@ def modify(self, request, pk=None): new_job = job.clone(self.request.user, "MODIFY") data = request.data.get("data") # We filter out all irrelevant schemas for the modification - for schema in SCHEMAS: - if schema not in data and schema in new_job.data: - del new_job.data[schema] - new_job.data = merge(new_job.data, data) + if data is not None: + for schema in SCHEMAS: + if schema not in data and schema in new_job.data: + del new_job.data[schema] + new_job.data = merge(new_job.data, data) new_job.save() result = JobSerializer(new_job, context={"request": request}) return Response(result.data)