Skip to content

Commit

Permalink
[FIX] Handle upload better, upload file again if modify run
Browse files Browse the repository at this point in the history
  • Loading branch information
dameyerdave committed Jan 8, 2025
1 parent 278e554 commit 317243e
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 21 deletions.
52 changes: 50 additions & 2 deletions api/app/core/ena_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
from box import Box
from django.conf import settings
from django.utils import timezone as tz
from django.utils.translation import gettext_lazy as _
from ena_upload import ena_upload as ena
from lxml import etree
from rest_framework.exceptions import ValidationError
from rest_framework import status
from rest_framework.exceptions import APIException, ValidationError
from sh import Command, ErrorReturnCode

from core import log
Expand All @@ -28,6 +30,12 @@
}


class FTPUploadError(APIException):
    """API exception signalling that file(s) could not be uploaded via FTP.

    Carries an HTTP 400 status code, a translatable default detail message
    and the error code ``ftp_upload_error``.
    """

    # HTTP status attached to this exception.
    status_code = status.HTTP_400_BAD_REQUEST
    # Fallback message used when no explicit detail is passed on raise.
    default_detail = _("File(s) could not be uploaded via FTP.")
    # Machine-readable identifier for this error.
    default_code = "ftp_upload_error"


def apply_template(job: Job):
if not job.template:
job.template = "default"
Expand Down Expand Up @@ -86,6 +94,45 @@ def to_dataframe(job: Job):
return schema_dataframe


def submit_data(file_paths: dict):
    """Upload files to the ENA Webin FTP server over explicit FTPS.

    :param file_paths: mapping of remote filename -> local file path
    :raises FTPUploadError: if the connection/login fails or any file
        cannot be read or transferred
    """
    # Local import keeps this block self-contained; ftplib is stdlib.
    import ftplib

    ftp_host = "webin2.ebi.ac.uk"

    log.info(f"\nConnecting to {ftp_host}....")
    ftps = None
    try:
        ftps = ena.MyFTP_TLS(timeout=120)
        ftps.context.set_ciphers("HIGH:!DH:!aNULL")
        ftps.connect(ftp_host, port=21)
        ftps.auth()
        # Credentials come from settings; never log them.
        ftps.login(settings.ENA_USERNAME, settings.ENA_PASSWORD)
        ftps.prot_p()
    except ftplib.all_errors as err:
        # ftplib.all_errors covers OSError (the former IOError handler) plus
        # the ftplib reply errors, e.g. error_perm raised on a rejected login,
        # which previously escaped this handler.
        if ftps is not None:
            # assumes MyFTP_TLS derives from ftplib.FTP so close() is
            # available and safe on a half-open connection — TODO confirm
            ftps.close()
        log.error(
            "ERROR: could not connect to the ftp server. "
            "Please check your login details."
        )
        log.error(err)
        raise FTPUploadError(
            f"Cannot connect to the ftp server {ftp_host} while intending to upload file {file_paths}: {err}"
        ) from err

    for filename, path in file_paths.items():
        log.info(f"Uploading {path}...")
        try:
            # Context manager guarantees the local file handle is released
            # whether or not the transfer succeeds.
            with open(path, "rb") as handle:
                log.info(ftps.storbinary(f"STOR {filename}", handle))
        except Exception as err:
            # Exception (not BaseException) so KeyboardInterrupt/SystemExit
            # still propagate instead of being reported as an API error.
            log.error(f"ERROR: {err}")
            log.error(
                "ERROR: If your connection times out at this stage, it probably is because of a firewall that is in place. FTP is used in passive mode and connection will be opened to one of the ports: 40000 and 50000."
            )
            # Drop the connection unilaterally; a polite QUIT may hang or fail
            # on an already broken session.
            ftps.close()
            raise FTPUploadError(
                f"Cannot upload file {path} to {ftp_host}: {err}"
            ) from err
    log.info(ftps.quit())


def handle_run(job: Job, schema_target):
df = schema_target
file_paths = {}
Expand Down Expand Up @@ -114,7 +161,8 @@ def handle_run(job: Job, schema_target):
]
df["file_checksum"] = file_md5.values()

ena.submit_data(file_paths, settings.ENA_PASSWORD, settings.ENA_USERNAME)
# ena.submit_data(file_paths, settings.ENA_PASSWORD, settings.ENA_USERNAME)
submit_data(file_paths)
return df
else:
return None
Expand Down
40 changes: 25 additions & 15 deletions api/app/core/models.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from django.db import models
from copy import copy

from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.postgres.fields import ArrayField
from django.conf import settings
from copy import copy
from django.db import models

from .helpers import merge


Expand Down Expand Up @@ -63,18 +65,26 @@ def links(self):
if not self.result:
return {}
return {
"experiment": f"{settings.ENA_BROWSER_URL}/{self.result['experiment']['accession']}"
if "experiment" in self.result
else "",
"sample": f"{settings.ENA_BROWSER_URL}/{self.result['sample']['accession']}"
if "sample" in self.result
else "",
"run": f"{settings.ENA_BROWSER_URL}/{self.result['run']['accession']}"
if "run" in self.result
else "",
"study": f"{settings.ENA_BROWSER_URL}/{self.result['study']['accession']}"
if "study" in self.result
else "",
"experiment": (
f"{settings.ENA_BROWSER_URL}/{self.result['experiment']['accession']}"
if "experiment" in self.result
else ""
),
"sample": (
f"{settings.ENA_BROWSER_URL}/{self.result['sample']['accession']}"
if "sample" in self.result
else ""
),
"run": (
f"{settings.ENA_BROWSER_URL}/{self.result['run']['accession']}"
if "run" in self.result
else ""
),
"study": (
f"{settings.ENA_BROWSER_URL}/{self.result['study']['accession']}"
if "study" in self.result
else ""
),
}

class Meta:
Expand Down
9 changes: 5 additions & 4 deletions api/app/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,11 @@ def modify(self, request, pk=None):
new_job = job.clone(self.request.user, "MODIFY")
data = request.data.get("data")
# We filter out all irrelevant schemas for the modification
for schema in SCHEMAS:
if schema not in data and schema in new_job.data:
del new_job.data[schema]
new_job.data = merge(new_job.data, data)
if data is not None:
for schema in SCHEMAS:
if schema not in data and schema in new_job.data:
del new_job.data[schema]
new_job.data = merge(new_job.data, data)
new_job.save()
result = JobSerializer(new_job, context={"request": request})
return Response(result.data)
Expand Down

0 comments on commit 317243e

Please sign in to comment.