Skip to content

Commit

Permalink
Add mimetype to UserUpload model
Browse files Browse the repository at this point in the history
  • Loading branch information
amickan committed May 22, 2024
1 parent 557283b commit aed3fc7
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 7 deletions.
1 change: 1 addition & 0 deletions app/grandchallenge/codebuild/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def add_image_to_algorithm(self):
dest_filename=f"{self.pk}.tar.gz",
src_bucket=settings.CODEBUILD_ARTIFACTS_BUCKET_NAME,
src_key=f"codebuild/artifacts/{self.build_number}/{self.build_config['projectName']}/container-image.tar.gz",
mimetype="application/gzip",
save=True,
)

Expand Down
13 changes: 7 additions & 6 deletions app/grandchallenge/core/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def get_mugshot_path(instance, filename):
return f"mugshots/{time_prefix}/{uuid4()}.{extension}"


def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save):
def copy_s3_object(
*, to_field, dest_filename, src_bucket, src_key, mimetype, save
):
"""Copies an S3 object to a Django file field on a model"""
if not isinstance(to_field, FieldFile):
raise ValueError("to_field must be a FieldFile")
Expand All @@ -163,6 +165,7 @@ def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save):
target_key = to_field.storage.get_available_name(
name=target_key, max_length=to_field.field.max_length
)
extra_args = {"ContentType": mimetype}

if settings.AWS_S3_OBJECT_PARAMETERS[
"StorageClass"
Expand All @@ -171,11 +174,9 @@ def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save):
settings.PROTECTED_S3_STORAGE_KWARGS["bucket_name"],
settings.PUBLIC_S3_STORAGE_KWARGS["bucket_name"],
}:
extra_args = {
"StorageClass": settings.AWS_S3_OBJECT_PARAMETERS["StorageClass"]
}
else:
extra_args = None
extra_args["StorageClass"] = settings.AWS_S3_OBJECT_PARAMETERS[
"StorageClass"
]

target_client.copy(
CopySource={"Bucket": src_bucket, "Key": src_key},
Expand Down
25 changes: 25 additions & 0 deletions app/grandchallenge/uploads/migrations/0006_userupload_mimetype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 4.2.13 on 2024-05-22 10:50

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the non-editable ``mimetype`` character field to ``userupload``.

    The field is created with a default of ``application/octet-stream``.
    """

    # Must run after the previous migration of the "uploads" app.
    dependencies = [
        (
            "uploads",
            "0005_useruploadgroupobjectpermission_useruploaduserobjectpermission",
        ),
    ]

    operations = [
        migrations.AddField(
            model_name="userupload",
            name="mimetype",
            field=models.CharField(
                max_length=255,
                editable=False,
                default="application/octet-stream",
            ),
        ),
    ]
19 changes: 18 additions & 1 deletion app/grandchallenge/uploads/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

import boto3
import magic
from botocore.config import Config
from django.conf import settings
from django.db import models
Expand Down Expand Up @@ -77,13 +78,15 @@ class StatusChoices(models.IntegerChoices):
choices=StatusChoices.choices, default=StatusChoices.PENDING
)
s3_upload_id = models.CharField(max_length=192, blank=True)
mimetype = models.CharField(
max_length=255, editable=False, default="application/octet-stream"
)

class Meta(UUIDModel.Meta):
pass

def save(self, *args, **kwargs):
adding = self._state.adding

if adding:
self.create_multipart_upload()

Expand Down Expand Up @@ -196,6 +199,18 @@ def assign_permissions(self):
assign_perm("view_userupload", self.creator, self)
assign_perm("change_userupload", self.creator, self)

@property
def mimetype_from_file(self):
    """Return the MIME type detected from the start of the stored object.

    Only the first 2048 bytes of the object at ``self.bucket`` /
    ``self.key`` are fetched through ``self._client`` and handed to
    ``magic.from_buffer`` with ``mime=True``.

    Raises:
        RuntimeError: if ``self.status`` is not ``StatusChoices.COMPLETED``.
    """
    if self.status != self.StatusChoices.COMPLETED:
        raise RuntimeError("Cannot get mimetype of incomplete upload")

    response = self._client.get_object(
        Bucket=self.bucket,
        Key=self.key,
        # 2048 bytes for best results with libmagic
        Range="bytes=0-2047",
    )
    leading_bytes = response["Body"].read()

    return magic.from_buffer(leading_bytes, mime=True)

def create_multipart_upload(self):
if self.status != self.StatusChoices.PENDING:
raise RuntimeError("Upload is not pending")
Expand Down Expand Up @@ -260,6 +275,7 @@ def complete_multipart_upload(self, *, parts):
MultipartUpload={"Parts": parts},
)
self.status = self.StatusChoices.COMPLETED
self.mimetype = self.mimetype_from_file

def abort_multipart_upload(self):
if self.status != self.StatusChoices.INITIALIZED:
Expand All @@ -286,6 +302,7 @@ def copy_object(self, *, to_field, save=True):
dest_filename=self.filename,
src_key=self.key,
src_bucket=self.bucket,
mimetype=self.mimetype,
save=save,
)

Expand Down
15 changes: 15 additions & 0 deletions app/grandchallenge/uploads/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from django.core.exceptions import ValidationError

from grandchallenge.uploads.models import UserUpload


def validate_gzip_mimetype(value):
    """Validate that ``value`` identifies a completed gzip upload.

    ``value`` is used as the primary key when looking up a ``UserUpload``
    whose status is ``COMPLETED``; the stored ``mimetype`` of that upload
    must be one of the accepted gzip types.

    Raises:
        ValidationError: if no completed upload with that primary key
            exists, or if its mimetype is not a gzip type.
    """
    gzip_mimetypes = {"application/gzip", "application/x-gzip"}

    try:
        upload = UserUpload.objects.get(
            status=UserUpload.StatusChoices.COMPLETED, pk=value
        )
    except UserUpload.DoesNotExist:
        raise ValidationError("This upload does not exist")

    if upload.mimetype in gzip_mimetypes:
        return

    raise ValidationError("This upload is not a valid .tar.gz file")
26 changes: 26 additions & 0 deletions app/tests/uploads_tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,29 @@ def test_can_upload_more_other_objects(settings):

assert upload.can_upload_more is False
assert new_upload.can_upload_more is False


@pytest.mark.parametrize(
    "content,expected_mimetype",
    (
        (b"hello", "text/plain"),
        (
            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x00\x00\x00\x00:~\x9bU\x00\x00\x00\nIDATx\x9cc`\x00\x00\x00\x02\x00\x01H\xaf\xa4q\x00\x00\x00\x00IEND\xaeB`\x82",
            "image/png",
        ),
    ),
)
def test_mimetype_set(content, expected_mimetype):
    """Completing a single-part upload sets ``mimetype`` from its content."""
    creator = UserFactory.build(pk=42)
    user_upload = UserUpload(creator=creator)
    user_upload.create_multipart_upload()

    # Nothing has been uploaded yet, so the field still holds its default.
    assert user_upload.mimetype == "application/octet-stream"

    part_urls = user_upload.generate_presigned_urls(part_numbers=[1])
    put_response = put(part_urls["1"], data=content)
    etag = put_response.headers["ETag"]
    user_upload.complete_multipart_upload(
        parts=[{"ETag": etag, "PartNumber": 1}]
    )

    assert user_upload.mimetype == expected_mimetype

0 comments on commit aed3fc7

Please sign in to comment.