From aed3fc70ecd126f6507b7c6891e28e86008a33d0 Mon Sep 17 00:00:00 2001 From: amickan Date: Wed, 22 May 2024 15:03:39 +0200 Subject: [PATCH] Add mimetype to UserUpload model --- app/grandchallenge/codebuild/models.py | 1 + app/grandchallenge/core/storage.py | 13 +++++----- .../migrations/0006_userupload_mimetype.py | 25 ++++++++++++++++++ app/grandchallenge/uploads/models.py | 19 +++++++++++++- app/grandchallenge/uploads/validators.py | 15 +++++++++++ app/tests/uploads_tests/test_models.py | 26 +++++++++++++++++++ 6 files changed, 92 insertions(+), 7 deletions(-) create mode 100644 app/grandchallenge/uploads/migrations/0006_userupload_mimetype.py create mode 100644 app/grandchallenge/uploads/validators.py diff --git a/app/grandchallenge/codebuild/models.py b/app/grandchallenge/codebuild/models.py index dc43ddaa15..08a2ecf42e 100644 --- a/app/grandchallenge/codebuild/models.py +++ b/app/grandchallenge/codebuild/models.py @@ -98,6 +98,7 @@ def add_image_to_algorithm(self): dest_filename=f"{self.pk}.tar.gz", src_bucket=settings.CODEBUILD_ARTIFACTS_BUCKET_NAME, src_key=f"codebuild/artifacts/{self.build_number}/{self.build_config['projectName']}/container-image.tar.gz", + mimetype="application/gzip", save=True, ) diff --git a/app/grandchallenge/core/storage.py b/app/grandchallenge/core/storage.py index 86d16507f1..390e59d7bf 100644 --- a/app/grandchallenge/core/storage.py +++ b/app/grandchallenge/core/storage.py @@ -150,7 +150,9 @@ def get_mugshot_path(instance, filename): return f"mugshots/{time_prefix}/{uuid4()}.{extension}" -def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save): +def copy_s3_object( + *, to_field, dest_filename, src_bucket, src_key, mimetype, save +): """Copies an S3 object to a Django file field on a model""" if not isinstance(to_field, FieldFile): raise ValueError("to_field must be a FieldFile") @@ -163,6 +165,7 @@ def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save): target_key = to_field.storage.get_available_name( name=target_key, max_length=to_field.field.max_length ) + extra_args = {"ContentType": mimetype} if settings.AWS_S3_OBJECT_PARAMETERS[ "StorageClass" @@ -171,11 +174,9 @@ def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save): settings.PROTECTED_S3_STORAGE_KWARGS["bucket_name"], settings.PUBLIC_S3_STORAGE_KWARGS["bucket_name"], }: - extra_args = { - "StorageClass": settings.AWS_S3_OBJECT_PARAMETERS["StorageClass"] - } - else: - extra_args = None + extra_args["StorageClass"] = settings.AWS_S3_OBJECT_PARAMETERS[ + "StorageClass" + ] target_client.copy( CopySource={"Bucket": src_bucket, "Key": src_key}, diff --git a/app/grandchallenge/uploads/migrations/0006_userupload_mimetype.py b/app/grandchallenge/uploads/migrations/0006_userupload_mimetype.py new file mode 100644 index 0000000000..119680f1d0 --- /dev/null +++ b/app/grandchallenge/uploads/migrations/0006_userupload_mimetype.py @@ -0,0 +1,25 @@ +# Generated by Django 4.2.13 on 2024-05-22 10:50 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ( + "uploads", + "0005_useruploadgroupobjectpermission_useruploaduserobjectpermission", + ), + ] + + operations = [ + migrations.AddField( + model_name="userupload", + name="mimetype", + field=models.CharField( + default="application/octet-stream", + editable=False, + max_length=255, + ), + ), + ] diff --git a/app/grandchallenge/uploads/models.py b/app/grandchallenge/uploads/models.py index 1af5ae6410..d0a87474af 100644 --- a/app/grandchallenge/uploads/models.py +++ b/app/grandchallenge/uploads/models.py @@ -1,6 +1,7 @@ import os import boto3 +import magic from botocore.config import Config from django.conf import settings from django.db import models @@ -77,13 +78,15 @@ class StatusChoices(models.IntegerChoices): choices=StatusChoices.choices, default=StatusChoices.PENDING ) s3_upload_id = models.CharField(max_length=192, blank=True) + mimetype = models.CharField( + max_length=255, editable=False, default="application/octet-stream" + ) class Meta(UUIDModel.Meta): pass def save(self, *args, **kwargs): adding = self._state.adding - if adding: self.create_multipart_upload() @@ -196,6 +199,18 @@ def assign_permissions(self): assign_perm("view_userupload", self.creator, self) assign_perm("change_userupload", self.creator, self) + @property + def mimetype_from_file(self): + if self.status != self.StatusChoices.COMPLETED: + raise RuntimeError("Cannot get mimetype of incomplete upload") + header = self._client.get_object( + Bucket=self.bucket, + Key=self.key, + # 2048 bytes for best results with libmagic + Range="bytes=0-2047", + )["Body"].read() + return magic.from_buffer(header, mime=True) + def create_multipart_upload(self): if self.status != self.StatusChoices.PENDING: raise RuntimeError("Upload is not pending") @@ -260,6 +275,7 @@ def complete_multipart_upload(self, *, parts): MultipartUpload={"Parts": parts}, ) self.status = self.StatusChoices.COMPLETED + self.mimetype = self.mimetype_from_file def abort_multipart_upload(self): if self.status != self.StatusChoices.INITIALIZED: @@ -286,6 +302,7 @@ def copy_object(self, *, to_field, save=True): dest_filename=self.filename, src_key=self.key, src_bucket=self.bucket, + mimetype=self.mimetype, save=save, ) diff --git a/app/grandchallenge/uploads/validators.py b/app/grandchallenge/uploads/validators.py new file mode 100644 index 0000000000..8b83f7fa7d --- /dev/null +++ b/app/grandchallenge/uploads/validators.py @@ -0,0 +1,15 @@ +from django.core.exceptions import ValidationError + +from grandchallenge.uploads.models import UserUpload + + +def validate_gzip_mimetype(value): + try: + user_upload = UserUpload.objects.get( + pk=value, status=UserUpload.StatusChoices.COMPLETED + ) + except UserUpload.DoesNotExist: + raise ValidationError("This upload does not exist") + + if user_upload.mimetype not in {"application/gzip", "application/x-gzip"}: + raise ValidationError("This upload is not a valid .tar.gz file") diff --git a/app/tests/uploads_tests/test_models.py b/app/tests/uploads_tests/test_models.py index 0c272254d7..8ba44fbda2 100644 --- a/app/tests/uploads_tests/test_models.py +++ b/app/tests/uploads_tests/test_models.py @@ -299,3 +299,29 @@ def test_can_upload_more_other_objects(settings): assert upload.can_upload_more is False assert new_upload.can_upload_more is False + + +@pytest.mark.parametrize( + "content,expected_mimetype", + ( + (b"hello", "text/plain"), + ( + b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x00\x00\x00\x00:~\x9bU\x00\x00\x00\nIDATx\x9cc`\x00\x00\x00\x02\x00\x01H\xaf\xa4q\x00\x00\x00\x00IEND\xaeB`\x82", + "image/png", + ), + ), +) +def test_mimetype_set(content, expected_mimetype): + u = UserFactory.build(pk=42) + upload = UserUpload(creator=u) + upload.create_multipart_upload() + + assert upload.mimetype == "application/octet-stream" + + presigned_urls = upload.generate_presigned_urls(part_numbers=[1]) + response = put(presigned_urls["1"], data=content) + upload.complete_multipart_upload( + parts=[{"ETag": response.headers["ETag"], "PartNumber": 1}] + ) + + assert upload.mimetype == expected_mimetype