Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add mimetype to UserUpload model #3350

Merged
merged 1 commit into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions app/grandchallenge/codebuild/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def add_image_to_algorithm(self):
dest_filename=f"{self.pk}.tar.gz",
src_bucket=settings.CODEBUILD_ARTIFACTS_BUCKET_NAME,
src_key=f"codebuild/artifacts/{self.build_number}/{self.build_config['projectName']}/container-image.tar.gz",
mimetype="application/gzip",
save=True,
)

Expand Down
13 changes: 7 additions & 6 deletions app/grandchallenge/core/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,9 @@ def get_mugshot_path(instance, filename):
return f"mugshots/{time_prefix}/{uuid4()}.{extension}"


def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save):
def copy_s3_object(
*, to_field, dest_filename, src_bucket, src_key, mimetype, save
):
"""Copies an S3 object to a Django file field on a model"""
if not isinstance(to_field, FieldFile):
raise ValueError("to_field must be a FieldFile")
Expand All @@ -163,6 +165,7 @@ def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save):
target_key = to_field.storage.get_available_name(
name=target_key, max_length=to_field.field.max_length
)
extra_args = {"ContentType": mimetype}

if settings.AWS_S3_OBJECT_PARAMETERS[
"StorageClass"
Expand All @@ -171,11 +174,9 @@ def copy_s3_object(*, to_field, dest_filename, src_bucket, src_key, save):
settings.PROTECTED_S3_STORAGE_KWARGS["bucket_name"],
settings.PUBLIC_S3_STORAGE_KWARGS["bucket_name"],
}:
extra_args = {
"StorageClass": settings.AWS_S3_OBJECT_PARAMETERS["StorageClass"]
}
else:
extra_args = None
extra_args["StorageClass"] = settings.AWS_S3_OBJECT_PARAMETERS[
"StorageClass"
]

target_client.copy(
CopySource={"Bucket": src_bucket, "Key": src_key},
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Generated by Django 4.2.13 on 2024-05-22 10:50

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``mimetype`` field to the ``userupload`` model."""

    # Applied on top of the previous migration of the uploads app
    dependencies = [
        (
            "uploads",
            "0005_useruploadgroupobjectpermission_useruploaduserobjectpermission",
        ),
    ]

    operations = [
        migrations.AddField(
            model_name="userupload",
            name="mimetype",
            # Non-editable field that defaults to the generic binary type
            field=models.CharField(
                max_length=255,
                editable=False,
                default="application/octet-stream",
            ),
        ),
    ]
19 changes: 18 additions & 1 deletion app/grandchallenge/uploads/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

import boto3
import magic
from botocore.config import Config
from django.conf import settings
from django.db import models
Expand Down Expand Up @@ -77,13 +78,15 @@ class StatusChoices(models.IntegerChoices):
choices=StatusChoices.choices, default=StatusChoices.PENDING
)
s3_upload_id = models.CharField(max_length=192, blank=True)
mimetype = models.CharField(
max_length=255, editable=False, default="application/octet-stream"
)

class Meta(UUIDModel.Meta):
pass

def save(self, *args, **kwargs):
adding = self._state.adding

if adding:
self.create_multipart_upload()

Expand Down Expand Up @@ -196,6 +199,18 @@ def assign_permissions(self):
assign_perm("view_userupload", self.creator, self)
assign_perm("change_userupload", self.creator, self)

@property
def mimetype_from_file(self):
    """
    Detect the mimetype from the leading bytes of the uploaded object.

    Raises a ``RuntimeError`` if the upload has not been completed.
    """
    if self.status != self.StatusChoices.COMPLETED:
        raise RuntimeError("Cannot get mimetype of incomplete upload")

    # 2048 bytes for best results with libmagic
    byte_range = "bytes=0-2047"

    response = self._client.get_object(
        Bucket=self.bucket, Key=self.key, Range=byte_range
    )
    leading_bytes = response["Body"].read()

    return magic.from_buffer(leading_bytes, mime=True)

def create_multipart_upload(self):
if self.status != self.StatusChoices.PENDING:
raise RuntimeError("Upload is not pending")
Expand Down Expand Up @@ -260,6 +275,7 @@ def complete_multipart_upload(self, *, parts):
MultipartUpload={"Parts": parts},
)
self.status = self.StatusChoices.COMPLETED
self.mimetype = self.mimetype_from_file

def abort_multipart_upload(self):
if self.status != self.StatusChoices.INITIALIZED:
Expand All @@ -286,6 +302,7 @@ def copy_object(self, *, to_field, save=True):
dest_filename=self.filename,
src_key=self.key,
src_bucket=self.bucket,
mimetype=self.mimetype,
save=save,
)

Expand Down
15 changes: 15 additions & 0 deletions app/grandchallenge/uploads/validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from django.core.exceptions import ValidationError

from grandchallenge.uploads.models import UserUpload


def validate_gzip_mimetype(value):
    """
    Validate that a completed user upload has a gzip mimetype.

    ``value`` is used as the primary key to look up the ``UserUpload``.

    Raises a ``ValidationError`` if no completed upload with that primary
    key exists, or if the stored mimetype of the upload is not one of the
    gzip mimetypes.
    """
    try:
        user_upload = UserUpload.objects.get(
            pk=value, status=UserUpload.StatusChoices.COMPLETED
        )
    except UserUpload.DoesNotExist as error:
        # Chain explicitly so the failed lookup is recorded as the cause
        # rather than as an error raised while handling another exception
        raise ValidationError("This upload does not exist") from error

    if user_upload.mimetype not in {"application/gzip", "application/x-gzip"}:
        raise ValidationError("This upload is not a valid .tar.gz file")
26 changes: 26 additions & 0 deletions app/tests/uploads_tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,29 @@ def test_can_upload_more_other_objects(settings):

assert upload.can_upload_more is False
assert new_upload.can_upload_more is False


@pytest.mark.parametrize(
    "content,expected_mimetype",
    (
        (b"hello", "text/plain"),
        (
            b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x00\x00\x00\x00:~\x9bU\x00\x00\x00\nIDATx\x9cc`\x00\x00\x00\x02\x00\x01H\xaf\xa4q\x00\x00\x00\x00IEND\xaeB`\x82",
            "image/png",
        ),
    ),
)
def test_mimetype_set(content, expected_mimetype):
    """The mimetype is detected from the content once the upload completes."""
    creator = UserFactory.build(pk=42)
    upload = UserUpload(creator=creator)
    upload.create_multipart_upload()

    # Before completion the model default is still in place
    assert upload.mimetype == "application/octet-stream"

    # Upload the content as the single part of the multipart upload
    part_url = upload.generate_presigned_urls(part_numbers=[1])["1"]
    put_response = put(part_url, data=content)
    uploaded_part = {"ETag": put_response.headers["ETag"], "PartNumber": 1}
    upload.complete_multipart_upload(parts=[uploaded_part])

    assert upload.mimetype == expected_mimetype