From afb5f600ba8283852ab54a3f9556c7747645f671 Mon Sep 17 00:00:00 2001 From: Daniel Grossmann-Kavanagh Date: Wed, 27 Feb 2019 16:22:32 +1030 Subject: [PATCH] files API --- .envs/.local/.django | 3 + Pipfile | 4 + Pipfile.lock | 124 ++++- cli/psql | 2 +- config/settings/base.py | 8 + config/settings/test.py | 1 + local.yml | 1 + missioncontrol/api.py | 2 +- missioncontrol/datalake/__init__.py | 0 missioncontrol/datalake/admin.py | 6 + missioncontrol/datalake/apps.py | 5 + .../datalake/migrations/0001_initial.py | 59 +++ .../datalake/migrations/__init__.py | 0 missioncontrol/datalake/models.py | 287 +++++++++++ missioncontrol/datalake/urls.py | 3 + missioncontrol/home/models.py | 2 +- missioncontrol/openapi/openapi.yaml | 468 ++++++++++++++++-- missioncontrol/tests/conftest.py | 59 ++- missioncontrol/tests/test_files.py | 383 ++++++++++++++ missioncontrol/tests/test_files_admin.py | 51 ++ missioncontrol/v0/files.py | 109 ++++ missioncontrol/v0/files_admin.py | 15 + 22 files changed, 1550 insertions(+), 42 deletions(-) create mode 100644 missioncontrol/datalake/__init__.py create mode 100644 missioncontrol/datalake/admin.py create mode 100644 missioncontrol/datalake/apps.py create mode 100644 missioncontrol/datalake/migrations/0001_initial.py create mode 100644 missioncontrol/datalake/migrations/__init__.py create mode 100644 missioncontrol/datalake/models.py create mode 100644 missioncontrol/datalake/urls.py create mode 100644 missioncontrol/tests/test_files.py create mode 100644 missioncontrol/tests/test_files_admin.py create mode 100644 missioncontrol/v0/files.py create mode 100644 missioncontrol/v0/files_admin.py diff --git a/.envs/.local/.django b/.envs/.local/.django index bb2c772..08cda96 100644 --- a/.envs/.local/.django +++ b/.envs/.local/.django @@ -1,5 +1,8 @@ # General # ------------------------------------------------------------------------------ +DATALAKE_STRICT_WHATS=False +DATALAKE_STRICT_WORK_ID=False USE_DOCKER=yes IPYTHONDIR=/app/.ipython 
DJANGO_JWT_SECRET=ILIKEASCREThowlongdoesitNeedtTOBeHey +DJANGO_FILE_STORAGE_PATH=/tmp/django-file-storage diff --git a/Pipfile b/Pipfile index 3ffdae1..36bbfcb 100644 --- a/Pipfile +++ b/Pipfile @@ -23,6 +23,10 @@ python-jose = "*" tabulate = "*" django-environ = "*" python-dateutil = "*" +boto3 = "*" +datalake-common-dtkav = ">=0.29" +py-multihash = "*" +py-multibase = "*" [dev-packages] pylint = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 68da64c..67e46f2 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "ddb449d2765b7d47b73bf2c2218fda8fe32f187d62d32deedf82d328ac7b9cab" + "sha256": "06314cd88c13697b62fa8023b594ead38863813f8428739a2d9cbddbd4901b6f" }, "pipfile-spec": 6, "requires": { @@ -82,6 +82,14 @@ ], "version": "==0.1.0" }, + "base58": { + "hashes": [ + "sha256:1e42993c0628ed4f898c03b522b26af78fb05115732549b21a028bc4633d19ab", + "sha256:6aa0553e477478993588303c54659d15e3c17ae062508c854a8b752d07c716bd", + "sha256:9a793c599979c497800eb414c852b80866f28daaed5494703fc129592cc83e60" + ], + "version": "==1.0.3" + }, "bitarray": { "hashes": [ "sha256:050cd30b810ddb3aa941e7ddfbe0d8065e793012d0a88cb5739ec23624b9895e" @@ -89,6 +97,21 @@ "index": "pypi", "version": "==0.8.3" }, + "boto3": { + "hashes": [ + "sha256:3927beac97e5467f869d63d60920b83c2d39964f69fbf944bc1db724116bfe1a", + "sha256:be88cae6f16bb9fe3b850b6c8259b297f60b46855175cadae57594c9a403c582" + ], + "index": "pypi", + "version": "==1.9.124" + }, + "botocore": { + "hashes": [ + "sha256:bb756a8da2c6e3ccf42dccb0ac71c1df2e07844db339183da06f4e0285b251d0", + "sha256:fc7560a2676df2f0bab4ef0638277b86e5a00944c2ce1c3bb124b3066e6d3d2a" + ], + "version": "==1.12.124" + }, "brotli": { "hashes": [ "sha256:0538dc1744fd17c314d2adc409ea7d1b779783b89fd95bcfb0c2acc93a6ea5a7", @@ -194,6 +217,14 @@ "index": "pypi", "version": "==2.2.0" }, + "datalake-common-dtkav": { + "hashes": [ + "sha256:285745f0981df7878b1194f09c30d7c204a03805731d9740e491d366810cf7b8", + 
"sha256:d834071405ecb81fc856568d2dc0e4c3f82110b954e1a864d87ee53705280813" + ], + "index": "pypi", + "version": "==0.29" + }, "decorator": { "hashes": [ "sha256:86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de", @@ -220,6 +251,14 @@ "index": "pypi", "version": "==0.4.5" }, + "docutils": { + "hashes": [ + "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", + "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", + "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" + ], + "version": "==0.14" + }, "ecdsa": { "hashes": [ "sha256:40d002cf360d0e035cf2cb985e1308d41aaa087cbfc135b2dc2d844296ea546c", @@ -263,10 +302,10 @@ }, "ipdb": { "hashes": [ - "sha256:7081c65ed7bfe7737f83fa4213ca8afd9617b42ff6b3f1daf9a3419839a2a00a" + "sha256:dce2112557edfe759742ca2d0fee35c59c97b0cc7a05398b791079d78f1519ce" ], "index": "pypi", - "version": "==0.11" + "version": "==0.12" }, "ipython": { "hashes": [ @@ -303,6 +342,13 @@ ], "version": "==2.10" }, + "jmespath": { + "hashes": [ + "sha256:3720a4b1bd659dd2eecad0666459b9788813e032b83e7ba58578e48254e0a0e6", + "sha256:bde2aef6f44302dfb30320115b17d030798de8c4110e28d5cf6cf91a7a31074c" + ], + "version": "==0.9.4" + }, "jplephem": { "hashes": [ "sha256:9dffb9f3d3f6d996ade875102431fe385e8ea422da25c8ba17b0508d9ca1282b" @@ -357,6 +403,12 @@ "markers": "python_version > '2.7'", "version": "==7.0.0" }, + "morphys": { + "hashes": [ + "sha256:76d6dbaa4d65f597e59d332c81da786d83e4669387b9b2a750cfec74e7beec20" + ], + "version": "==1.0" + }, "numpy": { "hashes": [ "sha256:1980f8d84548d74921685f68096911585fee393975f53797614b34d4f409b6da", @@ -486,6 +538,22 @@ ], "version": "==1.8.0" }, + "py-multibase": { + "hashes": [ + "sha256:6ed706ea321b487ba82e4172a9c82d61dacd675c865f576a937a94bca1a23443", + "sha256:dde22b8d1c544e4636beefa1cf62c4131d25a57261c2b1c41f7d8a3f0142b4bc" + ], + "index": "pypi", + "version": "==1.0.1" + }, + "py-multihash": { + "hashes": [ + 
"sha256:a0602c99093587dfbf1634e2e8c7726de39374b0d68587a36093b4c237af6969", + "sha256:f0ade4de820afdc4b4aaa40464ec86c9da5cae3a4578cda2daab4b0eb7e5b18d" + ], + "index": "pypi", + "version": "==0.2.3" + }, "pyasn1": { "hashes": [ "sha256:da2420fe13a9452d8ae97a0e478adde1dee153b11ba832a95b223a2ba01c10f7", @@ -528,6 +596,12 @@ "index": "pypi", "version": "==3.4.8" }, + "python-baseconv": { + "hashes": [ + "sha256:1b98b11d0d1c00bf1165d62b0d183c8d2d496ae5baaa0991c0d4ffef079772d6" + ], + "version": "==1.2.1" + }, "python-dateutil": { "hashes": [ "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", @@ -536,6 +610,13 @@ "index": "pypi", "version": "==2.8.0" }, + "python-dotenv": { + "hashes": [ + "sha256:a84569d0e00d178bc5b957f7ff208bf49287cbf61857c31c258c4a91f571527b", + "sha256:c9b1ddd3cdbe75c7d462cb84674d87130f4b948f090f02c7d7144779afb99ae0" + ], + "version": "==0.10.1" + }, "python-jose": { "hashes": [ "sha256:29701d998fe560e52f17246c3213a882a4a39da7e42c7015bcc1f7823ceaff1c", @@ -583,6 +664,13 @@ ], "version": "==4.0" }, + "s3transfer": { + "hashes": [ + "sha256:7b9ad3213bff7d357f888e0fab5101b56fa1a0548ee77d121c3a3dbfbef4cb2e", + "sha256:f23d5cb7d862b104401d9021fc82e5fa0e0cf57b7660a1331425aab0c691d021" + ], + "version": "==0.2.0" + }, "scipy": { "hashes": [ "sha256:014cb900c003b5ac81a53f2403294e8ecf37aedc315b59a6b9370dce0aa7627a", @@ -623,6 +711,23 @@ ], "version": "==1.4" }, + "simplejson": { + "hashes": [ + "sha256:067a7177ddfa32e1483ba5169ebea1bc2ea27f224853211ca669325648ca5642", + "sha256:2fc546e6af49fb45b93bbe878dea4c48edc34083729c0abd09981fe55bdf7f91", + "sha256:354fa32b02885e6dae925f1b5bbf842c333c1e11ea5453ddd67309dc31fdb40a", + "sha256:37e685986cf6f8144607f90340cff72d36acf654f3653a6c47b84c5c38d00df7", + "sha256:3af610ee72efbe644e19d5eaad575c73fb83026192114e5f6719f4901097fce2", + "sha256:3b919fc9cf508f13b929a9b274c40786036b31ad28657819b3b9ba44ba651f50", + "sha256:3dd289368bbd064974d9a5961101f080e939cbe051e6689a193c99fb6e9ac89b", + 
"sha256:6c3258ffff58712818a233b9737fe4be943d306c40cf63d14ddc82ba563f483a", + "sha256:75e3f0b12c28945c08f54350d91e624f8dd580ab74fd4f1bbea54bc6b0165610", + "sha256:b1f329139ba647a9548aa05fb95d046b4a677643070dc2afc05fa2e975d09ca5", + "sha256:ee9625fc8ee164902dfbb0ff932b26df112da9f871c32f0f9c1bcf20c350fe2a", + "sha256:fb2530b53c28f0d4d84990e945c2ebb470edb469d63e389bf02ff409012fe7c5" + ], + "version": "==3.16.0" + }, "six": { "hashes": [ "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", @@ -671,8 +776,15 @@ "sha256:61bf29cada3fc2fbefad4fdf059ea4bd1b4a86d2b6d15e1c7c0b582b9752fe39", "sha256:de9529817c93f27c8ccbfead6985011db27bd0ddfcdb2d86f3f663385c6a9c22" ], + "markers": "python_version >= '3.4'", "version": "==1.24.1" }, + "varint": { + "hashes": [ + "sha256:a6ecc02377ac5ee9d65a6a8ad45c9ff1dac8ccee19400a5950fb51d594214ca5" + ], + "version": "==1.0.2" + }, "wcwidth": { "hashes": [ "sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", @@ -821,10 +933,12 @@ }, "rope": { "hashes": [ - "sha256:031eb54b3eeec89f4304ede816995ed2b93a21e6fba16bd02aff10a0d6c257b7" + "sha256:6b728fdc3e98a83446c27a91fc5d56808a004f8beab7a31ab1d7224cecc7d969", + "sha256:c5c5a6a87f7b1a2095fb311135e2a3d1f194f5ecb96900fdd0a9100881f48aaf", + "sha256:f0dcf719b63200d492b85535ebe5ea9b29e0d0b8aebeb87fe03fc1a65924fdaf" ], "index": "pypi", - "version": "==0.12.0" + "version": "==0.14.0" }, "six": { "hashes": [ diff --git a/cli/psql b/cli/psql index 5a24cf7..aee10ff 100755 --- a/cli/psql +++ b/cli/psql @@ -2,5 +2,5 @@ COMPOSE_ENV=${COMPOSE_ENV:-local.yml} CMD='PGPASSWORD=$POSTGRES_PASSWORD psql -h $POSTGRES_HOST -U $POSTGRES_USER -p $POSTGRES_PORT $POSTGRES_DB' -docker-compose -f $COMPOSE_ENV run --rm postgres \ +docker-compose -f $COMPOSE_ENV exec postgres \ bash -c "$CMD" diff --git a/config/settings/base.py b/config/settings/base.py index 5dccd5f..94801f0 100644 --- a/config/settings/base.py +++ b/config/settings/base.py @@ -74,6 +74,7 @@ LOCAL_APPS = [ # Your 
stuff: custom apps go here 'home', + 'datalake', ] # https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps INSTALLED_APPS = DJANGO_APPS + THIRD_PARTY_APPS + LOCAL_APPS @@ -260,6 +261,11 @@ } +# Datalake +# ------------------------------------------------------------------------------ +DATALAKE_STRICT_WHATS = True +DATALAKE_STRICT_WORK_ID = True + # Your stuff... # ------------------------------------------------------------------------------ EPHEM_DIR = os.path.abspath(os.path.join(APPS_DIR, 'ephemeris')) @@ -279,3 +285,5 @@ def immutable_file_test(path, url): WHITENOISE_IMMUTABLE_FILE_TEST = immutable_file_test + +FILE_STORAGE_PATH = env.str('DJANGO_FILE_STORAGE_PATH') diff --git a/config/settings/test.py b/config/settings/test.py index 72d180f..b8c0482 100644 --- a/config/settings/test.py +++ b/config/settings/test.py @@ -53,3 +53,4 @@ # Your stuff... # ------------------------------------------------------------------------------ +FILE_STORAGE_PATH = 's3://bucketname/django-file-storage/' diff --git a/local.yml b/local.yml index 1e367bd..d1288f9 100644 --- a/local.yml +++ b/local.yml @@ -20,6 +20,7 @@ services: ports: - "8000:8000" command: /start + restart: always frontend: build: diff --git a/missioncontrol/api.py b/missioncontrol/api.py index 617c876..602dc08 100644 --- a/missioncontrol/api.py +++ b/missioncontrol/api.py @@ -19,7 +19,7 @@ def object_does_not_exist(exception): def validation_error(exception): - problem = connexion.problem(400, "Validation Error", str(exception)) + problem = connexion.problem(400, "Validation Error", exception.messages) return connexion.FlaskApi.get_response(problem) diff --git a/missioncontrol/datalake/__init__.py b/missioncontrol/datalake/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/missioncontrol/datalake/admin.py b/missioncontrol/datalake/admin.py new file mode 100644 index 0000000..44723d8 --- /dev/null +++ b/missioncontrol/datalake/admin.py @@ -0,0 +1,6 @@ +from django.contrib import 
admin +from django import forms +from . import models + +admin.site.register(models.What) +admin.site.register(models.RelatedFile) diff --git a/missioncontrol/datalake/apps.py b/missioncontrol/datalake/apps.py new file mode 100644 index 0000000..4d32659 --- /dev/null +++ b/missioncontrol/datalake/apps.py @@ -0,0 +1,5 @@ +from django.apps import AppConfig + + +class DatalakeConfig(AppConfig): + name = 'datalake' diff --git a/missioncontrol/datalake/migrations/0001_initial.py b/missioncontrol/datalake/migrations/0001_initial.py new file mode 100644 index 0000000..b91e742 --- /dev/null +++ b/missioncontrol/datalake/migrations/0001_initial.py @@ -0,0 +1,59 @@ +# Generated by Django 2.1.7 on 2019-03-20 06:35 + +import datalake.models +from django.db import migrations, models +import django.db.models.deletion +import django.utils.timezone +import uuid + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('contenttypes', '0002_remove_content_type_name'), + ] + + operations = [ + migrations.CreateModel( + name='DatalakeFile', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('uuid', models.UUIDField(default=uuid.uuid4, unique=True)), + ('cid', models.CharField(max_length=33)), + ('what', models.TextField()), + ('where', models.TextField()), + ('path', models.TextField(blank=True, null=True)), + ('start', datalake.models.ISODateTimeField(default=django.utils.timezone.now, help_text='The time of the first event in the file. If instantaneous, set this and leave end as null')), + ('end', datalake.models.ISODateTimeField(blank=True, help_text='The time of the last event in the file. 
Can be blank if instantaneous file.', null=True)), + ('created', datalake.models.ISODateTimeField(auto_now_add=True)), + ('work_id', models.TextField(blank=True, null=True)), + ('version', models.IntegerField(choices=[(1, 1)])), + ], + options={'get_latest_by': ('start', 'created'), 'ordering': ('-start', 'created')}, + bases=(models.Model, datalake.models.Serializable), + ), + migrations.CreateModel( + name='RelatedFile', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('work_id', models.CharField(max_length=256)), + ('object_id', models.PositiveIntegerField()), + ('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='contenttypes.ContentType')), + ], + ), + migrations.CreateModel( + name='What', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('what', models.CharField(max_length=128, unique=True)), + ], + bases=(models.Model, datalake.models.Serializable), + ), + migrations.AddField( + model_name='datalakefile', + name='_related_to', + field=models.ForeignKey(blank=True, editable=False, null=True, on_delete=django.db.models.deletion.SET_NULL, to='datalake.RelatedFile'), + ), + ] diff --git a/missioncontrol/datalake/migrations/__init__.py b/missioncontrol/datalake/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/missioncontrol/datalake/models.py b/missioncontrol/datalake/models.py new file mode 100644 index 0000000..9632132 --- /dev/null +++ b/missioncontrol/datalake/models.py @@ -0,0 +1,287 @@ +import logging +import six + +import json +import datetime +from itertools import chain +from uuid import uuid4 + +from connexion.exceptions import ProblemException +from datalake_common import Metadata, InvalidDatalakeMetadata +from django.apps import apps +from django.conf import settings +from django.contrib.contenttypes.fields import GenericForeignKey +from 
django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ValidationError, ObjectDoesNotExist +from django.db import models +from django.db.models import Max +from django.db.models.signals import pre_save, post_save +from django.dispatch import receiver +from django.utils import timezone, dateformat +import boto3 +from pytz import UTC + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + + +@receiver(pre_save) +def pre_save_handler(sender, instance, *args, **kwargs): + # always validate local models before saving + if sender in apps.all_models['datalake'].values(): + instance.full_clean() + + +class DatalakeJSONEncoder(json.JSONEncoder): + """ + JSONEncoder subclass that knows how to encode date/time, decimal types, and + UUIDs. + Based on DjangoJSONEncoder but uses ISO8601 with microsecond precision + instead of ECMA-262 + """ + def default(self, o): + # See "Date Time String Format" in the ECMA-262 specification. + if isinstance(o, datetime.datetime): + r = o.isoformat('T', timespec='microseconds') + if r.endswith('+00:00'): + r = r[:-6] + 'Z' + return r + elif isinstance(o, datetime.date): + return o.isoformat() + elif isinstance(o, datetime.time): + if is_aware(o): + raise ValueError("JSON can't represent timezone-aware times.") + r = o.isoformat() + return r + elif isinstance(o, datetime.timedelta): + return duration_iso_string(o) + elif isinstance(o, (decimal.Decimal, uuid.UUID, Promise)): + return str(o) + else: + return super().default(o) + + +class Serializable(object): + """ A mixin for turning the django record into an object that can be + serialized. + """ + + def to_dict(self): + """ Adapted from django.forms.models.model_to_dict() + https://docs.djangoproject.com/en/2.1/_modules/django/forms/models/ + + The main difference is that this includes fields that are not editable. 
+ """ + opts = self._meta + data = {} + for f in chain(opts.concrete_fields, opts.private_fields, opts.many_to_many): + if f.name == 'id': + continue + + if f.name == '_related_to': + continue + + if f.name == 'uuid': + data[f.name] = str(f.value_from_object(self)) + continue + + data[f.name] = f.value_from_object(self) + return data + + +class ISODateTimeField(models.DateTimeField): + """ + We *REALLY* want stuff to be UTC here + + Check it and convert if nessesary at all steps + """ + def value_to_string(self, obj): + val = self.value_from_object(obj) + if val: + if val.tzinfo is None: + raise ValueError("Naive timezone was passed in") + if val.tzinfo != UTC: + val = val.astimezone(tz=UTC) + formatter = dateformat.DateFormat(val) + return formatter.format(settings.DATETIME_FORMAT) + return '' + + def to_python(self, value): + result = super().to_python(value) + if result: + if result.tzinfo is None: + raise ValidationError("Timezone must be specified") + if result.tzinfo != UTC: + result = result.astimezone(tz=UTC) + return result + + +class What(models.Model, Serializable): + what = models.CharField(unique=True, max_length=128) + + def __repr__(self): + return self.what + + def __str__(self): + return self.__repr__() + + +class RelatedFile(models.Model): + work_id = models.CharField(max_length=256) + content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) + object_id = models.PositiveIntegerField() + content_object = GenericForeignKey('content_type', 'object_id') + + def __str__(self): + return self.work_id + + @classmethod + def from_datalake_file(cls, dlfile): + try: + model, uuid = dlfile.work_id.split(".") + if model.startswith("mc-"): + model = model[len("mc-"):] + else: + return None + ct = ContentType(app_label="home", model=model) + try: + obj = ct.get_object_for_this_type(uuid=uuid) + rel = cls(content_object=obj, work_id=dlfile.work_id) + rel.save() + return rel + except ObjectDoesNotExist: + if not settings.DATALAKE_STRICT_WORK_ID: + 
return + + raise ProblemException( + status=400, + title='ValidationError', + detail=('work_id is in the missioncontrol namespace, ' + 'but the object with the given id does not exist.'), + ext={ + "work_id": dlfile.work_id, + "model": model, + "id": uuid + } + ) + except (IndexError, AttributeError) as e: + pass + + +class DatalakeFile(models.Model, Serializable): + uuid = models.UUIDField(default=uuid4, unique=True) + cid = models.CharField(max_length=33) + what = models.TextField() + where = models.TextField() + path = models.TextField(null=True, blank=True) + start = ISODateTimeField( + help_text='The time of the first event in the file. ' + 'If instantaneous, set this and leave end as null', + default=timezone.now) + end = ISODateTimeField( + help_text='The time of the last event in the file. ' + 'Can be blank if instantaneous file.', + null=True, blank=True) + created = ISODateTimeField(auto_now_add=True) + work_id = models.TextField(null=True, blank=True) + version = models.IntegerField(choices=((1, 1),)) # FIXME + _related_to = models.ForeignKey(RelatedFile, on_delete=models.SET_NULL, + null=True, editable=False, blank=True) + + class Meta: + ordering = ('-start', 'created') + get_latest_by = ('start', 'created') + + @property + def related(self): + if self._related_to is None: + self._related_to = RelatedFile.from_datalake_file(self) + if self._related_to is not None: + return self._related_to.content_object + + def clean(self): + try: + Metadata(self.to_dict()) + except InvalidDatalakeMetadata as e: + raise ProblemException( + status=400, + title='InvalidDatalakeMetadata', + detail=e.args[0], + ext={"invalid_object": self.to_dict()} + ) + if ( + settings.DATALAKE_STRICT_WHATS and + not What.objects.filter(what=self.what).exists() + ): + raise ProblemException( + status=400, + title='ValidationError', + detail=f'Unknown what: {self.what}', + ext={"invalid_object": self.to_dict()} + ) + if self._related_to is None: + self._related_to = 
RelatedFile.from_datalake_file(self) + + # s3://bucket/some_path + @property + def prefix(self): + path = settings.FILE_STORAGE_PATH + if path.startswith('s3://'): + return '/'.join(path.split('/')[3:]) + # TODO + raise NotImplementedError("Not yet implemented non s3 paths") + + @property + def bucket(self): + path = settings.FILE_STORAGE_PATH + if path.startswith('s3://'): + return path.split('/')[2] + # TODO + raise NotImplementedError("Not yet implemented non s3 paths") + + @property + def key(self): + return f'{self.prefix}{self.cid}/data' + + @property + def metadata_key(self): + return f'{self.prefix}{self.cid}/metadata/{self.uuid}' + + def get_download_url(self): + s3 = boto3.client('s3') + url = s3.generate_presigned_url( + ClientMethod='get_object', + Params={ + 'Bucket': self.bucket, + 'Key': self.key, + } + ) + return url + + @classmethod + def get_post_data_fields(cls, **kwargs): + # Create the object but don't save it + obj = cls(**kwargs) + s3 = boto3.client('s3') + post = s3.generate_presigned_post( + Bucket=obj.bucket, + Key=obj.key, + Fields={"Content-Encoding": "gzip"}, + Conditions=[ + ["eq", "$Content-Encoding", "gzip"], + ] + ) + return post + + +@receiver(post_save, sender=DatalakeFile) +def save_metadata_to_s3(sender, instance, *args, **kwargs): + # store metadata for recovery + s3 = boto3.client('s3') + s3.put_object( + Body=json.dumps(instance.to_dict(), cls=DatalakeJSONEncoder, sort_keys=True), + Bucket=instance.bucket, + Key=instance.metadata_key, + ContentType="application/json" + ) + return instance diff --git a/missioncontrol/datalake/urls.py b/missioncontrol/datalake/urls.py new file mode 100644 index 0000000..10d5a5b --- /dev/null +++ b/missioncontrol/datalake/urls.py @@ -0,0 +1,3 @@ +from django.conf.urls import url + +urlpatterns = [] diff --git a/missioncontrol/home/models.py b/missioncontrol/home/models.py index bccf6a4..82fd305 100644 --- a/missioncontrol/home/models.py +++ b/missioncontrol/home/models.py @@ -73,7 +73,7 @@ def 
to_dict(self): opts = self._meta data = {} for f in chain(opts.concrete_fields, opts.private_fields, opts.many_to_many): - if f.name is 'id': + if f.name == 'id': continue data[f.name] = f.value_from_object(self) return data diff --git a/missioncontrol/openapi/openapi.yaml b/missioncontrol/openapi/openapi.yaml index 67a7992..7ab5279 100644 --- a/missioncontrol/openapi/openapi.yaml +++ b/missioncontrol/openapi/openapi.yaml @@ -867,6 +867,7 @@ paths: schema: $ref: '#/components/schemas/Error' + /accesses/: get: tags: ['accesses'] @@ -999,6 +1000,397 @@ paths: application/json: schema: $ref: '#/components/schemas/Error' + + # Some of the structure here taken from + # https://github.com/planetlabs/datalake-api + # and modified to suit. + /files/: + get: + tags: ['files'] + description: Search for files + operationId: v0.files.search + parameters: + - in: query + name: cid + description: + The content ID of a file (base32-encoded blake2b-16 multihash) + schema: + type: string + maxLength: 33 + minLength: 33 + - in: query + name: what + description: + Only return this type of file. + schema: + type: string + required: true + - in: query + name: where + description: + Only return files from here. + schema: + type: string + - in: query + name: range_start + description: + Only return files with data after this start time. + schema: + type: string + format: date-time + - in: query + name: range_end + description: + Only return files with data before this end time. + schema: + type: string + format: date-time + - in: query + name: range_inclusive + description: | + If the time range should include accesses that partially overlap + the range_start or range_end. 
+ + * both - all accesses that overlap the range are returned + * start - accesses overlapping the range_start time are returned + * end - accesses overlapping the range_end time are returned + * neither - only returns accesses that are fully contained by the range + schema: + type: string + default: both + enum: + - start + - end + - both + - neither + - in: query + name: limit + schema: + type: integer + default: 100 + - in: query + name: order_by + description: | + Order the results by this field. Use `-` to reverse the ordering. + schema: + type: string + default: "-start" + enum: + - "start" + - "-start" + responses: + 200: + description: A list of files + content: + application/json: + schema: + $ref: '#/components/schemas/Files' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /files/cid/{cid}/: + get: + tags: ['files'] + description: Search for file metadata by content id + operationId: v0.files.search_by_cid + parameters: + - in: path + name: cid + required: true + description: Only return metadata for this cid + schema: + type: string + minLength: 33 + maxLength: 33 + - in: query + name: limit + schema: + type: integer + default: 100 + responses: + 200: + description: A list of files + content: + application/json: + schema: + $ref: '#/components/schemas/Files' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + + /files/work-id/{work_id}/: + get: + tags: ['files'] + description: Search for files as they relate to a work_id + operationId: v0.files.search_work_id + parameters: + - in: path + name: work_id + required: true + description: + Only return files with this work_id. + schema: + type: string + format: uuid + - in: query + name: what + description: + Only return this type of file. + schema: + type: string + - in: query + name: where + description: + Only return files from here. 
+ schema: + type: string + - in: query + name: limit + schema: + type: integer + default: 100 + responses: + 200: + description: A list of files + content: + application/json: + schema: + $ref: '#/components/schemas/Files' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /files/latest/{what}/{where}/: + get: + tags: ['files'] + description: Get the latest 'what' from a 'where' + operationId: v0.files.get_latest + parameters: + - in: path + required: true + name: what + schema: + type: string + - in: path + required: true + name: where + schema: + type: string + responses: + 200: + description: The details of a file + content: + application/json: + schema: + $ref: '#/components/schemas/File' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /files/presign/: + post: + tags: ['files'] + description: Get the fields required for uploading a new file + operationId: v0.files.presign_upload + requestBody: + content: + application/json: + schema: + x-body-name: file_meta + $ref: "#/components/schemas/File" + responses: + 200: + description: The fields required for a file upload + content: + application/json: + schema: + $ref: '#/components/schemas/SignedUpload' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /files/{uuid}/: + get: + tags: ['files'] + description: Get the details of a file + operationId: v0.files.get + parameters: + - in: path + required: true + name: uuid + schema: + type: string + responses: + 200: + description: The details of a file + content: + application/json: + schema: + $ref: '#/components/schemas/File' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + put: + tags: ['files'] + description: Create a new file container + operationId: v0.files.put + 
parameters: + - in: path + required: true + name: uuid + schema: + type: string + requestBody: + content: + application/json: + schema: + x-body-name: file_meta + $ref: "#/components/schemas/File" + responses: + 200: + description: The details of a file + content: + application/json: + schema: + $ref: '#/components/schemas/File' + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /files/{uuid}/data/: + get: + tags: ['files'] + description: Get the data contents of a file + operationId: v0.files.get_data + parameters: + - in: path + required: true + name: uuid + schema: + type: string + responses: + 302: + description: A redirect to the URL to get the data from + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /files/content/{cid}/: + get: + tags: ['files'] + description: Get file contents via the content id + operationId: v0.files.get_raw + parameters: + - in: path + required: true + name: cid + schema: + type: string + responses: + 302: + description: A redirect to the URL to get the data from + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /datalake/admin/whats/: + get: + security: + - jwt: ['admins'] + tags: ['admin'] + description: list allowed datalake whats + operationId: v0.files_admin.search_whats + responses: + 200: + description: A list of whats + content: + application/json: + schema: + type: array + items: + type: string + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + /datalake/admin/whats/{what}/: + parameters: + - in: path + required: true + name: what + schema: + type: string + put: + tags: ['admin'] + security: + - jwt: ['admins'] + description: add allowed datalake whats + operationId: v0.files_admin.put_what + responses: + 200: + description: OK + 
content: + text/plain: + schema: + type: string + 201: + description: Created + content: + text/plain: + schema: + type: string + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + delete: + tags: ['admin'] + security: + - jwt: ['admins'] + description: add allowed datalake whats + operationId: v0.files_admin.delete_what + responses: + 204: + description: No content + default: + description: unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + components: securitySchemes: jwt: @@ -1223,54 +1615,66 @@ components: readOnly: true type: string - TaskRun: + Attributes: + type: object + additionalProperties: + type: string + + Files: + type: array + items: + "$ref": "#/components/schemas/File" + + File: + required: + - uuid + - cid + - what + - where + - start properties: uuid: - description: the unique identifier for the TaskRun type: string format: uuid - task: - description: The task that was run + cid: type: string - task_stack: - description: The task stack that spawned this task + minLength: 33 + maxLength: 33 + what: type: string - format: uri - pass: - description: A link to the pass that this task was run on + where: type: string - format: uri - start_time: - description: the start_time of the pass + created: type: string format: date-time - end_time: - description: the end_time of the pass + readOnly: true + start: + type: string + format: date-time + end: type: string format: date-time - exit_code: - description: the exit code of the process + nullable: true + work_id: + type: string + nullable: true + version: type: integer - stdout: - description: a link to the stdout log - readOnly: true + description: the metadata version number + minimum: 1 + maximum: 1 + + SignedUpload: + properties: + url: + description: The url to post to type: string format: uri - stderr: - description: a link to the stderr log readOnly: true - type: string - format: uri - 
_href: - description: link to this TaskStack + url_fields: + description: form fields to add to the post request + type: object readOnly: true - type: string - format: uri - - Attributes: - type: object - additionalProperties: - type: string Error: description: schema for problem+json (RFC 7807) diff --git a/missioncontrol/tests/conftest.py b/missioncontrol/tests/conftest.py index 3b91ec0..41e332e 100644 --- a/missioncontrol/tests/conftest.py +++ b/missioncontrol/tests/conftest.py @@ -1,9 +1,15 @@ -import pytest +import hashlib +import datetime from base64 import b64encode from uuid import uuid4 +import pytest +import multihash +import multibase + from flask.testing import FlaskClient from django.contrib.auth.models import User +from django.utils import timezone class AuthorizedClient(FlaskClient): def __init__(self, *args, **kwargs): @@ -103,18 +109,67 @@ def simple_sat(): @pytest.fixture def simple_pass(simple_sat, simple_gs): return { + "uuid": "9f6236cc-6bce-4e78-b8fa-8de758c20d73", "satellite": simple_sat["hwid"], "groundstation": simple_gs["hwid"], "start_time": "2018-11-25T00:00:00.000000Z", "end_time": "2018-11-25T01:00:00.000000Z", } +@pytest.fixture +def simple_pass2(simple_sat, simple_gs): + return { + "uuid": "9f6236cc-6bce-4e78-b8fa-8de758c20d74", + "satellite": simple_sat["hwid"], + "groundstation": simple_gs["hwid"], + "start_time": "2019-11-25T00:00:00.000000Z", + "end_time": "2019-11-25T01:00:00.000000Z", + } + +@pytest.fixture +def simple_file(some_hash, some_uuid): + return { + 'start': '2019-05-23T00:00:00.000001Z', + 'end': None, + 'where': 'mc-00', + 'what': 'license', + 'path': '/tmp/LICENSE', + 'work_id': 'mc-pass.9f6236cc-6bce-4e78-b8fa-8de758c20d73', + 'cid': some_hash, + 'uuid': some_uuid, + 'version': 1 + } + +@pytest.fixture +def file_gen(some_hash): + t = datetime.datetime.utcnow() + t -= datetime.timedelta(days=360) + def gen(t): + while True: + t += datetime.timedelta(days=1) + yield { + 'start': t.isoformat('T', 
timespec='microseconds') + "Z", + 'end': None, + 'where': 'mc-00.infra.wow', + 'what': 'license', + 'path': '/tmp/LICENSE', + 'work_id': 'mc-pass.9f6236cc-6bce-4e78-b8fa-8de758c20d73', + 'cid': some_hash, + 'uuid': str(uuid4()), + 'version': 1 + } + return gen(t) @pytest.fixture def some_uuid(): return str(uuid4()) - @pytest.fixture def another_uuid(): return str(uuid4()) + +@pytest.fixture +def some_hash(): + b2 = hashlib.blake2b(b'some_hash', digest_size=16) + mh = multihash.encode(b2.digest(), 'blake2b-16') + return multibase.encode('base32', mh).decode("utf-8") diff --git a/missioncontrol/tests/test_files.py b/missioncontrol/tests/test_files.py new file mode 100644 index 0000000..8dbaddb --- /dev/null +++ b/missioncontrol/tests/test_files.py @@ -0,0 +1,383 @@ +import uuid +import json +from unittest.mock import patch, call + +import pytest + +from django.conf import settings +from django.utils import timezone, dateformat + + +# Still needs a database for the user setup. +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_signing(boto3_mock, test_client, some_hash, simple_file): + post_values = { + 'url': 'https://test.example', + 'url_fields': {}, + } + presign_mock = boto3_mock.client.return_value.generate_presigned_post + presign_mock.return_value = post_values + + response = test_client.post( + f'/api/v0/files/presign/', + json=simple_file + ) + assert response.status_code == 200, response.get_data() + assert response.json == post_values + + # we require gzip + presign_mock.assert_called_with( + Bucket=settings.FILE_STORAGE_PATH.split('/')[2], + Key=f'django-file-storage/{some_hash}/data', + Conditions=[['eq', '$Content-Encoding', 'gzip']], + Fields={'Content-Encoding': 'gzip'} + ) + + +# TODO use botocore.Stubber +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_metadata_put(boto3_mock, test_client, some_hash, some_uuid, + simple_file, settings): + settings.DATALAKE_STRICT_WHATS = False + 
settings.DATALAKE_STRICT_WORK_ID = False + + created = timezone.now() + with patch('django.utils.timezone.now', return_value=created): + response = test_client.put( + f'/api/v0/files/{some_uuid}/', + json=simple_file + ) + assert response.status_code == 201, response.get_data() + response = test_client.get(f'/api/v0/files/{some_uuid}/') + + assert response.status_code == 200, response.get_data() + expected = simple_file.copy() + + formatter = dateformat.DateFormat(created) + expected['created'] = formatter.format(settings.DATETIME_FORMAT) + assert response.json == expected + + # ensure metadata is written to S3 for backup + boto3_mock.assert_has_calls([ + call.client('s3'), + call.client().put_object( + Body=json.dumps(expected, sort_keys=True), + ContentType='application/json', + Bucket='bucketname', + Key=f'django-file-storage/{some_hash}/metadata/{some_uuid}'), + ]) + + +# TODO use botocore.Stubber +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_metadata_put_strict_whats(boto3_mock, test_client, some_hash, + some_uuid, simple_file, settings): + settings.DATALAKE_STRICT_WHATS = True + settings.DATALAKE_STRICT_WORK_ID = False + wat = simple_file["what"] + + created = timezone.now() + with patch('django.utils.timezone.now', return_value=created): + response = test_client.put( + f'/api/v0/files/{some_uuid}/', + json=simple_file + ) + # expect failure, unknown what + assert response.status_code == 400, response.get_data() + assert response.json["detail"] == f"Unknown what: {wat}" + + # add file what to allowed whats + response = test_client.put(f'/api/v0/datalake/admin/whats/{wat}/') + assert response.status_code == 201, response.get_data() + + # try PUT again + with patch('django.utils.timezone.now', return_value=created): + response = test_client.put( + f'/api/v0/files/{some_uuid}/', + json=simple_file + ) + assert response.status_code == 201, response.get_data() + + # check with GET + response = 
test_client.get(f'/api/v0/files/{some_uuid}/') + assert response.status_code == 200, response.get_data() + + +# TODO use botocore.Stubber +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_metadata_put_strict_work_id(boto3_mock, test_client, some_hash, + some_uuid, simple_file, simple_sat, + simple_gs, settings): + settings.DATALAKE_STRICT_WHATS = True + settings.DATALAKE_STRICT_WORK_ID = True + wat = simple_file["what"] + + created = timezone.now() + with patch('django.utils.timezone.now', return_value=created): + response = test_client.put( + f'/api/v0/files/{some_uuid}/', + json=simple_file + ) + # expect failure, pass does not exist + assert response.status_code == 400, response.get_data() + + # add file what to allowed whats + response = test_client.put(f'/api/v0/datalake/admin/whats/{wat}/') + assert response.status_code == 201, response.get_data() + + # try PUT again + def create_asset(asset_type, asset): + asset_hwid = asset["hwid"] + response = test_client.put( + f"/api/v0/{asset_type}s/{asset_hwid}/", + json=asset + ) + + create_asset('satellite', simple_sat) + create_asset('groundstation', simple_gs) + + _pass = { + "satellite": simple_sat["hwid"], + "groundstation": simple_gs["hwid"], + "start_time": "2018-11-25T00:00:00Z", + "end_time": "2018-11-25T01:00:00Z", + } + + # create + response = test_client.put( + f'/api/v0/passes/{simple_file["work_id"].split(".")[1]}/', + json=_pass + ) + assert response.status_code == 201 + assert response.json + + with patch('django.utils.timezone.now', return_value=created): + response = test_client.put( + f'/api/v0/files/{some_uuid}/', + json=simple_file + ) + assert response.status_code == 201, response.get_data() + + # check with GET + response = test_client.get(f'/api/v0/files/{some_uuid}/') + assert response.status_code == 200, response.get_data() + + +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_download(boto3_mock, test_client, simple_file, some_uuid, + settings): + 
settings.DATALAKE_STRICT_WHATS = False + settings.DATALAKE_STRICT_WORK_ID = False + test_url = 'http://someurl' + signed_get_mock = boto3_mock.client.return_value.generate_presigned_url + signed_get_mock.return_value = test_url + response = test_client.put(f'/api/v0/files/{some_uuid}/', json=simple_file) + assert response.status_code == 201, response.get_data() + + response = test_client.get(f'/api/v0/files/{some_uuid}/data/') + assert response.status_code == 302, response.get_data() + + assert response.headers['Location'] == test_url + + +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_raw_content_download(boto3_mock, test_client, simple_file, some_uuid, + settings): + settings.DATALAKE_STRICT_WHATS = False + settings.DATALAKE_STRICT_WORK_ID = False + test_url = 'http://someurl' + signed_get_mock = boto3_mock.client.return_value.generate_presigned_url + signed_get_mock.return_value = test_url + response = test_client.put(f'/api/v0/files/{some_uuid}/', json=simple_file) + assert response.status_code == 201, response.get_data() + + response = test_client.get(f'/api/v0/files/content/{simple_file["cid"]}/') + assert response.status_code == 302, response.get_data() + assert response.headers['Location'] == test_url + + +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_latest_search(boto3_mock, test_client, file_gen, settings): + settings.DATALAKE_STRICT_WHATS = False + settings.DATALAKE_STRICT_WORK_ID = False + + last = None + for _ in range(0, 10): + last = f = next(file_gen) + response = test_client.put( + f'/api/v0/files/{f["uuid"]}/', + json=f + ) + assert response.status_code == 201, response.get_data() + + response = test_client.get( + f'/api/v0/files/latest/{f["what"]}/{f["where"]}/') + assert response.status_code == 200, response.get_data() + resp = response.json + resp.pop("created") + assert resp == last + + +@pytest.mark.django_db +def test_file_latest_search_empty(test_client, some_uuid): + response = 
test_client.get(f'/api/v0/files/latest/a_thing/mc-flat/') + assert response.status_code == 404, response.get_data() + assert response.json + + +@patch('datalake.models.boto3') +@pytest.mark.django_db +def test_file_work_id_search(boto3_mock, test_client, some_uuid, simple_file, + simple_sat, simple_gs): + settings.DATALAKE_STRICT_WHATS = True + settings.DATALAKE_STRICT_WORK_ID = True + + wat = simple_file["what"] + + created = timezone.now() + + def create_asset(asset_type, asset): + asset_hwid = asset["hwid"] + response = test_client.put( + f"/api/v0/{asset_type}s/{asset_hwid}/", + json=asset + ) + + create_asset('satellite', simple_sat) + create_asset('groundstation', simple_gs) + + _pass = { + "satellite": simple_sat["hwid"], + "groundstation": simple_gs["hwid"], + "start_time": "2018-11-25T00:00:00Z", + "end_time": "2018-11-25T01:00:00Z", + } + + # create + response = test_client.put( + f'/api/v0/passes/{simple_file["work_id"].split(".")[1]}/', + json=_pass + ) + assert response.status_code == 201 + assert response.json + + # add file what to allowed whats + response = test_client.put(f'/api/v0/datalake/admin/whats/{wat}/') + assert response.status_code == 201, response.get_data() + + with patch('django.utils.timezone.now', return_value=created): + response = test_client.put( + f'/api/v0/files/{some_uuid}/', + json=simple_file + ) + assert response.status_code == 201, response.get_data() + + # check with GET + response = test_client.get( + f'/api/v0/files/work-id/{simple_file["work_id"]}/') + assert response.status_code == 200, response.get_data() + assert len(response.json) == 1 + resp = response.json[0] + resp.pop("created") + assert resp == simple_file + + +@pytest.mark.django_db +def test_file_work_id_search_empty(test_client, some_uuid): + response = test_client.get('/api/v0/files/work-id/mc-jenkins.13203002/') + assert response.status_code == 200, response.get_data() + assert response.json == [] + + +@pytest.mark.django_db +def 
test_file_timeline_search_empty(test_client):
+    response = test_client.get(
+        f'/api/v0/files/',
+        query_string={
+            'what': 'banana',
+        })
+    assert response.status_code == 200, response.get_data()
+    assert response.json == []
+
+
+@patch('datalake.models.boto3')
+@pytest.mark.django_db
+def test_file_timeline_search(boto3_mock, test_client, file_gen, settings):
+
+    settings.DATALAKE_STRICT_WHATS = False
+    settings.DATALAKE_STRICT_WORK_ID = False
+
+    files = []
+    created = timezone.now()
+    with patch('django.utils.timezone.now', return_value=created):
+        for _ in range(0, 10):
+            last = f = next(file_gen)
+            response = test_client.put(
+                f'/api/v0/files/{f["uuid"]}/',
+                json=f
+            )
+            f["created"] = created.isoformat(
+                "T", timespec="microseconds").replace("+00:00", "Z")
+            files += [f]
+            assert response.status_code == 201, response.get_data()
+
+    # create an arbitrary slice of files
+    _slice = files[2:7]
+
+    response = test_client.get(
+        f'/api/v0/files/',
+        query_string={
+            'what': last["what"],
+            'range_start': _slice[0]["start"],
+            'range_end': _slice[-1]["start"]
+        }
+    )
+    assert response.status_code == 200, response.get_data()
+    assert response.json == _slice[::-1]  # results go back in time
+
+
+@patch('datalake.models.boto3')
+@pytest.mark.django_db
+def test_reverse_metadata_lookup(boto3_mock, test_client, file_gen, settings):
+
+    settings.DATALAKE_STRICT_WHATS = False
+    settings.DATALAKE_STRICT_WORK_ID = False
+
+    files = []
+    created = timezone.now()
+    with patch('django.utils.timezone.now', return_value=created):
+        for _ in range(0, 10):
+            last = f = next(file_gen)
+            response = test_client.put(
+                f'/api/v0/files/{f["uuid"]}/',
+                json=f
+            )
+            f["created"] = created.isoformat(
+                "T", timespec="microseconds").replace("+00:00", "Z")
+            files += [f]
+            assert response.status_code == 201, response.get_data()
+
+    response = test_client.get(
+        f'/api/v0/files/cid/{f["cid"]}/',
+    )
+    assert response.status_code == 200, response.get_data()
+    assert response.json == files[::-1]  # results go
back in time
+
+
+@pytest.mark.django_db
+def test_file_search_required_what(test_client):
+    response = test_client.get(
+        f'/api/v0/files/',
+        query_string={
+            'where': 'a thing',
+        }
+    )
+    assert response.status_code == 400, response.get_data()
+    assert response.json['detail'] == "Missing query parameter 'what'"
diff --git a/missioncontrol/tests/test_files_admin.py b/missioncontrol/tests/test_files_admin.py
new file mode 100644
index 0000000..951b144
--- /dev/null
+++ b/missioncontrol/tests/test_files_admin.py
@@ -0,0 +1,51 @@
+import uuid
+import json
+from unittest.mock import patch, call
+
+import pytest
+from django.conf import settings
+from django.utils import timezone, dateformat
+
+
+@pytest.mark.django_db
+def test_put_what(test_client, simple_file):
+    what = simple_file["what"]
+    response = test_client.put(
+        f'/api/v0/datalake/admin/whats/{what}/'
+    )
+
+@pytest.mark.django_db
+def test_list_whats(test_client):
+    whats = ["a", "b", "c"]
+    for what in whats:
+        response = test_client.put(
+            f'/api/v0/datalake/admin/whats/{what}/'
+        )
+        assert response.status_code == 201, response.get_data()
+    response = test_client.get(
+        '/api/v0/datalake/admin/whats/'
+    )
+    assert response.status_code == 200, response.get_data()
+    assert response.json == whats
+
+@pytest.mark.django_db
+def test_delete_what(test_client):
+    whats = ["a", "b", "c"]
+    for what in whats:
+        response = test_client.put(
+            f'/api/v0/datalake/admin/whats/{what}/'
+        )
+        assert response.status_code == 201, response.get_data()
+
+    del_what = whats.pop()
+    response = test_client.delete(
+        f'/api/v0/datalake/admin/whats/{del_what}/'
+    )
+    assert response.status_code == 204
+
+    response = test_client.get(
+        '/api/v0/datalake/admin/whats/'
+    )
+    assert response.status_code == 200, response.get_data()
+    assert response.json == whats
+
diff --git a/missioncontrol/v0/files.py b/missioncontrol/v0/files.py
new file mode 100644
index 0000000..c10612f
--- /dev/null
+++ b/missioncontrol/v0/files.py
@@ -0,0
+1,109 @@ +import requests + +from connexion.exceptions import ProblemException +from django.conf import settings +from django.db.models import Q + +from datalake.models import DatalakeFile +from v0.time import utc + + +def search_by_cid(cid, limit=100): + files = DatalakeFile.objects.filter(cid=cid) + + results = files.all()[:limit] + return [x.to_dict() for x in results] + + +def search_work_id(work_id, what=None, where=None, limit=100): + files = DatalakeFile.objects.filter(work_id=work_id) + if what is not None: + files = files.filter(what=what) + + if where is not None: + files = files.filter(where=where) + + results = files.all()[:limit] + return [x.to_dict() for x in results] + + +def search(what, where=None, range_start=None, range_end=None, + range_inclusive="both", limit=100, order_by='-start'): + + files = DatalakeFile.objects.filter(what=what) + + if where is not None: + files = files.filter(where=where) + + # filter the start of the range + if range_start is not None: + range_start = utc(range_start) + if range_inclusive in ['end', 'neither']: + files = files.filter(start__gte=range_start) + else: + # can overlap if window, else instant must be >= range_start + files = files.filter( + Q(end__gte=range_start) | + (Q(start__gte=range_start) & Q(end=None)) + ) + + # filter the end of the range + if range_end is not None: + range_end = utc(range_end) + if range_inclusive in ['start', 'neither']: + # can overlap if window, else instant must be < range_end + files = files.filter( + Q(end__lte=range_end) | + (Q(start__lte=range_end) & Q(end=None)) + ) + else: + files = files.filter(start__lte=range_end) + + results = files.all().order_by(order_by)[:limit] + return [x.to_dict() for x in results] + + +def get_latest(what, where): + files = DatalakeFile.objects.filter(what=what, where=where) + result = files.latest() + return result.to_dict() + + +def get_raw(cid): + obj = DatalakeFile.objects.filter(cid=cid).first() + url = obj.get_download_url() + headers = 
{'Location': url} + return '', 302, headers + + +def get_data(uuid): + obj = DatalakeFile.objects.get(uuid=uuid) + url = obj.get_download_url() + headers = {'Location': url} + return '', 302, headers + + +def get(uuid): + obj = DatalakeFile.objects.get(uuid=uuid) + retval = obj.to_dict() + return retval + + +def put(uuid, file_meta): + file_meta["uuid"] = uuid + obj, created = DatalakeFile.objects.update_or_create( + uuid=uuid, defaults=file_meta + ) + retval = obj.to_dict() + return retval, 201 if created else 200 + + +def presign_upload(file_meta): + cid = file_meta["cid"] + if DatalakeFile.objects.filter(cid=cid).exists(): + raise ProblemException( + status=202, + title='Accepted', + detail='File already exists in datalake, no upload is required', + ) + return DatalakeFile.get_post_data_fields(**file_meta) diff --git a/missioncontrol/v0/files_admin.py b/missioncontrol/v0/files_admin.py new file mode 100644 index 0000000..76bfa17 --- /dev/null +++ b/missioncontrol/v0/files_admin.py @@ -0,0 +1,15 @@ +from datalake.models import What + +def search_whats(limit=250): + whats = What.objects.all()[:limit].values_list('what', flat=True) + return whats, 200 + +def put_what(what): + obj, created = What.objects.update_or_create(what=what) + return what, 201 if created else 200 + +def delete_what(what): + # what kind of cascade should happen here? + # should we be hiding existing files from search? + What.objects.filter(what=what).delete() + return None, 204