Skip to content

Commit

Permalink
Merge pull request #451 from geoadmin/feat-PB-848-delete-expired-items
Browse files Browse the repository at this point in the history
PB-848: Delete expired items
  • Loading branch information
benschs authored Aug 20, 2024
2 parents 0f11852 + a3a6e64 commit 0425176
Show file tree
Hide file tree
Showing 8 changed files with 256 additions and 3 deletions.
2 changes: 2 additions & 0 deletions app/config/settings_prod.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@
'.yml': 'application/vnd.oai.openapi+yaml;version=3.0'
}

DELETE_EXPIRED_ITEMS_OLDER_THAN_HOURS = 24

# Media files (i.e. uploaded content=assets in this project)
UPLOAD_FILE_CHUNK_SIZE = 1024 * 1024 # Size in Bytes
STORAGES = {
Expand Down
66 changes: 66 additions & 0 deletions app/stac_api/management/commands/remove_expired_items.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from datetime import timedelta

from django.conf import settings
from django.core.management.base import CommandParser
from django.utils import timezone

from stac_api.models import Item
from stac_api.utils import CommandHandler
from stac_api.utils import CustomBaseCommand


class Handler(CommandHandler):
    """Command handler that removes items, together with their assets, once they have
    been expired for at least the configured number of hours."""

    def delete(self, instance, object_type):
        """Delete ``instance``, or only report it when the dry-run option is set.

        Args:
            instance: object exposing a ``delete()`` method (an item or a set of assets)
            object_type: str
                label used in the dry-run message
        """
        if not self.options['dry_run']:
            instance.delete()
            return
        self.print_success(f'skipping deletion of {object_type} {instance}')

    def run(self):
        """Look up all items expired for at least ``min_age_hours`` hours and delete each
        of them after its assets; in dry-run mode only report what would be deleted."""
        self.print_success('running command to remove expired items')
        min_age_hours = self.options['min_age_hours']
        self.print_warning(f"deleting all items expired longer than {min_age_hours} hours")

        # Items whose expiry date is at or before this point in time get removed
        expiry_threshold = timezone.now() - timedelta(hours=min_age_hours)
        expired_items = Item.objects.filter(properties_expires__lte=expiry_threshold)
        dry_run = self.options['dry_run']

        for expired_item in expired_items:
            item_assets = expired_item.assets.all()
            # Count the assets before they are deleted, the number is reported below
            asset_count = len(item_assets)
            self.delete(item_assets, 'assets')
            self.delete(expired_item, 'item')
            if dry_run:
                continue
            self.print_success(
                f"deleted item {expired_item.name} and {asset_count} assets belonging to it.",
                extra={"item": expired_item.name}
            )

        item_count = len(expired_items)
        if dry_run:
            summary = f'[dry run] would have removed {item_count} expired items'
        else:
            summary = f'successfully removed {item_count} expired items'
        self.print_success(summary)


class Command(CustomBaseCommand):
    """Management command entry point; the actual work is delegated to ``Handler``."""

    help = """Remove items and their assets that have expired more than
DELETE_EXPIRED_ITEMS_OLDER_THAN_HOURS hours ago.
This command is thought to be scheduled as cron job.
"""

    def add_arguments(self, parser: CommandParser) -> None:
        """Register ``--dry-run`` and ``--min-age-hours`` on top of the inherited arguments.

        Args:
            parser: CommandParser
                parser the command line options are added to
        """
        super().add_arguments(parser)
        # The settings value is used when --min-age-hours is not given on the command line
        fallback_hours = settings.DELETE_EXPIRED_ITEMS_OLDER_THAN_HOURS
        parser.add_argument(
            '--dry-run',
            action='store_true',
            help='Simulate deleting items, without actually deleting them'
        )
        parser.add_argument(
            '--min-age-hours',
            type=int,
            default=fallback_hours,
            help=f"Minimum hours the item must have been expired for (default {fallback_hours})"
        )

    def handle(self, *args, **options):
        """Run the expired item removal with the parsed command line options."""
        handler = Handler(self, options)
        handler.run()
8 changes: 6 additions & 2 deletions app/stac_api/validators_view.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging

from django.db.models import Q
from django.http import Http404
from django.utils import timezone
from django.utils.translation import gettext_lazy as _

from rest_framework import serializers
Expand Down Expand Up @@ -28,7 +30,7 @@ def validate_collection(kwargs):


def validate_item(kwargs):
'''Validate that the item given in request kwargs exists
'''Validate that the item given in request kwargs exists and is not expired
Args:
kwargs: dict
Expand All @@ -38,7 +40,9 @@ def validate_item(kwargs):
Http404: when the item doesn't exists
'''
if not Item.objects.filter(
name=kwargs['item_name'], collection__name=kwargs['collection_name']
Q(properties_expires=None) | Q(properties_expires__gte=timezone.now()),
name=kwargs['item_name'],
collection__name=kwargs['collection_name']
).exists():
logger.error(
"The item %s is not part of the collection %s",
Expand Down
7 changes: 7 additions & 0 deletions app/stac_api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from django.db import transaction
from django.db.models import Min
from django.db.models import Prefetch
from django.db.models import Q
from django.utils import timezone
from django.utils.translation import gettext_lazy as _

from rest_framework import generics
Expand Down Expand Up @@ -364,6 +366,8 @@ class ItemsList(generics.GenericAPIView):
def get_queryset(self):
# filter based on the url
queryset = Item.objects.filter(
# filter expired items
Q(properties_expires__gte=timezone.now()) | Q(properties_expires=None),
collection__name=self.kwargs['collection_name']
).prefetch_related(Prefetch('assets', queryset=Asset.objects.order_by('name')), 'links')
bbox = self.request.query_params.get('bbox', None)
Expand Down Expand Up @@ -428,6 +432,8 @@ class ItemDetail(
def get_queryset(self):
# filter based on the url
queryset = Item.objects.filter(
# filter expired items
Q(properties_expires__gte=timezone.now()) | Q(properties_expires=None),
collection__name=self.kwargs['collection_name']
).prefetch_related(Prefetch('assets', queryset=Asset.objects.order_by('name')), 'links')

Expand Down Expand Up @@ -536,6 +542,7 @@ class AssetDetail(
def get_queryset(self):
    # Restrict to the collection and item named in the url and hide assets whose
    # item has an expiry date in the past (no expiry date means never expired)
    item_not_expired = (
        Q(item__properties_expires=None) | Q(item__properties_expires__gte=timezone.now())
    )
    url_kwargs = self.kwargs
    return Asset.objects.filter(
        item_not_expired,
        item__collection__name=url_kwargs['collection_name'],
        item__name=url_kwargs['item_name']
    )
Expand Down
29 changes: 29 additions & 0 deletions app/tests/tests_10/test_assets_endpoint.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
from datetime import datetime
from datetime import timedelta
from json import dumps
from json import loads
from pprint import pformat

from django.contrib.auth import get_user_model
from django.test import Client
from django.urls import reverse
from django.utils import timezone

from stac_api.models import Asset
from stac_api.utils import get_asset_path
Expand Down Expand Up @@ -88,6 +90,19 @@ def test_assets_endpoint_item_does_not_exist(self):
)
self.assertStatusCode(404, response)

def test_assets_endpoint_item_expired(self):
    # The assets listing of an item that expired one hour ago must answer with 404
    collection_name = self.collection.name
    expired_since = timezone.now() - timedelta(hours=1)
    expired_sample = self.factory.create_item_sample(
        self.collection, name='item-expired', db_create=True, properties_expires=expired_since
    )
    item_expired = expired_sample.model
    assets_url = f"/{STAC_BASE_V}/collections/{collection_name}/items/{item_expired.name}/assets"
    response = self.client.get(assets_url)
    self.assertStatusCode(404, response)

def test_single_asset_endpoint(self):
collection_name = self.collection.name
item_name = self.item.name
Expand All @@ -105,6 +120,20 @@ def test_single_asset_endpoint(self):
# hash computation of the ETag
self.assertEtagHeader(None, response)

def test_single_assets_endpoint_item_expired(self):
    # A single asset of an item that expired one hour ago must answer with 404
    collection_name = self.collection.name
    expired_since = timezone.now() - timedelta(hours=1)
    expired_item = self.factory.create_item_sample(
        self.collection, name='item-expired', db_create=True, properties_expires=expired_since
    ).model
    asset = self.factory.create_asset_sample(item=expired_item, db_create=True).model
    asset_url = (
        f"/{STAC_BASE_V}/collections/{collection_name}"
        f"/items/{expired_item.name}/assets/{asset.name}"
    )
    response = self.client.get(asset_url)
    self.assertStatusCode(404, response)


class AssetsUnimplementedEndpointTestCase(StacBaseTestCase):

Expand Down
23 changes: 23 additions & 0 deletions app/tests/tests_10/test_items_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from django.contrib.auth import get_user_model
from django.test import Client
from django.urls import reverse
from django.utils import timezone

from stac_api.models import Item
from stac_api.utils import fromisoformat
Expand Down Expand Up @@ -44,6 +45,13 @@ def test_items_endpoint(self):
# To make sure that item sorting is working, make sure that the items were not
# created in ascending order, same for assets
item_3 = self.factory.create_item_sample(self.collection, name='item-0', db_create=True)
# created item that is expired should not show up in the get result
self.factory.create_item_sample(
self.collection,
name='item-expired',
db_create=True,
properties_expires=timezone.now() - timedelta(hours=1)
)
assets = self.factory.create_asset_samples(
3, item_3.model, name=['asset-1.tiff', 'asset-0.tiff', 'asset-2.tiff'], db_create=True
)
Expand Down Expand Up @@ -146,6 +154,21 @@ def test_single_item_endpoint(self):
ignore=['id', 'links']
)

def test_single_item_endpoint_expired(self):
    # An item whose expiry date lies one hour in the past must not be found
    collection_name = self.collection.name
    expired_since = timezone.now() - timedelta(hours=1)
    expired_item = self.factory.create_item_sample(
        self.collection, name='item-expired', db_create=True, properties_expires=expired_since
    )

    item_url = f"/{STAC_BASE_V}/collections/{collection_name}/items/{expired_item['name']}"
    response = self.client.get(item_url)
    self.assertStatusCode(404, response)

def test_items_endpoint_non_existing_collection(self):
response = self.client.get(f"/{STAC_BASE_V}/collections/non-existing-collection/items")
self.assertStatusCode(404, response)
Expand Down
121 changes: 121 additions & 0 deletions app/tests/tests_10/test_remove_expired_items.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from datetime import timedelta
from io import StringIO

from django.core.management import call_command
from django.test import TestCase
from django.utils import timezone

from stac_api.models import Asset
from stac_api.models import Item

from tests.tests_10.data_factory import Factory
from tests.utils import mock_s3_asset_file


class RemoveExpiredItems(TestCase):
    """Tests for the ``remove_expired_items`` management command."""

    @classmethod
    def setUpTestData(cls):
        cls.factory = Factory()
        cls.collection = cls.factory.create_collection_sample().model

    def _call_command(self, *args, **kwargs):
        """Run the command with the given arguments and return what it wrote to stdout."""
        captured_stdout = StringIO()
        call_command(
            "remove_expired_items", *args, stdout=captured_stdout, stderr=StringIO(), **kwargs
        )
        return captured_stdout.getvalue()

    @mock_s3_asset_file
    def test_remove_item_dry_run(self):
        # An item expired for 50 hours is reported by the dry run but must stay in the DB
        expired_since = timezone.now() - timedelta(hours=50)
        item_0 = self.factory.create_item_sample(
            self.collection, name='item-0', db_create=True, properties_expires=expired_since
        )
        assets = self.factory.create_asset_samples(
            2, item_0.model, name=['asset-0.tiff', 'asset-1.tiff'], db_create=True
        )

        expected_output = """running command to remove expired items
deleting all items expired longer than 24 hours
skipping deletion of assets <QuerySet [<Asset: asset-0.tiff>, <Asset: asset-1.tiff>]>
skipping deletion of item collection-1/item-0
[dry run] would have removed 1 expired items
"""
        out = self._call_command("--dry-run", "--no-color")
        self.assertEqual(out, expected_output)

        self.assertTrue(
            Item.objects.filter(name=item_0['name']).exists(),
            msg="Item has been deleted by dry run"
        )
        for index in (0, 1):
            self.assertTrue(
                Asset.objects.filter(name=assets[index]['name']).exists(),
                msg="Asset has been deleted by dry run"
            )

    @mock_s3_asset_file
    def test_remove_item(self):
        # The item is expired for 10 hours: it survives the default minimum age of
        # 24 hours and is removed once the minimum age is lowered to 9 hours
        expired_since = timezone.now() - timedelta(hours=10)
        item_1 = self.factory.create_item_sample(
            self.collection, name='item-1', db_create=True, properties_expires=expired_since
        )
        assets = self.factory.create_asset_samples(
            2, item_1.model, name=['asset-2.tiff', 'asset-3.tiff'], db_create=True
        )

        expected_nothing_removed = """running command to remove expired items
deleting all items expired longer than 24 hours
successfully removed 0 expired items
"""
        out = self._call_command("--no-color")
        self.assertEqual(out, expected_nothing_removed)

        self.assertTrue(
            Item.objects.filter(name=item_1['name']).exists(),
            msg="not expired item has been deleted"
        )
        for index in (0, 1):
            self.assertTrue(
                Asset.objects.filter(name=assets[index]['name']).exists(),
                msg="not expired asset has been deleted"
            )

        expected_item_removed = """running command to remove expired items
deleting all items expired longer than 9 hours
deleted item item-1 and 2 assets belonging to it. extra={'item': 'item-1'}
successfully removed 1 expired items
"""
        out = self._call_command("--min-age-hours=9", "--no-color")
        self.assertEqual(out, expected_item_removed)

        self.assertFalse(
            Item.objects.filter(name=item_1['name']).exists(), msg="Expired item was not deleted"
        )
        for index in (0, 1):
            self.assertFalse(
                Asset.objects.filter(name=assets[index]['name']).exists(),
                msg="Asset of expired item was not deleted"
            )
3 changes: 2 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ services:
image: minio/minio
env_file: ./minio.env
user: ${UID}
command: server /data
command: server /data --console-address ":9001"
volumes:
- type: bind
source: ${PWD}/.volumes/minio
target: /data
ports:
- 9090:${S3_PORT:-9000}
- 9001:9001
s3-client:
image: minio/mc
links:
Expand Down

0 comments on commit 0425176

Please sign in to comment.