Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download results with signed url's #63

Merged
merged 7 commits into from
Apr 20, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions openeo_driver/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,17 @@ def __init__(self, filename: str):
super().__init__(message=self.message.format(file=filename))


class FileExpiredException(OpenEOApiException):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New errors in this file have first to be defined through https://github.com/Open-EO/openeo-api/blob/master/errors.json

see Open-EO/openeo-api#379

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in the mean time, you can just raise a generic exception:

        raise OpenEOApiException(
            status_code=410,
            code="FileExpired",
            message="File '{file}' has expired".format(file=filename)"
         )

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting suggestion from Open-EO/openeo-api#379 (comment):
raise it with something like:

raise OpenEOApiException( 
    status_code=410, 
    code="AssetLinkExpired", 
    message="Batch job asset link has expired. Request the batch job results again for fresh asset links" ,
)

status_code = 410
code = 'FileExpired'
message = "File '{file}' has expired."
_description = 'The requested file does not exist anymore.'
_tags = ['File Management']

def __init__(self, filename: str):
super().__init__(message=self.message.format(file=filename))


class FileContentInvalidException(OpenEOApiException):
status_code = 400
code = 'FileContentInvalid'
Expand Down
58 changes: 52 additions & 6 deletions openeo_driver/views.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import base64
import copy
import datetime
import functools
import logging
import os
import re
import uuid
import time
import typing
from collections import namedtuple, defaultdict
from typing import Callable, Tuple, List
import uuid
from hashlib import md5
from typing import Callable, Tuple, List, Union

import flask
import flask_cors
Expand All @@ -24,7 +28,8 @@
from openeo_driver.datacube import DriverDataCube
from openeo_driver.delayed_vector import DelayedVector
from openeo_driver.errors import OpenEOApiException, ProcessGraphMissingException, ServiceNotFoundException, \
FilePathInvalidException, ProcessGraphNotFoundException, FeatureUnsupportedException
FilePathInvalidException, ProcessGraphNotFoundException, FeatureUnsupportedException, CredentialsInvalidException, \
FileExpiredException
from openeo_driver.processes import DEFAULT_NAMESPACE
from openeo_driver.save_result import SaveResult, get_temp_file
from openeo_driver.users import HttpAuthHandler, User
Expand Down Expand Up @@ -591,13 +596,34 @@ def list_job_results(job_id, user: User):
job_info = backend_implementation.batch_jobs.get_job_info(job_id, user.user_id)
results = backend_implementation.batch_jobs.get_results(job_id=job_id, user_id=user.user_id)

def base64_user_id() -> str:
return base64.urlsafe_b64encode(user.user_id.encode()).decode()

def secure_token(filename, expires) -> str:
return _compute_secure_token(job_id, user.user_id, filename, expires)

def expiration_timestamp() -> Union[str, None]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expiration timestamp return value can still be int at this point I think

if 'SIGNED_URL_EXPIRATION' in os.environ:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's cleaner to use current_app.config instead of os.environ (see usage in other places of views.py)

this config is populated here:

app.config['OPENEO_BACKEND_VERSION'] = backend_version
app.config['OPENEO_TITLE'] = title
app.config['OPENEO_DESCRIPTION'] = description
app.config['OPENEO_BACKEND_DEPLOY_METADATA'] = deploy_metadata
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 # bytes

at this place you could get the values from os.environ. I would just avoid using os.environ in views.py directly

return str(time.time() + int(os.environ['SIGNED_URL_EXPIRATION']))
else:
return None

def download_url(filename) -> str:
if os.getenv("SIGNED_URL", "FALSE").upper() == "TRUE":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use smart_bool (imported already here from openeo_driver.utils)

expires = expiration_timestamp()
return url_for('.download_job_result_signed', job_id=job_id, user_base64=base64_user_id(),
secure_key=secure_token(filename, expires), filename=filename,
expires=expires, _external=True)
else:
return url_for('.download_job_result', job_id=job_id, filename=filename, _external=True)

def asset_object(filename: str, asset_metadata: dict) -> dict:
bands = asset_metadata.get("bands")
nodata = asset_metadata.get("nodata")

return dict_no_none(**{
"title": asset_metadata.get("title",filename), #there has to be title
"href": url_for('.download_job_result', job_id=job_id, filename=filename, _external=True),
"href": download_url(filename),
"type": asset_metadata.get("media_type"),
"eo:bands": [dict_no_none(**{"name": band.name, "center_wavelength": band.wavelength_um})
for band in bands] if bands else None,
Expand Down Expand Up @@ -645,8 +671,7 @@ def asset_object(filename: str, asset_metadata: dict) -> dict:
else:
result = {
"links": [
{"href": url_for('.download_job_result', job_id=job_id, filename=filename, _external=True)}
for filename in results.keys()
{"href": download_url(filename)} for filename in results.keys()
]
}

Expand Down Expand Up @@ -702,6 +727,27 @@ def download_job_result(job_id, filename, user: User):
return send_from_directory(output_dir, filename, mimetype=results[filename].get("media_type"))


@api_endpoint
@openeo_bp.route('/jobs/<job_id>/results/<user_base64>/<secure_key>/<filename>', methods=['GET'])
def download_job_result_signed(job_id, user_base64, secure_key, filename):
expires = request.args.get('expires')
user_id = base64.urlsafe_b64decode(user_base64).decode()
if secure_key != _compute_secure_token(job_id, user_id, filename, expires):
raise CredentialsInvalidException()
if expires and int(expires) < time.time():
raise FileExpiredException(str(filename))
results = backend_implementation.batch_jobs.get_results(job_id=job_id, user_id=user_id)
if filename not in results.keys():
raise FilePathInvalidException(str(filename) + ' not in ' + str(list(results.keys())))
output_dir = results[filename]["output_dir"]
return send_from_directory(output_dir, filename, mimetype=results[filename].get("media_type"))


def _compute_secure_token(job_id, user_id, filename, expiration_timestamp):
token_key = job_id + user_id + filename + str(expiration_timestamp) + os.environ['SIGNED_URL_SECRET']
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above: avoid os.environ here

return md5(token_key.encode()).hexdigest()


@api_endpoint
@openeo_bp.route('/jobs/<job_id>/logs', methods=['GET'])
@auth_handler.requires_bearer_auth
Expand Down
168 changes: 168 additions & 0 deletions tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,6 +864,138 @@ def test_get_job_results_100(self, api100):
'type': 'Feature'
}

@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#'})
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

regarding the avoiding of os.environ not of above: I think you can just do @mock.patch.dict(app.config, {'SIG... here (app object is already available at this point)

def test_get_job_results_signed_040(self, api040):
with self._fresh_job_registry(next_job_id='job-370'):
dummy_backend.DummyBatchJobs._update_status(
job_id='07024ee9-7847-4b8a-b260-6c879a2b3cdc', user_id=TEST_USER, status='finished')
resp = api040.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results', headers=self.AUTH_HEADER)
assert resp.assert_status_code(200).json == {
'links': [
{
'href': 'http://oeo.net/openeo/0.4.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/50afb0cad129e61d415278c4ffcd8a83/output.tiff'
}
]
}

@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#'})
def test_get_job_results_signed_100(self, api100):
with self._fresh_job_registry(next_job_id='job-372'):
dummy_backend.DummyBatchJobs._update_status(
job_id='07024ee9-7847-4b8a-b260-6c879a2b3cdc', user_id=TEST_USER, status='finished')
resp = api100.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results', headers=self.AUTH_HEADER)
assert resp.assert_status_code(200).json == {
'assets': {
'output.tiff': {
'roles': ['data'],
'title': 'output.tiff',
'href': 'http://oeo.net/openeo/1.0.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/50afb0cad129e61d415278c4ffcd8a83/output.tiff',
'type': 'image/tiff; application=geotiff',
'eo:bands': [{
'name': 'NDVI',
'center_wavelength': 1.23
}],
'file:nodata': [123]
}
},
'geometry': None,
'id': '07024ee9-7847-4b8a-b260-6c879a2b3cdc',
'links': [
{
'rel': 'self',
'href': 'http://oeo.net/openeo/1.0.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results',
'type': 'application/json'
},
{
'rel': 'card4l-document',
'href': 'http://ceos.org/ard/files/PFS/SR/v5.0/CARD4L_Product_Family_Specification_Surface_Reflectance-v5.0.pdf',
'type': 'application/pdf'
}
],
'properties': {
'created': '2017-01-01T09:32:12Z',
'datetime': None,
'card4l:processing_chain': {'process_graph': {'foo': {'process_id': 'foo', 'arguments': {}}}},
'card4l:specification': 'SR',
'card4l:specification_version': '5.0',
'processing:facility': 'VITO - SPARK',
'processing:software': 'openeo-geotrellis-0.0.1'
},
'stac_extensions': ['processing',
'card4l-eo',
'https://stac-extensions.github.io/file/v1.0.0/schema.json',
'eo'],
'stac_version': '0.9.0',
'type': 'Feature'
}

@mock.patch('time.time', mock.MagicMock(return_value=1234))
@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#', 'SIGNED_URL_EXPIRATION': '1000'})
def test_get_job_results_signed_with_expiration_040(self, api040):
with self._fresh_job_registry(next_job_id='job-371'):
dummy_backend.DummyBatchJobs._update_status(
job_id='07024ee9-7847-4b8a-b260-6c879a2b3cdc', user_id=TEST_USER, status='finished')
resp = api040.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results', headers=self.AUTH_HEADER)
assert resp.assert_status_code(200).json == {
'links': [
{
'href': 'http://oeo.net/openeo/0.4.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/fd0ca65e29c6d223da05b2e73a875683/output.tiff?expires=2234'
}
]
}

@mock.patch('time.time', mock.MagicMock(return_value=1234))
@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#', 'SIGNED_URL_EXPIRATION': '1000'})
def test_get_job_results_signed_with_expiration_100(self, api100):
with self._fresh_job_registry(next_job_id='job-373'):
dummy_backend.DummyBatchJobs._update_status(
job_id='07024ee9-7847-4b8a-b260-6c879a2b3cdc', user_id=TEST_USER, status='finished')
resp = api100.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results', headers=self.AUTH_HEADER)
assert resp.assert_status_code(200).json == {
'assets': {
'output.tiff': {
'roles': ['data'],
'title': 'output.tiff',
'href': 'http://oeo.net/openeo/1.0.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/fd0ca65e29c6d223da05b2e73a875683/output.tiff?expires=2234',
'type': 'image/tiff; application=geotiff',
'eo:bands': [{
'name': 'NDVI',
'center_wavelength': 1.23
}],
'file:nodata': [123]
}
},
'geometry': None,
'id': '07024ee9-7847-4b8a-b260-6c879a2b3cdc',
'links': [
{
'rel': 'self',
'href': 'http://oeo.net/openeo/1.0.0/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results',
'type': 'application/json'
},
{
'rel': 'card4l-document',
'href': 'http://ceos.org/ard/files/PFS/SR/v5.0/CARD4L_Product_Family_Specification_Surface_Reflectance-v5.0.pdf',
'type': 'application/pdf'
}
],
'properties': {
'created': '2017-01-01T09:32:12Z',
'datetime': None,
'card4l:processing_chain': {'process_graph': {'foo': {'process_id': 'foo', 'arguments': {}}}},
'card4l:specification': 'SR',
'card4l:specification_version': '5.0',
'processing:facility': 'VITO - SPARK',
'processing:software': 'openeo-geotrellis-0.0.1'
},
'stac_extensions': ['processing',
'card4l-eo',
'https://stac-extensions.github.io/file/v1.0.0/schema.json',
'eo'],
'stac_version': '0.9.0',
'type': 'Feature'
}

def test_get_job_results_invalid_job(self, api):
api.get('/jobs/deadbeef-f00/results', headers=self.AUTH_HEADER).assert_error(404, "JobNotFound")

Expand All @@ -881,6 +1013,42 @@ def test_download_result(self, api, tmp_path):
assert resp.assert_status_code(200).data == b"tiffdata"
assert resp.headers["Content-Type"] == "image/tiff; application=geotiff"

@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#'})
def test_download_result_signed(self, api, tmp_path):
output_root = Path(tmp_path)
with mock.patch.object(dummy_backend.DummyBatchJobs, '_output_root', return_value=output_root):
output = output_root / '07024ee9-7847-4b8a-b260-6c879a2b3cdc' / 'output.tiff'
output.parent.mkdir(parents=True)
with output.open('wb') as f:
f.write(b'tiffdata')
resp = api.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/50afb0cad129e61d415278c4ffcd8a83/output.tiff')
assert resp.assert_status_code(200).data == b'tiffdata'
assert resp.headers['Content-Type'] == 'image/tiff; application=geotiff'

@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#'})
def test_download_result_signed_invalid(self, api):
resp = api.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/test123/output.tiff')
assert resp.assert_error(403, 'CredentialsInvalid')

@mock.patch('time.time', mock.MagicMock(return_value=1234))
@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#', 'SIGNED_URL_EXPIRATION': '1000'})
def test_download_result_signed_with_expiration(self, api, tmp_path):
output_root = Path(tmp_path)
with mock.patch.object(dummy_backend.DummyBatchJobs, '_output_root', return_value=output_root):
output = output_root / '07024ee9-7847-4b8a-b260-6c879a2b3cdc' / 'output.tiff'
output.parent.mkdir(parents=True)
with output.open('wb') as f:
f.write(b'tiffdata')
resp = api.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/fd0ca65e29c6d223da05b2e73a875683/output.tiff?expires=2234')
assert resp.assert_status_code(200).data == b'tiffdata'
assert resp.headers['Content-Type'] == 'image/tiff; application=geotiff'

@mock.patch('time.time', mock.MagicMock(return_value=3456))
@mock.patch.dict(os.environ, {'SIGNED_URL': 'TRUE', 'SIGNED_URL_SECRET': '123&@#', 'SIGNED_URL_EXPIRATION': '1000'})
def test_download_result_signed_with_expiration_invalid(self, api, tmp_path):
resp = api.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/results/TXIuVGVzdA%3D%3D/fd0ca65e29c6d223da05b2e73a875683/output.tiff?expires=2234')
assert resp.assert_error(410, 'FileExpired')

def test_get_batch_job_logs(self, api):
resp = api.get('/jobs/07024ee9-7847-4b8a-b260-6c879a2b3cdc/logs', headers=self.AUTH_HEADER)
assert resp.assert_status_code(200).json == {
Expand Down