From 582c8b8734e69887d0e8827745cee3a0198f5a45 Mon Sep 17 00:00:00 2001 From: Tim Beccue Date: Tue, 15 Feb 2022 12:48:29 -0800 Subject: [PATCH] Add capabilities for tif downloads --- api/db.py | 6 +- api/handler.py | 66 +++++++++++++++---- api/helpers.py | 2 +- api/s3_helpers.py | 74 ++++++++++++++++++++++ api/tests/request_events/download_tif.json | 7 ++ api/tests/unit/test_info_images.py | 2 +- requirements.txt | 7 +- serverless.yml | 2 + 8 files changed, 148 insertions(+), 18 deletions(-) create mode 100644 api/s3_helpers.py create mode 100644 api/tests/request_events/download_tif.json diff --git a/api/db.py b/api/db.py index 78e66b6..3e9ae6a 100644 --- a/api/db.py +++ b/api/db.py @@ -441,7 +441,7 @@ def filtered_images_query_handler(event, context): return http_response(HTTPStatus.NOT_FOUND, error_msg) except Exception as e: logger.exception("Error in filter images query. ") - return http_response(HTTPStatus.NOT_FOUND, error_msg) + return http_response(HTTPStatus.NOT_FOUND, e) return http_response(HTTPStatus.OK, images) @@ -455,6 +455,6 @@ def remove_image_by_filename_handler(event, context): except Exception as e: error_msg = f"Could not delete {base_filename}. 
Error: {e}" logger.exception(error_msg) - return http_response(HTTPStatus.NOT_FOUND, error_msg) + return http_response(HTTPStatus.NOT_FOUND, e) - return http_response(HTTPStatus.OK, image) + return http_response(HTTPStatus.OK, f'Successfully removed {base_filename}') diff --git a/api/handler.py b/api/handler.py index f0846b4..7c84de4 100644 --- a/api/handler.py +++ b/api/handler.py @@ -15,6 +15,8 @@ from api.helpers import dynamodb_r from api.helpers import DecimalEncoder, http_response, _get_body, _get_secret, get_db_connection from api.helpers import get_base_filename_from_full_filename +from api.helpers import get_s3_file_url +from api.s3_helpers import save_tiff_to_s3 from api.db import get_files_within_date_range @@ -145,7 +147,21 @@ def upload(event, context): def download(event, context): - log.info(json.dumps(event, indent=2)) + """ This method is used to handle requests to download individual data files. + + Request body args: + s3_directory (str): data | info-images | allsky | test, specifies the s3 object prefix (ie folder) + where the data is stored. Default is 'data'. + object_name (str): the full filename of the requested file. Appending this to the end of s3_directory + should specify the full key for the object in s3. + image_type (str): tif | fits, used if the requester wants a tif file created from the underlying fits + image. If so, the tif file is created on the fly. Default is 'fits'. + stretch (str): linear | arcsinh, used to specify the stretch parameters if a tif file is requested. + Default is 'arcsinh'. + + Return: (str) presigned s3 download url that the requester can use to access the file. 
+ """ + log.info(event) body = _get_body(event) # retrieve and validate the s3_directory @@ -155,28 +171,48 @@ def download(event, context): log.warning(error_msg) return http_response(HTTPStatus.FORBIDDEN, error_msg) - key = f"{s3_directory}/{body['object_name']}" + key = f"{s3_directory}/{body['object_name']}" # full path to object in s3 bucket params = { "Bucket": BUCKET_NAME, "Key": key, } - url = s3.generate_presigned_url( - ClientMethod='get_object', - Params=params, - ExpiresIn=S3_GET_TTL - ) - log.info(f"Presigned download url: {url}") - return http_response(HTTPStatus.OK, str(url)) + + image_type = body.get('image_type', 'fits') # assume 'fits' if not otherwise specified + + # Routine if TIFF file is specified + if image_type in ['tif', 'tiff']: + stretch = body.get('stretch', 'arcsinh') + #s3_destination_key = f"downloads/tif/{body['object_name']}" + s3_destination_key = save_tiff_to_s3(BUCKET_NAME, key, stretch) + url = get_s3_file_url(s3_destination_key) + log.info(f"Presigned download url: {url}") + return http_response(HTTPStatus.OK, str(url)) + + # if TIFF file not requested, just get the file as-is from s3 + else: + url = s3.generate_presigned_url( + ClientMethod='get_object', + Params=params, + ExpiresIn=S3_GET_TTL + ) + log.info(f"Presigned download url: {url}") + return http_response(HTTPStatus.OK, str(url)) + def download_zip(event, context): + """ This method returns a link to download a zip of multiple images in fits format. + First, get a list of files to be zipped based on the query parameters specified. + Next, call a lambda function (defined in the repository zip-downloads) that creates a zip + from the list of specified files and uploads that back to s3, returning a presigned download url. + Finally, this function returns the url in the http response to the requester. 
+ """ - pprint(event) body = _get_body(event) pprint(body) start_timestamp_s = int(body.get('start_timestamp_s')) end_timestamp_s = int(body.get('end_timestamp_s')) - fits_size = body.get('fits_size') + fits_size = body.get('fits_size') # small | large | best site = body.get('site') files = get_files_within_date_range(site, start_timestamp_s, end_timestamp_s, fits_size) @@ -210,8 +246,14 @@ def download_zip(event, context): pprint(logs) return http_response(HTTPStatus.OK, zip_url) + def get_recent_uploads(event, context): - + """ Query for a list of files recently uploaded to s3. + The logs routine is found in the ptrdata repository, in which a lambda function is triggered for new objects + in the s3 bucket with prefix 'data/' (where all the regular site data is sent). + + This is mainly used for easier debugging, and is displayed in the PTR web UI. + """ print("Query string params: ", event['queryStringParameters']) try: site = event['queryStringParameters']['site'] diff --git a/api/helpers.py b/api/helpers.py index 7974aef..523e1a3 100644 --- a/api/helpers.py +++ b/api/helpers.py @@ -94,4 +94,4 @@ def get_s3_file_url(path, ttl=604800): Params={"Bucket": BUCKET_NAME, "Key": path}, ExpiresIn=ttl ) - return url \ No newline at end of file + return url diff --git a/api/s3_helpers.py b/api/s3_helpers.py new file mode 100644 index 0000000..49e5e2c --- /dev/null +++ b/api/s3_helpers.py @@ -0,0 +1,74 @@ +import boto3 +import uuid +from astropy.io import fits +import numpy +import tifffile +import logging +import bz2 +import os + +s3_client = boto3.client('s3') +log = logging.getLogger() +log.setLevel(logging.INFO) + +def save_tiff_to_s3(bucket, s3_source_key, stretch): + tmpkey = s3_source_key.replace('/', '') + local_source_file_path = f"/tmp/{uuid.uuid4()}{tmpkey}" + local_tiff_file_path = f"/tmp/tiff-{tmpkey}.tif" + local_tiff_file_path_bz2 = f"/tmp/tiff-{tmpkey}.tif.bz2" + + s3_client.download_file(bucket, s3_source_key, local_source_file_path) + image_metadata = 
create_tiff(local_source_file_path, local_tiff_file_path, stretch) + + # generate the name for the item in s3, also the name of the downloaded file + source_filename = s3_source_key.split('/')[-1] + tif_filename = f"{source_filename.split('.')[0]}.tif.bz2" + tif_filename = f"{image_metadata['FILTER']}-{stretch}-{tif_filename}" + s3_destination_key = f"downloads/tif/{tif_filename}" + + s3_client.upload_file(local_tiff_file_path_bz2, bucket, s3_destination_key) + return s3_destination_key + + +def create_tiff(local_source_file_path, local_tiff_file_path, stretch): + with fits.open(local_source_file_path) as hdulist: + prihdr = hdulist[0].header + + metadata = { + 'PTRName': 'thename', + 'FILTER': prihdr['FILTER'] + } + + # Linear 16bit tif + ts = numpy.asarray(hdulist[0].data) + + # Arcsinh 16bit tif (It is an artificial stretch so there is some massaging to get it into a 16 bit tif) + if stretch == "arcsinh": + ts = numpy.arcsinh(ts) + ts = ts - numpy.min(ts) + # rescale it to take all of the integer range + ts = ts * (65535.0/(numpy.max(ts))) + ts = ts.astype(numpy.uint16) + + tifffile.imwrite(local_tiff_file_path, ts, metadata=metadata) + to_bz2(local_tiff_file_path) + return metadata + + +def to_bz2(filename, delete=False): + try: + uncomp = open(filename, 'rb') + comp = bz2.compress(uncomp.read()) + uncomp.close() + if delete: + try: + os.remove(filename) + except: + pass + target = open(filename + '.bz2', 'wb') + target.write(comp) + target.close() + return True + except: + log.info('to_bz2 failed.') + return False diff --git a/api/tests/request_events/download_tif.json b/api/tests/request_events/download_tif.json new file mode 100644 index 0000000..6d3ce43 --- /dev/null +++ b/api/tests/request_events/download_tif.json @@ -0,0 +1,7 @@ +{ + "resource": "/", + "path": "/", + "httpMethod": "POST", + "key": "val", + "body": "{\"s3_directory\": \"data\", \"object_name\": \"sro-kb001ms-20220208-00000679-EX10.fits.bz2\", \"stretch\": \"arcsinh\", \"image_type\": 
\"tif\"}" +} \ No newline at end of file diff --git a/api/tests/unit/test_info_images.py b/api/tests/unit/test_info_images.py index ba0261e..9a14d07 100644 --- a/api/tests/unit/test_info_images.py +++ b/api/tests/unit/test_info_images.py @@ -16,7 +16,7 @@ def create_info_image_entry(site, channel, base_filename, data_type, header=None optional_attributes = {} if include_fits_01: optional_attributes['fits_01_exists'] = True - optional_attributes['fits_01_file_path'] = f"info-images/{base_filename}-{data_type}01.fits.bz2" + optional_attributes['fits_01_file_path'] = f"info-images/{base_filename}-{data_type}00.fits.bz2" if include_fits_10: optional_attributes['fits_10_exists'] = True optional_attributes['fits_10_file_path'] = f"info-images/{base_filename}-{data_type}10.fits.bz2" diff --git a/requirements.txt b/requirements.txt index 77a142d..62f76fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,6 @@ +astropy==4.3.1 attrs==19.3.0 +boto==2.49.0 boto3==1.9.134 botocore==1.12.188 certifi==2019.6.16 @@ -25,6 +27,7 @@ pluggy==0.13.1 psycopg2-binary==2.8.4 py==1.9.0 pycparser==2.20 +pyerfa==2.0.0.1 PyJWT==1.6.4 pyparsing==2.4.7 pytest==6.0.0 @@ -37,12 +40,14 @@ rope==0.17.0 Rx==3.1.0 s3transfer==0.2.1 sgp4==2.12 -Shapely==1.7.1 +Shapely==1.7.0 six==1.12.0 skyfield==1.24 SQLAlchemy==1.3.19 +tifffile==2021.11.2 toml==0.10.1 tqdm==4.32.2 +typing-extensions==3.7.4.3 tzwhere==3.0.3 urllib3==1.25.3 zipp==3.1.0 diff --git a/serverless.yml b/serverless.yml index 23cea00..81c5796 100644 --- a/serverless.yml +++ b/serverless.yml @@ -185,6 +185,8 @@ functions: method: post download: handler: api/handler.download + memorySize: 2024 + timeout: 60 events: - http: path: /download