From 582c8b8734e69887d0e8827745cee3a0198f5a45 Mon Sep 17 00:00:00 2001
From: Tim Beccue <timothybeccue@gmail.com>
Date: Tue, 15 Feb 2022 12:48:29 -0800
Subject: [PATCH] Add capabilities for tif downloads

---
 api/db.py                                  |  6 +-
 api/handler.py                             | 66 +++++++++++++++----
 api/helpers.py                             |  2 +-
 api/s3_helpers.py                          | 74 ++++++++++++++++++++++
 api/tests/request_events/download_tif.json |  7 ++
 api/tests/unit/test_info_images.py         |  2 +-
 requirements.txt                           |  7 +-
 serverless.yml                             |  2 +
 8 files changed, 148 insertions(+), 18 deletions(-)
 create mode 100644 api/s3_helpers.py
 create mode 100644 api/tests/request_events/download_tif.json

diff --git a/api/db.py b/api/db.py
index 78e66b6..3e9ae6a 100644
--- a/api/db.py
+++ b/api/db.py
@@ -441,7 +441,7 @@ def filtered_images_query_handler(event, context):
         return http_response(HTTPStatus.NOT_FOUND, error_msg)
     except Exception as e:
         logger.exception("Error in filter images query. ")
-        return http_response(HTTPStatus.NOT_FOUND, error_msg)
+        return http_response(HTTPStatus.NOT_FOUND, e)
 
     return http_response(HTTPStatus.OK, images)
 
@@ -455,6 +455,6 @@ def remove_image_by_filename_handler(event, context):
     except Exception as e:
         error_msg = f"Could not delete {base_filename}. Error: {e}"
         logger.exception(error_msg)
-        return http_response(HTTPStatus.NOT_FOUND, error_msg)
+        return http_response(HTTPStatus.NOT_FOUND, e)
 
-    return http_response(HTTPStatus.OK, image)
+    return http_response(HTTPStatus.OK, f'Successfully removed {base_filename}')
diff --git a/api/handler.py b/api/handler.py
index f0846b4..7c84de4 100644
--- a/api/handler.py
+++ b/api/handler.py
@@ -15,6 +15,8 @@
 from api.helpers import dynamodb_r
 from api.helpers import DecimalEncoder, http_response, _get_body, _get_secret, get_db_connection
 from api.helpers import get_base_filename_from_full_filename
+from api.helpers import get_s3_file_url
+from api.s3_helpers import save_tiff_to_s3
 
 from api.db import get_files_within_date_range
 
@@ -145,7 +147,21 @@ def upload(event, context):
 
 
 def download(event, context): 
-    log.info(json.dumps(event, indent=2))
+    """ This method is used to handle requests to download individual data files. 
+    
+    Request body args:
+        s3_directory (str): data | info-images | allsky | test, specifies the s3 object prefix (ie folder)
+                            where the data is stored. Default is 'data'.
+        object_name (str): the full filename of the requested file. Appending this to the end of s3_directory 
+                           should specify the full key for the object in s3. 
+        image_type (str): tif | fits, used if the requester wants a tif file created from the underlying fits
+                          image. If so, the tif file is created on the fly. Default is 'fits'.
+        stretch (str): linear | arcsinh, used to specify the stretch parameters if a tif file is requested. 
+                       Default is 'arcsinh'.
+
+    Return: (str) presigned s3 download url that the requester can use to access the file. 
+    """
+    log.info(event)
     body = _get_body(event)
 
     # retrieve and validate the s3_directory
@@ -155,28 +171,48 @@ def download(event, context):
         log.warning(error_msg)
         return http_response(HTTPStatus.FORBIDDEN, error_msg)
 
-    key = f"{s3_directory}/{body['object_name']}"
+    key = f"{s3_directory}/{body['object_name']}"  # full path to object in s3 bucket
     params = {
         "Bucket": BUCKET_NAME,
         "Key": key,
     }
-    url = s3.generate_presigned_url(
-        ClientMethod='get_object',
-        Params=params,
-        ExpiresIn=S3_GET_TTL
-    )
-    log.info(f"Presigned download url: {url}")
-    return http_response(HTTPStatus.OK, str(url))
+    
+    image_type = body.get('image_type', 'fits')  # assume 'fits' if not otherwise specified
+
+    # Routine if TIFF file is specified
+    if image_type in ['tif', 'tiff']:   
+        stretch = body.get('stretch', 'arcsinh')
+        #s3_destination_key = f"downloads/tif/{body['object_name']}"
+        s3_destination_key = save_tiff_to_s3(BUCKET_NAME, key, stretch)
+        url = get_s3_file_url(s3_destination_key)
+        log.info(f"Presigned download url: {url}")
+        return http_response(HTTPStatus.OK, str(url))
+
+    # if TIFF file not requested, just get the file as-is from s3
+    else: 
+        url = s3.generate_presigned_url(
+            ClientMethod='get_object',
+            Params=params,
+            ExpiresIn=S3_GET_TTL
+        )
+        log.info(f"Presigned download url: {url}")
+        return http_response(HTTPStatus.OK, str(url))
+
 
 def download_zip(event, context):
+    """ This method returns a link to download a zip of multiple images in fits format. 
+    First, get a list of files to be zipped based on the query parameters specified. 
+    Next, call a lambda function (defined in the repository zip-downloads) that creates a zip
+    from the list of specified files and uploads that back to s3, returning a presigned download url. 
+    Finally, this function returns the url in the http response to the requester. 
+    """
 
-    pprint(event)
     body = _get_body(event)
     pprint(body)
 
     start_timestamp_s = int(body.get('start_timestamp_s'))
     end_timestamp_s = int(body.get('end_timestamp_s'))
-    fits_size = body.get('fits_size')
+    fits_size = body.get('fits_size')  # small | large | best
     site = body.get('site')
 
     files = get_files_within_date_range(site, start_timestamp_s, end_timestamp_s, fits_size)
@@ -210,8 +246,14 @@ def download_zip(event, context):
     pprint(logs)
     return http_response(HTTPStatus.OK, zip_url)
 
+
 def get_recent_uploads(event, context):
-    
+    """ Query for a list of files recently uploaded to s3. 
+    The logs routine is found in the ptrdata repository, in which a lambda function is triggered for new objects
+    in the s3 bucket with prefix 'data/' (where all the regular site data is sent). 
+
+    This is mainly used for easier debugging, and is displayed in the PTR web UI. 
+    """
     print("Query string params: ", event['queryStringParameters'])
     try: 
         site = event['queryStringParameters']['site']
diff --git a/api/helpers.py b/api/helpers.py
index 7974aef..523e1a3 100644
--- a/api/helpers.py
+++ b/api/helpers.py
@@ -94,4 +94,4 @@ def get_s3_file_url(path, ttl=604800):
         Params={"Bucket": BUCKET_NAME, "Key": path},
         ExpiresIn=ttl
     )
-    return url
\ No newline at end of file
+    return url
diff --git a/api/s3_helpers.py b/api/s3_helpers.py
new file mode 100644
index 0000000..49e5e2c
--- /dev/null
+++ b/api/s3_helpers.py
@@ -0,0 +1,74 @@
+import boto3
+import uuid
+from astropy.io import fits
+import numpy
+import tifffile
+import logging
+import bz2
+import os
+
+s3_client = boto3.client('s3')
+log = logging.getLogger()
+log.setLevel(logging.INFO)
+
+def save_tiff_to_s3(bucket, s3_source_key, stretch):
+    """ Convert the fits object at `s3_source_key` to a bz2-compressed tif stored in s3.
+    Returns the s3 key of the tif; its last path segment is also the download filename.
+    """
+    local_source_file_path = f"/tmp/{uuid.uuid4()}{s3_source_key.replace('/', '')}"  # unique per call
+    local_tiff_file_path = f"{local_source_file_path}.tif"
+    local_paths = (local_source_file_path, local_tiff_file_path, f"{local_tiff_file_path}.bz2")
+    try:
+        s3_client.download_file(bucket, s3_source_key, local_source_file_path)
+        image_metadata = create_tiff(local_source_file_path, local_tiff_file_path, stretch)
+        base_name = s3_source_key.split('/')[-1].split('.')[0]  # source filename, extensions dropped
+        s3_destination_key = f"downloads/tif/{image_metadata['FILTER']}-{stretch}-{base_name}.tif.bz2"
+        s3_client.upload_file(local_paths[-1], bucket, s3_destination_key)  # the .tif.bz2 file
+        return s3_destination_key
+    finally:  # /tmp can persist between invocations of a warm lambda container, so clean up
+        for path in filter(os.path.exists, local_paths):
+            os.remove(path)
+
+
+def create_tiff(local_source_file_path, local_tiff_file_path, stretch):
+    """Render the primary HDU of a fits file as a tif plus a .tif.bz2 copy.
+
+    `stretch` "arcsinh" rescales to the full uint16 range; any other value
+    writes the pixel data unchanged. Returns the metadata dict stored in the
+    tif. Raises OSError if the bz2 copy could not be written.
+    """
+    with fits.open(local_source_file_path) as hdulist:
+        metadata = {'PTRName': 'thename', 'FILTER': hdulist[0].header['FILTER']}
+        ts = numpy.asarray(hdulist[0].data)
+        if stretch == "arcsinh":
+            # arcsinh output is not integer-valued, so shift it to start at 0
+            # and scale it onto the 16 bit range before casting
+            ts = numpy.arcsinh(ts)
+            ts = ts - numpy.min(ts)
+            peak = numpy.max(ts)
+            if peak > 0:  # a flat image would otherwise divide by zero
+                ts = ts * (65535.0 / peak)
+            ts = ts.astype(numpy.uint16)
+        tifffile.imwrite(local_tiff_file_path, ts, metadata=metadata)
+    if not to_bz2(local_tiff_file_path):
+        raise OSError(f"could not compress {local_tiff_file_path}")
+    return metadata
+
+
+def to_bz2(filename, delete=False):
+    """Compress `filename` into `filename + '.bz2'`; return True on success.
+
+    Any failure is logged and reported as False instead of being raised. With
+    `delete=True` the original is removed only after the copy is written.
+    """
+    try:
+        with open(filename, 'rb') as uncomp:
+            comp = bz2.compress(uncomp.read())
+        with open(filename + '.bz2', 'wb') as target:
+            target.write(comp)
+        if delete:
+            os.remove(filename)
+        return True
+    except Exception:
+        log.exception('to_bz2 failed.')
+        return False
diff --git a/api/tests/request_events/download_tif.json b/api/tests/request_events/download_tif.json
new file mode 100644
index 0000000..6d3ce43
--- /dev/null
+++ b/api/tests/request_events/download_tif.json
@@ -0,0 +1,7 @@
+{
+    "resource": "/",
+    "path": "/",
+    "httpMethod": "POST",
+    "key": "val",
+    "body": "{\"s3_directory\": \"data\", \"object_name\": \"sro-kb001ms-20220208-00000679-EX10.fits.bz2\", \"stretch\": \"arcsinh\", \"image_type\": \"tif\"}"
+}
\ No newline at end of file
diff --git a/api/tests/unit/test_info_images.py b/api/tests/unit/test_info_images.py
index ba0261e..9a14d07 100644
--- a/api/tests/unit/test_info_images.py
+++ b/api/tests/unit/test_info_images.py
@@ -16,7 +16,7 @@ def create_info_image_entry(site, channel, base_filename, data_type, header=None
     optional_attributes = {}
     if include_fits_01: 
         optional_attributes['fits_01_exists'] = True
-        optional_attributes['fits_01_file_path'] = f"info-images/{base_filename}-{data_type}01.fits.bz2"
+        optional_attributes['fits_01_file_path'] = f"info-images/{base_filename}-{data_type}00.fits.bz2"
     if include_fits_10: 
         optional_attributes['fits_10_exists'] = True
         optional_attributes['fits_10_file_path'] = f"info-images/{base_filename}-{data_type}10.fits.bz2"
diff --git a/requirements.txt b/requirements.txt
index 77a142d..62f76fa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,6 @@
+astropy==4.3.1
 attrs==19.3.0
+boto==2.49.0
 boto3==1.9.134
 botocore==1.12.188
 certifi==2019.6.16
@@ -25,6 +27,7 @@ pluggy==0.13.1
 psycopg2-binary==2.8.4
 py==1.9.0
 pycparser==2.20
+pyerfa==2.0.0.1
 PyJWT==1.6.4
 pyparsing==2.4.7
 pytest==6.0.0
@@ -37,12 +40,14 @@ rope==0.17.0
 Rx==3.1.0
 s3transfer==0.2.1
 sgp4==2.12
-Shapely==1.7.1
+Shapely==1.7.0
 six==1.12.0
 skyfield==1.24
 SQLAlchemy==1.3.19
+tifffile==2021.11.2
 toml==0.10.1
 tqdm==4.32.2
+typing-extensions==3.7.4.3
 tzwhere==3.0.3
 urllib3==1.25.3
 zipp==3.1.0
diff --git a/serverless.yml b/serverless.yml
index 23cea00..81c5796 100644
--- a/serverless.yml
+++ b/serverless.yml
@@ -185,6 +185,8 @@ functions:
           method: post
   download:
     handler: api/handler.download
+    memorySize: 2024
+    timeout: 60 
     events:
       - http:
           path: /download